import urllib.request
import csv
import codecs
import numpy as np



# Stream the sonnet over HTTP and echo it one line at a time, decoding the
# response incrementally as UTF-8.
url = "https://data.heatonresearch.com/data/t81-558/datasets/sonnet_18.txt"
with urllib.request.urlopen(url) as urlstream:
    decoded_lines = codecs.iterdecode(urlstream, 'utf-8')
    # The loop variable is deliberately named `line`: it stays bound to the
    # final line after the loop and is reused later in the file.
    for line in decoded_lines:
        # Strip trailing whitespace (including the newline) before printing.
        print(line.rstrip())
Sonnet 18 original text
William Shakespeare

Shall I compare thee to a summer's day?
Thou art more lovely and more temperate:
Rough winds do shake the darling buds of May,
And summer's lease hath all too short a date:
Sometime too hot the eye of heaven shines,
And often is his gold complexion dimm'd;
And every fair from fair sometime declines,
By chance or nature's changing course untrimm'd;
But thy eternal summer shall not fade
Nor lose possession of that fair thou owest;
Nor shall Death brag thou wander'st in his shade,
When in eternal lines to time thou growest:
So long as men can breathe or eyes can see,
So long lives this and this gives life to thee.
import spacy

# Load the small English pipeline by its full package name; the bare 'en'
# shortcut link is rejected by spaCy v3 and later.
nlp = spacy.load('en_core_web_sm')
# `line` still holds the last line read by the download loop above, so only
# the sonnet's final line is tokenized here.
doc = nlp(line.rstrip())
# Print one token per output line.
for token in doc:
    print(token.text)
So
long
lives
this
and
this
gives
life
to
thee
.
import spacy

# Load the small English pipeline by its full package name; the bare 'en'
# shortcut link is rejected by spaCy v3 and later.
nlp = spacy.load('en_core_web_sm')
# Python 3 string literals are already Unicode, so no u"" prefix is needed.
doc = nlp("Apple is looking at buying a U.K. startup for $1 billion")
# Print one token per output line.
for token in doc:
    print(token.text)
Apple
is
looking
at
buying
a
U.K.
startup
for
$
1
billion

You can also obtain the part of speech for each word. Common parts of speech include nouns, verbs, pronouns, and adjectives.

# Show every token of `doc` next to its part-of-speech label.
for token in doc:
    text, pos = token.text, token.pos_
    print(text, pos)
Apple PROPN
is AUX
looking VERB
at ADP
buying VERB
a DET
U.K. PROPN
startup NOUN
for ADP
$ SYM
1 NUM
billion NUM

spaCy tokens expose attributes, such as `like_num`, for checking whether parts of a sentence appear to be numbers, acronyms, or other entities.

# Report, for every token of `doc`, whether it looks like a number.
for token in doc:
    looks_numeric = token.like_num
    print(f"{token} is like number? {looks_numeric}")
Apple is like number? False
is is like number? False
looking is like number? False
at is like number? False
buying is like number? False
a is like number? False
U.K. is like number? False
startup is like number? False
for is like number? False
$ is like number? False
1 is like number? True
billion is like number? True
import spacy
from spacy import displacy

# Load the small English pipeline by its full package name; the bare 'en'
# shortcut link is rejected by spaCy v3 and later.
nlp = spacy.load('en_core_web_sm')
# Python 3 string literals are already Unicode, so no u"" prefix is needed.
doc = nlp("This is a sentance")
# NOTE: displacy.serve starts a local web server for the dependency
# visualization and blocks until it is interrupted. Inside a notebook,
# spaCy's own warning recommends displacy.render instead.
displacy.serve(doc, style="dep")
/home/gao/anaconda3/lib/python3.7/runpy.py:193: UserWarning: [W011] It looks like you're calling displacy.serve from within a Jupyter notebook or a similar environment. This likely means you're already running a local web server, so there's no need to make displaCy start another one. Instead, you should be able to replace displacy.serve with displacy.render to show the visualization.
  "__main__", mod_spec)
<!DOCTYPE html> displaCy
This DET is AUX a DET sentance NOUN nsubj det attr
Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.

Note: you will have to manually stop the above cell, because `displacy.serve` keeps its web server running until it is interrupted.

# Printing the parsed document shows its original text.
print(doc)
This is a sentance

The following code shows how to reduce words to their lemmas (dictionary base forms), a process called lemmatization. Here each word of the sentence is reduced to its most basic form; for example, "striped" becomes "stripe."

import spacy

# Load the small English pipeline by its full package name (the bare 'en'
# shortcut link is rejected by spaCy v3 and later). The parser and named
# entity recognizer are disabled because lemmatization does not need them.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

sentence = "The striped bats are hanging on their feet for best"

# Run the sentence through the loaded pipeline object `nlp`
doc = nlp(sentence)

# Extract the lemma of each token and join them with single spaces. As the
# last expression of a notebook cell, the resulting string is displayed.

" ".join(token.lemma_ for token in doc)
'the stripe bat be hang on -PRON- foot for good'
from spacy.lang.en.stop_words import STOP_WORDS

# Dump spaCy's English stop-word collection as-is. The output below shows it
# is a set of strings, so the display order carries no meaning.
print(STOP_WORDS)
{'moreover', 'does', 'becomes', 'though', 'done', 'often', 'all', 'next', 'sometime', 'show', 'your', 'forty', 'am', 'on', 'however', 'empty', '’m', 'again', 'have', 'up', 'six', 'any', 'ours', 'may', 'mine', 'not', 'upon', 'top', 'twenty', 'please', 'latter', 'noone', 'this', 'make', 'former', 'wherein', 'hereupon', 'nevertheless', "'ll", 'less', 'nowhere', 'side', 'via', 'whatever', '’s', 'becoming', 'onto', 'by', 'being', 'n‘t', 'should', 'themselves', 'almost', 'rather', 'nor', 'once', 'hence', 'few', 'unless', 'along', 'off', 'everyone', 'put', 'fifty', 'one', 'hereby', 'neither', 'anyhow', 'whom', '‘ve', 'it', 'give', 'seemed', '‘s', 'or', 'first', 'is', "'ve", 'everything', 'per', 'front', 'whose', 'whoever', 'three', '’re', 'just', 'could', 'beyond', 'none', 'below', 'you', 'thereupon', 'wherever', 'full', 'a', 'whereupon', 'go', 'then', 'although', 'has', 'yet', 'we', 'call', 'something', 'ten', 'using', 'anything', 'until', 'two', 'but', '‘d', 'now', 'amongst', 'serious', 'if', 'already', 'some', 'me', 'their', 'latterly', 'part', 'further', 'between', 'down', 'get', 'namely', 'more', 'nothing', 'do', 'back', 'anywhere', 'hers', 'become', 'there', 'always', 'eight', 'anyway', 'sixty', '’ll', 'around', 'alone', 'who', 'move', 'over', 'well', 'yourself', 'in', "'d", 'else', 'about', 'name', 'without', 'therefore', 'thence', 'anyone', '‘m', 'least', 'had', "'m", 'see', 'last', 'beside', 'i', 'cannot', 're', 'she', 'therein', 'made', 'must', 'own', 'they', 'became', 'are', 'other', 'at', 'someone', 'never', 'while', 'here', 'when', 'meanwhile', 'each', 'ever', 'his', 'five', 'thru', 'somewhere', 'itself', 'what', 'only', 'than', 'very', 'under', 'many', 'whole', '’d', 'say', 'together', 'most', 'seeming', 'ca', 'where', '‘ll', 'eleven', 'among', 'our', 'otherwise', 'of', 'out', 'myself', 'keep', 'her', 'might', 'really', 'why', 'an', 'against', 'him', 'thereby', 'were', 'twelve', 'towards', "n't", 'can', 'so', 'also', 'whither', 'hundred', 'seems', 
'thereafter', 'whereby', 'behind', 'whether', 'ourselves', 'formerly', 'either', 'afterwards', 'its', 'various', 'whereafter', 'mostly', 'doing', 'those', 'to', 'nobody', 'perhaps', 'with', 'too', 'these', 'seem', 'toward', 'third', 'into', 'be', 'bottom', 'the', 'enough', 'amount', 'four', 'regarding', 'which', 'even', 'before', 'them', 'same', 'after', 'that', 'will', 'would', 'hereafter', 'elsewhere', 'through', 'how', 'whence', '‘re', 'above', 'take', 'indeed', 'whereas', 'from', 'himself', 'did', 'quite', 'herein', 'he', 'yours', 'was', 'because', 'herself', 'us', 'thus', 'during', 'everywhere', 'been', "'re", 'another', 'no', 'several', 'much', 'due', 'throughout', 'within', 'still', 'except', 'n’t', 'as', 'my', 'whenever', 'fifteen', 'besides', 'sometimes', 'used', 'nine', "'s", 'across', 'somehow', 'yourselves', 'both', 'others', 'for', 'every', 'such', 'and', 'since', 'beforehand', '’ve'}