CoCalc Public Files: nltk-stopwords.ipynb
Author: Harald Schilly
Description: nltk stopwords
Compute Environment: Ubuntu 18.04 (Deprecated)
In [1]:
# Tokenizers for splitting text into sentences and into words.
from nltk.tokenize import sent_tokenize, word_tokenize
# Corpus reader used below to obtain the English stop-word list.
from nltk.corpus import stopwords
In [2]:
# Sample text to run the stop-word filter on.
data = "All work and no play makes jack dull boy. All work and no play makes jack a dull boy."

# Hold the English stop words in a set so each membership test is a hash lookup.
stopWords = set(stopwords.words('english'))

# Split the sample text into tokens (words and punctuation marks).
words = word_tokenize(data)

# Accumulator for the tokens that survive stop-word filtering.
wordsFiltered = []
In [3]:
# Append, in original order, every token that is not in the stop-word set.
# NOTE: the membership test is case-sensitive, so the capitalized "All" is
# kept in the printed result; punctuation tokens such as "." are kept too.
wordsFiltered.extend(w for w in words if w not in stopWords)

# Show the filtered token list once, after all tokens have been processed.
print(wordsFiltered)
['All', 'work', 'play', 'makes', 'jack', 'dull', 'boy', '.', 'All', 'work', 'play', 'makes', 'jack', 'dull', 'boy', '.']
