python – NLTK word tokenizer crashes dosent even work what should i do it gives a big error i dont understand?

So I was working on my first sentiment analysis project and I tried to use a word tokenizer by using the function nltk.word_tokenizer(example)

also used another syntax didn’t work and got an error, searched for the error but realized it might be an issue with the library itself kindly try to solve this problem

example = df['Text'][50]
print(example)
type(example)
nltk.word_tokenize(example)
nltk.word_tokenize()
{
    "name": "LookupError",
    "message": "n**********************************************************************n  Resource u001b[93mpunktu001b[0m not found.n  Please use the NLTK Downloader to obtain the resource:nn  u001b[31m>>> import nltkn  >>> nltk.download('punkt')n  u001b[0mn  For more information see: https://www.nltk.org/data.htmlnn  Attempted to load u001b[93mtokenizers/punkt/english.pickleu001b[0mnn  Searched in:n    - 'C:\\Users\\sudhu/nltk_data'n    - 'c:\\Users\\sudhu\\AppData\\Local\\Programs\\Python\\Python310\\nltk_data'n    - 'c:\\Users\\sudhu\\AppData\\Local\\Programs\\Python\\Python310\\share\\nltk_data'n    - 'c:\\Users\\sudhu\\AppData\\Local\\Programs\\Python\\Python310\\lib\\nltk_data'n    - 'C:\\Users\\sudhu\\AppData\\Roaming\\nltk_data'n    - 'C:\\nltk_data'n    - 'D:\\nltk_data'n    - 'E:\\nltk_data'n    - ''n**********************************************************************n",
    "stack": "u001b[1;31m---------------------------------------------------------------------------u001b[0mnu001b[1;31mLookupErroru001b[0m                               Traceback (most recent call last)nu001b[1;32md:\practiceprograms\sentiment_analysis.ipynb Cell 9u001b[0m in u001b[0;36m<cell line: 1>u001b[1;34m()u001b[0mnu001b[1;32m----> <a href="vscode-notebook-cell:/d%3A/practiceprograms/sentiment_analysis.ipynb#ch0000010?line=0">1</a>u001b[0m nltku001b[39m.u001b[39;49mword_tokenize(example)nu001b[0;32m      <a href="vscode-notebook-cell:/d%3A/practiceprograms/sentiment_analysis.ipynb#ch0000010?line=1">2</a>u001b[0m nltku001b[39m.u001b[39mword_tokenize()nnFile u001b[1;32mc:\Users\sudhu\AppData\Local\Programs\Python\Python310\lib\site-packages\nltk\tokenize\__init__.py:129u001b[0m, in u001b[0;36mword_tokenizeu001b[1;34m(text, language, preserve_line)u001b[0mnu001b[0;32m    114u001b[0m u001b[39mdefu001b[39;00m u001b[39mword_tokenizeu001b[39m(text, languageu001b[39m=u001b[39mu001b[39m"u001b[39mu001b[39menglishu001b[39mu001b[39m"u001b[39m, preserve_lineu001b[39m=u001b[39mu001b[39mFalseu001b[39;00m):nu001b[0;32m    115u001b[0m     u001b[39m"""u001b[39;00mnu001b[0;32m    116u001b[0m u001b[39m    Return a tokenized copy of *text*,u001b[39;00mnu001b[0;32m    117u001b[0m u001b[39m    using NLTK's recommended word tokenizeru001b[39;00mnu001b[1;32m   (...)u001b[0mnu001b[0;32m    127u001b[0m u001b[39m    :type preserve_line: boolu001b[39;00mnu001b[0;32m    128u001b[0m u001b[39m    """u001b[39;00mnu001b[1;32m--> 129u001b[0m     sentences u001b[39m=u001b[39m [text] u001b[39mifu001b[39;00m preserve_line u001b[39melseu001b[39;00m sent_tokenize(text, language)nu001b[0;32m    130u001b[0m     u001b[39mreturnu001b[39;00m [nu001b[0;32m    131u001b[0m         token u001b[39mforu001b[39;00m sent u001b[39minu001b[39;00m sentences u001b[39mforu001b[39;00m token u001b[39minu001b[39;00m _treebank_word_tokenizeru001b[39m.u001b[39mtokenize(sent)nu001b[0;32m    132u001b[0m     ]nnFile u001b[1;32mc:\Users\sudhu\AppData\Local\Programs\Python\Python310\lib\site-packages\nltk\tokenize\__init__.py:106u001b[0m, in u001b[0;36msent_tokenizeu001b[1;34m(text, language)u001b[0mnu001b[0;32m     96u001b[0m u001b[39mdefu001b[39;00m u001b[39msent_tokenizeu001b[39m(text, languageu001b[39m=u001b[39mu001b[39m"u001b[39mu001b[39menglishu001b[39mu001b[39m"u001b[39m):nu001b[0;32m     97u001b[0m     u001b[39m"""u001b[39;00mnu001b[0;32m     98u001b[0m u001b[39m    Return a sentence-tokenized copy of *text*,u001b[39;00mnu001b[0;32m     99u001b[0m u001b[39m    using NLTK's recommended sentence tokenizeru001b[39;00mnu001b[1;32m   (...)u001b[0mnu001b[0;32m    104u001b[0m u001b[39m    :param language: the model name in the Punkt corpusu001b[39;00mnu001b[0;32m    105u001b[0m u001b[39m    """u001b[39;00mnu001b[1;32m--> 106u001b[0m     tokenizer u001b[39m=u001b[39m load(u001b[39mfu001b[39;49mu001b[39m"u001b[39;49mu001b[39mtokenizers/punkt/u001b[39;49mu001b[39m{u001b[39;49;00mlanguageu001b[39m}u001b[39;49;00mu001b[39m.pickleu001b[39;49mu001b[39m"u001b[39;49m)nu001b[0;32m    107u001b[0m     u001b[39mreturnu001b[39;00m tokenizeru001b[39m.u001b[39mtokenize(text)nnFile u001b[1;32mc:\Users\sudhu\AppData\Local\Programs\Python\Python310\lib\site-packages\nltk\data.py:750u001b[0m, in u001b[0;36mloadu001b[1;34m(resource_url, format, cache, verbose, logic_parser, fstruct_reader, encoding)u001b[0mnu001b[0;32m    747u001b[0m     u001b[39mprintu001b[39m(u001b[39mfu001b[39mu001b[39m"u001b[39mu001b[39m<<Loading u001b[39mu001b[39m{u001b[39;00mresource_urlu001b[39m}u001b[39;00mu001b[39m>>u001b[39mu001b[39m"u001b[39m)nu001b[0;32m    749u001b[0m u001b[39m# Load the resource.u001b[39;00mnu001b[1;32m--> 750u001b[0m opened_resource u001b[39m=u001b[39m _open(resource_url)nu001b[0;32m    752u001b[0m u001b[39mifu001b[39;00m u001b[39mformatu001b[39m u001b[39m==u001b[39m u001b[39m"u001b[39mu001b[39mrawu001b[39mu001b[39m"u001b[39m:nu001b[0;32m    753u001b[0m     resource_val u001b[39m=u001b[39m opened_resourceu001b[39m.u001b[39mread()nnFile u001b[1;32mc:\Users\sudhu\AppData\Local\Programs\Python\Python310\lib\site-packages\nltk\data.py:876u001b[0m, in u001b[0;36m_openu001b[1;34m(resource_url)u001b[0mnu001b[0;32m    873u001b[0m protocol, path_ u001b[39m=u001b[39m split_resource_url(resource_url)nu001b[0;32m    875u001b[0m u001b[39mifu001b[39;00m protocol u001b[39misu001b[39;00m u001b[39mNoneu001b[39;00m u001b[39moru001b[39;00m protocolu001b[39m.u001b[39mlower() u001b[39m==u001b[39m u001b[39m"u001b[39mu001b[39mnltku001b[39mu001b[39m"u001b[39m:nu001b[1;32m--> 876u001b[0m     u001b[39mreturnu001b[39;00m find(path_, path u001b[39m+u001b[39;49m [u001b[39m"u001b[39;49mu001b[39m"u001b[39;49m])u001b[39m.u001b[39mopen()nu001b[0;32m    877u001b[0m u001b[39melifu001b[39;00m protocolu001b[39m.u001b[39mlower() u001b[39m==u001b[39m u001b[39m"u001b[39mu001b[39mfileu001b[39mu001b[39m"u001b[39m:nu001b[0;32m    878u001b[0m     u001b[39m# urllib might not use mode="rb", so handle this one ourselves:u001b[39;00mnu001b[0;32m    879u001b[0m     u001b[39mreturnu001b[39;00m find(path_, [u001b[39m"u001b[39mu001b[39m"u001b[39m])u001b[39m.u001b[39mopen()nnFile u001b[1;32mc:\Users\sudhu\AppData\Local\Programs\Python\Python310\lib\site-packages\nltk\data.py:583u001b[0m, in u001b[0;36mfindu001b[1;34m(resource_name, paths)u001b[0mnu001b[0;32m    581u001b[0m sep u001b[39m=u001b[39m u001b[39m"u001b[39mu001b[39m*u001b[39mu001b[39m"u001b[39m u001b[39m*u001b[39m u001b[39m70u001b[39mnu001b[0;32m    582u001b[0m resource_not_found u001b[39m=u001b[39m u001b[39mfu001b[39mu001b[39m"u001b[39mu001b[39m\nu001b[39;00mu001b[39m{u001b[39;00msepu001b[39m}u001b[39;00mu001b[39m\nu001b[39;00mu001b[39m{u001b[39;00mmsgu001b[39m}u001b[39;00mu001b[39m\nu001b[39;00mu001b[39m{u001b[39;00msepu001b[39m}u001b[39;00mu001b[39m\nu001b[39;00mu001b[39m"u001b[39mnu001b[1;32m--> 583u001b[0m u001b[39mraiseu001b[39;00m u001b[39mLookupErroru001b[39;00m(resource_not_found)nnu001b[1;31mLookupErroru001b[0m: n**********************************************************************n  Resource u001b[93mpunktu001b[0m not found.n  Please use the NLTK Downloader to obtain the resource:nn  u001b[31m>>> import nltkn  >>> nltk.download('punkt')n  u001b[0mn  For more information see: https://www.nltk.org/data.htmlnn  Attempted to load u001b[93mtokenizers/punkt/english.pickleu001b[0mnn  Searched in:n    - 'C:\\Users\\sudhu/nltk_data'n    - 'c:\\Users\\sudhu\\AppData\\Local\\Programs\\Python\\Python310\\nltk_data'n    - 'c:\\Users\\sudhu\\AppData\\Local\\Programs\\Python\\Python310\\share\\nltk_data'n    - 'c:\\Users\\sudhu\\AppData\\Local\\Programs\\Python\\Python310\\lib\\nltk_data'n    - 'C:\\Users\\sudhu\\AppData\\Roaming\\nltk_data'n    - 'C:\\nltk_data'n    - 'D:\\nltk_data'n    - 'E:\\nltk_data'n    - ''n**********************************************************************n"
}

Leave a Comment