# Simple text processing: tokenize a sample sentence and keep only longer alphabetic tokens.

import re
from nltk.tokenize import word_tokenize
from collections import Counter
from nltk.corpus import stopwords
# Sample text to process.
text = "The cat is in the box. The cat likes the box. The box is over the cat."

# Lowercase, split into word tokens with NLTK, and keep only purely
# alphabetic tokens longer than two characters (drops punctuation and
# short function words such as "is"/"in" — but note "the" survives the
# length filter).
# NOTE(review): `stopwords` and `Counter` are imported above but unused —
# presumably intended for stop-word removal and frequency counting; confirm.
tokens = [w for w in word_tokenize(text.lower()) if w.isalpha() and len(w) > 2]
print(tokens)
# Expected output:
# ['the', 'cat', 'the', 'box', 'the', 'cat', 'likes', 'the', 'box', 'the', 'box', 'over', 'the', 'cat']