# Bag of Words

import re
from nltk.tokenize import word_tokenize
from collections import Counter
# Tokenize the sample sentences and tally how often each token occurs.
# Counting is case-sensitive, so 'The' and 'the' end up as separate entries,
# and punctuation tokens such as '.' are counted as well (see output below).
word_counter = Counter(
    word_tokenize(
        """The cat is in the box. The cat likes the box. The box is over the cat."""
    )
)
print(word_counter)
# Output:
# Counter({'The': 3, 'cat': 3, 'the': 3, 'box': 3, '.': 3, 'is': 2, 'in': 1, 'likes': 1, 'over': 1})

# Show the four most frequent tokens together with their counts.
print(word_counter.most_common(4))
# Output:
# [('The', 3), ('cat', 3), ('the', 3), ('box', 3)]