import math
from collections import Counter
import timeit
import numpy as np
def eta(data, unit='natural'):
    """Return the Shannon entropy of the empirical distribution of *data*.

    Every distinct element of ``data`` is treated as a symbol whose
    probability is its relative frequency in ``data``.

    Args:
        data: Sized collection of hashable items (it must support ``len()``
            and iteration).
        unit: Name of the logarithm base: ``'shannon'`` (base 2),
            ``'natural'`` (base e) or ``'hartley'`` (base 10).

    Returns:
        The entropy as a float; ``0.0`` when ``data`` has at most one item.

    Raises:
        KeyError: If ``unit`` is not one of the known names and ``data``
            has more than one item.
    """
    base = {
        'shannon': 2.,
        'natural': math.exp(1),
        'hartley': 10.,
    }

    total = len(data)
    if total <= 1:
        return 0.0

    # iter() makes Counter tally the elements even when `data` is a mapping;
    # Counter(mapping) would instead adopt the mapping's values as counts.
    counts = Counter(iter(data))

    log_base = base[unit]
    ent = 0.0
    for count in counts.values():
        # Every stored count is >= 1, so p > 0 and the logarithm is defined.
        p = float(count) / total
        ent -= p * math.log(p, log_base)
    return ent
# Example: five distinct words, each with relative frequency 1/5, so the
# entropy in the default 'natural' unit equals ln(5).
eta([
    'Canada',
    'is',
    'the',
    'sweetest',
    'country',
])
# Result of the call above: 1.6094379124341005
# Micro-benchmark: time `eta` on a short list of integer labels.
# The setup string imports `eta` from `__main__`, so this only works when
# this file itself is executed as the main script.
repeat_number = 1000  # number of eta() calls per timing sample
e = timeit.repeat(
    setup='labels=[1,3,5,2,3,5,3,2,1,3,4,5];from __main__ import eta',
    stmt='eta(labels)',
    number=repeat_number,
    repeat=3,
)
# Each entry of `e` is the total time for `repeat_number` calls, so the
# printed average is a per-sample total, not a per-call time.
print('Method: {}, Avg.: {:.6f}'.format('eta', np.mean(e)))
# Sample output (timing varies by machine): Method: eta, Avg.: 0.010078