-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathmetrics.py
144 lines (110 loc) · 4.04 KB
/
metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
'''
Implementation of the metrics used for evaluating CESI results.
C: Clusters produced by the algorithm
E: Gold standard clusters
'''
import itertools, sys
def macroPrecision(C_clust2ele, E_ele2clust):
    '''
    Fraction of clusters in C_clust2ele whose known elements share exactly
    one label in E_ele2clust.

    For each cluster, the label sets E_ele2clust[ele] of all elements that
    are present in E_ele2clust are intersected. The cluster is counted when
    that intersection holds exactly one label. Elements missing from
    E_ele2clust are skipped; a cluster with no known elements is not counted
    in the numerator but still counts in the denominator.

    C_clust2ele: dict mapping a cluster id to an iterable of elements
    E_ele2clust: dict mapping an element to a set of labels

    Returns 0 if C_clust2ele is empty, otherwise
    (number of counted clusters) / (number of clusters) as a float.
    '''
    if len(C_clust2ele) == 0:
        return 0

    num_prec = 0
    for cluster in C_clust2ele.values():
        # Label sets of the elements that the reference mapping knows about.
        label_sets = [E_ele2clust[ele] for ele in cluster if ele in E_ele2clust]

        common = set()
        if label_sets:
            common = label_sets[0]
            for labels in label_sets[1:]:
                # intersection() returns a new set, so the caller's sets are
                # never mutated.
                common = common.intersection(labels)

        if len(common) == 1:
            num_prec += 1
        elif len(common) > 1:
            # More than one shared label means the reference mapping is
            # ambiguous for this cluster; report it under this metric's name.
            print('ERROR In Clustering macro!!!')

    return float(num_prec) / float(len(C_clust2ele))
def microPrecision(C_clust2ele, E_ele2clust):
    '''
    Element-weighted purity of the clusters in C_clust2ele.

    For every cluster, count how often each label from E_ele2clust occurs
    among its elements and credit the cluster with the count of its most
    frequent label. Elements missing from E_ele2clust earn no credit but
    are still included in the denominator.

    C_clust2ele: dict mapping a cluster id to a sized iterable of elements
    E_ele2clust: dict mapping an element to an iterable of labels

    Returns 0 if there are no elements at all, otherwise
    (sum of per-cluster best counts) / (total number of elements) as a float.
    '''
    credited = 0
    n_elements = 0

    for members in C_clust2ele.values():
        n_elements += len(members)

        label_counts = {}
        for member in members:
            if member in E_ele2clust:
                for label in E_ele2clust[member]:
                    label_counts[label] = label_counts.get(label, 0) + 1

        # An empty tally (no known elements) contributes zero credit.
        if label_counts:
            credited += max(label_counts.values())

    if n_elements == 0:
        return 0
    return float(credited) / float(n_elements)
def pairPrecision(C_clust2ele, E_ele2clust):
    '''
    Fraction of within-cluster element pairs that share exactly one label.

    Every unordered pair of elements drawn from the same cluster of
    C_clust2ele is counted in the denominator. A pair is a hit when both
    elements are present in E_ele2clust and their label sets intersect in
    exactly one label.

    C_clust2ele: dict mapping a cluster id to an iterable of elements
    E_ele2clust: dict mapping an element to a set of labels

    Returns 0 if no cluster has at least two elements, otherwise
    hits / pairs as a float.
    '''
    num_hit = 0
    num_pairs = 0

    for cluster in C_clust2ele.values():
        # Stream the pairs instead of building a list of all of them: the
        # list grows quadratically with cluster size and is only read once.
        for e1, e2 in itertools.combinations(cluster, 2):
            num_pairs += 1
            if e1 not in E_ele2clust or e2 not in E_ele2clust:
                continue
            if len(E_ele2clust[e1].intersection(E_ele2clust[e2])) == 1:
                num_hit += 1

    if num_pairs == 0:
        return 0
    return float(num_hit) / float(num_pairs)
def pairwiseMetric(C_clust2ele, E_ele2clust, E_clust2ent):
    '''
    Pairwise precision and recall of the clusters in C_clust2ele.

    A hit is an unordered pair of elements from the same cluster of
    C_clust2ele where both elements are present in E_ele2clust and their
    label sets share at least one label.

    C_clust2ele: dict mapping a cluster id to an iterable of elements
    E_ele2clust: dict mapping an element to a set of labels
    E_clust2ent: dict mapping a cluster id to a sized collection of elements

    Returns a tuple (hits / pairs in C, hits / pairs in E). If either side
    has no pairs, returns (1e-6, 1e-6) instead.
    '''
    num_hit = 0
    num_C_pairs = 0
    for cluster in C_clust2ele.values():
        # Stream the pairs; a materialized list would grow quadratically
        # with cluster size.
        for e1, e2 in itertools.combinations(cluster, 2):
            num_C_pairs += 1
            if (e1 in E_ele2clust and e2 in E_ele2clust
                    and len(E_ele2clust[e1].intersection(E_ele2clust[e2])) > 0):
                num_hit += 1

    num_E_pairs = 0
    for cluster in E_clust2ent.values():
        # Only the number of pairs is needed here: n choose 2.
        size = len(cluster)
        num_E_pairs += size * (size - 1) // 2

    if num_C_pairs == 0 or num_E_pairs == 0:
        return 1e-6, 1e-6

    return float(num_hit) / float(num_C_pairs), float(num_hit) / float(num_E_pairs)
def calcF1(prec, recall):
    '''
    Harmonic mean of prec and recall: 2 * prec * recall / (prec + recall).

    Returns 0 when prec + recall is zero, which avoids dividing by zero.
    '''
    total = prec + recall
    if total != 0:
        return 2 * (prec * recall) / total
    return 0
def microF1(C_ele2clust, C_clust2ele, E_ele2clust, E_clust2ent):
    '''
    Micro F1 score of the produced clusters (C) against the gold clusters (E).

    Precision is microPrecision(C_clust2ele, E_ele2clust). Recall is the same
    measure with the roles swapped: microPrecision(E_clust2ent, C_ele2clust).
    The two are combined with calcF1.
    '''
    return calcF1(
        microPrecision(C_clust2ele, E_ele2clust),
        microPrecision(E_clust2ent, C_ele2clust),
    )
def macroF1(C_ele2clust, C_clust2ele, E_ele2clust, E_clust2ent):
    '''
    Macro F1 score of the produced clusters (C) against the gold clusters (E).

    Precision is macroPrecision(C_clust2ele, E_ele2clust). Recall is the same
    measure with the roles swapped: macroPrecision(E_clust2ent, C_ele2clust).
    The two are combined with calcF1.
    '''
    return calcF1(
        macroPrecision(C_clust2ele, E_ele2clust),
        macroPrecision(E_clust2ent, C_ele2clust),
    )
def pairF1(C_ele2clust, C_clust2ele, E_ele2clust, E_clust2ent):
    '''
    Pairwise F1 score of the produced clusters (C) against the gold clusters (E).

    Precision and recall both come from a single pairwiseMetric call and are
    combined with calcF1. C_ele2clust is accepted so the signature matches
    microF1 and macroF1, but it is not used here.
    '''
    return calcF1(*pairwiseMetric(C_clust2ele, E_ele2clust, E_clust2ent))
def evaluate(C_ele2clust, C_clust2ele, E_ele2clust, E_clust2ent):
    '''
    Compute macro, micro and pairwise precision, recall and F1 in one call.

    C_ele2clust / C_clust2ele: element->clusters and cluster->elements
        mappings for the produced clustering
    E_ele2clust / E_clust2ent: the same two mappings for the gold clustering

    Macro and micro recall are obtained by calling the matching precision
    function with the roles of C and E swapped.

    Returns a dict with the keys macro_*, micro_* and pair_* (each with
    _prec, _recall and _f1), every value rounded to 4 decimal places.
    '''
    # Macro scores are computed first, then micro, then pairwise.
    macro_p = macroPrecision(C_clust2ele, E_ele2clust)
    macro_r = macroPrecision(E_clust2ent, C_ele2clust)

    micro_p = microPrecision(C_clust2ele, E_ele2clust)
    micro_r = microPrecision(E_clust2ent, C_ele2clust)

    pair_p, pair_r = pairwiseMetric(C_clust2ele, E_ele2clust, E_clust2ent)

    report = {}
    report['macro_prec'] = round(macro_p, 4)
    report['macro_recall'] = round(macro_r, 4)
    report['macro_f1'] = round(calcF1(macro_p, macro_r), 4)
    report['micro_prec'] = round(micro_p, 4)
    report['micro_recall'] = round(micro_r, 4)
    report['micro_f1'] = round(calcF1(micro_p, micro_r), 4)
    report['pair_prec'] = round(pair_p, 4)
    report['pair_recall'] = round(pair_r, 4)
    report['pair_f1'] = round(calcF1(pair_p, pair_r), 4)
    return report