forked from dmlc/dgl
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheval.py
96 lines (77 loc) · 2.67 KB
/
eval.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""
Code adapted from https://github.com/CRIPAC-DIG/GRACE
Linear evaluation on learned node embeddings
"""
import functools
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import normalize, OneHotEncoder
def repeat(n_times):
    """Build a decorator that runs a metric function several times.

    The decorated function is expected to return a dict of numeric metrics.
    It is called ``n_times`` with the same arguments; for every key of the
    first result the mean and standard deviation across runs are computed,
    printed through ``print_statistics`` and returned.

    Args:
        n_times: Number of times the wrapped function is invoked per call.

    Returns:
        A decorator. The wrapped callable returns
        ``{key: {"mean": ..., "std": ...}}`` instead of the raw metric dict.
    """

    def decorator(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            # Collect one metric dict per run.
            runs = []
            for _ in range(n_times):
                runs.append(f(*args, **kwargs))

            # The first run determines which metric keys are aggregated.
            summary = {
                metric: {
                    "mean": np.mean([run[metric] for run in runs]),
                    "std": np.std([run[metric] for run in runs]),
                }
                for metric in runs[0]
            }

            print_statistics(summary, f.__name__)
            return summary

        return wrapper

    return decorator
def prob_to_one_hot(y_pred):
    """Convert per-class scores into a boolean one-hot matrix.

    For each row of ``y_pred`` the column holding the row maximum is set to
    ``True`` and all other columns stay ``False``. Ties resolve to the lowest
    column index, which is what ``np.argmax`` returns.

    Args:
        y_pred: NumPy array of scores, expected to be 2-D (one row per
            sample, one column per class).

    Returns:
        Boolean NumPy array with the same shape as ``y_pred`` and exactly one
        ``True`` per row.
    """
    # ``np.bool`` was removed in NumPy 1.24; the builtin ``bool`` names the
    # same dtype and works on every NumPy version.
    ret = np.zeros(y_pred.shape, dtype=bool)
    winners = np.argmax(y_pred, axis=1)
    # Set all (row, argmax-column) pairs in one vectorized assignment.
    ret[np.arange(y_pred.shape[0]), winners] = True
    return ret
def print_statistics(statistics, function_name):
    """Print a one-line summary of aggregated metrics.

    The line has the form
    ``(E) | <function_name>: <key>=<mean>+-<std>, <key>=<mean>+-<std>``
    with means and standard deviations formatted to four decimals.

    Args:
        statistics: Mapping from metric name to a mapping that has ``"mean"``
            and ``"std"`` entries.
        function_name: Label printed in front of the metrics.
    """
    entries = [
        f"{key}={stat['mean']:.4f}+-{stat['std']:.4f}"
        for key, stat in statistics.items()
    ]
    # A single print always terminates the line, including when
    # ``statistics`` is empty.
    print(f"(E) | {function_name}: " + ", ".join(entries))
@repeat(3)
def label_classification(
    embeddings, y, train_mask, test_mask, split="random", ratio=0.1
):
    """Evaluate node embeddings with a logistic-regression probe.

    Embeddings are L2-normalized row-wise and labels are one-hot encoded.
    A one-vs-rest logistic regression (``liblinear`` solver) is fitted, with
    ``C`` chosen by 5-fold grid search over ``2**-10 ... 2**9``. Predictions
    on the test rows are scored with micro- and macro-averaged F1.

    Because of the ``@repeat(3)`` decorator, calling this name runs the whole
    procedure three times and returns the mean/std of each metric rather than
    the raw metric dict built here.

    Args:
        embeddings: Object supporting ``.detach().cpu().numpy()``
            (presumably a torch tensor of node embeddings).
        y: Labels, supporting ``.detach().cpu().numpy()``; flattened to one
            label per row.
        train_mask: Index/mask selecting training rows; only used when
            ``split == "public"``.
        test_mask: Index/mask selecting test rows; only used when
            ``split == "public"``.
        split: ``"random"`` for a fresh random train/test split, or
            ``"public"`` to use ``train_mask`` / ``test_mask``.
        ratio: Fraction of rows used for training when ``split == "random"``
            (the test fraction is ``1 - ratio``).

    Returns:
        ``{"F1Mi": micro_f1, "F1Ma": macro_f1}`` for a single run.

    Raises:
        ValueError: If ``split`` is neither ``"random"`` nor ``"public"``.
    """
    # Fail fast with a clear message; otherwise an unknown split would only
    # surface later as an UnboundLocalError on X_train.
    if split not in ("random", "public"):
        raise ValueError(
            f"split must be 'random' or 'public', got {split!r}"
        )

    X = embeddings.detach().cpu().numpy()
    Y = y.detach().cpu().numpy()
    Y = Y.reshape(-1, 1)

    onehot_encoder = OneHotEncoder(categories="auto").fit(Y)
    # ``np.bool`` was removed in NumPy 1.24; builtin ``bool`` is equivalent.
    Y = onehot_encoder.transform(Y).toarray().astype(bool)

    X = normalize(X, norm="l2")

    if split == "random":
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=1 - ratio
        )
    else:  # split == "public"
        X_train = X[train_mask]
        X_test = X[test_mask]
        y_train = Y[train_mask]
        y_test = Y[test_mask]

    logreg = LogisticRegression(solver="liblinear")
    c = 2.0 ** np.arange(-10, 10)

    clf = GridSearchCV(
        estimator=OneVsRestClassifier(logreg),
        param_grid=dict(estimator__C=c),
        n_jobs=8,
        cv=5,
        verbose=0,
    )
    clf.fit(X_train, y_train)

    # Turn per-class probabilities into hard one-hot predictions so they can
    # be compared against the one-hot encoded ground truth.
    y_pred = clf.predict_proba(X_test)
    y_pred = prob_to_one_hot(y_pred)

    micro = f1_score(y_test, y_pred, average="micro")
    macro = f1_score(y_test, y_pred, average="macro")

    return {"F1Mi": micro, "F1Ma": macro}