# evaluate.py (forked from BojianHou/FACIMS)
import numpy as np

from data.dataset import load_tadpole, load_credit, load_drug, load_toy_new
from utils.postprocessing import *


def load_dataset(dataset, seed=42):
    """Return the train/val/test splits (features, sensitive attributes, labels) for the given dataset."""
    if dataset == 'tadpole':
        return load_tadpole(seed)
    elif dataset == 'credit':
        return load_credit(seed)
    elif dataset == 'drug':
        return load_drug(seed)
    elif dataset == 'toy_new':
        return load_toy_new(seed)
    raise ValueError('Unknown dataset: {}'.format(dataset))
def result_show(y, y_hat, A_test, output_dim):
    """Compute accuracy and fairness metrics for predictions y_hat against labels y,
    using the sensitive attribute A_test. Binary and multiclass cases use separate metric functions."""
    accuracy, b_acc, recall = compute_accuracy(y, y_hat, output_dim=output_dim)
    if output_dim <= 2:  # binary classification
        DP_score = demographic_parity_binary(y_hat, A_test)
        EO_score = equalized_odds_binary(y, y_hat, A_test)
        suf_gap_avg_score = standard_suf_gap_all_binary(y, y_hat, A_test)
        # print("[Recall] The overall recall is: {:.4f}".format(recall))
    else:  # multiclass
        DP_score = demographic_parity_multiclass(y_hat, A_test)
        EO_score = equalized_odds_multiclass(y, y_hat, A_test)
        suf_gap_avg_score = standard_suf_gap_all_multiclass(y, y_hat, A_test)
        # for idx, rec in enumerate(recall):
        #     print("[Recall] The overall recall for class {}: {:.4f}".format(idx, rec))
    # print("[Accuracy] The overall accuracy is: {:.4f}".format(accuracy))
    # print("[Balanced Acc] The overall balanced acc is: {:.4f}".format(b_acc))
    # print("[DP] The overall demographic parity is: {:.4f}".format(DP_score))
    # print("[EO] The overall equalized odds is: {:.4f}".format(EO_score))
    # print("[SufGAP] The overall sufficiency gap is: {:.4f}".format(suf_gap_avg_score))
    return accuracy, b_acc, DP_score, EO_score, suf_gap_avg_score, recall
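

# Illustrative sketch only (not called in this script): the actual metric
# implementations come from utils.postprocessing via the star import above.
# Assuming demographic_parity_binary and equalized_odds_binary report the
# largest gap between sensitive groups, comparable helpers could look like:
def _dp_gap_binary_sketch(y_hat, A):
    # Demographic parity gap: max difference in positive-prediction rate
    # P(y_hat = 1 | A = a) across sensitive groups.
    rates = [np.mean(y_hat[A == a]) for a in np.unique(A)]
    return max(rates) - min(rates)


def _eo_gap_binary_sketch(y, y_hat, A):
    # Equalized odds gap: for each true label, the max difference in
    # P(y_hat = 1 | y = label, A = a) across groups, averaged over labels.
    # Assumes every group contains samples of both labels.
    gaps = []
    for label in (0, 1):
        rates = [np.mean(y_hat[(A == a) & (y == label)]) for a in np.unique(A)]
        gaps.append(max(rates) - min(rates))
    return float(np.mean(gaps))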
if __name__ == "__main__":
    for dataset in ['drug', 'credit', 'tadpole']:  # 'tadpole', 'credit', 'drug'
        print("Processing dataset {}...".format(dataset))
        X_train, X_val, X_test, A_train, \
            A_val, A_test, y_train, y_val, y_test = load_dataset(dataset)
        # 'drug' has four classes; the other datasets are binary.
        num_class = 4 if dataset == 'drug' else 2
        for method in [1, 11, 2, 3, 8]:  # 1, 11, 2, 3, 8
            print("method {}...".format(method))
            for lr_prior in [0.1, 0.01]:  # 0.1 0.01 0.001
                print("learning rate for prior model {}".format(lr_prior))
                for lr_post in [0.4, 0.1, 0.01]:  # 0.4 0.1 0.01
                    print("learning rate for post model {}".format(lr_post))
                    acc_list, b_acc_list, dp_list, eo_list, sg_list, recall_list = [], [], [], [], [], []
                    for seed in [0, 42, 666, 777, 1009]:
                        predict = np.load('./npy/{}/prediction_method_{}_seed_{}_lr_prior_{}_lr_post_{}.npy'
                                          .format(dataset, method, seed, lr_prior, lr_post))
                        acc, b_acc, dp, eo, sg, recall = \
                            result_show(y_test, predict, A_test, num_class)
                        acc_list.append(acc)
                        b_acc_list.append(b_acc)
                        dp_list.append(dp)
                        eo_list.append(eo)
                        sg_list.append(sg)
                        recall_list.append(recall)
                    print('===============dataset {}, method {}, lr_prior {}, lr_post {}=================='
                          .format(dataset, method, lr_prior, lr_post))
                    for i in range(num_class):
                        print('Recall {} Mean±Std {:.4f}±{:.4f}'
                              .format(i, np.mean(np.array(recall_list)[:, i]), np.std(np.array(recall_list)[:, i])))
                    print('ACC Mean±Std {:.4f}±{:.4f}'.format(np.mean(acc_list), np.std(acc_list)))
                    print('BACC Mean±Std {:.4f}±{:.4f}'.format(np.mean(b_acc_list), np.std(b_acc_list)))
                    print('DP Mean±Std {:.4f}±{:.4f}'.format(np.mean(dp_list), np.std(dp_list)))
                    print('EO Mean±Std {:.4f}±{:.4f}'.format(np.mean(eo_list), np.std(eo_list)))
                    print('SG Mean±Std {:.4f}±{:.4f}'.format(np.mean(sg_list), np.std(sg_list)))
        for method in [7, 9]:  # 1 2 3 8
            print("method {}...".format(method))
            acc_list, b_acc_list, dp_list, eo_list, sg_list, recall_list = [], [], [], [], [], []
            for seed in [0, 42, 666, 777, 1009]:
                predict = np.load('./npy/{}/prediction_method_{}_seed_{}_lr_0.01.npy'
                                  .format(dataset, method, seed))
                acc, b_acc, dp, eo, sg, recall = \
                    result_show(y_test, predict, A_test, num_class)
                acc_list.append(acc)
                b_acc_list.append(b_acc)
                dp_list.append(dp)
                eo_list.append(eo)
                sg_list.append(sg)
                recall_list.append(recall)
            print('===============dataset {}, method {}, lr 0.01=================='
                  .format(dataset, method))
            for i in range(num_class):
                print('Recall {} Mean±Std {:.4f}±{:.4f}'
                      .format(i, np.mean(np.array(recall_list)[:, i]), np.std(np.array(recall_list)[:, i])))
            print('ACC Mean±Std {:.4f}±{:.4f}'.format(np.mean(acc_list), np.std(acc_list)))
            print('BACC Mean±Std {:.4f}±{:.4f}'.format(np.mean(b_acc_list), np.std(b_acc_list)))
            print('DP Mean±Std {:.4f}±{:.4f}'.format(np.mean(dp_list), np.std(dp_list)))
            print('EO Mean±Std {:.4f}±{:.4f}'.format(np.mean(eo_list), np.std(eo_list)))
            print('SG Mean±Std {:.4f}±{:.4f}'.format(np.mean(sg_list), np.std(sg_list)))
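
# Usage (assumed): run after the training scripts have written per-seed
# prediction files under ./npy/<dataset>/ matching the patterns loaded above
# (e.g. prediction_method_1_seed_42_lr_prior_0.1_lr_post_0.4.npy), then:
#   python evaluate.py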