-
Notifications
You must be signed in to change notification settings - Fork 75
/
common_defs.py
104 lines (69 loc) · 2.38 KB
/
common_defs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
"imports and definitions shared by various defs files"
import numpy as np
from math import log, sqrt
from time import time
from pprint import pprint
from sklearn.metrics import roc_auc_score as AUC, log_loss, accuracy_score as accuracy
from sklearn.metrics import mean_squared_error as MSE, mean_absolute_error as MAE
try:
    from hyperopt import hp
    from hyperopt.pyll.stochastic import sample
except ImportError:
    # A missing hyperopt is deliberately non-fatal: the module still imports
    # and the user is told how to install it (or to change get_params()).
    # The parenthesized single-argument form prints identically on Python 2
    # and is also valid syntax on Python 3.
    print( "In order to achieve operational capability, this programme requires hyperopt to be installed (pip install hyperopt), unless you make get_params() use something else." )
#
# handle floats which should be integers
# works with flat params
def handle_integers( params ):
    """Return a copy of ``params`` with integral floats converted to ``int``.

    A value such as ``3.0`` becomes ``3``; every other value (non-integral
    floats, ``inf``/``nan``, ints, strings, ...) is passed through unchanged.
    Only the top level of the mapping is inspected, and the input mapping
    itself is never modified.

    Args:
        params: flat mapping of parameter names to values.

    Returns:
        A new dict with the same keys as ``params``.
    """
    # float.is_integer() is simply False for inf/nan, whereas int( v ) == v
    # raises OverflowError/ValueError on them. isinstance (rather than an
    # exact type comparison) also covers float subclasses.
    return {
        k: int( v ) if isinstance( v, float ) and v.is_integer() else v
        for k, v in params.items()
    }
###
def _classifier_scores( clf, x, y ):
    """Return ``(log loss, AUC, accuracy)`` of the fitted ``clf`` on ``x`` vs ``y``."""
    proba = clf.predict_proba( x )
    try:
        # sklearn convention: take column 1 of the probability matrix
        p = proba[:,1]
    except IndexError:
        # predict_proba output has no column 1 - use it as returned
        p = proba

    # accuracy needs hard labels, so probabilities are rounded to 0/1
    return log_loss( y, p ), AUC( y, p ), accuracy( y, np.round( p ))


def train_and_eval_sklearn_classifier( clf, data ):
    """Fit ``clf`` on the training split and report train/test metrics.

    Prints log loss, AUC and accuracy for the training split and then for
    the test split.

    Args:
        clf: estimator providing ``fit( x, y )`` and ``predict_proba( x )``.
        data: mapping with the keys ``'x_train'``, ``'y_train'``,
            ``'x_test'`` and ``'y_test'``.

    Returns:
        A dict of test-split results: ``'loss'`` and ``'log_loss'`` (both the
        test log loss) and ``'auc'``.

    Raises:
        KeyError: if ``data`` lacks one of the four keys (raised before
            fitting).
    """
    x_train, y_train = data['x_train'], data['y_train']
    x_test, y_test = data['x_test'], data['y_test']

    clf.fit( x_train, y_train )

    ll, auc, acc = _classifier_scores( clf, x_train, y_train )
    print( "\n# training | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ))

    ll, auc, acc = _classifier_scores( clf, x_test, y_test )
    print( "# testing | log loss: {:.2%}, AUC: {:.2%}, accuracy: {:.2%}".format( ll, auc, acc ))

    # The reported loss is the test log loss; 1 - auc is a possible
    # alternative objective.
    return { 'loss': ll, 'log_loss': ll, 'auc': auc }
###
# "clf", even though it's a regressor
def _regression_errors( clf, x, y ):
    """Return ``(RMSE, MAE)`` of ``clf.predict( x )`` measured against ``y``."""
    p = clf.predict( x )
    rmse = sqrt( MSE( y, p ))
    mae = MAE( y, p )
    return rmse, mae


def train_and_eval_sklearn_regressor( clf, data ):
    """Fit regressor ``clf`` on the training split and report train/test errors.

    Prints RMSE and MAE for the training split and then for the test split.

    Args:
        clf: estimator providing ``fit( x, y )`` and ``predict( x )``.
        data: mapping with the keys ``'x_train'``, ``'y_train'``,
            ``'x_test'`` and ``'y_test'``.

    Returns:
        A dict of test-split results: ``'loss'`` and ``'rmse'`` (both the
        test RMSE) and ``'mae'``.

    Raises:
        KeyError: if ``data`` lacks one of the four keys (raised before
            fitting).
    """
    x_train, y_train = data['x_train'], data['y_train']
    x_test, y_test = data['x_test'], data['y_test']

    clf.fit( x_train, y_train )

    rmse, mae = _regression_errors( clf, x_train, y_train )
    print( "\n# training | RMSE: {:.4f}, MAE: {:.4f}".format( rmse, mae ))

    rmse, mae = _regression_errors( clf, x_test, y_test )
    print( "# testing | RMSE: {:.4f}, MAE: {:.4f}".format( rmse, mae ))

    return { 'loss': rmse, 'rmse': rmse, 'mae': mae }