# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

class PredictUsage:
    """XGBoost regressor whose hyperparameters are selected by grid search
    with K-fold cross-validation on the training/validation set."""

    def __init__(self) -> None:
        self.model = None
        self.n_splits = 2
        # Hyperparameter grid to be searched
        self.n_estimators_list = [100, 200, 300]
        self.max_depths = [3, 5, 10]
        self.learning_rates = [0.1, 0.05, 0.01]

    def fit(self, X_trainVal: pd.DataFrame, y_trainVal: pd.Series) -> None:
        cv = KFold(n_splits=self.n_splits, shuffle=True, random_state=42)
        scores = []
        rownames = []
        hyperparamSetting = []
        # Enumerate every combination of the hyperparameter grid
        for n_estimators in self.n_estimators_list:
            for max_depth in self.max_depths:
                for learning_rate in self.learning_rates:
                    hyperparamSetting.append((n_estimators, max_depth, learning_rate))
        for tup in hyperparamSetting:
            n_estimators, max_depth, learning_rate = tup
            mean_MSE = 0
            mean_MAE = 0
            mean_R2 = 0
            mean_MAPE = 0
            # Evaluate this combination with K-fold cross-validation
            for train_idx, val_idx in cv.split(X_trainVal, y_trainVal):
                X_train = X_trainVal.iloc[train_idx, :]
                X_val = X_trainVal.iloc[val_idx, :]
                y_train = y_trainVal.iloc[train_idx]
                y_val = y_trainVal.iloc[val_idx]
                model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=n_estimators,
                                         learning_rate=learning_rate, max_depth=max_depth)
                model.fit(X_train, y_train)
                y_val_hat = model.predict(X_val)
                mean_MSE += mean_squared_error(y_val, y_val_hat)
                mean_MAE += mean_absolute_error(y_val, y_val_hat)
                mean_R2 += r2_score(y_val, y_val_hat)
                mape = np.abs((y_val - y_val_hat) / y_val) * 100
                mape[np.isinf(mape)] = 0  # Replace infinite values (zero targets) with 0
                mean_MAPE += np.mean(mape)
            scores.append((mean_MSE / self.n_splits, mean_MAE / self.n_splits,
                           mean_R2 / self.n_splits, mean_MAPE / self.n_splits))
            rownames.append(tup)
        colnames = ['MSE', 'MAE', 'R2', 'MAPE']
        df_summary = pd.DataFrame(scores, index=rownames, columns=colnames)
        # Select the optimal hyperparameters by minimum cross-validated MAE
        opt_n_estimators, opt_max_depth, opt_learning_rate = df_summary['MAE'].idxmin()
        print("opt_n_estimators, opt_max_depth, opt_learning_rate: ", df_summary['MAE'].idxmin())
        # Refit on the full training/validation set with the optimal hyperparameters
        self.model = xgb.XGBRegressor(objective='reg:squarederror', n_estimators=opt_n_estimators,
                                      learning_rate=opt_learning_rate, max_depth=opt_max_depth)
        self.model.fit(X_trainVal, y_trainVal)
        # Print feature importances
        print(self.model.feature_importances_)

    def predict(self, X_test: pd.DataFrame) -> np.ndarray:
        return self.model.predict(X_test)

    # Calculate MAPE
    def mape(self, y_true, y_pred):
        y_true, y_pred = np.array(y_true), np.array(y_pred)
        return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

    # Calculate RMSE
    def rmse(self, y_true, y_pred):
        y_true, y_pred = np.array(y_true), np.array(y_pred)
        return np.sqrt(np.mean((y_true - y_pred) ** 2))

    # Calculate MAE
    def mae(self, y_true, y_pred):
        y_true, y_pred = np.array(y_true), np.array(y_pred)
        return np.mean(np.abs(y_true - y_pred))

    # Calculate errors by all of the above methods
    def calculate_error(self, y_true, y_pred):
        mae = self.mae(y_true, y_pred)
        rmse = self.rmse(y_true, y_pred)
        mape = self.mape(y_true, y_pred)
        return {'MAE': mae, 'RMSE': rmse, 'MAPE': mape}
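

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). The synthetic data,
# column names, and 80/20 holdout split below are illustrative assumptions
# only; any real dataset with a DataFrame of features and a Series target
# would be used the same way.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    # Hypothetical feature matrix and target standing in for real usage data
    X = pd.DataFrame(rng.normal(size=(200, 3)), columns=['f1', 'f2', 'f3'])
    y = 2.0 * X['f1'] + X['f2'] - X['f3'] + rng.normal(scale=0.1, size=200)

    # Hold out the last 20% as a test set; the rest feeds the CV search + refit
    split = int(len(X) * 0.8)
    X_trainVal, X_test = X.iloc[:split], X.iloc[split:]
    y_trainVal, y_test = y.iloc[:split], y.iloc[split:]

    predictor = PredictUsage()
    predictor.fit(X_trainVal, y_trainVal)
    y_hat = predictor.predict(X_test)
    print(predictor.calculate_error(y_test, y_hat))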