-
Notifications
You must be signed in to change notification settings - Fork 1
/
cross_val.py
64 lines (49 loc) · 2.12 KB
/
cross_val.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import (cross_val_score, KFold)
from sklearn.metrics import mean_squared_error
def select_fold(data, targets, train_idx, test_idx):
    """Slice features and targets into the train/test parts of one fold.

    Args:
        data: Feature table (``pandas.DataFrame``).
        targets: Target values aligned row-for-row with ``data``
            (``pandas.Series``).
        train_idx: Integer row positions that form the training part.
        test_idx: Integer row positions that form the test part.

    Returns:
        A ``(x_train, x_test, y_train, y_test)`` tuple.
    """
    # Positional selection copies the rows in one step and keeps each
    # column's dtype, unlike filling a pre-allocated empty frame row by row.
    return (
        data.iloc[train_idx],
        data.iloc[test_idx],
        targets.iloc[train_idx],
        targets.iloc[test_idx],
    )


def main():
    """Estimate a decision tree's MSE with cross-validation in two ways.

    First with the ``cross_val_score`` shortcut, then with an explicit
    ``KFold`` loop that fits and scores a fresh model on every fold.
    Results are printed to stdout.
    """
    dataset = pd.read_csv('./datasets/felicidad.csv')

    # Everything except the country label and the target is used as a
    # feature (assumes the remaining columns are numeric -- TODO confirm).
    data = dataset.drop(["country", "score"], axis=1)
    targets = dataset["score"]

    # ---- BASIC IMPLEMENTATION ----
    # Model selection.
    model = DecisionTreeRegressor()

    # The 'neg_mean_squared_error' scorer yields the NEGATED MSE of each
    # fold (scikit-learn scorers follow "greater is better").
    neg_mse_scores = cross_val_score(
        model,
        data,
        targets,
        cv=3,
        scoring='neg_mean_squared_error',
    )
    # Flip the sign so that what is printed under the "MSE" label really is
    # the mean squared error of each fold.
    mse_scores = -neg_mse_scores

    print('*'*64)
    print('---- IMPLEMENTACION BÁSICA ----')
    print('*'*64)
    print("Los tres MSE fueron: ", mse_scores)
    print('='*32)
    # Raw scorer average, negative by construction.
    print(np.mean(neg_mse_scores))
    print('='*32)
    print("El MSE promedio fue: ", np.mean(mse_scores))

    # ---- DETAILED IMPLEMENTATION ----
    print('*'*64)
    print('---- IMPLEMENTACION DETALLADA ----')
    print('*'*64)

    kf = KFold(n_splits=3, shuffle=True, random_state=42)
    mse_values = []
    for train, test in kf.split(data):
        x_train, x_test, y_train, y_test = select_fold(
            data, targets, train, test
        )
        # A fresh, unfitted model is trained on every fold.
        fold_model = DecisionTreeRegressor().fit(x_train, y_train)
        predictions = fold_model.predict(x_test)
        mse_values.append(mean_squared_error(y_test, predictions))

    print("Los tres MSE fueron: ", mse_values)
    print("El MSE promedio fue: ", np.mean(mse_values))


if __name__ == "__main__":
    main()