-
Notifications
You must be signed in to change notification settings - Fork 1
/
plots.py
122 lines (110 loc) · 4.62 KB
/
plots.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
import seaborn as sns
# Distribution of data
def distribution(X):
    """Plot a filled kernel-density estimate for every column of ``X``.

    The subplots are laid out side by side in a single row, one per column.
    Each x-axis is limited to the range from 0 to that column's maximum.

    Parameters
    ----------
    X : pandas.DataFrame
        Data to plot; every column is passed to ``sns.kdeplot``
        (assumes numeric columns -- TODO confirm against callers).
    """
    n_columns = len(X.columns)
    # squeeze=False keeps ``axes`` a 2-D array even when there is only one
    # column; without it plt.subplots returns a bare Axes and zip() fails.
    fig, axes = plt.subplots(
        1, n_columns, figsize=(20, 2), constrained_layout=True, squeeze=False
    )
    for name, axis in zip(X.columns, axes.ravel()):
        sns.kdeplot(data=X, x=name, ax=axis, fill=True)
        axis.set(xlim=(0, np.max(X[name])))
    fig.show()
# print the percentile of feature compared to a specific a dataset
def percentile_score(shap_value, X):
    """Show where each feature value falls within the distribution of ``X``.

    One subplot is drawn per column of ``X``. Each shows the column's
    kernel-density curve, a red bar at the feature value, shading under the
    curve to the left of that value, and a text label with the percentile
    score and the value.

    Parameters
    ----------
    shap_value : object with a ``data`` attribute
        ``shap_value.data`` is iterated in lockstep with ``X.columns``
        (presumably a single-sample SHAP explanation -- verify against
        the caller).
    X : pandas.DataFrame
        Reference dataset the feature values are compared against.
    """
    n_columns = len(X.columns)
    # squeeze=False keeps ``axes`` an array even for a single column, so the
    # zip below works for any number of features.
    _, axes = plt.subplots(
        1, n_columns, figsize=(20, 2), constrained_layout=True, squeeze=False
    )
    for name, value, axis in zip(X.columns, shap_value.data, axes.ravel()):
        score = stats.percentileofscore(X[name], value)
        ax = sns.kdeplot(data=X, x=name, ax=axis)
        # Reuse the curve seaborn just drew to size the marker and shading.
        x, y = ax.lines[0].get_data()
        x_min, x_max = np.min(x), np.max(x)
        width = (x_max - x_min) / 100
        ax.bar(value, np.interp(value, x, y), width=width, color='r')
        below = x < value
        ax.fill_between(x[below], y[below], alpha=0.5)
        # Set the limits on this subplot; plt.xlim would only affect the
        # figure's current axes, not the one being drawn.
        ax.set_xlim([x_min, x_max])
        ax.text(0.8, 0.8, f'PCTL: {score:.2f}%\n{name}: {value:.2f}', color='black',
                horizontalalignment='center',
                verticalalignment='center',
                transform=ax.transAxes)
# Plot barh
def prob_barh(data):
    """Draw a horizontal bar chart of two prediction probabilities.

    Parameters
    ----------
    data : sequence of two numbers
        ``data[0]`` is drawn as "No lateral spreading" and ``data[1]`` as
        "Lateral spreading"; each bar is annotated with its value to two
        decimal places. The x-axis is fixed to [0, 1].

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # matplotlib 3.6 renamed the bundled seaborn style to "seaborn-v0_8" and
    # 3.8 removed the old name; pick whichever this installation provides.
    seaborn_style = (
        "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
    )
    bar_labels = ["No lateral spreading", "Lateral spreading"]
    with plt.style.context(('ggplot', seaborn_style)):
        plt.figure(figsize=(5, 2))
        plt.barh(bar_labels, data, 0.5, color=['lightblue', 'orange'])
        for label, prob in zip(bar_labels, (data[0], data[1])):
            plt.text(prob, label, "{:.2f}".format(prob))
        plt.xlim([0, 1])
        plt.grid()
        plt.title("Prediction probabilities")
        plt.ylabel("", labelpad=0.1)
        plt.show()
    return None
# plot prediction probability
def pred_prob(data1, data2=None, labels=None):
    """Plot histograms (with KDE curves) of predictive probabilities.

    Parameters
    ----------
    data1 : array-like
        First set of probabilities, drawn in dim gray.
    data2 : array-like, optional
        Second set of probabilities, drawn in dark gray on the same axes.
        Skipped when ``None``.
    labels : sequence of str, optional
        Legend entries; no legend is drawn when ``None``.

    Notes
    -----
    ``sns.set`` and ``sns.set_context`` change seaborn's global settings, so
    the font scale and patch line width persist after this call.
    """
    # matplotlib 3.6 renamed the bundled seaborn style to "seaborn-v0_8" and
    # 3.8 removed the old name; pick whichever this installation provides.
    seaborn_style = (
        "seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn"
    )
    with plt.style.context(('ggplot', seaborn_style)):
        sns.set(font_scale=1.25)
        sns.set_context(rc={'patch.linewidth': 0.0})
        sns.histplot(data1, kde=True,
                     bins=25, color='dimgray',
                     edgecolor='k').set(xlim=(0, 1))
        # data2 is optional; only draw the second histogram when provided.
        if data2 is not None:
            sns.histplot(data2, kde=True,
                         bins=25, color='darkgray',
                         edgecolor='lightgray').set(xlim=(0, 1))
        if labels is not None:
            plt.legend(labels)
        plt.xlabel('predictive probability')
        plt.ylabel('count')
        plt.show()
# Function to Plot confusion matrix
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """Print and plot the confusion matrix of ``y_pred`` against ``y_true``.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted labels, passed to
        ``sklearn.metrics.confusion_matrix``.
    classes : array-like
        Display names, indexed by the labels present in the data (assumes
        the labels are integer positions into ``classes`` -- TODO confirm).
        Lists are accepted as well as NumPy arrays.
    normalize : bool, default False
        When true, each row is divided by its sum so cells show per-class
        fractions. Rows with no observed samples become zeros.
    title : str, optional
        Plot title; a default is chosen from ``normalize`` when falsy.
    cmap : matplotlib colormap, default ``plt.cm.Blues``
        Colormap for the matrix image.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the rendered matrix.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data. np.asarray lets callers
    # pass a plain list, which does not support array indexing.
    classes = np.asarray(classes)[unique_labels(y_true, y_pred)]

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A label that only appears in y_pred has an all-zero row; map the
        # resulting 0/0 to 0 so the threshold and annotations stay finite.
        with np.errstate(divide='ignore', invalid='ignore'):
            cm = cm.astype('float') / row_totals
        cm = np.nan_to_num(cm)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='Observed',
           xlabel='Predicted')

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Annotate every cell; use white text on dark cells for contrast.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for (i, j), cell in np.ndenumerate(cm):
        ax.text(j, i, format(cell, fmt),
                ha="center", va="center",
                color="white" if cell > thresh else "black")
    fig.tight_layout()
    return ax