Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cv csvs paper #191

Open
wants to merge 18 commits into
base: paper
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,8 @@ dmypy.json

# slurm
.out
.err

# data and reports
data/
reports/
251 changes: 251 additions & 0 deletions src/5_train_models/DeepRank/classMetrics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,251 @@
import warnings
from sklearn import metrics
import numpy as np
# info
# https://en.wikipedia.org/wiki/Precision_and_recall


def sensitivity(yp, yt):
    """sensitivity, recall or true positive rate (TPR)

    Args:
        yp (array): predictions
        yt (array): targets

    Returns:
        float: sensitivity value (TP / P), or inf when there are no
            positive targets
    """
    n_true_pos = true_positive(yp, yt)
    n_pos = positive(yt)
    # TP / P is undefined without positives; warn and report inf instead.
    if n_pos != 0:
        return n_true_pos / n_pos
    warnings.warn(
        'Number of positive cases is 0, '
        'TPR or sensitivity is assigned as inf')
    return float('inf')


def specificity(yp, yt):
    """specificity, selectivity or true negative rate (TNR)

    Args:
        yp (array): predictions
        yt (array): targets

    Returns:
        float: specificity value (TN / N), or inf when there are no
            negative targets
    """
    n_true_neg = true_negative(yp, yt)
    n_neg = negative(yt)
    # TN / N is undefined without negatives; warn and report inf instead.
    if n_neg != 0:
        return n_true_neg / n_neg
    warnings.warn(
        'Number of negative cases is 0, '
        'TNR or specificity is assigned as inf')
    return float('inf')


def precision(yp, yt):
    """precision or positive predictive value (PPV)

    Args:
        yp (array): predictions
        yt (array): targets

    Returns:
        float: precision value (TP / (TP + FP)), or inf when no positive
            predictions were made
    """
    n_tp = np.float64(true_positive(yp, yt))
    n_fp = np.float64(false_positive(yp, yt))
    denom = n_tp + n_fp
    # No predicted positives makes PPV undefined; warn and report inf.
    if denom == 0:
        warnings.warn(
            'Total number of true positive and false positive cases is 0, '
            'PPV or precision is assigned as inf')
        return float('inf')
    return n_tp / denom


def accuracy(yp, yt):
    """Accuracy: fraction of correct predictions, (TP + TN) / (P + N).

    Args:
        yp (array): predictions
        yt (array): targets

    Returns:
        float: accuracy value
    """
    n_tp = np.float64(true_positive(yp, yt))
    n_tn = np.float64(true_negative(yp, yt))
    n_pos = np.float64(positive(yt))
    n_neg = np.float64(negative(yt))
    return (n_tp + n_tn) / (n_pos + n_neg)


def F1(yp, yt):
    """F1 score: harmonic mean of precision and recall,
    computed as 2*TP / (2*TP + FP + FN).

    Args:
        yp (array): predictions
        yt (array): targets

    Returns:
        float: F1 score
    """
    n_tp = np.float64(true_positive(yp, yt))
    n_fp = np.float64(false_positive(yp, yt))
    n_fn = np.float64(false_negative(yp, yt))
    return (2 * n_tp) / (2 * n_tp + n_fp + n_fn)

def mcc(yp, yt):
    """Matthews correlation coefficient (MCC).

    Args:
        yp (array): predictions
        yt (array): targets

    Returns:
        float: MCC value; 0.0 when any confusion-matrix margin is empty
            (a zero denominator is replaced by 1, following
            https://en.wikipedia.org/wiki/Phi_coefficient)
    """
    tp = np.float64(true_positive(yp, yt))
    tn = np.float64(true_negative(yp, yt))
    fp = np.float64(false_positive(yp, yt))
    fn = np.float64(false_negative(yp, yt))

    denominator = np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    # If any margin is 0 the denominator vanishes (and the numerator is 0
    # too), so checking it directly replaces the original
    # errstate/FloatingPointError control flow with the same result.
    if denominator == 0:
        denominator = np.float64(1)
    return (tp * tn - fp * fn) / denominator

def roc_auc(yp, yt):
    """Compute ROC AUC with sklearn.

    Args:
        yp (array): predictions (scores/probabilities)
        yt (array): targets

    Returns:
        float: area under the ROC curve
    """
    # roc_auc_score expects a column of labels alongside the scores.
    labels = np.expand_dims(yt, 1)
    return metrics.roc_auc_score(labels, yp)

def tpr_fpr_thresholds(yp, yt):
    """Compute arrays of true positive rate and false positive rate
    with sklearn; can be used for plotting ROC curves and computing ROC AUC.

    Args:
        yp (ndarray): probabilities for all indices
        yt (ndarray): true labels for all indices

    Returns:
        np.array: true positive rate at each threshold chosen by
            sklearn.metrics.roc_curve
        np.array: false positive rate at each of those thresholds
    """
    labels = np.expand_dims(yt, 1)
    fprs, tprs, _thresholds = metrics.roc_curve(labels, yp)
    return tprs, fprs

def rmse(yp, yt):
    """Root Mean Squared Error (RMSE): sqrt(mean((yp - yt)**2)).

    Args:
        yp (array): predictions
        yt (array): targets

    Returns:
        float: Root Mean Squared Error (RMSE) score
    """
    # Dividing inside the sum by yp.size is the mean of squared errors.
    return np.sqrt(np.sum(((yp - yt)**2) / yp.size))

def true_positive(yp, yt):
    """number of true positive cases.

    Args:
        yp (array): predictions
        yt (array): targets
    """
    pred = yp.astype(bool)
    target = yt.astype(bool)
    # Count entries predicted positive that really are positive.
    return np.sum(pred & target)


def true_negative(yp, yt):
    """number of true negative cases.

    Args:
        yp (array): predictions
        yt (array): targets
    """
    pred = yp.astype(bool)
    target = yt.astype(bool)
    # Count entries predicted negative that really are negative.
    return np.sum(~pred & ~target)


def false_positive(yp, yt):
    """number of false positive cases.

    Args:
        yp (array): predictions
        yt (array): targets
    """
    pred = yp.astype(bool)
    target = yt.astype(bool)
    # Count entries predicted positive whose target is negative.
    return np.sum(pred & ~target)


def false_negative(yp, yt):
    """number of false negative cases.

    Args:
        yp (array): predictions
        yt (array): targets
    """
    pred = yp.astype(bool)
    target = yt.astype(bool)
    # Count entries predicted negative whose target is positive.
    return np.sum(~pred & target)


def positive(yt):
    """The number of real positive cases.

    Args:
        yt (array): targets
    """
    target = yt.astype(bool)
    return np.sum(target)


def negative(yt):
    """The number of real negative cases.

    Args:
        yt (array): targets
    """
    target = yt.astype(bool)
    return np.sum(~target)


def _to_bool(x):
"""convert array values to boolean values.

Args:
x (array): values should be 0 or 1

Returns:
array: boolean array
"""
return x.astype(bool)
13 changes: 13 additions & 0 deletions src/5_train_models/DeepRank2/GNN/run_pre-trained_testing.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is this file? I think it shouldn't be here. Also, the structure in the paper branch is the one shown in the attached image; please move the files accordingly.

# Slurm batch directives for the pre-trained model test run.
# NOTE(review): job-name "split_h5" looks copied from another script —
# confirm whether it should be renamed for this testing job.
#SBATCH --job-name split_h5
#SBATCH --partition thin
# stdout/stderr log paths; %J expands to the Slurm job id.
#SBATCH -o /projects/0/einf2380/data/test_logs/test_erasmusmcData-%J.out
#SBATCH -e /projects/0/einf2380/data/test_logs/test_erasmusmcData-%J.err
# Single task on one node, with 96 CPUs available to it, 1-hour limit.
#SBATCH --nodes 1
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=96
#SBATCH --time=01:00:00


# Activate the conda environment and run the test script with
# unbuffered output (-u) so logs appear in the .out file as they happen.
source activate dr2
python -u pre-trained_testing.py
Loading