Set rank as default threshold metric for netmhc tools and add option for affinity threshold
jonasscheid committed Jun 23, 2022
1 parent a6f5080 commit 1b9b00a
Showing 4 changed files with 33 additions and 14 deletions.
34 changes: 21 additions & 13 deletions bin/epaa.py
@@ -708,12 +708,12 @@ def create_affinity_values(allele, length, j, method, max_scores, allele_strings
         return np.nan


-def create_binder_values(aff, method, thresholds):
-    if not pd.isnull(aff):
+def create_binder_values(pred_score, method, thresholds):
+    if not pd.isnull(pred_score):
         if 'syf' in method:
-            return True if aff > thresholds[method] else False
+            return True if pred_score > thresholds[method] else False
         else:
-            return True if aff <= thresholds[method.lower()] else False
+            return True if pred_score <= thresholds[method.lower()] else False
     else:
         return np.nan
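For orientation, a minimal stand-alone sketch of how the renamed create_binder_values behaves with the new rank-based defaults; the function body is copied from the diff above, while the threshold dictionary and example scores are purely illustrative:

import numpy as np
import pandas as pd

def create_binder_values(pred_score, method, thresholds):
    if not pd.isnull(pred_score):
        if 'syf' in method:
            return True if pred_score > thresholds[method] else False
        else:
            return True if pred_score <= thresholds[method.lower()] else False
    else:
        return np.nan

# Illustrative defaults after this commit: syfpeithi keeps a score threshold,
# NetMHC-family tools are now thresholded on their percentile rank (lower is better).
thresholds = {"syfpeithi": 50, "netmhcpan": 2, "mhcflurry": 500}

print(create_binder_values(0.8, "netmhcpan", thresholds))     # True: rank 0.8 <= 2
print(create_binder_values(3.5, "netmhcpan", thresholds))     # False: rank 3.5 > 2
print(create_binder_values(60.0, "syfpeithi", thresholds))    # True: syfpeithi scores must exceed the threshold
print(create_binder_values(np.nan, "mhcflurry", thresholds))  # nan: missing predictions stay undefined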

@@ -759,7 +759,7 @@ def generate_wt_seqs(peptides):
     return wt_dict


-def make_predictions_from_variants(variants_all, methods, tool_thresholds, alleles, minlength, maxlength, martsadapter, protein_db, identifier, metadata, transcriptProteinMap):
+def make_predictions_from_variants(variants_all, methods, tool_thresholds, alleles, use_affinity_thresholds, minlength, maxlength, martsadapter, protein_db, identifier, metadata, transcriptProteinMap):
     # list for all peptides and filtered peptides
     all_peptides = []
     all_peptides_filtered = []
@@ -840,7 +840,8 @@ def make_predictions_from_variants(variants_all, methods, tool_thresholds, allel
                 allele = c.rstrip(' Score')
                 df[c] = df[c].round(4)
                 df.insert(idx + 1, '%s affinity' % allele, df.apply(lambda x: create_affinity_values(allele, int(x['length']), float(x[c]), x['method'], max_values_matrices, allele_string_map), axis=1))
-                df.insert(idx + 2, '%s binder' % allele, df.apply(lambda x: create_binder_values(float(x['%s affinity' % allele]), x['method'], tool_thresholds), axis=1))
+                df.insert(idx + 2, '%s binder' % allele, df.apply(lambda x: create_binder_values(float(x['%s Rank' % allele]), x['method'], tool_thresholds) if 'netmhc' in x['method'] and not use_affinity_thresholds
+                    else create_binder_values(float(x['%s affinity' % allele]), x['method'], tool_thresholds), axis=1))

                 df.columns = df.columns.str.replace('Score', 'score')
                 df.columns = df.columns.str.replace('Rank', 'rank')
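A stand-alone illustration of the column selection added above: NetMHC-family methods are thresholded on the per-allele Rank column by default, and the affinity column is used for all other methods or when the new affinity option is set. The DataFrame contents are invented for demonstration and reuse only the column naming from the diff:

import pandas as pd

use_affinity_thresholds = False  # corresponds to the new --affinity_thresholds flag
allele = "A*02:01"

df = pd.DataFrame({
    "method": ["netmhcpan", "syfpeithi"],
    "%s Rank" % allele: [0.8, None],
    "%s affinity" % allele: [35.0, 72.0],
})

def threshold_input(x):
    # mirrors the condition inside the df.insert call
    if 'netmhc' in x['method'] and not use_affinity_thresholds:
        return float(x['%s Rank' % allele])      # rank metric for NetMHC tools
    return float(x['%s affinity' % allele])      # affinity (or normalized score) otherwise

print(df.apply(threshold_input, axis=1))  # 0.8 for netmhcpan, 72.0 for syfpeithi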
@@ -858,7 +859,7 @@ def make_predictions_from_variants(variants_all, methods, tool_thresholds, allel
     return pred_dataframes, statistics, all_peptides_filtered, prots


-def make_predictions_from_peptides(peptides, methods, tool_thresholds, alleles, protein_db, identifier, metadata):
+def make_predictions_from_peptides(peptides, methods, tool_thresholds, use_affinity_thresholds, alleles, protein_db, identifier, metadata):
     # dictionaries for syfpeithi matrices max values and allele mapping
     max_values_matrices = {}
     allele_string_map = {}
@@ -933,7 +934,8 @@ def make_predictions_from_peptides(peptides, methods, tool_thresholds, alleles,
                 allele = c.rstrip(' Score')
                 df[c] = df[c].round(4)
                 df.insert(idx + 1, '%s affinity' % allele, df.apply(lambda x: create_affinity_values(allele, int(x['length']), float(x[c]), x['method'], max_values_matrices, allele_string_map), axis=1))
-                df.insert(idx + 2, '%s binder' % allele, df.apply(lambda x: create_binder_values(float(x['%s affinity' % allele]), x['method'], tool_thresholds), axis=1))
+                df.insert(idx + 2, '%s binder' % allele, df.apply(lambda x: create_binder_values(float(x['%s Rank' % allele]), x['method'], tool_thresholds) if 'netmhc' in x['method'] and not use_affinity_thresholds
+                    else create_binder_values(float(x['%s affinity' % allele]), x['method'], tool_thresholds), axis=1))

                 df.columns = df.columns.str.replace('Score', 'score')
                 df.columns = df.columns.str.replace('Rank', 'rank')
@@ -959,7 +961,8 @@ def __main__():
     parser.add_argument('-l', "--max_length", help="Maximum peptide length")
     parser.add_argument('-ml', "--min_length", help="Minimum peptide length")
     parser.add_argument('-t', "--tools", help="Tools used for peptide predictions", required=True, type=str)
-    parser.add_argument('-tt', "--tool_thresholds", help="Customize affinity threshold of given tools using a json file", required=False, type=str)
+    parser.add_argument('-tt', "--tool_thresholds", help="Customize thresholds of given tools using a json file", required=False, type=str)
+    parser.add_argument('-at', "--affinity_thresholds", help="Use affinity instead of rank for thresholding", required=False, action='store_true')
     parser.add_argument('-sv', "--versions", help="File containing parsed software version numbers.", required=True)
     parser.add_argument('-a', "--alleles", help="<Required> MHC Alleles", required=True, type=str)
     parser.add_argument('-r', "--reference", help="Reference, retrieved information will be based on this ensembl version", required=False, default='GRCh37', choices=['GRCh37', 'GRCh38'])
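A quick sketch of what the new store_true flag does on the argparse side; only the two relevant arguments are reproduced here, the rest of the parser is omitted:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-tt', "--tool_thresholds", help="Customize thresholds of given tools using a json file", required=False, type=str)
parser.add_argument('-at', "--affinity_thresholds", help="Use affinity instead of rank for thresholding", required=False, action='store_true')

print(parser.parse_args([]).affinity_thresholds)       # False: rank-based defaults apply to NetMHC tools
print(parser.parse_args(["-at"]).affinity_thresholds)  # True: fall back to affinity-based thresholds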
@@ -1031,7 +1034,12 @@ def __main__():
         if version not in EpitopePredictorFactory.available_methods()[method]:
             raise ValueError("The specified version " + version + " for " + method + " is not supported by epytope.")

-    thresholds = {"syfpeithi":50, "mhcflurry":500, "mhcnuggets-class-1":500, "mhcnuggets-class-2":500, "netmhc":500, "netmhcpan":500, "netmhcii":500, "netmhciipan":500}
+    # Define binders based on the rank metric for netmhc family tools
+    if "netmhc" in ''.join(methods.keys()) and not args.affinity_thresholds:
+        thresholds = {"syfpeithi":50, "mhcflurry":500, "mhcnuggets-class-1":500, "mhcnuggets-class-2":500, "netmhc":2, "netmhcpan":2, "netmhcii":5, "netmhciipan":5}
+    else:
+        thresholds = {"syfpeithi":50, "mhcflurry":500, "mhcnuggets-class-1":500, "mhcnuggets-class-2":500, "netmhc":500, "netmhcpan":500, "netmhcii":500, "netmhciipan":500}
+
     if args.tool_thresholds:
         with open(args.tool_thresholds, 'r') as json_file:
             threshold_file = json.load(json_file)
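A sketch of how a custom --tool_thresholds file would presumably interact with these defaults. The diff only shows the json.load call, so the override step (a plain dict update) is an assumption, and the values in the example JSON are illustrative only:

import json

# Rank-based defaults for the NetMHC family, as defined above
thresholds = {"syfpeithi": 50, "mhcflurry": 500, "mhcnuggets-class-1": 500, "mhcnuggets-class-2": 500,
              "netmhc": 2, "netmhcpan": 2, "netmhcii": 5, "netmhciipan": 5}

# Example content of a --tool_thresholds JSON file (tool-name: value)
threshold_file = json.loads('{"netmhcpan": 1.0, "syfpeithi": 60}')

thresholds.update(threshold_file)  # assumed override step, not shown in the diff
print(thresholds["netmhcpan"], thresholds["netmhc"])  # 1.0 2 -> only the listed tools are overridden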
@@ -1044,14 +1052,14 @@ def __main__():
     # MHC class I or II predictions
     if args.mhcclass is 1:
         if args.peptides:
-            pred_dataframes, statistics = make_predictions_from_peptides(peptides, methods, thresholds, alleles, up_db, args.identifier, metadata)
+            pred_dataframes, statistics = make_predictions_from_peptides(peptides, methods, thresholds, args.affinity_thresholds, alleles, up_db, args.identifier, metadata)
         else:
             pred_dataframes, statistics, all_peptides_filtered, proteins = make_predictions_from_variants(vl, methods, thresholds, alleles, int(args.min_length), int(args.max_length) + 1, ma, up_db, args.identifier, metadata, transcriptProteinMap)
     else:
         if args.peptides:
-            pred_dataframes, statistics = make_predictions_from_peptides(peptides, methods, thresholds, alleles, up_db, args.identifier, metadata)
+            pred_dataframes, statistics = make_predictions_from_peptides(peptides, methods, thresholds, args.affinity_thresholds, alleles, up_db, args.identifier, metadata)
         else:
-            pred_dataframes, statistics, all_peptides_filtered, proteins = make_predictions_from_variants(vl, methods, thresholds, alleles, int(args.min_length), int(args.max_length) + 1, ma, up_db, args.identifier, metadata, transcriptProteinMap)
+            pred_dataframes, statistics, all_peptides_filtered, proteins = make_predictions_from_variants(vl, methods, thresholds, args.affinity_thresholds, alleles, int(args.min_length), int(args.max_length) + 1, ma, up_db, args.identifier, metadata, transcriptProteinMap)
     # concat dataframes for all peptide lengths
     try:
         complete_df = pd.concat(pred_dataframes, sort=True)
4 changes: 4 additions & 0 deletions modules/local/epytope_peptide_prediction.nf
@@ -37,6 +37,10 @@ process EPYTOPE_PEPTIDE_PREDICTION {
argument = "--tool_thresholds ${params.tool_thresholds} " + argument
}

if (params.affinity_thresholds) {
argument = "--affinity_thresholds " + argument
}

def netmhc_paths_string = netmhc_paths.join(",")
"""
# create folder for MHCflurry downloads to avoid permission problems when running pipeline with docker profile and mhcflurry selected
1 change: 1 addition & 0 deletions nextflow.config
@@ -23,6 +23,7 @@ params {
     mhc_class = 1
     tools = 'syfpeithi'
     tool_thresholds = null
+    affinity_thresholds = false

     // Options: Filtering
     filter_self = false
8 changes: 7 additions & 1 deletion nextflow_schema.json
@@ -116,7 +116,13 @@
"tool_thresholds": {
"type": "string",
"description": "Specifies tool-specific binder thresholds in a JSON file. This can be used to override the given default binder threshold values.",
"help_text": "Default affinity thresholds to determine whether a peptide is considered as a binder are the following:: `syfpeithi` > 50, `mhcflurry` <=500, `mhcnuggets-class-1` <= 500, `mhcnuggets-class-2` <= 500, `netmhc` <= 500, `netmhcpan` <= 500, `netmhcii` <= 500, `netmhciipan` <= 500. Thresholds can be customized in a JSON file: `tool-name:value`"
"help_text": "Default thresholds to determine whether a peptide is considered as a binder are the following: `syfpeithi` > 50, `mhcflurry` <=500, `mhcnuggets-class-1` <= 500, `mhcnuggets-class-2` <= 500, `netmhc` <= 2, `netmhcpan` <= 2, `netmhcii` <= 5, `netmhciipan` <= 5. Note that the default threshold for NetMHC tools is based on the rank metric and remaining predictors is based on affinities. Thresholds can be customized in a JSON file: `tool-name:value`"
},
"affinity_thresholds": {
"type": "boolean",
"default": false,
"description": "Specifies the affinity metric instead of the rank metric used for determining whether a peptide is considered as a binder.",
"help_text": "Default affinity thresholds to determine whether a peptide is considered as a binder are the following:: `syfpeithi` > 50, `mhcflurry` <=500, `mhcnuggets-class-1` <= 500, `mhcnuggets-class-2` <= 500, `netmhc` <= 500, `netmhcpan` <= 500, `netmhcii` <= 500, `netmhciipan` <= 500."
},
"wild_type": {
"type": "boolean",
