Initial setup of netmhcpan module #250

Open · wants to merge 1 commit into base: mhc-binding-subworkflow
32 changes: 24 additions & 8 deletions conf/modules.config
@@ -45,42 +45,58 @@ process {
ext.args = "--tools ${params.tools} --peptides "
}

withName: SYFPEITHI{
withName: PREPARE_PREDICTION_INPUT {
ext.prefix = {"${meta.sample}"}
publishDir = [
path: { "${params.outdir}/prepare_prediction_input" },
mode: params.publish_dir_mode
]
}

withName: SYFPEITHI {
publishDir = [
path: { "${params.outdir}/syfpeithi" },
mode: params.publish_dir_mode
]
}

withName: MHCFLURRY{
withName: MHCFLURRY {
publishDir = [
path: { "${params.outdir}/mhcflurry" },
mode: params.publish_dir_mode
]
}

withName: MHCNUGGETS{
withName: MHCNUGGETS {
publishDir = [
path: { "${params.outdir}/mhcnuggets" },
mode: params.publish_dir_mode
]
}

withName: EPYTOPE_GENERATE_PEPTIDES {
withName: NETMHCPAN{
ext.args = "-BA"
publishDir = [
path: { "${params.outdir}/generated_peptides/${meta.sample}" },
path: { "${params.outdir}/netmhcpan" },
mode: params.publish_dir_mode
]
ext.args = ''
}

withName: NETMHCPAN{
withName: NETMHCIIPAN{
publishDir = [
path: { "${params.outdir}/netmhcpan" },
path: { "${params.outdir}/netmhciipan" },
mode: params.publish_dir_mode
]
}

withName: EPYTOPE_GENERATE_PEPTIDES {
publishDir = [
path: { "${params.outdir}/generated_peptides/${meta.sample}" },
mode: params.publish_dir_mode
]
ext.args = ''
}

withName: SPLIT_PEPTIDES_PEPTIDES {
ext.args = "--min_size ${params.peptides_split_minchunksize} --max_chunks ${params.peptides_split_maxchunks} "
}
4 changes: 2 additions & 2 deletions modules/local/epytope_check_requested_models.nf
@@ -30,8 +30,8 @@ process EPYTOPE_CHECK_REQUESTED_MODELS {
}

def prefix = task.ext.suffix ? "${meta.sample}_${task.ext.suffix}" : "${meta.sample}_peptides"
def min_length = ("${meta.mhc_class}" == "I") ? params.min_peptide_length : params.min_peptide_length_class2
def max_length = ("${meta.mhc_class}" == "I") ? params.max_peptide_length : params.max_peptide_length_class2
def min_length = ("${meta.mhc_class}" == "I") ? params.min_peptide_length_classI : params.min_peptide_length_classII
def max_length = ("${meta.mhc_class}" == "I") ? params.max_peptide_length_classI : params.max_peptide_length_classII

"""
check_requested_models.py ${argument} \
4 changes: 2 additions & 2 deletions modules/local/epytope_generate_peptides.nf
@@ -19,8 +19,8 @@ process EPYTOPE_GENERATE_PEPTIDES {

script:
def prefix = task.ext.suffix ? "${meta.sample}_${task.ext.suffix}" : "${meta.sample}_peptides"
def min_length = (meta.mhc_class == "I") ? params.min_peptide_length : params.min_peptide_length_class2
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length : params.max_peptide_length_class2
def min_length = (meta.mhc_class == "I") ? params.min_peptide_length_classI : params.min_peptide_length_classII
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length_classI : params.max_peptide_length_classII

"""
gen_peptides.py --input ${raw} \\
4 changes: 2 additions & 2 deletions modules/local/epytope_peptide_prediction.nf
@@ -52,8 +52,8 @@ process EPYTOPE_PEPTIDE_PREDICTION {
exit 1, "No tools specified for mhc class ${meta.mhc_class}"
}

def min_length = (meta.mhc_class == "I") ? params.min_peptide_length : params.min_peptide_length_class2
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length : params.max_peptide_length_class2
def min_length = (meta.mhc_class == "I") ? params.min_peptide_length_classI : params.min_peptide_length_classII
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length_classI : params.max_peptide_length_classII

def tools_to_use = ((meta.mhc_class == "I") | (meta.mhc_class == "H-2")) ? class1_tools.join(',') : class2_tools.join(',')

2 changes: 1 addition & 1 deletion modules/local/external_tools_import.nf
@@ -22,7 +22,7 @@ process EXTERNAL_TOOLS_IMPORT {
script:
"""
#
# CHECK IF THE PROVIDED SOFTWARE TARBALL IS A REGULAR FILES
# CHECK IF THE PROVIDED SOFTWARE TARBALL IS A REGULAR FILE
#
if [ ! -f "$tooltarball" ]; then
echo "Path specified for ${toolname} does not point to a regular file. Please specify a path to the original tool tarball." >&2
3 changes: 1 addition & 2 deletions modules/local/merge_predictions.nf
@@ -18,8 +18,7 @@ process MERGE_PREDICTIONS {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample

"""
"""
template "merge_predictions.py"

stub:
def args = task.ext.args ?: ''
29 changes: 29 additions & 0 deletions modules/local/merge_predictions/main.nf
@@ -0,0 +1,29 @@
process MERGE_PREDICTIONS {
label 'process_single'
tag "${meta.sample}"

conda "bioconda::mhcgnomes=1.8.4"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mhcgnomes:1.8.4--pyh7cba7a3_0' :
'quay.io/biocontainers/mhcgnomes:1.8.4--pyh7cba7a3_0' }"

input:
tuple val(meta), path(prediction_files)

output:
tuple val(meta), path("*.tsv"), emit: merged
path "versions.yml", emit: versions


script:
//TODO handle the thresholds (parse the --tools_thresholds and --use_affinity_thresholds)
template "merge_predictions.py"

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample
"""
touch merged_prediction.tsv
touch versions.yml
"""
}
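
The module pins an mhcgnomes container, although the template it runs (below) only imports the library so far; presumably it is intended for harmonizing allele names across predictors. A minimal sketch of that use, assuming mhcgnomes 1.8.4 and its `parse`/`to_string` API:

```python
import mhcgnomes

# Different predictors spell the same allele differently; mhcgnomes
# normalizes them to one canonical form (assumed to be why the
# container is pinned here).
for raw in ["A*02:01", "A0201", "HLA-A*02:01"]:
    print(raw, "->", mhcgnomes.parse(raw).to_string())  # each prints HLA-A*02:01
```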
168 changes: 168 additions & 0 deletions modules/local/merge_predictions/templates/merge_predictions.py
@@ -0,0 +1,168 @@
#!/usr/bin/env python

import argparse
import shlex
from enum import Enum
import sys
import typing

import mhcgnomes
import pandas as pd

# Configure logging with date and time
import logging

logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)

class Arguments:
"""
Parses the arguments, including the ones coming from $task.ext.args.
"""

def __init__(self) -> None:
self.input = "$prediction_files".split(" ")
self.prefix = "$task.ext.prefix" if "$task.ext.prefix" != "null" else "$meta.sample"
self.alleles = sorted("$meta.alleles".split(';'))
self.parse_ext_args("$task.ext.args")

def parse_ext_args(self, args_string: str) -> None:
"""
Parse the extended arguments.
"""
# skip when there are no extended arguments
if args_string == "null":
args_string = ""

# Parse the extended arguments
args_list = shlex.split(args_string) # Split the string into a list of arguments
parser = argparse.ArgumentParser()
# input parameters
args = parser.parse_args(args_list)

# Assign args attributes to self attributes
for attr in vars(args):
setattr(self, attr, getattr(args, attr))


class Version:
"""
Parse the versions of the modules used in the script.
"""

@staticmethod
def get_versions(modules: list) -> dict:
"""
This function takes a list of modules and returns a dictionary with the
versions of each module.
"""
return {module.__name__: module.__version__ for module in modules}

@staticmethod
def format_yaml_like(data: dict, indent: int = 0) -> str:
"""
Formats a dictionary to a YAML-like string.

Args:
data (dict): The dictionary to format.
indent (int): The current indentation level.

Returns:
yaml_str: A string formatted as YAML.
"""
yaml_str = ""
for key, value in data.items():
spaces = " " * indent
if isinstance(value, dict):
yaml_str += f"{spaces}{key}:\\n{Version.format_yaml_like(value, indent + 1)}"
else:
yaml_str += f"{spaces}{key}: {value}\\n"
return yaml_str

class PredictionResult:
def __init__(self, file_path, alleles, threshold=2):
self.file_path = file_path
self.alleles = alleles
self.threshold = threshold
self.predictor = None
self.prediction_df = self._format_prediction_result()

def _format_prediction_result(self):
if 'syfpeithi' in self.file_path:
self.predictor = 'syfpeithi'
return self._format_syfpeithi_prediction()
elif 'mhcflurry' in self.file_path:
self.predictor = 'mhcflurry'
return self._format_mhcflurry_prediction()
elif 'mhcnuggets' in self.file_path:
self.predictor = 'mhcnuggets'
return self._format_mhcnuggets_prediction()
elif 'netmhcpan' in self.file_path:
self.predictor = 'netmhcpan'
return self._format_netmhcpan_prediction()
elif 'netmhciipan' in self.file_path:
self.predictor = 'netmhciipan'
return self._format_netmhciipan_prediction()
else:
logging.error(f'Unsupported predictor type in file: {self.file_path}.')
sys.exit(1)

def _format_syfpeithi_prediction(self):
pass

def _format_mhcflurry_prediction(self):
pass

def _format_mhcnuggets_prediction(self):
pass

def _format_netmhcpan_prediction(self) -> pd.DataFrame:
# Map with allele index to allele name
alleles_dict = {i: allele for i, allele in enumerate(self.alleles)}
# Read the file into a DataFrame with no headers initially
df = pd.read_csv(self.file_path, sep='\t', skiprows=1)
df = df[df.columns[df.columns.str.contains('Peptide|EL_Rank|BA_Rank')]]
# TODO: Naming needs to be harmonized down the line once all predictors are implemented
df = df.rename(columns={'Peptide':'sequence','EL_Rank':'EL_Rank.0','BA_Rank':'BA_Rank.0'})
# Reshape to long format based on the .0/.1/.2 allele-index suffixes
df_long = pd.melt(
df,
id_vars=["sequence"],
value_vars=[col for col in df.columns if col != "sequence"],
var_name="metric",
value_name="value",
)
# Extract the allele information (e.g., .0, .1, etc.)
df_long["allele"] = df_long["metric"].str.split('.').str[1]
df_long["metric"] = df_long["metric"].str.split('.').str[0]

# Pivot table to organize columns properly
df_pivot = df_long.pivot_table(index=["sequence", "allele"], columns="metric", values="value").reset_index()
df_pivot['allele'] = [alleles_dict[int(index.strip("."))] for index in df_pivot['allele']]
df_pivot['binder'] = df_pivot['EL_Rank'] <= self.threshold
df_pivot['predictor'] = 'netmhcpan'
df_pivot.index.name = ''

return df_pivot

def _format_netmhciipan_prediction(self, threshold=None):
pass

def main():
args = Arguments()

for file in args.input:
result = PredictionResult(file, args.alleles)
result.prediction_df.to_csv(f"{args.prefix}_{result.predictor}.tsv", sep="\t", index=False)

# Parse versions
versions_this_module = {}
versions_this_module["${task.process}"] = Version.get_versions([argparse, pd])
with open("versions.yml", "w") as f:
f.write(Version.format_yaml_like(versions_this_module))

if __name__ == "__main__":
main()
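
The netMHCpan formatter is the only one implemented so far, and its melt/pivot round trip is easiest to follow on toy data. A self-contained sketch of the same reshape (toy sequences, two hypothetical alleles, the class's default threshold of 2):

```python
import pandas as pd

# Wide netMHCpan-style table after the rename step: one EL_Rank/BA_Rank
# pair per allele, suffixed .0, .1, ... (toy values).
df = pd.DataFrame({
    "sequence": ["SIINFEKL", "GILGFVFTL"],
    "EL_Rank.0": [0.5, 3.0], "BA_Rank.0": [0.7, 2.5],
    "EL_Rank.1": [1.2, 0.1], "BA_Rank.1": [1.0, 0.2],
})
alleles = {0: "HLA-A*02:01", 1: "HLA-B*07:02"}

# Wide -> long: the column suffix encodes the allele index
long_df = pd.melt(df, id_vars=["sequence"], var_name="metric", value_name="value")
long_df["allele"] = long_df["metric"].str.split(".").str[1]
long_df["metric"] = long_df["metric"].str.split(".").str[0]

# Long -> tidy wide: one row per (sequence, allele), metrics as columns
out = long_df.pivot_table(index=["sequence", "allele"], columns="metric", values="value").reset_index()
out["allele"] = out["allele"].astype(int).map(alleles)
out["binder"] = out["EL_Rank"] <= 2
print(out)
```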
1 change: 1 addition & 0 deletions modules/local/merge_predictions/templates/versions.yml
@@ -0,0 +1 @@
${task.process}:\n argparse: 1.1\n pandas: 1.5.3\n
17 changes: 15 additions & 2 deletions modules/local/netmhcpan.nf
@@ -8,7 +8,7 @@ process NETMHCPAN {
tuple val(meta), path(peptide_file), path(software)

output:
tuple val(meta), path("*.tsv"), emit: predicted
tuple val(meta), path("*.xls"), emit: predicted
path "versions.yml", emit: versions

script:
@@ -17,15 +17,28 @@
}
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample
// Reformat meta.alleles for netMHCpan, e.g. A*01:217 -> HLA-A01:217: prepend HLA- and strip the '*'.
def alleles = meta.alleles.tokenize(';').collect { 'HLA-' + it.replace('*', '') }.join(',')

"""
netmhcpan/netMHCpan \
-p $peptide_file \
-a $alleles \
-xls \
-xlsfile ${prefix}_predicted_netmhcpan.xls \
$args

cat <<-END_VERSIONS > versions.yml
"${task.process}":
\$(cat netmhcpan/data/version | sed -s 's/ version/:/g')
END_VERSIONS
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample
"""
touch ${prefix}_predicted_netmhcpan.tsv
touch ${prefix}_predicted_netmhcpan.xls

cat <<-END_VERSIONS > versions.yml
"${task.process}":
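
For illustration, the Groovy allele rewrite above expressed as standalone Python (same semantics assumed; input string hypothetical):

```python
# meta.alleles arrives ';'-separated, e.g. "A*01:217;B*07:02"; netMHCpan
# wants comma-separated names with an HLA- prefix and no '*'.
def format_alleles(meta_alleles: str) -> str:
    return ",".join("HLA-" + a.replace("*", "") for a in meta_alleles.split(";"))

assert format_alleles("A*01:217;B*07:02") == "HLA-A01:217,HLA-B07:02"
```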
12 changes: 3 additions & 9 deletions modules/local/prepare_prediction_input.nf
@@ -10,22 +10,16 @@
'quay.io/biocontainers/mhcgnomes:1.8.4--pyh7cba7a3_0' }"

input:
tuple val(meta), path(peptide_file)
tuple val(meta), path(tsv)

output:
tuple val(meta), path("*.csv"), emit: prepared
tuple val(meta), path("*.csv|*.tsv"), emit: prepared
path "versions.yml", emit: versions

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample
//TODO handle the thresholds (parse the --tools_thresholds and --use_affinity_thresholds)
def min_length = (meta.mhc_class == "I") ? params.min_peptide_length : params.min_peptide_length_class2
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length : params.max_peptide_length_class2
//pull the tools via params.tools

"""
"""
template "prepare_prediction_input.py"

stub:
def args = task.ext.args ?: ''
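
The threshold TODO appears both here and in MERGE_PREDICTIONS. One hypothetical way the templates' Arguments class could cover it; the flag names come from the TODO, but the JSON encoding and defaults are assumptions, not the pipeline's confirmed format:

```python
import argparse
import json
import shlex

# Hypothetical sketch: parse the two flags named in the TODO from a
# $task.ext.args-style string; per-tool thresholds as JSON is assumed.
parser = argparse.ArgumentParser()
parser.add_argument("--tools_thresholds", type=json.loads, default={})
parser.add_argument("--use_affinity_thresholds", action="store_true")

cli = """--tools_thresholds '{"netmhcpan": 2.0}' --use_affinity_thresholds"""
args = parser.parse_args(shlex.split(cli))
print(args.tools_thresholds, args.use_affinity_thresholds)  # {'netmhcpan': 2.0} True
```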