Initial setup of netmhcpan module #250

Open · wants to merge 1 commit into base: mhc-binding-subworkflow
32 changes: 24 additions & 8 deletions conf/modules.config
@@ -45,42 +45,58 @@ process {
ext.args = "--tools ${params.tools} --peptides "
}

withName: SYFPEITHI{
withName: PREPARE_PREDICTION_INPUT {
ext.prefix = {"${meta.sample}"}
publishDir = [
path: { "${params.outdir}/prepare_prediction_input" },
mode: params.publish_dir_mode
]
}

withName: SYFPEITHI {
publishDir = [
path: { "${params.outdir}/syfpeithi" },
mode: params.publish_dir_mode
]
}

withName: MHCFLURRY{
withName: MHCFLURRY {
publishDir = [
path: { "${params.outdir}/mhcflurry" },
mode: params.publish_dir_mode
]
}

withName: MHCNUGGETS{
withName: MHCNUGGETS {
publishDir = [
path: { "${params.outdir}/mhcnuggets" },
mode: params.publish_dir_mode
]
}

withName: EPYTOPE_GENERATE_PEPTIDES {
withName: NETMHCPAN{
ext.args = "-BA"
publishDir = [
path: { "${params.outdir}/generated_peptides/${meta.sample}" },
path: { "${params.outdir}/netmhcpan" },
mode: params.publish_dir_mode
]
ext.args = ''
}

withName: NETMHCPAN{
withName: NETMHCIIPAN{
publishDir = [
path: { "${params.outdir}/netmhcpan" },
path: { "${params.outdir}/netmhciipan" },
mode: params.publish_dir_mode
]
}

withName: EPYTOPE_GENERATE_PEPTIDES {
publishDir = [
path: { "${params.outdir}/generated_peptides/${meta.sample}" },
mode: params.publish_dir_mode
]
ext.args = ''
}

withName: SPLIT_PEPTIDES_PEPTIDES {
ext.args = "--min_size ${params.peptides_split_minchunksize} --max_chunks ${params.peptides_split_maxchunks} "
}
4 changes: 2 additions & 2 deletions modules/local/epytope_check_requested_models.nf
@@ -30,8 +30,8 @@ process EPYTOPE_CHECK_REQUESTED_MODELS {
}

def prefix = task.ext.suffix ? "${meta.sample}_${task.ext.suffix}" : "${meta.sample}_peptides"
def min_length = ("${meta.mhc_class}" == "I") ? params.min_peptide_length : params.min_peptide_length_class2
def max_length = ("${meta.mhc_class}" == "I") ? params.max_peptide_length : params.max_peptide_length_class2
def min_length = ("${meta.mhc_class}" == "I") ? params.min_peptide_length_classI : params.min_peptide_length_classII
def max_length = ("${meta.mhc_class}" == "I") ? params.max_peptide_length_classI : params.max_peptide_length_classII

"""
check_requested_models.py ${argument} \
4 changes: 2 additions & 2 deletions modules/local/epytope_generate_peptides.nf
@@ -19,8 +19,8 @@ process EPYTOPE_GENERATE_PEPTIDES {

script:
def prefix = task.ext.suffix ? "${meta.sample}_${task.ext.suffix}" : "${meta.sample}_peptides"
def min_length = (meta.mhc_class == "I") ? params.min_peptide_length : params.min_peptide_length_class2
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length : params.max_peptide_length_class2
def min_length = (meta.mhc_class == "I") ? params.min_peptide_length_classI : params.min_peptide_length_classII
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length_classI : params.max_peptide_length_classII

"""
gen_peptides.py --input ${raw} \\
4 changes: 2 additions & 2 deletions modules/local/epytope_peptide_prediction.nf
@@ -52,8 +52,8 @@ process EPYTOPE_PEPTIDE_PREDICTION {
exit 1, "No tools specified for mhc class ${meta.mhc_class}"
}

def min_length = (meta.mhc_class == "I") ? params.min_peptide_length : params.min_peptide_length_class2
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length : params.max_peptide_length_class2
def min_length = (meta.mhc_class == "I") ? params.min_peptide_length_classI : params.min_peptide_length_classII
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length_classI : params.max_peptide_length_classII

def tools_to_use = ((meta.mhc_class == "I") | (meta.mhc_class == "H-2")) ? class1_tools.join(',') : class2_tools.join(',')

2 changes: 1 addition & 1 deletion modules/local/external_tools_import.nf
@@ -22,7 +22,7 @@ process EXTERNAL_TOOLS_IMPORT {
script:
"""
#
# CHECK IF THE PROVIDED SOFTWARE TARBALL IS A REGULAR FILES
# CHECK IF THE PROVIDED SOFTWARE TARBALL IS A REGULAR FILE
#
if [ ! -f "$tooltarball" ]; then
echo "Path specified for ${toolname} does not point to a regular file. Please specify a path to the original tool tarball." >&2
3 changes: 1 addition & 2 deletions modules/local/merge_predictions.nf
@@ -18,8 +18,7 @@ process MERGE_PREDICTIONS {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample

"""
"""
template "merge_predictions.py"

stub:
def args = task.ext.args ?: ''
29 changes: 29 additions & 0 deletions modules/local/merge_predictions/main.nf
@@ -0,0 +1,29 @@
process MERGE_PREDICTIONS {
label 'process_single'
tag "${meta.sample}"

conda "bioconda::mhcgnomes=1.8.4"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mhcgnomes:1.8.4--pyh7cba7a3_0' :
'quay.io/biocontainers/mhcgnomes:1.8.4--pyh7cba7a3_0' }"

input:
tuple val(meta), path(prediction_files)

output:
tuple val(meta), path("*.tsv"), emit: merged
path "versions.yml", emit: versions


script:
//TODO handle the thresholds (parse the --tools_thresholds and --use_affinity_thresholds)
template "merge_predictions.py"

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample
"""
touch merged_prediction.tsv
touch versions.yml
"""
}
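
The module pins an mhcgnomes container, although the template it runs (below) only imports the library so far; presumably it is intended for harmonizing allele names across predictors. A minimal sketch of that use, assuming mhcgnomes 1.8.4 and its `parse`/`to_string` API:

```python
import mhcgnomes

# Different predictors spell the same allele differently; mhcgnomes
# normalizes them to one canonical form (assumed to be why the
# container is pinned here).
for raw in ["A*02:01", "A0201", "HLA-A*02:01"]:
    print(raw, "->", mhcgnomes.parse(raw).to_string())  # each prints HLA-A*02:01
```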
168 changes: 168 additions & 0 deletions modules/local/merge_predictions/templates/merge_predictions.py
@@ -0,0 +1,168 @@
#!/usr/bin/env python

import argparse
import shlex
from enum import Enum
import sys
import typing

import mhcgnomes
import pandas as pd

# Configure logging with date and time
import logging

logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)

class Arguments:
"""
Parses the arguments, including the ones coming from $task.ext.args.
"""

def __init__(self) -> None:
self.input = "$prediction_files".split(" ")
self.prefix = "$task.ext.prefix" if "$task.ext.prefix" != "null" else "$meta.sample"
self.alleles = sorted("$meta.alleles".split(';'))
self.parse_ext_args("$task.ext.args")

def parse_ext_args(self, args_string: str) -> None:
"""
Parse the extended arguments.
"""
# skip when there are no extended arguments
if args_string == "null":
args_string = ""

# Parse the extended arguments
args_list = shlex.split(args_string) # Split the string into a list of arguments
parser = argparse.ArgumentParser()
# input parameters
args = parser.parse_args(args_list)

# Assign args attributes to self attributes
for attr in vars(args):
setattr(self, attr, getattr(args, attr))


class Version:
"""
Parse the versions of the modules used in the script.
"""

@staticmethod
def get_versions(modules: list) -> dict:
"""
This function takes a list of modules and returns a dictionary with the
versions of each module.
"""
return {module.__name__: module.__version__ for module in modules}

@staticmethod
def format_yaml_like(data: dict, indent: int = 0) -> str:
"""
Formats a dictionary to a YAML-like string.

Args:
data (dict): The dictionary to format.
indent (int): The current indentation level.

Returns:
yaml_str: A string formatted as YAML.
"""
yaml_str = ""
for key, value in data.items():
spaces = " " * indent
if isinstance(value, dict):
yaml_str += f"{spaces}{key}:\\n{Version.format_yaml_like(value, indent + 1)}"
else:
yaml_str += f"{spaces}{key}: {value}\\n"
return yaml_str

class PredictionResult:
def __init__(self, file_path, alleles, threshold=2):
self.file_path = file_path
self.alleles = alleles
self.threshold = threshold
self.predictor = None
self.prediction_df = self._format_prediction_result()

def _format_prediction_result(self):
if 'syfpeithi' in self.file_path:
self.predictor = 'syfpeithi'
return self._format_syfpeithi_prediction()
elif 'mhcflurry' in self.file_path:
self.predictor = 'mhcflurry'
return self._format_mhcflurry_prediction()
elif 'mhcnuggets' in self.file_path:
self.predictor = 'mhcnuggets'
return self._format_mhcnuggets_prediction()
elif 'netmhcpan' in self.file_path:
self.predictor = 'netmhcpan'
return self._format_netmhcpan_prediction()
elif 'netmhciipan' in self.file_path:
self.predictor = 'netmhciipan'
return self._format_netmhciipan_prediction()
else:
logging.error(f'Unsupported predictor type in file: {self.file_path}.')
sys.exit(1)

def _format_syfpeithi_prediction(self):
pass

def _format_mhcflurry_prediction(self):
pass

def _format_mhcnuggets_prediction(self):
pass

def _format_netmhcpan_prediction(self) -> pd.DataFrame:
# Map with allele index to allele name
alleles_dict = {i: allele for i, allele in enumerate(self.alleles)}
# Read the file into a DataFrame with no headers initially
df = pd.read_csv(self.file_path, sep='\t', skiprows=1)
df = df[df.columns[df.columns.str.contains('Peptide|EL_Rank|BA_Rank')]]
# TODO: Naming needs to be harmonized down the line once all predictors are implemented
df = df.rename(columns={'Peptide':'sequence','EL_Rank':'EL_Rank.0','BA_Rank':'BA_Rank.0'})
# Reshape to long format based on the .0/.1/.2 allele-index suffixes
df_long = pd.melt(
df,
id_vars=["sequence"],
value_vars=[col for col in df.columns if col != "sequence"],
var_name="metric",
value_name="value",
)
# Extract the allele information (e.g., .0, .1, etc.)
df_long["allele"] = df_long["metric"].str.split('.').str[1]
df_long["metric"] = df_long["metric"].str.split('.').str[0]

# Pivot table to organize columns properly
df_pivot = df_long.pivot_table(index=["sequence", "allele"], columns="metric", values="value").reset_index()
df_pivot['allele'] = [alleles_dict[int(index.strip("."))] for index in df_pivot['allele']]
df_pivot['binder'] = df_pivot['EL_Rank'] <= self.threshold
df_pivot['predictor'] = 'netmhcpan'
df_pivot.index.name = ''

return df_pivot

def _format_netmhciipan_prediction(self, threshold=None):
pass

def main():
args = Arguments()

for file in args.input:
result = PredictionResult(file, args.alleles)
result.prediction_df.to_csv(f"{args.prefix}_{result.predictor}.tsv", sep="\t", index=False)

# Parse versions
versions_this_module = {}
versions_this_module["${task.process}"] = Version.get_versions([argparse, pd])
with open("versions.yml", "w") as f:
f.write(Version.format_yaml_like(versions_this_module))

if __name__ == "__main__":
main()
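
The netMHCpan formatter is the only one implemented so far, and its melt/pivot round trip is easiest to follow on toy data. A self-contained sketch of the same reshape (toy sequences, two hypothetical alleles, the class's default threshold of 2):

```python
import pandas as pd

# Wide netMHCpan-style table after the rename step: one EL_Rank/BA_Rank
# pair per allele, suffixed .0, .1, ... (toy values).
df = pd.DataFrame({
    "sequence": ["SIINFEKL", "GILGFVFTL"],
    "EL_Rank.0": [0.5, 3.0], "BA_Rank.0": [0.7, 2.5],
    "EL_Rank.1": [1.2, 0.1], "BA_Rank.1": [1.0, 0.2],
})
alleles = {0: "HLA-A*02:01", 1: "HLA-B*07:02"}

# Wide -> long: the column suffix encodes the allele index
long_df = pd.melt(df, id_vars=["sequence"], var_name="metric", value_name="value")
long_df["allele"] = long_df["metric"].str.split(".").str[1]
long_df["metric"] = long_df["metric"].str.split(".").str[0]

# Long -> tidy wide: one row per (sequence, allele), metrics as columns
out = long_df.pivot_table(index=["sequence", "allele"], columns="metric", values="value").reset_index()
out["allele"] = out["allele"].astype(int).map(alleles)
out["binder"] = out["EL_Rank"] <= 2
print(out)
```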
1 change: 1 addition & 0 deletions modules/local/merge_predictions/templates/versions.yml
@@ -0,0 +1 @@
${task.process}:\n argparse: 1.1\n pandas: 1.5.3\n
17 changes: 15 additions & 2 deletions modules/local/netmhcpan.nf
@@ -8,7 +8,7 @@ process NETMHCPAN {
tuple val(meta), path(peptide_file), path(software)

output:
tuple val(meta), path("*.tsv"), emit: predicted
tuple val(meta), path("*.xls"), emit: predicted
path "versions.yml", emit: versions

script:
@@ -17,15 +17,28 @@
}
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample
// Reformat meta.alleles for netMHCpan, e.g. A*01:217 -> HLA-A01:217: prepend HLA- and strip the '*'.
def alleles = meta.alleles.tokenize(';').collect { 'HLA-' + it.replace('*', '') }.join(',')

"""
netmhcpan/netMHCpan \
-p $peptide_file \
-a $alleles \
-xls \
-xlsfile ${prefix}_predicted_netmhcpan.xls \
$args

cat <<-END_VERSIONS > versions.yml
"${task.process}":
\$(cat netmhcpan/data/version | sed -s 's/ version/:/g')
END_VERSIONS
"""

stub:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample
"""
touch ${prefix}_predicted_netmhcpan.tsv
touch ${prefix}_predicted_netmhcpan.xls

cat <<-END_VERSIONS > versions.yml
"${task.process}":
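
For illustration, the Groovy allele rewrite above expressed as standalone Python (same semantics assumed; input string hypothetical):

```python
# meta.alleles arrives ';'-separated, e.g. "A*01:217;B*07:02"; netMHCpan
# wants comma-separated names with an HLA- prefix and no '*'.
def format_alleles(meta_alleles: str) -> str:
    return ",".join("HLA-" + a.replace("*", "") for a in meta_alleles.split(";"))

assert format_alleles("A*01:217;B*07:02") == "HLA-A01:217,HLA-B07:02"
```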
12 changes: 3 additions & 9 deletions modules/local/prepare_prediction_input.nf
@@ -10,22 +10,16 @@
'quay.io/biocontainers/mhcgnomes:1.8.4--pyh7cba7a3_0' }"

input:
tuple val(meta), path(peptide_file)
tuple val(meta), path(tsv)

output:
tuple val(meta), path("*.csv"), emit: prepared
tuple val(meta), path("*.csv|*.tsv"), emit: prepared
path "versions.yml", emit: versions

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: meta.sample
//TODO handle the thresholds (parse the --tools_thresholds and --use_affinity_thresholds)
def min_length = (meta.mhc_class == "I") ? params.min_peptide_length : params.min_peptide_length_class2
def max_length = (meta.mhc_class == "I") ? params.max_peptide_length : params.max_peptide_length_class2
//pull the tools via params.tools

"""
"""
template "prepare_prediction_input.py"

stub:
def args = task.ext.args ?: ''
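
The threshold TODO appears both here and in MERGE_PREDICTIONS. One hypothetical way the templates' Arguments class could cover it; the flag names come from the TODO, but the JSON encoding and defaults are assumptions, not the pipeline's confirmed format:

```python
import argparse
import json
import shlex

# Hypothetical sketch: parse the two flags named in the TODO from a
# $task.ext.args-style string; per-tool thresholds as JSON is assumed.
parser = argparse.ArgumentParser()
parser.add_argument("--tools_thresholds", type=json.loads, default={})
parser.add_argument("--use_affinity_thresholds", action="store_true")

cli = """--tools_thresholds '{"netmhcpan": 2.0}' --use_affinity_thresholds"""
args = parser.parse_args(shlex.split(cli))
print(args.tools_thresholds, args.use_affinity_thresholds)  # {'netmhcpan': 2.0} True
```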