Skip to content

Commit

Permalink
Add ion mobility prediction support through IM2Deep
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG committed Jul 8, 2024
1 parent 801b6fb commit a0f3d7f
Show file tree
Hide file tree
Showing 4 changed files with 89 additions and 4 deletions.
4 changes: 4 additions & 0 deletions ms2pip/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,11 @@ def predict_single(*args, **kwargs):

@cli.command(help=ms2pip.core.predict_batch.__doc__)
@click.argument("psms", required=True)
@click.option("--psm-filetype", "-t", type=click.Choice(PSM_FILETYPES), default=None)
@click.option("--output-name", "-o", type=str)
@click.option("--output-format", "-f", type=click.Choice(SUPPORTED_FORMATS), default="tsv")
@click.option("--add-retention-time", "-r", is_flag=True)
@click.option("--add-ion-mobility", "-i", is_flag=True)
@click.option("--model", type=click.Choice(MODELS), default="HCD")
@click.option("--model-dir")
@click.option("--processes", "-n", type=int)
Expand All @@ -110,6 +112,7 @@ def predict_batch(*args, **kwargs):
@click.option("--output-name", "-o", type=str)
@click.option("--output-format", "-f", type=click.Choice(SUPPORTED_FORMATS), default="msp")
@click.option("--add-retention-time", "-r", is_flag=True)
@click.option("--add-ion-mobility", "-i", is_flag=True)
@click.option("--model", type=click.Choice(MODELS), default="HCD")
@click.option("--model-dir")
@click.option("--batch-size", type=int, default=100000)
Expand All @@ -136,6 +139,7 @@ def predict_library(*args, **kwargs):
@click.option("--spectrum-id-pattern", "-p")
@click.option("--compute-correlations", "-x", is_flag=True)
@click.option("--add-retention-time", "-r", is_flag=True)
@click.option("--add-ion-mobility", "-i", is_flag=True)
@click.option("--model", type=click.Choice(MODELS), default="HCD")
@click.option("--model-dir")
@click.option("--ms2-tolerance", type=float, default=0.02)
Expand Down
32 changes: 32 additions & 0 deletions ms2pip/_utils/ion_mobility.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Module for ion mobility prediction with IM²Deep."""

import logging

import pandas as pd
from psm_utils import PSMList

logger = logging.getLogger(__name__)


class IonMobility:
    """Ion mobility predictor backed by the optional IM²Deep package."""

    def __init__(self, processes=1) -> None:
        """
        Resolve the IM²Deep prediction function and remember the process count.

        Parameters
        ----------
        processes
            Value forwarded as ``n_jobs`` to the IM²Deep prediction function.

        Raises
        ------
        ImportError
            If ``im2deep`` cannot be imported.

        """
        # Deferred import: avoids loading the heavy optional dependency unless ion
        # mobility prediction is actually requested.
        try:
            from im2deep.im2deep import predict_ccs
        except ImportError as import_error:
            raise ImportError(
                "The 'im2deep' package is required for ion mobility prediction."
            ) from import_error

        self.predict_fn = predict_ccs
        self.processes = processes

    def add_im_predictions(self, psm_list: PSMList) -> None:
        """
        Predict ion mobility for all PSMs and store the result on the PSM list.

        The prediction function is called with ``write_output=False`` and the
        configured number of processes; the ``.values`` of its result are assigned
        to the ``ion_mobility`` field of ``psm_list`` in place.

        Parameters
        ----------
        psm_list
            PSMs to predict for; modified in place.

        """
        logger.info("Predicting ion mobility...")
        # NOTE(review): the result is expected to be a pandas Series with one entry per
        # PSM, in the same order as ``psm_list`` -- confirm against the IM2Deep API.
        predicted: pd.Series = self.predict_fn(
            psm_list,
            write_output=False,
            n_jobs=self.processes,
        )
        psm_list["ion_mobility"] = predicted.values
26 changes: 26 additions & 0 deletions ms2pip/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from ms2pip._utils.feature_names import get_feature_names
from ms2pip._utils.psm_input import read_psms
from ms2pip._utils.retention_time import RetentionTime
from ms2pip._utils.ion_mobility import IonMobility
from ms2pip._utils.xgb_models import get_predictions_xgb, validate_requested_xgb_model
from ms2pip.constants import MODELS
from ms2pip.result import ProcessingResult, calculate_correlations
Expand Down Expand Up @@ -74,6 +75,7 @@ def predict_single(
def predict_batch(
psms: Union[PSMList, str, Path],
add_retention_time: bool = False,
add_ion_mobility: bool = False,
psm_filetype: Optional[str] = None,
model: Optional[str] = "HCD",
model_dir: Optional[Union[str, Path]] = None,
Expand All @@ -91,6 +93,8 @@ def predict_batch(
filetypes. See https://psm-utils.readthedocs.io/en/stable/#supported-file-formats.
add_retention_time
Add retention time predictions with DeepLC (Requires optional DeepLC dependency).
add_ion_mobility
Add ion mobility predictions with IM2Deep (Requires optional IM2Deep dependency).
model
Model to use for prediction. Default: "HCD".
model_dir
Expand All @@ -113,6 +117,11 @@ def predict_batch(
rt_predictor = RetentionTime(processes=processes)
rt_predictor.add_rt_predictions(psm_list)

if add_ion_mobility:
logger.info("Adding ion mobility predictions")
im_predictor = IonMobility(processes=processes)
im_predictor.add_im_predictions(psm_list)

with Encoder.from_psm_list(psm_list) as encoder:
ms2pip_parallelized = _Parallelized(
encoder=encoder,
Expand All @@ -130,6 +139,7 @@ def predict_library(
fasta_file: Optional[Union[str, Path]] = None,
config: Optional[Union[ProteomeSearchSpace, dict, str, Path]] = None,
add_retention_time: bool = False,
add_ion_mobility: bool = False,
model: Optional[str] = "HCD",
model_dir: Optional[Union[str, Path]] = None,
batch_size: int = 100000,
Expand All @@ -148,6 +158,8 @@ def predict_library(
parameters. Required if `fasta_file` is not provided.
add_retention_time
Add retention time predictions with DeepLC (Requires optional DeepLC dependency).
add_ion_mobility
Add ion mobility predictions with IM2Deep (Requires optional IM2Deep dependency).
model
Model to use for prediction. Default: "HCD".
model_dir
Expand All @@ -157,6 +169,11 @@ def predict_library(
processes
Number of parallel processes for multiprocessing steps. By default, all available.
Yields
------
predictions: List[ProcessingResult]
Predicted spectra with theoretical m/z and predicted intensity values.
"""
if fasta_file and config:
# Use provided proteome, but overwrite fasta_file
Expand All @@ -183,6 +200,7 @@ def predict_library(
yield predict_batch(
search_space.filter_psms_by_mz(PSMList(psm_list=list(batch))),
add_retention_time=add_retention_time,
add_ion_mobility=add_ion_mobility,
model=model,
model_dir=model_dir,
processes=processes,
Expand All @@ -197,6 +215,7 @@ def correlate(
spectrum_id_pattern: Optional[str] = None,
compute_correlations: bool = False,
add_retention_time: bool = False,
add_ion_mobility: bool = False,
model: Optional[str] = "HCD",
model_dir: Optional[Union[str, Path]] = None,
ms2_tolerance: float = 0.02,
Expand All @@ -221,6 +240,8 @@ def correlate(
Compute correlations between predictions and targets.
add_retention_time
Add retention time predictions with DeepLC (Requires optional DeepLC dependency).
add_ion_mobility
Add ion mobility predictions with IM2Deep (Requires optional IM2Deep dependency).
model
Model to use for prediction. Default: "HCD".
model_dir
Expand All @@ -245,6 +266,11 @@ def correlate(
rt_predictor = RetentionTime(processes=processes)
rt_predictor.add_rt_predictions(psm_list)

if add_ion_mobility:
logger.info("Adding ion mobility predictions")
im_predictor = IonMobility(processes=processes)
im_predictor.add_im_predictions(psm_list)

with Encoder.from_psm_list(psm_list) as encoder:
ms2pip_parallelized = _Parallelized(
encoder=encoder,
Expand Down
31 changes: 27 additions & 4 deletions ms2pip/spectrum_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ class TSV(_Writer):
"predicted",
"observed",
"rt",
"im",
]

def write(self, processing_results: List[ProcessingResult]):
Expand Down Expand Up @@ -194,6 +195,7 @@ def _write_row(result: ProcessingResult, ion_type: str, ion_index: int):
if result.observed_intensity
else None,
"rt": result.psm.retention_time if result.psm.retention_time else None,
"im": result.psm.ion_mobility if result.psm.ion_mobility else None,
}


Expand Down Expand Up @@ -243,9 +245,12 @@ def _format_single_modification(
if not modifications:
return None
if len(modifications) > 1:
raise ValueError("Multiple modifications per amino acid not supported.")
raise ValueError("Multiple modifications per amino acid not supported in MSP.")
modification = modifications[0]
return f"{position},{amino_acid},{modification.name}"
try:
return f"{position},{amino_acid},{modification.name}"
except AttributeError: # MassModification has no attribute `name`
return f"{position},{amino_acid},{modification.value}"

sequence_mods = [
_format_single_modification(aa, pos + 1, mods)
Expand Down Expand Up @@ -286,6 +291,14 @@ def _format_retention_time(psm: PSM) -> Union[str, None]:
else:
return None

@staticmethod
def _format_ion_mobility(psm: PSM) -> Union[str, None]:
    """
    Format the ion mobility of a PSM as an ``IonMobility=<value>`` string.

    Returns ``None`` when ``psm.ion_mobility`` is falsy (for example ``None`` or ``0``).
    """
    ion_mobility = psm.ion_mobility
    return f"IonMobility={ion_mobility}" if ion_mobility else None

@staticmethod
def _format_identifier(psm: PSM) -> str:
"""Format MS2PIP ID as string."""
Expand All @@ -302,6 +315,7 @@ def _format_comment_line(psm: PSM) -> str:
MSP._format_parent_mass(psm.peptidoform),
MSP._format_protein_string(psm),
MSP._format_retention_time(psm),
MSP._format_ion_mobility(psm),
MSP._format_identifier(psm),
],
)
Expand All @@ -310,7 +324,11 @@ def _format_comment_line(psm: PSM) -> str:


class MGF(_Writer):
"""Write MGF files from MS2PIP processing results."""
"""
Write MGF files from MS2PIP processing results.
See http://www.matrixscience.com/help/data_file_help.html for documentation on the MGF format.
"""

suffix = ".mgf"

Expand All @@ -333,6 +351,7 @@ def _write_result(self, result: ProcessingResult):
f"CHARGE={result.psm.get_precursor_charge()}+",
f"SCANS={result.psm.spectrum_id}",
f"RTINSECONDS={result.psm.retention_time}" if result.psm.retention_time else None,
f"ION_MOBILITY={result.psm.ion_mobility}" if result.psm.ion_mobility else None,
]

# Peaks
Expand Down Expand Up @@ -428,7 +447,9 @@ class Bibliospec(_Writer):
"""
Write Bibliospec SSL and MS2 files from MS2PIP processing results.
Bibliospec SSL and MS2 files are also compatible with Skyline.
Bibliospec SSL and MS2 files are also compatible with Skyline. See
https://skyline.ms/wiki/home/software/BiblioSpec/page.view?name=BiblioSpec%20input%20and%20output%20file%20formats
for documentation on the Bibliospec file formats.
"""

Expand All @@ -442,6 +463,7 @@ class Bibliospec(_Writer):
"score-type",
"score",
"retention-time",
"ion-mobility",
]

def __init__(self, filename: Union[str, Path], write_mode: str = "w"):
Expand Down Expand Up @@ -551,6 +573,7 @@ def _write_result_to_ssl(
"score-type": None,
"score": None,
"retention-time": result.psm.retention_time if result.psm.retention_time else None,
"ion-mobility": result.psm.ion_mobility if result.psm.ion_mobility else None,
}
)

Expand Down

0 comments on commit a0f3d7f

Please sign in to comment.