Skip to content

Commit

Permalink
Merge branch 'timsRescore' of https://github.com/compomics/ms2rescore
Browse files Browse the repository at this point in the history
…into peptide-num-fix
  • Loading branch information
ArthurDeclercq committed Apr 9, 2024
2 parents 09fbbfb + 126c255 commit 5c821c9
Show file tree
Hide file tree
Showing 9 changed files with 172 additions and 71 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.10
FROM python:3.11

# ARG DEBIAN_FRONTEND=noninteractive

Expand All @@ -13,7 +13,7 @@ ADD MANIFEST.in /ms2rescore/MANIFEST.in
ADD ms2rescore /ms2rescore/ms2rescore

RUN apt-get update \
&& apt install -y procps git-lfs \
&& apt install -y procps \
&& pip install /ms2rescore

ENTRYPOINT [""]
2 changes: 1 addition & 1 deletion ms2rescore/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""

__version__ = "3.0.3"
__version__ = "3.1.0-dev1"

from warnings import filterwarnings

Expand Down
43 changes: 32 additions & 11 deletions ms2rescore/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
"""MS²Rescore: Sensitive PSM rescoring with predicted MS² peak intensities and RTs."""

import argparse
import importlib.resources
import json
import logging
import sys
from pathlib import Path
Expand All @@ -10,7 +12,7 @@
from rich.logging import RichHandler
from rich.text import Text

from ms2rescore import __version__
from ms2rescore import __version__, package_data
from ms2rescore.config_parser import parse_configurations
from ms2rescore.core import rescore
from ms2rescore.exceptions import MS2RescoreConfigurationError
Expand All @@ -33,19 +35,26 @@
CONSOLE = Console(record=True)


def _print_credits():
def _print_credits(tims=False):
"""Print software credits to terminal."""
text = Text()
text.append("\n")
text.append("MS²Rescore", style="bold link https://github.com/compomics/ms2rescore")
if tims:
text.append("TIMS²Rescore", style="bold link https://github.com/compomics/ms2rescore")
else:
text.append("MS²Rescore", style="bold link https://github.com/compomics/ms2rescore")
text.append(f" (v{__version__})\n", style="bold")
if tims:
text.append("MS²Rescore tuned for Bruker timsTOF instruments.\n", style="italic")
text.append("Developed at CompOmics, VIB / Ghent University, Belgium.\n")
text.append("Please cite: ")
text.append(
"Declercq et al. MCP (2022)", style="link https://doi.org/10.1016/j.mcpro.2022.100266"
"Buur & Declercq et al. JPR (2024)",
style="link https://doi.org/10.1021/acs.jproteome.3c00785",
)
text.append("\n")
text.stylize("cyan")
if tims:
text.stylize("#006cb5")
CONSOLE.print(text)


Expand Down Expand Up @@ -152,18 +161,30 @@ def _setup_logging(passed_level: str, log_file: Union[str, Path]):
)


def main():
def main_tims():
    """
    Command-line entry point for TIMS²Rescore.

    Delegates to :func:`main` with ``tims=True``; takes no arguments of its own.
    """
    main(tims=True)


def main(tims=False):
"""Run MS²Rescore command-line interface."""
_print_credits()
_print_credits(tims)

# Parse CLI arguments and configuration file
parser = _argument_parser()
cli_args = parser.parse_args()

configurations = []
if cli_args.config_file:
configurations.append(cli_args.config_file)
if tims:
configurations.append(
json.load(importlib.resources.open_text(package_data, "config_default_tims.json"))
)
configurations.append(cli_args)

try:
if cli_args.config_file:
config = parse_configurations([cli_args.config_file, cli_args])
else:
config = parse_configurations(cli_args)
config = parse_configurations(configurations)
except MS2RescoreConfigurationError as e:
LOGGER.critical(e)
sys.exit(1)
Expand Down
60 changes: 47 additions & 13 deletions ms2rescore/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
from multiprocessing import cpu_count
from typing import Dict, Optional

import numpy as np
import psm_utils.io
from psm_utils import PSMList

from ms2rescore import exceptions
from ms2rescore.feature_generators import FEATURE_GENERATORS
from ms2rescore.parse_psms import parse_psms
from ms2rescore.parse_spectra import get_missing_values
from ms2rescore.report import generate
from ms2rescore.rescoring_engines import mokapot, percolator
from ms2rescore import exceptions

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -58,18 +59,8 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
f"PSMs already contain the following rescoring features: {psm_list_feature_names}"
)

# TODO: avoid hard coding feature generators in some way
rt_required = ("deeplc" in config["feature_generators"]) and (
None in psm_list["retention_time"]
)
im_required = (
"ionmob" in config["feature_generators"] or "im2deep" in config["feature_generators"]
) and (None in psm_list["ion_mobility"])
logger.debug(f"RT required: {rt_required}, IM required: {im_required}")

if rt_required or im_required:
logger.info("Parsing missing retention time and/or ion mobility values from spectra...")
get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required)
# Add missing precursor info from spectrum file if needed
_fill_missing_precursor_info(psm_list, config)

# Add rescoring features
for fgen_name, fgen_config in config["feature_generators"].items():
Expand Down Expand Up @@ -166,6 +157,49 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
logger.exception(e)


def _fill_missing_precursor_info(psm_list, config):
    """
    Fill missing precursor info from spectrum file if needed, then validate the result.

    Retention time is only considered when the ``deeplc`` feature generator is configured;
    ion mobility only when ``ionmob`` or ``im2deep`` is configured. A value counts as missing
    when it is ``None``, ``0``, or NaN. If a needed value is missing for at least one PSM,
    ``get_missing_values`` is called to fill it in from the spectrum files. Afterwards, PSMs
    that still have ``None`` or NaN for a needed value are removed from ``psm_list``.

    Parameters
    ----------
    psm_list
        PSMs to complete. Indexed with ``"retention_time"`` and ``"ion_mobility"`` and with a
        boolean mask.
    config
        Configuration containing a ``"feature_generators"`` mapping.

    Returns
    -------
    The ``psm_list`` object that was passed in, filtered in place. Returned for convenience;
    callers that ignore the return value still see the removal.

    Raises
    ------
    exceptions.MissingValuesError
        If a needed value is still missing for every PSM after parsing the spectrum files.

    """

    def _is_unset(value):
        return value is None or np.isnan(value)

    def _is_unset_or_zero(value):
        return value is None or value == 0 or np.isnan(value)

    # Check if required
    # TODO: avoid hard coding feature generators in some way
    feature_generators = config["feature_generators"]
    rt_required = ("deeplc" in feature_generators) and any(
        _is_unset_or_zero(v) for v in psm_list["retention_time"]
    )
    im_required = (
        "ionmob" in feature_generators or "im2deep" in feature_generators
    ) and any(_is_unset_or_zero(v) for v in psm_list["ion_mobility"])
    logger.debug(f"RT required: {rt_required}, IM required: {im_required}")

    # Nothing to add and nothing to validate: values that no configured feature generator
    # needs must not trigger an error or PSM removal.
    if not (rt_required or im_required):
        return psm_list

    # Add missing values
    logger.info("Parsing missing retention time and/or ion mobility values from spectra...")
    get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required)

    # Check if the values that were needed are now present
    for value_name, required in (("retention_time", rt_required), ("ion_mobility", im_required)):
        if not required:
            continue

        values = psm_list[value_name]
        if not any(_is_unset_or_zero(v) for v in values):
            continue

        if all(_is_unset_or_zero(v) for v in values):
            raise exceptions.MissingValuesError(
                f"Could not find any '{value_name}' values in PSM or spectrum files. Disable "
                f"feature generators that require '{value_name}' or ensure that the values are "
                "present in the input files."
            )

        # Only None/NaN entries are dropped; zero values are left in place.
        keep_mask = [not _is_unset(v) for v in values]
        n_missing = keep_mask.count(False)
        if n_missing:
            logger.warning(
                f"Found {n_missing} PSMs with missing '{value_name}' values. "
                "These PSMs will be removed."
            )
            # Rebinding the local name would leave the caller's list untouched, so the
            # filtered PSMs are written back into the passed object.
            # NOTE(review): assumes PSMList keeps its PSMs in an assignable `psm_list`
            # attribute, as in psm_utils - confirm.
            psm_list.psm_list = psm_list[keep_mask].psm_list

    return psm_list


def _write_feature_names(feature_names, output_file_root):
"""Write feature names to file."""
with open(output_file_root + ".feature_names.tsv", "w") as f:
Expand Down
6 changes: 6 additions & 0 deletions ms2rescore/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ class ModificationParsingError(IDFileParsingError):
pass


class MissingValuesError(MS2RescoreError):
    """Raised when required values are missing from the PSMs and/or the spectra."""


class ReportGenerationError(MS2RescoreError):
"""Error while generating report."""

Expand Down
25 changes: 25 additions & 0 deletions ms2rescore/package_data/config_default_tims.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"$schema": "./config_schema.json",
"ms2rescore": {
"feature_generators": {
"basic": {},
"ms2pip": {
"model": "timsTOF",
"ms2_tolerance": 0.02
},
"deeplc": {
"deeplc_retrain": false
},
"im2deep": {},
"maxquant": {}
},
"rescoring_engine": {
"mokapot": {
"write_weights": true,
"write_txt": true,
"write_flashlfq": true
}
},
"psm_file": null
}
}
1 change: 0 additions & 1 deletion ms2rescore/parse_spectra.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@

from ms2rescore_rs import get_precursor_info
from psm_utils import PSMList
from rich.progress import track

from ms2rescore.exceptions import MS2RescoreError
from ms2rescore.utils import infer_spectrum_path
Expand Down
71 changes: 43 additions & 28 deletions ms2rescore/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,46 +35,61 @@ def infer_spectrum_path(
"and no run name in PSM file found."
)

# If passed path is directory, join with run name
elif os.path.isdir(configured_path) and not configured_path.endswith(".d"):
if run_name:
resolved_path = os.path.join(configured_path, run_name)
else:
is_bruker_dir = configured_path.endswith(".d") or _is_minitdf(configured_path)

# If passed path is directory (that is not Bruker raw), join with run name
if os.path.isdir(configured_path) and not is_bruker_dir:
if run_name:
resolved_path = os.path.join(configured_path, run_name)
else:
raise MS2RescoreConfigurationError(
"Could not resolve spectrum file name: Spectrum path is directory "
"but no run name in PSM file found."
)

# If passed path is file, use that, but warn if basename doesn't match expected
elif os.path.isfile(configured_path) or (os.path.isdir(configured_path) and is_bruker_dir):
if run_name and Path(configured_path).stem != Path(run_name).stem:
logger.warning(
"Passed spectrum path (`%s`) does not match run name found in PSM "
"file (`%s`). Continuing with passed spectrum path.",
configured_path,
run_name,
)
resolved_path = configured_path
else:
raise MS2RescoreConfigurationError(
"Could not resolve spectrum file name: Spectrum path is directory "
"but no run name in PSM file found."
)

# If passed path is file, use that, but warn if basename doesn't match expected
elif os.path.isfile(configured_path) or (
os.path.isdir(configured_path) and configured_path.endswith(".d")
):
if run_name and Path(configured_path).stem != Path(run_name).stem:
logger.warning(
"Passed spectrum path (`%s`) does not match run name found in PSM "
"file (`%s`). Continuing with passed spectrum path.",
configured_path,
run_name,
"Configured `spectrum_path` must be `None` or a path to an existing file "
"or directory. If `None` or path to directory, spectrum run information "
"should be present in the PSM file."
)
resolved_path = configured_path
else:
raise MS2RescoreConfigurationError(
"Configured `spectrum_path` must be `None` or a path to an existing file "
"or directory. If `None` or path to directory, spectrum run information "
"should be present in the PSM file."
)

# Match with file extension if not in resolved_path yet
if not re.match(r"\.mgf$|\.mzml$|\.d$", resolved_path, flags=re.IGNORECASE):
if not _is_minitdf(resolved_path) and not re.match(
r"\.mgf$|\.mzml$|\.d$", resolved_path, flags=re.IGNORECASE
):
for filename in glob(resolved_path + "*"):
if re.match(r".*(\.mgf$|\.mzml$|\.d)", filename, flags=re.IGNORECASE):
resolved_path = filename
break
else:
raise MS2RescoreConfigurationError(
"Resolved spectrum filename does not contain a supported file "
"extension (mzML, MGF, or .d) and could not find any matching existing "
f"Resolved spectrum filename ('{resolved_path}') does not contain a supported "
"file extension (mzML, MGF, or .d) and could not find any matching existing "
"files."
)

return Path(resolved_path)


def _is_minitdf(spectrum_file: str) -> bool:
"""
Check if the spectrum file is a Bruker miniTDF folder.
A Bruker miniTDF folder has no fixed name, but contains files matching the patterns
``*ms2spectrum.bin`` and ``*ms2spectrum.parquet``.
"""
files = set(Path(spectrum_file).glob("*ms2spectrum.bin"))
files.update(Path(spectrum_file).glob("*ms2spectrum.parquet"))
return len(files) >= 2
31 changes: 16 additions & 15 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,26 +32,26 @@ classifiers = [
dynamic = ["version"]
requires-python = ">=3.8"
dependencies = [
"ms2rescore_rs",
"numpy>=1.16.0; python_version != '3.11'",
"numpy==1.24.3; python_version == '3.11'", # Incompatibility with sklearn, pygam, and TF...
"pandas>=1.0",
"rich>=12",
"pyteomics>=4.1.0, <4.7",
"lxml>=4.5",
"ms2pip>=4.0.0-dev5",
"click>=7",
"cascade-config>=0.4.0",
"click>=7",
"customtkinter>=5,<6",
"deeplc>=2.2",
"deeplcretrainer>=0.2",
"tomli>=2; python_version < '3.11'",
"psm_utils>=0.4",
"customtkinter>=5,<6",
"mokapot>=0.9",
"pydantic>=1.8.2,<2", # Fix compatibility with v2 in psm_utils
"im2deep>=0.1.3",
"jinja2>=3",
"lxml>=4.5",
"mokapot>=0.9",
"ms2pip>=4.0.0-dev10",
"ms2rescore_rs",
"numpy==1.24.3; python_version == '3.11'", # Incompatibility with sklearn, pygam, and TF...
"numpy>=1.16.0; python_version != '3.11'",
"pandas>=1.0",
"plotly>=5",
"im2deep>=0.1.3",
"psm_utils>=0.8",
"pydantic>=1.8.2,<2", # Fix compatibility with v2 in psm_utils
"pyteomics>=4.1.0, <4.7",
"rich>=12",
"tomli>=2; python_version < '3.11'",
]

[project.optional-dependencies]
Expand Down Expand Up @@ -80,6 +80,7 @@ CompOmics = "https://www.compomics.com"
ms2rescore = "ms2rescore.__main__:main"
ms2rescore-gui = "ms2rescore.gui.__main__:main"
ms2rescore-report = "ms2rescore.report.__main__:main"
tims2rescore = "ms2rescore.__main__:main_tims"

[build-system]
requires = ["flit_core >=3.2,<4"]
Expand Down

0 comments on commit 5c821c9

Please sign in to comment.