Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for MS²Rescore report #144

Merged
merged 13 commits on
Apr 10, 2024
Merged
6 changes: 5 additions & 1 deletion ms2rescore/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,11 @@ def _fill_missing_precursor_info(psm_list, config):
get_missing_values(psm_list, config, rt_required=rt_required, im_required=im_required)

# Check if values are now present
for value_name in ["retention_time", "ion_mobility"]:
for value_name, required in zip(
["retention_time", "ion_mobility"], [rt_required, im_required]
):
ArthurDeclercq marked this conversation as resolved.
Show resolved Hide resolved
if not required:
continue
if (
0.0 in psm_list[value_name]
or None in psm_list[value_name]
ArthurDeclercq marked this conversation as resolved.
Show resolved Hide resolved
Expand Down
4 changes: 2 additions & 2 deletions ms2rescore/report/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ def identification_overlap(
return figure

levels = before.levels # ["psms", "peptides", "proteins"] if all available
indexers = ["index", "index", "mokapot protein group"]
indexers = ["index", "peptide", "mokapot protein group"]

overlap_data = defaultdict(dict)
for level, indexer in zip(levels, indexers):
Expand All @@ -386,7 +386,7 @@ def identification_overlap(
set_after = set(df_after[df_after["mokapot q-value"] <= 0.01][indexer])

overlap_data["removed"][level] = -len(set_before - set_after)
overlap_data["retained"][level] = len(set_before | set_after)
overlap_data["retained"][level] = len(set_after.intersection(set_before))
overlap_data["gained"][level] = len(set_after - set_before)

colors = ["#953331", "#316395", "#319545"]
Expand Down
3 changes: 2 additions & 1 deletion ms2rescore/report/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,13 @@ def _get_stats_context(confidence_before, confidence_after):
if not before or not after:
continue
increase = (after - before) / before * 100
diff = after - before
ArthurDeclercq marked this conversation as resolved.
Show resolved Hide resolved
stats.append(
{
"item": level_name,
"card_color": card_color,
"number": after,
"diff": f"{after - before:+}",
"diff": f"({diff:+})",
ArthurDeclercq marked this conversation as resolved.
Show resolved Hide resolved
"percentage": f"{increase:.1f}%",
"is_increase": increase > 0,
"bar_percentage": before / after * 100 if increase > 0 else after / before * 100,
Expand Down
3 changes: 2 additions & 1 deletion ms2rescore/rescoring_engines/mokapot.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from pyteomics.mass import nist_mass

logger = logging.getLogger(__name__)
logging.getLogger("numba").setLevel(logging.WARNING)


def rescore(
Expand Down Expand Up @@ -89,7 +90,7 @@ def rescore(

# Rescore
logger.debug(f"Mokapot brew options: `{kwargs}`")
confidence_results, models = brew(lin_psm_data, **kwargs)
confidence_results, models = brew(lin_psm_data, rng=8, **kwargs)

# Reshape confidence estimates to match PSMList
mokapot_values_targets = (
Expand Down
22 changes: 17 additions & 5 deletions ms2rescore/rescoring_engines/percolator.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
import logging
import subprocess
from typing import Any, Dict, Optional
from copy import deepcopy

import numpy as np
import psm_utils

from ms2rescore.exceptions import MS2RescoreError
Expand Down Expand Up @@ -103,8 +103,15 @@ def rescore(
# Need to be able to link back to original PSMs, so reindex spectrum IDs, but copy PSM list
# to avoid modifying original...
# TODO: Better approach for this?
psm_list_reindexed = psm_list.copy()
psm_list_reindexed["spectrum_id"] = np.arange(len(psm_list_reindexed))

psm_list_reindexed = deepcopy(psm_list)
psm_list_reindexed.set_ranks()
psm_list_reindexed["spectrum_id"] = [
f"{psm.get_usi(as_url=False)}_{psm.rank}" for psm in psm_list_reindexed
]
spectrum_id_index = {
spectrum_id: index for index, spectrum_id in enumerate(psm_list_reindexed["spectrum_id"])
}

_write_pin_file(psm_list_reindexed, pin_filepath)

Expand Down Expand Up @@ -134,10 +141,13 @@ def rescore(
psm_list,
percolator_kwargs["results-psms"],
percolator_kwargs["decoy-results-psms"],
spectrum_id_index,
)


def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str):
def _update_psm_scores(
psm_list: psm_utils.PSMList, target_pout: str, decoy_pout: str, spectrum_id_index: list
):
"""
Update PSM scores with Percolator results.

Expand All @@ -150,7 +160,9 @@ def _update_psm_scores(psm_list: psm_utils.PSMList, target_pout: str, decoy_pout
psm_list_percolator = psm_utils.PSMList(psm_list=target_psms.psm_list + decoy_psms.psm_list)

# Sort by reindexed spectrum_id so order matches original PSM list
psm_list_percolator[np.argsort(psm_list_percolator["spectrum_id"])]
psm_list_percolator = sorted(
psm_list_percolator, key=lambda psm: spectrum_id_index[psm["spectrum_id"]]
)

if not len(psm_list) == len(psm_list_percolator):
raise MS2RescoreError(
Expand Down
Loading