From bc6c314a076f226c906950df2d3e3fb975e7c0f0 Mon Sep 17 00:00:00 2001
From: RalfG <ralfg@hotmail.be>
Date: Mon, 8 Jul 2024 15:19:23 +0200
Subject: [PATCH] Add options `max_psm_rank_input` and `max_psm_rank_output` to
 control multi-rank rescoring.

---
 docs/source/config_schema.md                |   2 +
 docs/source/userguide/configuration.rst     |  30 +++++
 ms2rescore/core.py                          | 106 +++++++++++------
 ms2rescore/package_data/config_default.json |   2 +
 ms2rescore/package_data/config_schema.json  |  12 ++
 ms2rescore/parse_psms.py                    | 125 ++++++++++----------
 ms2rescore/report/charts.py                 |  36 +++---
 ms2rescore/rescoring_engines/mokapot.py     |   5 +-
 ms2rescore/rescoring_engines/percolator.py  |   2 +
 9 files changed, 201 insertions(+), 119 deletions(-)

diff --git a/docs/source/config_schema.md b/docs/source/config_schema.md
index ad6938c..209bc00 100644
--- a/docs/source/config_schema.md
+++ b/docs/source/config_schema.md
@@ -57,6 +57,8 @@
       - *string*
       - *null*
   - **`lower_score_is_better`** *(boolean)*: Bool indicating if lower score is better. Default: `false`.
+  - **`max_psm_rank_input`** *(number)*: Maximum rank of PSMs to use as input for rescoring. Minimum: `1`. Default: `10`.
+  - **`max_psm_rank_output`** *(number)*: Maximum rank of PSMs to return after rescoring, before final FDR calculation. Minimum: `1`. Default: `1`.
   - **`modification_mapping`** *(object)*: Mapping of modification labels to each replacement label. Default: `{}`.
   - **`fixed_modifications`** *(object)*: Mapping of amino acids with fixed modifications to the modification name. Can contain additional properties. Default: `{}`.
   - **`processes`** *(number)*: Number of parallel processes to use; -1 for all available. Minimum: `-1`. Default: `-1`.
diff --git a/docs/source/userguide/configuration.rst b/docs/source/userguide/configuration.rst
index 8b68ea4..3936d41 100644
--- a/docs/source/userguide/configuration.rst
+++ b/docs/source/userguide/configuration.rst
@@ -240,6 +240,36 @@ expression pattern that extracts the decoy status from the protein name:
       decoy_pattern = "DECOY_"
 
 
+Multi-rank rescoring
+====================
+
+Some search engines, such as MaxQuant, report multiple candidate PSMs for the same spectrum.
+MS²Rescore can rescore multiple candidate PSMs per spectrum. This allows for lower-ranking
+candidate PSMs to become the top-ranked PSM after rescoring. This behavior can be controlled with
+the ``max_psm_rank_input`` option.
+
+To ensure a correct FDR control after rescoring, MS²Rescore filters out lower-ranking PSMs before
+final FDR calculation and writing the output files. To allow for lower-ranking PSMs to be included
+in the final output - for instance, to consider chimeric spectra - the ``max_psm_rank_output``
+option can be used.
+
+For example, to rescore the top 5 PSMs per spectrum and output the best PSM after rescoring,
+the following configuration can be used:
+
+.. tab:: JSON
+
+  .. code-block:: json
+
+    "max_psm_rank_input": 5
+    "max_psm_rank_output": 1
+
+.. tab:: TOML
+
+  .. code-block:: toml
+
+    max_psm_rank_input = 5
+    max_psm_rank_output = 1
+
 
 All configuration options
 =========================
diff --git a/ms2rescore/core.py b/ms2rescore/core.py
index 176fa57..3f4d9cb 100644
--- a/ms2rescore/core.py
+++ b/ms2rescore/core.py
@@ -47,7 +47,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
     psm_list = parse_psms(config, psm_list)
 
     # Log #PSMs identified before rescoring
-    id_psms_before = _log_id_psms_before(psm_list)
+    id_psms_before = _log_id_psms_before(psm_list, max_rank=config["max_psm_rank_output"])
 
     # Define feature names; get existing feature names from PSM file
     feature_names = dict()
@@ -62,7 +62,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
     )
 
     # Add missing precursor info from spectrum file if needed
-    _fill_missing_precursor_info(psm_list, config)
+    psm_list = _fill_missing_precursor_info(psm_list, config)
 
     # Add rescoring features
     for fgen_name, fgen_config in config["feature_generators"].items():
@@ -145,22 +145,26 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
                 **config["rescoring_engine"]["mokapot"],
             )
     except exceptions.RescoringError as e:
-        logger.exception(e)
-        rescoring_succeeded = False
-    else:
-        rescoring_succeeded = True
-        _log_id_psms_after(psm_list, id_psms_before)
+        # Write output
+        logger.info(f"Writing intermediary output to {output_file_root}.psms.tsv...")
+        psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")
+
+        # Reraise exception
+        raise e
 
-    # Workaround for broken PEP calculation if best PSM is decoy
+    # Post-rescoring processing
     if all(psm_list["pep"] == 1.0):
         psm_list = _fix_constant_pep(psm_list)
+    psm_list = _filter_by_rank(psm_list, config["max_psm_rank_output"], False)
+    psm_list = _calculate_confidence(psm_list)
+    _ = _log_id_psms_after(psm_list, id_psms_before, max_rank=config["max_psm_rank_output"])
 
     # Write output
     logger.info(f"Writing output to {output_file_root}.psms.tsv...")
     psm_utils.io.write_file(psm_list, output_file_root + ".psms.tsv", filetype="tsv")
 
     # Write report
-    if config["write_report"] and rescoring_succeeded:
+    if config["write_report"]:
         try:
             generate.generate_report(
                 output_file_root, psm_list=psm_list, feature_names=feature_names, use_txt_log=True
@@ -169,7 +173,7 @@ def rescore(configuration: Dict, psm_list: Optional[PSMList] = None) -> None:
             logger.exception(e)
 
 
-def _fill_missing_precursor_info(psm_list, config):
+def _fill_missing_precursor_info(psm_list: PSMList, config: Dict) -> PSMList:
     """Fill missing precursor info from spectrum file if needed."""
     # Check if required
     # TODO: avoid hard coding feature generators in some way
@@ -211,6 +215,16 @@ def _fill_missing_precursor_info(psm_list, config):
                     [v is not None and not np.isnan(v) for v in psm_list[value_name]]
                 ]
 
+    return psm_list
+
+
+def _filter_by_rank(psm_list: PSMList, max_rank: int, lower_score_better: bool) -> PSMList:
+    """Filter PSMs by rank."""
+    psm_list.set_ranks(lower_score_better=lower_score_better)
+    rank_filter = psm_list["rank"] <= max_rank
+    logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {max_rank}.")
+    return psm_list[rank_filter]
+
 
 def _write_feature_names(feature_names, output_file_root):
     """Write feature names to file."""
@@ -221,31 +235,39 @@ def _write_feature_names(feature_names, output_file_root):
                 f.write(f"{fgen}\t{feature}\n")
 
 
-def _log_id_psms_before(psm_list):
+def _log_id_psms_before(psm_list: PSMList, fdr: float = 0.01, max_rank: int = 1) -> int:
     """Log #PSMs identified before rescoring."""
     id_psms_before = (
-        (psm_list["qvalue"] <= 0.01) & (psm_list["is_decoy"] == False)  # noqa: E712
+        (psm_list["qvalue"] <= 0.01) & (psm_list["rank"] <= max_rank) & (~psm_list["is_decoy"])
     ).sum()
-    logger.info("Found %i identified PSMs at 1%% FDR before rescoring.", id_psms_before)
+    logger.info(
+        f"Found {id_psms_before} identified PSMs with rank <= {max_rank} at {fdr} FDR before "
+        "rescoring."
+    )
     return id_psms_before
 
 
-def _log_id_psms_after(psm_list, id_psms_before):
+def _log_id_psms_after(
+    psm_list: PSMList, id_psms_before: int, fdr: float = 0.01, max_rank: int = 1
+) -> int:
     """Log #PSMs identified after rescoring."""
     id_psms_after = (
-        (psm_list["qvalue"] <= 0.01) & (psm_list["is_decoy"] == False)  # noqa: E712
+        (psm_list["qvalue"] <= 0.01) & (psm_list["rank"] <= max_rank) & (~psm_list["is_decoy"])
     ).sum()
     diff = id_psms_after - id_psms_before
     diff_perc = diff / id_psms_before if id_psms_before > 0 else None
 
     diff_numbers = f"{diff} ({diff_perc:.2%})" if diff_perc is not None else str(diff)
     diff_word = "more" if diff > 0 else "less"
-    logger.info(f"Identified {diff_numbers} {diff_word} PSMs at 1% FDR after rescoring.")
+    logger.info(
+        f"Identified {diff_numbers} {diff_word} PSMs with rank <= {max_rank} at {fdr} FDR after "
+        "rescoring."
+    )
 
     return id_psms_after
 
 
-def _fix_constant_pep(psm_list):
+def _fix_constant_pep(psm_list: PSMList) -> PSMList:
     """Workaround for broken PEP calculation if best PSM is decoy."""
     logger.warning(
         "Attempting to fix constant PEP values by removing decoy PSMs that score higher than the "
@@ -257,30 +279,36 @@ def _fix_constant_pep(psm_list):
     if not higher_scoring_decoys.any():
         logger.warning("No decoys scoring higher than the best target found. Skipping fix.")
     else:
-        logger.warning(f"Removing {higher_scoring_decoys.sum()} decoy PSMs.")
-
         psm_list = psm_list[~higher_scoring_decoys]
+        logger.warning(f"Removed {higher_scoring_decoys.sum()} decoy PSMs.")
 
-        # Minimal conversion to LinearPsmDataset
-        psm_df = psm_list.to_dataframe()
-        psm_df = psm_df.reset_index(drop=True).reset_index()
-        psm_df["peptide"] = (
-            psm_df["peptidoform"].astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
-        )
-        psm_df["is_target"] = ~psm_df["is_decoy"]
-        lin_psm_data = LinearPsmDataset(
-            psms=psm_df[["index", "peptide", "score", "is_target"]],
-            target_column="is_target",
-            spectrum_columns="index",  # Use artificial index to allow multi-rank rescoring
-            peptide_column="peptide",
-            feature_columns=["score"],
-        )
+    return psm_list
+
+
+def _calculate_confidence(psm_list: PSMList) -> PSMList:
+    """
+    Calculate scores, q-values, and PEPs for PSMs and peptides and add them to PSMList.
+    """
+    # Minimal conversion to LinearPsmDataset
+    psm_df = psm_list.to_dataframe()
+    psm_df = psm_df.reset_index(drop=True).reset_index()
+    psm_df["peptide"] = (
+        psm_df["peptidoform"].astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
+    )
+    psm_df["is_target"] = ~psm_df["is_decoy"]
+    lin_psm_data = LinearPsmDataset(
+        psms=psm_df[["index", "peptide", "score", "is_target"]],
+        target_column="is_target",
+        spectrum_columns="index",  # Use artificial index to allow multi-rank rescoring
+        peptide_column="peptide",
+        feature_columns=["score"],
+    )
 
-        # Recalculate confidence
-        new_confidence = lin_psm_data.assign_confidence()
+    # Recalculate confidence
+    new_confidence = lin_psm_data.assign_confidence()
 
-        # Add new confidence estimations to PSMList
-        add_psm_confidence(psm_list, new_confidence)
-        add_peptide_confidence(psm_list, new_confidence)
+    # Add new confidence estimations to PSMList
+    add_psm_confidence(psm_list, new_confidence)
+    add_peptide_confidence(psm_list, new_confidence)
 
-        return psm_list
+    return psm_list
diff --git a/ms2rescore/package_data/config_default.json b/ms2rescore/package_data/config_default.json
index 765c1a0..7dd8894 100644
--- a/ms2rescore/package_data/config_default.json
+++ b/ms2rescore/package_data/config_default.json
@@ -33,6 +33,8 @@
         "psm_id_rt_pattern": null,
         "psm_id_im_pattern": null,
         "lower_score_is_better": false,
+        "max_psm_rank_input": 10,
+        "max_psm_rank_output": 1,
         "modification_mapping": {},
         "fixed_modifications": {},
         "processes": -1,
diff --git a/ms2rescore/package_data/config_schema.json b/ms2rescore/package_data/config_schema.json
index dfed7ca..ab77f3b 100644
--- a/ms2rescore/package_data/config_schema.json
+++ b/ms2rescore/package_data/config_schema.json
@@ -131,6 +131,18 @@
                     "type": "boolean",
                     "default": false
                 },
+                "max_psm_rank_input": {
+                    "description": "Maximum rank of PSMs to use as input for rescoring",
+                    "type": "number",
+                    "default": 10,
+                    "minimum": 1
+                },
+                "max_psm_rank_output": {
+                    "description": "Maximum rank of PSMs to return after rescoring, before final FDR calculation",
+                    "type": "number",
+                    "default": 1,
+                    "minimum": 1
+                },
                 "modification_mapping": {
                     "description": "Mapping of modification labels to each replacement label.",
                     "type": "object",
diff --git a/ms2rescore/parse_psms.py b/ms2rescore/parse_psms.py
index cc8440b..12edd0e 100644
--- a/ms2rescore/parse_psms.py
+++ b/ms2rescore/parse_psms.py
@@ -1,6 +1,6 @@
 import logging
 import re
-from typing import Dict, Union
+from typing import Dict, Optional, Union
 
 import numpy as np
 import psm_utils.io
@@ -24,14 +24,30 @@ def parse_psms(config: Dict, psm_list: Union[PSMList, None]) -> PSMList:
         PSMList object containing PSMs. If None, PSMs will be read from ``psm_file``.
 
     """
-    # Read PSMs, find decoys, calculate q-values
-    psm_list = _read_psms(config, psm_list)
+    # Read PSMs
+    try:
+        psm_list = _read_psms(config, psm_list)
+    except psm_utils.io.PSMUtilsIOException:
+        raise MS2RescoreConfigurationError(
+            "Error occurred while reading PSMs. Please check the 'psm_file' and "
+            "'psm_file_type' settings. See "
+            "https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
+            " for more information."
+        )
+
+    # Filter by PSM rank
+    psm_list.set_ranks(config["lower_score_is_better"])
+    rank_filter = psm_list["rank"] <= config["max_psm_rank_input"]
+    psm_list = psm_list[rank_filter]
+    logger.info(f"Removed {sum(~rank_filter)} PSMs with rank >= {config['max_psm_rank_input']}.")
+
+    # Remove invalid AAs, find decoys, calculate q-values
     psm_list = _remove_invalid_aa(psm_list)
-    _find_decoys(config, psm_list)
-    _calculate_qvalues(config, psm_list)
+    _find_decoys(psm_list, config["id_decoy_pattern"])
+    _calculate_qvalues(psm_list, config["lower_score_is_better"])
     if config["psm_id_rt_pattern"] or config["psm_id_im_pattern"]:
         logger.debug("Parsing retention time and/or ion mobility from PSM identifier...")
-        _parse_values_spectrum_id(config, psm_list)
+        _parse_values_from_spectrum_id(config, psm_list)
 
     # Store scoring values for comparison later
     for psm in psm_list:
@@ -79,39 +95,30 @@ def _read_psms(config, psm_list):
     if isinstance(psm_list, PSMList):
         return psm_list
     else:
-        logger.info("Reading PSMs from file...")
         total_files = len(config["psm_file"])
         psm_list = []
         for current_file, psm_file in enumerate(config["psm_file"]):
             logger.info(
                 f"Reading PSMs from PSM file ({current_file+1}/{total_files}): '{psm_file}'..."
             )
-            try:
-                psm_list.extend(
-                    psm_utils.io.read_file(
-                        psm_file,
-                        filetype=config["psm_file_type"],
-                        show_progressbar=True,
-                        **config["psm_reader_kwargs"],
-                    )
-                )
-            except psm_utils.io.PSMUtilsIOException:
-                raise MS2RescoreConfigurationError(
-                    "Error occurred while reading PSMs. Please check the 'psm_file' and "
-                    "'psm_file_type' settings. See "
-                    "https://ms2rescore.readthedocs.io/en/latest/userguide/input-files/"
-                    " for more information."
+            psm_list.extend(
+                psm_utils.io.read_file(
+                    psm_file,
+                    filetype=config["psm_file_type"],
+                    show_progressbar=True,
+                    **config["psm_reader_kwargs"],
                 )
+            )
             logger.debug(f"Read {len(psm_list)} PSMs from '{psm_file}'.")
 
         return PSMList(psm_list=psm_list)
 
 
-def _find_decoys(config, psm_list):
+def _find_decoys(psm_list: PSMList, id_decoy_pattern: Optional[str] = None):
     """Find decoys in PSMs, log amount, and raise error if none found."""
     logger.debug("Finding decoys...")
-    if config["id_decoy_pattern"]:
-        psm_list.find_decoys(config["id_decoy_pattern"])
+    if id_decoy_pattern:
+        psm_list.find_decoys(id_decoy_pattern)
 
     n_psms = len(psm_list)
     percent_decoys = sum(psm_list["is_decoy"]) / n_psms * 100
@@ -126,12 +133,12 @@ def _find_decoys(config, psm_list):
         )
 
 
-def _calculate_qvalues(config, psm_list):
+def _calculate_qvalues(psm_list: PSMList, lower_score_is_better: bool):
     """Calculate q-values for PSMs if not present."""
     # Calculate q-values if not present
     if None in psm_list["qvalue"]:
         logger.debug("Recalculating q-values...")
-        psm_list.calculate_qvalues(reverse=not config["lower_score_is_better"])
+        psm_list.calculate_qvalues(reverse=not lower_score_is_better)
 
 
 def _match_psm_ids(old_id, regex_pattern):
@@ -146,50 +153,38 @@ def _match_psm_ids(old_id, regex_pattern):
         )
 
 
-def _parse_values_spectrum_id(config, psm_list):
+def _parse_values_from_spectrum_id(
+    psm_list: PSMList,
+    psm_id_rt_pattern: Optional[str] = None,
+    psm_id_im_pattern: Optional[str] = None,
+):
     """Parse retention time and or ion mobility values from the spectrum_id."""
-
-    if config["psm_id_rt_pattern"]:
-        logger.debug(
-            "Parsing retention time from spectrum_id with regex pattern "
-            f"{config['psm_id_rt_pattern']}"
-        )
-        try:
-            rt_pattern = re.compile(config["psm_id_rt_pattern"])
-            psm_list["retention_time"] = [
-                float(rt_pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
-            ]
-        except AttributeError:
-            raise MS2RescoreConfigurationError(
-                f"Could not parse retention time from spectrum_id with the "
-                f"{config['psm_id_rt_pattern']} regex pattern. "
-                f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n."
-                "Please make sure the retention time key is present in the spectrum_id "
-                "and the value is in a capturing group or disable the relevant feature generator."
-            )
-
-    if config["psm_id_im_pattern"]:
-        logger.debug(
-            "Parsing ion mobility from spectrum_id with regex pattern "
-            f"{config['psm_id_im_pattern']}"
-        )
-        try:
-            im_pattern = re.compile(config["psm_id_im_pattern"])
-            psm_list["ion_mobility"] = [
-                float(im_pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
-            ]
-        except AttributeError:
-            raise MS2RescoreConfigurationError(
-                f"Could not parse ion mobility from spectrum_id with the "
-                f"{config['psm_id_im_pattern']} regex pattern. "
-                "Please make sure the ion mobility key is present in the spectrum_id "
-                "and the value is in a capturing group or disable the relevant feature generator."
+    for pattern, label, key in zip(
+        [psm_id_rt_pattern, psm_id_im_pattern],
+        ["retention time", "ion mobility"],
+        ["retention_time", "ion_mobility"],
+    ):
+        if pattern:
+            logger.debug(
+                f"Parsing {label} from spectrum_id with regex pattern " f"{psm_id_rt_pattern}"
             )
+            try:
+                pattern = re.compile(pattern)
+                psm_list[key] = [
+                    float(pattern.search(psm.spectrum_id).group(1)) for psm in psm_list
+                ]
+            except AttributeError:
+                raise MS2RescoreConfigurationError(
+                    f"Could not parse {label} from spectrum_id with the "
+                    f"{pattern} regex pattern. "
+                    f"Example spectrum_id: '{psm_list[0].spectrum_id}'\n. "
+                    f"Please make sure the {label} key is present in the spectrum_id "
+                    "and the value is in a capturing group or disable the relevant feature generator."
+                )
 
 
 def _remove_invalid_aa(psm_list: PSMList) -> PSMList:
     """Remove PSMs with invalid amino acids."""
-    logger.debug("Removing PSMs with invalid amino acids...")
     invalid_psms = np.array(
         [any(aa in "BJOUXZ" for aa in psm.peptidoform.sequence) for psm in psm_list]
     )
diff --git a/ms2rescore/report/charts.py b/ms2rescore/report/charts.py
index 8313228..669bcc6 100644
--- a/ms2rescore/report/charts.py
+++ b/ms2rescore/report/charts.py
@@ -242,16 +242,22 @@ def score_scatter_plot(
     ce_psms = pd.concat([ce_psms_targets, ce_psms_decoys], axis=0)
 
     # Get score thresholds
-    score_threshold_before = (
-        ce_psms[ce_psms["mokapot q-value before"] <= fdr_threshold]
-        .sort_values("mokapot q-value before", ascending=False)["mokapot score before"]
-        .iloc[0]
-    )
-    score_threshold_after = (
-        ce_psms[ce_psms["mokapot q-value after"] <= fdr_threshold]
-        .sort_values("mokapot q-value after", ascending=False)["mokapot score after"]
-        .iloc[0]
-    )
+    try:
+        score_threshold_before = (
+            ce_psms[ce_psms["mokapot q-value before"] <= fdr_threshold]
+            .sort_values("mokapot q-value before", ascending=False)["mokapot score before"]
+            .iloc[0]
+        )
+    except IndexError:  # No PSMs below threshold
+        score_threshold_before = None
+    try:
+        score_threshold_after = (
+            ce_psms[ce_psms["mokapot q-value after"] <= fdr_threshold]
+            .sort_values("mokapot q-value after", ascending=False)["mokapot score after"]
+            .iloc[0]
+        )
+    except IndexError:  # No PSMs below threshold
+        score_threshold_after = None
 
     # Plot
     fig = px.scatter(
@@ -268,10 +274,12 @@ def score_scatter_plot(
         },
     )
     # draw FDR thresholds
-    fig.add_vline(x=score_threshold_before, line_dash="dash", row=1, col=1)
-    fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=1)
-    fig.add_vline(x=score_threshold_before, line_dash="dash", row=2, col=1)
-    fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=2)
+    if score_threshold_before:
+        fig.add_vline(x=score_threshold_before, line_dash="dash", row=1, col=1)
+        fig.add_vline(x=score_threshold_before, line_dash="dash", row=2, col=1)
+    if score_threshold_after:
+        fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=1)
+        fig.add_hline(y=score_threshold_after, line_dash="dash", row=1, col=2)
 
     return fig
 
diff --git a/ms2rescore/rescoring_engines/mokapot.py b/ms2rescore/rescoring_engines/mokapot.py
index 86dc20d..f02d877 100644
--- a/ms2rescore/rescoring_engines/mokapot.py
+++ b/ms2rescore/rescoring_engines/mokapot.py
@@ -223,7 +223,7 @@ def save_model_weights(
 def add_psm_confidence(
     psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
 ) -> None:
-    """Add Mokapot PSM-level confidence estimates to PSM list."""
+    """Add PSM-level confidence estimates to PSM list, updating score, qvalue, pep, and rank."""
     # Reshape confidence estimates to match PSMList
     keys = ["mokapot score", "mokapot q-value", "mokapot PEP"]
     mokapot_values_targets = (
@@ -241,6 +241,9 @@ def add_psm_confidence(
     psm_list["qvalue"] = q[:, 1]
     psm_list["pep"] = q[:, 2]
 
+    # Reset ranks to match new scores
+    psm_list.set_ranks(lower_score_better=False)
+
 
 def add_peptide_confidence(
     psm_list: psm_utils.PSMList, confidence_results: mokapot.confidence.Confidence
diff --git a/ms2rescore/rescoring_engines/percolator.py b/ms2rescore/rescoring_engines/percolator.py
index c6ea3d3..9952aed 100644
--- a/ms2rescore/rescoring_engines/percolator.py
+++ b/ms2rescore/rescoring_engines/percolator.py
@@ -175,6 +175,8 @@ def _update_psm_scores(
         original_psm["qvalue"] = new_psm["qvalue"]
         original_psm["pep"] = new_psm["pep"]
 
+    psm_list.set_ranks(lower_score_better=False)
+
 
 def _write_pin_file(psm_list: psm_utils.PSMList, filepath: str):
     """Write PIN file for rescoring."""