Skip to content

Commit

Permalink
Explicitly pass scores to 'assign_confidence' to avoid 'no psms under eval_fdr' error when calculating q-values and PEPs.
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG committed Jul 9, 2024
1 parent 7744391 commit e3f723d
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 24 deletions.
5 changes: 2 additions & 3 deletions ms2rescore/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,15 +297,14 @@ def _calculate_confidence(psm_list: PSMList) -> PSMList:
)
psm_df["is_target"] = ~psm_df["is_decoy"]
lin_psm_data = LinearPsmDataset(
psms=psm_df[["index", "peptide", "score", "is_target"]],
psms=psm_df[["index", "peptide", "is_target"]],
target_column="is_target",
spectrum_columns="index", # Use artificial index to allow multi-rank rescoring
peptide_column="peptide",
feature_columns=["score"],
)

# Recalculate confidence
new_confidence = lin_psm_data.assign_confidence()
new_confidence = lin_psm_data.assign_confidence(scores=psm_list["score"])

# Add new confidence estimations to PSMList
add_psm_confidence(psm_list, new_confidence)
Expand Down
32 changes: 11 additions & 21 deletions ms2rescore/report/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,35 +51,25 @@ def get_confidence_estimates(
"was generated by MS²Rescore. Could not generate report."
) from e

score_after = psm_list["score"]
peptide = (
pd.Series(psm_list["peptidoform"]).astype(str).str.replace(r"(/\d+$)", "", n=1, regex=True)
)
psms = pd.DataFrame(
{
"peptide": peptide,
"is_target": ~psm_list["is_decoy"],
"before": score_before,
"after": psm_list["score"],
}
).reset_index()

psms = pd.DataFrame({"peptide": peptide, "is_target": ~psm_list["is_decoy"]}).reset_index()
lin_psm_dataset = LinearPsmDataset(
psms=psms,
target_column="is_target",
spectrum_columns="index",
peptide_column="peptide",
)
if fasta_file:
fasta = read_fasta(fasta_file)
lin_psm_dataset.add_proteins(fasta)

confidence = dict()
for when in ["before", "after"]:
lin_psm_dataset = LinearPsmDataset(
psms=psms,
target_column="is_target",
spectrum_columns="index",
feature_columns=[when],
peptide_column="peptide",
)
if fasta_file:
lin_psm_dataset.add_proteins(fasta)

for when, scores in [("before", score_before), ("after", score_after)]:
try:
confidence[when] = lin_psm_dataset.assign_confidence()
confidence[when] = lin_psm_dataset.assign_confidence(scores=scores)
except RuntimeError:
confidence[when] = None

Expand Down

0 comments on commit e3f723d

Please sign in to comment.