Skip to content

Commit

Permalink
Changes stdout redirection to devnull
Browse files (browse the repository at this point in the history)
  • Loading branch information
ArthurDeclercq committed Oct 11, 2023
1 parent 5db71e2 commit 1249cdf
Show file tree
Hide file tree
Showing 3 changed files with 105 additions and 108 deletions.
105 changes: 51 additions & 54 deletions ms2rescore/feature_generators/deeplc.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,63 +145,60 @@ def add_features(self, psm_list: PSMList) -> None:
logger.info(
f"Running DeepLC for PSMs from run ({current_run}/{total_runs}): `{run}`..."
)
# Prepare PSM file
with contextlib.redirect_stdout(
open(os.devnull, "w")
) if not self._verbose else contextlib.nullcontext():
psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))

psm_list_calibration = self._get_calibration_psms(psm_list_run)

logger.debug("Calibrating DeepLC")
self.deeplc_predictor = self.DeepLC(
n_jobs=self.processes,
verbose=self._verbose,
path_model=self.selected_model or self.user_model,
**self.deeplc_kwargs,

psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))

psm_list_calibration = self._get_calibration_psms(psm_list_run)

logger.debug("Calibrating DeepLC")
self.deeplc_predictor = self.DeepLC(
n_jobs=self.processes,
verbose=self._verbose,
path_model=self.selected_model or self.user_model,
**self.deeplc_kwargs,
)
self.deeplc_predictor.calibrate_preds(
seq_df=self._psm_list_to_deeplc_peprec(psm_list_calibration)
)
# Still calibrate for each run, but do not try out all model options.
# Just use model that was selected based on first run
if not self.selected_model:
self.selected_model = list(self.deeplc_predictor.model.keys())
self.deeplc_kwargs["deeplc_retrain"] = False
logger.debug(
f"Selected DeepLC model {self.selected_model} based on "
"calibration of first run. Using this model (after new "
"calibrations) for the remaining runs."
)

predictions = np.array(
self.deeplc_predictor.make_preds(
seq_df=self._psm_list_to_deeplc_peprec(psm_list_run)
)
self.deeplc_predictor.calibrate_preds(
seq_df=self._psm_list_to_deeplc_peprec(psm_list_calibration)
)
observations = psm_list_run["retention_time"]
rt_diffs_run = np.abs(predictions - observations)

for i, psm in enumerate(psm_list_run):
psm["rescoring_features"].update(
{
"observed_retention_time": observations[i],
"predicted_retention_time": predictions[i],
"rt_diff": rt_diffs_run[i],
}
)
# Still calibrate for each run, but do not try out all model options.
# Just use model that was selected based on first run
if not self.selected_model:
self.selected_model = list(self.deeplc_predictor.model.keys())
self.deeplc_kwargs["deeplc_retrain"] = False
logger.debug(
f"Selected DeepLC model {self.selected_model} based on "
"calibration of first run. Using this model (after new "
"calibrations) for the remaining runs."
)

predictions = np.array(
self.deeplc_predictor.make_preds(
seq_df=self._psm_list_to_deeplc_peprec(psm_list_run)
)
peptide = psm.peptidoform.proforma.split("\\")[0] # remove charge
if peptide_rt_diff_dict[peptide]["rt_diff_best"] > rt_diffs_run[i]:
peptide_rt_diff_dict[peptide] = {
"observed_retention_time_best": observations[i],
"predicted_retention_time_best": predictions[i],
"rt_diff_best": rt_diffs_run[i],
}
for psm in psm_list_run:
psm["rescoring_features"].update(
peptide_rt_diff_dict[psm.peptidoform.proforma.split("\\")[0]]
)
observations = psm_list_run["retention_time"]
rt_diffs_run = np.abs(predictions - observations)

for i, psm in enumerate(psm_list_run):
psm["rescoring_features"].update(
{
"observed_retention_time": observations[i],
"predicted_retention_time": predictions[i],
"rt_diff": rt_diffs_run[i],
}
)
peptide = psm.peptidoform.proforma.split("\\")[0] # remove charge
if peptide_rt_diff_dict[peptide]["rt_diff_best"] > rt_diffs_run[i]:
peptide_rt_diff_dict[peptide] = {
"observed_retention_time_best": observations[i],
"predicted_retention_time_best": predictions[i],
"rt_diff_best": rt_diffs_run[i],
}
for psm in psm_list_run:
psm["rescoring_features"].update(
peptide_rt_diff_dict[psm.peptidoform.proforma.split("\\")[0]]
)
current_run += 1
current_run += 1

# TODO: Remove when DeepLC supports PSMList directly
@staticmethod
Expand Down
103 changes: 50 additions & 53 deletions ms2rescore/feature_generators/ionmob.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,64 +129,61 @@ def add_features(self, psm_list: PSMList) -> None:
logger.info(
f"Running Ionmob for PSMs from run ({current_run}/{total_runs}): `{run}`..."
)
with contextlib.redirect_stdout(
open(os.devnull, "w")
) if not self._verbose else contextlib.nullcontext():
psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))
psm_list_run_df = psm_list_run.to_dataframe()

# prepare data frames for CCS prediction
psm_list_run_df["charge"] = [
peptidoform.precursor_charge
for peptidoform in psm_list_run_df["peptidoform"]
]
psm_list_run_df = psm_list_run_df[
psm_list_run_df["charge"] < 5
] # predictions do not go higher for ionmob

psm_list_run_df["sequence-tokenized"] = psm_list_run_df.apply(
lambda x: self.tokenize_peptidoform(x["peptidoform"]), axis=1
)
psm_list_run_df = psm_list_run_df[
psm_list_run_df.apply(
lambda x: self._is_valid_tokenized_sequence(x["sequence-tokenized"]),
axis=1,
)
]

psm_list_run_df["mz"] = psm_list_run_df.apply(
lambda x: calculate_mz(x["sequence-tokenized"], x["charge"]), axis=1
) # use precursor m/z from PSMs?

psm_list_run_df["ccs_observed"] = psm_list_run_df.apply(
lambda x: reduced_mobility_to_ccs(x["ion_mobility"], x["mz"], x["charge"]),

psm_list_run = PSMList(psm_list=list(chain.from_iterable(psms.values())))
psm_list_run_df = psm_list_run.to_dataframe()

# prepare data frames for CCS prediction
psm_list_run_df["charge"] = [
peptidoform.precursor_charge for peptidoform in psm_list_run_df["peptidoform"]
]
psm_list_run_df = psm_list_run_df[
psm_list_run_df["charge"] < 5
] # predictions do not go higher for ionmob

psm_list_run_df["sequence-tokenized"] = psm_list_run_df.apply(
lambda x: self.tokenize_peptidoform(x["peptidoform"]), axis=1
)
psm_list_run_df = psm_list_run_df[
psm_list_run_df.apply(
lambda x: self._is_valid_tokenized_sequence(x["sequence-tokenized"]),
axis=1,
)
# calibrate CCS values
shift_factor = self.calculate_ccs_shift(psm_list_run_df)
psm_list_run_df["ccs_observed"] = psm_list_run_df.apply(
lambda x: x["ccs_observed"] + shift_factor, axis=1
)
# predict CCS values
tf_ds = to_tf_dataset_inference(
psm_list_run_df["mz"],
psm_list_run_df["charge"],
psm_list_run_df["sequence-tokenized"],
self.tokenizer,
)
]

psm_list_run_df["mz"] = psm_list_run_df.apply(
lambda x: calculate_mz(x["sequence-tokenized"], x["charge"]), axis=1
) # use precursor m/z from PSMs?

psm_list_run_df["ccs_observed"] = psm_list_run_df.apply(
lambda x: reduced_mobility_to_ccs(x["ion_mobility"], x["mz"], x["charge"]),
axis=1,
)
# calibrate CCS values
shift_factor = self.calculate_ccs_shift(psm_list_run_df)
psm_list_run_df["ccs_observed"] = psm_list_run_df.apply(
lambda x: x["ccs_observed"] + shift_factor, axis=1
)
# predict CCS values
tf_ds = to_tf_dataset_inference(
psm_list_run_df["mz"],
psm_list_run_df["charge"],
psm_list_run_df["sequence-tokenized"],
self.tokenizer,
)

psm_list_run_df["ccs_predicted"], _ = self.ionmob_model.predict(tf_ds)
psm_list_run_df["ccs_predicted"], _ = self.ionmob_model.predict(tf_ds)

# calculate CCS features
ccs_features = self._calculate_features(psm_list_run_df)
# calculate CCS features
ccs_features = self._calculate_features(psm_list_run_df)

# add CCS features to PSMs
for psm in psm_list_run:
try:
psm["rescoring_features"].update(ccs_features[psm.spectrum_id])
except KeyError:
psm["rescoring_features"].update({})
current_run += 1
# add CCS features to PSMs
for psm in psm_list_run:
try:
psm["rescoring_features"].update(ccs_features[psm.spectrum_id])
except KeyError:
psm["rescoring_features"].update({})
current_run += 1

def _calculate_features(self, feature_df: pd.DataFrame) -> Dict[str, Dict[str, float]]:
"""Get CCS features for PSMs."""
Expand Down
5 changes: 4 additions & 1 deletion ms2rescore/gui/__main__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
"""Entrypoint for MS²Rescore GUI."""

import multiprocessing
import os
import contextlib

from ms2rescore.gui.app import app


def main():
"""Entrypoint for MS²Rescore GUI."""
multiprocessing.freeze_support()
app()
with contextlib.redirect_stdout(open(os.devnull, "w")):
app()


if __name__ == "__main__":
Expand Down

0 comments on commit 1249cdf

Please sign in to comment.