From 466e1b470a11c5b745040eb65eb3bf9ee3d0bdce Mon Sep 17 00:00:00 2001 From: RobbinBouwmeester Date: Thu, 10 Oct 2024 16:09:33 +0200 Subject: [PATCH] Fix calibration after TL --- CHANGELOG.md | 6 ++++++ deeplc/deeplc.py | 46 ++++++++++++++++++++++++++++++---------------- pyproject.toml | 2 +- 3 files changed, 37 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1268301..ad297c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [3.1.1] - 2024-10-10 + +### Changed + +- Revert to linear calibration after transfer learning only + ## [3.1.0] - 2024-08-31 ### Changed diff --git a/deeplc/deeplc.py b/deeplc/deeplc.py index bc69007..3ec5c13 100644 --- a/deeplc/deeplc.py +++ b/deeplc/deeplc.py @@ -610,7 +610,9 @@ def make_preds_core( try: X ret_preds = mod.predict( - [X, X_sum, X_global, X_hc], batch_size=self.batch_num_tf, verbose=int(self.verbose) + [X, X_sum, X_global, X_hc], + batch_size=self.batch_num_tf, + verbose=int(self.verbose), ).flatten() except UnboundLocalError: logger.debug("X is empty, skipping...") @@ -832,27 +834,39 @@ def calibrate_preds_func_pygam( # measured_tr = list(measured_tr) # Fit a SplineTransformer model - spline = SplineTransformer(degree=4, n_knots=int(len(measured_tr) / 100) + 5) - spline_model = make_pipeline(spline, LinearRegression()) - spline_model.fit(predicted_tr.reshape(-1, 1), measured_tr) + if self.deeplc_retrain: + spline = SplineTransformer(degree=2, n_knots=10) + linear_model = LinearRegression() + linear_model.fit(predicted_tr.reshape(-1, 1), measured_tr) - # Determine the top 10% of data on either end - n_top = int(len(predicted_tr) * 0.1) + linear_model_left = linear_model + spline_model = linear_model + linear_model_right = linear_model + else: + spline = SplineTransformer( + degree=4, n_knots=int(len(measured_tr) / 500) + 5 + ) + spline_model = make_pipeline(spline, LinearRegression()) + spline_model.fit(predicted_tr.reshape(-1, 1), measured_tr) - # Fit a linear model on the bottom 10% (left-side extrapolation) - X_left = predicted_tr[:n_top] - y_left = measured_tr[:n_top] - linear_model_left = LinearRegression() - linear_model_left.fit(X_left.reshape(-1, 1), y_left) + # Determine the top 10% of data on either end + n_top = int(len(predicted_tr) * 0.1) - # Fit a linear model on the top 10% (right-side extrapolation) - X_right = predicted_tr[-n_top:] - y_right = measured_tr[-n_top:] - linear_model_right = LinearRegression() - linear_model_right.fit(X_right.reshape(-1, 1), y_right) + # Fit a linear model on the bottom 10% (left-side extrapolation) + X_left = predicted_tr[:n_top] + y_left = measured_tr[:n_top] + linear_model_left = LinearRegression() + linear_model_left.fit(X_left.reshape(-1, 1), y_left) + + # Fit a linear model on the top 10% (right-side extrapolation) + X_right = predicted_tr[-n_top:] + y_right = measured_tr[-n_top:] + linear_model_right = LinearRegression() + linear_model_right.fit(X_right.reshape(-1, 1), y_right) calibrate_min = min(predicted_tr) calibrate_max = max(predicted_tr) + return ( calibrate_min, calibrate_max, diff --git a/pyproject.toml b/pyproject.toml index 1470691..f345bc8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "deeplc" -version = "3.1.0" +version = "3.1.1" description = "DeepLC: Retention time prediction for (modified) peptides using Deep Learning." readme = "README.md" license = { file = "LICENSE" }