From d4e370b1507516ed5fefc21919c38905e942ee25 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 15:26:02 -0800 Subject: [PATCH 01/14] Modify molecular formula search to use postgres database --- tests/test_molecular_formula_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_molecular_formula_search.py b/tests/test_molecular_formula_search.py index 4455c52d..ece5c3f3 100644 --- a/tests/test_molecular_formula_search.py +++ b/tests/test_molecular_formula_search.py @@ -21,7 +21,7 @@ def test_run_molecular_formula_search(): mass_spectrum_obj.settings.noise_threshold_absolute_abundance = 0 # Set the settings for the molecular search on the mass spectrum object - mass_spectrum_obj.molecular_search_settings.url_database = "" + mass_spectrum_obj.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@localhost:5432/molformula" mass_spectrum_obj.molecular_search_settings.error_method = "None" mass_spectrum_obj.molecular_search_settings.min_ppm_error = -5 mass_spectrum_obj.molecular_search_settings.max_ppm_error = 5 From 5ae16c7137569cda60105c89c42f804bfd1e6291 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 15:45:15 -0800 Subject: [PATCH 02/14] Update gitlab ci to use postgres, not localhost --- .gitlab-ci.yml | 2 +- tests/test_molecular_formula_search.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index de053273..f50eb523 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -10,7 +10,7 @@ variables: POSTGRES_USER: coremsdb POSTGRES_PASSWORD: coremsmolform POSTGRES_HOST_AUTH_METHOD: trust - COREMS_DATABASE_URL: postgresql://coremsdb:coremsmolform@localhost:5432/molformula + COREMS_DATABASE_URL: postgresql://coremsdb:coremsmolform@postgres:5432/molformula PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip" stages: diff --git a/tests/test_molecular_formula_search.py b/tests/test_molecular_formula_search.py index ece5c3f3..960b8a92 
100644 --- a/tests/test_molecular_formula_search.py +++ b/tests/test_molecular_formula_search.py @@ -21,7 +21,7 @@ def test_run_molecular_formula_search(): mass_spectrum_obj.settings.noise_threshold_absolute_abundance = 0 # Set the settings for the molecular search on the mass spectrum object - mass_spectrum_obj.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@localhost:5432/molformula" + mass_spectrum_obj.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@postgres:5432/molformula" mass_spectrum_obj.molecular_search_settings.error_method = "None" mass_spectrum_obj.molecular_search_settings.min_ppm_error = -5 mass_spectrum_obj.molecular_search_settings.max_ppm_error = 5 From 1aea3749043caef740255f0d6597be1f53210782 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 15:58:04 -0800 Subject: [PATCH 03/14] Turn off CICD tests for troubleshooting --- tests/{test_calibration.py => xtest_calibration.py} | 0 tests/{test_classification.py => xtest_classification.py} | 0 tests/{test_gcms.py => xtest_gcms.py} | 0 tests/{test_input.py => xtest_input.py} | 0 tests/{test_mass_spectrum.py => xtest_mass_spectrum.py} | 0 ...trum_export_import.py => xtest_mass_spectrum_export_import.py} | 0 tests/{test_molecular_formula.py => xtest_molecular_formula.py} | 0 ...ormula_db_factory.py => xtest_molecular_formula_db_factory.py} | 0 tests/{test_mspeak.py => xtest_mspeak.py} | 0 tests/{test_output.py => xtest_output.py} | 0 tests/{test_search_mass_list.py => xtest_search_mass_list.py} | 0 tests/{test_setting_settings.py => xtest_setting_settings.py} | 0 tests/{test_wf_lipidomics.py => xtest_wf_lipidomics.py} | 0 13 files changed, 0 insertions(+), 0 deletions(-) rename tests/{test_calibration.py => xtest_calibration.py} (100%) rename tests/{test_classification.py => xtest_classification.py} (100%) rename tests/{test_gcms.py => xtest_gcms.py} (100%) rename tests/{test_input.py => xtest_input.py} (100%) rename 
tests/{test_mass_spectrum.py => xtest_mass_spectrum.py} (100%) rename tests/{test_mass_spectrum_export_import.py => xtest_mass_spectrum_export_import.py} (100%) rename tests/{test_molecular_formula.py => xtest_molecular_formula.py} (100%) rename tests/{test_molecular_formula_db_factory.py => xtest_molecular_formula_db_factory.py} (100%) rename tests/{test_mspeak.py => xtest_mspeak.py} (100%) rename tests/{test_output.py => xtest_output.py} (100%) rename tests/{test_search_mass_list.py => xtest_search_mass_list.py} (100%) rename tests/{test_setting_settings.py => xtest_setting_settings.py} (100%) rename tests/{test_wf_lipidomics.py => xtest_wf_lipidomics.py} (100%) diff --git a/tests/test_calibration.py b/tests/xtest_calibration.py similarity index 100% rename from tests/test_calibration.py rename to tests/xtest_calibration.py diff --git a/tests/test_classification.py b/tests/xtest_classification.py similarity index 100% rename from tests/test_classification.py rename to tests/xtest_classification.py diff --git a/tests/test_gcms.py b/tests/xtest_gcms.py similarity index 100% rename from tests/test_gcms.py rename to tests/xtest_gcms.py diff --git a/tests/test_input.py b/tests/xtest_input.py similarity index 100% rename from tests/test_input.py rename to tests/xtest_input.py diff --git a/tests/test_mass_spectrum.py b/tests/xtest_mass_spectrum.py similarity index 100% rename from tests/test_mass_spectrum.py rename to tests/xtest_mass_spectrum.py diff --git a/tests/test_mass_spectrum_export_import.py b/tests/xtest_mass_spectrum_export_import.py similarity index 100% rename from tests/test_mass_spectrum_export_import.py rename to tests/xtest_mass_spectrum_export_import.py diff --git a/tests/test_molecular_formula.py b/tests/xtest_molecular_formula.py similarity index 100% rename from tests/test_molecular_formula.py rename to tests/xtest_molecular_formula.py diff --git a/tests/test_molecular_formula_db_factory.py b/tests/xtest_molecular_formula_db_factory.py similarity 
index 100% rename from tests/test_molecular_formula_db_factory.py rename to tests/xtest_molecular_formula_db_factory.py diff --git a/tests/test_mspeak.py b/tests/xtest_mspeak.py similarity index 100% rename from tests/test_mspeak.py rename to tests/xtest_mspeak.py diff --git a/tests/test_output.py b/tests/xtest_output.py similarity index 100% rename from tests/test_output.py rename to tests/xtest_output.py diff --git a/tests/test_search_mass_list.py b/tests/xtest_search_mass_list.py similarity index 100% rename from tests/test_search_mass_list.py rename to tests/xtest_search_mass_list.py diff --git a/tests/test_setting_settings.py b/tests/xtest_setting_settings.py similarity index 100% rename from tests/test_setting_settings.py rename to tests/xtest_setting_settings.py diff --git a/tests/test_wf_lipidomics.py b/tests/xtest_wf_lipidomics.py similarity index 100% rename from tests/test_wf_lipidomics.py rename to tests/xtest_wf_lipidomics.py From 2a8056c731d5f9ecf44403ce995e8b03916c7497 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:05:17 -0800 Subject: [PATCH 04/14] Add wait step to CI for postgres --- .gitlab-ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index f50eb523..641561d5 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -3,7 +3,7 @@ include: services: - - postgres + - postgres:13 variables: POSTGRES_DB: molformula @@ -27,6 +27,9 @@ test-source: - k8s - rzr - codebuilds + before_script: + - apt-get update && apt-get install -y wait-for-it + - wait-for-it -t 20 postgres:5432 script: - python3 -V # Print out python version for debugging - python3 -m pip install --upgrade pip From 0e5fbd4e1b575c5606c6fbd604f15165a3eb4cd3 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:09:46 -0800 Subject: [PATCH 05/14] Add debug step for CICD --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 641561d5..a258fe15 
100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -38,6 +38,7 @@ test-source: - python3 -c "import pathlib; [p.unlink() for p in pathlib.Path('.').rglob('tests/win_only/__init__.py')]" - export PYTHONNET_RUNTIME=coreclr - pytest --cache-clear + - docker logs postgres # Debug step: Check logs of the PostgreSQL service to ensure it's running correctly artifacts: paths: From ffa986d48dbf671ef7c21e12661e966e73b640f8 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:12:13 -0800 Subject: [PATCH 06/14] Add manual installation of psycopg2 for cicd --- .gitlab-ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index a258fe15..3627e4e3 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -35,6 +35,7 @@ test-source: - python3 -m pip install --upgrade pip - python3 -m pip install -r requirements.txt - python3 -m pip install pytest pytest-cov + - python3 -m pip install psycopg2 - python3 -c "import pathlib; [p.unlink() for p in pathlib.Path('.').rglob('tests/win_only/__init__.py')]" - export PYTHONNET_RUNTIME=coreclr - pytest --cache-clear From 9b03ba580fcf36a76d4e38aa37f952d6edf38819 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:17:08 -0800 Subject: [PATCH 07/14] Expand all use of postgres db in CI CD --- tests/test_molecular_formula_search.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_molecular_formula_search.py b/tests/test_molecular_formula_search.py index 960b8a92..c06fbe23 100644 --- a/tests/test_molecular_formula_search.py +++ b/tests/test_molecular_formula_search.py @@ -60,7 +60,7 @@ def test_run_molecular_formula_search_adduct(): mass_spectrum_obj.settings.noise_threshold_absolute_abundance = 0 # Set the settings for the molecular search on the mass spectrum object - mass_spectrum_obj.molecular_search_settings.url_database = "" + mass_spectrum_obj.molecular_search_settings.url_database = 
"postgresql://coremsdb:coremsmolform@postgres:5432/molformula" mass_spectrum_obj.molecular_search_settings.error_method = "None" mass_spectrum_obj.molecular_search_settings.min_ppm_error = -5 mass_spectrum_obj.molecular_search_settings.max_ppm_error = 5 @@ -91,7 +91,7 @@ def test_run_molecular_formula_search_adduct(): def test_mspeak_search(mass_spectrum_ftms): - mass_spectrum_ftms.molecular_search_settings.url_database = "" + mass_spectrum_ftms.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@postgres:5432/molformula" mass_spectrum_ftms.molecular_search_settings.usedAtoms = { "C": (1, 100), "H": (4, 200), @@ -117,7 +117,7 @@ def test_mspeak_search(mass_spectrum_ftms): def test_molecular_formula_search_db(mass_spectrum_ftms): - mass_spectrum_ftms.molecular_search_settings.url_database = "" + mass_spectrum_ftms.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@postgres:5432/molformula" mass_spectrum_ftms.molecular_search_settings.usedAtoms = { "C": (1, 100), "H": (4, 200), @@ -149,7 +149,7 @@ def test_molecular_formula_search_db(mass_spectrum_ftms): def test_priorityAssignment(mass_spectrum_ftms): - mass_spectrum_ftms.molecular_search_settings.url_database = "" + mass_spectrum_ftms.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@postgres:5432/molformula" mass_spectrum_ftms.molecular_search_settings.error_method = "None" mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -3 mass_spectrum_ftms.molecular_search_settings.max_ppm_error = 5 From 9f820759c3ed7e3c7de51af5811d95dbfc4b7db2 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:18:49 -0800 Subject: [PATCH 08/14] Expand all use of postgres db in CI CD --- tests/{xtest_classification.py => test_classification.py} | 2 +- ...rum_export_import.py => test_mass_spectrum_export_import.py} | 2 +- tests/{xtest_search_mass_list.py => test_search_mass_list.py} | 2 +- tests/{xtest_wf_lipidomics.py => 
test_wf_lipidomics.py} | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename tests/{xtest_classification.py => test_classification.py} (96%) rename tests/{xtest_mass_spectrum_export_import.py => test_mass_spectrum_export_import.py} (98%) rename tests/{xtest_search_mass_list.py => test_search_mass_list.py} (93%) rename tests/{xtest_wf_lipidomics.py => test_wf_lipidomics.py} (99%) diff --git a/tests/xtest_classification.py b/tests/test_classification.py similarity index 96% rename from tests/xtest_classification.py rename to tests/test_classification.py index d9617877..ec8ef1bf 100644 --- a/tests/xtest_classification.py +++ b/tests/test_classification.py @@ -6,7 +6,7 @@ def test_heteroatoms_classification(mass_spectrum_ftms): - mass_spectrum_ftms.molecular_search_settings.url_database = '' + mass_spectrum_ftms.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@postgres:5432/molformula" mass_spectrum_ftms.molecular_search_settings.error_method = 'None' mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -10 mass_spectrum_ftms.molecular_search_settings.max_ppm_error = 10 diff --git a/tests/xtest_mass_spectrum_export_import.py b/tests/test_mass_spectrum_export_import.py similarity index 98% rename from tests/xtest_mass_spectrum_export_import.py rename to tests/test_mass_spectrum_export_import.py index dc9e035e..7d15fe50 100644 --- a/tests/xtest_mass_spectrum_export_import.py +++ b/tests/test_mass_spectrum_export_import.py @@ -23,7 +23,7 @@ def mass_spectrum_silico(): mass_spectrum_obj.settings.noise_threshold_method = "relative_abundance" mass_spectrum_obj.settings.noise_threshold_absolute_abundance = 0 - mass_spectrum_obj.molecular_search_settings.url_database = "" + mass_spectrum_obj.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@postgres:5432/molformula" mass_spectrum_obj.molecular_search_settings.error_method = "None" mass_spectrum_obj.molecular_search_settings.min_ppm_error = -5 
mass_spectrum_obj.molecular_search_settings.max_ppm_error = 5 diff --git a/tests/xtest_search_mass_list.py b/tests/test_search_mass_list.py similarity index 93% rename from tests/xtest_search_mass_list.py rename to tests/test_search_mass_list.py index 9573f25f..7652858b 100644 --- a/tests/xtest_search_mass_list.py +++ b/tests/test_search_mass_list.py @@ -7,7 +7,7 @@ def test_search_imported_ref_files(mass_spectrum_ftms, ref_file_location): mass_spectrum_obj = mass_spectrum_ftms - mass_spectrum_obj.molecular_search_settings.url_database = "" + mass_spectrum_obj.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@postgres:5432/molformula" mf_references_list = ImportMassListRef(ref_file_location).from_bruker_ref_file() assert len(mf_references_list) == 60 assert round(mf_references_list[0].mz_calc, 2) == 149.06 diff --git a/tests/xtest_wf_lipidomics.py b/tests/test_wf_lipidomics.py similarity index 99% rename from tests/xtest_wf_lipidomics.py rename to tests/test_wf_lipidomics.py index 75500037..1ab31e4d 100644 --- a/tests/xtest_wf_lipidomics.py +++ b/tests/test_wf_lipidomics.py @@ -83,7 +83,7 @@ def test_lipidomics_workflow(): ms1_params.mass_spectrum.noise_min_mz, ms1_params.mass_spectrum.min_picking_mz = 0, 0 ms1_params.mass_spectrum.noise_max_mz, ms1_params.mass_spectrum.max_picking_mz = np.inf, np.inf ms1_params.ms_peak.legacy_resolving_power = False - ms1_params.molecular_search.url_database = "" + ms1_params.molecular_search.url_database = "postgresql://coremsdb:coremsmolform@postgres:5432/molformula" ms1_params.molecular_search.usedAtoms = { 'C': (10, 30), 'H': (18, 200), From c2e6eb229e446543a02f4662a45f4a311df6eed6 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:20:54 -0800 Subject: [PATCH 09/14] Remove wait on ci cd --- .gitlab-ci.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 3627e4e3..ed856afc 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -27,9 +27,6 @@ 
test-source: - k8s - rzr - codebuilds - before_script: - - apt-get update && apt-get install -y wait-for-it - - wait-for-it -t 20 postgres:5432 script: - python3 -V # Print out python version for debugging - python3 -m pip install --upgrade pip From 13ee373c6b8f3e70c38e332de4c75f986f38c232 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:22:18 -0800 Subject: [PATCH 10/14] Remove debugging for postgres --- .gitlab-ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index ed856afc..a6481094 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -36,7 +36,6 @@ test-source: - python3 -c "import pathlib; [p.unlink() for p in pathlib.Path('.').rglob('tests/win_only/__init__.py')]" - export PYTHONNET_RUNTIME=coreclr - pytest --cache-clear - - docker logs postgres # Debug step: Check logs of the PostgreSQL service to ensure it's running correctly artifacts: paths: From 2bd137f3273b91ef6b8d5aed221277ab8bc370fe Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:29:26 -0800 Subject: [PATCH 11/14] Reinstate all tests --- support_code/nmdc/lipidomics/lipidomics_workflow.py | 1 - tests/{xtest_calibration.py => test_calibration.py} | 2 +- tests/{xtest_gcms.py => test_gcms.py} | 0 tests/{xtest_input.py => test_input.py} | 0 tests/{xtest_mass_spectrum.py => test_mass_spectrum.py} | 0 tests/{xtest_molecular_formula.py => test_molecular_formula.py} | 0 ...rmula_db_factory.py => test_molecular_formula_db_factory.py} | 0 tests/{xtest_mspeak.py => test_mspeak.py} | 0 tests/{xtest_output.py => test_output.py} | 0 tests/{xtest_setting_settings.py => test_setting_settings.py} | 0 10 files changed, 1 insertion(+), 2 deletions(-) rename tests/{xtest_calibration.py => test_calibration.py} (99%) rename tests/{xtest_gcms.py => test_gcms.py} (100%) rename tests/{xtest_input.py => test_input.py} (100%) rename tests/{xtest_mass_spectrum.py => test_mass_spectrum.py} (100%) rename tests/{xtest_molecular_formula.py => 
test_molecular_formula.py} (100%) rename tests/{xtest_molecular_formula_db_factory.py => test_molecular_formula_db_factory.py} (100%) rename tests/{xtest_mspeak.py => test_mspeak.py} (100%) rename tests/{xtest_output.py => test_output.py} (100%) rename tests/{xtest_setting_settings.py => test_setting_settings.py} (100%) diff --git a/support_code/nmdc/lipidomics/lipidomics_workflow.py b/support_code/nmdc/lipidomics/lipidomics_workflow.py index f73c07b0..77e67f35 100644 --- a/support_code/nmdc/lipidomics/lipidomics_workflow.py +++ b/support_code/nmdc/lipidomics/lipidomics_workflow.py @@ -7,7 +7,6 @@ import sys sys.path.append("./") -import cProfile from multiprocessing import Pool from pathlib import Path import datetime diff --git a/tests/xtest_calibration.py b/tests/test_calibration.py similarity index 99% rename from tests/xtest_calibration.py rename to tests/test_calibration.py index 4e9f6321..4208c086 100644 --- a/tests/xtest_calibration.py +++ b/tests/test_calibration.py @@ -85,7 +85,7 @@ def test_segmentedmzcalibration(mass_spectrum_ftms, ref_file_location): def test_old_calibration(mass_spectrum_ftms): usedatoms = {"C": (1, 100), "H": (4, 200), "O": (1, 10)} - mass_spectrum_ftms.molecular_search_settings.url_database = "" + mass_spectrum_ftms.molecular_search_settings.url_database = "postgresql://coremsdb:coremsmolform@postgres:5432/molformula" mass_spectrum_ftms.molecular_search_settings.error_method = "None" mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -5 mass_spectrum_ftms.molecular_search_settings.max_ppm_error = 5 diff --git a/tests/xtest_gcms.py b/tests/test_gcms.py similarity index 100% rename from tests/xtest_gcms.py rename to tests/test_gcms.py diff --git a/tests/xtest_input.py b/tests/test_input.py similarity index 100% rename from tests/xtest_input.py rename to tests/test_input.py diff --git a/tests/xtest_mass_spectrum.py b/tests/test_mass_spectrum.py similarity index 100% rename from tests/xtest_mass_spectrum.py rename to 
tests/test_mass_spectrum.py diff --git a/tests/xtest_molecular_formula.py b/tests/test_molecular_formula.py similarity index 100% rename from tests/xtest_molecular_formula.py rename to tests/test_molecular_formula.py diff --git a/tests/xtest_molecular_formula_db_factory.py b/tests/test_molecular_formula_db_factory.py similarity index 100% rename from tests/xtest_molecular_formula_db_factory.py rename to tests/test_molecular_formula_db_factory.py diff --git a/tests/xtest_mspeak.py b/tests/test_mspeak.py similarity index 100% rename from tests/xtest_mspeak.py rename to tests/test_mspeak.py diff --git a/tests/xtest_output.py b/tests/test_output.py similarity index 100% rename from tests/xtest_output.py rename to tests/test_output.py diff --git a/tests/xtest_setting_settings.py b/tests/test_setting_settings.py similarity index 100% rename from tests/xtest_setting_settings.py rename to tests/test_setting_settings.py From bd33d3308084ad38875ccb4104932fcdc030fb81 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:32:01 -0800 Subject: [PATCH 12/14] Add new zenodo DOI to readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 235c785e..e43b70a5 100644 --- a/README.md +++ b/README.md @@ -337,9 +337,9 @@ If you use CoreMS in your work, please use the following citation: Version [3.0.0 Release on GitHub](https://github.com/EMSL-Computing/CoreMS/releases/tag/v3.0.0), archived on Zenodo: -[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4641552.svg)](https://doi.org/10.5281/zenodo.4641552) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.14009575.svg)](https://doi.org/10.5281/zenodo.14009575) -Yuri E. Corilo, William R. Kew, Lee Ann McCue (2021, March 27). EMSL-Computing/CoreMS: CoreMS 2.0.1 (Version v2.0.1), as developed on Github. Zenodo. http://doi.org/10.5281/zenodo.4641552 +Yuri E. Corilo, William R. Kew, Lee Ann McCue, Katherine R . Heal, James C. Carr (2024, October 29). 
EMSL-Computing/CoreMS: CoreMS 3.0.0 (Version v3.0.0), as developed on Github. Zenodo. http://doi.org/10.5281/zenodo.14009575 ``` From c43b9ca359045a448360ddc3f8e611b7d846c772 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:32:36 -0800 Subject: [PATCH 13/14] Add new zenodo DOI to readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e43b70a5..5757a757 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@

-CoreMS DOI +DOI
From c62789ab28f133d9a269562c7f8b908a02c47706 Mon Sep 17 00:00:00 2001 From: Katherine Heal Date: Mon, 4 Nov 2024 16:33:28 -0800 Subject: [PATCH 14/14] Rerender documentation --- docs/corems.html | 147 +- .../chroma_peak/calc/ChromaPeakCalc.html | 508 +- .../factory/chroma_peak_classes.html | 3708 +++--- docs/corems/encapsulation/constant.html | 4561 +++---- .../encapsulation/factory/parameters.html | 1372 ++- .../factory/processingSetting.html | 3611 +++--- .../input/parameter_from_json.html | 1450 +-- .../output/parameter_to_dict.html | 402 +- .../output/parameter_to_json.html | 858 +- docs/corems/mass_spectra/calc/GC_Calc.html | 417 +- .../mass_spectra/calc/GC_Deconvolution.html | 2721 +++-- .../mass_spectra/calc/GC_RI_Calibration.html | 128 +- docs/corems/mass_spectra/calc/LC_Calc.html | 6261 +++++----- docs/corems/mass_spectra/calc/MZSearch.html | 1208 +- .../mass_spectra/calc/SignalProcessing.html | 2435 ++-- .../corems/mass_spectra/factory/GC_Class.html | 3230 ++--- .../corems/mass_spectra/factory/LC_Class.html | 6023 +++++----- .../mass_spectra/factory/chromat_data.html | 135 +- .../corems/mass_spectra/input/andiNetCDF.html | 745 +- .../mass_spectra/input/boosterHDF5.html | 6 +- .../mass_spectra/input/brukerSolarix.html | 659 +- .../mass_spectra/input/corems_hdf5.html | 2816 ++--- docs/corems/mass_spectra/input/massList.html | 557 +- docs/corems/mass_spectra/input/mzml.html | 2616 ++-- .../corems/mass_spectra/input/parserbase.html | 335 +- .../mass_spectra/input/rawFileReader.html | 4376 ++++--- docs/corems/mass_spectra/output/export.html | 10018 ++++++++-------- .../mass_spectrum/calc/AutoRecalibration.html | 1171 +- .../mass_spectrum/calc/Calibration.html | 2922 +++-- .../mass_spectrum/calc/CalibrationCalc.html | 1278 +- .../mass_spectrum/calc/KendrickGroup.html | 1237 +- .../calc/MassErrorPrediction.html | 2220 ++-- .../mass_spectrum/calc/MassSpectrumCalc.html | 990 +- .../calc/MeanResolvingPowerFilter.html | 982 +- 
docs/corems/mass_spectrum/calc/NoiseCalc.html | 1949 +-- .../mass_spectrum/calc/NoiseCalc_Bayes.html | 489 +- .../mass_spectrum/calc/PeakPicking.html | 5319 ++++---- .../factory/MassSpectrumClasses.html | 8246 +++++++------ docs/corems/mass_spectrum/input.html | 1 - .../corems/mass_spectrum/input/baseClass.html | 2647 ++-- .../mass_spectrum/input/boosterHDF5.html | 851 +- .../mass_spectrum/input/coremsHDF5.html | 2457 ++-- docs/corems/mass_spectrum/input/massList.html | 1916 +-- .../mass_spectrum/input/numpyArray.html | 442 +- docs/corems/mass_spectrum/output/export.html | 4202 ++++--- .../calc/MolecularFormulaCalc.html | 3416 +++--- .../factory/MolecularFormulaFactory.html | 3862 +++--- .../molecular_formula/input/masslist_ref.html | 1876 ++- docs/corems/molecular_id/calc.html | 1 - .../molecular_id/calc/ClusterFilter.html | 1329 +- .../molecular_id/calc/MolecularFilter.html | 679 +- .../molecular_id/calc/SpectralSimilarity.html | 2967 ++--- .../molecular_id/calc/math_distance.html | 5774 ++++----- docs/corems/molecular_id/factory/EI_SQL.html | 2868 ++--- .../factory/MolecularLookupTable.html | 4858 ++++---- .../molecular_id/factory/classification.html | 3898 +++--- .../factory/lipid_molecular_metadata.html | 10 +- .../molecular_id/factory/molecularSQL.html | 3857 +++--- docs/corems/molecular_id/input/nistMSI.html | 747 +- .../molecular_id/search/compoundSearch.html | 873 +- .../search/database_interfaces.html | 3279 ++--- .../molecular_id/search/findOxygenPeaks.html | 1693 +-- .../search/lcms_spectral_search.html | 672 +- .../search/molecularFormulaSearch.html | 4596 +++---- .../search/priorityAssignment.html | 3649 +++--- docs/corems/ms_peak/calc/MSPeakCalc.html | 5335 ++++---- .../corems/ms_peak/factory/MSPeakClasses.html | 862 +- docs/corems/transient/calc/TransientCalc.html | 1910 +-- .../transient/factory/TransientClasses.html | 2212 ++-- .../corems/transient/input/brukerSolarix.html | 2464 ++-- docs/corems/transient/input/midasDatFile.html | 818 +- 
docs/search.js | 2 +- 72 files changed, 85364 insertions(+), 79765 deletions(-) diff --git a/docs/corems.html b/docs/corems.html index e8af6857..b2f55165 100644 --- a/docs/corems.html +++ b/docs/corems.html @@ -95,7 +95,7 @@



-CoreMS DOI +DOI
@@ -154,7 +154,7 @@

CoreMS

Current Version

-

2.2.1

+

3.0.0


@@ -198,7 +198,7 @@

Data input formats

  • CoreMS exported processed mass list files(excel, .csv, .txt, pandas dataframe as .pkl)
  • CoreMS self-containing Hierarchical Data Format (.hdf5)
  • Pandas Dataframe
  • -
  • Support for cloud Storage using s3path.S3path(see examples of usage here: S3 Support)
  • +
  • Support for cloud Storage using s3path.S3path
  • Data output formats

    @@ -487,11 +487,11 @@

    Citing CoreMS

    If you use CoreMS in your work, please use the following citation:

    -

    Version 2.2.1 Release on GitHub, archived on Zenodo:

    +

    Version 3.0.0 Release on GitHub, archived on Zenodo:

    -

    DOI

    +

    DOI

    -

    Yuri E. Corilo, William R. Kew, Lee Ann McCue (2021, March 27). EMSL-Computing/CoreMS: CoreMS 2.0.1 (Version v2.0.1), as developed on Github. Zenodo. http://doi.org/10.5281/zenodo.4641552

    +

    Yuri E. Corilo, William R. Kew, Lee Ann McCue, Katherine R . Heal, James C. Carr (2024, October 29). EMSL-Computing/CoreMS: CoreMS 3.0.0 (Version v3.0.0), as developed on Github. Zenodo. http://doi.org/10.5281/zenodo.14009575

    ```

    @@ -526,56 +526,58 @@

    Citing CoreMS

    -
     1__author__ = 'Yuri E. Corilo'
    - 2__version__ = '2.2.1'
    +                        
     1__author__ = "Yuri E. Corilo"
    + 2__version__ = "3.0.0"
      3import time
      4import os
      5import sys
      6import hashlib
      7
      8# Get the path to the README file
    - 9readme_path = os.path.join(os.path.dirname(__file__), '..', 'README.md')
    + 9readme_path = os.path.join(os.path.dirname(__file__), "..", "README.md")
     10
     11# Read the contents of the README file
    -12with open(readme_path, 'r', encoding='utf-8') as readme_file:
    +12with open(readme_path, "r", encoding="utf-8") as readme_file:
     13    __doc__ = readme_file.read()
     14
    -15def timeit(method):
    -16    def timed(*args, **kw):
    -17        ts = time.time()
    -18        result = method(*args, **kw)
    -19        te = time.time()
    -20        if 'log_time' in kw:
    -21            name = kw.get('log_name', method.__name__.upper())
    -22            kw['log_time'][name] = int((te - ts) * 1000)
    -23        else:
    -24            print("%r  %2.2f ms" % (method.__name__, (te - ts) * 1000))
    -25        return result
    -26    return timed
    +15
    +16def timeit(method):
    +17    def timed(*args, **kw):
    +18        ts = time.time()
    +19        result = method(*args, **kw)
    +20        te = time.time()
    +21        if "log_time" in kw:
    +22            name = kw.get("log_name", method.__name__.upper())
    +23            kw["log_time"][name] = int((te - ts) * 1000)
    +24        else:
    +25            print("%r  %2.2f ms" % (method.__name__, (te - ts) * 1000))
    +26        return result
     27
    -28
    -29class SuppressPrints:
    +28    return timed
    +29
     30
    -31    def __enter__(self):
    -32        self._original_stdout = sys.stdout
    -33        sys.stdout = open(os.devnull, 'w')
    -34
    -35    def __exit__(self, exc_type, exc_val, exc_tb):
    -36        sys.stdout.close()
    -37        sys.stdout = self._original_stdout
    -38
    -39def chunks(lst, n):
    -40    """Yield successive n-sized chunks from lst."""
    -41    for i in range(0, len(lst), n):
    -42        yield lst[i:i + n]
    -43
    -44def corems_md5(fname):
    +31class SuppressPrints:
    +32    def __enter__(self):
    +33        self._original_stdout = sys.stdout
    +34        sys.stdout = open(os.devnull, "w")
    +35
    +36    def __exit__(self, exc_type, exc_val, exc_tb):
    +37        sys.stdout.close()
    +38        sys.stdout = self._original_stdout
    +39
    +40
    +41def chunks(lst, n):
    +42    """Yield successive n-sized chunks from lst."""
    +43    for i in range(0, len(lst), n):
    +44        yield lst[i : i + n]
     45
    -46    bytes_io = fname.open('rb').read()
    -47
    -48    md5_returned = hashlib.sha256(bytes_io).hexdigest()
    +46
    +47def corems_md5(fname):
    +48    bytes_io = fname.open("rb").read()
     49
    -50    return "{}:{}".format("sha256", md5_returned)
    +50    md5_returned = hashlib.sha256(bytes_io).hexdigest()
    +51
    +52    return "{}:{}".format("sha256", md5_returned)
     
    @@ -603,18 +605,19 @@

    Citing CoreMS

    -
    16def timeit(method):
    -17    def timed(*args, **kw):
    -18        ts = time.time()
    -19        result = method(*args, **kw)
    -20        te = time.time()
    -21        if 'log_time' in kw:
    -22            name = kw.get('log_name', method.__name__.upper())
    -23            kw['log_time'][name] = int((te - ts) * 1000)
    -24        else:
    -25            print("%r  %2.2f ms" % (method.__name__, (te - ts) * 1000))
    -26        return result
    -27    return timed
    +            
    17def timeit(method):
    +18    def timed(*args, **kw):
    +19        ts = time.time()
    +20        result = method(*args, **kw)
    +21        te = time.time()
    +22        if "log_time" in kw:
    +23            name = kw.get("log_name", method.__name__.upper())
    +24            kw["log_time"][name] = int((te - ts) * 1000)
    +25        else:
    +26            print("%r  %2.2f ms" % (method.__name__, (te - ts) * 1000))
    +27        return result
    +28
    +29    return timed
     
    @@ -632,15 +635,14 @@

    Citing CoreMS

    -
    30class SuppressPrints:
    -31
    -32    def __enter__(self):
    -33        self._original_stdout = sys.stdout
    -34        sys.stdout = open(os.devnull, 'w')
    -35
    -36    def __exit__(self, exc_type, exc_val, exc_tb):
    -37        sys.stdout.close()
    -38        sys.stdout = self._original_stdout
    +            
    32class SuppressPrints:
    +33    def __enter__(self):
    +34        self._original_stdout = sys.stdout
    +35        sys.stdout = open(os.devnull, "w")
    +36
    +37    def __exit__(self, exc_type, exc_val, exc_tb):
    +38        sys.stdout.close()
    +39        sys.stdout = self._original_stdout
     
    @@ -658,10 +660,10 @@

    Citing CoreMS

    -
    40def chunks(lst, n):
    -41    """Yield successive n-sized chunks from lst."""
    -42    for i in range(0, len(lst), n):
    -43        yield lst[i:i + n]
    +            
    42def chunks(lst, n):
    +43    """Yield successive n-sized chunks from lst."""
    +44    for i in range(0, len(lst), n):
    +45        yield lst[i : i + n]
     
    @@ -681,13 +683,12 @@

    Citing CoreMS

    -
    45def corems_md5(fname):
    -46
    -47    bytes_io = fname.open('rb').read()
    -48
    -49    md5_returned = hashlib.sha256(bytes_io).hexdigest()
    +            
    48def corems_md5(fname):
    +49    bytes_io = fname.open("rb").read()
     50
    -51    return "{}:{}".format("sha256", md5_returned)
    +51    md5_returned = hashlib.sha256(bytes_io).hexdigest()
    +52
    +53    return "{}:{}".format("sha256", md5_returned)
     
    diff --git a/docs/corems/chroma_peak/calc/ChromaPeakCalc.html b/docs/corems/chroma_peak/calc/ChromaPeakCalc.html index 83307393..894642e6 100644 --- a/docs/corems/chroma_peak/calc/ChromaPeakCalc.html +++ b/docs/corems/chroma_peak/calc/ChromaPeakCalc.html @@ -257,91 +257,93 @@

    171 right_index = max_index 172 while eic[left_index] > eic[max_index] * fraction and left_index > 0: 173 left_index -= 1 -174 while eic[right_index] > eic[max_index] * fraction and right_index < len(eic) - 1: -175 right_index += 1 -176 -177 # Get the retention times of the indexes just below the half height -178 left_rt = self._eic_data.time[left_index] -179 right_rt = self._eic_data.time[right_index] -180 -181 # If left_rt and right_rt are outside the bounds of the integration, set them to the bounds and set estimated to True -182 estimated = False -183 if left_rt < self.eic_rt_list[0]: -184 left_rt = self.eic_rt_list[0] -185 left_index = np.where(self._eic_data.scans == self._eic_data.apexes[0][0])[ -186 0 -187 ][0] -188 estimated = True -189 if right_rt > self.eic_rt_list[-1]: -190 right_rt = self.eic_rt_list[-1] -191 right_index = np.where( -192 self._eic_data.scans == self._eic_data.apexes[0][-1] -193 )[0][0] -194 estimated = True -195 half_height_width_max = right_rt - left_rt -196 -197 # Get the retention times of the indexes just above the half height -198 left_rt = self._eic_data.time[left_index + 1] -199 right_rt = self._eic_data.time[right_index - 1] -200 half_height_width_min = right_rt - left_rt -201 -202 return half_height_width_min, half_height_width_max, estimated +174 while ( +175 eic[right_index] > eic[max_index] * fraction and right_index < len(eic) - 1 +176 ): +177 right_index += 1 +178 +179 # Get the retention times of the indexes just below the half height +180 left_rt = self._eic_data.time[left_index] +181 right_rt = self._eic_data.time[right_index] +182 +183 # If left_rt and right_rt are outside the bounds of the integration, set them to the bounds and set estimated to True +184 estimated = False +185 if left_rt < self.eic_rt_list[0]: +186 left_rt = self.eic_rt_list[0] +187 left_index = np.where(self._eic_data.scans == self._eic_data.apexes[0][0])[ +188 0 +189 ][0] +190 estimated = True +191 if right_rt > self.eic_rt_list[-1]: +192 
right_rt = self.eic_rt_list[-1] +193 right_index = np.where( +194 self._eic_data.scans == self._eic_data.apexes[0][-1] +195 )[0][0] +196 estimated = True +197 half_height_width_max = right_rt - left_rt +198 +199 # Get the retention times of the indexes just above the half height +200 left_rt = self._eic_data.time[left_index + 1] +201 right_rt = self._eic_data.time[right_index - 1] +202 half_height_width_min = right_rt - left_rt 203 -204 def calc_half_height_width(self, accept_estimated: bool = False): -205 """ -206 Calculate the half-height width of the mass feature. -207 -208 This function calculates the half-height width of the mass feature and -209 stores the result in the `_half_height_width` attribute -210 -211 Returns -212 ------- -213 None, stores the result in the `_half_height_width` attribute of the class. -214 """ -215 min_, max_, estimated = self.calc_fraction_height_width(0.5) -216 if not estimated or accept_estimated: -217 self._half_height_width = np.array([min_, max_]) -218 -219 def calc_tailing_factor(self, accept_estimated: bool = False): -220 """ -221 Calculate the peak asymmetry of the mass feature. -222 -223 This function calculates the peak asymmetry of the mass feature and -224 stores the result in the `_tailing_factor` attribute. -225 Calculations completed at 5% of the peak height in accordance with the USP tailing factor calculation. -226 -227 Returns -228 ------- -229 None, stores the result in the `_tailing_factor` attribute of the class. 
-230 -231 References -232 ---------- -233 1) JIS K0124:2011 General rules for high performance liquid chromatography -234 2) JIS K0214:2013 Technical terms for analytical chemistry -235 """ -236 # First calculate the width of the peak at 5% of the peak height -237 width_min, width_max, estimated = self.calc_fraction_height_width(0.05) -238 -239 if not estimated or accept_estimated: -240 # Next calculate the width of the peak at 95% of the peak height -241 eic = self._eic_data.eic_smoothed -242 max_index = np.where(self._eic_data.scans == self.apex_scan)[0][0] -243 left_index = max_index -244 while eic[left_index] > eic[max_index] * 0.05 and left_index > 0: -245 left_index -= 1 -246 -247 left_half_time_min = ( -248 self._eic_data.time[max_index] - self._eic_data.time[left_index] -249 ) -250 left_half_time_max = ( -251 self._eic_data.time[max_index] - self._eic_data.time[left_index + 1] -252 ) -253 -254 tailing_factor = np.mean([width_min, width_max]) / ( -255 2 * np.mean([left_half_time_min, left_half_time_max]) -256 ) -257 -258 self._tailing_factor = tailing_factor +204 return half_height_width_min, half_height_width_max, estimated +205 +206 def calc_half_height_width(self, accept_estimated: bool = False): +207 """ +208 Calculate the half-height width of the mass feature. +209 +210 This function calculates the half-height width of the mass feature and +211 stores the result in the `_half_height_width` attribute +212 +213 Returns +214 ------- +215 None, stores the result in the `_half_height_width` attribute of the class. +216 """ +217 min_, max_, estimated = self.calc_fraction_height_width(0.5) +218 if not estimated or accept_estimated: +219 self._half_height_width = np.array([min_, max_]) +220 +221 def calc_tailing_factor(self, accept_estimated: bool = False): +222 """ +223 Calculate the peak asymmetry of the mass feature. +224 +225 This function calculates the peak asymmetry of the mass feature and +226 stores the result in the `_tailing_factor` attribute. 
+227 Calculations completed at 5% of the peak height in accordance with the USP tailing factor calculation. +228 +229 Returns +230 ------- +231 None, stores the result in the `_tailing_factor` attribute of the class. +232 +233 References +234 ---------- +235 1) JIS K0124:2011 General rules for high performance liquid chromatography +236 2) JIS K0214:2013 Technical terms for analytical chemistry +237 """ +238 # First calculate the width of the peak at 5% of the peak height +239 width_min, width_max, estimated = self.calc_fraction_height_width(0.05) +240 +241 if not estimated or accept_estimated: +242 # Next calculate the width of the peak at 95% of the peak height +243 eic = self._eic_data.eic_smoothed +244 max_index = np.where(self._eic_data.scans == self.apex_scan)[0][0] +245 left_index = max_index +246 while eic[left_index] > eic[max_index] * 0.05 and left_index > 0: +247 left_index -= 1 +248 +249 left_half_time_min = ( +250 self._eic_data.time[max_index] - self._eic_data.time[left_index] +251 ) +252 left_half_time_max = ( +253 self._eic_data.time[max_index] - self._eic_data.time[left_index + 1] +254 ) +255 +256 tailing_factor = np.mean([width_min, width_max]) / ( +257 2 * np.mean([left_half_time_min, left_half_time_max]) +258 ) +259 +260 self._tailing_factor = tailing_factor

    @@ -714,91 +716,93 @@
    Parameters
    172 right_index = max_index 173 while eic[left_index] > eic[max_index] * fraction and left_index > 0: 174 left_index -= 1 -175 while eic[right_index] > eic[max_index] * fraction and right_index < len(eic) - 1: -176 right_index += 1 -177 -178 # Get the retention times of the indexes just below the half height -179 left_rt = self._eic_data.time[left_index] -180 right_rt = self._eic_data.time[right_index] -181 -182 # If left_rt and right_rt are outside the bounds of the integration, set them to the bounds and set estimated to True -183 estimated = False -184 if left_rt < self.eic_rt_list[0]: -185 left_rt = self.eic_rt_list[0] -186 left_index = np.where(self._eic_data.scans == self._eic_data.apexes[0][0])[ -187 0 -188 ][0] -189 estimated = True -190 if right_rt > self.eic_rt_list[-1]: -191 right_rt = self.eic_rt_list[-1] -192 right_index = np.where( -193 self._eic_data.scans == self._eic_data.apexes[0][-1] -194 )[0][0] -195 estimated = True -196 half_height_width_max = right_rt - left_rt -197 -198 # Get the retention times of the indexes just above the half height -199 left_rt = self._eic_data.time[left_index + 1] -200 right_rt = self._eic_data.time[right_index - 1] -201 half_height_width_min = right_rt - left_rt -202 -203 return half_height_width_min, half_height_width_max, estimated +175 while ( +176 eic[right_index] > eic[max_index] * fraction and right_index < len(eic) - 1 +177 ): +178 right_index += 1 +179 +180 # Get the retention times of the indexes just below the half height +181 left_rt = self._eic_data.time[left_index] +182 right_rt = self._eic_data.time[right_index] +183 +184 # If left_rt and right_rt are outside the bounds of the integration, set them to the bounds and set estimated to True +185 estimated = False +186 if left_rt < self.eic_rt_list[0]: +187 left_rt = self.eic_rt_list[0] +188 left_index = np.where(self._eic_data.scans == self._eic_data.apexes[0][0])[ +189 0 +190 ][0] +191 estimated = True +192 if right_rt > self.eic_rt_list[-1]: +193 
right_rt = self.eic_rt_list[-1] +194 right_index = np.where( +195 self._eic_data.scans == self._eic_data.apexes[0][-1] +196 )[0][0] +197 estimated = True +198 half_height_width_max = right_rt - left_rt +199 +200 # Get the retention times of the indexes just above the half height +201 left_rt = self._eic_data.time[left_index + 1] +202 right_rt = self._eic_data.time[right_index - 1] +203 half_height_width_min = right_rt - left_rt 204 -205 def calc_half_height_width(self, accept_estimated: bool = False): -206 """ -207 Calculate the half-height width of the mass feature. -208 -209 This function calculates the half-height width of the mass feature and -210 stores the result in the `_half_height_width` attribute -211 -212 Returns -213 ------- -214 None, stores the result in the `_half_height_width` attribute of the class. -215 """ -216 min_, max_, estimated = self.calc_fraction_height_width(0.5) -217 if not estimated or accept_estimated: -218 self._half_height_width = np.array([min_, max_]) -219 -220 def calc_tailing_factor(self, accept_estimated: bool = False): -221 """ -222 Calculate the peak asymmetry of the mass feature. -223 -224 This function calculates the peak asymmetry of the mass feature and -225 stores the result in the `_tailing_factor` attribute. -226 Calculations completed at 5% of the peak height in accordance with the USP tailing factor calculation. -227 -228 Returns -229 ------- -230 None, stores the result in the `_tailing_factor` attribute of the class. 
-231 -232 References -233 ---------- -234 1) JIS K0124:2011 General rules for high performance liquid chromatography -235 2) JIS K0214:2013 Technical terms for analytical chemistry -236 """ -237 # First calculate the width of the peak at 5% of the peak height -238 width_min, width_max, estimated = self.calc_fraction_height_width(0.05) -239 -240 if not estimated or accept_estimated: -241 # Next calculate the width of the peak at 95% of the peak height -242 eic = self._eic_data.eic_smoothed -243 max_index = np.where(self._eic_data.scans == self.apex_scan)[0][0] -244 left_index = max_index -245 while eic[left_index] > eic[max_index] * 0.05 and left_index > 0: -246 left_index -= 1 -247 -248 left_half_time_min = ( -249 self._eic_data.time[max_index] - self._eic_data.time[left_index] -250 ) -251 left_half_time_max = ( -252 self._eic_data.time[max_index] - self._eic_data.time[left_index + 1] -253 ) -254 -255 tailing_factor = np.mean([width_min, width_max]) / ( -256 2 * np.mean([left_half_time_min, left_half_time_max]) -257 ) -258 -259 self._tailing_factor = tailing_factor +205 return half_height_width_min, half_height_width_max, estimated +206 +207 def calc_half_height_width(self, accept_estimated: bool = False): +208 """ +209 Calculate the half-height width of the mass feature. +210 +211 This function calculates the half-height width of the mass feature and +212 stores the result in the `_half_height_width` attribute +213 +214 Returns +215 ------- +216 None, stores the result in the `_half_height_width` attribute of the class. +217 """ +218 min_, max_, estimated = self.calc_fraction_height_width(0.5) +219 if not estimated or accept_estimated: +220 self._half_height_width = np.array([min_, max_]) +221 +222 def calc_tailing_factor(self, accept_estimated: bool = False): +223 """ +224 Calculate the peak asymmetry of the mass feature. +225 +226 This function calculates the peak asymmetry of the mass feature and +227 stores the result in the `_tailing_factor` attribute. 
+228 Calculations completed at 5% of the peak height in accordance with the USP tailing factor calculation. +229 +230 Returns +231 ------- +232 None, stores the result in the `_tailing_factor` attribute of the class. +233 +234 References +235 ---------- +236 1) JIS K0124:2011 General rules for high performance liquid chromatography +237 2) JIS K0214:2013 Technical terms for analytical chemistry +238 """ +239 # First calculate the width of the peak at 5% of the peak height +240 width_min, width_max, estimated = self.calc_fraction_height_width(0.05) +241 +242 if not estimated or accept_estimated: +243 # Next calculate the width of the peak at 95% of the peak height +244 eic = self._eic_data.eic_smoothed +245 max_index = np.where(self._eic_data.scans == self.apex_scan)[0][0] +246 left_index = max_index +247 while eic[left_index] > eic[max_index] * 0.05 and left_index > 0: +248 left_index -= 1 +249 +250 left_half_time_min = ( +251 self._eic_data.time[max_index] - self._eic_data.time[left_index] +252 ) +253 left_half_time_max = ( +254 self._eic_data.time[max_index] - self._eic_data.time[left_index + 1] +255 ) +256 +257 tailing_factor = np.mean([width_min, width_max]) / ( +258 2 * np.mean([left_half_time_min, left_half_time_max]) +259 ) +260 +261 self._tailing_factor = tailing_factor @@ -931,35 +935,37 @@
    References
    172 right_index = max_index 173 while eic[left_index] > eic[max_index] * fraction and left_index > 0: 174 left_index -= 1 -175 while eic[right_index] > eic[max_index] * fraction and right_index < len(eic) - 1: -176 right_index += 1 -177 -178 # Get the retention times of the indexes just below the half height -179 left_rt = self._eic_data.time[left_index] -180 right_rt = self._eic_data.time[right_index] -181 -182 # If left_rt and right_rt are outside the bounds of the integration, set them to the bounds and set estimated to True -183 estimated = False -184 if left_rt < self.eic_rt_list[0]: -185 left_rt = self.eic_rt_list[0] -186 left_index = np.where(self._eic_data.scans == self._eic_data.apexes[0][0])[ -187 0 -188 ][0] -189 estimated = True -190 if right_rt > self.eic_rt_list[-1]: -191 right_rt = self.eic_rt_list[-1] -192 right_index = np.where( -193 self._eic_data.scans == self._eic_data.apexes[0][-1] -194 )[0][0] -195 estimated = True -196 half_height_width_max = right_rt - left_rt -197 -198 # Get the retention times of the indexes just above the half height -199 left_rt = self._eic_data.time[left_index + 1] -200 right_rt = self._eic_data.time[right_index - 1] -201 half_height_width_min = right_rt - left_rt -202 -203 return half_height_width_min, half_height_width_max, estimated +175 while ( +176 eic[right_index] > eic[max_index] * fraction and right_index < len(eic) - 1 +177 ): +178 right_index += 1 +179 +180 # Get the retention times of the indexes just below the half height +181 left_rt = self._eic_data.time[left_index] +182 right_rt = self._eic_data.time[right_index] +183 +184 # If left_rt and right_rt are outside the bounds of the integration, set them to the bounds and set estimated to True +185 estimated = False +186 if left_rt < self.eic_rt_list[0]: +187 left_rt = self.eic_rt_list[0] +188 left_index = np.where(self._eic_data.scans == self._eic_data.apexes[0][0])[ +189 0 +190 ][0] +191 estimated = True +192 if right_rt > self.eic_rt_list[-1]: +193 
right_rt = self.eic_rt_list[-1] +194 right_index = np.where( +195 self._eic_data.scans == self._eic_data.apexes[0][-1] +196 )[0][0] +197 estimated = True +198 half_height_width_max = right_rt - left_rt +199 +200 # Get the retention times of the indexes just above the half height +201 left_rt = self._eic_data.time[left_index + 1] +202 right_rt = self._eic_data.time[right_index - 1] +203 half_height_width_min = right_rt - left_rt +204 +205 return half_height_width_min, half_height_width_max, estimated @@ -995,20 +1001,20 @@
    Returns
    -
    205    def calc_half_height_width(self, accept_estimated: bool = False):
    -206        """
    -207        Calculate the half-height width of the mass feature.
    -208
    -209        This function calculates the half-height width of the mass feature and
    -210        stores the result in the `_half_height_width` attribute
    -211
    -212        Returns
    -213        -------
    -214        None, stores the result in the `_half_height_width` attribute of the class.
    -215        """
    -216        min_, max_, estimated = self.calc_fraction_height_width(0.5)
    -217        if not estimated or accept_estimated:
    -218            self._half_height_width = np.array([min_, max_])
    +            
    207    def calc_half_height_width(self, accept_estimated: bool = False):
    +208        """
    +209        Calculate the half-height width of the mass feature.
    +210
    +211        This function calculates the half-height width of the mass feature and
    +212        stores the result in the `_half_height_width` attribute
    +213
    +214        Returns
    +215        -------
    +216        None, stores the result in the `_half_height_width` attribute of the class.
    +217        """
    +218        min_, max_, estimated = self.calc_fraction_height_width(0.5)
    +219        if not estimated or accept_estimated:
    +220            self._half_height_width = np.array([min_, max_])
     
    @@ -1037,46 +1043,46 @@
    Returns
    -
    220    def calc_tailing_factor(self, accept_estimated: bool = False):
    -221        """
    -222        Calculate the peak asymmetry of the mass feature.
    -223
    -224        This function calculates the peak asymmetry of the mass feature and
    -225        stores the result in the `_tailing_factor` attribute.
    -226        Calculations completed at 5% of the peak height in accordance with the USP tailing factor calculation.
    -227
    -228        Returns
    -229        -------
    -230        None, stores the result in the `_tailing_factor` attribute of the class.
    -231
    -232        References
    -233        ----------
    -234        1) JIS K0124:2011 General rules for high performance liquid chromatography
    -235        2) JIS K0214:2013 Technical terms for analytical chemistry
    -236        """
    -237        # First calculate the width of the peak at 5% of the peak height
    -238        width_min, width_max, estimated = self.calc_fraction_height_width(0.05)
    -239
    -240        if not estimated or accept_estimated:
    -241            # Next calculate the width of the peak at 95% of the peak height
    -242            eic = self._eic_data.eic_smoothed
    -243            max_index = np.where(self._eic_data.scans == self.apex_scan)[0][0]
    -244            left_index = max_index
    -245            while eic[left_index] > eic[max_index] * 0.05 and left_index > 0:
    -246                left_index -= 1
    -247
    -248            left_half_time_min = (
    -249                self._eic_data.time[max_index] - self._eic_data.time[left_index]
    -250            )
    -251            left_half_time_max = (
    -252                self._eic_data.time[max_index] - self._eic_data.time[left_index + 1]
    -253            )
    -254
    -255            tailing_factor = np.mean([width_min, width_max]) / (
    -256                2 * np.mean([left_half_time_min, left_half_time_max])
    -257            )
    -258
    -259            self._tailing_factor = tailing_factor
    +            
    222    def calc_tailing_factor(self, accept_estimated: bool = False):
    +223        """
    +224        Calculate the peak asymmetry of the mass feature.
    +225
    +226        This function calculates the peak asymmetry of the mass feature and
    +227        stores the result in the `_tailing_factor` attribute.
    +228        Calculations completed at 5% of the peak height in accordance with the USP tailing factor calculation.
    +229
    +230        Returns
    +231        -------
    +232        None, stores the result in the `_tailing_factor` attribute of the class.
    +233
    +234        References
    +235        ----------
    +236        1) JIS K0124:2011 General rules for high performance liquid chromatography
    +237        2) JIS K0214:2013 Technical terms for analytical chemistry
    +238        """
    +239        # First calculate the width of the peak at 5% of the peak height
    +240        width_min, width_max, estimated = self.calc_fraction_height_width(0.05)
    +241
    +242        if not estimated or accept_estimated:
    +243            # Next calculate the width of the peak at 95% of the peak height
    +244            eic = self._eic_data.eic_smoothed
    +245            max_index = np.where(self._eic_data.scans == self.apex_scan)[0][0]
    +246            left_index = max_index
    +247            while eic[left_index] > eic[max_index] * 0.05 and left_index > 0:
    +248                left_index -= 1
    +249
    +250            left_half_time_min = (
    +251                self._eic_data.time[max_index] - self._eic_data.time[left_index]
    +252            )
    +253            left_half_time_max = (
    +254                self._eic_data.time[max_index] - self._eic_data.time[left_index + 1]
    +255            )
    +256
    +257            tailing_factor = np.mean([width_min, width_max]) / (
    +258                2 * np.mean([left_half_time_min, left_half_time_max])
    +259            )
    +260
    +261            self._tailing_factor = tailing_factor
     
    diff --git a/docs/corems/chroma_peak/factory/chroma_peak_classes.html b/docs/corems/chroma_peak/factory/chroma_peak_classes.html index 56ae12e2..095f425a 100644 --- a/docs/corems/chroma_peak/factory/chroma_peak_classes.html +++ b/docs/corems/chroma_peak/factory/chroma_peak_classes.html @@ -221,796 +221,824 @@

    6import pandas as pd 7import copy 8 - 9from corems.chroma_peak.calc.ChromaPeakCalc import GCPeakCalculation, LCMSMassFeatureCalculation - 10from corems.mass_spectra.factory.chromat_data import EIC_Data - 11from corems.molecular_id.factory.EI_SQL import LowResCompoundRef - 12 - 13 - 14class ChromaPeakBase: - 15 """Base class for chromatographic peak (ChromaPeak) objects. + 9from corems.chroma_peak.calc.ChromaPeakCalc import ( + 10 GCPeakCalculation, + 11 LCMSMassFeatureCalculation, + 12) + 13from corems.mass_spectra.factory.chromat_data import EIC_Data + 14from corems.molecular_id.factory.EI_SQL import LowResCompoundRef + 15 16 - 17 Parameters - 18 ------- - 19 chromatogram_parent : Chromatogram - 20 The parent chromatogram object. - 21 mass_spectrum_obj : MassSpectrum - 22 The mass spectrum object. - 23 start_index : int - 24 The start index of the peak. - 25 index : int - 26 The index of the peak. - 27 final_index : int - 28 The final index of the peak. - 29 - 30 Attributes - 31 -------- - 32 start_scan : int - 33 The start scan of the peak. - 34 final_scan : int - 35 The final scan of the peak. - 36 apex_scan : int - 37 The apex scan of the peak. - 38 chromatogram_parent : Chromatogram - 39 The parent chromatogram object. - 40 mass_spectrum : MassSpectrum - 41 The mass spectrum object. - 42 _area : float - 43 The area of the peak. - 44 - 45 Properties - 46 -------- - 47 * retention_time : float. - 48 The retention time of the peak. - 49 * tic : float. - 50 The total ion current of the peak. - 51 * area : float. - 52 The area of the peak. - 53 * rt_list : list. - 54 The list of retention times within the peak. - 55 * tic_list : list. - 56 The list of total ion currents within the peak. 
- 57 - 58 Methods - 59 -------- - 60 * None - 61 """ - 62 - 63 def __init__( - 64 self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index - 65 ): - 66 self.start_scan = start_index - 67 self.final_scan = final_index - 68 self.apex_scan = int(index) - 69 self.chromatogram_parent = chromatogram_parent - 70 self.mass_spectrum = mass_spectrum_obj - 71 self._area = None - 72 - 73 @property - 74 def retention_time(self): - 75 """Retention Time""" - 76 return self.mass_spectrum.retention_time - 77 - 78 @property - 79 def tic(self): - 80 """Total Ion Current""" - 81 return self.mass_spectrum.tic - 82 - 83 @property - 84 def area(self): - 85 """Peak Area""" - 86 return self._area - 87 - 88 @property - 89 def rt_list(self): - 90 """Retention Time List""" - 91 return [ - 92 self.chromatogram_parent.retention_time[i] - 93 for i in range(self.start_scan, self.final_scan + 1) - 94 ] - 95 - 96 @property - 97 def tic_list(self): - 98 """Total Ion Current List""" - 99 return [ -100 self.chromatogram_parent.tic[i] -101 for i in range(self.start_scan, self.final_scan + 1) -102 ] -103 -104 -105class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation): -106 """Class representing a mass feature in a liquid chromatography (LC) chromatogram. + 17class ChromaPeakBase: + 18 """Base class for chromatographic peak (ChromaPeak) objects. + 19 + 20 Parameters + 21 ------- + 22 chromatogram_parent : Chromatogram + 23 The parent chromatogram object. + 24 mass_spectrum_obj : MassSpectrum + 25 The mass spectrum object. + 26 start_index : int + 27 The start index of the peak. + 28 index : int + 29 The index of the peak. + 30 final_index : int + 31 The final index of the peak. + 32 + 33 Attributes + 34 -------- + 35 start_scan : int + 36 The start scan of the peak. + 37 final_scan : int + 38 The final scan of the peak. + 39 apex_scan : int + 40 The apex scan of the peak. + 41 chromatogram_parent : Chromatogram + 42 The parent chromatogram object. 
+ 43 mass_spectrum : MassSpectrum + 44 The mass spectrum object. + 45 _area : float + 46 The area of the peak. + 47 + 48 Properties + 49 -------- + 50 * retention_time : float. + 51 The retention time of the peak. + 52 * tic : float. + 53 The total ion current of the peak. + 54 * area : float. + 55 The area of the peak. + 56 * rt_list : list. + 57 The list of retention times within the peak. + 58 * tic_list : list. + 59 The list of total ion currents within the peak. + 60 + 61 Methods + 62 -------- + 63 * None + 64 """ + 65 + 66 def __init__( + 67 self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index + 68 ): + 69 self.start_scan = start_index + 70 self.final_scan = final_index + 71 self.apex_scan = int(index) + 72 self.chromatogram_parent = chromatogram_parent + 73 self.mass_spectrum = mass_spectrum_obj + 74 self._area = None + 75 + 76 @property + 77 def retention_time(self): + 78 """Retention Time""" + 79 return self.mass_spectrum.retention_time + 80 + 81 @property + 82 def tic(self): + 83 """Total Ion Current""" + 84 return self.mass_spectrum.tic + 85 + 86 @property + 87 def area(self): + 88 """Peak Area""" + 89 return self._area + 90 + 91 @property + 92 def rt_list(self): + 93 """Retention Time List""" + 94 return [ + 95 self.chromatogram_parent.retention_time[i] + 96 for i in range(self.start_scan, self.final_scan + 1) + 97 ] + 98 + 99 @property +100 def tic_list(self): +101 """Total Ion Current List""" +102 return [ +103 self.chromatogram_parent.tic[i] +104 for i in range(self.start_scan, self.final_scan + 1) +105 ] +106 107 -108 Parameters -109 ------- -110 lcms_parent : LCMS -111 The parent LCMSBase object. -112 mz : float -113 The observed mass to charge ratio of the feature. -114 retention_time : float -115 The retention time of the feature (in minutes), at the apex. -116 intensity : float -117 The intensity of the feature. -118 apex_scan : int -119 The scan number of the apex of the feature. 
-120 persistence : float, optional -121 The persistence of the feature. Default is None. -122 -123 Attributes -124 -------- -125 _mz_exp : float -126 The observed mass to charge ratio of the feature. -127 _mz_cal : float -128 The calibrated mass to charge ratio of the feature. -129 _retention_time : float -130 The retention time of the feature (in minutes), at the apex. -131 _apex_scan : int -132 The scan number of the apex of the feature. -133 _intensity : float -134 The intensity of the feature. -135 _persistence : float -136 The persistence of the feature. -137 _eic_data : EIC_Data -138 The EIC data object associated with the feature. -139 _dispersity_index : float -140 The dispersity index of the feature. -141 _half_height_width : numpy.ndarray -142 The half height width of the feature (in minutes, as an array of min and max values). -143 _tailing_factor : float -144 The tailing factor of the feature. -145 > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak. -146 _ms_deconvoluted_idx : [int] -147 The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum. -148 is_calibrated : bool -149 If True, the feature has been calibrated. Default is False. -150 monoisotopic_mf_id : int -151 Mass feature id that is the monoisotopic version of self. -152 If self.id, then self is the monoisotopic feature). Default is None. -153 isotopologue_type : str -154 The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. -155 Default is None. -156 ms2_scan_numbers : list -157 List of scan numbers of the MS2 spectra associated with the feature. -158 Default is an empty list. -159 ms2_mass_spectra : dict -160 Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). -161 Default is an empty dictionary. -162 ms2_similarity_results : list -163 List of MS2 similarity results associated with the mass feature. -164 Default is an empty list. 
-165 id : int -166 The ID of the feature, also the key in the parent LCMS object's -167 `mass_features` dictionary. -168 mass_spectrum_deconvoluted_parent : bool -169 If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None. -170 associated_mass_features_deconvoluted : list -171 List of mass features associated with the deconvoluted mass spectrum. Default is an empty list. -172 -173 """ -174 -175 def __init__( -176 self, -177 lcms_parent, -178 mz: float, -179 retention_time: float, -180 intensity: float, -181 apex_scan: int, -182 persistence: float = None, -183 id: int = None, -184 ): -185 super().__init__( -186 chromatogram_parent=lcms_parent, -187 mass_spectrum_obj=None, -188 start_index=None, -189 index=apex_scan, -190 final_index=None, -191 ) -192 # Core attributes, marked as private -193 self._mz_exp: float = mz -194 self._mz_cal: float = None -195 self._retention_time: float = retention_time -196 self._apex_scan: int = apex_scan -197 self._intensity: float = intensity -198 self._persistence: float = persistence -199 self._eic_data: EIC_Data = None -200 self._dispersity_index: float = None -201 self._half_height_width: np.ndarray = None -202 self._ms_deconvoluted_idx = None -203 -204 # Additional attributes -205 self.monoisotopic_mf_id = None -206 self.isotopologue_type = None -207 self.ms2_scan_numbers = [] -208 self.ms2_mass_spectra = {} -209 self.ms2_similarity_results = [] -210 self.mass_spectrum_deconvoluted_parent: bool = None -211 self.associated_mass_features_deconvoluted = [] -212 -213 if id: -214 self.id = id -215 else: -216 # get the parent's mass feature keys and add 1 to the max value to get the new key -217 self.id = ( -218 max(lcms_parent.mass_features.keys()) + 1 -219 if lcms_parent.mass_features.keys() -220 else 0 -221 ) -222 -223 def update_mz(self): -224 """Update the mass to charge ratio from the mass spectrum object.""" -225 if self.mass_spectrum is None: -226 raise ValueError( -227 
"The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object" -228 ) -229 if len(self.mass_spectrum.mz_exp) == 0: -230 raise ValueError( -231 "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed" -232 ) -233 new_mz = self.ms1_peak.mz_exp -234 -235 # calculate the difference between the new and old m/z, only update if it is close -236 mz_diff = new_mz - self.mz -237 if abs(mz_diff) < 0.01: -238 self._mz_exp = new_mz -239 -240 def plot(self, to_plot=["EIC", "MS1", "MS2"], return_fig=True): -241 """Plot the mass feature. +108class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation): +109 """Class representing a mass feature in a liquid chromatography (LC) chromatogram. +110 +111 Parameters +112 ------- +113 lcms_parent : LCMS +114 The parent LCMSBase object. +115 mz : float +116 The observed mass to charge ratio of the feature. +117 retention_time : float +118 The retention time of the feature (in minutes), at the apex. +119 intensity : float +120 The intensity of the feature. +121 apex_scan : int +122 The scan number of the apex of the feature. +123 persistence : float, optional +124 The persistence of the feature. Default is None. +125 +126 Attributes +127 -------- +128 _mz_exp : float +129 The observed mass to charge ratio of the feature. +130 _mz_cal : float +131 The calibrated mass to charge ratio of the feature. +132 _retention_time : float +133 The retention time of the feature (in minutes), at the apex. +134 _apex_scan : int +135 The scan number of the apex of the feature. +136 _intensity : float +137 The intensity of the feature. +138 _persistence : float +139 The persistence of the feature. +140 _eic_data : EIC_Data +141 The EIC data object associated with the feature. +142 _dispersity_index : float +143 The dispersity index of the feature. 
+144 _half_height_width : numpy.ndarray +145 The half height width of the feature (in minutes, as an array of min and max values). +146 _tailing_factor : float +147 The tailing factor of the feature. +148 > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak. +149 _ms_deconvoluted_idx : [int] +150 The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum. +151 is_calibrated : bool +152 If True, the feature has been calibrated. Default is False. +153 monoisotopic_mf_id : int +154 Mass feature id that is the monoisotopic version of self. +155 If self.id, then self is the monoisotopic feature). Default is None. +156 isotopologue_type : str +157 The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. +158 Default is None. +159 ms2_scan_numbers : list +160 List of scan numbers of the MS2 spectra associated with the feature. +161 Default is an empty list. +162 ms2_mass_spectra : dict +163 Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). +164 Default is an empty dictionary. +165 ms2_similarity_results : list +166 List of MS2 similarity results associated with the mass feature. +167 Default is an empty list. +168 id : int +169 The ID of the feature, also the key in the parent LCMS object's +170 `mass_features` dictionary. +171 mass_spectrum_deconvoluted_parent : bool +172 If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None. +173 associated_mass_features_deconvoluted : list +174 List of mass features associated with the deconvoluted mass spectrum. Default is an empty list. 
+175 +176 """ +177 +178 def __init__( +179 self, +180 lcms_parent, +181 mz: float, +182 retention_time: float, +183 intensity: float, +184 apex_scan: int, +185 persistence: float = None, +186 id: int = None, +187 ): +188 super().__init__( +189 chromatogram_parent=lcms_parent, +190 mass_spectrum_obj=None, +191 start_index=None, +192 index=apex_scan, +193 final_index=None, +194 ) +195 # Core attributes, marked as private +196 self._mz_exp: float = mz +197 self._mz_cal: float = None +198 self._retention_time: float = retention_time +199 self._apex_scan: int = apex_scan +200 self._intensity: float = intensity +201 self._persistence: float = persistence +202 self._eic_data: EIC_Data = None +203 self._dispersity_index: float = None +204 self._half_height_width: np.ndarray = None +205 self._ms_deconvoluted_idx = None +206 +207 # Additional attributes +208 self.monoisotopic_mf_id = None +209 self.isotopologue_type = None +210 self.ms2_scan_numbers = [] +211 self.ms2_mass_spectra = {} +212 self.ms2_similarity_results = [] +213 self.mass_spectrum_deconvoluted_parent: bool = None +214 self.associated_mass_features_deconvoluted = [] +215 +216 if id: +217 self.id = id +218 else: +219 # get the parent's mass feature keys and add 1 to the max value to get the new key +220 self.id = ( +221 max(lcms_parent.mass_features.keys()) + 1 +222 if lcms_parent.mass_features.keys() +223 else 0 +224 ) +225 +226 def update_mz(self): +227 """Update the mass to charge ratio from the mass spectrum object.""" +228 if self.mass_spectrum is None: +229 raise ValueError( +230 "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object" +231 ) +232 if len(self.mass_spectrum.mz_exp) == 0: +233 raise ValueError( +234 "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed" +235 ) +236 new_mz = self.ms1_peak.mz_exp +237 +238 # calculate the difference between the new and old m/z, only update if it is close +239 
mz_diff = new_mz - self.mz +240 if abs(mz_diff) < 0.01: +241 self._mz_exp = new_mz 242 -243 Parameters -244 ---------- -245 to_plot : list, optional -246 List of strings specifying what to plot, any iteration of -247 "EIC", "MS2", and "MS1". -248 Default is ["EIC", "MS1", "MS2"]. -249 return_fig : bool, optional -250 If True, the figure is returned. Default is True. -251 -252 Returns -253 ------- -254 matplotlib.figure.Figure or None -255 The figure object if `return_fig` is True. -256 Otherwise None and the figure is displayed. -257 """ -258 -259 # EIC plot preparation -260 eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time +243 def plot(self, to_plot=["EIC", "MS1", "MS2"], return_fig=True): +244 """Plot the mass feature. +245 +246 Parameters +247 ---------- +248 to_plot : list, optional +249 List of strings specifying what to plot, any iteration of +250 "EIC", "MS2", and "MS1". +251 Default is ["EIC", "MS1", "MS2"]. +252 return_fig : bool, optional +253 If True, the figure is returned. Default is True. +254 +255 Returns +256 ------- +257 matplotlib.figure.Figure or None +258 The figure object if `return_fig` is True. +259 Otherwise None and the figure is displayed. 
+260 """ 261 -262 # Adjust to_plot list if there are not spectra added to the mass features -263 if self.mass_spectrum is None: -264 to_plot = [x for x in to_plot if x != "MS1"] -265 if len(self.ms2_mass_spectra) == 0: -266 to_plot = [x for x in to_plot if x != "MS2"] -267 if self._eic_data is None: -268 to_plot = [x for x in to_plot if x != "EIC"] -269 if self._ms_deconvoluted_idx is not None: -270 deconvoluted = True -271 else: -272 deconvoluted = False -273 -274 fig, axs = plt.subplots( -275 len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False -276 ) -277 fig.suptitle( -278 "Mass Feature " -279 + str(self.id) -280 + ": m/z = " -281 + str(round(self.mz, ndigits=4)) -282 + "; time = " -283 + str(round(self.retention_time, ndigits=1)) -284 + " minutes" -285 ) -286 -287 i = 0 -288 # EIC plot -289 if "EIC" in to_plot: -290 if self._eic_data is None: -291 raise ValueError( -292 "EIC data is not available, cannot plot the mass feature's EIC" -293 ) -294 axs[i][0].set_title("EIC", loc="left") -295 axs[i][0].plot(self._eic_data.time, self._eic_data.eic) -296 if self.start_scan is not None: -297 axs[i][0].fill_between( -298 self.eic_rt_list, self.eic_list, color="b", alpha=0.2 -299 ) -300 else: -301 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: -302 print( -303 "No start and final scan numbers were provided for mass feature " -304 + str(self.id) -305 ) -306 axs[i][0].set_ylabel("Intensity") -307 axs[i][0].set_xlabel("Time (minutes)") -308 axs[i][0].set_ylim(0, self.eic_list.max() * 1.1) -309 axs[i][0].set_xlim( -310 self.retention_time - eic_buffer_time, -311 self.retention_time + eic_buffer_time, -312 ) -313 axs[i][0].axvline( -314 x=self.retention_time, color="k", label="MS1 scan time (apex)" +262 # EIC plot preparation +263 eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time +264 +265 # Adjust to_plot list if there are not spectra added to the mass features +266 if self.mass_spectrum is None: +267 to_plot = [x for x 
in to_plot if x != "MS1"] +268 if len(self.ms2_mass_spectra) == 0: +269 to_plot = [x for x in to_plot if x != "MS2"] +270 if self._eic_data is None: +271 to_plot = [x for x in to_plot if x != "EIC"] +272 if self._ms_deconvoluted_idx is not None: +273 deconvoluted = True +274 else: +275 deconvoluted = False +276 +277 fig, axs = plt.subplots( +278 len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False +279 ) +280 fig.suptitle( +281 "Mass Feature " +282 + str(self.id) +283 + ": m/z = " +284 + str(round(self.mz, ndigits=4)) +285 + "; time = " +286 + str(round(self.retention_time, ndigits=1)) +287 + " minutes" +288 ) +289 +290 i = 0 +291 # EIC plot +292 if "EIC" in to_plot: +293 if self._eic_data is None: +294 raise ValueError( +295 "EIC data is not available, cannot plot the mass feature's EIC" +296 ) +297 axs[i][0].set_title("EIC", loc="left") +298 axs[i][0].plot(self._eic_data.time, self._eic_data.eic) +299 if self.start_scan is not None: +300 axs[i][0].fill_between( +301 self.eic_rt_list, self.eic_list, color="b", alpha=0.2 +302 ) +303 else: +304 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: +305 print( +306 "No start and final scan numbers were provided for mass feature " +307 + str(self.id) +308 ) +309 axs[i][0].set_ylabel("Intensity") +310 axs[i][0].set_xlabel("Time (minutes)") +311 axs[i][0].set_ylim(0, self.eic_list.max() * 1.1) +312 axs[i][0].set_xlim( +313 self.retention_time - eic_buffer_time, +314 self.retention_time + eic_buffer_time, 315 ) -316 if len(self.ms2_scan_numbers) > 0: -317 axs[i][0].axvline( -318 x=self.chromatogram_parent.get_time_of_scan_id( -319 self.best_ms2.scan_number -320 ), -321 color="grey", -322 linestyle="--", -323 label="MS2 scan time", -324 ) -325 axs[i][0].legend(loc="upper left") -326 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) -327 i += 1 -328 -329 # MS1 plot -330 if "MS1" in to_plot: -331 if deconvoluted: -332 axs[i][0].set_title("MS1 (deconvoluted)", loc="left") -333 axs[i][0].vlines( 
-334 self.mass_spectrum.mz_exp, 0, self.mass_spectrum.abundance, color="k", alpha=0.2, label="Raw MS1" -335 ) +316 axs[i][0].axvline( +317 x=self.retention_time, color="k", label="MS1 scan time (apex)" +318 ) +319 if len(self.ms2_scan_numbers) > 0: +320 axs[i][0].axvline( +321 x=self.chromatogram_parent.get_time_of_scan_id( +322 self.best_ms2.scan_number +323 ), +324 color="grey", +325 linestyle="--", +326 label="MS2 scan time", +327 ) +328 axs[i][0].legend(loc="upper left") +329 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) +330 i += 1 +331 +332 # MS1 plot +333 if "MS1" in to_plot: +334 if deconvoluted: +335 axs[i][0].set_title("MS1 (deconvoluted)", loc="left") 336 axs[i][0].vlines( -337 self.mass_spectrum_deconvoluted.mz_exp, 0, self.mass_spectrum_deconvoluted.abundance, color="k", label="Deconvoluted MS1" -338 ) -339 axs[i][0].set_xlim(self.mass_spectrum_deconvoluted.mz_exp.min()*.8, self.mass_spectrum_deconvoluted.mz_exp.max()*1.1) -340 axs[i][0].set_ylim(0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1) -341 else: -342 axs[i][0].set_title("MS1 (raw)", loc="left") -343 axs[i][0].vlines( -344 self.mass_spectrum.mz_exp, 0, self.mass_spectrum.abundance, color="k", label="Raw MS1" -345 ) -346 axs[i][0].set_xlim(self.mass_spectrum.mz_exp.min()*.8, self.mass_spectrum.mz_exp.max()*1.1) -347 axs[i][0].set_ylim(bottom=0) -348 -349 if (self.ms1_peak.mz_exp - self.mz) < 0.01: -350 axs[i][0].vlines( -351 self.ms1_peak.mz_exp, -352 0, -353 self.ms1_peak.abundance, -354 color="m", -355 label="Feature m/z", -356 ) -357 +337 self.mass_spectrum.mz_exp, +338 0, +339 self.mass_spectrum.abundance, +340 color="k", +341 alpha=0.2, +342 label="Raw MS1", +343 ) +344 axs[i][0].vlines( +345 self.mass_spectrum_deconvoluted.mz_exp, +346 0, +347 self.mass_spectrum_deconvoluted.abundance, +348 color="k", +349 label="Deconvoluted MS1", +350 ) +351 axs[i][0].set_xlim( +352 self.mass_spectrum_deconvoluted.mz_exp.min() * 0.8, +353 
self.mass_spectrum_deconvoluted.mz_exp.max() * 1.1, +354 ) +355 axs[i][0].set_ylim( +356 0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1 +357 ) 358 else: -359 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: -360 print( -361 "The m/z of the mass feature " -362 + str(self.id) -363 + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted" -364 ) -365 axs[i][0].legend(loc="upper left") -366 axs[i][0].set_ylabel("Intensity") -367 axs[i][0].set_xlabel("m/z") -368 axs[i][0].yaxis.set_tick_params(labelleft=False) -369 i += 1 -370 -371 # MS2 plot -372 if "MS2" in to_plot: -373 axs[i][0].set_title("MS2", loc="left") -374 axs[i][0].vlines( -375 self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k" -376 ) -377 axs[i][0].set_ylabel("Intensity") -378 axs[i][0].set_xlabel("m/z") -379 axs[i][0].set_ylim(bottom=0) -380 axs[i][0].yaxis.get_major_formatter().set_scientific(False) -381 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) -382 axs[i][0].set_xlim(self.best_ms2.mz_exp.min()*.8, self.best_ms2.mz_exp.max()*1.1) -383 axs[i][0].yaxis.set_tick_params(labelleft=False) -384 -385 # Add space between subplots -386 plt.tight_layout() -387 -388 if return_fig: -389 # Close figure -390 plt.close(fig) -391 return fig -392 -393 @property -394 def mz(self): -395 """Mass to charge ratio of the mass feature""" -396 # If the mass feature has been calibrated, return the calibrated m/z, otherwise return the measured m/z -397 if self._mz_cal is not None: -398 return self._mz_cal -399 else: -400 return self._mz_exp -401 -402 @property -403 def mass_spectrum_deconvoluted(self): -404 """Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.""" -405 if self._ms_deconvoluted_idx is not None: -406 ms_deconvoluted = copy.deepcopy(self.mass_spectrum) -407 ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx) -408 return ms_deconvoluted -409 else: -410 raise ValueError("Deconvolution 
has not been performed for mass feature " + str(self.id)) -411 -412 @property -413 def retention_time(self): -414 """Retention time of the mass feature""" -415 return self._retention_time -416 -417 @retention_time.setter -418 def retention_time(self, value): -419 """Set the retention time of the mass feature""" -420 if not isinstance(value, float): -421 raise ValueError("The retention time of the mass feature must be a float") -422 self._retention_time = value -423 -424 @property -425 def apex_scan(self): -426 """Apex scan of the mass feature""" -427 return self._apex_scan -428 -429 @apex_scan.setter -430 def apex_scan(self, value): -431 """Set the apex scan of the mass feature""" -432 if not isinstance(value, int): -433 raise ValueError("The apex scan of the mass feature must be an integer") -434 self._apex_scan = value -435 -436 @property -437 def intensity(self): -438 """Intensity of the mass feature""" -439 return self._intensity -440 -441 @intensity.setter -442 def intensity(self, value): -443 """Set the intensity of the mass feature""" -444 if not isinstance(value, float): -445 raise ValueError("The intensity of the mass feature must be a float") -446 self._intensity = value -447 -448 @property -449 def persistence(self): -450 """Persistence of the mass feature""" -451 return self._persistence -452 -453 @persistence.setter -454 def persistence(self, value): -455 """Set the persistence of the mass feature""" -456 if not isinstance(value, float): -457 raise ValueError("The persistence of the mass feature must be a float") -458 self._persistence = value -459 -460 @property -461 def eic_rt_list(self): -462 """Retention time list between the beginning and end of the mass feature""" -463 # Find index of the start and final scans in the EIC data -464 start_index = self._eic_data.scans.tolist().index(self.start_scan) -465 final_index = self._eic_data.scans.tolist().index(self.final_scan) -466 -467 # Get the retention time list -468 rt_list = 
self._eic_data.time[start_index : final_index + 1] -469 return rt_list -470 -471 @property -472 def eic_list(self): -473 """EIC List between the beginning and end of the mass feature""" -474 # Find index of the start and final scans in the EIC data -475 start_index = self._eic_data.scans.tolist().index(self.start_scan) -476 final_index = self._eic_data.scans.tolist().index(self.final_scan) -477 -478 # Get the retention time list -479 eic = self._eic_data.eic[start_index : final_index + 1] -480 return eic -481 -482 @property -483 def ms1_peak(self): -484 """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z""" -485 # Find index array self.mass_spectrum.mz_exp that is closest to self.mz -486 closest_mz = min(self.mass_spectrum.mz_exp, key=lambda x: abs(x - self.mz)) -487 closest_mz_index = self.mass_spectrum.mz_exp.tolist().index(closest_mz) -488 -489 return self.mass_spectrum._mspeaks[closest_mz_index] -490 -491 @property -492 def tailing_factor(self): -493 """Tailing factor of the mass feature""" -494 return self._tailing_factor -495 -496 @tailing_factor.setter -497 def tailing_factor(self, value): -498 """Set the tailing factor of the mass feature""" -499 if not isinstance(value, float): -500 raise ValueError("The tailing factor of the mass feature must be a float") -501 self._tailing_factor = value -502 -503 @property -504 def dispersity_index(self): -505 """Dispersity index of the mass feature""" -506 return self._dispersity_index -507 -508 @dispersity_index.setter -509 def dispersity_index(self, value): -510 """Set the dispersity index of the mass feature""" -511 if not isinstance(value, float): -512 raise ValueError("The dispersity index of the mass feature must be a float") -513 self._dispersity_index = value -514 -515 @property -516 def half_height_width(self): -517 """Half height width of the mass feature, average of min and max values, in minutes""" -518 return np.mean(self._half_height_width) -519 -520 @property -521 def 
best_ms2(self): -522 """Points to the best representative MS2 mass spectrum +359 axs[i][0].set_title("MS1 (raw)", loc="left") +360 axs[i][0].vlines( +361 self.mass_spectrum.mz_exp, +362 0, +363 self.mass_spectrum.abundance, +364 color="k", +365 label="Raw MS1", +366 ) +367 axs[i][0].set_xlim( +368 self.mass_spectrum.mz_exp.min() * 0.8, +369 self.mass_spectrum.mz_exp.max() * 1.1, +370 ) +371 axs[i][0].set_ylim(bottom=0) +372 +373 if (self.ms1_peak.mz_exp - self.mz) < 0.01: +374 axs[i][0].vlines( +375 self.ms1_peak.mz_exp, +376 0, +377 self.ms1_peak.abundance, +378 color="m", +379 label="Feature m/z", +380 ) +381 +382 else: +383 if self.chromatogram_parent.parameters.lc_ms.verbose_processing: +384 print( +385 "The m/z of the mass feature " +386 + str(self.id) +387 + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted" +388 ) +389 axs[i][0].legend(loc="upper left") +390 axs[i][0].set_ylabel("Intensity") +391 axs[i][0].set_xlabel("m/z") +392 axs[i][0].yaxis.set_tick_params(labelleft=False) +393 i += 1 +394 +395 # MS2 plot +396 if "MS2" in to_plot: +397 axs[i][0].set_title("MS2", loc="left") +398 axs[i][0].vlines( +399 self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k" +400 ) +401 axs[i][0].set_ylabel("Intensity") +402 axs[i][0].set_xlabel("m/z") +403 axs[i][0].set_ylim(bottom=0) +404 axs[i][0].yaxis.get_major_formatter().set_scientific(False) +405 axs[i][0].yaxis.get_major_formatter().set_useOffset(False) +406 axs[i][0].set_xlim( +407 self.best_ms2.mz_exp.min() * 0.8, self.best_ms2.mz_exp.max() * 1.1 +408 ) +409 axs[i][0].yaxis.set_tick_params(labelleft=False) +410 +411 # Add space between subplots +412 plt.tight_layout() +413 +414 if return_fig: +415 # Close figure +416 plt.close(fig) +417 return fig +418 +419 @property +420 def mz(self): +421 """Mass to charge ratio of the mass feature""" +422 # If the mass feature has been calibrated, return the calibrated m/z, otherwise return the measured m/z +423 if self._mz_cal is not None: +424 
return self._mz_cal +425 else: +426 return self._mz_exp +427 +428 @property +429 def mass_spectrum_deconvoluted(self): +430 """Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.""" +431 if self._ms_deconvoluted_idx is not None: +432 ms_deconvoluted = copy.deepcopy(self.mass_spectrum) +433 ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx) +434 return ms_deconvoluted +435 else: +436 raise ValueError( +437 "Deconvolution has not been performed for mass feature " + str(self.id) +438 ) +439 +440 @property +441 def retention_time(self): +442 """Retention time of the mass feature""" +443 return self._retention_time +444 +445 @retention_time.setter +446 def retention_time(self, value): +447 """Set the retention time of the mass feature""" +448 if not isinstance(value, float): +449 raise ValueError("The retention time of the mass feature must be a float") +450 self._retention_time = value +451 +452 @property +453 def apex_scan(self): +454 """Apex scan of the mass feature""" +455 return self._apex_scan +456 +457 @apex_scan.setter +458 def apex_scan(self, value): +459 """Set the apex scan of the mass feature""" +460 if not isinstance(value, int): +461 raise ValueError("The apex scan of the mass feature must be an integer") +462 self._apex_scan = value +463 +464 @property +465 def intensity(self): +466 """Intensity of the mass feature""" +467 return self._intensity +468 +469 @intensity.setter +470 def intensity(self, value): +471 """Set the intensity of the mass feature""" +472 if not isinstance(value, float): +473 raise ValueError("The intensity of the mass feature must be a float") +474 self._intensity = value +475 +476 @property +477 def persistence(self): +478 """Persistence of the mass feature""" +479 return self._persistence +480 +481 @persistence.setter +482 def persistence(self, value): +483 """Set the persistence of the mass feature""" +484 if not isinstance(value, float): +485 raise 
ValueError("The persistence of the mass feature must be a float") +486 self._persistence = value +487 +488 @property +489 def eic_rt_list(self): +490 """Retention time list between the beginning and end of the mass feature""" +491 # Find index of the start and final scans in the EIC data +492 start_index = self._eic_data.scans.tolist().index(self.start_scan) +493 final_index = self._eic_data.scans.tolist().index(self.final_scan) +494 +495 # Get the retention time list +496 rt_list = self._eic_data.time[start_index : final_index + 1] +497 return rt_list +498 +499 @property +500 def eic_list(self): +501 """EIC List between the beginning and end of the mass feature""" +502 # Find index of the start and final scans in the EIC data +503 start_index = self._eic_data.scans.tolist().index(self.start_scan) +504 final_index = self._eic_data.scans.tolist().index(self.final_scan) +505 +506 # Get the retention time list +507 eic = self._eic_data.eic[start_index : final_index + 1] +508 return eic +509 +510 @property +511 def ms1_peak(self): +512 """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z""" +513 # Find index array self.mass_spectrum.mz_exp that is closest to self.mz +514 closest_mz = min(self.mass_spectrum.mz_exp, key=lambda x: abs(x - self.mz)) +515 closest_mz_index = self.mass_spectrum.mz_exp.tolist().index(closest_mz) +516 +517 return self.mass_spectrum._mspeaks[closest_mz_index] +518 +519 @property +520 def tailing_factor(self): +521 """Tailing factor of the mass feature""" +522 return self._tailing_factor 523 -524 Notes -525 ----- -526 If there is only one MS2 mass spectrum, it will be returned -527 If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score. -528 If there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power. 
Checks for and disqualifies possible chimeric spectra. -529 -530 Returns -531 ------- -532 MassSpectrum or None -533 The best MS2 mass spectrum. -534 """ -535 if len(self.ms2_similarity_results) > 0: -536 # the scan number with the highest similarity score -537 results_df = [x.to_dataframe() for x in self.ms2_similarity_results] -538 results_df = pd.concat(results_df) -539 results_df = results_df.sort_values( -540 by="entropy_similarity", ascending=False -541 ) -542 best_scan_number = results_df.iloc[0]["query_spectrum_id"] -543 return self.ms2_mass_spectra[best_scan_number] -544 -545 ms2_scans = list(self.ms2_mass_spectra.keys()) -546 if len(ms2_scans) > 1: -547 mz_diff_list = [] # List of mz difference between mz of mass feature and mass of nearest mz in each scan -548 res_list = [] # List of maximum resolving power of peaks in each scan -549 time_diff_list = [] # List of time difference between scan and apex scan in each scan -550 for scan in ms2_scans: -551 if len(self.ms2_mass_spectra[scan].mspeaks) > 0: -552 # Find mz closest to mass feature mz, return both the difference in mass and its resolution -553 closest_mz = min( -554 self.ms2_mass_spectra[scan].mz_exp, -555 key=lambda x: abs(x - self.mz), -556 ) -557 if all( -558 np.isnan(self.ms2_mass_spectra[scan].resolving_power) -559 ): # All NA for resolving power in peaks, not uncommon in CID spectra -560 res_list.append(2) # Assumes very low resolving power -561 else: -562 res_list.append( -563 np.nanmax(self.ms2_mass_spectra[scan].resolving_power) -564 ) -565 mz_diff_list.append(np.abs(closest_mz - self.mz)) -566 time_diff_list.append( -567 np.abs( -568 self.chromatogram_parent.get_time_of_scan_id(scan) -569 - self.retention_time -570 ) -571 ) -572 else: -573 res_list.append(np.nan) -574 mz_diff_list.append(np.nan) -575 time_diff_list.append(np.nan) -576 # Convert diff_lists into logical scores (higher is better for each score) -577 time_score = 1 - np.array(time_diff_list) / np.nanmax( -578 
np.array(time_diff_list) -579 ) -580 res_score = np.array(res_list) / np.nanmax(np.array(res_list)) -581 # mz_score is 0 for possible chimerics, 1 for all others (already within mass tolerance before assigning) -582 mz_score = np.zeros(len(ms2_scans)) -583 for i in np.arange(0, len(ms2_scans)): -584 if mz_diff_list[i] < 0.8 and mz_diff_list[i] > 0.1: # Possible chimeric -585 mz_score[i] = 0 -586 else: -587 mz_score[i] = 1 -588 # get the index of the best score and return the mass spectrum -589 if len([np.nanargmax(time_score * res_score * mz_score)]) == 1: -590 return self.ms2_mass_spectra[ -591 ms2_scans[np.nanargmax(time_score * res_score * mz_score)] -592 ] -593 # remove the mz_score condition and try again -594 elif len(np.argmax(time_score * res_score)) == 1: -595 return self.ms2_mass_spectra[ -596 ms2_scans[np.nanargmax(time_score * res_score)] -597 ] -598 else: -599 raise ValueError( -600 "No best MS2 mass spectrum could be found for mass feature " -601 + str(self.id) -602 ) -603 elif len(ms2_scans) == 1: # if only one ms2 spectra, return it -604 return self.ms2_mass_spectra[ms2_scans[0]] -605 else: # if no ms2 spectra, return None -606 return None -607 -608 -609class GCPeak(ChromaPeakBase, GCPeakCalculation): -610 """Class representing a peak in a gas chromatography (GC) chromatogram. -611 -612 Parameters -613 ---------- -614 chromatogram_parent : Chromatogram -615 The parent chromatogram object. -616 mass_spectrum_obj : MassSpectrum -617 The mass spectrum object associated with the peak. -618 indexes : tuple -619 The indexes of the peak in the chromatogram. -620 -621 Attributes -622 ---------- -623 _compounds : list -624 List of compounds associated with the peak. -625 _ri : float or None -626 Retention index of the peak. -627 -628 Methods -629 ------- -630 * __len__(). Returns the number of compounds associated with the peak. -631 * __getitem__(position). Returns the compound at the specified position. -632 * remove_compound(compounds_obj). 
Removes the specified compound from the peak. -633 * clear_compounds(). Removes all compounds from the peak. -634 * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes. -635 * ri(). Returns the retention index of the peak. -636 * highest_ss_compound(). Returns the compound with the highest spectral similarity score. -637 * highest_score_compound(). Returns the compound with the highest similarity score. -638 * compound_names(). Returns a list of names of compounds associated with the peak. -639 """ -640 -641 def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes): -642 self._compounds = [] -643 self._ri = None -644 super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes) -645 -646 def __len__(self): -647 return len(self._compounds) +524 @tailing_factor.setter +525 def tailing_factor(self, value): +526 """Set the tailing factor of the mass feature""" +527 if not isinstance(value, float): +528 raise ValueError("The tailing factor of the mass feature must be a float") +529 self._tailing_factor = value +530 +531 @property +532 def dispersity_index(self): +533 """Dispersity index of the mass feature""" +534 return self._dispersity_index +535 +536 @dispersity_index.setter +537 def dispersity_index(self, value): +538 """Set the dispersity index of the mass feature""" +539 if not isinstance(value, float): +540 raise ValueError("The dispersity index of the mass feature must be a float") +541 self._dispersity_index = value +542 +543 @property +544 def half_height_width(self): +545 """Half height width of the mass feature, average of min and max values, in minutes""" +546 return np.mean(self._half_height_width) +547 +548 @property +549 def best_ms2(self): +550 """Points to the best representative MS2 mass spectrum +551 +552 Notes +553 ----- +554 If there is only one MS2 mass spectrum, it will be returned +555 If there are MS2 similarity results, this 
will return the MS2 mass spectrum with the highest entropy similarity score. +556 If there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power. Checks for and disqualifies possible chimeric spectra. +557 +558 Returns +559 ------- +560 MassSpectrum or None +561 The best MS2 mass spectrum. +562 """ +563 if len(self.ms2_similarity_results) > 0: +564 # the scan number with the highest similarity score +565 results_df = [x.to_dataframe() for x in self.ms2_similarity_results] +566 results_df = pd.concat(results_df) +567 results_df = results_df.sort_values( +568 by="entropy_similarity", ascending=False +569 ) +570 best_scan_number = results_df.iloc[0]["query_spectrum_id"] +571 return self.ms2_mass_spectra[best_scan_number] +572 +573 ms2_scans = list(self.ms2_mass_spectra.keys()) +574 if len(ms2_scans) > 1: +575 mz_diff_list = [] # List of mz difference between mz of mass feature and mass of nearest mz in each scan +576 res_list = [] # List of maximum resolving power of peaks in each scan +577 time_diff_list = [] # List of time difference between scan and apex scan in each scan +578 for scan in ms2_scans: +579 if len(self.ms2_mass_spectra[scan].mspeaks) > 0: +580 # Find mz closest to mass feature mz, return both the difference in mass and its resolution +581 closest_mz = min( +582 self.ms2_mass_spectra[scan].mz_exp, +583 key=lambda x: abs(x - self.mz), +584 ) +585 if all( +586 np.isnan(self.ms2_mass_spectra[scan].resolving_power) +587 ): # All NA for resolving power in peaks, not uncommon in CID spectra +588 res_list.append(2) # Assumes very low resolving power +589 else: +590 res_list.append( +591 np.nanmax(self.ms2_mass_spectra[scan].resolving_power) +592 ) +593 mz_diff_list.append(np.abs(closest_mz - self.mz)) +594 time_diff_list.append( +595 np.abs( +596 self.chromatogram_parent.get_time_of_scan_id(scan) +597 - self.retention_time +598 ) +599 ) +600 else: +601 
res_list.append(np.nan) +602 mz_diff_list.append(np.nan) +603 time_diff_list.append(np.nan) +604 # Convert diff_lists into logical scores (higher is better for each score) +605 time_score = 1 - np.array(time_diff_list) / np.nanmax( +606 np.array(time_diff_list) +607 ) +608 res_score = np.array(res_list) / np.nanmax(np.array(res_list)) +609 # mz_score is 0 for possible chimerics, 1 for all others (already within mass tolerance before assigning) +610 mz_score = np.zeros(len(ms2_scans)) +611 for i in np.arange(0, len(ms2_scans)): +612 if mz_diff_list[i] < 0.8 and mz_diff_list[i] > 0.1: # Possible chimeric +613 mz_score[i] = 0 +614 else: +615 mz_score[i] = 1 +616 # get the index of the best score and return the mass spectrum +617 if len([np.nanargmax(time_score * res_score * mz_score)]) == 1: +618 return self.ms2_mass_spectra[ +619 ms2_scans[np.nanargmax(time_score * res_score * mz_score)] +620 ] +621 # remove the mz_score condition and try again +622 elif len(np.argmax(time_score * res_score)) == 1: +623 return self.ms2_mass_spectra[ +624 ms2_scans[np.nanargmax(time_score * res_score)] +625 ] +626 else: +627 raise ValueError( +628 "No best MS2 mass spectrum could be found for mass feature " +629 + str(self.id) +630 ) +631 elif len(ms2_scans) == 1: # if only one ms2 spectra, return it +632 return self.ms2_mass_spectra[ms2_scans[0]] +633 else: # if no ms2 spectra, return None +634 return None +635 +636 +637class GCPeak(ChromaPeakBase, GCPeakCalculation): +638 """Class representing a peak in a gas chromatography (GC) chromatogram. +639 +640 Parameters +641 ---------- +642 chromatogram_parent : Chromatogram +643 The parent chromatogram object. +644 mass_spectrum_obj : MassSpectrum +645 The mass spectrum object associated with the peak. +646 indexes : tuple +647 The indexes of the peak in the chromatogram. 
648 -649 def __getitem__(self, position): -650 return self._compounds[position] -651 -652 def remove_compound(self, compounds_obj): -653 self._compounds.remove(compounds_obj) -654 -655 def clear_compounds(self): -656 self._compounds = [] -657 -658 def add_compound( -659 self, -660 compounds_dict, -661 spectral_similarity_scores, -662 ri_score=None, -663 similarity_score=None, -664 ): -665 """Adds a compound to the peak with the specified attributes. -666 -667 Parameters -668 ---------- -669 compounds_dict : dict -670 Dictionary containing the compound information. -671 spectral_similarity_scores : dict -672 Dictionary containing the spectral similarity scores. -673 ri_score : float or None, optional -674 The retention index score of the compound. Default is None. -675 similarity_score : float or None, optional -676 The similarity score of the compound. Default is None. -677 """ -678 compound_obj = LowResCompoundRef(compounds_dict) -679 compound_obj.spectral_similarity_scores = spectral_similarity_scores -680 compound_obj.spectral_similarity_score = spectral_similarity_scores.get( -681 "cosine_correlation" -682 ) -683 # TODO check is the above line correct? -684 compound_obj.ri_score = ri_score -685 compound_obj.similarity_score = similarity_score -686 self._compounds.append(compound_obj) -687 if similarity_score: -688 self._compounds.sort(key=lambda c: c.similarity_score, reverse=True) -689 else: -690 self._compounds.sort( -691 key=lambda c: c.spectral_similarity_score, reverse=True -692 ) -693 -694 @property -695 def ri(self): -696 """Returns the retention index of the peak. -697 -698 Returns -699 ------- -700 float or None -701 The retention index of the peak. -702 """ -703 return self._ri -704 -705 @property -706 def highest_ss_compound(self): -707 """Returns the compound with the highest spectral similarity score. -708 -709 Returns -710 ------- -711 LowResCompoundRef or None -712 The compound with the highest spectral similarity score. 
-713 """ -714 if self: -715 return max(self, key=lambda c: c.spectral_similarity_score) -716 else: -717 return None -718 -719 @property -720 def highest_score_compound(self): -721 """Returns the compound with the highest similarity score. -722 -723 Returns -724 ------- -725 LowResCompoundRef or None -726 The compound with the highest similarity score. -727 """ -728 if self: -729 return max(self, key=lambda c: c.similarity_score) -730 else: -731 return None +649 Attributes +650 ---------- +651 _compounds : list +652 List of compounds associated with the peak. +653 _ri : float or None +654 Retention index of the peak. +655 +656 Methods +657 ------- +658 * __len__(). Returns the number of compounds associated with the peak. +659 * __getitem__(position). Returns the compound at the specified position. +660 * remove_compound(compounds_obj). Removes the specified compound from the peak. +661 * clear_compounds(). Removes all compounds from the peak. +662 * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes. +663 * ri(). Returns the retention index of the peak. +664 * highest_ss_compound(). Returns the compound with the highest spectral similarity score. +665 * highest_score_compound(). Returns the compound with the highest similarity score. +666 * compound_names(). Returns a list of names of compounds associated with the peak. 
+667 """ +668 +669 def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes): +670 self._compounds = [] +671 self._ri = None +672 super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes) +673 +674 def __len__(self): +675 return len(self._compounds) +676 +677 def __getitem__(self, position): +678 return self._compounds[position] +679 +680 def remove_compound(self, compounds_obj): +681 self._compounds.remove(compounds_obj) +682 +683 def clear_compounds(self): +684 self._compounds = [] +685 +686 def add_compound( +687 self, +688 compounds_dict, +689 spectral_similarity_scores, +690 ri_score=None, +691 similarity_score=None, +692 ): +693 """Adds a compound to the peak with the specified attributes. +694 +695 Parameters +696 ---------- +697 compounds_dict : dict +698 Dictionary containing the compound information. +699 spectral_similarity_scores : dict +700 Dictionary containing the spectral similarity scores. +701 ri_score : float or None, optional +702 The retention index score of the compound. Default is None. +703 similarity_score : float or None, optional +704 The similarity score of the compound. Default is None. +705 """ +706 compound_obj = LowResCompoundRef(compounds_dict) +707 compound_obj.spectral_similarity_scores = spectral_similarity_scores +708 compound_obj.spectral_similarity_score = spectral_similarity_scores.get( +709 "cosine_correlation" +710 ) +711 # TODO check is the above line correct? +712 compound_obj.ri_score = ri_score +713 compound_obj.similarity_score = similarity_score +714 self._compounds.append(compound_obj) +715 if similarity_score: +716 self._compounds.sort(key=lambda c: c.similarity_score, reverse=True) +717 else: +718 self._compounds.sort( +719 key=lambda c: c.spectral_similarity_score, reverse=True +720 ) +721 +722 @property +723 def ri(self): +724 """Returns the retention index of the peak. +725 +726 Returns +727 ------- +728 float or None +729 The retention index of the peak. 
+730 """ +731 return self._ri 732 733 @property -734 def compound_names(self): -735 """Returns a list of names of compounds associated with the peak. +734 def highest_ss_compound(self): +735 """Returns the compound with the highest spectral similarity score. 736 737 Returns 738 ------- -739 list -740 List of names of compounds associated with the peak. +739 LowResCompoundRef or None +740 The compound with the highest spectral similarity score. 741 """ 742 if self: -743 return [c.name for c in self] +743 return max(self, key=lambda c: c.spectral_similarity_score) 744 else: -745 return [] +745 return None 746 -747 -748class GCPeakDeconvolved(GCPeak): -749 """Represents a deconvolved peak in a chromatogram. +747 @property +748 def highest_score_compound(self): +749 """Returns the compound with the highest similarity score. 750 -751 Parameters -752 ---------- -753 chromatogram_parent : Chromatogram -754 The parent chromatogram object. -755 mass_spectra : list -756 List of mass spectra associated with the peak. -757 apex_index : int -758 Index of the apex mass spectrum in the `mass_spectra` list. -759 rt_list : list -760 List of retention times. -761 tic_list : list -762 List of total ion currents. -763 """ +751 Returns +752 ------- +753 LowResCompoundRef or None +754 The compound with the highest similarity score. +755 """ +756 if self: +757 return max(self, key=lambda c: c.similarity_score) +758 else: +759 return None +760 +761 @property +762 def compound_names(self): +763 """Returns a list of names of compounds associated with the peak. 
764 -765 def __init__( -766 self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list -767 ): -768 self._ri = None -769 self._rt_list = list(rt_list) -770 self._tic_list = list(tic_list) -771 self.mass_spectra = list(mass_spectra) -772 super().__init__( -773 chromatogram_parent, -774 self.mass_spectra[apex_index], -775 (0, apex_index, len(self.mass_spectra) - 1), -776 ) -777 -778 @property -779 def rt_list(self): -780 """Get the list of retention times. -781 -782 Returns -783 ------- -784 list -785 The list of retention times. -786 """ -787 return self._rt_list -788 -789 @property -790 def tic_list(self): -791 """Get the list of total ion currents. +765 Returns +766 ------- +767 list +768 List of names of compounds associated with the peak. +769 """ +770 if self: +771 return [c.name for c in self] +772 else: +773 return [] +774 +775 +776class GCPeakDeconvolved(GCPeak): +777 """Represents a deconvolved peak in a chromatogram. +778 +779 Parameters +780 ---------- +781 chromatogram_parent : Chromatogram +782 The parent chromatogram object. +783 mass_spectra : list +784 List of mass spectra associated with the peak. +785 apex_index : int +786 Index of the apex mass spectrum in the `mass_spectra` list. +787 rt_list : list +788 List of retention times. +789 tic_list : list +790 List of total ion currents. +791 """ 792 -793 Returns -794 ------- -795 list -796 The list of total ion currents. -797 """ -798 return self._tic_list +793 def __init__( +794 self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list +795 ): +796 self._ri = None +797 self._rt_list = list(rt_list) +798 self._tic_list = list(tic_list) +799 self.mass_spectra = list(mass_spectra) +800 super().__init__( +801 chromatogram_parent, +802 self.mass_spectra[apex_index], +803 (0, apex_index, len(self.mass_spectra) - 1), +804 ) +805 +806 @property +807 def rt_list(self): +808 """Get the list of retention times. +809 +810 Returns +811 ------- +812 list +813 The list of retention times. 
+814 """ +815 return self._rt_list +816 +817 @property +818 def tic_list(self): +819 """Get the list of total ion currents. +820 +821 Returns +822 ------- +823 list +824 The list of total ion currents. +825 """ +826 return self._tic_list

    @@ -1026,95 +1054,95 @@

    -
     15class ChromaPeakBase:
    - 16    """Base class for chromatographic peak (ChromaPeak) objects.
    - 17
    - 18    Parameters
    - 19    -------
    - 20    chromatogram_parent : Chromatogram
    - 21        The parent chromatogram object.
    - 22    mass_spectrum_obj : MassSpectrum
    - 23        The mass spectrum object.
    - 24    start_index : int
    - 25        The start index of the peak.
    - 26    index : int
    - 27        The index of the peak.
    - 28    final_index : int
    - 29        The final index of the peak.
    - 30
    - 31    Attributes
    - 32    --------
    - 33    start_scan : int
    - 34        The start scan of the peak.
    - 35    final_scan : int
    - 36        The final scan of the peak.
    - 37    apex_scan : int
    - 38        The apex scan of the peak.
    - 39    chromatogram_parent : Chromatogram
    - 40        The parent chromatogram object.
    - 41    mass_spectrum : MassSpectrum
    - 42        The mass spectrum object.
    - 43    _area : float
    - 44        The area of the peak.
    - 45
    - 46    Properties
    - 47    --------
    - 48    * retention_time : float.
    - 49        The retention time of the peak.
    - 50    * tic : float.
    - 51        The total ion current of the peak.
    - 52    * area : float.
    - 53        The area of the peak.
    - 54    * rt_list : list.
    - 55        The list of retention times within the peak.
    - 56    * tic_list : list.
    - 57        The list of total ion currents within the peak.
    - 58
    - 59    Methods
    - 60    --------
    - 61    * None
    - 62    """
    - 63
    - 64    def __init__(
    - 65        self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index
    - 66    ):
    - 67        self.start_scan = start_index
    - 68        self.final_scan = final_index
    - 69        self.apex_scan = int(index)
    - 70        self.chromatogram_parent = chromatogram_parent
    - 71        self.mass_spectrum = mass_spectrum_obj
    - 72        self._area = None
    - 73
    - 74    @property
    - 75    def retention_time(self):
    - 76        """Retention Time"""
    - 77        return self.mass_spectrum.retention_time
    - 78
    - 79    @property
    - 80    def tic(self):
    - 81        """Total Ion Current"""
    - 82        return self.mass_spectrum.tic
    - 83
    - 84    @property
    - 85    def area(self):
    - 86        """Peak Area"""
    - 87        return self._area
    - 88
    - 89    @property
    - 90    def rt_list(self):
    - 91        """Retention Time List"""
    - 92        return [
    - 93            self.chromatogram_parent.retention_time[i]
    - 94            for i in range(self.start_scan, self.final_scan + 1)
    - 95        ]
    - 96
    - 97    @property
    - 98    def tic_list(self):
    - 99        """Total Ion Current List"""
    -100        return [
    -101            self.chromatogram_parent.tic[i]
    -102            for i in range(self.start_scan, self.final_scan + 1)
    -103        ]
    +            
     18class ChromaPeakBase:
    + 19    """Base class for chromatographic peak (ChromaPeak) objects.
    + 20
    + 21    Parameters
    + 22    -------
    + 23    chromatogram_parent : Chromatogram
    + 24        The parent chromatogram object.
    + 25    mass_spectrum_obj : MassSpectrum
    + 26        The mass spectrum object.
    + 27    start_index : int
    + 28        The start index of the peak.
    + 29    index : int
    + 30        The index of the peak.
    + 31    final_index : int
    + 32        The final index of the peak.
    + 33
    + 34    Attributes
    + 35    --------
    + 36    start_scan : int
    + 37        The start scan of the peak.
    + 38    final_scan : int
    + 39        The final scan of the peak.
    + 40    apex_scan : int
    + 41        The apex scan of the peak.
    + 42    chromatogram_parent : Chromatogram
    + 43        The parent chromatogram object.
    + 44    mass_spectrum : MassSpectrum
    + 45        The mass spectrum object.
    + 46    _area : float
    + 47        The area of the peak.
    + 48
    + 49    Properties
    + 50    --------
    + 51    * retention_time : float.
    + 52        The retention time of the peak.
    + 53    * tic : float.
    + 54        The total ion current of the peak.
    + 55    * area : float.
    + 56        The area of the peak.
    + 57    * rt_list : list.
    + 58        The list of retention times within the peak.
    + 59    * tic_list : list.
    + 60        The list of total ion currents within the peak.
    + 61
    + 62    Methods
    + 63    --------
    + 64    * None
    + 65    """
    + 66
    + 67    def __init__(
    + 68        self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index
    + 69    ):
    + 70        self.start_scan = start_index
    + 71        self.final_scan = final_index
    + 72        self.apex_scan = int(index)
    + 73        self.chromatogram_parent = chromatogram_parent
    + 74        self.mass_spectrum = mass_spectrum_obj
    + 75        self._area = None
    + 76
    + 77    @property
    + 78    def retention_time(self):
    + 79        """Retention Time"""
    + 80        return self.mass_spectrum.retention_time
    + 81
    + 82    @property
    + 83    def tic(self):
    + 84        """Total Ion Current"""
    + 85        return self.mass_spectrum.tic
    + 86
    + 87    @property
    + 88    def area(self):
    + 89        """Peak Area"""
    + 90        return self._area
    + 91
    + 92    @property
    + 93    def rt_list(self):
    + 94        """Retention Time List"""
    + 95        return [
    + 96            self.chromatogram_parent.retention_time[i]
    + 97            for i in range(self.start_scan, self.final_scan + 1)
    + 98        ]
    + 99
    +100    @property
    +101    def tic_list(self):
    +102        """Total Ion Current List"""
    +103        return [
    +104            self.chromatogram_parent.tic[i]
    +105            for i in range(self.start_scan, self.final_scan + 1)
    +106        ]
     
    @@ -1185,15 +1213,15 @@
    Methods
    -
    64    def __init__(
    -65        self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index
    -66    ):
    -67        self.start_scan = start_index
    -68        self.final_scan = final_index
    -69        self.apex_scan = int(index)
    -70        self.chromatogram_parent = chromatogram_parent
    -71        self.mass_spectrum = mass_spectrum_obj
    -72        self._area = None
    +            
    67    def __init__(
    +68        self, chromatogram_parent, mass_spectrum_obj, start_index, index, final_index
    +69    ):
    +70        self.start_scan = start_index
    +71        self.final_scan = final_index
    +72        self.apex_scan = int(index)
    +73        self.chromatogram_parent = chromatogram_parent
    +74        self.mass_spectrum = mass_spectrum_obj
    +75        self._area = None
     
    @@ -1332,508 +1360,533 @@
    Methods
    -
    106class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation):
    -107    """Class representing a mass feature in a liquid chromatography (LC) chromatogram.
    -108
    -109    Parameters
    -110    -------
    -111    lcms_parent : LCMS
    -112        The parent LCMSBase object.
    -113    mz : float
    -114        The observed mass to charge ratio of the feature.
    -115    retention_time : float
    -116        The retention time of the feature (in minutes), at the apex.
    -117    intensity : float
    -118        The intensity of the feature.
    -119    apex_scan : int
    -120        The scan number of the apex of the feature.
    -121    persistence : float, optional
    -122        The persistence of the feature. Default is None.
    -123
    -124    Attributes
    -125    --------
    -126    _mz_exp : float
    -127        The observed mass to charge ratio of the feature.
    -128    _mz_cal : float
    -129        The calibrated mass to charge ratio of the feature.
    -130    _retention_time : float
    -131        The retention time of the feature (in minutes), at the apex.
    -132    _apex_scan : int
    -133        The scan number of the apex of the feature.
    -134    _intensity : float
    -135        The intensity of the feature.
    -136    _persistence : float
    -137        The persistence of the feature.
    -138    _eic_data : EIC_Data
    -139        The EIC data object associated with the feature.
    -140    _dispersity_index : float
    -141        The dispersity index of the feature.
    -142    _half_height_width : numpy.ndarray
    -143        The half height width of the feature (in minutes, as an array of min and max values).
    -144    _tailing_factor : float
    -145        The tailing factor of the feature. 
    -146        > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
    -147    _ms_deconvoluted_idx : [int]
    -148        The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
    -149    is_calibrated : bool
    -150        If True, the feature has been calibrated. Default is False.
    -151    monoisotopic_mf_id : int
    -152        Mass feature id that is the monoisotopic version of self.
    -153        If self.id, then self is the monoisotopic feature). Default is None.
    -154    isotopologue_type : str
    -155        The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. 
    -156        Default is None.
    -157    ms2_scan_numbers : list
    -158        List of scan numbers of the MS2 spectra associated with the feature. 
    -159        Default is an empty list.
    -160    ms2_mass_spectra : dict
    -161        Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). 
    -162        Default is an empty dictionary.
    -163    ms2_similarity_results : list
    -164        List of MS2 similarity results associated with the mass feature. 
    -165        Default is an empty list.
    -166    id : int
    -167        The ID of the feature, also the key in the parent LCMS object's 
    -168        `mass_features` dictionary.
    -169    mass_spectrum_deconvoluted_parent : bool
    -170        If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
    -171    associated_mass_features_deconvoluted : list
    -172        List of mass features associated with the deconvoluted mass spectrum. Default is an empty list.       
    -173
    -174    """
    -175
    -176    def __init__(
    -177        self,
    -178        lcms_parent,
    -179        mz: float,
    -180        retention_time: float,
    -181        intensity: float,
    -182        apex_scan: int,
    -183        persistence: float = None,
    -184        id: int = None,
    -185    ):
    -186        super().__init__(
    -187            chromatogram_parent=lcms_parent,
    -188            mass_spectrum_obj=None,
    -189            start_index=None,
    -190            index=apex_scan,
    -191            final_index=None,
    -192        )
    -193        # Core attributes, marked as private
    -194        self._mz_exp: float = mz
    -195        self._mz_cal: float = None
    -196        self._retention_time: float = retention_time
    -197        self._apex_scan: int = apex_scan
    -198        self._intensity: float = intensity
    -199        self._persistence: float = persistence
    -200        self._eic_data: EIC_Data = None
    -201        self._dispersity_index: float = None
    -202        self._half_height_width: np.ndarray = None
    -203        self._ms_deconvoluted_idx = None
    -204
    -205        # Additional attributes
    -206        self.monoisotopic_mf_id = None
    -207        self.isotopologue_type = None
    -208        self.ms2_scan_numbers = []
    -209        self.ms2_mass_spectra = {}
    -210        self.ms2_similarity_results = []
    -211        self.mass_spectrum_deconvoluted_parent: bool = None
    -212        self.associated_mass_features_deconvoluted = []
    -213
    -214        if id:
    -215            self.id = id
    -216        else:
    -217            # get the parent's mass feature keys and add 1 to the max value to get the new key
    -218            self.id = (
    -219                max(lcms_parent.mass_features.keys()) + 1
    -220                if lcms_parent.mass_features.keys()
    -221                else 0
    -222            )
    -223
    -224    def update_mz(self):
    -225        """Update the mass to charge ratio from the mass spectrum object."""
    -226        if self.mass_spectrum is None:
    -227            raise ValueError(
    -228                "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
    -229            )
    -230        if len(self.mass_spectrum.mz_exp) == 0:
    -231            raise ValueError(
    -232                "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
    -233            )
    -234        new_mz = self.ms1_peak.mz_exp
    -235
    -236        # calculate the difference between the new and old m/z, only update if it is close
    -237        mz_diff = new_mz - self.mz
    -238        if abs(mz_diff) < 0.01:
    -239            self._mz_exp = new_mz
    -240
    -241    def plot(self, to_plot=["EIC", "MS1", "MS2"], return_fig=True):
    -242        """Plot the mass feature.
    +            
    109class LCMSMassFeature(ChromaPeakBase, LCMSMassFeatureCalculation):
    +110    """Class representing a mass feature in a liquid chromatography (LC) chromatogram.
    +111
    +112    Parameters
    +113    -------
    +114    lcms_parent : LCMS
    +115        The parent LCMSBase object.
    +116    mz : float
    +117        The observed mass to charge ratio of the feature.
    +118    retention_time : float
    +119        The retention time of the feature (in minutes), at the apex.
    +120    intensity : float
    +121        The intensity of the feature.
    +122    apex_scan : int
    +123        The scan number of the apex of the feature.
    +124    persistence : float, optional
    +125        The persistence of the feature. Default is None.
    +126
    +127    Attributes
    +128    --------
    +129    _mz_exp : float
    +130        The observed mass to charge ratio of the feature.
    +131    _mz_cal : float
    +132        The calibrated mass to charge ratio of the feature.
    +133    _retention_time : float
    +134        The retention time of the feature (in minutes), at the apex.
    +135    _apex_scan : int
    +136        The scan number of the apex of the feature.
    +137    _intensity : float
    +138        The intensity of the feature.
    +139    _persistence : float
    +140        The persistence of the feature.
    +141    _eic_data : EIC_Data
    +142        The EIC data object associated with the feature.
    +143    _dispersity_index : float
    +144        The dispersity index of the feature.
    +145    _half_height_width : numpy.ndarray
    +146        The half height width of the feature (in minutes, as an array of min and max values).
    +147    _tailing_factor : float
    +148        The tailing factor of the feature.
    +149        > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
    +150    _ms_deconvoluted_idx : [int]
    +151        The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
    +152    is_calibrated : bool
    +153        If True, the feature has been calibrated. Default is False.
    +154    monoisotopic_mf_id : int
    +155        Mass feature id that is the monoisotopic version of self.
    +156        If self.id, then self is the monoisotopic feature). Default is None.
    +157    isotopologue_type : str
    +158        The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc.
    +159        Default is None.
    +160    ms2_scan_numbers : list
    +161        List of scan numbers of the MS2 spectra associated with the feature.
    +162        Default is an empty list.
    +163    ms2_mass_spectra : dict
    +164        Dictionary of MS2 spectra associated with the feature (key = scan number for DDA).
    +165        Default is an empty dictionary.
    +166    ms2_similarity_results : list
    +167        List of MS2 similarity results associated with the mass feature.
    +168        Default is an empty list.
    +169    id : int
    +170        The ID of the feature, also the key in the parent LCMS object's
    +171        `mass_features` dictionary.
    +172    mass_spectrum_deconvoluted_parent : bool
    +173        If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
    +174    associated_mass_features_deconvoluted : list
    +175        List of mass features associated with the deconvoluted mass spectrum. Default is an empty list.
    +176
    +177    """
    +178
    +179    def __init__(
    +180        self,
    +181        lcms_parent,
    +182        mz: float,
    +183        retention_time: float,
    +184        intensity: float,
    +185        apex_scan: int,
    +186        persistence: float = None,
    +187        id: int = None,
    +188    ):
    +189        super().__init__(
    +190            chromatogram_parent=lcms_parent,
    +191            mass_spectrum_obj=None,
    +192            start_index=None,
    +193            index=apex_scan,
    +194            final_index=None,
    +195        )
    +196        # Core attributes, marked as private
    +197        self._mz_exp: float = mz
    +198        self._mz_cal: float = None
    +199        self._retention_time: float = retention_time
    +200        self._apex_scan: int = apex_scan
    +201        self._intensity: float = intensity
    +202        self._persistence: float = persistence
    +203        self._eic_data: EIC_Data = None
    +204        self._dispersity_index: float = None
    +205        self._half_height_width: np.ndarray = None
    +206        self._ms_deconvoluted_idx = None
    +207
    +208        # Additional attributes
    +209        self.monoisotopic_mf_id = None
    +210        self.isotopologue_type = None
    +211        self.ms2_scan_numbers = []
    +212        self.ms2_mass_spectra = {}
    +213        self.ms2_similarity_results = []
    +214        self.mass_spectrum_deconvoluted_parent: bool = None
    +215        self.associated_mass_features_deconvoluted = []
    +216
    +217        if id:
    +218            self.id = id
    +219        else:
    +220            # get the parent's mass feature keys and add 1 to the max value to get the new key
    +221            self.id = (
    +222                max(lcms_parent.mass_features.keys()) + 1
    +223                if lcms_parent.mass_features.keys()
    +224                else 0
    +225            )
    +226
    +227    def update_mz(self):
    +228        """Update the mass to charge ratio from the mass spectrum object."""
    +229        if self.mass_spectrum is None:
    +230            raise ValueError(
    +231                "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
    +232            )
    +233        if len(self.mass_spectrum.mz_exp) == 0:
    +234            raise ValueError(
    +235                "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
    +236            )
    +237        new_mz = self.ms1_peak.mz_exp
    +238
    +239        # calculate the difference between the new and old m/z, only update if it is close
    +240        mz_diff = new_mz - self.mz
    +241        if abs(mz_diff) < 0.01:
    +242            self._mz_exp = new_mz
     243
    -244        Parameters
    -245        ----------
    -246        to_plot : list, optional
    -247            List of strings specifying what to plot, any iteration of 
    -248            "EIC", "MS2", and "MS1". 
    -249            Default is ["EIC", "MS1", "MS2"].
    -250        return_fig : bool, optional
    -251            If True, the figure is returned. Default is True.
    -252
    -253        Returns
    -254        -------
    -255        matplotlib.figure.Figure or None
    -256            The figure object if `return_fig` is True. 
    -257            Otherwise None and the figure is displayed.
    -258        """
    -259
    -260        # EIC plot preparation
    -261        eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time
    +244    def plot(self, to_plot=["EIC", "MS1", "MS2"], return_fig=True):
    +245        """Plot the mass feature.
    +246
    +247        Parameters
    +248        ----------
    +249        to_plot : list, optional
    +250            List of strings specifying what to plot, any iteration of
    +251            "EIC", "MS2", and "MS1".
    +252            Default is ["EIC", "MS1", "MS2"].
    +253        return_fig : bool, optional
    +254            If True, the figure is returned. Default is True.
    +255
    +256        Returns
    +257        -------
    +258        matplotlib.figure.Figure or None
    +259            The figure object if `return_fig` is True.
    +260            Otherwise None and the figure is displayed.
    +261        """
     262
    -263        # Adjust to_plot list if there are not spectra added to the mass features
    -264        if self.mass_spectrum is None:
    -265            to_plot = [x for x in to_plot if x != "MS1"]
    -266        if len(self.ms2_mass_spectra) == 0:
    -267            to_plot = [x for x in to_plot if x != "MS2"]
    -268        if self._eic_data is None:
    -269            to_plot = [x for x in to_plot if x != "EIC"]
    -270        if self._ms_deconvoluted_idx is not None:
    -271            deconvoluted = True
    -272        else:
    -273            deconvoluted = False
    -274
    -275        fig, axs = plt.subplots(
    -276            len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
    -277        )
    -278        fig.suptitle(
    -279            "Mass Feature "
    -280            + str(self.id)
    -281            + ": m/z = "
    -282            + str(round(self.mz, ndigits=4))
    -283            + "; time = "
    -284            + str(round(self.retention_time, ndigits=1))
    -285            + " minutes"
    -286        )
    -287
    -288        i = 0
    -289        # EIC plot
    -290        if "EIC" in to_plot:
    -291            if self._eic_data is None:
    -292                raise ValueError(
    -293                    "EIC data is not available, cannot plot the mass feature's EIC"
    -294                )
    -295            axs[i][0].set_title("EIC", loc="left")
    -296            axs[i][0].plot(self._eic_data.time, self._eic_data.eic)
    -297            if self.start_scan is not None:
    -298                axs[i][0].fill_between(
    -299                    self.eic_rt_list, self.eic_list, color="b", alpha=0.2
    -300                )
    -301            else:
    -302                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
    -303                    print(
    -304                        "No start and final scan numbers were provided for mass feature "
    -305                        + str(self.id)
    -306                    )
    -307            axs[i][0].set_ylabel("Intensity")
    -308            axs[i][0].set_xlabel("Time (minutes)")
    -309            axs[i][0].set_ylim(0, self.eic_list.max() * 1.1)
    -310            axs[i][0].set_xlim(
    -311                self.retention_time - eic_buffer_time,
    -312                self.retention_time + eic_buffer_time,
    -313            )
    -314            axs[i][0].axvline(
    -315                x=self.retention_time, color="k", label="MS1 scan time (apex)"
    +263        # EIC plot preparation
    +264        eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time
    +265
    +266        # Adjust to_plot list if there are not spectra added to the mass features
    +267        if self.mass_spectrum is None:
    +268            to_plot = [x for x in to_plot if x != "MS1"]
    +269        if len(self.ms2_mass_spectra) == 0:
    +270            to_plot = [x for x in to_plot if x != "MS2"]
    +271        if self._eic_data is None:
    +272            to_plot = [x for x in to_plot if x != "EIC"]
    +273        if self._ms_deconvoluted_idx is not None:
    +274            deconvoluted = True
    +275        else:
    +276            deconvoluted = False
    +277
    +278        fig, axs = plt.subplots(
    +279            len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
    +280        )
    +281        fig.suptitle(
    +282            "Mass Feature "
    +283            + str(self.id)
    +284            + ": m/z = "
    +285            + str(round(self.mz, ndigits=4))
    +286            + "; time = "
    +287            + str(round(self.retention_time, ndigits=1))
    +288            + " minutes"
    +289        )
    +290
    +291        i = 0
    +292        # EIC plot
    +293        if "EIC" in to_plot:
    +294            if self._eic_data is None:
    +295                raise ValueError(
    +296                    "EIC data is not available, cannot plot the mass feature's EIC"
    +297                )
    +298            axs[i][0].set_title("EIC", loc="left")
    +299            axs[i][0].plot(self._eic_data.time, self._eic_data.eic)
    +300            if self.start_scan is not None:
    +301                axs[i][0].fill_between(
    +302                    self.eic_rt_list, self.eic_list, color="b", alpha=0.2
    +303                )
    +304            else:
    +305                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
    +306                    print(
    +307                        "No start and final scan numbers were provided for mass feature "
    +308                        + str(self.id)
    +309                    )
    +310            axs[i][0].set_ylabel("Intensity")
    +311            axs[i][0].set_xlabel("Time (minutes)")
    +312            axs[i][0].set_ylim(0, self.eic_list.max() * 1.1)
    +313            axs[i][0].set_xlim(
    +314                self.retention_time - eic_buffer_time,
    +315                self.retention_time + eic_buffer_time,
     316            )
    -317            if len(self.ms2_scan_numbers) > 0:
    -318                axs[i][0].axvline(
    -319                    x=self.chromatogram_parent.get_time_of_scan_id(
    -320                        self.best_ms2.scan_number
    -321                    ),
    -322                    color="grey",
    -323                    linestyle="--",
    -324                    label="MS2 scan time",
    -325                )
    -326            axs[i][0].legend(loc="upper left")
    -327            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
    -328            i += 1
    -329
    -330        # MS1 plot
    -331        if "MS1" in to_plot:
    -332            if deconvoluted:
    -333                axs[i][0].set_title("MS1 (deconvoluted)", loc="left")
    -334                axs[i][0].vlines(
    -335                    self.mass_spectrum.mz_exp, 0, self.mass_spectrum.abundance, color="k", alpha=0.2, label="Raw MS1"
    -336                )
    +317            axs[i][0].axvline(
    +318                x=self.retention_time, color="k", label="MS1 scan time (apex)"
    +319            )
    +320            if len(self.ms2_scan_numbers) > 0:
    +321                axs[i][0].axvline(
    +322                    x=self.chromatogram_parent.get_time_of_scan_id(
    +323                        self.best_ms2.scan_number
    +324                    ),
    +325                    color="grey",
    +326                    linestyle="--",
    +327                    label="MS2 scan time",
    +328                )
    +329            axs[i][0].legend(loc="upper left")
    +330            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
    +331            i += 1
    +332
    +333        # MS1 plot
    +334        if "MS1" in to_plot:
    +335            if deconvoluted:
    +336                axs[i][0].set_title("MS1 (deconvoluted)", loc="left")
     337                axs[i][0].vlines(
    -338                    self.mass_spectrum_deconvoluted.mz_exp, 0, self.mass_spectrum_deconvoluted.abundance, color="k", label="Deconvoluted MS1"
    -339                )
    -340                axs[i][0].set_xlim(self.mass_spectrum_deconvoluted.mz_exp.min()*.8, self.mass_spectrum_deconvoluted.mz_exp.max()*1.1)
    -341                axs[i][0].set_ylim(0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1)
    -342            else:
    -343                axs[i][0].set_title("MS1 (raw)", loc="left")
    -344                axs[i][0].vlines(
    -345                    self.mass_spectrum.mz_exp, 0, self.mass_spectrum.abundance, color="k", label="Raw MS1"
    -346                )
    -347                axs[i][0].set_xlim(self.mass_spectrum.mz_exp.min()*.8, self.mass_spectrum.mz_exp.max()*1.1)
    -348                axs[i][0].set_ylim(bottom=0)
    -349
    -350            if (self.ms1_peak.mz_exp - self.mz) < 0.01:
    -351                axs[i][0].vlines(
    -352                    self.ms1_peak.mz_exp,
    -353                    0,
    -354                    self.ms1_peak.abundance,
    -355                    color="m",
    -356                    label="Feature m/z",
    -357                )
    -358
    +338                    self.mass_spectrum.mz_exp,
    +339                    0,
    +340                    self.mass_spectrum.abundance,
    +341                    color="k",
    +342                    alpha=0.2,
    +343                    label="Raw MS1",
    +344                )
    +345                axs[i][0].vlines(
    +346                    self.mass_spectrum_deconvoluted.mz_exp,
    +347                    0,
    +348                    self.mass_spectrum_deconvoluted.abundance,
    +349                    color="k",
    +350                    label="Deconvoluted MS1",
    +351                )
    +352                axs[i][0].set_xlim(
    +353                    self.mass_spectrum_deconvoluted.mz_exp.min() * 0.8,
    +354                    self.mass_spectrum_deconvoluted.mz_exp.max() * 1.1,
    +355                )
    +356                axs[i][0].set_ylim(
    +357                    0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1
    +358                )
     359            else:
    -360                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
    -361                    print(
    -362                        "The m/z of the mass feature "
    -363                        + str(self.id)
    -364                        + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
    -365                    )
    -366            axs[i][0].legend(loc="upper left")
    -367            axs[i][0].set_ylabel("Intensity")
    -368            axs[i][0].set_xlabel("m/z")
    -369            axs[i][0].yaxis.set_tick_params(labelleft=False)
    -370            i += 1
    -371
    -372        # MS2 plot
    -373        if "MS2" in to_plot:
    -374            axs[i][0].set_title("MS2", loc="left")
    -375            axs[i][0].vlines(
    -376                self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k"
    -377            )
    -378            axs[i][0].set_ylabel("Intensity")
    -379            axs[i][0].set_xlabel("m/z")
    -380            axs[i][0].set_ylim(bottom=0)
    -381            axs[i][0].yaxis.get_major_formatter().set_scientific(False)
    -382            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
    -383            axs[i][0].set_xlim(self.best_ms2.mz_exp.min()*.8, self.best_ms2.mz_exp.max()*1.1)
    -384            axs[i][0].yaxis.set_tick_params(labelleft=False)
    -385
    -386        # Add space between subplots
    -387        plt.tight_layout()
    -388
    -389        if return_fig:
    -390            # Close figure
    -391            plt.close(fig)
    -392            return fig
    -393
    -394    @property
    -395    def mz(self):
    -396        """Mass to charge ratio of the mass feature"""
    -397        # If the mass feature has been calibrated, return the calibrated m/z, otherwise return the measured m/z
    -398        if self._mz_cal is not None:
    -399            return self._mz_cal
    -400        else:
    -401            return self._mz_exp
    -402    
    -403    @property
    -404    def mass_spectrum_deconvoluted(self):
    -405        """Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed."""
    -406        if self._ms_deconvoluted_idx is not None:
    -407            ms_deconvoluted = copy.deepcopy(self.mass_spectrum)
    -408            ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx)
    -409            return ms_deconvoluted
    -410        else:
    -411            raise ValueError("Deconvolution has not been performed for mass feature " + str(self.id))
    -412
    -413    @property
    -414    def retention_time(self):
    -415        """Retention time of the mass feature"""
    -416        return self._retention_time
    -417
    -418    @retention_time.setter
    -419    def retention_time(self, value):
    -420        """Set the retention time of the mass feature"""
    -421        if not isinstance(value, float):
    -422            raise ValueError("The retention time of the mass feature must be a float")
    -423        self._retention_time = value
    -424
    -425    @property
    -426    def apex_scan(self):
    -427        """Apex scan of the mass feature"""
    -428        return self._apex_scan
    -429
    -430    @apex_scan.setter
    -431    def apex_scan(self, value):
    -432        """Set the apex scan of the mass feature"""
    -433        if not isinstance(value, int):
    -434            raise ValueError("The apex scan of the mass feature must be an integer")
    -435        self._apex_scan = value
    -436
    -437    @property
    -438    def intensity(self):
    -439        """Intensity of the mass feature"""
    -440        return self._intensity
    -441
    -442    @intensity.setter
    -443    def intensity(self, value):
    -444        """Set the intensity of the mass feature"""
    -445        if not isinstance(value, float):
    -446            raise ValueError("The intensity of the mass feature must be a float")
    -447        self._intensity = value
    -448
    -449    @property
    -450    def persistence(self):
    -451        """Persistence of the mass feature"""
    -452        return self._persistence
    -453
    -454    @persistence.setter
    -455    def persistence(self, value):
    -456        """Set the persistence of the mass feature"""
    -457        if not isinstance(value, float):
    -458            raise ValueError("The persistence of the mass feature must be a float")
    -459        self._persistence = value
    -460
    -461    @property
    -462    def eic_rt_list(self):
    -463        """Retention time list between the beginning and end of the mass feature"""
    -464        # Find index of the start and final scans in the EIC data
    -465        start_index = self._eic_data.scans.tolist().index(self.start_scan)
    -466        final_index = self._eic_data.scans.tolist().index(self.final_scan)
    -467
    -468        # Get the retention time list
    -469        rt_list = self._eic_data.time[start_index : final_index + 1]
    -470        return rt_list
    -471
    -472    @property
    -473    def eic_list(self):
    -474        """EIC List between the beginning and end of the mass feature"""
    -475        # Find index of the start and final scans in the EIC data
    -476        start_index = self._eic_data.scans.tolist().index(self.start_scan)
    -477        final_index = self._eic_data.scans.tolist().index(self.final_scan)
    -478
    -479        # Get the retention time list
    -480        eic = self._eic_data.eic[start_index : final_index + 1]
    -481        return eic
    -482
    -483    @property
    -484    def ms1_peak(self):
    -485        """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z"""
    -486        # Find index array self.mass_spectrum.mz_exp that is closest to self.mz
    -487        closest_mz = min(self.mass_spectrum.mz_exp, key=lambda x: abs(x - self.mz))
    -488        closest_mz_index = self.mass_spectrum.mz_exp.tolist().index(closest_mz)
    -489
    -490        return self.mass_spectrum._mspeaks[closest_mz_index]
    -491    
    -492    @property
    -493    def tailing_factor(self):
    -494        """Tailing factor of the mass feature"""
    -495        return self._tailing_factor
    -496    
    -497    @tailing_factor.setter
    -498    def tailing_factor(self, value):
    -499        """Set the tailing factor of the mass feature"""
    -500        if not isinstance(value, float):
    -501            raise ValueError("The tailing factor of the mass feature must be a float")
    -502        self._tailing_factor = value
    -503
    -504    @property
    -505    def dispersity_index(self):
    -506        """Dispersity index of the mass feature"""
    -507        return self._dispersity_index
    -508    
    -509    @dispersity_index.setter
    -510    def dispersity_index(self, value):
    -511        """Set the dispersity index of the mass feature"""
    -512        if not isinstance(value, float):
    -513            raise ValueError("The dispersity index of the mass feature must be a float")
    -514        self._dispersity_index = value
    -515
    -516    @property
    -517    def half_height_width(self):
    -518        """Half height width of the mass feature, average of min and max values, in minutes"""
    -519        return np.mean(self._half_height_width)
    -520
    -521    @property
    -522    def best_ms2(self):
    -523        """Points to the best representative MS2 mass spectrum
    +360                axs[i][0].set_title("MS1 (raw)", loc="left")
    +361                axs[i][0].vlines(
    +362                    self.mass_spectrum.mz_exp,
    +363                    0,
    +364                    self.mass_spectrum.abundance,
    +365                    color="k",
    +366                    label="Raw MS1",
    +367                )
    +368                axs[i][0].set_xlim(
    +369                    self.mass_spectrum.mz_exp.min() * 0.8,
    +370                    self.mass_spectrum.mz_exp.max() * 1.1,
    +371                )
    +372                axs[i][0].set_ylim(bottom=0)
    +373
    +374            if abs(self.ms1_peak.mz_exp - self.mz) < 0.01:  # absolute difference: a peak far below the feature m/z must not be highlighted
    +375                axs[i][0].vlines(
    +376                    self.ms1_peak.mz_exp,
    +377                    0,
    +378                    self.ms1_peak.abundance,
    +379                    color="m",
    +380                    label="Feature m/z",
    +381                )
    +382
    +383            else:
    +384                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
    +385                    print(
    +386                        "The m/z of the mass feature "
    +387                        + str(self.id)
    +388                        + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
    +389                    )
    +390            axs[i][0].legend(loc="upper left")
    +391            axs[i][0].set_ylabel("Intensity")
    +392            axs[i][0].set_xlabel("m/z")
    +393            axs[i][0].yaxis.set_tick_params(labelleft=False)
    +394            i += 1
    +395
    +396        # MS2 plot
    +397        if "MS2" in to_plot:
    +398            axs[i][0].set_title("MS2", loc="left")
    +399            axs[i][0].vlines(
    +400                self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k"
    +401            )
    +402            axs[i][0].set_ylabel("Intensity")
    +403            axs[i][0].set_xlabel("m/z")
    +404            axs[i][0].set_ylim(bottom=0)
    +405            axs[i][0].yaxis.get_major_formatter().set_scientific(False)
    +406            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
    +407            axs[i][0].set_xlim(
    +408                self.best_ms2.mz_exp.min() * 0.8, self.best_ms2.mz_exp.max() * 1.1
    +409            )
    +410            axs[i][0].yaxis.set_tick_params(labelleft=False)
    +411
    +412        # Add space between subplots
    +413        plt.tight_layout()
    +414
    +415        if return_fig:
    +416            # Close figure
    +417            plt.close(fig)
    +418            return fig
    +419
    +420    @property
    +421    def mz(self):
    +422        """Mass to charge ratio of the mass feature: the calibrated value when one is set, otherwise the measured value"""
    +423        # If the mass feature has been calibrated, return the calibrated m/z, otherwise return the measured m/z
    +424        if self._mz_cal is not None:
    +425            return self._mz_cal
    +426        else:
    +427            return self._mz_exp
    +428
    +429    @property
    +430    def mass_spectrum_deconvoluted(self):
    +431        """Return a deep copy of the associated mass spectrum with its indexes set to the deconvoluted indexes; raises ValueError if deconvolution has not been performed."""
    +432        if self._ms_deconvoluted_idx is not None:
    +433            ms_deconvoluted = copy.deepcopy(self.mass_spectrum)
    +434            ms_deconvoluted.set_indexes(self._ms_deconvoluted_idx)
    +435            return ms_deconvoluted
    +436        else:
    +437            raise ValueError(
    +438                "Deconvolution has not been performed for mass feature " + str(self.id)
    +439            )
    +440
    +441    @property
    +442    def retention_time(self):
    +443        """Retention time of the mass feature (labelled as minutes in the plot title)"""
    +444        return self._retention_time
    +445
    +446    @retention_time.setter
    +447    def retention_time(self, value):
    +448        """Set the retention time of the mass feature; raises ValueError if value is not a float"""
    +449        if not isinstance(value, float):
    +450            raise ValueError("The retention time of the mass feature must be a float")
    +451        self._retention_time = value
    +452
    +453    @property
    +454    def apex_scan(self):
    +455        """Apex scan of the mass feature"""
    +456        return self._apex_scan
    +457
    +458    @apex_scan.setter
    +459    def apex_scan(self, value):
    +460        """Set the apex scan of the mass feature; raises ValueError if value is not an int"""
    +461        if not isinstance(value, int):
    +462            raise ValueError("The apex scan of the mass feature must be an integer")
    +463        self._apex_scan = value
    +464
    +465    @property
    +466    def intensity(self):
    +467        """Intensity of the mass feature"""
    +468        return self._intensity
    +469
    +470    @intensity.setter
    +471    def intensity(self, value):
    +472        """Set the intensity of the mass feature; raises ValueError if value is not a float"""
    +473        if not isinstance(value, float):
    +474            raise ValueError("The intensity of the mass feature must be a float")
    +475        self._intensity = value
    +476
    +477    @property
    +478    def persistence(self):
    +479        """Persistence of the mass feature"""
    +480        return self._persistence
    +481
    +482    @persistence.setter
    +483    def persistence(self, value):
    +484        """Set the persistence of the mass feature; raises ValueError if value is not a float"""
    +485        if not isinstance(value, float):
    +486            raise ValueError("The persistence of the mass feature must be a float")
    +487        self._persistence = value
    +488
    +489    @property
    +490    def eic_rt_list(self):
    +491        """Retention times from the EIC data, from the start scan through the final scan of the mass feature (inclusive)"""
    +492        # Find index of the start and final scans in the EIC data
    +493        start_index = self._eic_data.scans.tolist().index(self.start_scan)
    +494        final_index = self._eic_data.scans.tolist().index(self.final_scan)
    +495
    +496        # Slice the EIC time values between those indexes; the + 1 keeps the final scan
    +497        rt_list = self._eic_data.time[start_index : final_index + 1]
    +498        return rt_list
    +499
    +500    @property
    +501    def eic_list(self):
    +502        """EIC values from the start scan through the final scan of the mass feature (inclusive)"""
    +503        # Find index of the start and final scans in the EIC data
    +504        start_index = self._eic_data.scans.tolist().index(self.start_scan)
    +505        final_index = self._eic_data.scans.tolist().index(self.final_scan)
    +506
    +507        # Slice the EIC values between those indexes; the + 1 keeps the final scan
    +508        eic = self._eic_data.eic[start_index : final_index + 1]
    +509        return eic
    +510
    +511    @property
    +512    def ms1_peak(self):
    +513        """MS1 peak from associated mass spectrum that is closest to the mass feature's m/z"""
    +514        # Find the value in self.mass_spectrum.mz_exp closest to self.mz, then its index (first occurrence)
    +515        closest_mz = min(self.mass_spectrum.mz_exp, key=lambda x: abs(x - self.mz))
    +516        closest_mz_index = self.mass_spectrum.mz_exp.tolist().index(closest_mz)
    +517
    +518        return self.mass_spectrum._mspeaks[closest_mz_index]
    +519
    +520    @property
    +521    def tailing_factor(self):
    +522        """Tailing factor of the mass feature, as stored in _tailing_factor"""
    +523        return self._tailing_factor
     524
    -525        Notes
    -526        -----
    -527        If there is only one MS2 mass spectrum, it will be returned
    -528        If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score.
    -529        If there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power.  Checks for and disqualifies possible chimeric spectra.
    -530
    -531        Returns
    -532        -------
    -533        MassSpectrum or None
    -534            The best MS2 mass spectrum.
    -535        """
    -536        if len(self.ms2_similarity_results) > 0:
    -537            # the scan number with the highest similarity score
    -538            results_df = [x.to_dataframe() for x in self.ms2_similarity_results]
    -539            results_df = pd.concat(results_df)
    -540            results_df = results_df.sort_values(
    -541                by="entropy_similarity", ascending=False
    -542            )
    -543            best_scan_number = results_df.iloc[0]["query_spectrum_id"]
    -544            return self.ms2_mass_spectra[best_scan_number]
    -545
    -546        ms2_scans = list(self.ms2_mass_spectra.keys())
    -547        if len(ms2_scans) > 1:
    -548            mz_diff_list = []  # List of mz difference between mz of mass feature and mass of nearest mz in each scan
    -549            res_list = []  # List of maximum resolving power of peaks in each scan
    -550            time_diff_list = []  # List of time difference between scan and apex scan in each scan
    -551            for scan in ms2_scans:
    -552                if len(self.ms2_mass_spectra[scan].mspeaks) > 0:
    -553                    # Find mz closest to mass feature mz, return both the difference in mass and its resolution
    -554                    closest_mz = min(
    -555                        self.ms2_mass_spectra[scan].mz_exp,
    -556                        key=lambda x: abs(x - self.mz),
    -557                    )
    -558                    if all(
    -559                        np.isnan(self.ms2_mass_spectra[scan].resolving_power)
    -560                    ):  # All NA for resolving power in peaks, not uncommon in CID spectra
    -561                        res_list.append(2)  # Assumes very low resolving power
    -562                    else:
    -563                        res_list.append(
    -564                            np.nanmax(self.ms2_mass_spectra[scan].resolving_power)
    -565                        )
    -566                    mz_diff_list.append(np.abs(closest_mz - self.mz))
    -567                    time_diff_list.append(
    -568                        np.abs(
    -569                            self.chromatogram_parent.get_time_of_scan_id(scan)
    -570                            - self.retention_time
    -571                        )
    -572                    )
    -573                else:
    -574                    res_list.append(np.nan)
    -575                    mz_diff_list.append(np.nan)
    -576                    time_diff_list.append(np.nan)
    -577            # Convert diff_lists into logical scores (higher is better for each score)
    -578            time_score = 1 - np.array(time_diff_list) / np.nanmax(
    -579                np.array(time_diff_list)
    -580            )
    -581            res_score = np.array(res_list) / np.nanmax(np.array(res_list))
    -582            # mz_score is 0 for possible chimerics, 1 for all others (already within mass tolerance before assigning)
    -583            mz_score = np.zeros(len(ms2_scans))
    -584            for i in np.arange(0, len(ms2_scans)):
    -585                if mz_diff_list[i] < 0.8 and mz_diff_list[i] > 0.1:  # Possible chimeric
    -586                    mz_score[i] = 0
    -587                else:
    -588                    mz_score[i] = 1
    -589            # get the index of the best score and return the mass spectrum
    -590            if len([np.nanargmax(time_score * res_score * mz_score)]) == 1:
    -591                return self.ms2_mass_spectra[
    -592                    ms2_scans[np.nanargmax(time_score * res_score * mz_score)]
    -593                ]
    -594            # remove the mz_score condition and try again
    -595            elif len(np.argmax(time_score * res_score)) == 1:
    -596                return self.ms2_mass_spectra[
    -597                    ms2_scans[np.nanargmax(time_score * res_score)]
    -598                ]
    -599            else:
    -600                raise ValueError(
    -601                    "No best MS2 mass spectrum could be found for mass feature "
    -602                    + str(self.id)
    -603                )
    -604        elif len(ms2_scans) == 1:  # if only one ms2 spectra, return it
    -605            return self.ms2_mass_spectra[ms2_scans[0]]
    -606        else:  # if no ms2 spectra, return None
    -607            return None
    +525    @tailing_factor.setter
    +526    def tailing_factor(self, value):
    +527        """Set the tailing factor of the mass feature; raises ValueError if value is not a float"""
    +528        if not isinstance(value, float):
    +529            raise ValueError("The tailing factor of the mass feature must be a float")
    +530        self._tailing_factor = value
    +531
    +532    @property
    +533    def dispersity_index(self):
    +534        """Dispersity index of the mass feature"""
    +535        return self._dispersity_index
    +536
    +537    @dispersity_index.setter
    +538    def dispersity_index(self, value):
    +539        """Set the dispersity index of the mass feature; raises ValueError if value is not a float"""
    +540        if not isinstance(value, float):
    +541            raise ValueError("The dispersity index of the mass feature must be a float")
    +542        self._dispersity_index = value
    +543
    +544    @property
    +545    def half_height_width(self):
    +546        """Half height width of the mass feature in minutes, returned as the mean (np.mean) of the stored min and max values"""
    +547        return np.mean(self._half_height_width)
    +548
    +549    @property
    +550    def best_ms2(self):
    +551        """Points to the best representative MS2 mass spectrum
    +552
    +553        Notes
    +554        -----
    +555        If there is only one MS2 mass spectrum, it will be returned
    +556        If there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score.
    +557        If there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power.  Checks for and disqualifies possible chimeric spectra.
    +558
    +559        Returns
    +560        -------
    +561        MassSpectrum or None
    +562            The best MS2 mass spectrum.
    +563        """
    +564        if len(self.ms2_similarity_results) > 0:
    +565            # the scan number with the highest similarity score
    +566            results_df = [x.to_dataframe() for x in self.ms2_similarity_results]
    +567            results_df = pd.concat(results_df)
    +568            results_df = results_df.sort_values(
    +569                by="entropy_similarity", ascending=False
    +570            )
    +571            best_scan_number = results_df.iloc[0]["query_spectrum_id"]
    +572            return self.ms2_mass_spectra[best_scan_number]
    +573
    +574        ms2_scans = list(self.ms2_mass_spectra.keys())
    +575        if len(ms2_scans) > 1:
    +576            mz_diff_list = []  # List of mz difference between mz of mass feature and mass of nearest mz in each scan
    +577            res_list = []  # List of maximum resolving power of peaks in each scan
    +578            time_diff_list = []  # List of time difference between scan and apex scan in each scan
    +579            for scan in ms2_scans:
    +580                if len(self.ms2_mass_spectra[scan].mspeaks) > 0:
    +581                    # Find mz closest to mass feature mz, return both the difference in mass and its resolution
    +582                    closest_mz = min(
    +583                        self.ms2_mass_spectra[scan].mz_exp,
    +584                        key=lambda x: abs(x - self.mz),
    +585                    )
    +586                    if all(
    +587                        np.isnan(self.ms2_mass_spectra[scan].resolving_power)
    +588                    ):  # All NA for resolving power in peaks, not uncommon in CID spectra
    +589                        res_list.append(2)  # Assumes very low resolving power
    +590                    else:
    +591                        res_list.append(
    +592                            np.nanmax(self.ms2_mass_spectra[scan].resolving_power)
    +593                        )
    +594                    mz_diff_list.append(np.abs(closest_mz - self.mz))
    +595                    time_diff_list.append(
    +596                        np.abs(
    +597                            self.chromatogram_parent.get_time_of_scan_id(scan)
    +598                            - self.retention_time
    +599                        )
    +600                    )
    +601                else:
    +602                    res_list.append(np.nan)
    +603                    mz_diff_list.append(np.nan)
    +604                    time_diff_list.append(np.nan)
    +605            # Convert diff_lists into logical scores (higher is better for each score)
    +606            time_score = 1 - np.array(time_diff_list) / np.nanmax(
    +607                np.array(time_diff_list)
    +608            )
    +609            res_score = np.array(res_list) / np.nanmax(np.array(res_list))
    +610            # mz_score is 0 for possible chimerics, 1 for all others (already within mass tolerance before assigning)
    +611            mz_score = np.zeros(len(ms2_scans))
    +612            for i in np.arange(0, len(ms2_scans)):
    +613                if mz_diff_list[i] < 0.8 and mz_diff_list[i] > 0.1:  # Possible chimeric
    +614                    mz_score[i] = 0
    +615                else:
    +616                    mz_score[i] = 1
    +617            # get the index of the best score and return the mass spectrum
    +618            if len([np.nanargmax(time_score * res_score * mz_score)]) == 1:
    +619                return self.ms2_mass_spectra[
    +620                    ms2_scans[np.nanargmax(time_score * res_score * mz_score)]
    +621                ]
    +622            # remove the mz_score condition and try again
    +623            elif len(np.argmax(time_score * res_score)) == 1:
    +624                return self.ms2_mass_spectra[
    +625                    ms2_scans[np.nanargmax(time_score * res_score)]
    +626                ]
    +627            else:
    +628                raise ValueError(
    +629                    "No best MS2 mass spectrum could be found for mass feature "
    +630                    + str(self.id)
    +631                )
    +632        elif len(ms2_scans) == 1:  # if only one ms2 spectra, return it
    +633            return self.ms2_mass_spectra[ms2_scans[0]]
    +634        else:  # if no ms2 spectra, return None
    +635            return None
     
    @@ -1878,7 +1931,7 @@
    Attributes
  • _half_height_width (numpy.ndarray): The half height width of the feature (in minutes, as an array of min and max values).
  • _tailing_factor (float): -The tailing factor of the feature. +The tailing factor of the feature. > 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
  • _ms_deconvoluted_idx ([int]): The indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
  • @@ -1888,19 +1941,19 @@
    Attributes
    Mass feature id that is the monoisotopic version of self. If self.id, then self is the monoisotopic feature). Default is None.
  • isotopologue_type (str): -The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. +The isotopic class of the feature, i.e. "13C1", "13C2", "13C1 37Cl1" etc. Default is None.
  • ms2_scan_numbers (list): -List of scan numbers of the MS2 spectra associated with the feature. +List of scan numbers of the MS2 spectra associated with the feature. Default is an empty list.
  • ms2_mass_spectra (dict): -Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). +Dictionary of MS2 spectra associated with the feature (key = scan number for DDA). Default is an empty dictionary.
  • ms2_similarity_results (list): -List of MS2 similarity results associated with the mass feature. +List of MS2 similarity results associated with the mass feature. Default is an empty list.
  • id (int): -The ID of the feature, also the key in the parent LCMS object's +The ID of the feature, also the key in the parent LCMS object's mass_features dictionary.
  • mass_spectrum_deconvoluted_parent (bool): If True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
  • @@ -1920,53 +1973,53 @@
    Attributes
    -
    176    def __init__(
    -177        self,
    -178        lcms_parent,
    -179        mz: float,
    -180        retention_time: float,
    -181        intensity: float,
    -182        apex_scan: int,
    -183        persistence: float = None,
    -184        id: int = None,
    -185    ):
    -186        super().__init__(
    -187            chromatogram_parent=lcms_parent,
    -188            mass_spectrum_obj=None,
    -189            start_index=None,
    -190            index=apex_scan,
    -191            final_index=None,
    -192        )
    -193        # Core attributes, marked as private
    -194        self._mz_exp: float = mz
    -195        self._mz_cal: float = None
    -196        self._retention_time: float = retention_time
    -197        self._apex_scan: int = apex_scan
    -198        self._intensity: float = intensity
    -199        self._persistence: float = persistence
    -200        self._eic_data: EIC_Data = None
    -201        self._dispersity_index: float = None
    -202        self._half_height_width: np.ndarray = None
    -203        self._ms_deconvoluted_idx = None
    -204
    -205        # Additional attributes
    -206        self.monoisotopic_mf_id = None
    -207        self.isotopologue_type = None
    -208        self.ms2_scan_numbers = []
    -209        self.ms2_mass_spectra = {}
    -210        self.ms2_similarity_results = []
    -211        self.mass_spectrum_deconvoluted_parent: bool = None
    -212        self.associated_mass_features_deconvoluted = []
    -213
    -214        if id:
    -215            self.id = id
    -216        else:
    -217            # get the parent's mass feature keys and add 1 to the max value to get the new key
    -218            self.id = (
    -219                max(lcms_parent.mass_features.keys()) + 1
    -220                if lcms_parent.mass_features.keys()
    -221                else 0
    -222            )
    +            
    179    def __init__(
    +180        self,
    +181        lcms_parent,
    +182        mz: float,
    +183        retention_time: float,
    +184        intensity: float,
    +185        apex_scan: int,
    +186        persistence: float = None,
    +187        id: int = None,
    +188    ):
    +189        super().__init__(
    +190            chromatogram_parent=lcms_parent,
    +191            mass_spectrum_obj=None,
    +192            start_index=None,
    +193            index=apex_scan,
    +194            final_index=None,
    +195        )
    +196        # Core attributes, marked as private
    +197        self._mz_exp: float = mz
    +198        self._mz_cal: float = None
    +199        self._retention_time: float = retention_time
    +200        self._apex_scan: int = apex_scan
    +201        self._intensity: float = intensity
    +202        self._persistence: float = persistence
    +203        self._eic_data: EIC_Data = None
    +204        self._dispersity_index: float = None
    +205        self._half_height_width: np.ndarray = None
    +206        self._ms_deconvoluted_idx = None
    +207
    +208        # Additional attributes
    +209        self.monoisotopic_mf_id = None
    +210        self.isotopologue_type = None
    +211        self.ms2_scan_numbers = []
    +212        self.ms2_mass_spectra = {}
    +213        self.ms2_similarity_results = []
    +214        self.mass_spectrum_deconvoluted_parent: bool = None
    +215        self.associated_mass_features_deconvoluted = []
    +216
    +217        if id:
    +218            self.id = id
    +219        else:
    +220            # get the parent's mass feature keys and add 1 to the max value to get the new key
    +221            self.id = (
    +222                max(lcms_parent.mass_features.keys()) + 1
    +223                if lcms_parent.mass_features.keys()
    +224                else 0
    +225            )
     
    @@ -2061,22 +2114,22 @@
    Attributes
    -
    224    def update_mz(self):
    -225        """Update the mass to charge ratio from the mass spectrum object."""
    -226        if self.mass_spectrum is None:
    -227            raise ValueError(
    -228                "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
    -229            )
    -230        if len(self.mass_spectrum.mz_exp) == 0:
    -231            raise ValueError(
    -232                "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
    -233            )
    -234        new_mz = self.ms1_peak.mz_exp
    -235
    -236        # calculate the difference between the new and old m/z, only update if it is close
    -237        mz_diff = new_mz - self.mz
    -238        if abs(mz_diff) < 0.01:
    -239            self._mz_exp = new_mz
    +            
    227    def update_mz(self):
    +228        """Update the mass to charge ratio from the mass spectrum object."""
    +229        if self.mass_spectrum is None:
    +230            raise ValueError(
    +231                "The mass spectrum object is not set, cannot update the m/z from the MassSpectrum object"
    +232            )
    +233        if len(self.mass_spectrum.mz_exp) == 0:
    +234            raise ValueError(
    +235                "The mass spectrum object has no m/z values, cannot update the m/z from the MassSpectrum object until it is processed"
    +236            )
    +237        new_mz = self.ms1_peak.mz_exp
    +238
    +239        # calculate the difference between the new and old m/z, only update if it is close
    +240        mz_diff = new_mz - self.mz
    +241        if abs(mz_diff) < 0.01:
    +242            self._mz_exp = new_mz
     
    @@ -2096,158 +2149,181 @@
    Attributes
    -
    241    def plot(self, to_plot=["EIC", "MS1", "MS2"], return_fig=True):
    -242        """Plot the mass feature.
    -243
    -244        Parameters
    -245        ----------
    -246        to_plot : list, optional
    -247            List of strings specifying what to plot, any iteration of 
    -248            "EIC", "MS2", and "MS1". 
    -249            Default is ["EIC", "MS1", "MS2"].
    -250        return_fig : bool, optional
    -251            If True, the figure is returned. Default is True.
    -252
    -253        Returns
    -254        -------
    -255        matplotlib.figure.Figure or None
    -256            The figure object if `return_fig` is True. 
    -257            Otherwise None and the figure is displayed.
    -258        """
    -259
    -260        # EIC plot preparation
    -261        eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time
    +            
    244    def plot(self, to_plot=["EIC", "MS1", "MS2"], return_fig=True):
    +245        """Plot the mass feature.
    +246
    +247        Parameters
    +248        ----------
    +249        to_plot : list, optional
    +250            List of strings specifying what to plot, any iteration of
    +251            "EIC", "MS2", and "MS1".
    +252            Default is ["EIC", "MS1", "MS2"].
    +253        return_fig : bool, optional
    +254            If True, the figure is returned. Default is True.
    +255
    +256        Returns
    +257        -------
    +258        matplotlib.figure.Figure or None
    +259            The figure object if `return_fig` is True.
    +260            Otherwise None and the figure is displayed.
    +261        """
     262
    -263        # Adjust to_plot list if there are not spectra added to the mass features
    -264        if self.mass_spectrum is None:
    -265            to_plot = [x for x in to_plot if x != "MS1"]
    -266        if len(self.ms2_mass_spectra) == 0:
    -267            to_plot = [x for x in to_plot if x != "MS2"]
    -268        if self._eic_data is None:
    -269            to_plot = [x for x in to_plot if x != "EIC"]
    -270        if self._ms_deconvoluted_idx is not None:
    -271            deconvoluted = True
    -272        else:
    -273            deconvoluted = False
    -274
    -275        fig, axs = plt.subplots(
    -276            len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
    -277        )
    -278        fig.suptitle(
    -279            "Mass Feature "
    -280            + str(self.id)
    -281            + ": m/z = "
    -282            + str(round(self.mz, ndigits=4))
    -283            + "; time = "
    -284            + str(round(self.retention_time, ndigits=1))
    -285            + " minutes"
    -286        )
    -287
    -288        i = 0
    -289        # EIC plot
    -290        if "EIC" in to_plot:
    -291            if self._eic_data is None:
    -292                raise ValueError(
    -293                    "EIC data is not available, cannot plot the mass feature's EIC"
    -294                )
    -295            axs[i][0].set_title("EIC", loc="left")
    -296            axs[i][0].plot(self._eic_data.time, self._eic_data.eic)
    -297            if self.start_scan is not None:
    -298                axs[i][0].fill_between(
    -299                    self.eic_rt_list, self.eic_list, color="b", alpha=0.2
    -300                )
    -301            else:
    -302                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
    -303                    print(
    -304                        "No start and final scan numbers were provided for mass feature "
    -305                        + str(self.id)
    -306                    )
    -307            axs[i][0].set_ylabel("Intensity")
    -308            axs[i][0].set_xlabel("Time (minutes)")
    -309            axs[i][0].set_ylim(0, self.eic_list.max() * 1.1)
    -310            axs[i][0].set_xlim(
    -311                self.retention_time - eic_buffer_time,
    -312                self.retention_time + eic_buffer_time,
    -313            )
    -314            axs[i][0].axvline(
    -315                x=self.retention_time, color="k", label="MS1 scan time (apex)"
    +263        # EIC plot preparation
    +264        eic_buffer_time = self.chromatogram_parent.parameters.lc_ms.eic_buffer_time
    +265
    +266        # Adjust to_plot list if there are not spectra added to the mass features
    +267        if self.mass_spectrum is None:
    +268            to_plot = [x for x in to_plot if x != "MS1"]
    +269        if len(self.ms2_mass_spectra) == 0:
    +270            to_plot = [x for x in to_plot if x != "MS2"]
    +271        if self._eic_data is None:
    +272            to_plot = [x for x in to_plot if x != "EIC"]
    +273        if self._ms_deconvoluted_idx is not None:
    +274            deconvoluted = True
    +275        else:
    +276            deconvoluted = False
    +277
    +278        fig, axs = plt.subplots(
    +279            len(to_plot), 1, figsize=(9, len(to_plot) * 4), squeeze=False
    +280        )
    +281        fig.suptitle(
    +282            "Mass Feature "
    +283            + str(self.id)
    +284            + ": m/z = "
    +285            + str(round(self.mz, ndigits=4))
    +286            + "; time = "
    +287            + str(round(self.retention_time, ndigits=1))
    +288            + " minutes"
    +289        )
    +290
    +291        i = 0
    +292        # EIC plot
    +293        if "EIC" in to_plot:
    +294            if self._eic_data is None:
    +295                raise ValueError(
    +296                    "EIC data is not available, cannot plot the mass feature's EIC"
    +297                )
    +298            axs[i][0].set_title("EIC", loc="left")
    +299            axs[i][0].plot(self._eic_data.time, self._eic_data.eic)
    +300            if self.start_scan is not None:
    +301                axs[i][0].fill_between(
    +302                    self.eic_rt_list, self.eic_list, color="b", alpha=0.2
    +303                )
    +304            else:
    +305                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
    +306                    print(
    +307                        "No start and final scan numbers were provided for mass feature "
    +308                        + str(self.id)
    +309                    )
    +310            axs[i][0].set_ylabel("Intensity")
    +311            axs[i][0].set_xlabel("Time (minutes)")
    +312            axs[i][0].set_ylim(0, self.eic_list.max() * 1.1)
    +313            axs[i][0].set_xlim(
    +314                self.retention_time - eic_buffer_time,
    +315                self.retention_time + eic_buffer_time,
     316            )
    -317            if len(self.ms2_scan_numbers) > 0:
    -318                axs[i][0].axvline(
    -319                    x=self.chromatogram_parent.get_time_of_scan_id(
    -320                        self.best_ms2.scan_number
    -321                    ),
    -322                    color="grey",
    -323                    linestyle="--",
    -324                    label="MS2 scan time",
    -325                )
    -326            axs[i][0].legend(loc="upper left")
    -327            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
    -328            i += 1
    -329
    -330        # MS1 plot
    -331        if "MS1" in to_plot:
    -332            if deconvoluted:
    -333                axs[i][0].set_title("MS1 (deconvoluted)", loc="left")
    -334                axs[i][0].vlines(
    -335                    self.mass_spectrum.mz_exp, 0, self.mass_spectrum.abundance, color="k", alpha=0.2, label="Raw MS1"
    -336                )
    +317            axs[i][0].axvline(
    +318                x=self.retention_time, color="k", label="MS1 scan time (apex)"
    +319            )
    +320            if len(self.ms2_scan_numbers) > 0:
    +321                axs[i][0].axvline(
    +322                    x=self.chromatogram_parent.get_time_of_scan_id(
    +323                        self.best_ms2.scan_number
    +324                    ),
    +325                    color="grey",
    +326                    linestyle="--",
    +327                    label="MS2 scan time",
    +328                )
    +329            axs[i][0].legend(loc="upper left")
    +330            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
    +331            i += 1
    +332
    +333        # MS1 plot
    +334        if "MS1" in to_plot:
    +335            if deconvoluted:
    +336                axs[i][0].set_title("MS1 (deconvoluted)", loc="left")
     337                axs[i][0].vlines(
    -338                    self.mass_spectrum_deconvoluted.mz_exp, 0, self.mass_spectrum_deconvoluted.abundance, color="k", label="Deconvoluted MS1"
    -339                )
    -340                axs[i][0].set_xlim(self.mass_spectrum_deconvoluted.mz_exp.min()*.8, self.mass_spectrum_deconvoluted.mz_exp.max()*1.1)
    -341                axs[i][0].set_ylim(0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1)
    -342            else:
    -343                axs[i][0].set_title("MS1 (raw)", loc="left")
    -344                axs[i][0].vlines(
    -345                    self.mass_spectrum.mz_exp, 0, self.mass_spectrum.abundance, color="k", label="Raw MS1"
    -346                )
    -347                axs[i][0].set_xlim(self.mass_spectrum.mz_exp.min()*.8, self.mass_spectrum.mz_exp.max()*1.1)
    -348                axs[i][0].set_ylim(bottom=0)
    -349
    -350            if (self.ms1_peak.mz_exp - self.mz) < 0.01:
    -351                axs[i][0].vlines(
    -352                    self.ms1_peak.mz_exp,
    -353                    0,
    -354                    self.ms1_peak.abundance,
    -355                    color="m",
    -356                    label="Feature m/z",
    -357                )
    -358
    +338                    self.mass_spectrum.mz_exp,
    +339                    0,
    +340                    self.mass_spectrum.abundance,
    +341                    color="k",
    +342                    alpha=0.2,
    +343                    label="Raw MS1",
    +344                )
    +345                axs[i][0].vlines(
    +346                    self.mass_spectrum_deconvoluted.mz_exp,
    +347                    0,
    +348                    self.mass_spectrum_deconvoluted.abundance,
    +349                    color="k",
    +350                    label="Deconvoluted MS1",
    +351                )
    +352                axs[i][0].set_xlim(
    +353                    self.mass_spectrum_deconvoluted.mz_exp.min() * 0.8,
    +354                    self.mass_spectrum_deconvoluted.mz_exp.max() * 1.1,
    +355                )
    +356                axs[i][0].set_ylim(
    +357                    0, self.mass_spectrum_deconvoluted.abundance.max() * 1.1
    +358                )
     359            else:
    -360                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
    -361                    print(
    -362                        "The m/z of the mass feature "
    -363                        + str(self.id)
    -364                        + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
    -365                    )
    -366            axs[i][0].legend(loc="upper left")
    -367            axs[i][0].set_ylabel("Intensity")
    -368            axs[i][0].set_xlabel("m/z")
    -369            axs[i][0].yaxis.set_tick_params(labelleft=False)
    -370            i += 1
    -371
    -372        # MS2 plot
    -373        if "MS2" in to_plot:
    -374            axs[i][0].set_title("MS2", loc="left")
    -375            axs[i][0].vlines(
    -376                self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k"
    -377            )
    -378            axs[i][0].set_ylabel("Intensity")
    -379            axs[i][0].set_xlabel("m/z")
    -380            axs[i][0].set_ylim(bottom=0)
    -381            axs[i][0].yaxis.get_major_formatter().set_scientific(False)
    -382            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
    -383            axs[i][0].set_xlim(self.best_ms2.mz_exp.min()*.8, self.best_ms2.mz_exp.max()*1.1)
    -384            axs[i][0].yaxis.set_tick_params(labelleft=False)
    -385
    -386        # Add space between subplots
    -387        plt.tight_layout()
    -388
    -389        if return_fig:
    -390            # Close figure
    -391            plt.close(fig)
    -392            return fig
    +360                axs[i][0].set_title("MS1 (raw)", loc="left")
    +361                axs[i][0].vlines(
    +362                    self.mass_spectrum.mz_exp,
    +363                    0,
    +364                    self.mass_spectrum.abundance,
    +365                    color="k",
    +366                    label="Raw MS1",
    +367                )
    +368                axs[i][0].set_xlim(
    +369                    self.mass_spectrum.mz_exp.min() * 0.8,
    +370                    self.mass_spectrum.mz_exp.max() * 1.1,
    +371                )
    +372                axs[i][0].set_ylim(bottom=0)
    +373
    +374            if (self.ms1_peak.mz_exp - self.mz) < 0.01:
    +375                axs[i][0].vlines(
    +376                    self.ms1_peak.mz_exp,
    +377                    0,
    +378                    self.ms1_peak.abundance,
    +379                    color="m",
    +380                    label="Feature m/z",
    +381                )
    +382
    +383            else:
    +384                if self.chromatogram_parent.parameters.lc_ms.verbose_processing:
    +385                    print(
    +386                        "The m/z of the mass feature "
    +387                        + str(self.id)
    +388                        + " is different from the m/z of MS1 peak, the MS1 peak will not be plotted"
    +389                    )
    +390            axs[i][0].legend(loc="upper left")
    +391            axs[i][0].set_ylabel("Intensity")
    +392            axs[i][0].set_xlabel("m/z")
    +393            axs[i][0].yaxis.set_tick_params(labelleft=False)
    +394            i += 1
    +395
    +396        # MS2 plot
    +397        if "MS2" in to_plot:
    +398            axs[i][0].set_title("MS2", loc="left")
    +399            axs[i][0].vlines(
    +400                self.best_ms2.mz_exp, 0, self.best_ms2.abundance, color="k"
    +401            )
    +402            axs[i][0].set_ylabel("Intensity")
    +403            axs[i][0].set_xlabel("m/z")
    +404            axs[i][0].set_ylim(bottom=0)
    +405            axs[i][0].yaxis.get_major_formatter().set_scientific(False)
    +406            axs[i][0].yaxis.get_major_formatter().set_useOffset(False)
    +407            axs[i][0].set_xlim(
    +408                self.best_ms2.mz_exp.min() * 0.8, self.best_ms2.mz_exp.max() * 1.1
    +409            )
    +410            axs[i][0].yaxis.set_tick_params(labelleft=False)
    +411
    +412        # Add space between subplots
    +413        plt.tight_layout()
    +414
    +415        if return_fig:
    +416            # Close figure
    +417            plt.close(fig)
    +418            return fig
     
    @@ -2257,8 +2333,8 @@
    Parameters
    • to_plot (list, optional): -List of strings specifying what to plot, any iteration of -"EIC", "MS2", and "MS1". +List of strings specifying what to plot, any iteration of +"EIC", "MS2", and "MS1". Default is ["EIC", "MS1", "MS2"].
    • return_fig (bool, optional): If True, the figure is returned. Default is True.
    • @@ -2267,7 +2343,7 @@
      Parameters
      Returns
        -
      • matplotlib.figure.Figure or None: The figure object if return_fig is True. +
      • matplotlib.figure.Figure or None: The figure object if return_fig is True. Otherwise None and the figure is displayed.
    @@ -2490,143 +2566,143 @@

    Inherited Members
    -
    610class GCPeak(ChromaPeakBase, GCPeakCalculation):
    -611    """Class representing a peak in a gas chromatography (GC) chromatogram.
    -612
    -613    Parameters
    -614    ----------
    -615    chromatogram_parent : Chromatogram
    -616        The parent chromatogram object.
    -617    mass_spectrum_obj : MassSpectrum
    -618        The mass spectrum object associated with the peak.
    -619    indexes : tuple
    -620        The indexes of the peak in the chromatogram.
    -621
    -622    Attributes
    -623    ----------
    -624    _compounds : list
    -625        List of compounds associated with the peak.
    -626    _ri : float or None
    -627        Retention index of the peak.
    -628
    -629    Methods
    -630    -------
    -631    * __len__(). Returns the number of compounds associated with the peak.
    -632    * __getitem__(position).  Returns the compound at the specified position.
    -633    * remove_compound(compounds_obj). Removes the specified compound from the peak.
    -634    * clear_compounds(). Removes all compounds from the peak.
    -635    * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.
    -636    * ri().  Returns the retention index of the peak.
    -637    * highest_ss_compound(). Returns the compound with the highest spectral similarity score.
    -638    * highest_score_compound(). Returns the compound with the highest similarity score.
    -639    * compound_names(). Returns a list of names of compounds associated with the peak.
    -640    """
    -641
    -642    def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes):
    -643        self._compounds = []
    -644        self._ri = None
    -645        super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes)
    -646
    -647    def __len__(self):
    -648        return len(self._compounds)
    +            
    638class GCPeak(ChromaPeakBase, GCPeakCalculation):
    +639    """Class representing a peak in a gas chromatography (GC) chromatogram.
    +640
    +641    Parameters
    +642    ----------
    +643    chromatogram_parent : Chromatogram
    +644        The parent chromatogram object.
    +645    mass_spectrum_obj : MassSpectrum
    +646        The mass spectrum object associated with the peak.
    +647    indexes : tuple
    +648        The indexes of the peak in the chromatogram.
     649
    -650    def __getitem__(self, position):
    -651        return self._compounds[position]
    -652
    -653    def remove_compound(self, compounds_obj):
    -654        self._compounds.remove(compounds_obj)
    -655
    -656    def clear_compounds(self):
    -657        self._compounds = []
    -658
    -659    def add_compound(
    -660        self,
    -661        compounds_dict,
    -662        spectral_similarity_scores,
    -663        ri_score=None,
    -664        similarity_score=None,
    -665    ):
    -666        """Adds a compound to the peak with the specified attributes.
    -667
    -668        Parameters
    -669        ----------
    -670        compounds_dict : dict
    -671            Dictionary containing the compound information.
    -672        spectral_similarity_scores : dict
    -673            Dictionary containing the spectral similarity scores.
    -674        ri_score : float or None, optional
    -675            The retention index score of the compound. Default is None.
    -676        similarity_score : float or None, optional
    -677            The similarity score of the compound. Default is None.
    -678        """
    -679        compound_obj = LowResCompoundRef(compounds_dict)
    -680        compound_obj.spectral_similarity_scores = spectral_similarity_scores
    -681        compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
    -682            "cosine_correlation"
    -683        )
    -684        # TODO check is the above line correct?
    -685        compound_obj.ri_score = ri_score
    -686        compound_obj.similarity_score = similarity_score
    -687        self._compounds.append(compound_obj)
    -688        if similarity_score:
    -689            self._compounds.sort(key=lambda c: c.similarity_score, reverse=True)
    -690        else:
    -691            self._compounds.sort(
    -692                key=lambda c: c.spectral_similarity_score, reverse=True
    -693            )
    -694
    -695    @property
    -696    def ri(self):
    -697        """Returns the retention index of the peak.
    -698
    -699        Returns
    -700        -------
    -701        float or None
    -702            The retention index of the peak.
    -703        """
    -704        return self._ri
    -705
    -706    @property
    -707    def highest_ss_compound(self):
    -708        """Returns the compound with the highest spectral similarity score.
    -709
    -710        Returns
    -711        -------
    -712        LowResCompoundRef or None
    -713            The compound with the highest spectral similarity score.
    -714        """
    -715        if self:
    -716            return max(self, key=lambda c: c.spectral_similarity_score)
    -717        else:
    -718            return None
    -719
    -720    @property
    -721    def highest_score_compound(self):
    -722        """Returns the compound with the highest similarity score.
    -723
    -724        Returns
    -725        -------
    -726        LowResCompoundRef or None
    -727            The compound with the highest similarity score.
    -728        """
    -729        if self:
    -730            return max(self, key=lambda c: c.similarity_score)
    -731        else:
    -732            return None
    +650    Attributes
    +651    ----------
    +652    _compounds : list
    +653        List of compounds associated with the peak.
    +654    _ri : float or None
    +655        Retention index of the peak.
    +656
    +657    Methods
    +658    -------
    +659    * __len__(). Returns the number of compounds associated with the peak.
    +660    * __getitem__(position).  Returns the compound at the specified position.
    +661    * remove_compound(compounds_obj). Removes the specified compound from the peak.
    +662    * clear_compounds(). Removes all compounds from the peak.
    +663    * add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.
    +664    * ri().  Returns the retention index of the peak.
    +665    * highest_ss_compound(). Returns the compound with the highest spectral similarity score.
    +666    * highest_score_compound(). Returns the compound with the highest similarity score.
    +667    * compound_names(). Returns a list of names of compounds associated with the peak.
    +668    """
    +669
    +670    def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes):
    +671        self._compounds = []
    +672        self._ri = None
    +673        super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes)
    +674
    +675    def __len__(self):
    +676        return len(self._compounds)
    +677
    +678    def __getitem__(self, position):
    +679        return self._compounds[position]
    +680
    +681    def remove_compound(self, compounds_obj):
    +682        self._compounds.remove(compounds_obj)
    +683
    +684    def clear_compounds(self):
    +685        self._compounds = []
    +686
    +687    def add_compound(
    +688        self,
    +689        compounds_dict,
    +690        spectral_similarity_scores,
    +691        ri_score=None,
    +692        similarity_score=None,
    +693    ):
    +694        """Adds a compound to the peak with the specified attributes.
    +695
    +696        Parameters
    +697        ----------
    +698        compounds_dict : dict
    +699            Dictionary containing the compound information.
    +700        spectral_similarity_scores : dict
    +701            Dictionary containing the spectral similarity scores.
    +702        ri_score : float or None, optional
    +703            The retention index score of the compound. Default is None.
    +704        similarity_score : float or None, optional
    +705            The similarity score of the compound. Default is None.
    +706        """
    +707        compound_obj = LowResCompoundRef(compounds_dict)
    +708        compound_obj.spectral_similarity_scores = spectral_similarity_scores
    +709        compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
    +710            "cosine_correlation"
    +711        )
    +712        # TODO check is the above line correct?
    +713        compound_obj.ri_score = ri_score
    +714        compound_obj.similarity_score = similarity_score
    +715        self._compounds.append(compound_obj)
    +716        if similarity_score:
    +717            self._compounds.sort(key=lambda c: c.similarity_score, reverse=True)
    +718        else:
    +719            self._compounds.sort(
    +720                key=lambda c: c.spectral_similarity_score, reverse=True
    +721            )
    +722
    +723    @property
    +724    def ri(self):
    +725        """Returns the retention index of the peak.
    +726
    +727        Returns
    +728        -------
    +729        float or None
    +730            The retention index of the peak.
    +731        """
    +732        return self._ri
     733
     734    @property
    -735    def compound_names(self):
    -736        """Returns a list of names of compounds associated with the peak.
    +735    def highest_ss_compound(self):
    +736        """Returns the compound with the highest spectral similarity score.
     737
     738        Returns
     739        -------
    -740        list
    -741            List of names of compounds associated with the peak.
    +740        LowResCompoundRef or None
    +741            The compound with the highest spectral similarity score.
     742        """
     743        if self:
    -744            return [c.name for c in self]
    +744            return max(self, key=lambda c: c.spectral_similarity_score)
     745        else:
    -746            return []
    +746            return None
    +747
    +748    @property
    +749    def highest_score_compound(self):
    +750        """Returns the compound with the highest similarity score.
    +751
    +752        Returns
    +753        -------
    +754        LowResCompoundRef or None
    +755            The compound with the highest similarity score.
    +756        """
    +757        if self:
    +758            return max(self, key=lambda c: c.similarity_score)
    +759        else:
    +760            return None
    +761
    +762    @property
    +763    def compound_names(self):
    +764        """Returns a list of names of compounds associated with the peak.
    +765
    +766        Returns
    +767        -------
    +768        list
    +769            List of names of compounds associated with the peak.
    +770        """
    +771        if self:
    +772            return [c.name for c in self]
    +773        else:
    +774            return []
     
    @@ -2678,10 +2754,10 @@
    Methods
    -
    642    def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes):
    -643        self._compounds = []
    -644        self._ri = None
    -645        super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes)
    +            
    670    def __init__(self, chromatogram_parent, mass_spectrum_obj, indexes):
    +671        self._compounds = []
    +672        self._ri = None
    +673        super().__init__(chromatogram_parent, mass_spectrum_obj, *indexes)
     
    @@ -2699,8 +2775,8 @@
    Methods
    -
    653    def remove_compound(self, compounds_obj):
    -654        self._compounds.remove(compounds_obj)
    +            
    681    def remove_compound(self, compounds_obj):
    +682        self._compounds.remove(compounds_obj)
     
    @@ -2718,8 +2794,8 @@
    Methods
    -
    656    def clear_compounds(self):
    -657        self._compounds = []
    +            
    684    def clear_compounds(self):
    +685        self._compounds = []
     
    @@ -2737,41 +2813,41 @@
    Methods
    -
    659    def add_compound(
    -660        self,
    -661        compounds_dict,
    -662        spectral_similarity_scores,
    -663        ri_score=None,
    -664        similarity_score=None,
    -665    ):
    -666        """Adds a compound to the peak with the specified attributes.
    -667
    -668        Parameters
    -669        ----------
    -670        compounds_dict : dict
    -671            Dictionary containing the compound information.
    -672        spectral_similarity_scores : dict
    -673            Dictionary containing the spectral similarity scores.
    -674        ri_score : float or None, optional
    -675            The retention index score of the compound. Default is None.
    -676        similarity_score : float or None, optional
    -677            The similarity score of the compound. Default is None.
    -678        """
    -679        compound_obj = LowResCompoundRef(compounds_dict)
    -680        compound_obj.spectral_similarity_scores = spectral_similarity_scores
    -681        compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
    -682            "cosine_correlation"
    -683        )
    -684        # TODO check is the above line correct?
    -685        compound_obj.ri_score = ri_score
    -686        compound_obj.similarity_score = similarity_score
    -687        self._compounds.append(compound_obj)
    -688        if similarity_score:
    -689            self._compounds.sort(key=lambda c: c.similarity_score, reverse=True)
    -690        else:
    -691            self._compounds.sort(
    -692                key=lambda c: c.spectral_similarity_score, reverse=True
    -693            )
    +            
    687    def add_compound(
    +688        self,
    +689        compounds_dict,
    +690        spectral_similarity_scores,
    +691        ri_score=None,
    +692        similarity_score=None,
    +693    ):
    +694        """Adds a compound to the peak with the specified attributes.
    +695
    +696        Parameters
    +697        ----------
    +698        compounds_dict : dict
    +699            Dictionary containing the compound information.
    +700        spectral_similarity_scores : dict
    +701            Dictionary containing the spectral similarity scores.
    +702        ri_score : float or None, optional
    +703            The retention index score of the compound. Default is None.
    +704        similarity_score : float or None, optional
    +705            The similarity score of the compound. Default is None.
    +706        """
    +707        compound_obj = LowResCompoundRef(compounds_dict)
    +708        compound_obj.spectral_similarity_scores = spectral_similarity_scores
    +709        compound_obj.spectral_similarity_score = spectral_similarity_scores.get(
    +710            "cosine_correlation"
    +711        )
    +712        # TODO check is the above line correct?
    +713        compound_obj.ri_score = ri_score
    +714        compound_obj.similarity_score = similarity_score
    +715        self._compounds.append(compound_obj)
    +716        if similarity_score:
    +717            self._compounds.sort(key=lambda c: c.similarity_score, reverse=True)
    +718        else:
    +719            self._compounds.sort(
    +720                key=lambda c: c.spectral_similarity_score, reverse=True
    +721            )
     
    @@ -2905,57 +2981,57 @@
    Inherited Members
    -
    749class GCPeakDeconvolved(GCPeak):
    -750    """Represents a deconvolved peak in a chromatogram.
    -751
    -752    Parameters
    -753    ----------
    -754    chromatogram_parent : Chromatogram
    -755        The parent chromatogram object.
    -756    mass_spectra : list
    -757        List of mass spectra associated with the peak.
    -758    apex_index : int
    -759        Index of the apex mass spectrum in the `mass_spectra` list.
    -760    rt_list : list
    -761        List of retention times.
    -762    tic_list : list
    -763        List of total ion currents.
    -764    """
    -765
    -766    def __init__(
    -767        self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
    -768    ):
    -769        self._ri = None
    -770        self._rt_list = list(rt_list)
    -771        self._tic_list = list(tic_list)
    -772        self.mass_spectra = list(mass_spectra)
    -773        super().__init__(
    -774            chromatogram_parent,
    -775            self.mass_spectra[apex_index],
    -776            (0, apex_index, len(self.mass_spectra) - 1),
    -777        )
    -778
    -779    @property
    -780    def rt_list(self):
    -781        """Get the list of retention times.
    -782
    -783        Returns
    -784        -------
    -785        list
    -786            The list of retention times.
    -787        """
    -788        return self._rt_list
    -789
    -790    @property
    -791    def tic_list(self):
    -792        """Get the list of total ion currents.
    +            
    777class GCPeakDeconvolved(GCPeak):
    +778    """Represents a deconvolved peak in a chromatogram.
    +779
    +780    Parameters
    +781    ----------
    +782    chromatogram_parent : Chromatogram
    +783        The parent chromatogram object.
    +784    mass_spectra : list
    +785        List of mass spectra associated with the peak.
    +786    apex_index : int
    +787        Index of the apex mass spectrum in the `mass_spectra` list.
    +788    rt_list : list
    +789        List of retention times.
    +790    tic_list : list
    +791        List of total ion currents.
    +792    """
     793
    -794        Returns
    -795        -------
    -796        list
    -797            The list of total ion currents.
    -798        """
    -799        return self._tic_list
    +794    def __init__(
    +795        self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
    +796    ):
    +797        self._ri = None
    +798        self._rt_list = list(rt_list)
    +799        self._tic_list = list(tic_list)
    +800        self.mass_spectra = list(mass_spectra)
    +801        super().__init__(
    +802            chromatogram_parent,
    +803            self.mass_spectra[apex_index],
    +804            (0, apex_index, len(self.mass_spectra) - 1),
    +805        )
    +806
    +807    @property
    +808    def rt_list(self):
    +809        """Get the list of retention times.
    +810
    +811        Returns
    +812        -------
    +813        list
    +814            The list of retention times.
    +815        """
    +816        return self._rt_list
    +817
    +818    @property
    +819    def tic_list(self):
    +820        """Get the list of total ion currents.
    +821
    +822        Returns
    +823        -------
    +824        list
    +825            The list of total ion currents.
    +826        """
    +827        return self._tic_list
     
    @@ -2988,18 +3064,18 @@
    Parameters
    -
    766    def __init__(
    -767        self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
    -768    ):
    -769        self._ri = None
    -770        self._rt_list = list(rt_list)
    -771        self._tic_list = list(tic_list)
    -772        self.mass_spectra = list(mass_spectra)
    -773        super().__init__(
    -774            chromatogram_parent,
    -775            self.mass_spectra[apex_index],
    -776            (0, apex_index, len(self.mass_spectra) - 1),
    -777        )
    +            
    794    def __init__(
    +795        self, chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list
    +796    ):
    +797        self._ri = None
    +798        self._rt_list = list(rt_list)
    +799        self._tic_list = list(tic_list)
    +800        self.mass_spectra = list(mass_spectra)
    +801        super().__init__(
    +802            chromatogram_parent,
    +803            self.mass_spectra[apex_index],
    +804            (0, apex_index, len(self.mass_spectra) - 1),
    +805        )
     
    diff --git a/docs/corems/encapsulation/constant.html b/docs/corems/encapsulation/constant.html index a9a4c3eb..3edf7dfc 100644 --- a/docs/corems/encapsulation/constant.html +++ b/docs/corems/encapsulation/constant.html @@ -150,1013 +150,1289 @@

    -
       1class Labels: #pragma: no cover
    -   2    """ Class for Labels used in CoreMS
    -   3    
    +                        
       1class Labels:  # pragma: no cover
    +   2    """Class for Labels used in CoreMS
    +   3
        4    These labels are used to define:
    -   5    * types of columns in plaintext data inputs, 
    +   5    * types of columns in plaintext data inputs,
        6    * types of data/mass spectra
        7    * types of assignment for ions
        8
        9    """
    -  10    mz = "m/z"
    -  11    abundance = "Peak Height"
    -  12    rp = "Resolving Power"
    -  13    s2n = "S/N"
    -  14
    -  15    label = 'label'
    -  16    bruker_profile = 'Bruker_Profile'
    -  17    thermo_profile = 'Thermo_Profile'
    -  18    simulated_profile = 'Simulated Profile'
    -  19    booster_profile = 'Booster Profile'
    -  20    bruker_frequency = 'Bruker_Frequency'
    -  21    midas_frequency = 'Midas_Frequency'
    -  22    thermo_centroid = 'Thermo_Centroid'
    -  23    corems_centroid = 'CoreMS_Centroid'
    -  24    gcms_centroid = 'Thermo_Centroid'
    -  25    
    -  26    unassigned = 'unassigned'
    -  27
    -  28    radical_ion = 'RADICAL'
    -  29    protonated_de_ion = 'DE_OR_PROTONATED'
    -  30    protonated = "protonated"
    -  31    de_protonated = "de-protonated"
    -  32    adduct_ion = "ADDUCT"
    -  33    neutral = 'neutral'
    -  34    ion_type = 'IonType'
    -  35
    -  36    ion_type_translate = { 'protonated': 'DE_OR_PROTONATED',
    -  37                          'de-protonated': 'DE_OR_PROTONATED',
    -  38                          'radical': 'RADICAL',
    -  39                          'adduct': 'ADDUCT',
    -  40                          'ADDUCT': 'ADDUCT'}
    -  41    
    -  42class Atoms: #pragma: no cover
    -  43    """ Class for Atoms in CoreMS
    +  10
    +  11    mz = "m/z"
    +  12    abundance = "Peak Height"
    +  13    rp = "Resolving Power"
    +  14    s2n = "S/N"
    +  15
    +  16    label = "label"
    +  17    bruker_profile = "Bruker_Profile"
    +  18    thermo_profile = "Thermo_Profile"
    +  19    simulated_profile = "Simulated Profile"
    +  20    booster_profile = "Booster Profile"
    +  21    bruker_frequency = "Bruker_Frequency"
    +  22    midas_frequency = "Midas_Frequency"
    +  23    thermo_centroid = "Thermo_Centroid"
    +  24    corems_centroid = "CoreMS_Centroid"
    +  25    gcms_centroid = "Thermo_Centroid"
    +  26
    +  27    unassigned = "unassigned"
    +  28
    +  29    radical_ion = "RADICAL"
    +  30    protonated_de_ion = "DE_OR_PROTONATED"
    +  31    protonated = "protonated"
    +  32    de_protonated = "de-protonated"
    +  33    adduct_ion = "ADDUCT"
    +  34    neutral = "neutral"
    +  35    ion_type = "IonType"
    +  36
    +  37    ion_type_translate = {
    +  38        "protonated": "DE_OR_PROTONATED",
    +  39        "de-protonated": "DE_OR_PROTONATED",
    +  40        "radical": "RADICAL",
    +  41        "adduct": "ADDUCT",
    +  42        "ADDUCT": "ADDUCT",
    +  43    }
       44
    -  45    This class includes key properties of atoms (and the electron) and isotopes, including their exact masses, relative abundances, and covalences. 
    -  46    It also associates which isotopes are for the same element, and provides an ordering of elements.
    -  47
    -  48    IUPAC definition of monoisotopic mass is based on the most abundant isotopes of each element present.
    -  49    Here, we will use atom symbols with isotope numbers for all isotopes excluding the most abundant one.
    -  50    This list has been corrected up to Iodine. 
    -  51    
    -  52    References
    -  53    ----------
    -  54
    -  55    1. NIST - Last Accessed 2019-06-12
    -  56    https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses
    -  57
    -  58    """
    -  59    electron_mass = 0.0005_485_799_090_65 #NIST value
    -  60
    -  61    atomic_masses = {'H': 1.00782503223,
    -  62                     'D': 2.01410177812,
    -  63                     'T': 3.0160492779, #consider removing 
    -  64                     '3He': 3.0160293201,
    -  65                     'He': 4.00260325413,
    -  66                     '6Li': 6.0151228874,
    -  67                     'Li': 7.0160034366,
    -  68                     'Be': 9.012183065,
    -  69                     '10B': 10.01293695,
    -  70                     'B': 11.00930536,
    -  71                     'C': 12.0,
    -  72                     '13C': 13.00335483507,
    -  73                     '14C': 14.0032419884,
    -  74                     'N': 14.00307400443,
    -  75                     '15N': 15.00010889888,
    -  76                     'O': 15.99491461957,
    -  77                     '17O': 16.99913175650,
    -  78                     '18O': 17.99915961286,
    -  79                     'F': 18.99840316273,
    -  80                     'Ne': 19.9924401762,
    -  81                     '21Ne': 20.993846685,
    -  82                     '22Ne': 21.991385114,
    -  83                     'Na': 22.9897692820,
    -  84                     'Mg': 23.985041697,
    -  85                     '25Mg': 24.985836976,
    -  86                     '26Mg': 25.982592968,
    -  87                     'Al': 26.98153853,
    -  88                     'Si': 27.97692653465,
    -  89                     '29Si': 28.97649466490,
    -  90                     '30Si': 29.973770136,
    -  91                     'P': 30.97376199842,
    -  92                     'S': 31.9720711744,
    -  93                     '33S': 32.9714589098,
    -  94                     '34S': 33.967867004,
    -  95                     '36S': 35.96708071,
    -  96                     'Cl': 34.968852682,
    -  97                     '37Cl': 36.965902602,
    -  98                     '36Ar': 35.967545105,
    -  99                     '38Ar': 37.96273211,
    - 100                     'Ar': 39.9623831237,
    - 101                     'K': 38.9637064864,
    - 102                     '40K': 39.963998166,
    - 103                     '41K': 40.9618252579,
    - 104                     'Ca': 39.962590863,
    - 105                     '42Ca': 41.95861783,
    - 106                     '43Ca': 42.95876644,
    - 107                     '44Ca': 43.95548156,
    - 108                     '46Ca': 45.9536890,
    - 109                     '48Ca': 47.95252276,
    - 110                     'Sc': 44.95590828,
    - 111                     '46Ti': 45.95262772,
    - 112                     '47Ti': 46.95175879,
    - 113                     'Ti': 47.94794198,
    - 114                     '49Ti': 48.94786568,
    - 115                     '50Ti': 49.94478689,
    - 116                     '50V': 49.94715601,
    - 117                     'V': 50.94395704,
    - 118                     '50Cr': 49.94604183,
    - 119                     'Cr': 51.94050623,
    - 120                     '53Cr': 52.94064815,
    - 121                     '54Cr': 53.93887916,
    - 122                     'Mn': 54.93804391,
    - 123                     '54Fe': 53.93960899,
    - 124                     'Fe': 55.93493633,
    - 125                     '57Fe': 56.93539284,
    - 126                     '58Fe': 57.93327443,
    - 127                     'Co': 58.93319429,
    - 128                     'Ni': 57.93534241,
    - 129                     '60Ni': 59.93078588,
    - 130                     '61Ni': 60.93105557,
    - 131                     '62Ni': 61.92834537,
    - 132                     '64Ni': 63.92796682,
    - 133                     'Cu': 62.92959772,
    - 134                     '65Cu': 64.92778970,
    - 135                     'Zn': 63.92914201,
    - 136                     '66Zn': 65.92603381,
    - 137                     '67Zn': 66.92712775,
    - 138                     '68Zn': 67.92484455,
    - 139                     '70Zn': 69.9253192,
    - 140                     'Ga': 68.9255735,
    - 141                     '71Ga': 70.92470258,
    - 142                     '70Ge': 69.92424875,
    - 143                     '72Ge': 71.922075826,
    - 144                     '73Ge': 72.923458956,
    - 145                     'Ge': 73.921177761,
    - 146                     '76Ge': 75.921402726,
    - 147                     'As': 74.92159457,
    - 148                     '74Se': 73.922475934,
    - 149                     '76Se': 75.919213704,
    - 150                     '77Se': 76.919914154,
    - 151                     '78Se': 77.91730928,
    - 152                     'Se': 79.9165218,
    - 153                     '82Se': 81.9166995,
    - 154                     'Br': 78.9183376,
    - 155                     '81Br': 80.9162897,
    - 156                     '78Kr': 77.92036494,
    - 157                     '80Kr': 79.91637808,
    - 158                     '82Kr': 81.91348273,
    - 159                     '83Kr': 82.91412716,
    - 160                     'Kr': 83.9114977282,
    - 161                     '86Kr': 85.9106106269,
    - 162                     'Rb': 84.9117897379,
    - 163                     '87Rb': 86.9091805310,
    - 164                     '84Sr': 83.9134191,
    - 165                     '86Sr': 85.9092606,
    - 166                     '87Sr': 86.9088775,
    - 167                     'Sr': 87.9056125,
    - 168                     'Y': 88.9058403,
    - 169                     'Zr': 89.9046977,
    - 170                     '91Zr': 90.9056396,
    - 171                     '92Zr': 91.9050347,
    - 172                     '94Zr': 93.9063108,
    - 173                     '96Zr': 95.9082714,
    - 174                     'Nb': 92.9063730,
    - 175                     '92Mo': 91.90680796,
    - 176                     '94Mo': 93.90508490,
    - 177                     '95Mo': 94.90583877,
    - 178                     '96Mo': 95.90467612,
    - 179                     '97Mo': 96.90601812,
    - 180                     'Mo': 97.90540482,
    - 181                     '100Mo': 99.9074718,
    - 182                     #'97Tc': 96.9063667,
    - 183                     #'98Tc': 97.9072124,
    - 184                     'Tc': 98.9062508, #consider removing technetium as it is radioactive
    - 185                     '96Ru': 95.90759025,
    - 186                     '98Ru': 97.9052868,
    - 187                     '99Ru': 98.9059341,
    - 188                     '100Ru': 99.9042143,
    - 189                     '101Ru': 100.9055769,
    - 190                     'Ru': 101.9043441,
    - 191                     '104Ru': 103.9054275,
    - 192                     'Rh': 102.9054980,
    - 193                     '102Pd': 101.9056022,
    - 194                     '104Pd': 103.9040305,
    - 195                     '105Pd': 104.9050796,
    - 196                     'Pd': 105.9034804,
    - 197                     '108Pd': 107.9038916,
    - 198                     '110Pd': 109.9051722,
    - 199                     'Ag': 106.9050916,
    - 200                     '109Ag': 108.9047553,
    - 201                     '106Cd': 105.9064599,
    - 202                     '108Cd': 107.9041834,
    - 203                     '110Cd': 109.90300661,
    - 204                     '111Cd': 110.90418287,
    - 205                     'Cd': 111.90276287,
    - 206                     '113Cd': 112.90440813,
    - 207                     '114Cd': 113.90336509,
    - 208                     '116Cd': 115.90476315,
    - 209                     '113In': 112.90406184,
    - 210                     'In': 114.903878776,
    - 211                     '112Sn': 111.90482387,
    - 212                     '114Sn': 113.9027827,
    - 213                     '115Sn': 114.903344699,
    - 214                     '116Sn': 115.90174280,
    - 215                     '117Sn': 116.90295398,
    - 216                     '118Sn': 117.90160657,
    - 217                     '119Sn': 118.90331117,
    - 218                     'Sn': 119.90220163,
    - 219                     '122Sn': 121.9034438,
    - 220                     '124Sn': 123.9052766,
    - 221                     'Sb': 120.9038120,
    - 222                     '123Sb': 122.9042132,
    - 223                     '120Te': 119.9040593,
    - 224                     '122Te': 121.9030435,
    - 225                     '123Te': 122.9042698,
    - 226                     '124Te': 123.9028171,
    - 227                     '125Te': 124.9044299,
    - 228                     '126Te': 125.9033109,
    - 229                     '128Te': 127.90446128,
    - 230                     'Te': 129.906222748,
    - 231                     'I': 126.9044719,
    - 232                     '124Xe': 123.9058920,
    - 233                     '126Xe': 125.9042983,
    - 234                     '128Xe': 127.9035310,
    - 235                     '129Xe': 128.9047808611,
    - 236                     '130Xe': 129.903509349,
    - 237                     '131Xe': 130.90508406,
    - 238                     'Xe': 131.9041550856,
    - 239                     '134Xe': 133.90539466,
    - 240                     '136Xe': 135.907214484,
    - 241                     'Cs': 132.9054519610,
    - 242                     '130Ba': 129.9063207,
    - 243                     '132Ba': 131.9050611,
    - 244                     '134Ba': 133.90450818,
    - 245                     '135Ba': 134.90568838,
    - 246                     '136Ba': 135.90457573,
    - 247                     '137Ba': 136.90582714,
    - 248                     'Ba': 137.90524700,
    - 249                     '138La': 137.9071149,
    - 250                     'La': 138.9063563,
    - 251                     '136Ce': 135.90712921,
    - 252                     '138Ce': 137.905991,
    - 253                     'Ce': 139.9054431,
    - 254                     '142Ce': 141.9092504,
    - 255                     'Pr': 140.9076576,
    - 256                     'Nd': 141.9077290,
    - 257                     '143Nd': 142.9098200,
    - 258                     '144Nd': 143.9100930,
    - 259                     '145Nd': 144.9125793,
    - 260                     '146Nd': 145.9131226,
    - 261                     '148Nd': 147.9168993,
    - 262                     '150Nd': 149.9209022,
    - 263                     '145Pm': 144.9127559,
    - 264                     '147Pm': 146.9151450,
    - 265                     '144Sm': 143.9120065,
    - 266                     '147Sm': 146.9149044,
    - 267                     '148Sm': 147.9148292,
    - 268                     '149Sm': 148.9171921,
    - 269                     '150Sm': 149.9172829,
    - 270                     'Sm': 151.9197397,
    - 271                     '154Sm': 153.9222169,
    - 272                     '151Eu': 150.9198578,
    - 273                     'Eu': 152.9212380,
    - 274                     '152Gd': 151.9197995,
    - 275                     '154Gd': 153.9208741,
    - 276                     '155Gd': 154.9226305,
    - 277                     '156Gd': 155.9221312,
    - 278                     '157Gd': 156.9239686,
    - 279                     'Gd': 157.9241123,
    - 280                     '160Gd': 159.9270624,
    - 281                     'Tb': 158.9253547,
    - 282                     '156Dy': 155.9242847,
    - 283                     '158Dy': 157.9244159,
    - 284                     '160Dy': 159.9252046,
    - 285                     '161Dy': 160.9269405,
    - 286                     '162Dy': 161.9268056,
    - 287                     '163Dy': 162.9287383,
    - 288                     'Dy': 163.9291819,
    - 289                     'Ho': 164.9303288,
    - 290                     '162Er': 161.9287884,
    - 291                     '164Er': 163.9292088,
    - 292                     'Er': 165.9302995,
    - 293                     '167Er': 166.9320546,
    - 294                     '168Er': 167.9323767,
    - 295                     '170Er': 169.9354702,
    - 296                     'Tm': 168.9342179,
    - 297                     '168Yb': 167.9338896,
    - 298                     '170Yb': 169.9347664,
    - 299                     '171Yb': 170.9363302,
    - 300                     '172Yb': 171.9363859,
    - 301                     '173Yb': 172.9382151,
    - 302                     'Yb': 173.9388664,
    - 303                     '176Yb': 175.9425764,
    - 304                     'Lu': 174.9407752,
    - 305                     '176Lu': 175.9426897,
    - 306                     '174Hf': 173.9400461,
    - 307                     '176Hf': 175.9414076,
    - 308                     '177Hf': 176.9432277,
    - 309                     '178Hf': 177.9437058,
    - 310                     '179Hf': 178.9458232,
    - 311                     'Hf': 179.9465570,
    - 312                     '180Ta': 179.9474648,
    - 313                     'Ta': 180.9479958,
    - 314                     '180W': 179.9467108,
    - 315                     '182W': 181.94820394,
    - 316                     '183W': 182.95022275,
    - 317                     'W': 183.95093092,
    - 318                     '186W': 185.9543628,
    - 319                     '185Re': 184.9529545,
    - 320                     'Re': 186.9557501,
    - 321                     '184Os': 183.9524885,
    - 322                     '186Os': 185.9538350,
    - 323                     '187Os': 186.9557474,
    - 324                     '188Os': 187.9558352,
    - 325                     '189Os': 188.9581442,
    - 326                     '190Os': 189.9584437,
    - 327                     '192Os': 191.9614770,
    - 328                     '191Ir': 190.9605893,
    - 329                     'Ir': 192.9629216,
    - 330                     '190Pt': 189.9599297,
    - 331                     '192Pt': 191.9610387,
    - 332                     '194Pt': 193.9626809,
    - 333                     'Pt': 194.9647917,
    - 334                     '196Pt': 195.96495209,
    - 335                     '198Pt': 197.9678949,
    - 336                     'Au': 196.96656879,
    - 337                     '196Hg': 195.9658326,
    - 338                     '198Hg': 197.96676860,
    - 339                     '199Hg': 198.96828064,
    - 340                     '200Hg': 199.96832659,
    - 341                     '201Hg': 200.97030284,
    - 342                     'Hg': 201.97064340,
    - 343                     '204Hg': 203.97349398,
    - 344                     '203Tl': 202.9723446,
    - 345                     'Tl': 204.9744278,
    - 346                     '204Pb': 203.9730440,
    - 347                     '206Pb': 205.9744657,
    - 348                     '207Pb': 206.9758973,
    - 349                     'Pb': 207.9766525,
    - 350                     'Bi': 208.9803991,
    - 351                     '209Po': 208.9824308,
    - 352                     '210Po': 209.9828741,
    - 353                     '210At': 209.9871479,
    - 354                     '211At': 210.9874966,
    - 355                     '211Rn': 210.9906011,
    - 356                     '220Rn': 220.0113941,
    - 357                     '222Rn': 222.0175782,
    - 358                     '223Fr': 223.0197360,
    - 359                     '223Ra': 223.0185023,
    - 360                     '224Ra': 224.0202120,
    - 361                     '226Ra': 226.0254103,
    - 362                     '228Ra': 228.0310707,
    - 363                     '227Ac': 227.0277523,
    - 364                     '230Th': 230.0331341,
    - 365                     'Th': 232.0380558,
    - 366                     'Pa': 231.0358842,
    - 367                     '233U': 233.0396355,
    - 368                     '234U': 234.0409523,
    - 369                     '235U': 235.0439301,
    - 370                     '236U': 236.0455682,
    - 371                     'U': 238.0507884,
    - 372                     '236Np': 236.046570,
    - 373                     '237Np': 237.0481736,
    - 374                     '238Pu': 238.0495601,
    - 375                     '239Pu': 239.0521636,
    - 376                     '240Pu': 240.0538138,
    - 377                     '241Pu': 241.0568517,
    - 378                     '242Pu': 242.0587428,
    - 379                     '244Pu': 244.0642053,
    - 380                     '241Am': 241.0568293,
    - 381                     '243Am': 243.0613813,
    - 382                     '243Cm': 243.0613893,
    - 383                     '244Cm': 244.0627528,
    - 384                     '245Cm': 245.0654915,
    - 385                     '246Cm': 246.0672238,
    - 386                     '247Cm': 247.0703541,
    - 387                     '248Cm': 248.0723499,
    - 388                     '247Bk': 247.0703073,
    - 389                     '249Bk': 249.0749877,
    - 390                     '249Cf': 249.0748539,
    - 391                     '250Cf': 250.0764062,
    - 392                     '251Cf': 251.0795886,
    - 393                     '252Cf': 252.0816272,
    - 394                     '252Es': 252.082980,
    - 395                     '257Fm': 257.0951061,
    - 396                     '258Md': 258.0984315,
    - 397                     '260Md': 260.10365, 
    - 398                     '259No': 259.10103,
    - 399                     '262Lr': 262.10961,
    - 400                     '267Rf': 267.12179,
    - 401                     '268Db': 268.12567,
    - 402                     '271Sg': 271.13393,
    - 403                     '272Bh': 272.13826,
    - 404                     '270Hs': 270.13429,
    - 405                     '276Mt': 276.15159,
    - 406                     '281Ds': 281.16451,
    - 407                     '280Rg': 280.16514,
    - 408                     '285Cn': 285.17712,
    - 409                     '284Nh': 284.17873,
    - 410                     '289Fl': 289.19042,
    - 411                     '288Mc': 288.19274,
    - 412                     '293Lv': 293.20449,
    - 413                     '292Ts': 292.20746,
    - 414                     '294Og': 294.21392}
    - 415    
    - 416
    - 417    # This list orders atoms, starting with the most abundant isotopes first, ordered as described. 
    - 418    # Less abundant isotopes are ordered by mass at the end of the list.
    - 419    atoms_order = ['C', 'H', 'O', 'N', 'P', 'S', # CHONPS
    - 420                    'F', 'Cl', 'Br', 'I', 'At', #Halogens
    - 421                    'Li','Na','K','Rb','Cs','Fr', #Alkali
    - 422                    'He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn', #Noble gasses
    - 423                    'Be','B', #Row 2
    - 424                    'Mg', 'Al','Si', #Row 3
    - 425                    'Ca','Sc','Ti','V','Cr','Mn','Fe','Co','Ni','Cu','Zn','Ga','Ge','As','Se', #Row 4
    - 426                    'Sr','Y','Zr','Nb','Mo','Tc','Ru','Rh','Pd','Ag','Cd','In','Sn','Sb','Te', #Row 5
    - 427                    'Ba','La', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', #Row 6
    - 428                    'Ra', 'Ac', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og', #Row 7
    - 429                    'Ce','Pr','Nd','Pm','Sm','Eu','Gd','Tb','Dy','Ho','Er','Tm','Yb','Lu', #Lanthanides
    - 430                    'Th','Pa','U','Np','Pu','Am','Cm','Bk','Cf','Es','Fm','Md','No','Lr', #Actinides
    - 431                    # Less abundant isotopes follow
    - 432                    'D','6Li',
    - 433                    '10B', '13C','15N','17O','18O',
    - 434                    '22Ne',  '25Mg', '26Mg',  '29Si', 
    - 435                    '30Si', '33S', '34S','36S', '37Cl',
    - 436                    '40Ca', '41K', '44Ca','46Ti', '47Ti',  '49Ti', 
    - 437                    '50Cr', '50Ti', '50V', '53Cr', '54Cr',  '54Fe', '57Fe', '58Fe',
    - 438                    '60Ni', '61Ni', '62Ni', '65Cu', '66Zn','67Zn', '68Zn',  
    - 439                    '70Ge', '71Ga', '72Ge', '73Ge',  '76Ge', '76Se', '77Se', '78Se',  
    - 440                    '81Br', '80Kr','82Kr', '82Se', '83Kr', '85Rb', '86Kr', '86Sr', '87Rb', '87Sr', '88Sr',  
    - 441                    '91Zr', '92Mo', '92Zr','94Mo', '94Zr', '95Mo', '96Mo', '96Ru', '96Zr', '97Mo',  '98Ru', '99Ru',
    - 442                    '100Mo', '100Ru', '101Ru', '102Pd', '104Pd', '104Ru', '105Pd', '106Cd', '106Pd', '108Cd', '108Pd','109Ag',
    - 443                    '110Cd', '110Pd', '111Cd', '112Cd', '112Sn', '113Cd', '113In', '114Cd', '114Sn','115In', '115Sn', '116Cd', '116Sn', '117Sn', '118Sn', '119Sn', 
    - 444                    '120Sn', '120Te', '121Sb', '122Sn', '122Te', '123Sb', '123Te', '124Sn', '124Te', '124Xe', '125Te', '126Te', '126Xe',
    - 445                    '128Te', '128Xe', '129Xe', '130Ba', '130Te', '130Xe', '131Xe', '132Ba', '132Xe', '134Ba',
    - 446                    '134Xe', '135Ba', '136Ba', '136Xe', '137Ba', '138Ba', 
    - 447                    '174Hf', '176Hf', '177Hf', '178Hf','179Hf', 
    - 448                    '180Hf', '180W', '182W', '183W', '184Os', '184W', '185Re', '186Os', '186W', '187Os', '187Re','188Os', '189Os', 
    - 449                    '190Os', '190Pt', '191Ir', '192Ir', '192Os', '192Pt', '194Pt', '195Pt', '196Hg','196Pt', '198Hg', '198Pt','199Hg', 
    - 450                    '200Hg', '201Hg', '202Hg', '203Tl', '204Hg', '204Pb', '205Tl','206Pb','207Pb', '208Pb'
    - 451                    ]
    - 452
    - 453    atoms_covalence = {'C': (4),
    - 454                     '13C': (4),
    - 455                     'N': (3),
    - 456                     'O': (2),
    - 457                     'S': (2),
    - 458                     'H': (1),
    - 459                     'F': (1, 0),
    - 460                     'Cl': (1, 0),
    - 461                     'Br': (1, 0),
    - 462                     'I': (1, 0),
    - 463                     'At': (1),
    - 464                     'Li': (1, 0),
    - 465                     'Na': (1, 0),
    - 466                     'K':  (1, 0),
    - 467                     'Rb': (1),
    - 468                     'Cs': (1),
    - 469                     'Fr': (1),
    - 470                     'B': (4, 3, 2, 1),
    - 471                     'In': (3, 2, 1),
    - 472                     'Al': (3, 1, 2),
    - 473                     'P': (3, 5, 4, 2, 1),
    - 474                     'Ga': (3, 1, 2),
    - 475                     'Mg': (2, 1),
    - 476                     'Be': (2, 1),
    - 477                     'Ca': (2, 1),
    - 478                     'Sr': (2, 1),
    - 479                     'Ba': (2),
    - 480                     'Ra': (2),
    - 481                     'V': (5, 4, 3, 2, 1),
    - 482                     'Fe': (3, 2, 4, 5, 6),
    - 483                     'Si': (4, 3, 2),
    - 484                     'Sc': (3, 2, 1),
    - 485                     'Ti': (4, 3, 2, 1),
    - 486                     'Cr': (1, 2, 3, 4, 5, 6),
    - 487                     'Mn': (1, 2, 3, 4, 5, 6, 7),
    - 488                     'Co': (1, 2, 3, 4, 5),
    - 489                     'Ni': (1, 2, 3, 4),
    - 490                     'Cu': (2, 1, 3, 4),
    - 491                     'Zn': (2, 1),
    - 492                     'Ge': (4, 3, 2, 1),
    - 493                     'As': (5, 3, 2, 1),
    - 494                     'Se': (6, 4, 2, 1),
    - 495                     'Y': (3, 2, 1),
    - 496                     'Zr': (4, 3, 2, 1),
    - 497                     'Nb': (5, 4, 3, 2, 1),
    - 498                     'Mo': (6, 5, 4, 3, 2, 1),
    - 499                     'Tc': (7, 6, 5, 4, 3, 2, 1),
    - 500                     'Ru': (8, 7, 6, 5, 4, 3, 2, 1),
    - 501                     'Rh': (6, 5, 4, 3, 2, 1),
    - 502                     'Pd': (4, 2, 1),
    - 503                     'Ag': (0, 1, 2, 3, 4),
    - 504                     'Cd': (2, 1),
    - 505                     'Sn': (4, 2),
    - 506                     'Sb': (5, 3),
    - 507                     'Te': (6, 5, 4, 2),
    - 508                     'La': (3, 2),
    - 509                     'Hf': (4, 3, 2),
    - 510                     'Ta': (5, 4, 3, 2),
    - 511                     'W': (6, 5, 4, 3, 2, 1),
    - 512                     'Re': (4, 7, 6, 5, 3, 2, 1),
    - 513                     'Os': (4, 8, 7, 6, 5, 3, 2, 1),
    - 514                     'Ir': (4, 8, 6, 5, 3, 2, 1),
    - 515                     'Pt': (4, 6, 5, 3, 2, 1),
    - 516                     'Au': (3, 5, 2, 1),
    - 517                     'Hg': (1, 2, 4),
    - 518                     'Tl': (3, 1),
    - 519                     'Pb': (4, 2),
    - 520                     'Bi': (3, 1, 5),
    - 521                     'Po': (2, 4, 6),
    - 522                     'Ac': (3, 2)
    - 523                     }
    - 524
    - 525    isotopic_abundance = {'H': 0.999885,
    - 526                          'D': 0.000115,
    - 527                          'T': 0, #Consider removing.
    - 528                          '3He': 0.00000134,
    - 529                          'He': 0.99999866,
    - 530                          '6Li': 0.0759,
    - 531                          'Li': 0.9241,
    - 532                          'Be': 1.0,
    - 533                          '10B': 0.199,
    - 534                          'B': 0.801,
    - 535                          'C': 0.9893,
    - 536                          '13C': 0.0107,
    - 537                          '14C': 0,
    - 538                          'N': 0.99636,
    - 539                          '15N': 0.00364,
    - 540                          'O': 0.99757,
    - 541                          '17O': 0.00038,
    - 542                          '18O': 0.00205,
    - 543                          'F': 1.0,
    - 544                          'Ne': 0.9048,
    - 545                          '21Ne': 0.0027,
    - 546                          '22Ne': 0.0925,
    - 547                          'Na': 1.0,
    - 548                          'Mg': 0.7899,
    - 549                          '25Mg': 0.1000,
    - 550                          '26Mg': 0.1101,
    - 551                          'Al': 1.0,
    - 552                          'Si': 0.92223,
    - 553                          '29Si': 0.04685,
    - 554                          '30Si': 0.03092,
    - 555                          'P': 1.0,
    - 556                          'S': 0.9499,
    - 557                          '33S': 0.0075,
    - 558                          '34S': 0.0425,
    - 559                          '36S': 0.0001,
    - 560                          'Cl': 0.7576,
    - 561                          '37Cl': 0.2424,
    - 562                          '36Ar': 0.003336,
    - 563                          '38Ar': 0.000629,
    - 564                          'Ar': 0.996035,
    - 565                          'K': 0.932581,
    - 566                          '40K': 0.000117,
    - 567                          '41K': 0.067302,
    - 568                          'Ca': 0.96941,
    - 569                          '42Ca': 0.00647,
    - 570                          '43Ca': 0.00135,
    - 571                          '44Ca': 0.02086,
    - 572                          '46Ca': 0.00004,
    - 573                          '48Ca': 0.001872,
    - 574                          'Sc': 1.0,
    - 575                          '46Ti': 0.0825,
    - 576                          '47Ti': 0.0744,
    - 577                          'Ti': 0.7372,
    - 578                          '49Ti': 0.0541,
    - 579                          '50Ti': 0.0518,
    - 580                          '50V': 0.00250,
    - 581                          'V': 0.9975,
    - 582                          '50Cr': 0.04345,
    - 583                          'Cr': 0.83789,
    - 584                          '53Cr': 0.09501,
    - 585                          '54Cr': 0.02365,
    - 586                          'Mn': 1.0,
    - 587                          '54Fe': 0.05845,
    - 588                          'Fe': 0.91754,
    - 589                          '57Fe': 0.02119,
    - 590                          '58Fe': 0.00282,
    - 591                          'Co': 1.0,
    - 592                          'Ni': 0.68077,
    - 593                          '60Ni': 0.26223,
    - 594                          '61Ni': 0.011399,
    - 595                          '62Ni': 0.036346,
    - 596                          '64Ni': 0.009255,
    - 597                          'Cu': 0.6915,
    - 598                          '65Cu': 0.3085,
    - 599                          'Zn': 0.4917,
    - 600                          '66Zn': 0.2773,
    - 601                          '67Zn': 0.0404,
    - 602                          '68Zn': 0.1845,
    - 603                          '70Zn': 0.0061,
    - 604                          'Ga': 0.60108,
    - 605                          '71Ga': 0.39892,
    - 606                          '70Ge': 0.2057,
    - 607                          '72Ge': 0.2745,
    - 608                          '73Ge': 0.0775,
    - 609                          'Ge': 0.3650,
    - 610                          '76Ge': 0.0773,
    - 611                          'As': 1.0,
    - 612                          '74Se': 0.0089,
    - 613                          '76Se': 0.0937,
    - 614                          '77Se': 0.0763,
    - 615                          '78Se': 0.2377,
    - 616                          'Se': 0.4961,
    - 617                          '82Se': 0.0873,
    - 618                          'Br': 0.5069,
    - 619                          '81Br': 0.4931,
    - 620                          '78Kr': 0.00355,
    - 621                          '80Kr': 0.02286,
    - 622                          '82Kr': 0.11593,
    - 623                          '83Kr': 0.11500,
    - 624                          'Kr': 0.56987,
    - 625                          '86Kr': 0.17279,
    - 626                          'Rb': 0.7217,
    - 627                          '87Rb': 0.2783,
    - 628                          '84Sr': 0.0056,
    - 629                          '86Sr': 0.0986,
    - 630                          '87Sr': 0.0700,
    - 631                          'Sr': 0.8258,
    - 632                          'Y': 1.0,
    - 633                          'Zr': 0.5145,
    - 634                          '91Zr': 0.1122,
    - 635                          '92Zr': 0.1715,
    - 636                          '94Zr': 0.1738,
    - 637                          '96Zr': 0.0280,
    - 638                          'Nb': 1.0,
    - 639                          '92Mo': 0.1453,
    - 640                          '94Mo': 0.0915,
    - 641                          '95Mo': 0.1584,
    - 642                          '96Mo': 0.1667,
    - 643                          '97Mo': 0.0960,
    - 644                          'Mo': 0.2439,
    - 645                          '100Mo': 0.0982,
    - 646                          '99Tc': 0, #consider removing
    - 647                          '96Ru': 0.0554,
    - 648                          '98Ru': 0.0187,
    - 649                          '99Ru': 0.1276,
    - 650                          '100Ru': 0.1260,
    - 651                          '101Ru': 0.1706,
    - 652                          'Ru': 0.3155,
    - 653                          '104Ru': 0.1862,
    - 654                          'Rh': 1.0,
    - 655                          '102Pd': 0.0102,
    - 656                          '104Pd': 0.1114,
    - 657                          '105Pd': 0.2233,
    - 658                          'Pd': 0.2733,
    - 659                          '108Pd': 0.2646,
    - 660                          '110Pd': 0.1172,
    - 661                          'Ag': 0.51839,
    - 662                          '109Ag': 0.48161,
    - 663                          '106Cd': 0.0125,
    - 664                          '108Cd': 0.0089,
    - 665                          '110Cd': 0.1249,
    - 666                          '111Cd': 0.1280,
    - 667                          'Cd': 0.2413,
    - 668                          '113Cd': 0.1222,
    - 669                          '114Cd': 0.2873,
    - 670                          '116Cd': 0.0749,
    - 671                          '113In': 0.0429,
    - 672                          'In': 0.9571,
    - 673                          '112Sn': 0.0097,
    - 674                          '114Sn': 0.0066,
    - 675                          '115Sn': 0.0034,
    - 676                          '116Sn': 0.1454,
    - 677                          '117Sn': 0.0768,
    - 678                          '118Sn': 0.2422,
    - 679                          '119Sn': 0.0859,
    - 680                          'Sn': 0.3258,
    - 681                          '122Sn': 0.0463,
    - 682                          '124Sn': 0.0579,
    - 683                          'Sb': 0.5721,
    - 684                          '123Sb': 0.4279,
    - 685                          '120Te': 0.0009,
    - 686                          '122Te': 0.0255,
    - 687                          '123Te': 0.0089,
    - 688                          '124Te': 0.0474,
    - 689                          '125Te': 0.0707,
    - 690                          '126Te': 0.1884,
    - 691                          '128Te': 0.3174,
    - 692                          'Te': 0.3408,
    - 693                          'I': 1.0,
    - 694                          '124Xe': 0.000952,
    - 695                          '126Xe': 0.000890,
    - 696                          '128Xe': 0.019102,
    - 697                          '129Xe': 0.264006,
    - 698                          '130Xe': 0.040710,
    - 699                          '131Xe': 0.212324,
    - 700                          'Xe': 0.269086,
    - 701                          '134Xe': 0.104357,
    - 702                          '136Xe': 0.088573,
    - 703                          'Cs': 1.0,
    - 704                          '130Ba': 0.00106,
    - 705                          '132Ba': 0.00101,
    - 706                          '134Ba': 0.02417,
    - 707                          '135Ba': 0.06592,
    - 708                          '136Ba': 0.07854,
    - 709                          '137Ba': 0.11232,
    - 710                          'Ba': 0.71698,
    - 711                          '138La': 0.0008881,
    - 712                          'La': 0.9991119,
    - 713                          '136Ce': 0.00185,
    - 714                          '138Ce': 0.00251,
    - 715                          'Ce': 0.88450,
    - 716                          '142Ce': 0.11114,
    - 717                          'Pr': 1.0,
    - 718                          'Nd': 0.27152,
    - 719                          '143Nd': 0.12174,
    - 720                          '144Nd': 0.23798,
    - 721                          '145Nd': 0.08293,
    - 722                          '146Nd': 0.17189,
    - 723                          '148Nd': 0.05756,
    - 724                          '150Nd': 0.05638,
    - 725                          '145Pm': 0,
    - 726                          '147Pm': 0,
    - 727                          '144Sm': 0.0307,
    - 728                          '147Sm': 0.1499,
    - 729                          '148Sm': 0.1124,
    - 730                          '149Sm': 0.1382,
    - 731                          '150Sm': 0.0738,
    - 732                          'Sm': 0.2675,
    - 733                          '154Sm': 0.2275,
    - 734                          '151Eu': 0.4781,
    - 735                          'Eu': 0.5219,
    - 736                          '152Gd': 0.0020,
    - 737                          '154Gd': 0.0218,
    - 738                          '155Gd': 0.1480,
    - 739                          '156Gd': 0.2047,
    - 740                          '157Gd': 0.1565,
    - 741                          'Gd': 0.2484,
    - 742                          '160Gd': 0.2186,
    - 743                          'Tb': 1.0,
    - 744                          '156Dy': 0.00056,
    - 745                          '158Dy': 0.00095,
    - 746                          '160Dy': 0.02329,
    - 747                          '161Dy': 0.18889,
    - 748                          '162Dy': 0.25475,
    - 749                          '163Dy': 0.24896,
    - 750                          'Dy': 0.28260,
    - 751                          'Ho': 1.0,
    - 752                          '162Er': 0.00139,
    - 753                          '164Er': 0.01601,
    - 754                          'Er': 0.33503,
    - 755                          '167Er': 0.22869,
    - 756                          '168Er': 0.26978,
    - 757                          '170Er': 0.14910,
    - 758                          'Tm': 1.0,
    - 759                          '168Yb': 0.00123,
    - 760                          '170Yb': 0.02982,
    - 761                          '171Yb': 0.1409,
    - 762                          '172Yb': 0.2168,
    - 763                          '173Yb': 0.16103,
    - 764                          'Yb': 0.32026,
    - 765                          '176Yb': 0.12996,
    - 766                          'Lu': 0.97401,
    - 767                          '176Lu': 0.02599,
    - 768                          '174Hf': 0.0016,
    - 769                          '176Hf': 0.0526,
    - 770                          '177Hf': 0.1860,
    - 771                          '178Hf': 0.2728,
    - 772                          '179Hf': 0.1362,
    - 773                          'Hf': 0.3508,
    - 774                          '180Ta': 0.0001201,
    - 775                          'Ta': 0.9998799,
    - 776                          '180W': 0.0012,
    - 777                          '182W': 0.2650,
    - 778                          '183W': 0.1431,
    - 779                          'W': 0.3064,
    - 780                          '186W': 0.2843,
    - 781                          '185Re': 0.3740,
    - 782                          'Re': 0.6260,
    - 783                          '184Os': 0.0002,
    - 784                          '186Os': 0.0159,
    - 785                          '187Os': 0.0196,
    - 786                          '188Os': 0.1324,
    - 787                          '189Os': 0.1615,
    - 788                          '190Os': 0.2626,
    - 789                          'Os': 0.4078,
    - 790                          '191Ir': 0.373,
    - 791                          'Ir': 0.627,
    - 792                          '190Pt': 0.00012,
    - 793                          '192Pt': 0.00782,
    - 794                          '194Pt': 0.3286,
    - 795                          'Pt': 0.3378,
    - 796                          '196Pt': 0.2521,
    - 797                          '198Pt': 0.07356,
    - 798                          'Au': 1.0,
    - 799                          '196Hg': 0.0015,
    - 800                          '198Hg': 0.0997,
    - 801                          '199Hg': 0.16872,
    - 802                          '200Hg': 0.2310,
    - 803                          '201Hg': 0.1318,
    - 804                          'Hg': 0.2986,
    - 805                          '204Hg': 0.0687,
    - 806                          '203Tl': 0.2952,
    - 807                          'Tl': 0.7048,
    - 808                          '204Pb': 0.014,
    - 809                          '206Pb': 0.241,
    - 810                          '207Pb': 0.221,
    - 811                          'Pb': 0.524,
    - 812                          'Bi': 1.0,
    - 813                          '209Po': 0,
    - 814                          '210Po': 0,
    - 815                          '210At': 0,
    - 816                          '211At': 0,
    - 817                          '211Rn': 0,
    - 818                          '220Rn': 0,
    - 819                          '222Rn': 0,
    - 820                          '223Fr': 0,
    - 821                          '223Ra': 0,
    - 822                          '224Ra': 0,
    - 823                          '226Ra': 0,
    - 824                          '228Ra': 0,
    - 825                          '227Ac': 0,
    - 826                          '230Th': 0,
    - 827                          'Th': 1.0,
    - 828                          'Pa': 1.0,
    - 829                          '233U': 0,
    - 830                          '234U': 0.000054,
    - 831                          '235U': 0.007204,
    - 832                          '236U': 0,
    - 833                          'U': 0.992742,
    - 834                          '236Np': 0,
    - 835                          '237Np': 0,
    - 836                          '238Pu': 0,
    - 837                          '239Pu': 0,
    - 838                          '240Pu': 0,
    - 839                          '241Pu': 0,
    - 840                          '242Pu': 0,
    - 841                          '244Pu': 0,
    - 842                          '241Am': 0,
    - 843                          '243Am': 0,
    - 844                          '243Cm': 0,
    - 845                          '244Cm': 0,
    - 846                          '245Cm': 0,
    - 847                          '246Cm': 0,
    - 848                          '247Cm': 0,
    - 849                          '248Cm': 0,
    - 850                          '247Bk': 0,
    - 851                          '249Bk': 0,
    - 852                          '249Cf': 0,
    - 853                          '250Cf': 0,
    - 854                          '251Cf': 0,
    - 855                          '252Cf': 0,
    - 856                          '252Es': 0,
    - 857                          '257Fm': 0,
    - 858                          '258Md': 0,
    - 859                          '260Md': 0,
    - 860                          '259No': 0,
    - 861                          '262Lr': 0,
    - 862                          '267Rf': 0,
    - 863                          '268Db': 0,
    - 864                          '271Sg': 0,
    - 865                          '272Bh': 0,
    - 866                          '270Hs': 0,
    - 867                          '276Mt': 0,
    - 868                          '281Ds': 0,
    - 869                          '280Rg': 0,
    - 870                          '285Cn': 0,
    - 871                          '284Nh': 0,
    - 872                          '289Fl': 0,
    - 873                          '288Mc': 0,
    - 874                          '293Lv': 0,
    - 875                          '292Ts': 0,
    - 876                          '294Og': 0}
    - 877    
    - 878    # Isotopes here is a dictionary of symbol, including full name, 
    - 879    # and then the isotopes which arent the most abundant one, sorted by abundance.
    - 880    # None indicates no stable isotopes/naturally occuring ones. 
    - 881    # This has been manually checked as far as Iodine only.
    - 882    isotopes = {'H': ['Hydrogen', ['D','T']],
    - 883                'He': ['Helium', ['3He']],
    - 884                'Li': ['Lithium', ['6Li']],
    - 885                'Be': ['Beryllium', [None]],
    - 886                'B': ['Boron', ['10B']],
    - 887                'C': ['Carbon', ['13C']],
    - 888                'N': ['Nitrogen', ['15N']],
    - 889                'O': ['Oxygen', ['18O', '17O']],
    - 890                'F': ['Fluorine', [None]],
    - 891                'Ne': ['Neon', ['22Ne', '21Ne']],
    - 892
    - 893                'Na': ['Sodium', [None]],
    - 894                'Mg': ['Magnesium', ['26Mg', '25Mg']],
    - 895                'Al': ['Aluminum', [None]],
    - 896                'Si': ['Silicon', ['29Si', '30Si']],
    - 897                'P': ['Phosphorus', [None]],
    - 898                'S': ['Sulfur', ['34S','33S','36S']],
    - 899                'Cl': ['Chlorine', ['37Cl']],
    - 900                'Ar': ['Argon', ['36Ar','38Ar']],
    - 901                
    - 902                'K': ['Potassium', ['41K', '40K']],
    - 903                'Ca': ['Calcium', ['44Ca', '48Ca','43Ca','42Ca','46Ca']],
    - 904                'Sc': ['Scandium', [None]],
    - 905                'Ti': ['Titanium', ['46Ti', '47Ti', '49Ti', '50Ti']],
    - 906                'V': ['Vanadium', ['50V']],
    - 907                'Cr': ['Chromium', ['53Cr', '50Cr', '54Cr']],
    - 908                'Mn': ['Manganese', [None]],
    - 909                'Fe': ['Iron', ['54Fe', '57Fe', '58Fe']],
    - 910                'Co': ['Cobalt', [None]],
    - 911                'Ni': ['Nickel', ['60Ni', '62Ni', '61Ni','64Ni']],
    - 912                'Cu': ['Copper', ['65Cu']],
    - 913                'Zn': ['Zinc', ['66Zn', '68Zn', '67Zn', '70Zn']],
    - 914                'Ga': ['Gallium', ['71Ga']],
    - 915                'Ge': ['Germanium', ['72Ge', '70Ge', '73Ge', '76Ge']],
    - 916                'As': ['Arsenic', [None]],
    - 917                'Se': ['Selenium', ['78Se', '76Se', '82Se', '77Se','74Se']],
    - 918                'Br': ['Bromine', ['81Br']],
    - 919                'Kr': ['Krypton', ['86Kr', '82Kr', '83Kr', '80Kr']],
    - 920
    - 921                'Rb': ['Rubidium', ['87Rb']],
    - 922                'Sr': ['Strontium', ['86Sr', '87Sr','84Sr']],
    - 923                'Y': ['Yttrium', [None]],
    - 924                'Zr': ['Zirconium', ['94Zr', '92Zr', '91Zr', '96Zr']],
    - 925                'Nb': ['Niobium', [None]],
    - 926                'Mo': ['Molybdenum', ['96Mo', '95Mo', '92Mo', '100Mo', '97Mo', '94Mo']],
    - 927                'Tc': ['Technetium', [None]], #consider removing
    - 928                'Ru': ['Ruthenium', ['104Ru', '101Ru', '99Ru', '100Ru', '96Ru', '98Ru']],
    - 929                'Rh': ['Rhodium', [None]],
    - 930                'Pd': ['Palladium', ['108Pd', '105Pd', '110Pd', '104Pd', '102Pd']],
    - 931                'Ag': ['Silver', ['109Ag']],
    - 932                'Cd': ['Cadmium', ['114Cd', '111Cd', '110Cd', '113Cd', '116Cd', '106Cd', '108Cd']],
    - 933                'In': ['Indium', ['113In']],
    - 934                'Sn': ['Tin', ['118Sn', '116Sn', '119Sn', '117Sn', '124Sn', '122Sn', '112Sn','114Sn', '115Sn']],
    - 935                'Sb': ['Antimony', ['123Sb']],
    - 936                'Te': ['Tellurium', ['128Te', '126Te', '125Te', '124Te', '122Te','123Te','120Te']],
    - 937                'I': ['Iodine', [None]],
    - 938                'Xe': ['Xenon', ['129Xe','131Xe', '134Xe', '136Xe', '130Xe', '128Xe']],
    - 939
    - 940                'Cs': ['Cesium', [None]],
    - 941                'Ba': ['Barium', ['137Ba', '136Ba', '135Ba', '134Ba']],
    - 942                'La': ['Lanthanum',['138La']],
    - 943                'Hf': ['Hafnium', ['178Hf', '177Hf', '179Hf', '176Hf']],
    - 944                'Ta': ['Tantalum', ['180Ta']],
    - 945                'W': ['Tungsten', ['186W', '182W', '183W']],
    - 946                'Re': ['Rhenium', ['185Re']],
    - 947                'Os': ['Osmium', ['190Os', '189Os', '188Os', '187Os', '186Os']],
    - 948                'Ir': ['Iridium', ['191Ir']],
    - 949                'Pt': ['Platinum', ['194Pt', '196Pt', '198Pt', '192Pt']],
    - 950                'Au': ['Gold', [None]],
    - 951                'Hg': ['Mercury', ['200Hg', '199Hg', '201Hg', '198Hg', '204Hg']],
    - 952                'Tl': ['Thallium', ['203Tl']],
    - 953                'Pb': ['Lead', ['206Pb', '207Pb', '204Pb']],
    - 954                'Bi': ['Bismuth', [None]],
    - 955                'Po': ['Polonium', [None]],
    - 956                'At': ['Astatine', [None]],
    - 957                'Rn': ['Radon', [None]],
    - 958
    - 959                'Fr': ['Francium', [None]],
    - 960                'Ra': ['Radium', [None]],
    - 961                'Ac': ['Actinium', [None]],
    - 962                'Rf': ['Rutherfordium', [None]],
    - 963                'Db': ['Dubnium', [None]],
    - 964                'Sg': ['Seaborgium', [None]],
    - 965                'Bh': ['Bohrium', [None]],
    - 966                'Hs': ['Hassium', [None]],
    - 967                'Mt': ['Meitnerium', [None]],
    - 968                'Ds': ['Darmstadtium', [None]],
    - 969                'Rg': ['Roentgenium', [None]],
    - 970                'Cn': ['Copernicium', [None]],
    - 971                'Nh': ['Nihonium', [None]],
    - 972                'Fl': ['Flerovium', [None]],
    - 973                'Mc': ['Moscovium', [None]],
    - 974                'Lv': ['Livermorium', [None]],
    - 975                'Ts': ['Tennessine', [None]],
    - 976                'Og': ['Oganesson', [None]],
    - 977
    - 978                'Ce': ['Cerium', ['142Ce','138Ce''136Ce']],
    - 979                'Pr': ['Praseodymium', [None]],
    - 980                'Nd': ['Neodymium', [None]],
    - 981                'Pm': ['Promethium', [None]],
    - 982                'Sm': ['Samarium', [None]],
    - 983                'Eu': ['Europium', [None]],
    - 984                'Gd': ['Gadolinium', [None]],
    - 985                'Tb': ['Terbium', [None]],
    - 986                'Dy': ['Dysprosium', [None]],
    - 987                'Ho': ['Holmium', [None]],
    - 988                'Er': ['Erbium', [None]],
    - 989                'Tm': ['Thulium', [None]],
    - 990                'Yb': ['Ytterbium', [None]],
    - 991                'Lu': ['Lutetium', ['176Lu']],
    - 992
    - 993                'Th': ['Thorium', [None]],
    - 994                'Pa': ['Protactinium', [None]],
    - 995                'U': ['Uranium', ['235U','234U']],
    - 996                'Np': ['Neptunium', [None]],
    - 997                'Pu': ['Plutonium', [None]],
    - 998                'Am': ['Americium', [None]],
    - 999                'Cm': ['Curium', [None]],
    -1000                'Bk': ['Berkelium', [None]],
    -1001                'Cf': ['Californium', [None]],
    -1002                'Es': ['Einsteinium', [None]],
    -1003                'Fm': ['Fermium', [None]],
    -1004                'Md': ['Mendelevium', [None]],
    -1005                'No': ['Nobelium', [None]],
    -1006                'Lr': ['Lawrencium', [None]]
    -1007                }
    +  45
    +  46class Atoms:  # pragma: no cover
    +  47    """Class for Atoms in CoreMS
    +  48
    +  49    This class includes key properties of atoms (and the electron) and isotopes, including their exact masses, relative abundances, and covalences.
    +  50    It also associates which isotopes are for the same element, and provides an ordering of elements.
    +  51
    +  52    IUPAC definition of monoisotopic mass is based on the most abundant isotopes of each element present.
    +  53    Here, we will use atom symbols with isotope numbers for all isotopes excluding the most abundant one.
    +  54    This list has been corrected up to Iodine.
    +  55
    +  56    References
    +  57    ----------
    +  58
    +  59    1. NIST - Last Accessed 2019-06-12
    +  60    https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses
    +  61
    +  62    """
    +  63
    +  64    electron_mass = 0.0005_485_799_090_65  # NIST value
    +  65
    +  66    atomic_masses = {
    +  67        "H": 1.00782503223,
    +  68        "D": 2.01410177812,
    +  69        "T": 3.0160492779,  # consider removing
    +  70        "3He": 3.0160293201,
    +  71        "He": 4.00260325413,
    +  72        "6Li": 6.0151228874,
    +  73        "Li": 7.0160034366,
    +  74        "Be": 9.012183065,
    +  75        "10B": 10.01293695,
    +  76        "B": 11.00930536,
    +  77        "C": 12.0,
    +  78        "13C": 13.00335483507,
    +  79        "14C": 14.0032419884,
    +  80        "N": 14.00307400443,
    +  81        "15N": 15.00010889888,
    +  82        "O": 15.99491461957,
    +  83        "17O": 16.99913175650,
    +  84        "18O": 17.99915961286,
    +  85        "F": 18.99840316273,
    +  86        "Ne": 19.9924401762,
    +  87        "21Ne": 20.993846685,
    +  88        "22Ne": 21.991385114,
    +  89        "Na": 22.9897692820,
    +  90        "Mg": 23.985041697,
    +  91        "25Mg": 24.985836976,
    +  92        "26Mg": 25.982592968,
    +  93        "Al": 26.98153853,
    +  94        "Si": 27.97692653465,
    +  95        "29Si": 28.97649466490,
    +  96        "30Si": 29.973770136,
    +  97        "P": 30.97376199842,
    +  98        "S": 31.9720711744,
    +  99        "33S": 32.9714589098,
    + 100        "34S": 33.967867004,
    + 101        "36S": 35.96708071,
    + 102        "Cl": 34.968852682,
    + 103        "37Cl": 36.965902602,
    + 104        "36Ar": 35.967545105,
    + 105        "38Ar": 37.96273211,
    + 106        "Ar": 39.9623831237,
    + 107        "K": 38.9637064864,
    + 108        "40K": 39.963998166,
    + 109        "41K": 40.9618252579,
    + 110        "Ca": 39.962590863,
    + 111        "42Ca": 41.95861783,
    + 112        "43Ca": 42.95876644,
    + 113        "44Ca": 43.95548156,
    + 114        "46Ca": 45.9536890,
    + 115        "48Ca": 47.95252276,
    + 116        "Sc": 44.95590828,
    + 117        "46Ti": 45.95262772,
    + 118        "47Ti": 46.95175879,
    + 119        "Ti": 47.94794198,
    + 120        "49Ti": 48.94786568,
    + 121        "50Ti": 49.94478689,
    + 122        "50V": 49.94715601,
    + 123        "V": 50.94395704,
    + 124        "50Cr": 49.94604183,
    + 125        "Cr": 51.94050623,
    + 126        "53Cr": 52.94064815,
    + 127        "54Cr": 53.93887916,
    + 128        "Mn": 54.93804391,
    + 129        "54Fe": 53.93960899,
    + 130        "Fe": 55.93493633,
    + 131        "57Fe": 56.93539284,
    + 132        "58Fe": 57.93327443,
    + 133        "Co": 58.93319429,
    + 134        "Ni": 57.93534241,
    + 135        "60Ni": 59.93078588,
    + 136        "61Ni": 60.93105557,
    + 137        "62Ni": 61.92834537,
    + 138        "64Ni": 63.92796682,
    + 139        "Cu": 62.92959772,
    + 140        "65Cu": 64.92778970,
    + 141        "Zn": 63.92914201,
    + 142        "66Zn": 65.92603381,
    + 143        "67Zn": 66.92712775,
    + 144        "68Zn": 67.92484455,
    + 145        "70Zn": 69.9253192,
    + 146        "Ga": 68.9255735,
    + 147        "71Ga": 70.92470258,
    + 148        "70Ge": 69.92424875,
    + 149        "72Ge": 71.922075826,
    + 150        "73Ge": 72.923458956,
    + 151        "Ge": 73.921177761,
    + 152        "76Ge": 75.921402726,
    + 153        "As": 74.92159457,
    + 154        "74Se": 73.922475934,
    + 155        "76Se": 75.919213704,
    + 156        "77Se": 76.919914154,
    + 157        "78Se": 77.91730928,
    + 158        "Se": 79.9165218,
    + 159        "82Se": 81.9166995,
    + 160        "Br": 78.9183376,
    + 161        "81Br": 80.9162897,
    + 162        "78Kr": 77.92036494,
    + 163        "80Kr": 79.91637808,
    + 164        "82Kr": 81.91348273,
    + 165        "83Kr": 82.91412716,
    + 166        "Kr": 83.9114977282,
    + 167        "86Kr": 85.9106106269,
    + 168        "Rb": 84.9117897379,
    + 169        "87Rb": 86.9091805310,
    + 170        "84Sr": 83.9134191,
    + 171        "86Sr": 85.9092606,
    + 172        "87Sr": 86.9088775,
    + 173        "Sr": 87.9056125,
    + 174        "Y": 88.9058403,
    + 175        "Zr": 89.9046977,
    + 176        "91Zr": 90.9056396,
    + 177        "92Zr": 91.9050347,
    + 178        "94Zr": 93.9063108,
    + 179        "96Zr": 95.9082714,
    + 180        "Nb": 92.9063730,
    + 181        "92Mo": 91.90680796,
    + 182        "94Mo": 93.90508490,
    + 183        "95Mo": 94.90583877,
    + 184        "96Mo": 95.90467612,
    + 185        "97Mo": 96.90601812,
    + 186        "Mo": 97.90540482,
    + 187        "100Mo": 99.9074718,
    + 188        #'97Tc': 96.9063667,
    + 189        #'98Tc': 97.9072124,
    + 190        "Tc": 98.9062508,  # consider removing technetium as it is radioactive
    + 191        "96Ru": 95.90759025,
    + 192        "98Ru": 97.9052868,
    + 193        "99Ru": 98.9059341,
    + 194        "100Ru": 99.9042143,
    + 195        "101Ru": 100.9055769,
    + 196        "Ru": 101.9043441,
    + 197        "104Ru": 103.9054275,
    + 198        "Rh": 102.9054980,
    + 199        "102Pd": 101.9056022,
    + 200        "104Pd": 103.9040305,
    + 201        "105Pd": 104.9050796,
    + 202        "Pd": 105.9034804,
    + 203        "108Pd": 107.9038916,
    + 204        "110Pd": 109.9051722,
    + 205        "Ag": 106.9050916,
    + 206        "109Ag": 108.9047553,
    + 207        "106Cd": 105.9064599,
    + 208        "108Cd": 107.9041834,
    + 209        "110Cd": 109.90300661,
    + 210        "111Cd": 110.90418287,
    + 211        "Cd": 111.90276287,
    + 212        "113Cd": 112.90440813,
    + 213        "114Cd": 113.90336509,
    + 214        "116Cd": 115.90476315,
    + 215        "113In": 112.90406184,
    + 216        "In": 114.903878776,
    + 217        "112Sn": 111.90482387,
    + 218        "114Sn": 113.9027827,
    + 219        "115Sn": 114.903344699,
    + 220        "116Sn": 115.90174280,
    + 221        "117Sn": 116.90295398,
    + 222        "118Sn": 117.90160657,
    + 223        "119Sn": 118.90331117,
    + 224        "Sn": 119.90220163,
    + 225        "122Sn": 121.9034438,
    + 226        "124Sn": 123.9052766,
    + 227        "Sb": 120.9038120,
    + 228        "123Sb": 122.9042132,
    + 229        "120Te": 119.9040593,
    + 230        "122Te": 121.9030435,
    + 231        "123Te": 122.9042698,
    + 232        "124Te": 123.9028171,
    + 233        "125Te": 124.9044299,
    + 234        "126Te": 125.9033109,
    + 235        "128Te": 127.90446128,
    + 236        "Te": 129.906222748,
    + 237        "I": 126.9044719,
    + 238        "124Xe": 123.9058920,
    + 239        "126Xe": 125.9042983,
    + 240        "128Xe": 127.9035310,
    + 241        "129Xe": 128.9047808611,
    + 242        "130Xe": 129.903509349,
    + 243        "131Xe": 130.90508406,
    + 244        "Xe": 131.9041550856,
    + 245        "134Xe": 133.90539466,
    + 246        "136Xe": 135.907214484,
    + 247        "Cs": 132.9054519610,
    + 248        "130Ba": 129.9063207,
    + 249        "132Ba": 131.9050611,
    + 250        "134Ba": 133.90450818,
    + 251        "135Ba": 134.90568838,
    + 252        "136Ba": 135.90457573,
    + 253        "137Ba": 136.90582714,
    + 254        "Ba": 137.90524700,
    + 255        "138La": 137.9071149,
    + 256        "La": 138.9063563,
    + 257        "136Ce": 135.90712921,
    + 258        "138Ce": 137.905991,
    + 259        "Ce": 139.9054431,
    + 260        "142Ce": 141.9092504,
    + 261        "Pr": 140.9076576,
    + 262        "Nd": 141.9077290,
    + 263        "143Nd": 142.9098200,
    + 264        "144Nd": 143.9100930,
    + 265        "145Nd": 144.9125793,
    + 266        "146Nd": 145.9131226,
    + 267        "148Nd": 147.9168993,
    + 268        "150Nd": 149.9209022,
    + 269        "145Pm": 144.9127559,
    + 270        "147Pm": 146.9151450,
    + 271        "144Sm": 143.9120065,
    + 272        "147Sm": 146.9149044,
    + 273        "148Sm": 147.9148292,
    + 274        "149Sm": 148.9171921,
    + 275        "150Sm": 149.9172829,
    + 276        "Sm": 151.9197397,
    + 277        "154Sm": 153.9222169,
    + 278        "151Eu": 150.9198578,
    + 279        "Eu": 152.9212380,
    + 280        "152Gd": 151.9197995,
    + 281        "154Gd": 153.9208741,
    + 282        "155Gd": 154.9226305,
    + 283        "156Gd": 155.9221312,
    + 284        "157Gd": 156.9239686,
    + 285        "Gd": 157.9241123,
    + 286        "160Gd": 159.9270624,
    + 287        "Tb": 158.9253547,
    + 288        "156Dy": 155.9242847,
    + 289        "158Dy": 157.9244159,
    + 290        "160Dy": 159.9252046,
    + 291        "161Dy": 160.9269405,
    + 292        "162Dy": 161.9268056,
    + 293        "163Dy": 162.9287383,
    + 294        "Dy": 163.9291819,
    + 295        "Ho": 164.9303288,
    + 296        "162Er": 161.9287884,
    + 297        "164Er": 163.9292088,
    + 298        "Er": 165.9302995,
    + 299        "167Er": 166.9320546,
    + 300        "168Er": 167.9323767,
    + 301        "170Er": 169.9354702,
    + 302        "Tm": 168.9342179,
    + 303        "168Yb": 167.9338896,
    + 304        "170Yb": 169.9347664,
    + 305        "171Yb": 170.9363302,
    + 306        "172Yb": 171.9363859,
    + 307        "173Yb": 172.9382151,
    + 308        "Yb": 173.9388664,
    + 309        "176Yb": 175.9425764,
    + 310        "Lu": 174.9407752,
    + 311        "176Lu": 175.9426897,
    + 312        "174Hf": 173.9400461,
    + 313        "176Hf": 175.9414076,
    + 314        "177Hf": 176.9432277,
    + 315        "178Hf": 177.9437058,
    + 316        "179Hf": 178.9458232,
    + 317        "Hf": 179.9465570,
    + 318        "180Ta": 179.9474648,
    + 319        "Ta": 180.9479958,
    + 320        "180W": 179.9467108,
    + 321        "182W": 181.94820394,
    + 322        "183W": 182.95022275,
    + 323        "W": 183.95093092,
    + 324        "186W": 185.9543628,
    + 325        "185Re": 184.9529545,
    + 326        "Re": 186.9557501,
    + 327        "184Os": 183.9524885,
    + 328        "186Os": 185.9538350,
    + 329        "187Os": 186.9557474,
    + 330        "188Os": 187.9558352,
    + 331        "189Os": 188.9581442,
    + 332        "190Os": 189.9584437,
    + 333        "192Os": 191.9614770,
    + 334        "191Ir": 190.9605893,
    + 335        "Ir": 192.9629216,
    + 336        "190Pt": 189.9599297,
    + 337        "192Pt": 191.9610387,
    + 338        "194Pt": 193.9626809,
    + 339        "Pt": 194.9647917,
    + 340        "196Pt": 195.96495209,
    + 341        "198Pt": 197.9678949,
    + 342        "Au": 196.96656879,
    + 343        "196Hg": 195.9658326,
    + 344        "198Hg": 197.96676860,
    + 345        "199Hg": 198.96828064,
    + 346        "200Hg": 199.96832659,
    + 347        "201Hg": 200.97030284,
    + 348        "Hg": 201.97064340,
    + 349        "204Hg": 203.97349398,
    + 350        "203Tl": 202.9723446,
    + 351        "Tl": 204.9744278,
    + 352        "204Pb": 203.9730440,
    + 353        "206Pb": 205.9744657,
    + 354        "207Pb": 206.9758973,
    + 355        "Pb": 207.9766525,
    + 356        "Bi": 208.9803991,
    + 357        "209Po": 208.9824308,
    + 358        "210Po": 209.9828741,
    + 359        "210At": 209.9871479,
    + 360        "211At": 210.9874966,
    + 361        "211Rn": 210.9906011,
    + 362        "220Rn": 220.0113941,
    + 363        "222Rn": 222.0175782,
    + 364        "223Fr": 223.0197360,
    + 365        "223Ra": 223.0185023,
    + 366        "224Ra": 224.0202120,
    + 367        "226Ra": 226.0254103,
    + 368        "228Ra": 228.0310707,
    + 369        "227Ac": 227.0277523,
    + 370        "230Th": 230.0331341,
    + 371        "Th": 232.0380558,
    + 372        "Pa": 231.0358842,
    + 373        "233U": 233.0396355,
    + 374        "234U": 234.0409523,
    + 375        "235U": 235.0439301,
    + 376        "236U": 236.0455682,
    + 377        "U": 238.0507884,
    + 378        "236Np": 236.046570,
    + 379        "237Np": 237.0481736,
    + 380        "238Pu": 238.0495601,
    + 381        "239Pu": 239.0521636,
    + 382        "240Pu": 240.0538138,
    + 383        "241Pu": 241.0568517,
    + 384        "242Pu": 242.0587428,
    + 385        "244Pu": 244.0642053,
    + 386        "241Am": 241.0568293,
    + 387        "243Am": 243.0613813,
    + 388        "243Cm": 243.0613893,
    + 389        "244Cm": 244.0627528,
    + 390        "245Cm": 245.0654915,
    + 391        "246Cm": 246.0672238,
    + 392        "247Cm": 247.0703541,
    + 393        "248Cm": 248.0723499,
    + 394        "247Bk": 247.0703073,
    + 395        "249Bk": 249.0749877,
    + 396        "249Cf": 249.0748539,
    + 397        "250Cf": 250.0764062,
    + 398        "251Cf": 251.0795886,
    + 399        "252Cf": 252.0816272,
    + 400        "252Es": 252.082980,
    + 401        "257Fm": 257.0951061,
    + 402        "258Md": 258.0984315,
    + 403        "260Md": 260.10365,
    + 404        "259No": 259.10103,
    + 405        "262Lr": 262.10961,
    + 406        "267Rf": 267.12179,
    + 407        "268Db": 268.12567,
    + 408        "271Sg": 271.13393,
    + 409        "272Bh": 272.13826,
    + 410        "270Hs": 270.13429,
    + 411        "276Mt": 276.15159,
    + 412        "281Ds": 281.16451,
    + 413        "280Rg": 280.16514,
    + 414        "285Cn": 285.17712,
    + 415        "284Nh": 284.17873,
    + 416        "289Fl": 289.19042,
    + 417        "288Mc": 288.19274,
    + 418        "293Lv": 293.20449,
    + 419        "292Ts": 292.20746,
    + 420        "294Og": 294.21392,
    + 421    }
    + 422
    + 423    # This list orders atoms, starting with the most abundant isotopes first, ordered as described.
    + 424    # Less abundant isotopes are ordered by mass at the end of the list.
    + 425    atoms_order = [
    + 426        "C",
    + 427        "H",
    + 428        "O",
    + 429        "N",
    + 430        "P",
    + 431        "S",  # CHONPS
    + 432        "F",
    + 433        "Cl",
    + 434        "Br",
    + 435        "I",
    + 436        "At",  # Halogens
    + 437        "Li",
    + 438        "Na",
    + 439        "K",
    + 440        "Rb",
    + 441        "Cs",
    + 442        "Fr",  # Alkali
    + 443        "He",
    + 444        "Ne",
    + 445        "Ar",
    + 446        "Kr",
    + 447        "Xe",
    + 448        "Rn",  # Noble gasses
    + 449        "Be",
    + 450        "B",  # Row 2
    + 451        "Mg",
    + 452        "Al",
    + 453        "Si",  # Row 3
    + 454        "Ca",
    + 455        "Sc",
    + 456        "Ti",
    + 457        "V",
    + 458        "Cr",
    + 459        "Mn",
    + 460        "Fe",
    + 461        "Co",
    + 462        "Ni",
    + 463        "Cu",
    + 464        "Zn",
    + 465        "Ga",
    + 466        "Ge",
    + 467        "As",
    + 468        "Se",  # Row 4
    + 469        "Sr",
    + 470        "Y",
    + 471        "Zr",
    + 472        "Nb",
    + 473        "Mo",
    + 474        "Tc",
    + 475        "Ru",
    + 476        "Rh",
    + 477        "Pd",
    + 478        "Ag",
    + 479        "Cd",
    + 480        "In",
    + 481        "Sn",
    + 482        "Sb",
    + 483        "Te",  # Row 5
    + 484        "Ba",
    + 485        "La",
    + 486        "Hf",
    + 487        "Ta",
    + 488        "W",
    + 489        "Re",
    + 490        "Os",
    + 491        "Ir",
    + 492        "Pt",
    + 493        "Au",
    + 494        "Hg",
    + 495        "Tl",
    + 496        "Pb",
    + 497        "Bi",
    + 498        "Po",  # Row 6
    + 499        "Ra",
    + 500        "Ac",
    + 501        "Rf",
    + 502        "Db",
    + 503        "Sg",
    + 504        "Bh",
    + 505        "Hs",
    + 506        "Mt",
    + 507        "Ds",
    + 508        "Rg",
    + 509        "Cn",
    + 510        "Nh",
    + 511        "Fl",
    + 512        "Mc",
    + 513        "Lv",
    + 514        "Ts",
    + 515        "Og",  # Row 7
    + 516        "Ce",
    + 517        "Pr",
    + 518        "Nd",
    + 519        "Pm",
    + 520        "Sm",
    + 521        "Eu",
    + 522        "Gd",
    + 523        "Tb",
    + 524        "Dy",
    + 525        "Ho",
    + 526        "Er",
    + 527        "Tm",
    + 528        "Yb",
    + 529        "Lu",  # Lanthanides
    + 530        "Th",
    + 531        "Pa",
    + 532        "U",
    + 533        "Np",
    + 534        "Pu",
    + 535        "Am",
    + 536        "Cm",
    + 537        "Bk",
    + 538        "Cf",
    + 539        "Es",
    + 540        "Fm",
    + 541        "Md",
    + 542        "No",
    + 543        "Lr",  # Actinides
    + 544        # Less abundant isotopes follow
    + 545        "D",
    + 546        "6Li",
    + 547        "10B",
    + 548        "13C",
    + 549        "15N",
    + 550        "17O",
    + 551        "18O",
    + 552        "22Ne",
    + 553        "25Mg",
    + 554        "26Mg",
    + 555        "29Si",
    + 556        "30Si",
    + 557        "33S",
    + 558        "34S",
    + 559        "36S",
    + 560        "37Cl",
    + 561        "40Ca",
    + 562        "41K",
    + 563        "44Ca",
    + 564        "46Ti",
    + 565        "47Ti",
    + 566        "49Ti",
    + 567        "50Cr",
    + 568        "50Ti",
    + 569        "50V",
    + 570        "53Cr",
    + 571        "54Cr",
    + 572        "54Fe",
    + 573        "57Fe",
    + 574        "58Fe",
    + 575        "60Ni",
    + 576        "61Ni",
    + 577        "62Ni",
    + 578        "65Cu",
    + 579        "66Zn",
    + 580        "67Zn",
    + 581        "68Zn",
    + 582        "70Ge",
    + 583        "71Ga",
    + 584        "72Ge",
    + 585        "73Ge",
    + 586        "76Ge",
    + 587        "76Se",
    + 588        "77Se",
    + 589        "78Se",
    + 590        "81Br",
    + 591        "80Kr",
    + 592        "82Kr",
    + 593        "82Se",
    + 594        "83Kr",
    + 595        "85Rb",
    + 596        "86Kr",
    + 597        "86Sr",
    + 598        "87Rb",
    + 599        "87Sr",
    + 600        "88Sr",
    + 601        "91Zr",
    + 602        "92Mo",
    + 603        "92Zr",
    + 604        "94Mo",
    + 605        "94Zr",
    + 606        "95Mo",
    + 607        "96Mo",
    + 608        "96Ru",
    + 609        "96Zr",
    + 610        "97Mo",
    + 611        "98Ru",
    + 612        "99Ru",
    + 613        "100Mo",
    + 614        "100Ru",
    + 615        "101Ru",
    + 616        "102Pd",
    + 617        "104Pd",
    + 618        "104Ru",
    + 619        "105Pd",
    + 620        "106Cd",
    + 621        "106Pd",
    + 622        "108Cd",
    + 623        "108Pd",
    + 624        "109Ag",
    + 625        "110Cd",
    + 626        "110Pd",
    + 627        "111Cd",
    + 628        "112Cd",
    + 629        "112Sn",
    + 630        "113Cd",
    + 631        "113In",
    + 632        "114Cd",
    + 633        "114Sn",
    + 634        "115In",
    + 635        "115Sn",
    + 636        "116Cd",
    + 637        "116Sn",
    + 638        "117Sn",
    + 639        "118Sn",
    + 640        "119Sn",
    + 641        "120Sn",
    + 642        "120Te",
    + 643        "121Sb",
    + 644        "122Sn",
    + 645        "122Te",
    + 646        "123Sb",
    + 647        "123Te",
    + 648        "124Sn",
    + 649        "124Te",
    + 650        "124Xe",
    + 651        "125Te",
    + 652        "126Te",
    + 653        "126Xe",
    + 654        "128Te",
    + 655        "128Xe",
    + 656        "129Xe",
    + 657        "130Ba",
    + 658        "130Te",
    + 659        "130Xe",
    + 660        "131Xe",
    + 661        "132Ba",
    + 662        "132Xe",
    + 663        "134Ba",
    + 664        "134Xe",
    + 665        "135Ba",
    + 666        "136Ba",
    + 667        "136Xe",
    + 668        "137Ba",
    + 669        "138Ba",
    + 670        "174Hf",
    + 671        "176Hf",
    + 672        "177Hf",
    + 673        "178Hf",
    + 674        "179Hf",
    + 675        "180Hf",
    + 676        "180W",
    + 677        "182W",
    + 678        "183W",
    + 679        "184Os",
    + 680        "184W",
    + 681        "185Re",
    + 682        "186Os",
    + 683        "186W",
    + 684        "187Os",
    + 685        "187Re",
    + 686        "188Os",
    + 687        "189Os",
    + 688        "190Os",
    + 689        "190Pt",
    + 690        "191Ir",
    + 691        "192Ir",
    + 692        "192Os",
    + 693        "192Pt",
    + 694        "194Pt",
    + 695        "195Pt",
    + 696        "196Hg",
    + 697        "196Pt",
    + 698        "198Hg",
    + 699        "198Pt",
    + 700        "199Hg",
    + 701        "200Hg",
    + 702        "201Hg",
    + 703        "202Hg",
    + 704        "203Tl",
    + 705        "204Hg",
    + 706        "204Pb",
    + 707        "205Tl",
    + 708        "206Pb",
    + 709        "207Pb",
    + 710        "208Pb",
    + 711    ]
    + 712
    + 713    atoms_covalence = {
    + 714        "C": (4),
    + 715        "13C": (4),
    + 716        "N": (3),
    + 717        "O": (2),
    + 718        "S": (2),
    + 719        "H": (1),
    + 720        "F": (1, 0),
    + 721        "Cl": (1, 0),
    + 722        "Br": (1, 0),
    + 723        "I": (1, 0),
    + 724        "At": (1),
    + 725        "Li": (1, 0),
    + 726        "Na": (1, 0),
    + 727        "K": (1, 0),
    + 728        "Rb": (1),
    + 729        "Cs": (1),
    + 730        "Fr": (1),
    + 731        "B": (4, 3, 2, 1),
    + 732        "In": (3, 2, 1),
    + 733        "Al": (3, 1, 2),
    + 734        "P": (3, 5, 4, 2, 1),
    + 735        "Ga": (3, 1, 2),
    + 736        "Mg": (2, 1),
    + 737        "Be": (2, 1),
    + 738        "Ca": (2, 1),
    + 739        "Sr": (2, 1),
    + 740        "Ba": (2),
    + 741        "Ra": (2),
    + 742        "V": (5, 4, 3, 2, 1),
    + 743        "Fe": (3, 2, 4, 5, 6),
    + 744        "Si": (4, 3, 2),
    + 745        "Sc": (3, 2, 1),
    + 746        "Ti": (4, 3, 2, 1),
    + 747        "Cr": (1, 2, 3, 4, 5, 6),
    + 748        "Mn": (1, 2, 3, 4, 5, 6, 7),
    + 749        "Co": (1, 2, 3, 4, 5),
    + 750        "Ni": (1, 2, 3, 4),
    + 751        "Cu": (2, 1, 3, 4),
    + 752        "Zn": (2, 1),
    + 753        "Ge": (4, 3, 2, 1),
    + 754        "As": (5, 3, 2, 1),
    + 755        "Se": (6, 4, 2, 1),
    + 756        "Y": (3, 2, 1),
    + 757        "Zr": (4, 3, 2, 1),
    + 758        "Nb": (5, 4, 3, 2, 1),
    + 759        "Mo": (6, 5, 4, 3, 2, 1),
    + 760        "Tc": (7, 6, 5, 4, 3, 2, 1),
    + 761        "Ru": (8, 7, 6, 5, 4, 3, 2, 1),
    + 762        "Rh": (6, 5, 4, 3, 2, 1),
    + 763        "Pd": (4, 2, 1),
    + 764        "Ag": (0, 1, 2, 3, 4),
    + 765        "Cd": (2, 1),
    + 766        "Sn": (4, 2),
    + 767        "Sb": (5, 3),
    + 768        "Te": (6, 5, 4, 2),
    + 769        "La": (3, 2),
    + 770        "Hf": (4, 3, 2),
    + 771        "Ta": (5, 4, 3, 2),
    + 772        "W": (6, 5, 4, 3, 2, 1),
    + 773        "Re": (4, 7, 6, 5, 3, 2, 1),
    + 774        "Os": (4, 8, 7, 6, 5, 3, 2, 1),
    + 775        "Ir": (4, 8, 6, 5, 3, 2, 1),
    + 776        "Pt": (4, 6, 5, 3, 2, 1),
    + 777        "Au": (3, 5, 2, 1),
    + 778        "Hg": (1, 2, 4),
    + 779        "Tl": (3, 1),
    + 780        "Pb": (4, 2),
    + 781        "Bi": (3, 1, 5),
    + 782        "Po": (2, 4, 6),
    + 783        "Ac": (3, 2),
    + 784    }
    + 785
    + 786    isotopic_abundance = {
    + 787        "H": 0.999885,
    + 788        "D": 0.000115,
    + 789        "T": 0,  # Consider removing.
    + 790        "3He": 0.00000134,
    + 791        "He": 0.99999866,
    + 792        "6Li": 0.0759,
    + 793        "Li": 0.9241,
    + 794        "Be": 1.0,
    + 795        "10B": 0.199,
    + 796        "B": 0.801,
    + 797        "C": 0.9893,
    + 798        "13C": 0.0107,
    + 799        "14C": 0,
    + 800        "N": 0.99636,
    + 801        "15N": 0.00364,
    + 802        "O": 0.99757,
    + 803        "17O": 0.00038,
    + 804        "18O": 0.00205,
    + 805        "F": 1.0,
    + 806        "Ne": 0.9048,
    + 807        "21Ne": 0.0027,
    + 808        "22Ne": 0.0925,
    + 809        "Na": 1.0,
    + 810        "Mg": 0.7899,
    + 811        "25Mg": 0.1000,
    + 812        "26Mg": 0.1101,
    + 813        "Al": 1.0,
    + 814        "Si": 0.92223,
    + 815        "29Si": 0.04685,
    + 816        "30Si": 0.03092,
    + 817        "P": 1.0,
    + 818        "S": 0.9499,
    + 819        "33S": 0.0075,
    + 820        "34S": 0.0425,
    + 821        "36S": 0.0001,
    + 822        "Cl": 0.7576,
    + 823        "37Cl": 0.2424,
    + 824        "36Ar": 0.003336,
    + 825        "38Ar": 0.000629,
    + 826        "Ar": 0.996035,
    + 827        "K": 0.932581,
    + 828        "40K": 0.000117,
    + 829        "41K": 0.067302,
    + 830        "Ca": 0.96941,
    + 831        "42Ca": 0.00647,
    + 832        "43Ca": 0.00135,
    + 833        "44Ca": 0.02086,
    + 834        "46Ca": 0.00004,
    + 835        "48Ca": 0.001872,
    + 836        "Sc": 1.0,
    + 837        "46Ti": 0.0825,
    + 838        "47Ti": 0.0744,
    + 839        "Ti": 0.7372,
    + 840        "49Ti": 0.0541,
    + 841        "50Ti": 0.0518,
    + 842        "50V": 0.00250,
    + 843        "V": 0.9975,
    + 844        "50Cr": 0.04345,
    + 845        "Cr": 0.83789,
    + 846        "53Cr": 0.09501,
    + 847        "54Cr": 0.02365,
    + 848        "Mn": 1.0,
    + 849        "54Fe": 0.05845,
    + 850        "Fe": 0.91754,
    + 851        "57Fe": 0.02119,
    + 852        "58Fe": 0.00282,
    + 853        "Co": 1.0,
    + 854        "Ni": 0.68077,
    + 855        "60Ni": 0.26223,
    + 856        "61Ni": 0.011399,
    + 857        "62Ni": 0.036346,
    + 858        "64Ni": 0.009255,
    + 859        "Cu": 0.6915,
    + 860        "65Cu": 0.3085,
    + 861        "Zn": 0.4917,
    + 862        "66Zn": 0.2773,
    + 863        "67Zn": 0.0404,
    + 864        "68Zn": 0.1845,
    + 865        "70Zn": 0.0061,
    + 866        "Ga": 0.60108,
    + 867        "71Ga": 0.39892,
    + 868        "70Ge": 0.2057,
    + 869        "72Ge": 0.2745,
    + 870        "73Ge": 0.0775,
    + 871        "Ge": 0.3650,
    + 872        "76Ge": 0.0773,
    + 873        "As": 1.0,
    + 874        "74Se": 0.0089,
    + 875        "76Se": 0.0937,
    + 876        "77Se": 0.0763,
    + 877        "78Se": 0.2377,
    + 878        "Se": 0.4961,
    + 879        "82Se": 0.0873,
    + 880        "Br": 0.5069,
    + 881        "81Br": 0.4931,
    + 882        "78Kr": 0.00355,
    + 883        "80Kr": 0.02286,
    + 884        "82Kr": 0.11593,
    + 885        "83Kr": 0.11500,
    + 886        "Kr": 0.56987,
    + 887        "86Kr": 0.17279,
    + 888        "Rb": 0.7217,
    + 889        "87Rb": 0.2783,
    + 890        "84Sr": 0.0056,
    + 891        "86Sr": 0.0986,
    + 892        "87Sr": 0.0700,
    + 893        "Sr": 0.8258,
    + 894        "Y": 1.0,
    + 895        "Zr": 0.5145,
    + 896        "91Zr": 0.1122,
    + 897        "92Zr": 0.1715,
    + 898        "94Zr": 0.1738,
    + 899        "96Zr": 0.0280,
    + 900        "Nb": 1.0,
    + 901        "92Mo": 0.1453,
    + 902        "94Mo": 0.0915,
    + 903        "95Mo": 0.1584,
    + 904        "96Mo": 0.1667,
    + 905        "97Mo": 0.0960,
    + 906        "Mo": 0.2439,
    + 907        "100Mo": 0.0982,
    + 908        "99Tc": 0,  # consider removing
    + 909        "96Ru": 0.0554,
    + 910        "98Ru": 0.0187,
    + 911        "99Ru": 0.1276,
    + 912        "100Ru": 0.1260,
    + 913        "101Ru": 0.1706,
    + 914        "Ru": 0.3155,
    + 915        "104Ru": 0.1862,
    + 916        "Rh": 1.0,
    + 917        "102Pd": 0.0102,
    + 918        "104Pd": 0.1114,
    + 919        "105Pd": 0.2233,
    + 920        "Pd": 0.2733,
    + 921        "108Pd": 0.2646,
    + 922        "110Pd": 0.1172,
    + 923        "Ag": 0.51839,
    + 924        "109Ag": 0.48161,
    + 925        "106Cd": 0.0125,
    + 926        "108Cd": 0.0089,
    + 927        "110Cd": 0.1249,
    + 928        "111Cd": 0.1280,
    + 929        "Cd": 0.2413,
    + 930        "113Cd": 0.1222,
    + 931        "114Cd": 0.2873,
    + 932        "116Cd": 0.0749,
    + 933        "113In": 0.0429,
    + 934        "In": 0.9571,
    + 935        "112Sn": 0.0097,
    + 936        "114Sn": 0.0066,
    + 937        "115Sn": 0.0034,
    + 938        "116Sn": 0.1454,
    + 939        "117Sn": 0.0768,
    + 940        "118Sn": 0.2422,
    + 941        "119Sn": 0.0859,
    + 942        "Sn": 0.3258,
    + 943        "122Sn": 0.0463,
    + 944        "124Sn": 0.0579,
    + 945        "Sb": 0.5721,
    + 946        "123Sb": 0.4279,
    + 947        "120Te": 0.0009,
    + 948        "122Te": 0.0255,
    + 949        "123Te": 0.0089,
    + 950        "124Te": 0.0474,
    + 951        "125Te": 0.0707,
    + 952        "126Te": 0.1884,
    + 953        "128Te": 0.3174,
    + 954        "Te": 0.3408,
    + 955        "I": 1.0,
    + 956        "124Xe": 0.000952,
    + 957        "126Xe": 0.000890,
    + 958        "128Xe": 0.019102,
    + 959        "129Xe": 0.264006,
    + 960        "130Xe": 0.040710,
    + 961        "131Xe": 0.212324,
    + 962        "Xe": 0.269086,
    + 963        "134Xe": 0.104357,
    + 964        "136Xe": 0.088573,
    + 965        "Cs": 1.0,
    + 966        "130Ba": 0.00106,
    + 967        "132Ba": 0.00101,
    + 968        "134Ba": 0.02417,
    + 969        "135Ba": 0.06592,
    + 970        "136Ba": 0.07854,
    + 971        "137Ba": 0.11232,
    + 972        "Ba": 0.71698,
    + 973        "138La": 0.0008881,
    + 974        "La": 0.9991119,
    + 975        "136Ce": 0.00185,
    + 976        "138Ce": 0.00251,
    + 977        "Ce": 0.88450,
    + 978        "142Ce": 0.11114,
    + 979        "Pr": 1.0,
    + 980        "Nd": 0.27152,
    + 981        "143Nd": 0.12174,
    + 982        "144Nd": 0.23798,
    + 983        "145Nd": 0.08293,
    + 984        "146Nd": 0.17189,
    + 985        "148Nd": 0.05756,
    + 986        "150Nd": 0.05638,
    + 987        "145Pm": 0,
    + 988        "147Pm": 0,
    + 989        "144Sm": 0.0307,
    + 990        "147Sm": 0.1499,
    + 991        "148Sm": 0.1124,
    + 992        "149Sm": 0.1382,
    + 993        "150Sm": 0.0738,
    + 994        "Sm": 0.2675,
    + 995        "154Sm": 0.2275,
    + 996        "151Eu": 0.4781,
    + 997        "Eu": 0.5219,
    + 998        "152Gd": 0.0020,
    + 999        "154Gd": 0.0218,
    +1000        "155Gd": 0.1480,
    +1001        "156Gd": 0.2047,
    +1002        "157Gd": 0.1565,
    +1003        "Gd": 0.2484,
    +1004        "160Gd": 0.2186,
    +1005        "Tb": 1.0,
    +1006        "156Dy": 0.00056,
    +1007        "158Dy": 0.00095,
    +1008        "160Dy": 0.02329,
    +1009        "161Dy": 0.18889,
    +1010        "162Dy": 0.25475,
    +1011        "163Dy": 0.24896,
    +1012        "Dy": 0.28260,
    +1013        "Ho": 1.0,
    +1014        "162Er": 0.00139,
    +1015        "164Er": 0.01601,
    +1016        "Er": 0.33503,
    +1017        "167Er": 0.22869,
    +1018        "168Er": 0.26978,
    +1019        "170Er": 0.14910,
    +1020        "Tm": 1.0,
    +1021        "168Yb": 0.00123,
    +1022        "170Yb": 0.02982,
    +1023        "171Yb": 0.1409,
    +1024        "172Yb": 0.2168,
    +1025        "173Yb": 0.16103,
    +1026        "Yb": 0.32026,
    +1027        "176Yb": 0.12996,
    +1028        "Lu": 0.97401,
    +1029        "176Lu": 0.02599,
    +1030        "174Hf": 0.0016,
    +1031        "176Hf": 0.0526,
    +1032        "177Hf": 0.1860,
    +1033        "178Hf": 0.2728,
    +1034        "179Hf": 0.1362,
    +1035        "Hf": 0.3508,
    +1036        "180Ta": 0.0001201,
    +1037        "Ta": 0.9998799,
    +1038        "180W": 0.0012,
    +1039        "182W": 0.2650,
    +1040        "183W": 0.1431,
    +1041        "W": 0.3064,
    +1042        "186W": 0.2843,
    +1043        "185Re": 0.3740,
    +1044        "Re": 0.6260,
    +1045        "184Os": 0.0002,
    +1046        "186Os": 0.0159,
    +1047        "187Os": 0.0196,
    +1048        "188Os": 0.1324,
    +1049        "189Os": 0.1615,
    +1050        "190Os": 0.2626,
    +1051        "Os": 0.4078,
    +1052        "191Ir": 0.373,
    +1053        "Ir": 0.627,
    +1054        "190Pt": 0.00012,
    +1055        "192Pt": 0.00782,
    +1056        "194Pt": 0.3286,
    +1057        "Pt": 0.3378,
    +1058        "196Pt": 0.2521,
    +1059        "198Pt": 0.07356,
    +1060        "Au": 1.0,
    +1061        "196Hg": 0.0015,
    +1062        "198Hg": 0.0997,
    +1063        "199Hg": 0.16872,
    +1064        "200Hg": 0.2310,
    +1065        "201Hg": 0.1318,
    +1066        "Hg": 0.2986,
    +1067        "204Hg": 0.0687,
    +1068        "203Tl": 0.2952,
    +1069        "Tl": 0.7048,
    +1070        "204Pb": 0.014,
    +1071        "206Pb": 0.241,
    +1072        "207Pb": 0.221,
    +1073        "Pb": 0.524,
    +1074        "Bi": 1.0,
    +1075        "209Po": 0,
    +1076        "210Po": 0,
    +1077        "210At": 0,
    +1078        "211At": 0,
    +1079        "211Rn": 0,
    +1080        "220Rn": 0,
    +1081        "222Rn": 0,
    +1082        "223Fr": 0,
    +1083        "223Ra": 0,
    +1084        "224Ra": 0,
    +1085        "226Ra": 0,
    +1086        "228Ra": 0,
    +1087        "227Ac": 0,
    +1088        "230Th": 0,
    +1089        "Th": 1.0,
    +1090        "Pa": 1.0,
    +1091        "233U": 0,
    +1092        "234U": 0.000054,
    +1093        "235U": 0.007204,
    +1094        "236U": 0,
    +1095        "U": 0.992742,
    +1096        "236Np": 0,
    +1097        "237Np": 0,
    +1098        "238Pu": 0,
    +1099        "239Pu": 0,
    +1100        "240Pu": 0,
    +1101        "241Pu": 0,
    +1102        "242Pu": 0,
    +1103        "244Pu": 0,
    +1104        "241Am": 0,
    +1105        "243Am": 0,
    +1106        "243Cm": 0,
    +1107        "244Cm": 0,
    +1108        "245Cm": 0,
    +1109        "246Cm": 0,
    +1110        "247Cm": 0,
    +1111        "248Cm": 0,
    +1112        "247Bk": 0,
    +1113        "249Bk": 0,
    +1114        "249Cf": 0,
    +1115        "250Cf": 0,
    +1116        "251Cf": 0,
    +1117        "252Cf": 0,
    +1118        "252Es": 0,
    +1119        "257Fm": 0,
    +1120        "258Md": 0,
    +1121        "260Md": 0,
    +1122        "259No": 0,
    +1123        "262Lr": 0,
    +1124        "267Rf": 0,
    +1125        "268Db": 0,
    +1126        "271Sg": 0,
    +1127        "272Bh": 0,
    +1128        "270Hs": 0,
    +1129        "276Mt": 0,
    +1130        "281Ds": 0,
    +1131        "280Rg": 0,
    +1132        "285Cn": 0,
    +1133        "284Nh": 0,
    +1134        "289Fl": 0,
    +1135        "288Mc": 0,
    +1136        "293Lv": 0,
    +1137        "292Ts": 0,
    +1138        "294Og": 0,
    +1139    }
    +1140
    +1141    # Isotopes here is a dictionary of symbol, including full name,
    +1142    # and then the isotopes which aren't the most abundant one, sorted by abundance.
    +1143    # None indicates no stable isotopes/naturally occurring ones.
    +1144    # This has been manually checked as far as Iodine only.
    +1145    isotopes = {
    +1146        "H": ["Hydrogen", ["D", "T"]],
    +1147        "He": ["Helium", ["3He"]],
    +1148        "Li": ["Lithium", ["6Li"]],
    +1149        "Be": ["Beryllium", [None]],
    +1150        "B": ["Boron", ["10B"]],
    +1151        "C": ["Carbon", ["13C"]],
    +1152        "N": ["Nitrogen", ["15N"]],
    +1153        "O": ["Oxygen", ["18O", "17O"]],
    +1154        "F": ["Fluorine", [None]],
    +1155        "Ne": ["Neon", ["22Ne", "21Ne"]],
    +1156        "Na": ["Sodium", [None]],
    +1157        "Mg": ["Magnesium", ["26Mg", "25Mg"]],
    +1158        "Al": ["Aluminum", [None]],
    +1159        "Si": ["Silicon", ["29Si", "30Si"]],
    +1160        "P": ["Phosphorus", [None]],
    +1161        "S": ["Sulfur", ["34S", "33S", "36S"]],
    +1162        "Cl": ["Chlorine", ["37Cl"]],
    +1163        "Ar": ["Argon", ["36Ar", "38Ar"]],
    +1164        "K": ["Potassium", ["41K", "40K"]],
    +1165        "Ca": ["Calcium", ["44Ca", "48Ca", "43Ca", "42Ca", "46Ca"]],
    +1166        "Sc": ["Scandium", [None]],
    +1167        "Ti": ["Titanium", ["46Ti", "47Ti", "49Ti", "50Ti"]],
    +1168        "V": ["Vanadium", ["50V"]],
    +1169        "Cr": ["Chromium", ["53Cr", "50Cr", "54Cr"]],
    +1170        "Mn": ["Manganese", [None]],
    +1171        "Fe": ["Iron", ["54Fe", "57Fe", "58Fe"]],
    +1172        "Co": ["Cobalt", [None]],
    +1173        "Ni": ["Nickel", ["60Ni", "62Ni", "61Ni", "64Ni"]],
    +1174        "Cu": ["Copper", ["65Cu"]],
    +1175        "Zn": ["Zinc", ["66Zn", "68Zn", "67Zn", "70Zn"]],
    +1176        "Ga": ["Gallium", ["71Ga"]],
    +1177        "Ge": ["Germanium", ["72Ge", "70Ge", "73Ge", "76Ge"]],
    +1178        "As": ["Arsenic", [None]],
    +1179        "Se": ["Selenium", ["78Se", "76Se", "82Se", "77Se", "74Se"]],
    +1180        "Br": ["Bromine", ["81Br"]],
    +1181        "Kr": ["Krypton", ["86Kr", "82Kr", "83Kr", "80Kr"]],
    +1182        "Rb": ["Rubidium", ["87Rb"]],
    +1183        "Sr": ["Strontium", ["86Sr", "87Sr", "84Sr"]],
    +1184        "Y": ["Yttrium", [None]],
    +1185        "Zr": ["Zirconium", ["94Zr", "92Zr", "91Zr", "96Zr"]],
    +1186        "Nb": ["Niobium", [None]],
    +1187        "Mo": ["Molybdenum", ["96Mo", "95Mo", "92Mo", "100Mo", "97Mo", "94Mo"]],
    +1188        "Tc": ["Technetium", [None]],  # consider removing
    +1189        "Ru": ["Ruthenium", ["104Ru", "101Ru", "99Ru", "100Ru", "96Ru", "98Ru"]],
    +1190        "Rh": ["Rhodium", [None]],
    +1191        "Pd": ["Palladium", ["108Pd", "105Pd", "110Pd", "104Pd", "102Pd"]],
    +1192        "Ag": ["Silver", ["109Ag"]],
    +1193        "Cd": [
    +1194            "Cadmium",
    +1195            ["114Cd", "111Cd", "110Cd", "113Cd", "116Cd", "106Cd", "108Cd"],
    +1196        ],
    +1197        "In": ["Indium", ["113In"]],
    +1198        "Sn": [
    +1199            "Tin",
    +1200            [
    +1201                "118Sn",
    +1202                "116Sn",
    +1203                "119Sn",
    +1204                "117Sn",
    +1205                "124Sn",
    +1206                "122Sn",
    +1207                "112Sn",
    +1208                "114Sn",
    +1209                "115Sn",
    +1210            ],
    +1211        ],
    +1212        "Sb": ["Antimony", ["123Sb"]],
    +1213        "Te": [
    +1214            "Tellurium",
    +1215            ["128Te", "126Te", "125Te", "124Te", "122Te", "123Te", "120Te"],
    +1216        ],
    +1217        "I": ["Iodine", [None]],
    +1218        "Xe": ["Xenon", ["129Xe", "131Xe", "134Xe", "136Xe", "130Xe", "128Xe"]],
    +1219        "Cs": ["Cesium", [None]],
    +1220        "Ba": ["Barium", ["137Ba", "136Ba", "135Ba", "134Ba"]],
    +1221        "La": ["Lanthanum", ["138La"]],
    +1222        "Hf": ["Hafnium", ["178Hf", "177Hf", "179Hf", "176Hf"]],
    +1223        "Ta": ["Tantalum", ["180Ta"]],
    +1224        "W": ["Tungsten", ["186W", "182W", "183W"]],
    +1225        "Re": ["Rhenium", ["185Re"]],
    +1226        "Os": ["Osmium", ["190Os", "189Os", "188Os", "187Os", "186Os"]],
    +1227        "Ir": ["Iridium", ["191Ir"]],
    +1228        "Pt": ["Platinum", ["194Pt", "196Pt", "198Pt", "192Pt"]],
    +1229        "Au": ["Gold", [None]],
    +1230        "Hg": ["Mercury", ["200Hg", "199Hg", "201Hg", "198Hg", "204Hg"]],
    +1231        "Tl": ["Thallium", ["203Tl"]],
    +1232        "Pb": ["Lead", ["206Pb", "207Pb", "204Pb"]],
    +1233        "Bi": ["Bismuth", [None]],
    +1234        "Po": ["Polonium", [None]],
    +1235        "At": ["Astatine", [None]],
    +1236        "Rn": ["Radon", [None]],
    +1237        "Fr": ["Francium", [None]],
    +1238        "Ra": ["Radium", [None]],
    +1239        "Ac": ["Actinium", [None]],
    +1240        "Rf": ["Rutherfordium", [None]],
    +1241        "Db": ["Dubnium", [None]],
    +1242        "Sg": ["Seaborgium", [None]],
    +1243        "Bh": ["Bohrium", [None]],
    +1244        "Hs": ["Hassium", [None]],
    +1245        "Mt": ["Meitnerium", [None]],
    +1246        "Ds": ["Darmstadtium", [None]],
    +1247        "Rg": ["Roentgenium", [None]],
    +1248        "Cn": ["Copernicium", [None]],
    +1249        "Nh": ["Nihonium", [None]],
    +1250        "Fl": ["Flerovium", [None]],
    +1251        "Mc": ["Moscovium", [None]],
    +1252        "Lv": ["Livermorium", [None]],
    +1253        "Ts": ["Tennessine", [None]],
    +1254        "Og": ["Oganesson", [None]],
    +1255        "Ce": ["Cerium", ["142Ce", "138Ce", "136Ce"]],
    +1256        "Pr": ["Praseodymium", [None]],
    +1257        "Nd": ["Neodymium", [None]],
    +1258        "Pm": ["Promethium", [None]],
    +1259        "Sm": ["Samarium", [None]],
    +1260        "Eu": ["Europium", [None]],
    +1261        "Gd": ["Gadolinium", [None]],
    +1262        "Tb": ["Terbium", [None]],
    +1263        "Dy": ["Dysprosium", [None]],
    +1264        "Ho": ["Holmium", [None]],
    +1265        "Er": ["Erbium", [None]],
    +1266        "Tm": ["Thulium", [None]],
    +1267        "Yb": ["Ytterbium", [None]],
    +1268        "Lu": ["Lutetium", ["176Lu"]],
    +1269        "Th": ["Thorium", [None]],
    +1270        "Pa": ["Protactinium", [None]],
    +1271        "U": ["Uranium", ["235U", "234U"]],
    +1272        "Np": ["Neptunium", [None]],
    +1273        "Pu": ["Plutonium", [None]],
    +1274        "Am": ["Americium", [None]],
    +1275        "Cm": ["Curium", [None]],
    +1276        "Bk": ["Berkelium", [None]],
    +1277        "Cf": ["Californium", [None]],
    +1278        "Es": ["Einsteinium", [None]],
    +1279        "Fm": ["Fermium", [None]],
    +1280        "Md": ["Mendelevium", [None]],
    +1281        "No": ["Nobelium", [None]],
    +1282        "Lr": ["Lawrencium", [None]],
    +1283    }
     
    @@ -1172,46 +1448,49 @@

    -
     2class Labels: #pragma: no cover
    - 3    """ Class for Labels used in CoreMS
    - 4    
    +            
     2class Labels:  # pragma: no cover
    + 3    """Class for Labels used in CoreMS
    + 4
      5    These labels are used to define:
    - 6    * types of columns in plaintext data inputs, 
    + 6    * types of columns in plaintext data inputs,
      7    * types of data/mass spectra
      8    * types of assignment for ions
      9
     10    """
    -11    mz = "m/z"
    -12    abundance = "Peak Height"
    -13    rp = "Resolving Power"
    -14    s2n = "S/N"
    -15
    -16    label = 'label'
    -17    bruker_profile = 'Bruker_Profile'
    -18    thermo_profile = 'Thermo_Profile'
    -19    simulated_profile = 'Simulated Profile'
    -20    booster_profile = 'Booster Profile'
    -21    bruker_frequency = 'Bruker_Frequency'
    -22    midas_frequency = 'Midas_Frequency'
    -23    thermo_centroid = 'Thermo_Centroid'
    -24    corems_centroid = 'CoreMS_Centroid'
    -25    gcms_centroid = 'Thermo_Centroid'
    -26    
    -27    unassigned = 'unassigned'
    -28
    -29    radical_ion = 'RADICAL'
    -30    protonated_de_ion = 'DE_OR_PROTONATED'
    -31    protonated = "protonated"
    -32    de_protonated = "de-protonated"
    -33    adduct_ion = "ADDUCT"
    -34    neutral = 'neutral'
    -35    ion_type = 'IonType'
    -36
    -37    ion_type_translate = { 'protonated': 'DE_OR_PROTONATED',
    -38                          'de-protonated': 'DE_OR_PROTONATED',
    -39                          'radical': 'RADICAL',
    -40                          'adduct': 'ADDUCT',
    -41                          'ADDUCT': 'ADDUCT'}
    +11
    +12    mz = "m/z"
    +13    abundance = "Peak Height"
    +14    rp = "Resolving Power"
    +15    s2n = "S/N"
    +16
    +17    label = "label"
    +18    bruker_profile = "Bruker_Profile"
    +19    thermo_profile = "Thermo_Profile"
    +20    simulated_profile = "Simulated Profile"
    +21    booster_profile = "Booster Profile"
    +22    bruker_frequency = "Bruker_Frequency"
    +23    midas_frequency = "Midas_Frequency"
    +24    thermo_centroid = "Thermo_Centroid"
    +25    corems_centroid = "CoreMS_Centroid"
    +26    gcms_centroid = "Thermo_Centroid"
    +27
    +28    unassigned = "unassigned"
    +29
    +30    radical_ion = "RADICAL"
    +31    protonated_de_ion = "DE_OR_PROTONATED"
    +32    protonated = "protonated"
    +33    de_protonated = "de-protonated"
    +34    adduct_ion = "ADDUCT"
    +35    neutral = "neutral"
    +36    ion_type = "IonType"
    +37
    +38    ion_type_translate = {
    +39        "protonated": "DE_OR_PROTONATED",
    +40        "de-protonated": "DE_OR_PROTONATED",
    +41        "radical": "RADICAL",
    +42        "adduct": "ADDUCT",
    +43        "ADDUCT": "ADDUCT",
    +44    }
     
    @@ -1220,7 +1499,7 @@

    These labels are used to define:

      -
    • types of columns in plaintext data inputs,
    • +
    • types of columns in plaintext data inputs,
    • types of data/mass spectra
    • types of assignment for ions
    @@ -1516,983 +1795,1255 @@

    -
      43class Atoms: #pragma: no cover
    -  44    """ Class for Atoms in CoreMS
    -  45
    -  46    This class includes key properties of atoms (and the electron) and isotopes, including their exact masses, relative abundances, and covalences. 
    -  47    It also associates which isotopes are for the same element, and provides an ordering of elements.
    -  48
    -  49    IUPAC definition of monoisotopic mass is based on the most abundant isotopes of each element present.
    -  50    Here, we will use atom symbols with isotope numbers for all isotopes excluding the most abundant one.
    -  51    This list has been corrected up to Iodine. 
    -  52    
    -  53    References
    -  54    ----------
    -  55
    -  56    1. NIST - Last Accessed 2019-06-12
    -  57    https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses
    -  58
    -  59    """
    -  60    electron_mass = 0.0005_485_799_090_65 #NIST value
    -  61
    -  62    atomic_masses = {'H': 1.00782503223,
    -  63                     'D': 2.01410177812,
    -  64                     'T': 3.0160492779, #consider removing 
    -  65                     '3He': 3.0160293201,
    -  66                     'He': 4.00260325413,
    -  67                     '6Li': 6.0151228874,
    -  68                     'Li': 7.0160034366,
    -  69                     'Be': 9.012183065,
    -  70                     '10B': 10.01293695,
    -  71                     'B': 11.00930536,
    -  72                     'C': 12.0,
    -  73                     '13C': 13.00335483507,
    -  74                     '14C': 14.0032419884,
    -  75                     'N': 14.00307400443,
    -  76                     '15N': 15.00010889888,
    -  77                     'O': 15.99491461957,
    -  78                     '17O': 16.99913175650,
    -  79                     '18O': 17.99915961286,
    -  80                     'F': 18.99840316273,
    -  81                     'Ne': 19.9924401762,
    -  82                     '21Ne': 20.993846685,
    -  83                     '22Ne': 21.991385114,
    -  84                     'Na': 22.9897692820,
    -  85                     'Mg': 23.985041697,
    -  86                     '25Mg': 24.985836976,
    -  87                     '26Mg': 25.982592968,
    -  88                     'Al': 26.98153853,
    -  89                     'Si': 27.97692653465,
    -  90                     '29Si': 28.97649466490,
    -  91                     '30Si': 29.973770136,
    -  92                     'P': 30.97376199842,
    -  93                     'S': 31.9720711744,
    -  94                     '33S': 32.9714589098,
    -  95                     '34S': 33.967867004,
    -  96                     '36S': 35.96708071,
    -  97                     'Cl': 34.968852682,
    -  98                     '37Cl': 36.965902602,
    -  99                     '36Ar': 35.967545105,
    - 100                     '38Ar': 37.96273211,
    - 101                     'Ar': 39.9623831237,
    - 102                     'K': 38.9637064864,
    - 103                     '40K': 39.963998166,
    - 104                     '41K': 40.9618252579,
    - 105                     'Ca': 39.962590863,
    - 106                     '42Ca': 41.95861783,
    - 107                     '43Ca': 42.95876644,
    - 108                     '44Ca': 43.95548156,
    - 109                     '46Ca': 45.9536890,
    - 110                     '48Ca': 47.95252276,
    - 111                     'Sc': 44.95590828,
    - 112                     '46Ti': 45.95262772,
    - 113                     '47Ti': 46.95175879,
    - 114                     'Ti': 47.94794198,
    - 115                     '49Ti': 48.94786568,
    - 116                     '50Ti': 49.94478689,
    - 117                     '50V': 49.94715601,
    - 118                     'V': 50.94395704,
    - 119                     '50Cr': 49.94604183,
    - 120                     'Cr': 51.94050623,
    - 121                     '53Cr': 52.94064815,
    - 122                     '54Cr': 53.93887916,
    - 123                     'Mn': 54.93804391,
    - 124                     '54Fe': 53.93960899,
    - 125                     'Fe': 55.93493633,
    - 126                     '57Fe': 56.93539284,
    - 127                     '58Fe': 57.93327443,
    - 128                     'Co': 58.93319429,
    - 129                     'Ni': 57.93534241,
    - 130                     '60Ni': 59.93078588,
    - 131                     '61Ni': 60.93105557,
    - 132                     '62Ni': 61.92834537,
    - 133                     '64Ni': 63.92796682,
    - 134                     'Cu': 62.92959772,
    - 135                     '65Cu': 64.92778970,
    - 136                     'Zn': 63.92914201,
    - 137                     '66Zn': 65.92603381,
    - 138                     '67Zn': 66.92712775,
    - 139                     '68Zn': 67.92484455,
    - 140                     '70Zn': 69.9253192,
    - 141                     'Ga': 68.9255735,
    - 142                     '71Ga': 70.92470258,
    - 143                     '70Ge': 69.92424875,
    - 144                     '72Ge': 71.922075826,
    - 145                     '73Ge': 72.923458956,
    - 146                     'Ge': 73.921177761,
    - 147                     '76Ge': 75.921402726,
    - 148                     'As': 74.92159457,
    - 149                     '74Se': 73.922475934,
    - 150                     '76Se': 75.919213704,
    - 151                     '77Se': 76.919914154,
    - 152                     '78Se': 77.91730928,
    - 153                     'Se': 79.9165218,
    - 154                     '82Se': 81.9166995,
    - 155                     'Br': 78.9183376,
    - 156                     '81Br': 80.9162897,
    - 157                     '78Kr': 77.92036494,
    - 158                     '80Kr': 79.91637808,
    - 159                     '82Kr': 81.91348273,
    - 160                     '83Kr': 82.91412716,
    - 161                     'Kr': 83.9114977282,
    - 162                     '86Kr': 85.9106106269,
    - 163                     'Rb': 84.9117897379,
    - 164                     '87Rb': 86.9091805310,
    - 165                     '84Sr': 83.9134191,
    - 166                     '86Sr': 85.9092606,
    - 167                     '87Sr': 86.9088775,
    - 168                     'Sr': 87.9056125,
    - 169                     'Y': 88.9058403,
    - 170                     'Zr': 89.9046977,
    - 171                     '91Zr': 90.9056396,
    - 172                     '92Zr': 91.9050347,
    - 173                     '94Zr': 93.9063108,
    - 174                     '96Zr': 95.9082714,
    - 175                     'Nb': 92.9063730,
    - 176                     '92Mo': 91.90680796,
    - 177                     '94Mo': 93.90508490,
    - 178                     '95Mo': 94.90583877,
    - 179                     '96Mo': 95.90467612,
    - 180                     '97Mo': 96.90601812,
    - 181                     'Mo': 97.90540482,
    - 182                     '100Mo': 99.9074718,
    - 183                     #'97Tc': 96.9063667,
    - 184                     #'98Tc': 97.9072124,
    - 185                     'Tc': 98.9062508, #consider removing technetium as it is radioactive
    - 186                     '96Ru': 95.90759025,
    - 187                     '98Ru': 97.9052868,
    - 188                     '99Ru': 98.9059341,
    - 189                     '100Ru': 99.9042143,
    - 190                     '101Ru': 100.9055769,
    - 191                     'Ru': 101.9043441,
    - 192                     '104Ru': 103.9054275,
    - 193                     'Rh': 102.9054980,
    - 194                     '102Pd': 101.9056022,
    - 195                     '104Pd': 103.9040305,
    - 196                     '105Pd': 104.9050796,
    - 197                     'Pd': 105.9034804,
    - 198                     '108Pd': 107.9038916,
    - 199                     '110Pd': 109.9051722,
    - 200                     'Ag': 106.9050916,
    - 201                     '109Ag': 108.9047553,
    - 202                     '106Cd': 105.9064599,
    - 203                     '108Cd': 107.9041834,
    - 204                     '110Cd': 109.90300661,
    - 205                     '111Cd': 110.90418287,
    - 206                     'Cd': 111.90276287,
    - 207                     '113Cd': 112.90440813,
    - 208                     '114Cd': 113.90336509,
    - 209                     '116Cd': 115.90476315,
    - 210                     '113In': 112.90406184,
    - 211                     'In': 114.903878776,
    - 212                     '112Sn': 111.90482387,
    - 213                     '114Sn': 113.9027827,
    - 214                     '115Sn': 114.903344699,
    - 215                     '116Sn': 115.90174280,
    - 216                     '117Sn': 116.90295398,
    - 217                     '118Sn': 117.90160657,
    - 218                     '119Sn': 118.90331117,
    - 219                     'Sn': 119.90220163,
    - 220                     '122Sn': 121.9034438,
    - 221                     '124Sn': 123.9052766,
    - 222                     'Sb': 120.9038120,
    - 223                     '123Sb': 122.9042132,
    - 224                     '120Te': 119.9040593,
    - 225                     '122Te': 121.9030435,
    - 226                     '123Te': 122.9042698,
    - 227                     '124Te': 123.9028171,
    - 228                     '125Te': 124.9044299,
    - 229                     '126Te': 125.9033109,
    - 230                     '128Te': 127.90446128,
    - 231                     'Te': 129.906222748,
    - 232                     'I': 126.9044719,
    - 233                     '124Xe': 123.9058920,
    - 234                     '126Xe': 125.9042983,
    - 235                     '128Xe': 127.9035310,
    - 236                     '129Xe': 128.9047808611,
    - 237                     '130Xe': 129.903509349,
    - 238                     '131Xe': 130.90508406,
    - 239                     'Xe': 131.9041550856,
    - 240                     '134Xe': 133.90539466,
    - 241                     '136Xe': 135.907214484,
    - 242                     'Cs': 132.9054519610,
    - 243                     '130Ba': 129.9063207,
    - 244                     '132Ba': 131.9050611,
    - 245                     '134Ba': 133.90450818,
    - 246                     '135Ba': 134.90568838,
    - 247                     '136Ba': 135.90457573,
    - 248                     '137Ba': 136.90582714,
    - 249                     'Ba': 137.90524700,
    - 250                     '138La': 137.9071149,
    - 251                     'La': 138.9063563,
    - 252                     '136Ce': 135.90712921,
    - 253                     '138Ce': 137.905991,
    - 254                     'Ce': 139.9054431,
    - 255                     '142Ce': 141.9092504,
    - 256                     'Pr': 140.9076576,
    - 257                     'Nd': 141.9077290,
    - 258                     '143Nd': 142.9098200,
    - 259                     '144Nd': 143.9100930,
    - 260                     '145Nd': 144.9125793,
    - 261                     '146Nd': 145.9131226,
    - 262                     '148Nd': 147.9168993,
    - 263                     '150Nd': 149.9209022,
    - 264                     '145Pm': 144.9127559,
    - 265                     '147Pm': 146.9151450,
    - 266                     '144Sm': 143.9120065,
    - 267                     '147Sm': 146.9149044,
    - 268                     '148Sm': 147.9148292,
    - 269                     '149Sm': 148.9171921,
    - 270                     '150Sm': 149.9172829,
    - 271                     'Sm': 151.9197397,
    - 272                     '154Sm': 153.9222169,
    - 273                     '151Eu': 150.9198578,
    - 274                     'Eu': 152.9212380,
    - 275                     '152Gd': 151.9197995,
    - 276                     '154Gd': 153.9208741,
    - 277                     '155Gd': 154.9226305,
    - 278                     '156Gd': 155.9221312,
    - 279                     '157Gd': 156.9239686,
    - 280                     'Gd': 157.9241123,
    - 281                     '160Gd': 159.9270624,
    - 282                     'Tb': 158.9253547,
    - 283                     '156Dy': 155.9242847,
    - 284                     '158Dy': 157.9244159,
    - 285                     '160Dy': 159.9252046,
    - 286                     '161Dy': 160.9269405,
    - 287                     '162Dy': 161.9268056,
    - 288                     '163Dy': 162.9287383,
    - 289                     'Dy': 163.9291819,
    - 290                     'Ho': 164.9303288,
    - 291                     '162Er': 161.9287884,
    - 292                     '164Er': 163.9292088,
    - 293                     'Er': 165.9302995,
    - 294                     '167Er': 166.9320546,
    - 295                     '168Er': 167.9323767,
    - 296                     '170Er': 169.9354702,
    - 297                     'Tm': 168.9342179,
    - 298                     '168Yb': 167.9338896,
    - 299                     '170Yb': 169.9347664,
    - 300                     '171Yb': 170.9363302,
    - 301                     '172Yb': 171.9363859,
    - 302                     '173Yb': 172.9382151,
    - 303                     'Yb': 173.9388664,
    - 304                     '176Yb': 175.9425764,
    - 305                     'Lu': 174.9407752,
    - 306                     '176Lu': 175.9426897,
    - 307                     '174Hf': 173.9400461,
    - 308                     '176Hf': 175.9414076,
    - 309                     '177Hf': 176.9432277,
    - 310                     '178Hf': 177.9437058,
    - 311                     '179Hf': 178.9458232,
    - 312                     'Hf': 179.9465570,
    - 313                     '180Ta': 179.9474648,
    - 314                     'Ta': 180.9479958,
    - 315                     '180W': 179.9467108,
    - 316                     '182W': 181.94820394,
    - 317                     '183W': 182.95022275,
    - 318                     'W': 183.95093092,
    - 319                     '186W': 185.9543628,
    - 320                     '185Re': 184.9529545,
    - 321                     'Re': 186.9557501,
    - 322                     '184Os': 183.9524885,
    - 323                     '186Os': 185.9538350,
    - 324                     '187Os': 186.9557474,
    - 325                     '188Os': 187.9558352,
    - 326                     '189Os': 188.9581442,
    - 327                     '190Os': 189.9584437,
    - 328                     '192Os': 191.9614770,
    - 329                     '191Ir': 190.9605893,
    - 330                     'Ir': 192.9629216,
    - 331                     '190Pt': 189.9599297,
    - 332                     '192Pt': 191.9610387,
    - 333                     '194Pt': 193.9626809,
    - 334                     'Pt': 194.9647917,
    - 335                     '196Pt': 195.96495209,
    - 336                     '198Pt': 197.9678949,
    - 337                     'Au': 196.96656879,
    - 338                     '196Hg': 195.9658326,
    - 339                     '198Hg': 197.96676860,
    - 340                     '199Hg': 198.96828064,
    - 341                     '200Hg': 199.96832659,
    - 342                     '201Hg': 200.97030284,
    - 343                     'Hg': 201.97064340,
    - 344                     '204Hg': 203.97349398,
    - 345                     '203Tl': 202.9723446,
    - 346                     'Tl': 204.9744278,
    - 347                     '204Pb': 203.9730440,
    - 348                     '206Pb': 205.9744657,
    - 349                     '207Pb': 206.9758973,
    - 350                     'Pb': 207.9766525,
    - 351                     'Bi': 208.9803991,
    - 352                     '209Po': 208.9824308,
    - 353                     '210Po': 209.9828741,
    - 354                     '210At': 209.9871479,
    - 355                     '211At': 210.9874966,
    - 356                     '211Rn': 210.9906011,
    - 357                     '220Rn': 220.0113941,
    - 358                     '222Rn': 222.0175782,
    - 359                     '223Fr': 223.0197360,
    - 360                     '223Ra': 223.0185023,
    - 361                     '224Ra': 224.0202120,
    - 362                     '226Ra': 226.0254103,
    - 363                     '228Ra': 228.0310707,
    - 364                     '227Ac': 227.0277523,
    - 365                     '230Th': 230.0331341,
    - 366                     'Th': 232.0380558,
    - 367                     'Pa': 231.0358842,
    - 368                     '233U': 233.0396355,
    - 369                     '234U': 234.0409523,
    - 370                     '235U': 235.0439301,
    - 371                     '236U': 236.0455682,
    - 372                     'U': 238.0507884,
    - 373                     '236Np': 236.046570,
    - 374                     '237Np': 237.0481736,
    - 375                     '238Pu': 238.0495601,
    - 376                     '239Pu': 239.0521636,
    - 377                     '240Pu': 240.0538138,
    - 378                     '241Pu': 241.0568517,
    - 379                     '242Pu': 242.0587428,
    - 380                     '244Pu': 244.0642053,
    - 381                     '241Am': 241.0568293,
    - 382                     '243Am': 243.0613813,
    - 383                     '243Cm': 243.0613893,
    - 384                     '244Cm': 244.0627528,
    - 385                     '245Cm': 245.0654915,
    - 386                     '246Cm': 246.0672238,
    - 387                     '247Cm': 247.0703541,
    - 388                     '248Cm': 248.0723499,
    - 389                     '247Bk': 247.0703073,
    - 390                     '249Bk': 249.0749877,
    - 391                     '249Cf': 249.0748539,
    - 392                     '250Cf': 250.0764062,
    - 393                     '251Cf': 251.0795886,
    - 394                     '252Cf': 252.0816272,
    - 395                     '252Es': 252.082980,
    - 396                     '257Fm': 257.0951061,
    - 397                     '258Md': 258.0984315,
    - 398                     '260Md': 260.10365, 
    - 399                     '259No': 259.10103,
    - 400                     '262Lr': 262.10961,
    - 401                     '267Rf': 267.12179,
    - 402                     '268Db': 268.12567,
    - 403                     '271Sg': 271.13393,
    - 404                     '272Bh': 272.13826,
    - 405                     '270Hs': 270.13429,
    - 406                     '276Mt': 276.15159,
    - 407                     '281Ds': 281.16451,
    - 408                     '280Rg': 280.16514,
    - 409                     '285Cn': 285.17712,
    - 410                     '284Nh': 284.17873,
    - 411                     '289Fl': 289.19042,
    - 412                     '288Mc': 288.19274,
    - 413                     '293Lv': 293.20449,
    - 414                     '292Ts': 292.20746,
    - 415                     '294Og': 294.21392}
    - 416    
    - 417
    - 418    # This list orders atoms: the most abundant isotope of each element comes first, grouped as labelled by the inline comment on each row.
    - 419    # Less abundant isotopes follow at the end of the list, ordered by mass.
    - 420    atoms_order = ['C', 'H', 'O', 'N', 'P', 'S', # CHONPS
    - 421                    'F', 'Cl', 'Br', 'I', 'At', #Halogens
    - 422                    'Li','Na','K','Rb','Cs','Fr', #Alkali
    - 423                    'He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn', #Noble gases
    - 424                    'Be','B', #Row 2
    - 425                    'Mg', 'Al','Si', #Row 3
    - 426                    'Ca','Sc','Ti','V','Cr','Mn','Fe','Co','Ni','Cu','Zn','Ga','Ge','As','Se', #Row 4
    - 427                    'Sr','Y','Zr','Nb','Mo','Tc','Ru','Rh','Pd','Ag','Cd','In','Sn','Sb','Te', #Row 5
    - 428                    'Ba','La', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', #Row 6
    - 429                    'Ra', 'Ac', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og', #Row 7
    - 430                    'Ce','Pr','Nd','Pm','Sm','Eu','Gd','Tb','Dy','Ho','Er','Tm','Yb','Lu', #Lanthanides
    - 431                    'Th','Pa','U','Np','Pu','Am','Cm','Bk','Cf','Es','Fm','Md','No','Lr', #Actinides
    - 432                    # Less abundant isotopes follow
    - 433                    'D','6Li',
    - 434                    '10B', '13C','15N','17O','18O',
    - 435                    '22Ne',  '25Mg', '26Mg',  '29Si', 
    - 436                    '30Si', '33S', '34S','36S', '37Cl',
    - 437                    '40Ca', '41K', '44Ca','46Ti', '47Ti',  '49Ti', 
    - 438                    '50Cr', '50Ti', '50V', '53Cr', '54Cr',  '54Fe', '57Fe', '58Fe',
    - 439                    '60Ni', '61Ni', '62Ni', '65Cu', '66Zn','67Zn', '68Zn',  
    - 440                    '70Ge', '71Ga', '72Ge', '73Ge',  '76Ge', '76Se', '77Se', '78Se',  
    - 441                    '81Br', '80Kr','82Kr', '82Se', '83Kr', '85Rb', '86Kr', '86Sr', '87Rb', '87Sr', '88Sr',  
    - 442                    '91Zr', '92Mo', '92Zr','94Mo', '94Zr', '95Mo', '96Mo', '96Ru', '96Zr', '97Mo',  '98Ru', '99Ru',
    - 443                    '100Mo', '100Ru', '101Ru', '102Pd', '104Pd', '104Ru', '105Pd', '106Cd', '106Pd', '108Cd', '108Pd','109Ag',
    - 444                    '110Cd', '110Pd', '111Cd', '112Cd', '112Sn', '113Cd', '113In', '114Cd', '114Sn','115In', '115Sn', '116Cd', '116Sn', '117Sn', '118Sn', '119Sn', 
    - 445                    '120Sn', '120Te', '121Sb', '122Sn', '122Te', '123Sb', '123Te', '124Sn', '124Te', '124Xe', '125Te', '126Te', '126Xe',
    - 446                    '128Te', '128Xe', '129Xe', '130Ba', '130Te', '130Xe', '131Xe', '132Ba', '132Xe', '134Ba',
    - 447                    '134Xe', '135Ba', '136Ba', '136Xe', '137Ba', '138Ba', 
    - 448                    '174Hf', '176Hf', '177Hf', '178Hf','179Hf', 
    - 449                    '180Hf', '180W', '182W', '183W', '184Os', '184W', '185Re', '186Os', '186W', '187Os', '187Re','188Os', '189Os', 
    - 450                    '190Os', '190Pt', '191Ir', '192Ir', '192Os', '192Pt', '194Pt', '195Pt', '196Hg','196Pt', '198Hg', '198Pt','199Hg', 
    - 451                    '200Hg', '201Hg', '202Hg', '203Tl', '204Hg', '204Pb', '205Tl','206Pb','207Pb', '208Pb'
    - 452                    ]
    - 453
    - 454    atoms_covalence = {'C': (4),
    - 455                     '13C': (4),
    - 456                     'N': (3),
    - 457                     'O': (2),
    - 458                     'S': (2),
    - 459                     'H': (1),
    - 460                     'F': (1, 0),
    - 461                     'Cl': (1, 0),
    - 462                     'Br': (1, 0),
    - 463                     'I': (1, 0),
    - 464                     'At': (1),
    - 465                     'Li': (1, 0),
    - 466                     'Na': (1, 0),
    - 467                     'K':  (1, 0),
    - 468                     'Rb': (1),
    - 469                     'Cs': (1),
    - 470                     'Fr': (1),
    - 471                     'B': (4, 3, 2, 1),
    - 472                     'In': (3, 2, 1),
    - 473                     'Al': (3, 1, 2),
    - 474                     'P': (3, 5, 4, 2, 1),
    - 475                     'Ga': (3, 1, 2),
    - 476                     'Mg': (2, 1),
    - 477                     'Be': (2, 1),
    - 478                     'Ca': (2, 1),
    - 479                     'Sr': (2, 1),
    - 480                     'Ba': (2),
    - 481                     'Ra': (2),
    - 482                     'V': (5, 4, 3, 2, 1),
    - 483                     'Fe': (3, 2, 4, 5, 6),
    - 484                     'Si': (4, 3, 2),
    - 485                     'Sc': (3, 2, 1),
    - 486                     'Ti': (4, 3, 2, 1),
    - 487                     'Cr': (1, 2, 3, 4, 5, 6),
    - 488                     'Mn': (1, 2, 3, 4, 5, 6, 7),
    - 489                     'Co': (1, 2, 3, 4, 5),
    - 490                     'Ni': (1, 2, 3, 4),
    - 491                     'Cu': (2, 1, 3, 4),
    - 492                     'Zn': (2, 1),
    - 493                     'Ge': (4, 3, 2, 1),
    - 494                     'As': (5, 3, 2, 1),
    - 495                     'Se': (6, 4, 2, 1),
    - 496                     'Y': (3, 2, 1),
    - 497                     'Zr': (4, 3, 2, 1),
    - 498                     'Nb': (5, 4, 3, 2, 1),
    - 499                     'Mo': (6, 5, 4, 3, 2, 1),
    - 500                     'Tc': (7, 6, 5, 4, 3, 2, 1),
    - 501                     'Ru': (8, 7, 6, 5, 4, 3, 2, 1),
    - 502                     'Rh': (6, 5, 4, 3, 2, 1),
    - 503                     'Pd': (4, 2, 1),
    - 504                     'Ag': (0, 1, 2, 3, 4),
    - 505                     'Cd': (2, 1),
    - 506                     'Sn': (4, 2),
    - 507                     'Sb': (5, 3),
    - 508                     'Te': (6, 5, 4, 2),
    - 509                     'La': (3, 2),
    - 510                     'Hf': (4, 3, 2),
    - 511                     'Ta': (5, 4, 3, 2),
    - 512                     'W': (6, 5, 4, 3, 2, 1),
    - 513                     'Re': (4, 7, 6, 5, 3, 2, 1),
    - 514                     'Os': (4, 8, 7, 6, 5, 3, 2, 1),
    - 515                     'Ir': (4, 8, 6, 5, 3, 2, 1),
    - 516                     'Pt': (4, 6, 5, 3, 2, 1),
    - 517                     'Au': (3, 5, 2, 1),
    - 518                     'Hg': (1, 2, 4),
    - 519                     'Tl': (3, 1),
    - 520                     'Pb': (4, 2),
    - 521                     'Bi': (3, 1, 5),
    - 522                     'Po': (2, 4, 6),
    - 523                     'Ac': (3, 2)
    - 524                     }
    - 525
    - 526    isotopic_abundance = {'H': 0.999885,
    - 527                          'D': 0.000115,
    - 528                          'T': 0, #Consider removing.
    - 529                          '3He': 0.00000134,
    - 530                          'He': 0.99999866,
    - 531                          '6Li': 0.0759,
    - 532                          'Li': 0.9241,
    - 533                          'Be': 1.0,
    - 534                          '10B': 0.199,
    - 535                          'B': 0.801,
    - 536                          'C': 0.9893,
    - 537                          '13C': 0.0107,
    - 538                          '14C': 0,
    - 539                          'N': 0.99636,
    - 540                          '15N': 0.00364,
    - 541                          'O': 0.99757,
    - 542                          '17O': 0.00038,
    - 543                          '18O': 0.00205,
    - 544                          'F': 1.0,
    - 545                          'Ne': 0.9048,
    - 546                          '21Ne': 0.0027,
    - 547                          '22Ne': 0.0925,
    - 548                          'Na': 1.0,
    - 549                          'Mg': 0.7899,
    - 550                          '25Mg': 0.1000,
    - 551                          '26Mg': 0.1101,
    - 552                          'Al': 1.0,
    - 553                          'Si': 0.92223,
    - 554                          '29Si': 0.04685,
    - 555                          '30Si': 0.03092,
    - 556                          'P': 1.0,
    - 557                          'S': 0.9499,
    - 558                          '33S': 0.0075,
    - 559                          '34S': 0.0425,
    - 560                          '36S': 0.0001,
    - 561                          'Cl': 0.7576,
    - 562                          '37Cl': 0.2424,
    - 563                          '36Ar': 0.003336,
    - 564                          '38Ar': 0.000629,
    - 565                          'Ar': 0.996035,
    - 566                          'K': 0.932581,
    - 567                          '40K': 0.000117,
    - 568                          '41K': 0.067302,
    - 569                          'Ca': 0.96941,
    - 570                          '42Ca': 0.00647,
    - 571                          '43Ca': 0.00135,
    - 572                          '44Ca': 0.02086,
    - 573                          '46Ca': 0.00004,
    - 574                          '48Ca': 0.001872,
    - 575                          'Sc': 1.0,
    - 576                          '46Ti': 0.0825,
    - 577                          '47Ti': 0.0744,
    - 578                          'Ti': 0.7372,
    - 579                          '49Ti': 0.0541,
    - 580                          '50Ti': 0.0518,
    - 581                          '50V': 0.00250,
    - 582                          'V': 0.9975,
    - 583                          '50Cr': 0.04345,
    - 584                          'Cr': 0.83789,
    - 585                          '53Cr': 0.09501,
    - 586                          '54Cr': 0.02365,
    - 587                          'Mn': 1.0,
    - 588                          '54Fe': 0.05845,
    - 589                          'Fe': 0.91754,
    - 590                          '57Fe': 0.02119,
    - 591                          '58Fe': 0.00282,
    - 592                          'Co': 1.0,
    - 593                          'Ni': 0.68077,
    - 594                          '60Ni': 0.26223,
    - 595                          '61Ni': 0.011399,
    - 596                          '62Ni': 0.036346,
    - 597                          '64Ni': 0.009255,
    - 598                          'Cu': 0.6915,
    - 599                          '65Cu': 0.3085,
    - 600                          'Zn': 0.4917,
    - 601                          '66Zn': 0.2773,
    - 602                          '67Zn': 0.0404,
    - 603                          '68Zn': 0.1845,
    - 604                          '70Zn': 0.0061,
    - 605                          'Ga': 0.60108,
    - 606                          '71Ga': 0.39892,
    - 607                          '70Ge': 0.2057,
    - 608                          '72Ge': 0.2745,
    - 609                          '73Ge': 0.0775,
    - 610                          'Ge': 0.3650,
    - 611                          '76Ge': 0.0773,
    - 612                          'As': 1.0,
    - 613                          '74Se': 0.0089,
    - 614                          '76Se': 0.0937,
    - 615                          '77Se': 0.0763,
    - 616                          '78Se': 0.2377,
    - 617                          'Se': 0.4961,
    - 618                          '82Se': 0.0873,
    - 619                          'Br': 0.5069,
    - 620                          '81Br': 0.4931,
    - 621                          '78Kr': 0.00355,
    - 622                          '80Kr': 0.02286,
    - 623                          '82Kr': 0.11593,
    - 624                          '83Kr': 0.11500,
    - 625                          'Kr': 0.56987,
    - 626                          '86Kr': 0.17279,
    - 627                          'Rb': 0.7217,
    - 628                          '87Rb': 0.2783,
    - 629                          '84Sr': 0.0056,
    - 630                          '86Sr': 0.0986,
    - 631                          '87Sr': 0.0700,
    - 632                          'Sr': 0.8258,
    - 633                          'Y': 1.0,
    - 634                          'Zr': 0.5145,
    - 635                          '91Zr': 0.1122,
    - 636                          '92Zr': 0.1715,
    - 637                          '94Zr': 0.1738,
    - 638                          '96Zr': 0.0280,
    - 639                          'Nb': 1.0,
    - 640                          '92Mo': 0.1453,
    - 641                          '94Mo': 0.0915,
    - 642                          '95Mo': 0.1584,
    - 643                          '96Mo': 0.1667,
    - 644                          '97Mo': 0.0960,
    - 645                          'Mo': 0.2439,
    - 646                          '100Mo': 0.0982,
    - 647                          '99Tc': 0, #consider removing
    - 648                          '96Ru': 0.0554,
    - 649                          '98Ru': 0.0187,
    - 650                          '99Ru': 0.1276,
    - 651                          '100Ru': 0.1260,
    - 652                          '101Ru': 0.1706,
    - 653                          'Ru': 0.3155,
    - 654                          '104Ru': 0.1862,
    - 655                          'Rh': 1.0,
    - 656                          '102Pd': 0.0102,
    - 657                          '104Pd': 0.1114,
    - 658                          '105Pd': 0.2233,
    - 659                          'Pd': 0.2733,
    - 660                          '108Pd': 0.2646,
    - 661                          '110Pd': 0.1172,
    - 662                          'Ag': 0.51839,
    - 663                          '109Ag': 0.48161,
    - 664                          '106Cd': 0.0125,
    - 665                          '108Cd': 0.0089,
    - 666                          '110Cd': 0.1249,
    - 667                          '111Cd': 0.1280,
    - 668                          'Cd': 0.2413,
    - 669                          '113Cd': 0.1222,
    - 670                          '114Cd': 0.2873,
    - 671                          '116Cd': 0.0749,
    - 672                          '113In': 0.0429,
    - 673                          'In': 0.9571,
    - 674                          '112Sn': 0.0097,
    - 675                          '114Sn': 0.0066,
    - 676                          '115Sn': 0.0034,
    - 677                          '116Sn': 0.1454,
    - 678                          '117Sn': 0.0768,
    - 679                          '118Sn': 0.2422,
    - 680                          '119Sn': 0.0859,
    - 681                          'Sn': 0.3258,
    - 682                          '122Sn': 0.0463,
    - 683                          '124Sn': 0.0579,
    - 684                          'Sb': 0.5721,
    - 685                          '123Sb': 0.4279,
    - 686                          '120Te': 0.0009,
    - 687                          '122Te': 0.0255,
    - 688                          '123Te': 0.0089,
    - 689                          '124Te': 0.0474,
    - 690                          '125Te': 0.0707,
    - 691                          '126Te': 0.1884,
    - 692                          '128Te': 0.3174,
    - 693                          'Te': 0.3408,
    - 694                          'I': 1.0,
    - 695                          '124Xe': 0.000952,
    - 696                          '126Xe': 0.000890,
    - 697                          '128Xe': 0.019102,
    - 698                          '129Xe': 0.264006,
    - 699                          '130Xe': 0.040710,
    - 700                          '131Xe': 0.212324,
    - 701                          'Xe': 0.269086,
    - 702                          '134Xe': 0.104357,
    - 703                          '136Xe': 0.088573,
    - 704                          'Cs': 1.0,
    - 705                          '130Ba': 0.00106,
    - 706                          '132Ba': 0.00101,
    - 707                          '134Ba': 0.02417,
    - 708                          '135Ba': 0.06592,
    - 709                          '136Ba': 0.07854,
    - 710                          '137Ba': 0.11232,
    - 711                          'Ba': 0.71698,
    - 712                          '138La': 0.0008881,
    - 713                          'La': 0.9991119,
    - 714                          '136Ce': 0.00185,
    - 715                          '138Ce': 0.00251,
    - 716                          'Ce': 0.88450,
    - 717                          '142Ce': 0.11114,
    - 718                          'Pr': 1.0,
    - 719                          'Nd': 0.27152,
    - 720                          '143Nd': 0.12174,
    - 721                          '144Nd': 0.23798,
    - 722                          '145Nd': 0.08293,
    - 723                          '146Nd': 0.17189,
    - 724                          '148Nd': 0.05756,
    - 725                          '150Nd': 0.05638,
    - 726                          '145Pm': 0,
    - 727                          '147Pm': 0,
    - 728                          '144Sm': 0.0307,
    - 729                          '147Sm': 0.1499,
    - 730                          '148Sm': 0.1124,
    - 731                          '149Sm': 0.1382,
    - 732                          '150Sm': 0.0738,
    - 733                          'Sm': 0.2675,
    - 734                          '154Sm': 0.2275,
    - 735                          '151Eu': 0.4781,
    - 736                          'Eu': 0.5219,
    - 737                          '152Gd': 0.0020,
    - 738                          '154Gd': 0.0218,
    - 739                          '155Gd': 0.1480,
    - 740                          '156Gd': 0.2047,
    - 741                          '157Gd': 0.1565,
    - 742                          'Gd': 0.2484,
    - 743                          '160Gd': 0.2186,
    - 744                          'Tb': 1.0,
    - 745                          '156Dy': 0.00056,
    - 746                          '158Dy': 0.00095,
    - 747                          '160Dy': 0.02329,
    - 748                          '161Dy': 0.18889,
    - 749                          '162Dy': 0.25475,
    - 750                          '163Dy': 0.24896,
    - 751                          'Dy': 0.28260,
    - 752                          'Ho': 1.0,
    - 753                          '162Er': 0.00139,
    - 754                          '164Er': 0.01601,
    - 755                          'Er': 0.33503,
    - 756                          '167Er': 0.22869,
    - 757                          '168Er': 0.26978,
    - 758                          '170Er': 0.14910,
    - 759                          'Tm': 1.0,
    - 760                          '168Yb': 0.00123,
    - 761                          '170Yb': 0.02982,
    - 762                          '171Yb': 0.1409,
    - 763                          '172Yb': 0.2168,
    - 764                          '173Yb': 0.16103,
    - 765                          'Yb': 0.32026,
    - 766                          '176Yb': 0.12996,
    - 767                          'Lu': 0.97401,
    - 768                          '176Lu': 0.02599,
    - 769                          '174Hf': 0.0016,
    - 770                          '176Hf': 0.0526,
    - 771                          '177Hf': 0.1860,
    - 772                          '178Hf': 0.2728,
    - 773                          '179Hf': 0.1362,
    - 774                          'Hf': 0.3508,
    - 775                          '180Ta': 0.0001201,
    - 776                          'Ta': 0.9998799,
    - 777                          '180W': 0.0012,
    - 778                          '182W': 0.2650,
    - 779                          '183W': 0.1431,
    - 780                          'W': 0.3064,
    - 781                          '186W': 0.2843,
    - 782                          '185Re': 0.3740,
    - 783                          'Re': 0.6260,
    - 784                          '184Os': 0.0002,
    - 785                          '186Os': 0.0159,
    - 786                          '187Os': 0.0196,
    - 787                          '188Os': 0.1324,
    - 788                          '189Os': 0.1615,
    - 789                          '190Os': 0.2626,
    - 790                          'Os': 0.4078,
    - 791                          '191Ir': 0.373,
    - 792                          'Ir': 0.627,
    - 793                          '190Pt': 0.00012,
    - 794                          '192Pt': 0.00782,
    - 795                          '194Pt': 0.3286,
    - 796                          'Pt': 0.3378,
    - 797                          '196Pt': 0.2521,
    - 798                          '198Pt': 0.07356,
    - 799                          'Au': 1.0,
    - 800                          '196Hg': 0.0015,
    - 801                          '198Hg': 0.0997,
    - 802                          '199Hg': 0.16872,
    - 803                          '200Hg': 0.2310,
    - 804                          '201Hg': 0.1318,
    - 805                          'Hg': 0.2986,
    - 806                          '204Hg': 0.0687,
    - 807                          '203Tl': 0.2952,
    - 808                          'Tl': 0.7048,
    - 809                          '204Pb': 0.014,
    - 810                          '206Pb': 0.241,
    - 811                          '207Pb': 0.221,
    - 812                          'Pb': 0.524,
    - 813                          'Bi': 1.0,
    - 814                          '209Po': 0,
    - 815                          '210Po': 0,
    - 816                          '210At': 0,
    - 817                          '211At': 0,
    - 818                          '211Rn': 0,
    - 819                          '220Rn': 0,
    - 820                          '222Rn': 0,
    - 821                          '223Fr': 0,
    - 822                          '223Ra': 0,
    - 823                          '224Ra': 0,
    - 824                          '226Ra': 0,
    - 825                          '228Ra': 0,
    - 826                          '227Ac': 0,
    - 827                          '230Th': 0,
    - 828                          'Th': 1.0,
    - 829                          'Pa': 1.0,
    - 830                          '233U': 0,
    - 831                          '234U': 0.000054,
    - 832                          '235U': 0.007204,
    - 833                          '236U': 0,
    - 834                          'U': 0.992742,
    - 835                          '236Np': 0,
    - 836                          '237Np': 0,
    - 837                          '238Pu': 0,
    - 838                          '239Pu': 0,
    - 839                          '240Pu': 0,
    - 840                          '241Pu': 0,
    - 841                          '242Pu': 0,
    - 842                          '244Pu': 0,
    - 843                          '241Am': 0,
    - 844                          '243Am': 0,
    - 845                          '243Cm': 0,
    - 846                          '244Cm': 0,
    - 847                          '245Cm': 0,
    - 848                          '246Cm': 0,
    - 849                          '247Cm': 0,
    - 850                          '248Cm': 0,
    - 851                          '247Bk': 0,
    - 852                          '249Bk': 0,
    - 853                          '249Cf': 0,
    - 854                          '250Cf': 0,
    - 855                          '251Cf': 0,
    - 856                          '252Cf': 0,
    - 857                          '252Es': 0,
    - 858                          '257Fm': 0,
    - 859                          '258Md': 0,
    - 860                          '260Md': 0,
    - 861                          '259No': 0,
    - 862                          '262Lr': 0,
    - 863                          '267Rf': 0,
    - 864                          '268Db': 0,
    - 865                          '271Sg': 0,
    - 866                          '272Bh': 0,
    - 867                          '270Hs': 0,
    - 868                          '276Mt': 0,
    - 869                          '281Ds': 0,
    - 870                          '280Rg': 0,
    - 871                          '285Cn': 0,
    - 872                          '284Nh': 0,
    - 873                          '289Fl': 0,
    - 874                          '288Mc': 0,
    - 875                          '293Lv': 0,
    - 876                          '292Ts': 0,
    - 877                          '294Og': 0}
    - 878    
    - 879    # Isotopes here is a dictionary of symbol, including full name, 
    - 880    # and then the isotopes which arent the most abundant one, sorted by abundance.
    - 881    # None indicates no stable isotopes/naturally occuring ones. 
    - 882    # This has been manually checked as far as Iodine only.
    - 883    isotopes = {'H': ['Hydrogen', ['D','T']],
    - 884                'He': ['Helium', ['3He']],
    - 885                'Li': ['Lithium', ['6Li']],
    - 886                'Be': ['Beryllium', [None]],
    - 887                'B': ['Boron', ['10B']],
    - 888                'C': ['Carbon', ['13C']],
    - 889                'N': ['Nitrogen', ['15N']],
    - 890                'O': ['Oxygen', ['18O', '17O']],
    - 891                'F': ['Fluorine', [None]],
    - 892                'Ne': ['Neon', ['22Ne', '21Ne']],
    - 893
    - 894                'Na': ['Sodium', [None]],
    - 895                'Mg': ['Magnesium', ['26Mg', '25Mg']],
    - 896                'Al': ['Aluminum', [None]],
    - 897                'Si': ['Silicon', ['29Si', '30Si']],
    - 898                'P': ['Phosphorus', [None]],
    - 899                'S': ['Sulfur', ['34S','33S','36S']],
    - 900                'Cl': ['Chlorine', ['37Cl']],
    - 901                'Ar': ['Argon', ['36Ar','38Ar']],
    - 902                
    - 903                'K': ['Potassium', ['41K', '40K']],
    - 904                'Ca': ['Calcium', ['44Ca', '48Ca','43Ca','42Ca','46Ca']],
    - 905                'Sc': ['Scandium', [None]],
    - 906                'Ti': ['Titanium', ['46Ti', '47Ti', '49Ti', '50Ti']],
    - 907                'V': ['Vanadium', ['50V']],
    - 908                'Cr': ['Chromium', ['53Cr', '50Cr', '54Cr']],
    - 909                'Mn': ['Manganese', [None]],
    - 910                'Fe': ['Iron', ['54Fe', '57Fe', '58Fe']],
    - 911                'Co': ['Cobalt', [None]],
    - 912                'Ni': ['Nickel', ['60Ni', '62Ni', '61Ni','64Ni']],
    - 913                'Cu': ['Copper', ['65Cu']],
    - 914                'Zn': ['Zinc', ['66Zn', '68Zn', '67Zn', '70Zn']],
    - 915                'Ga': ['Gallium', ['71Ga']],
    - 916                'Ge': ['Germanium', ['72Ge', '70Ge', '73Ge', '76Ge']],
    - 917                'As': ['Arsenic', [None]],
    - 918                'Se': ['Selenium', ['78Se', '76Se', '82Se', '77Se','74Se']],
    - 919                'Br': ['Bromine', ['81Br']],
    - 920                'Kr': ['Krypton', ['86Kr', '82Kr', '83Kr', '80Kr']],
    - 921
    - 922                'Rb': ['Rubidium', ['87Rb']],
    - 923                'Sr': ['Strontium', ['86Sr', '87Sr','84Sr']],
    - 924                'Y': ['Yttrium', [None]],
    - 925                'Zr': ['Zirconium', ['94Zr', '92Zr', '91Zr', '96Zr']],
    - 926                'Nb': ['Niobium', [None]],
    - 927                'Mo': ['Molybdenum', ['96Mo', '95Mo', '92Mo', '100Mo', '97Mo', '94Mo']],
    - 928                'Tc': ['Technetium', [None]], #consider removing
    - 929                'Ru': ['Ruthenium', ['104Ru', '101Ru', '99Ru', '100Ru', '96Ru', '98Ru']],
    - 930                'Rh': ['Rhodium', [None]],
    - 931                'Pd': ['Palladium', ['108Pd', '105Pd', '110Pd', '104Pd', '102Pd']],
    - 932                'Ag': ['Silver', ['109Ag']],
    - 933                'Cd': ['Cadmium', ['114Cd', '111Cd', '110Cd', '113Cd', '116Cd', '106Cd', '108Cd']],
    - 934                'In': ['Indium', ['113In']],
    - 935                'Sn': ['Tin', ['118Sn', '116Sn', '119Sn', '117Sn', '124Sn', '122Sn', '112Sn','114Sn', '115Sn']],
    - 936                'Sb': ['Antimony', ['123Sb']],
    - 937                'Te': ['Tellurium', ['128Te', '126Te', '125Te', '124Te', '122Te','123Te','120Te']],
    - 938                'I': ['Iodine', [None]],
    - 939                'Xe': ['Xenon', ['129Xe','131Xe', '134Xe', '136Xe', '130Xe', '128Xe']],
    - 940
    - 941                'Cs': ['Cesium', [None]],
    - 942                'Ba': ['Barium', ['137Ba', '136Ba', '135Ba', '134Ba']],
    - 943                'La': ['Lanthanum',['138La']],
    - 944                'Hf': ['Hafnium', ['178Hf', '177Hf', '179Hf', '176Hf']],
    - 945                'Ta': ['Tantalum', ['180Ta']],
    - 946                'W': ['Tungsten', ['186W', '182W', '183W']],
    - 947                'Re': ['Rhenium', ['185Re']],
    - 948                'Os': ['Osmium', ['190Os', '189Os', '188Os', '187Os', '186Os']],
    - 949                'Ir': ['Iridium', ['191Ir']],
    - 950                'Pt': ['Platinum', ['194Pt', '196Pt', '198Pt', '192Pt']],
    - 951                'Au': ['Gold', [None]],
    - 952                'Hg': ['Mercury', ['200Hg', '199Hg', '201Hg', '198Hg', '204Hg']],
    - 953                'Tl': ['Thallium', ['203Tl']],
    - 954                'Pb': ['Lead', ['206Pb', '207Pb', '204Pb']],
    - 955                'Bi': ['Bismuth', [None]],
    - 956                'Po': ['Polonium', [None]],
    - 957                'At': ['Astatine', [None]],
    - 958                'Rn': ['Radon', [None]],
    - 959
    - 960                'Fr': ['Francium', [None]],
    - 961                'Ra': ['Radium', [None]],
    - 962                'Ac': ['Actinium', [None]],
    - 963                'Rf': ['Rutherfordium', [None]],
    - 964                'Db': ['Dubnium', [None]],
    - 965                'Sg': ['Seaborgium', [None]],
    - 966                'Bh': ['Bohrium', [None]],
    - 967                'Hs': ['Hassium', [None]],
    - 968                'Mt': ['Meitnerium', [None]],
    - 969                'Ds': ['Darmstadtium', [None]],
    - 970                'Rg': ['Roentgenium', [None]],
    - 971                'Cn': ['Copernicium', [None]],
    - 972                'Nh': ['Nihonium', [None]],
    - 973                'Fl': ['Flerovium', [None]],
    - 974                'Mc': ['Moscovium', [None]],
    - 975                'Lv': ['Livermorium', [None]],
    - 976                'Ts': ['Tennessine', [None]],
    - 977                'Og': ['Oganesson', [None]],
    - 978
    - 979                'Ce': ['Cerium', ['142Ce','138Ce''136Ce']],
    - 980                'Pr': ['Praseodymium', [None]],
    - 981                'Nd': ['Neodymium', [None]],
    - 982                'Pm': ['Promethium', [None]],
    - 983                'Sm': ['Samarium', [None]],
    - 984                'Eu': ['Europium', [None]],
    - 985                'Gd': ['Gadolinium', [None]],
    - 986                'Tb': ['Terbium', [None]],
    - 987                'Dy': ['Dysprosium', [None]],
    - 988                'Ho': ['Holmium', [None]],
    - 989                'Er': ['Erbium', [None]],
    - 990                'Tm': ['Thulium', [None]],
    - 991                'Yb': ['Ytterbium', [None]],
    - 992                'Lu': ['Lutetium', ['176Lu']],
    - 993
    - 994                'Th': ['Thorium', [None]],
    - 995                'Pa': ['Protactinium', [None]],
    - 996                'U': ['Uranium', ['235U','234U']],
    - 997                'Np': ['Neptunium', [None]],
    - 998                'Pu': ['Plutonium', [None]],
    - 999                'Am': ['Americium', [None]],
    -1000                'Cm': ['Curium', [None]],
    -1001                'Bk': ['Berkelium', [None]],
    -1002                'Cf': ['Californium', [None]],
    -1003                'Es': ['Einsteinium', [None]],
    -1004                'Fm': ['Fermium', [None]],
    -1005                'Md': ['Mendelevium', [None]],
    -1006                'No': ['Nobelium', [None]],
    -1007                'Lr': ['Lawrencium', [None]]
    -1008                }
    +            
      47class Atoms:  # pragma: no cover
    +  48    """Class for Atoms in CoreMS
    +  49
    +  50    This class includes key properties of atoms (and the electron) and isotopes, including their exact masses, relative abundances, and covalences.
    +  51    It also associates which isotopes are for the same element, and provides an ordering of elements.
    +  52
    +  53    IUPAC definition of monoisotopic mass is based on the most abundant isotopes of each element present.
    +  54    Here, we will use atom symbols with isotope numbers for all isotopes excluding the most abundant one.
    +  55    This list has been corrected up to Iodine.
    +  56
    +  57    References
    +  58    ----------
    +  59
    +  60    1. NIST - Last Accessed 2019-06-12
    +  61    https://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses
    +  62
    +  63    """
    +  64
    +  65    electron_mass = 0.0005_485_799_090_65  # NIST value
    +  66
    +  67    atomic_masses = {
    +  68        "H": 1.00782503223,
    +  69        "D": 2.01410177812,
    +  70        "T": 3.0160492779,  # consider removing
    +  71        "3He": 3.0160293201,
    +  72        "He": 4.00260325413,
    +  73        "6Li": 6.0151228874,
    +  74        "Li": 7.0160034366,
    +  75        "Be": 9.012183065,
    +  76        "10B": 10.01293695,
    +  77        "B": 11.00930536,
    +  78        "C": 12.0,
    +  79        "13C": 13.00335483507,
    +  80        "14C": 14.0032419884,
    +  81        "N": 14.00307400443,
    +  82        "15N": 15.00010889888,
    +  83        "O": 15.99491461957,
    +  84        "17O": 16.99913175650,
    +  85        "18O": 17.99915961286,
    +  86        "F": 18.99840316273,
    +  87        "Ne": 19.9924401762,
    +  88        "21Ne": 20.993846685,
    +  89        "22Ne": 21.991385114,
    +  90        "Na": 22.9897692820,
    +  91        "Mg": 23.985041697,
    +  92        "25Mg": 24.985836976,
    +  93        "26Mg": 25.982592968,
    +  94        "Al": 26.98153853,
    +  95        "Si": 27.97692653465,
    +  96        "29Si": 28.97649466490,
    +  97        "30Si": 29.973770136,
    +  98        "P": 30.97376199842,
    +  99        "S": 31.9720711744,
    + 100        "33S": 32.9714589098,
    + 101        "34S": 33.967867004,
    + 102        "36S": 35.96708071,
    + 103        "Cl": 34.968852682,
    + 104        "37Cl": 36.965902602,
    + 105        "36Ar": 35.967545105,
    + 106        "38Ar": 37.96273211,
    + 107        "Ar": 39.9623831237,
    + 108        "K": 38.9637064864,
    + 109        "40K": 39.963998166,
    + 110        "41K": 40.9618252579,
    + 111        "Ca": 39.962590863,
    + 112        "42Ca": 41.95861783,
    + 113        "43Ca": 42.95876644,
    + 114        "44Ca": 43.95548156,
    + 115        "46Ca": 45.9536890,
    + 116        "48Ca": 47.95252276,
    + 117        "Sc": 44.95590828,
    + 118        "46Ti": 45.95262772,
    + 119        "47Ti": 46.95175879,
    + 120        "Ti": 47.94794198,
    + 121        "49Ti": 48.94786568,
    + 122        "50Ti": 49.94478689,
    + 123        "50V": 49.94715601,
    + 124        "V": 50.94395704,
    + 125        "50Cr": 49.94604183,
    + 126        "Cr": 51.94050623,
    + 127        "53Cr": 52.94064815,
    + 128        "54Cr": 53.93887916,
    + 129        "Mn": 54.93804391,
    + 130        "54Fe": 53.93960899,
    + 131        "Fe": 55.93493633,
    + 132        "57Fe": 56.93539284,
    + 133        "58Fe": 57.93327443,
    + 134        "Co": 58.93319429,
    + 135        "Ni": 57.93534241,
    + 136        "60Ni": 59.93078588,
    + 137        "61Ni": 60.93105557,
    + 138        "62Ni": 61.92834537,
    + 139        "64Ni": 63.92796682,
    + 140        "Cu": 62.92959772,
    + 141        "65Cu": 64.92778970,
    + 142        "Zn": 63.92914201,
    + 143        "66Zn": 65.92603381,
    + 144        "67Zn": 66.92712775,
    + 145        "68Zn": 67.92484455,
    + 146        "70Zn": 69.9253192,
    + 147        "Ga": 68.9255735,
    + 148        "71Ga": 70.92470258,
    + 149        "70Ge": 69.92424875,
    + 150        "72Ge": 71.922075826,
    + 151        "73Ge": 72.923458956,
    + 152        "Ge": 73.921177761,
    + 153        "76Ge": 75.921402726,
    + 154        "As": 74.92159457,
    + 155        "74Se": 73.922475934,
    + 156        "76Se": 75.919213704,
    + 157        "77Se": 76.919914154,
    + 158        "78Se": 77.91730928,
    + 159        "Se": 79.9165218,
    + 160        "82Se": 81.9166995,
    + 161        "Br": 78.9183376,
    + 162        "81Br": 80.9162897,
    + 163        "78Kr": 77.92036494,
    + 164        "80Kr": 79.91637808,
    + 165        "82Kr": 81.91348273,
    + 166        "83Kr": 82.91412716,
    + 167        "Kr": 83.9114977282,
    + 168        "86Kr": 85.9106106269,
    + 169        "Rb": 84.9117897379,
    + 170        "87Rb": 86.9091805310,
    + 171        "84Sr": 83.9134191,
    + 172        "86Sr": 85.9092606,
    + 173        "87Sr": 86.9088775,
    + 174        "Sr": 87.9056125,
    + 175        "Y": 88.9058403,
    + 176        "Zr": 89.9046977,
    + 177        "91Zr": 90.9056396,
    + 178        "92Zr": 91.9050347,
    + 179        "94Zr": 93.9063108,
    + 180        "96Zr": 95.9082714,
    + 181        "Nb": 92.9063730,
    + 182        "92Mo": 91.90680796,
    + 183        "94Mo": 93.90508490,
    + 184        "95Mo": 94.90583877,
    + 185        "96Mo": 95.90467612,
    + 186        "97Mo": 96.90601812,
    + 187        "Mo": 97.90540482,
    + 188        "100Mo": 99.9074718,
    + 189        #'97Tc': 96.9063667,
    + 190        #'98Tc': 97.9072124,
    + 191        "Tc": 98.9062508,  # consider removing technetium as it is radioactive
    + 192        "96Ru": 95.90759025,
    + 193        "98Ru": 97.9052868,
    + 194        "99Ru": 98.9059341,
    + 195        "100Ru": 99.9042143,
    + 196        "101Ru": 100.9055769,
    + 197        "Ru": 101.9043441,
    + 198        "104Ru": 103.9054275,
    + 199        "Rh": 102.9054980,
    + 200        "102Pd": 101.9056022,
    + 201        "104Pd": 103.9040305,
    + 202        "105Pd": 104.9050796,
    + 203        "Pd": 105.9034804,
    + 204        "108Pd": 107.9038916,
    + 205        "110Pd": 109.9051722,
    + 206        "Ag": 106.9050916,
    + 207        "109Ag": 108.9047553,
    + 208        "106Cd": 105.9064599,
    + 209        "108Cd": 107.9041834,
    + 210        "110Cd": 109.90300661,
    + 211        "111Cd": 110.90418287,
    + 212        "Cd": 111.90276287,
    + 213        "113Cd": 112.90440813,
    + 214        "114Cd": 113.90336509,
    + 215        "116Cd": 115.90476315,
    + 216        "113In": 112.90406184,
    + 217        "In": 114.903878776,
    + 218        "112Sn": 111.90482387,
    + 219        "114Sn": 113.9027827,
    + 220        "115Sn": 114.903344699,
    + 221        "116Sn": 115.90174280,
    + 222        "117Sn": 116.90295398,
    + 223        "118Sn": 117.90160657,
    + 224        "119Sn": 118.90331117,
    + 225        "Sn": 119.90220163,
    + 226        "122Sn": 121.9034438,
    + 227        "124Sn": 123.9052766,
    + 228        "Sb": 120.9038120,
    + 229        "123Sb": 122.9042132,
    + 230        "120Te": 119.9040593,
    + 231        "122Te": 121.9030435,
    + 232        "123Te": 122.9042698,
    + 233        "124Te": 123.9028171,
    + 234        "125Te": 124.9044299,
    + 235        "126Te": 125.9033109,
    + 236        "128Te": 127.90446128,
    + 237        "Te": 129.906222748,
    + 238        "I": 126.9044719,
    + 239        "124Xe": 123.9058920,
    + 240        "126Xe": 125.9042983,
    + 241        "128Xe": 127.9035310,
    + 242        "129Xe": 128.9047808611,
    + 243        "130Xe": 129.903509349,
    + 244        "131Xe": 130.90508406,
    + 245        "Xe": 131.9041550856,
    + 246        "134Xe": 133.90539466,
    + 247        "136Xe": 135.907214484,
    + 248        "Cs": 132.9054519610,
    + 249        "130Ba": 129.9063207,
    + 250        "132Ba": 131.9050611,
    + 251        "134Ba": 133.90450818,
    + 252        "135Ba": 134.90568838,
    + 253        "136Ba": 135.90457573,
    + 254        "137Ba": 136.90582714,
    + 255        "Ba": 137.90524700,
    + 256        "138La": 137.9071149,
    + 257        "La": 138.9063563,
    + 258        "136Ce": 135.90712921,
    + 259        "138Ce": 137.905991,
    + 260        "Ce": 139.9054431,
    + 261        "142Ce": 141.9092504,
    + 262        "Pr": 140.9076576,
    + 263        "Nd": 141.9077290,
    + 264        "143Nd": 142.9098200,
    + 265        "144Nd": 143.9100930,
    + 266        "145Nd": 144.9125793,
    + 267        "146Nd": 145.9131226,
    + 268        "148Nd": 147.9168993,
    + 269        "150Nd": 149.9209022,
    + 270        "145Pm": 144.9127559,
    + 271        "147Pm": 146.9151450,
    + 272        "144Sm": 143.9120065,
    + 273        "147Sm": 146.9149044,
    + 274        "148Sm": 147.9148292,
    + 275        "149Sm": 148.9171921,
    + 276        "150Sm": 149.9172829,
    + 277        "Sm": 151.9197397,
    + 278        "154Sm": 153.9222169,
    + 279        "151Eu": 150.9198578,
    + 280        "Eu": 152.9212380,
    + 281        "152Gd": 151.9197995,
    + 282        "154Gd": 153.9208741,
    + 283        "155Gd": 154.9226305,
    + 284        "156Gd": 155.9221312,
    + 285        "157Gd": 156.9239686,
    + 286        "Gd": 157.9241123,
    + 287        "160Gd": 159.9270624,
    + 288        "Tb": 158.9253547,
    + 289        "156Dy": 155.9242847,
    + 290        "158Dy": 157.9244159,
    + 291        "160Dy": 159.9252046,
    + 292        "161Dy": 160.9269405,
    + 293        "162Dy": 161.9268056,
    + 294        "163Dy": 162.9287383,
    + 295        "Dy": 163.9291819,
    + 296        "Ho": 164.9303288,
    + 297        "162Er": 161.9287884,
    + 298        "164Er": 163.9292088,
    + 299        "Er": 165.9302995,
    + 300        "167Er": 166.9320546,
    + 301        "168Er": 167.9323767,
    + 302        "170Er": 169.9354702,
    + 303        "Tm": 168.9342179,
    + 304        "168Yb": 167.9338896,
    + 305        "170Yb": 169.9347664,
    + 306        "171Yb": 170.9363302,
    + 307        "172Yb": 171.9363859,
    + 308        "173Yb": 172.9382151,
    + 309        "Yb": 173.9388664,
    + 310        "176Yb": 175.9425764,
    + 311        "Lu": 174.9407752,
    + 312        "176Lu": 175.9426897,
    + 313        "174Hf": 173.9400461,
    + 314        "176Hf": 175.9414076,
    + 315        "177Hf": 176.9432277,
    + 316        "178Hf": 177.9437058,
    + 317        "179Hf": 178.9458232,
    + 318        "Hf": 179.9465570,
    + 319        "180Ta": 179.9474648,
    + 320        "Ta": 180.9479958,
    + 321        "180W": 179.9467108,
    + 322        "182W": 181.94820394,
    + 323        "183W": 182.95022275,
    + 324        "W": 183.95093092,
    + 325        "186W": 185.9543628,
    + 326        "185Re": 184.9529545,
    + 327        "Re": 186.9557501,
    + 328        "184Os": 183.9524885,
    + 329        "186Os": 185.9538350,
    + 330        "187Os": 186.9557474,
    + 331        "188Os": 187.9558352,
    + 332        "189Os": 188.9581442,
    + 333        "190Os": 189.9584437,
    + 334        "192Os": 191.9614770,
    + 335        "191Ir": 190.9605893,
    + 336        "Ir": 192.9629216,
    + 337        "190Pt": 189.9599297,
    + 338        "192Pt": 191.9610387,
    + 339        "194Pt": 193.9626809,
    + 340        "Pt": 194.9647917,
    + 341        "196Pt": 195.96495209,
    + 342        "198Pt": 197.9678949,
    + 343        "Au": 196.96656879,
    + 344        "196Hg": 195.9658326,
    + 345        "198Hg": 197.96676860,
    + 346        "199Hg": 198.96828064,
    + 347        "200Hg": 199.96832659,
    + 348        "201Hg": 200.97030284,
    + 349        "Hg": 201.97064340,
    + 350        "204Hg": 203.97349398,
    + 351        "203Tl": 202.9723446,
    + 352        "Tl": 204.9744278,
    + 353        "204Pb": 203.9730440,
    + 354        "206Pb": 205.9744657,
    + 355        "207Pb": 206.9758973,
    + 356        "Pb": 207.9766525,
    + 357        "Bi": 208.9803991,
    + 358        "209Po": 208.9824308,
    + 359        "210Po": 209.9828741,
    + 360        "210At": 209.9871479,
    + 361        "211At": 210.9874966,
    + 362        "211Rn": 210.9906011,
    + 363        "220Rn": 220.0113941,
    + 364        "222Rn": 222.0175782,
    + 365        "223Fr": 223.0197360,
    + 366        "223Ra": 223.0185023,
    + 367        "224Ra": 224.0202120,
    + 368        "226Ra": 226.0254103,
    + 369        "228Ra": 228.0310707,
    + 370        "227Ac": 227.0277523,
    + 371        "230Th": 230.0331341,
    + 372        "Th": 232.0380558,
    + 373        "Pa": 231.0358842,
    + 374        "233U": 233.0396355,
    + 375        "234U": 234.0409523,
    + 376        "235U": 235.0439301,
    + 377        "236U": 236.0455682,
    + 378        "U": 238.0507884,
    + 379        "236Np": 236.046570,
    + 380        "237Np": 237.0481736,
    + 381        "238Pu": 238.0495601,
    + 382        "239Pu": 239.0521636,
    + 383        "240Pu": 240.0538138,
    + 384        "241Pu": 241.0568517,
    + 385        "242Pu": 242.0587428,
    + 386        "244Pu": 244.0642053,
    + 387        "241Am": 241.0568293,
    + 388        "243Am": 243.0613813,
    + 389        "243Cm": 243.0613893,
    + 390        "244Cm": 244.0627528,
    + 391        "245Cm": 245.0654915,
    + 392        "246Cm": 246.0672238,
    + 393        "247Cm": 247.0703541,
    + 394        "248Cm": 248.0723499,
    + 395        "247Bk": 247.0703073,
    + 396        "249Bk": 249.0749877,
    + 397        "249Cf": 249.0748539,
    + 398        "250Cf": 250.0764062,
    + 399        "251Cf": 251.0795886,
    + 400        "252Cf": 252.0816272,
    + 401        "252Es": 252.082980,
    + 402        "257Fm": 257.0951061,
    + 403        "258Md": 258.0984315,
    + 404        "260Md": 260.10365,
    + 405        "259No": 259.10103,
    + 406        "262Lr": 262.10961,
    + 407        "267Rf": 267.12179,
    + 408        "268Db": 268.12567,
    + 409        "271Sg": 271.13393,
    + 410        "272Bh": 272.13826,
    + 411        "270Hs": 270.13429,
    + 412        "276Mt": 276.15159,
    + 413        "281Ds": 281.16451,
    + 414        "280Rg": 280.16514,
    + 415        "285Cn": 285.17712,
    + 416        "284Nh": 284.17873,
    + 417        "289Fl": 289.19042,
    + 418        "288Mc": 288.19274,
    + 419        "293Lv": 293.20449,
    + 420        "292Ts": 292.20746,
    + 421        "294Og": 294.21392,
    + 422    }
    + 423
    + 424    # This list orders atoms, starting with the most abundant isotopes first, ordered as described.
    + 425    # Less abundant isotopes are ordered by mass at the end of the list.
    + 426    atoms_order = [
    + 427        "C",
    + 428        "H",
    + 429        "O",
    + 430        "N",
    + 431        "P",
    + 432        "S",  # CHONPS
    + 433        "F",
    + 434        "Cl",
    + 435        "Br",
    + 436        "I",
    + 437        "At",  # Halogens
    + 438        "Li",
    + 439        "Na",
    + 440        "K",
    + 441        "Rb",
    + 442        "Cs",
    + 443        "Fr",  # Alkali
    + 444        "He",
    + 445        "Ne",
    + 446        "Ar",
    + 447        "Kr",
    + 448        "Xe",
    + 449        "Rn",  # Noble gasses
    + 450        "Be",
    + 451        "B",  # Row 2
    + 452        "Mg",
    + 453        "Al",
    + 454        "Si",  # Row 3
    + 455        "Ca",
    + 456        "Sc",
    + 457        "Ti",
    + 458        "V",
    + 459        "Cr",
    + 460        "Mn",
    + 461        "Fe",
    + 462        "Co",
    + 463        "Ni",
    + 464        "Cu",
    + 465        "Zn",
    + 466        "Ga",
    + 467        "Ge",
    + 468        "As",
    + 469        "Se",  # Row 4
    + 470        "Sr",
    + 471        "Y",
    + 472        "Zr",
    + 473        "Nb",
    + 474        "Mo",
    + 475        "Tc",
    + 476        "Ru",
    + 477        "Rh",
    + 478        "Pd",
    + 479        "Ag",
    + 480        "Cd",
    + 481        "In",
    + 482        "Sn",
    + 483        "Sb",
    + 484        "Te",  # Row 5
    + 485        "Ba",
    + 486        "La",
    + 487        "Hf",
    + 488        "Ta",
    + 489        "W",
    + 490        "Re",
    + 491        "Os",
    + 492        "Ir",
    + 493        "Pt",
    + 494        "Au",
    + 495        "Hg",
    + 496        "Tl",
    + 497        "Pb",
    + 498        "Bi",
    + 499        "Po",  # Row 6
    + 500        "Ra",
    + 501        "Ac",
    + 502        "Rf",
    + 503        "Db",
    + 504        "Sg",
    + 505        "Bh",
    + 506        "Hs",
    + 507        "Mt",
    + 508        "Ds",
    + 509        "Rg",
    + 510        "Cn",
    + 511        "Nh",
    + 512        "Fl",
    + 513        "Mc",
    + 514        "Lv",
    + 515        "Ts",
    + 516        "Og",  # Row 7
    + 517        "Ce",
    + 518        "Pr",
    + 519        "Nd",
    + 520        "Pm",
    + 521        "Sm",
    + 522        "Eu",
    + 523        "Gd",
    + 524        "Tb",
    + 525        "Dy",
    + 526        "Ho",
    + 527        "Er",
    + 528        "Tm",
    + 529        "Yb",
    + 530        "Lu",  # Lanthanides
    + 531        "Th",
    + 532        "Pa",
    + 533        "U",
    + 534        "Np",
    + 535        "Pu",
    + 536        "Am",
    + 537        "Cm",
    + 538        "Bk",
    + 539        "Cf",
    + 540        "Es",
    + 541        "Fm",
    + 542        "Md",
    + 543        "No",
    + 544        "Lr",  # Actinides
    + 545        # Less abundant isotopes follow
    + 546        "D",
    + 547        "6Li",
    + 548        "10B",
    + 549        "13C",
    + 550        "15N",
    + 551        "17O",
    + 552        "18O",
    + 553        "22Ne",
    + 554        "25Mg",
    + 555        "26Mg",
    + 556        "29Si",
    + 557        "30Si",
    + 558        "33S",
    + 559        "34S",
    + 560        "36S",
    + 561        "37Cl",
    + 562        "40Ca",
    + 563        "41K",
    + 564        "44Ca",
    + 565        "46Ti",
    + 566        "47Ti",
    + 567        "49Ti",
    + 568        "50Cr",
    + 569        "50Ti",
    + 570        "50V",
    + 571        "53Cr",
    + 572        "54Cr",
    + 573        "54Fe",
    + 574        "57Fe",
    + 575        "58Fe",
    + 576        "60Ni",
    + 577        "61Ni",
    + 578        "62Ni",
    + 579        "65Cu",
    + 580        "66Zn",
    + 581        "67Zn",
    + 582        "68Zn",
    + 583        "70Ge",
    + 584        "71Ga",
    + 585        "72Ge",
    + 586        "73Ge",
    + 587        "76Ge",
    + 588        "76Se",
    + 589        "77Se",
    + 590        "78Se",
    + 591        "81Br",
    + 592        "80Kr",
    + 593        "82Kr",
    + 594        "82Se",
    + 595        "83Kr",
    + 596        "85Rb",
    + 597        "86Kr",
    + 598        "86Sr",
    + 599        "87Rb",
    + 600        "87Sr",
    + 601        "88Sr",
    + 602        "91Zr",
    + 603        "92Mo",
    + 604        "92Zr",
    + 605        "94Mo",
    + 606        "94Zr",
    + 607        "95Mo",
    + 608        "96Mo",
    + 609        "96Ru",
    + 610        "96Zr",
    + 611        "97Mo",
    + 612        "98Ru",
    + 613        "99Ru",
    + 614        "100Mo",
    + 615        "100Ru",
    + 616        "101Ru",
    + 617        "102Pd",
    + 618        "104Pd",
    + 619        "104Ru",
    + 620        "105Pd",
    + 621        "106Cd",
    + 622        "106Pd",
    + 623        "108Cd",
    + 624        "108Pd",
    + 625        "109Ag",
    + 626        "110Cd",
    + 627        "110Pd",
    + 628        "111Cd",
    + 629        "112Cd",
    + 630        "112Sn",
    + 631        "113Cd",
    + 632        "113In",
    + 633        "114Cd",
    + 634        "114Sn",
    + 635        "115In",
    + 636        "115Sn",
    + 637        "116Cd",
    + 638        "116Sn",
    + 639        "117Sn",
    + 640        "118Sn",
    + 641        "119Sn",
    + 642        "120Sn",
    + 643        "120Te",
    + 644        "121Sb",
    + 645        "122Sn",
    + 646        "122Te",
    + 647        "123Sb",
    + 648        "123Te",
    + 649        "124Sn",
    + 650        "124Te",
    + 651        "124Xe",
    + 652        "125Te",
    + 653        "126Te",
    + 654        "126Xe",
    + 655        "128Te",
    + 656        "128Xe",
    + 657        "129Xe",
    + 658        "130Ba",
    + 659        "130Te",
    + 660        "130Xe",
    + 661        "131Xe",
    + 662        "132Ba",
    + 663        "132Xe",
    + 664        "134Ba",
    + 665        "134Xe",
    + 666        "135Ba",
    + 667        "136Ba",
    + 668        "136Xe",
    + 669        "137Ba",
    + 670        "138Ba",
    + 671        "174Hf",
    + 672        "176Hf",
    + 673        "177Hf",
    + 674        "178Hf",
    + 675        "179Hf",
    + 676        "180Hf",
    + 677        "180W",
    + 678        "182W",
    + 679        "183W",
    + 680        "184Os",
    + 681        "184W",
    + 682        "185Re",
    + 683        "186Os",
    + 684        "186W",
    + 685        "187Os",
    + 686        "187Re",
    + 687        "188Os",
    + 688        "189Os",
    + 689        "190Os",
    + 690        "190Pt",
    + 691        "191Ir",
    + 692        "192Ir",
    + 693        "192Os",
    + 694        "192Pt",
    + 695        "194Pt",
    + 696        "195Pt",
    + 697        "196Hg",
    + 698        "196Pt",
    + 699        "198Hg",
    + 700        "198Pt",
    + 701        "199Hg",
    + 702        "200Hg",
    + 703        "201Hg",
    + 704        "202Hg",
    + 705        "203Tl",
    + 706        "204Hg",
    + 707        "204Pb",
    + 708        "205Tl",
    + 709        "206Pb",
    + 710        "207Pb",
    + 711        "208Pb",
    + 712    ]
    + 713
    + 714    atoms_covalence = {
    + 715        "C": (4),
    + 716        "13C": (4),
    + 717        "N": (3),
    + 718        "O": (2),
    + 719        "S": (2),
    + 720        "H": (1),
    + 721        "F": (1, 0),
    + 722        "Cl": (1, 0),
    + 723        "Br": (1, 0),
    + 724        "I": (1, 0),
    + 725        "At": (1),
    + 726        "Li": (1, 0),
    + 727        "Na": (1, 0),
    + 728        "K": (1, 0),
    + 729        "Rb": (1),
    + 730        "Cs": (1),
    + 731        "Fr": (1),
    + 732        "B": (4, 3, 2, 1),
    + 733        "In": (3, 2, 1),
    + 734        "Al": (3, 1, 2),
    + 735        "P": (3, 5, 4, 2, 1),
    + 736        "Ga": (3, 1, 2),
    + 737        "Mg": (2, 1),
    + 738        "Be": (2, 1),
    + 739        "Ca": (2, 1),
    + 740        "Sr": (2, 1),
    + 741        "Ba": (2),
    + 742        "Ra": (2),
    + 743        "V": (5, 4, 3, 2, 1),
    + 744        "Fe": (3, 2, 4, 5, 6),
    + 745        "Si": (4, 3, 2),
    + 746        "Sc": (3, 2, 1),
    + 747        "Ti": (4, 3, 2, 1),
    + 748        "Cr": (1, 2, 3, 4, 5, 6),
    + 749        "Mn": (1, 2, 3, 4, 5, 6, 7),
    + 750        "Co": (1, 2, 3, 4, 5),
    + 751        "Ni": (1, 2, 3, 4),
    + 752        "Cu": (2, 1, 3, 4),
    + 753        "Zn": (2, 1),
    + 754        "Ge": (4, 3, 2, 1),
    + 755        "As": (5, 3, 2, 1),
    + 756        "Se": (6, 4, 2, 1),
    + 757        "Y": (3, 2, 1),
    + 758        "Zr": (4, 3, 2, 1),
    + 759        "Nb": (5, 4, 3, 2, 1),
    + 760        "Mo": (6, 5, 4, 3, 2, 1),
    + 761        "Tc": (7, 6, 5, 4, 3, 2, 1),
    + 762        "Ru": (8, 7, 6, 5, 4, 3, 2, 1),
    + 763        "Rh": (6, 5, 4, 3, 2, 1),
    + 764        "Pd": (4, 2, 1),
    + 765        "Ag": (0, 1, 2, 3, 4),
    + 766        "Cd": (2, 1),
    + 767        "Sn": (4, 2),
    + 768        "Sb": (5, 3),
    + 769        "Te": (6, 5, 4, 2),
    + 770        "La": (3, 2),
    + 771        "Hf": (4, 3, 2),
    + 772        "Ta": (5, 4, 3, 2),
    + 773        "W": (6, 5, 4, 3, 2, 1),
    + 774        "Re": (4, 7, 6, 5, 3, 2, 1),
    + 775        "Os": (4, 8, 7, 6, 5, 3, 2, 1),
    + 776        "Ir": (4, 8, 6, 5, 3, 2, 1),
    + 777        "Pt": (4, 6, 5, 3, 2, 1),
    + 778        "Au": (3, 5, 2, 1),
    + 779        "Hg": (1, 2, 4),
    + 780        "Tl": (3, 1),
    + 781        "Pb": (4, 2),
    + 782        "Bi": (3, 1, 5),
    + 783        "Po": (2, 4, 6),
    + 784        "Ac": (3, 2),
    + 785    }
    + 786
    + 787    isotopic_abundance = {
    + 788        "H": 0.999885,
    + 789        "D": 0.000115,
    + 790        "T": 0,  # Consider removing.
    + 791        "3He": 0.00000134,
    + 792        "He": 0.99999866,
    + 793        "6Li": 0.0759,
    + 794        "Li": 0.9241,
    + 795        "Be": 1.0,
    + 796        "10B": 0.199,
    + 797        "B": 0.801,
    + 798        "C": 0.9893,
    + 799        "13C": 0.0107,
    + 800        "14C": 0,
    + 801        "N": 0.99636,
    + 802        "15N": 0.00364,
    + 803        "O": 0.99757,
    + 804        "17O": 0.00038,
    + 805        "18O": 0.00205,
    + 806        "F": 1.0,
    + 807        "Ne": 0.9048,
    + 808        "21Ne": 0.0027,
    + 809        "22Ne": 0.0925,
    + 810        "Na": 1.0,
    + 811        "Mg": 0.7899,
    + 812        "25Mg": 0.1000,
    + 813        "26Mg": 0.1101,
    + 814        "Al": 1.0,
    + 815        "Si": 0.92223,
    + 816        "29Si": 0.04685,
    + 817        "30Si": 0.03092,
    + 818        "P": 1.0,
    + 819        "S": 0.9499,
    + 820        "33S": 0.0075,
    + 821        "34S": 0.0425,
    + 822        "36S": 0.0001,
    + 823        "Cl": 0.7576,
    + 824        "37Cl": 0.2424,
    + 825        "36Ar": 0.003336,
    + 826        "38Ar": 0.000629,
    + 827        "Ar": 0.996035,
    + 828        "K": 0.932581,
    + 829        "40K": 0.000117,
    + 830        "41K": 0.067302,
    + 831        "Ca": 0.96941,
    + 832        "42Ca": 0.00647,
    + 833        "43Ca": 0.00135,
    + 834        "44Ca": 0.02086,
    + 835        "46Ca": 0.00004,
    + 836        "48Ca": 0.001872,
    + 837        "Sc": 1.0,
    + 838        "46Ti": 0.0825,
    + 839        "47Ti": 0.0744,
    + 840        "Ti": 0.7372,
    + 841        "49Ti": 0.0541,
    + 842        "50Ti": 0.0518,
    + 843        "50V": 0.00250,
    + 844        "V": 0.9975,
    + 845        "50Cr": 0.04345,
    + 846        "Cr": 0.83789,
    + 847        "53Cr": 0.09501,
    + 848        "54Cr": 0.02365,
    + 849        "Mn": 1.0,
    + 850        "54Fe": 0.05845,
    + 851        "Fe": 0.91754,
    + 852        "57Fe": 0.02119,
    + 853        "58Fe": 0.00282,
    + 854        "Co": 1.0,
    + 855        "Ni": 0.68077,
    + 856        "60Ni": 0.26223,
    + 857        "61Ni": 0.011399,
    + 858        "62Ni": 0.036346,
    + 859        "64Ni": 0.009255,
    + 860        "Cu": 0.6915,
    + 861        "65Cu": 0.3085,
    + 862        "Zn": 0.4917,
    + 863        "66Zn": 0.2773,
    + 864        "67Zn": 0.0404,
    + 865        "68Zn": 0.1845,
    + 866        "70Zn": 0.0061,
    + 867        "Ga": 0.60108,
    + 868        "71Ga": 0.39892,
    + 869        "70Ge": 0.2057,
    + 870        "72Ge": 0.2745,
    + 871        "73Ge": 0.0775,
    + 872        "Ge": 0.3650,
    + 873        "76Ge": 0.0773,
    + 874        "As": 1.0,
    + 875        "74Se": 0.0089,
    + 876        "76Se": 0.0937,
    + 877        "77Se": 0.0763,
    + 878        "78Se": 0.2377,
    + 879        "Se": 0.4961,
    + 880        "82Se": 0.0873,
    + 881        "Br": 0.5069,
    + 882        "81Br": 0.4931,
    + 883        "78Kr": 0.00355,
    + 884        "80Kr": 0.02286,
    + 885        "82Kr": 0.11593,
    + 886        "83Kr": 0.11500,
    + 887        "Kr": 0.56987,
    + 888        "86Kr": 0.17279,
    + 889        "Rb": 0.7217,
    + 890        "87Rb": 0.2783,
    + 891        "84Sr": 0.0056,
    + 892        "86Sr": 0.0986,
    + 893        "87Sr": 0.0700,
    + 894        "Sr": 0.8258,
    + 895        "Y": 1.0,
    + 896        "Zr": 0.5145,
    + 897        "91Zr": 0.1122,
    + 898        "92Zr": 0.1715,
    + 899        "94Zr": 0.1738,
    + 900        "96Zr": 0.0280,
    + 901        "Nb": 1.0,
    + 902        "92Mo": 0.1453,
    + 903        "94Mo": 0.0915,
    + 904        "95Mo": 0.1584,
    + 905        "96Mo": 0.1667,
    + 906        "97Mo": 0.0960,
    + 907        "Mo": 0.2439,
    + 908        "100Mo": 0.0982,
    + 909        "99Tc": 0,  # consider removing
    + 910        "96Ru": 0.0554,
    + 911        "98Ru": 0.0187,
    + 912        "99Ru": 0.1276,
    + 913        "100Ru": 0.1260,
    + 914        "101Ru": 0.1706,
    + 915        "Ru": 0.3155,
    + 916        "104Ru": 0.1862,
    + 917        "Rh": 1.0,
    + 918        "102Pd": 0.0102,
    + 919        "104Pd": 0.1114,
    + 920        "105Pd": 0.2233,
    + 921        "Pd": 0.2733,
    + 922        "108Pd": 0.2646,
    + 923        "110Pd": 0.1172,
    + 924        "Ag": 0.51839,
    + 925        "109Ag": 0.48161,
    + 926        "106Cd": 0.0125,
    + 927        "108Cd": 0.0089,
    + 928        "110Cd": 0.1249,
    + 929        "111Cd": 0.1280,
    + 930        "Cd": 0.2413,
    + 931        "113Cd": 0.1222,
    + 932        "114Cd": 0.2873,
    + 933        "116Cd": 0.0749,
    + 934        "113In": 0.0429,
    + 935        "In": 0.9571,
    + 936        "112Sn": 0.0097,
    + 937        "114Sn": 0.0066,
    + 938        "115Sn": 0.0034,
    + 939        "116Sn": 0.1454,
    + 940        "117Sn": 0.0768,
    + 941        "118Sn": 0.2422,
    + 942        "119Sn": 0.0859,
    + 943        "Sn": 0.3258,
    + 944        "122Sn": 0.0463,
    + 945        "124Sn": 0.0579,
    + 946        "Sb": 0.5721,
    + 947        "123Sb": 0.4279,
    + 948        "120Te": 0.0009,
    + 949        "122Te": 0.0255,
    + 950        "123Te": 0.0089,
    + 951        "124Te": 0.0474,
    + 952        "125Te": 0.0707,
    + 953        "126Te": 0.1884,
    + 954        "128Te": 0.3174,
    + 955        "Te": 0.3408,
    + 956        "I": 1.0,
    + 957        "124Xe": 0.000952,
    + 958        "126Xe": 0.000890,
    + 959        "128Xe": 0.019102,
    + 960        "129Xe": 0.264006,
    + 961        "130Xe": 0.040710,
    + 962        "131Xe": 0.212324,
    + 963        "Xe": 0.269086,
    + 964        "134Xe": 0.104357,
    + 965        "136Xe": 0.088573,
    + 966        "Cs": 1.0,
    + 967        "130Ba": 0.00106,
    + 968        "132Ba": 0.00101,
    + 969        "134Ba": 0.02417,
    + 970        "135Ba": 0.06592,
    + 971        "136Ba": 0.07854,
    + 972        "137Ba": 0.11232,
    + 973        "Ba": 0.71698,
    + 974        "138La": 0.0008881,
    + 975        "La": 0.9991119,
    + 976        "136Ce": 0.00185,
    + 977        "138Ce": 0.00251,
    + 978        "Ce": 0.88450,
    + 979        "142Ce": 0.11114,
    + 980        "Pr": 1.0,
    + 981        "Nd": 0.27152,
    + 982        "143Nd": 0.12174,
    + 983        "144Nd": 0.23798,
    + 984        "145Nd": 0.08293,
    + 985        "146Nd": 0.17189,
    + 986        "148Nd": 0.05756,
    + 987        "150Nd": 0.05638,
    + 988        "145Pm": 0,
    + 989        "147Pm": 0,
    + 990        "144Sm": 0.0307,
    + 991        "147Sm": 0.1499,
    + 992        "148Sm": 0.1124,
    + 993        "149Sm": 0.1382,
    + 994        "150Sm": 0.0738,
    + 995        "Sm": 0.2675,
    + 996        "154Sm": 0.2275,
    + 997        "151Eu": 0.4781,
    + 998        "Eu": 0.5219,
    + 999        "152Gd": 0.0020,
    +1000        "154Gd": 0.0218,
    +1001        "155Gd": 0.1480,
    +1002        "156Gd": 0.2047,
    +1003        "157Gd": 0.1565,
    +1004        "Gd": 0.2484,
    +1005        "160Gd": 0.2186,
    +1006        "Tb": 1.0,
    +1007        "156Dy": 0.00056,
    +1008        "158Dy": 0.00095,
    +1009        "160Dy": 0.02329,
    +1010        "161Dy": 0.18889,
    +1011        "162Dy": 0.25475,
    +1012        "163Dy": 0.24896,
    +1013        "Dy": 0.28260,
    +1014        "Ho": 1.0,
    +1015        "162Er": 0.00139,
    +1016        "164Er": 0.01601,
    +1017        "Er": 0.33503,
    +1018        "167Er": 0.22869,
    +1019        "168Er": 0.26978,
    +1020        "170Er": 0.14910,
    +1021        "Tm": 1.0,
    +1022        "168Yb": 0.00123,
    +1023        "170Yb": 0.02982,
    +1024        "171Yb": 0.1409,
    +1025        "172Yb": 0.2168,
    +1026        "173Yb": 0.16103,
    +1027        "Yb": 0.32026,
    +1028        "176Yb": 0.12996,
    +1029        "Lu": 0.97401,
    +1030        "176Lu": 0.02599,
    +1031        "174Hf": 0.0016,
    +1032        "176Hf": 0.0526,
    +1033        "177Hf": 0.1860,
    +1034        "178Hf": 0.2728,
    +1035        "179Hf": 0.1362,
    +1036        "Hf": 0.3508,
    +1037        "180Ta": 0.0001201,
    +1038        "Ta": 0.9998799,
    +1039        "180W": 0.0012,
    +1040        "182W": 0.2650,
    +1041        "183W": 0.1431,
    +1042        "W": 0.3064,
    +1043        "186W": 0.2843,
    +1044        "185Re": 0.3740,
    +1045        "Re": 0.6260,
    +1046        "184Os": 0.0002,
    +1047        "186Os": 0.0159,
    +1048        "187Os": 0.0196,
    +1049        "188Os": 0.1324,
    +1050        "189Os": 0.1615,
    +1051        "190Os": 0.2626,
    +1052        "Os": 0.4078,
    +1053        "191Ir": 0.373,
    +1054        "Ir": 0.627,
    +1055        "190Pt": 0.00012,
    +1056        "192Pt": 0.00782,
    +1057        "194Pt": 0.3286,
    +1058        "Pt": 0.3378,
    +1059        "196Pt": 0.2521,
    +1060        "198Pt": 0.07356,
    +1061        "Au": 1.0,
    +1062        "196Hg": 0.0015,
    +1063        "198Hg": 0.0997,
    +1064        "199Hg": 0.16872,
    +1065        "200Hg": 0.2310,
    +1066        "201Hg": 0.1318,
    +1067        "Hg": 0.2986,
    +1068        "204Hg": 0.0687,
    +1069        "203Tl": 0.2952,
    +1070        "Tl": 0.7048,
    +1071        "204Pb": 0.014,
    +1072        "206Pb": 0.241,
    +1073        "207Pb": 0.221,
    +1074        "Pb": 0.524,
    +1075        "Bi": 1.0,
    +1076        "209Po": 0,
    +1077        "210Po": 0,
    +1078        "210At": 0,
    +1079        "211At": 0,
    +1080        "211Rn": 0,
    +1081        "220Rn": 0,
    +1082        "222Rn": 0,
    +1083        "223Fr": 0,
    +1084        "223Ra": 0,
    +1085        "224Ra": 0,
    +1086        "226Ra": 0,
    +1087        "228Ra": 0,
    +1088        "227Ac": 0,
    +1089        "230Th": 0,
    +1090        "Th": 1.0,
    +1091        "Pa": 1.0,
    +1092        "233U": 0,
    +1093        "234U": 0.000054,
    +1094        "235U": 0.007204,
    +1095        "236U": 0,
    +1096        "U": 0.992742,
    +1097        "236Np": 0,
    +1098        "237Np": 0,
    +1099        "238Pu": 0,
    +1100        "239Pu": 0,
    +1101        "240Pu": 0,
    +1102        "241Pu": 0,
    +1103        "242Pu": 0,
    +1104        "244Pu": 0,
    +1105        "241Am": 0,
    +1106        "243Am": 0,
    +1107        "243Cm": 0,
    +1108        "244Cm": 0,
    +1109        "245Cm": 0,
    +1110        "246Cm": 0,
    +1111        "247Cm": 0,
    +1112        "248Cm": 0,
    +1113        "247Bk": 0,
    +1114        "249Bk": 0,
    +1115        "249Cf": 0,
    +1116        "250Cf": 0,
    +1117        "251Cf": 0,
    +1118        "252Cf": 0,
    +1119        "252Es": 0,
    +1120        "257Fm": 0,
    +1121        "258Md": 0,
    +1122        "260Md": 0,
    +1123        "259No": 0,
    +1124        "262Lr": 0,
    +1125        "267Rf": 0,
    +1126        "268Db": 0,
    +1127        "271Sg": 0,
    +1128        "272Bh": 0,
    +1129        "270Hs": 0,
    +1130        "276Mt": 0,
    +1131        "281Ds": 0,
    +1132        "280Rg": 0,
    +1133        "285Cn": 0,
    +1134        "284Nh": 0,
    +1135        "289Fl": 0,
    +1136        "288Mc": 0,
    +1137        "293Lv": 0,
    +1138        "292Ts": 0,
    +1139        "294Og": 0,
    +1140    }
    +1141
    +1142    # Isotopes here is a dictionary of symbol, including full name,
    +1143    # and then the isotopes which aren't the most abundant one, sorted by abundance.
    +1144    # None indicates no stable isotopes/naturally occurring ones.
    +1145    # This has been manually checked as far as Iodine only.
    +1146    isotopes = {
    +1147        "H": ["Hydrogen", ["D", "T"]],
    +1148        "He": ["Helium", ["3He"]],
    +1149        "Li": ["Lithium", ["6Li"]],
    +1150        "Be": ["Beryllium", [None]],
    +1151        "B": ["Boron", ["10B"]],
    +1152        "C": ["Carbon", ["13C"]],
    +1153        "N": ["Nitrogen", ["15N"]],
    +1154        "O": ["Oxygen", ["18O", "17O"]],
    +1155        "F": ["Fluorine", [None]],
    +1156        "Ne": ["Neon", ["22Ne", "21Ne"]],
    +1157        "Na": ["Sodium", [None]],
    +1158        "Mg": ["Magnesium", ["26Mg", "25Mg"]],
    +1159        "Al": ["Aluminum", [None]],
    +1160        "Si": ["Silicon", ["29Si", "30Si"]],
    +1161        "P": ["Phosphorus", [None]],
    +1162        "S": ["Sulfur", ["34S", "33S", "36S"]],
    +1163        "Cl": ["Chlorine", ["37Cl"]],
    +1164        "Ar": ["Argon", ["36Ar", "38Ar"]],
    +1165        "K": ["Potassium", ["41K", "40K"]],
    +1166        "Ca": ["Calcium", ["44Ca", "48Ca", "43Ca", "42Ca", "46Ca"]],
    +1167        "Sc": ["Scandium", [None]],
    +1168        "Ti": ["Titanium", ["46Ti", "47Ti", "49Ti", "50Ti"]],
    +1169        "V": ["Vanadium", ["50V"]],
    +1170        "Cr": ["Chromium", ["53Cr", "50Cr", "54Cr"]],
    +1171        "Mn": ["Manganese", [None]],
    +1172        "Fe": ["Iron", ["54Fe", "57Fe", "58Fe"]],
    +1173        "Co": ["Cobalt", [None]],
    +1174        "Ni": ["Nickel", ["60Ni", "62Ni", "61Ni", "64Ni"]],
    +1175        "Cu": ["Copper", ["65Cu"]],
    +1176        "Zn": ["Zinc", ["66Zn", "68Zn", "67Zn", "70Zn"]],
    +1177        "Ga": ["Gallium", ["71Ga"]],
    +1178        "Ge": ["Germanium", ["72Ge", "70Ge", "73Ge", "76Ge"]],
    +1179        "As": ["Arsenic", [None]],
    +1180        "Se": ["Selenium", ["78Se", "76Se", "82Se", "77Se", "74Se"]],
    +1181        "Br": ["Bromine", ["81Br"]],
    +1182        "Kr": ["Krypton", ["86Kr", "82Kr", "83Kr", "80Kr"]],
    +1183        "Rb": ["Rubidium", ["87Rb"]],
    +1184        "Sr": ["Strontium", ["86Sr", "87Sr", "84Sr"]],
    +1185        "Y": ["Yttrium", [None]],
    +1186        "Zr": ["Zirconium", ["94Zr", "92Zr", "91Zr", "96Zr"]],
    +1187        "Nb": ["Niobium", [None]],
    +1188        "Mo": ["Molybdenum", ["96Mo", "95Mo", "92Mo", "100Mo", "97Mo", "94Mo"]],
    +1189        "Tc": ["Technetium", [None]],  # consider removing
    +1190        "Ru": ["Ruthenium", ["104Ru", "101Ru", "99Ru", "100Ru", "96Ru", "98Ru"]],
    +1191        "Rh": ["Rhodium", [None]],
    +1192        "Pd": ["Palladium", ["108Pd", "105Pd", "110Pd", "104Pd", "102Pd"]],
    +1193        "Ag": ["Silver", ["109Ag"]],
    +1194        "Cd": [
    +1195            "Cadmium",
    +1196            ["114Cd", "111Cd", "110Cd", "113Cd", "116Cd", "106Cd", "108Cd"],
    +1197        ],
    +1198        "In": ["Indium", ["113In"]],
    +1199        "Sn": [
    +1200            "Tin",
    +1201            [
    +1202                "118Sn",
    +1203                "116Sn",
    +1204                "119Sn",
    +1205                "117Sn",
    +1206                "124Sn",
    +1207                "122Sn",
    +1208                "112Sn",
    +1209                "114Sn",
    +1210                "115Sn",
    +1211            ],
    +1212        ],
    +1213        "Sb": ["Antimony", ["123Sb"]],
    +1214        "Te": [
    +1215            "Tellurium",
    +1216            ["128Te", "126Te", "125Te", "124Te", "122Te", "123Te", "120Te"],
    +1217        ],
    +1218        "I": ["Iodine", [None]],
    +1219        "Xe": ["Xenon", ["129Xe", "131Xe", "134Xe", "136Xe", "130Xe", "128Xe"]],
    +1220        "Cs": ["Cesium", [None]],
    +1221        "Ba": ["Barium", ["137Ba", "136Ba", "135Ba", "134Ba"]],
    +1222        "La": ["Lanthanum", ["138La"]],
    +1223        "Hf": ["Hafnium", ["178Hf", "177Hf", "179Hf", "176Hf"]],
    +1224        "Ta": ["Tantalum", ["180Ta"]],
    +1225        "W": ["Tungsten", ["186W", "182W", "183W"]],
    +1226        "Re": ["Rhenium", ["185Re"]],
    +1227        "Os": ["Osmium", ["190Os", "189Os", "188Os", "187Os", "186Os"]],
    +1228        "Ir": ["Iridium", ["191Ir"]],
    +1229        "Pt": ["Platinum", ["194Pt", "196Pt", "198Pt", "192Pt"]],
    +1230        "Au": ["Gold", [None]],
    +1231        "Hg": ["Mercury", ["200Hg", "199Hg", "201Hg", "198Hg", "204Hg"]],
    +1232        "Tl": ["Thallium", ["203Tl"]],
    +1233        "Pb": ["Lead", ["206Pb", "207Pb", "204Pb"]],
    +1234        "Bi": ["Bismuth", [None]],
    +1235        "Po": ["Polonium", [None]],
    +1236        "At": ["Astatine", [None]],
    +1237        "Rn": ["Radon", [None]],
    +1238        "Fr": ["Francium", [None]],
    +1239        "Ra": ["Radium", [None]],
    +1240        "Ac": ["Actinium", [None]],
    +1241        "Rf": ["Rutherfordium", [None]],
    +1242        "Db": ["Dubnium", [None]],
    +1243        "Sg": ["Seaborgium", [None]],
    +1244        "Bh": ["Bohrium", [None]],
    +1245        "Hs": ["Hassium", [None]],
    +1246        "Mt": ["Meitnerium", [None]],
    +1247        "Ds": ["Darmstadtium", [None]],
    +1248        "Rg": ["Roentgenium", [None]],
    +1249        "Cn": ["Copernicium", [None]],
    +1250        "Nh": ["Nihonium", [None]],
    +1251        "Fl": ["Flerovium", [None]],
    +1252        "Mc": ["Moscovium", [None]],
    +1253        "Lv": ["Livermorium", [None]],
    +1254        "Ts": ["Tennessine", [None]],
    +1255        "Og": ["Oganesson", [None]],
    +1256        "Ce": ["Cerium", ["142Ce", "138Ce", "136Ce"]],
    +1257        "Pr": ["Praseodymium", [None]],
    +1258        "Nd": ["Neodymium", [None]],
    +1259        "Pm": ["Promethium", [None]],
    +1260        "Sm": ["Samarium", [None]],
    +1261        "Eu": ["Europium", [None]],
    +1262        "Gd": ["Gadolinium", [None]],
    +1263        "Tb": ["Terbium", [None]],
    +1264        "Dy": ["Dysprosium", [None]],
    +1265        "Ho": ["Holmium", [None]],
    +1266        "Er": ["Erbium", [None]],
    +1267        "Tm": ["Thulium", [None]],
    +1268        "Yb": ["Ytterbium", [None]],
    +1269        "Lu": ["Lutetium", ["176Lu"]],
    +1270        "Th": ["Thorium", [None]],
    +1271        "Pa": ["Protactinium", [None]],
    +1272        "U": ["Uranium", ["235U", "234U"]],
    +1273        "Np": ["Neptunium", [None]],
    +1274        "Pu": ["Plutonium", [None]],
    +1275        "Am": ["Americium", [None]],
    +1276        "Cm": ["Curium", [None]],
    +1277        "Bk": ["Berkelium", [None]],
    +1278        "Cf": ["Californium", [None]],
    +1279        "Es": ["Einsteinium", [None]],
    +1280        "Fm": ["Fermium", [None]],
    +1281        "Md": ["Mendelevium", [None]],
    +1282        "No": ["Nobelium", [None]],
    +1283        "Lr": ["Lawrencium", [None]],
    +1284    }
     

    Class for Atoms in CoreMS

    -

    This class includes key properties of atoms (and the electron) and isotopes, including their exact masses, relative abundances, and covalences. +

    This class includes key properties of atoms (and the electron) and isotopes, including their exact masses, relative abundances, and covalences. It also associates which isotopes are for the same element, and provides an ordering of elements.

    IUPAC definition of monoisotopic mass is based on the most abundant isotopes of each element present. Here, we will use atom symbols with isotope numbers for all isotopes excluding the most abundant one. -This list has been corrected up to Iodine.

    +This list has been corrected up to Iodine.

    References
    diff --git a/docs/corems/encapsulation/factory/parameters.html b/docs/corems/encapsulation/factory/parameters.html index 079ac89a..0a0ba31a 100644 --- a/docs/corems/encapsulation/factory/parameters.html +++ b/docs/corems/encapsulation/factory/parameters.html @@ -137,308 +137,346 @@

      1import dataclasses
       2
    -  3from corems.encapsulation.factory.processingSetting  import LiquidChromatographSetting, MolecularFormulaSearchSettings, TransientSetting, MassSpecPeakSetting, MassSpectrumSetting
    -  4from corems.encapsulation.factory.processingSetting  import CompoundSearchSettings, GasChromatographSetting
    -  5from corems.encapsulation.factory.processingSetting  import DataInputSetting
    -  6
    -  7def reset_ms_parameters():
    -  8    """Reset the MSParameter class to the default values"""
    -  9    MSParameters.molecular_search = MolecularFormulaSearchSettings()
    - 10    MSParameters.transient = TransientSetting()
    - 11    MSParameters.mass_spectrum = MassSpectrumSetting()
    - 12    MSParameters.ms_peak = MassSpecPeakSetting()
    - 13    MSParameters.data_input = DataInputSetting()
    - 14
    - 15def reset_gcms_parameters():
    - 16    """Reset the GCMSParameters class to the default values"""
    - 17    GCMSParameters.molecular_search = CompoundSearchSettings()
    - 18    GCMSParameters.gc_ms = GasChromatographSetting()
    - 19
    - 20def reset_lcms_parameters():
    - 21    """Reset the LCMSParameters class to the default values"""
    - 22    reset_ms_parameters()
    - 23    LCMSParameters.lc_ms = LiquidChromatographSetting()
    +  3from corems.encapsulation.factory.processingSetting import (
    +  4    LiquidChromatographSetting,
    +  5    MolecularFormulaSearchSettings,
    +  6    TransientSetting,
    +  7    MassSpecPeakSetting,
    +  8    MassSpectrumSetting,
    +  9)
    + 10from corems.encapsulation.factory.processingSetting import (
    + 11    CompoundSearchSettings,
    + 12    GasChromatographSetting,
    + 13)
    + 14from corems.encapsulation.factory.processingSetting import DataInputSetting
    + 15
    + 16
    + 17def reset_ms_parameters():
    + 18    """Reset the MSParameters class to the default values"""
    + 19    MSParameters.molecular_search = MolecularFormulaSearchSettings()
    + 20    MSParameters.transient = TransientSetting()
    + 21    MSParameters.mass_spectrum = MassSpectrumSetting()
    + 22    MSParameters.ms_peak = MassSpecPeakSetting()
    + 23    MSParameters.data_input = DataInputSetting()
      24
    - 25class MSParameters:
    - 26    """MSParameters class is used to store the parameters used for the processing of the mass spectrum
    - 27    
    - 28    Each attibute is a class that contains the parameters for the processing of the mass spectrum, see the corems.encapsulation.factory.processingSetting module for more details.
    - 29
    - 30    Parameters
    - 31    ----------
    - 32    use_defaults: bool, optional
    - 33        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    - 34
    - 35    Attributes
    - 36    -----------
    - 37    molecular_search: MolecularFormulaSearchSettings
    - 38        MolecularFormulaSearchSettings object
    - 39    transient: TransientSetting
    - 40        TransientSetting object
    - 41    mass_spectrum: MassSpectrumSetting
    - 42        MassSpectrumSetting object
    - 43    ms_peak: MassSpecPeakSetting
    - 44        MassSpecPeakSetting object
    - 45    data_input: DataInputSetting
    - 46        DataInputSetting object
    + 25
    + 26def reset_gcms_parameters():
    + 27    """Reset the GCMSParameters class to the default values"""
    + 28    GCMSParameters.molecular_search = CompoundSearchSettings()
    + 29    GCMSParameters.gc_ms = GasChromatographSetting()
    + 30
    + 31
    + 32def reset_lcms_parameters():
    + 33    """Reset the LCMSParameters class to the default values"""
    + 34    reset_ms_parameters()
    + 35    LCMSParameters.lc_ms = LiquidChromatographSetting()
    + 36
    + 37
    + 38class MSParameters:
    + 39    """MSParameters class is used to store the parameters used for the processing of the mass spectrum
    + 40
    + 41    Each attribute is a class that contains the parameters for the processing of the mass spectrum, see the corems.encapsulation.factory.processingSetting module for more details.
    + 42
    + 43    Parameters
    + 44    ----------
    + 45    use_defaults: bool, optional
    + 46        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
      47
    - 48    Notes
    - 49    -----
    - 50    One can use the use_defaults parameter to reset the parameters to the default values.
    - 51    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    - 52    """
    - 53
    - 54    molecular_search = MolecularFormulaSearchSettings()
    - 55    transient = TransientSetting()
    - 56    mass_spectrum = MassSpectrumSetting()
    - 57    ms_peak = MassSpecPeakSetting()
    - 58    data_input = DataInputSetting()
    - 59
    - 60    def __init__(self, use_defaults = False) -> None:
    - 61        if not use_defaults:
    - 62            self.molecular_search = dataclasses.replace(MSParameters.molecular_search)
    - 63            self.transient = dataclasses.replace(MSParameters.transient)
    - 64            self.mass_spectrum = dataclasses.replace(MSParameters.mass_spectrum)
    - 65            self.ms_peak = dataclasses.replace(MSParameters.ms_peak)
    - 66            self.data_input = dataclasses.replace(MSParameters.data_input)
    - 67        else:
    - 68            self.molecular_search = MolecularFormulaSearchSettings()
    - 69            self.transient = TransientSetting()
    - 70            self.mass_spectrum = MassSpectrumSetting()
    - 71            self.ms_peak = MassSpecPeakSetting()
    - 72            self.data_input = DataInputSetting()
    - 73    
    - 74    def copy(self):
    - 75        """Create a copy of the MSParameters object"""
    - 76        new_ms_parameters = MSParameters()
    - 77        new_ms_parameters.molecular_search = dataclasses.replace(self.molecular_search)
    - 78        new_ms_parameters.transient = dataclasses.replace(self.transient)
    - 79        new_ms_parameters.mass_spectrum = dataclasses.replace(self.mass_spectrum)
    - 80        new_ms_parameters.ms_peak = dataclasses.replace(self.ms_peak)
    - 81        new_ms_parameters.data_input = dataclasses.replace(self.data_input)
    - 82
    - 83        return new_ms_parameters
    - 84    
    - 85    def print(self):
    - 86        """Print the MSParameters object"""
    - 87        for k, v in self.__dict__.items():
    - 88            print(k, type(v).__name__)
    - 89
    - 90            for k2, v2 in v.__dict__.items():
    - 91                print("    {}: {}".format(k2, v2))
    - 92    
    - 93    def __eq__(self, value: object) -> bool:
    - 94        # Check that the object is of the same type
    - 95        if not isinstance(value, MSParameters):
    - 96            return False
    - 97        equality_check = []
    - 98        equality_check.append(self.molecular_search == value.molecular_search)
    - 99        equality_check.append(self.transient == value.transient)
    -100        equality_check.append(self.mass_spectrum ==value.mass_spectrum)
    -101        equality_check.append(self.ms_peak == value.ms_peak)
    -102        equality_check.append(self.data_input == value.data_input)
    -103
    -104        return all(equality_check)
    + 48    Attributes
    + 49    -----------
    + 50    molecular_search: MolecularFormulaSearchSettings
    + 51        MolecularFormulaSearchSettings object
    + 52    transient: TransientSetting
    + 53        TransientSetting object
    + 54    mass_spectrum: MassSpectrumSetting
    + 55        MassSpectrumSetting object
    + 56    ms_peak: MassSpecPeakSetting
    + 57        MassSpecPeakSetting object
    + 58    data_input: DataInputSetting
    + 59        DataInputSetting object
    + 60
    + 61    Notes
    + 62    -----
    + 63    One can use the use_defaults parameter to reset the parameters to the default values.
    + 64    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    + 65    """
    + 66
    + 67    molecular_search = MolecularFormulaSearchSettings()
    + 68    transient = TransientSetting()
    + 69    mass_spectrum = MassSpectrumSetting()
    + 70    ms_peak = MassSpecPeakSetting()
    + 71    data_input = DataInputSetting()
    + 72
    + 73    def __init__(self, use_defaults=False) -> None:
    + 74        if not use_defaults:
    + 75            self.molecular_search = dataclasses.replace(MSParameters.molecular_search)
    + 76            self.transient = dataclasses.replace(MSParameters.transient)
    + 77            self.mass_spectrum = dataclasses.replace(MSParameters.mass_spectrum)
    + 78            self.ms_peak = dataclasses.replace(MSParameters.ms_peak)
    + 79            self.data_input = dataclasses.replace(MSParameters.data_input)
    + 80        else:
    + 81            self.molecular_search = MolecularFormulaSearchSettings()
    + 82            self.transient = TransientSetting()
    + 83            self.mass_spectrum = MassSpectrumSetting()
    + 84            self.ms_peak = MassSpecPeakSetting()
    + 85            self.data_input = DataInputSetting()
    + 86
    + 87    def copy(self):
    + 88        """Create a copy of the MSParameters object"""
    + 89        new_ms_parameters = MSParameters()
    + 90        new_ms_parameters.molecular_search = dataclasses.replace(self.molecular_search)
    + 91        new_ms_parameters.transient = dataclasses.replace(self.transient)
    + 92        new_ms_parameters.mass_spectrum = dataclasses.replace(self.mass_spectrum)
    + 93        new_ms_parameters.ms_peak = dataclasses.replace(self.ms_peak)
    + 94        new_ms_parameters.data_input = dataclasses.replace(self.data_input)
    + 95
    + 96        return new_ms_parameters
    + 97
    + 98    def print(self):
    + 99        """Print the MSParameters object"""
    +100        for k, v in self.__dict__.items():
    +101            print(k, type(v).__name__)
    +102
    +103            for k2, v2 in v.__dict__.items():
    +104                print("    {}: {}".format(k2, v2))
     105
    -106class GCMSParameters:
    -107    """GCMSParameters class is used to store the parameters used for the processing of the gas chromatograph mass spectrum
    -108
    -109    Each attibute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.
    -110
    -111    Parameters
    -112    ----------
    -113    use_defaults: bool, optional
    -114        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    -115
    -116    Attributes
    -117    -----------
    -118    molecular_search: MolecularFormulaSearchSettings
    -119        MolecularFormulaSearchSettings object
    -120    gc_ms: GasChromatographSetting
    -121        GasChromatographSetting object
    +106    def __eq__(self, value: object) -> bool:
    +107        # Check that the object is of the same type
    +108        if not isinstance(value, MSParameters):
    +109            return False
    +110        equality_check = []
    +111        equality_check.append(self.molecular_search == value.molecular_search)
    +112        equality_check.append(self.transient == value.transient)
    +113        equality_check.append(self.mass_spectrum == value.mass_spectrum)
    +114        equality_check.append(self.ms_peak == value.ms_peak)
    +115        equality_check.append(self.data_input == value.data_input)
    +116
    +117        return all(equality_check)
    +118
    +119
    +120class GCMSParameters:
    +121    """GCMSParameters class is used to store the parameters used for the processing of the gas chromatograph mass spectrum
     122
    -123    Notes
    -124    -----
    -125    One can use the use_defaults parameter to reset the parameters to the default values.
    -126    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    -127    """
    -128
    -129    molecular_search = CompoundSearchSettings()
    -130    gc_ms = GasChromatographSetting()
    -131
    -132    def __init__(self, use_defaults = False) -> None:
    -133        if not use_defaults:
    -134            self.molecular_search = dataclasses.replace(GCMSParameters.molecular_search)
    -135            self.gc_ms = dataclasses.replace(GCMSParameters.gc_ms)
    -136        else:
    -137            self.molecular_search = CompoundSearchSettings()
    -138            self.gc_ms = GasChromatographSetting()
    -139
    -140    def copy(self):
    -141        """Create a copy of the GCMSParameters object"""
    -142        new_gcms_parameters = GCMSParameters()
    -143        new_gcms_parameters.molecular_search = dataclasses.replace(self.molecular_search)
    -144        new_gcms_parameters.gc_ms = dataclasses.replace(self.gc_ms)
    +123    Each attibute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.
    +124
    +125    Parameters
    +126    ----------
    +127    use_defaults: bool, optional
    +128        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    +129
    +130    Attributes
    +131    -----------
    +132    molecular_search: MolecularFormulaSearchSettings
    +133        MolecularFormulaSearchSettings object
    +134    gc_ms: GasChromatographSetting
    +135        GasChromatographSetting object
    +136
    +137    Notes
    +138    -----
    +139    One can use the use_defaults parameter to reset the parameters to the default values.
    +140    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    +141    """
    +142
    +143    molecular_search = CompoundSearchSettings()
    +144    gc_ms = GasChromatographSetting()
     145
    -146        return new_gcms_parameters
    -147    
    -148    def __eq__(self, value: object) -> bool:
    -149        # Check that the object is of the same type
    -150        if not isinstance(value, GCMSParameters):
    -151            return False
    -152        equality_check = []
    -153        equality_check.append(self.molecular_search == value.molecular_search)
    -154        equality_check.append(self.gc_ms == value.gc_ms)
    -155
    -156        return all(equality_check)
    -157
    -158    def print(self):
    -159        """Print the GCMSParameters object"""
    -160        for k, v in self.__dict__.items():
    -161            print(k, type(v).__name__)
    -162
    -163            for k2, v2 in v.__dict__.items():
    -164                print("    {}: {}".format(k2, v2))
    -165
    -166class LCMSParameters:
    -167    """LCMSParameters class is used to store the parameters used for the processing of the liquid chromatograph mass spectrum
    -168
    -169    Each attibute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.
    -170
    -171    Parameters
    -172    ----------
    -173    use_defaults: bool, optional
    -174        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    -175
    -176    Attributes
    -177    -----------
    -178    lc_ms: LiquidChromatographSetting
    -179        LiquidChromatographSetting object
    -180    mass_spectrum: dict
    -181        dictionary with the mass spectrum parameters for ms1 and ms2, each value is a MSParameters object
    +146    def __init__(self, use_defaults=False) -> None:
    +147        if not use_defaults:
    +148            self.molecular_search = dataclasses.replace(GCMSParameters.molecular_search)
    +149            self.gc_ms = dataclasses.replace(GCMSParameters.gc_ms)
    +150        else:
    +151            self.molecular_search = CompoundSearchSettings()
    +152            self.gc_ms = GasChromatographSetting()
    +153
    +154    def copy(self):
    +155        """Create a copy of the GCMSParameters object"""
    +156        new_gcms_parameters = GCMSParameters()
    +157        new_gcms_parameters.molecular_search = dataclasses.replace(
    +158            self.molecular_search
    +159        )
    +160        new_gcms_parameters.gc_ms = dataclasses.replace(self.gc_ms)
    +161
    +162        return new_gcms_parameters
    +163
    +164    def __eq__(self, value: object) -> bool:
    +165        # Check that the object is of the same type
    +166        if not isinstance(value, GCMSParameters):
    +167            return False
    +168        equality_check = []
    +169        equality_check.append(self.molecular_search == value.molecular_search)
    +170        equality_check.append(self.gc_ms == value.gc_ms)
    +171
    +172        return all(equality_check)
    +173
    +174    def print(self):
    +175        """Print the GCMSParameters object"""
    +176        for k, v in self.__dict__.items():
    +177            print(k, type(v).__name__)
    +178
    +179            for k2, v2 in v.__dict__.items():
    +180                print("    {}: {}".format(k2, v2))
    +181
     182
    -183    Notes
    -184    -----
    -185    One can use the use_defaults parameter to reset the parameters to the default values.
    -186    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    -187    """
    -188    lc_ms = LiquidChromatographSetting()
    -189    mass_spectrum = {"ms1":MSParameters(), "ms2":MSParameters()}
    -190
    -191    def __init__(self, use_defaults = False) -> None:
    -192        if not use_defaults:
    -193            self.lc_ms = dataclasses.replace(LCMSParameters.lc_ms)
    -194            self.mass_spectrum = {"ms1":MSParameters(use_defaults=False), "ms2":MSParameters(use_defaults=False)}
    -195        else:
    -196            self.lc_ms = LiquidChromatographSetting()
    -197            self.mass_spectrum = {"ms1":MSParameters(use_defaults=True), "ms2":MSParameters(use_defaults=True)}
    -198
    -199    def copy(self):
    -200        """Create a copy of the LCMSParameters object"""
    -201        new_lcms_parameters = LCMSParameters()
    -202        new_lcms_parameters.lc_ms = dataclasses.replace(self.lc_ms)
    -203        for key in self.mass_spectrum:
    -204            new_lcms_parameters.mass_spectrum[key] = self.mass_spectrum[key].copy()
    +183class LCMSParameters:
    +184    """LCMSParameters class is used to store the parameters used for the processing of the liquid chromatograph mass spectrum
    +185
    +186    Each attibute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.
    +187
    +188    Parameters
    +189    ----------
    +190    use_defaults: bool, optional
    +191        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    +192
    +193    Attributes
    +194    -----------
    +195    lc_ms: LiquidChromatographSetting
    +196        LiquidChromatographSetting object
    +197    mass_spectrum: dict
    +198        dictionary with the mass spectrum parameters for ms1 and ms2, each value is a MSParameters object
    +199
    +200    Notes
    +201    -----
    +202    One can use the use_defaults parameter to reset the parameters to the default values.
    +203    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    +204    """
     205
    -206        return new_lcms_parameters
    -207    
    -208    def __eq__(self, value: object) -> bool:
    -209        # Check that the object is of the same type
    -210        if not isinstance(value, LCMSParameters):
    -211            return False
    -212        equality_check = []
    -213        equality_check.append(self.lc_ms == value.lc_ms)
    -214
    -215        # Check that the mass_spectrum dictionary has the same keys
    -216        equality_check.append(self.mass_spectrum.keys() == value.mass_spectrum.keys())
    -217
    -218        # Check that the values of the mass_spectrum dictionary are equal
    -219        for key in self.mass_spectrum.keys():
    -220            equality_check.append(self.mass_spectrum[key].mass_spectrum == value.mass_spectrum[key].mass_spectrum)
    -221            equality_check.append(self.mass_spectrum[key].ms_peak == value.mass_spectrum[key].ms_peak)
    -222            equality_check.append(self.mass_spectrum[key].molecular_search == value.mass_spectrum[key].molecular_search)
    -223            equality_check.append(self.mass_spectrum[key].transient == value.mass_spectrum[key].transient)
    -224            equality_check.append(self.mass_spectrum[key].data_input == value.mass_spectrum[key].data_input)
    -225
    -226        return all(equality_check)
    -227    
    -228    def print(self):
    -229        """Print the LCMSParameters object"""
    -230        # Print the lcms paramters
    -231        for k, v in self.__dict__.items():
    -232            if k == "lc_ms":
    -233                print(k, type(v).__name__)
    -234
    -235        for k2, v2 in self.mass_spectrum.items():
    -236            """Print the MSParameters object"""
    -237            for k3, v3 in v2.__dict__.items():
    -238                print("{} - {}: {}".format(k2, k3, type(v3).__name__))
    -239
    -240                for k4, v4 in v3.__dict__.items():
    -241                    print("    {}: {}".format(k4, v4))
    -242
    -243def default_parameters(file_location):  # pragma: no cover
    -244    """Generate parameters dictionary with the default parameters for data processing
    -245       To gather parameters from instrument data during the data parsing step, a parameters dictionary with the default parameters needs to be generated.
    -246       This dictionary acts as a placeholder and is later used as an argument for all the class constructor methods during instantiation. 
    -247       The data gathered from the instrument is added to the class properties.
    -248
    -249    Parameters
    -250    ----------
    -251    file_location: str
    -252        path to the file
    -253
    -254    Returns
    -255    -------
    -256    parameters: dict
    -257        dictionary with the default parameters for data processing    
    -258    """
    -259
    -260    parameters = dict()
    -261
    -262    parameters["Aterm"] = 0
    -263
    -264    parameters["Bterm"] = 0
    -265
    -266    parameters["Cterm"] = 0
    -267
    -268    parameters["exc_high_freq"] = 0
    -269
    -270    parameters["exc_low_freq"] = 0
    +206    lc_ms = LiquidChromatographSetting()
    +207    mass_spectrum = {"ms1": MSParameters(), "ms2": MSParameters()}
    +208
    +209    def __init__(self, use_defaults=False) -> None:
    +210        if not use_defaults:
    +211            self.lc_ms = dataclasses.replace(LCMSParameters.lc_ms)
    +212            self.mass_spectrum = {
    +213                "ms1": MSParameters(use_defaults=False),
    +214                "ms2": MSParameters(use_defaults=False),
    +215            }
    +216        else:
    +217            self.lc_ms = LiquidChromatographSetting()
    +218            self.mass_spectrum = {
    +219                "ms1": MSParameters(use_defaults=True),
    +220                "ms2": MSParameters(use_defaults=True),
    +221            }
    +222
    +223    def copy(self):
    +224        """Create a copy of the LCMSParameters object"""
    +225        new_lcms_parameters = LCMSParameters()
    +226        new_lcms_parameters.lc_ms = dataclasses.replace(self.lc_ms)
    +227        for key in self.mass_spectrum:
    +228            new_lcms_parameters.mass_spectrum[key] = self.mass_spectrum[key].copy()
    +229
    +230        return new_lcms_parameters
    +231
    +232    def __eq__(self, value: object) -> bool:
    +233        # Check that the object is of the same type
    +234        if not isinstance(value, LCMSParameters):
    +235            return False
    +236        equality_check = []
    +237        equality_check.append(self.lc_ms == value.lc_ms)
    +238
    +239        # Check that the mass_spectrum dictionary has the same keys
    +240        equality_check.append(self.mass_spectrum.keys() == value.mass_spectrum.keys())
    +241
    +242        # Check that the values of the mass_spectrum dictionary are equal
    +243        for key in self.mass_spectrum.keys():
    +244            equality_check.append(
    +245                self.mass_spectrum[key].mass_spectrum
    +246                == value.mass_spectrum[key].mass_spectrum
    +247            )
    +248            equality_check.append(
    +249                self.mass_spectrum[key].ms_peak == value.mass_spectrum[key].ms_peak
    +250            )
    +251            equality_check.append(
    +252                self.mass_spectrum[key].molecular_search
    +253                == value.mass_spectrum[key].molecular_search
    +254            )
    +255            equality_check.append(
    +256                self.mass_spectrum[key].transient == value.mass_spectrum[key].transient
    +257            )
    +258            equality_check.append(
    +259                self.mass_spectrum[key].data_input
    +260                == value.mass_spectrum[key].data_input
    +261            )
    +262
    +263        return all(equality_check)
    +264
    +265    def print(self):
    +266        """Print the LCMSParameters object"""
    +267        # Print the lcms paramters
    +268        for k, v in self.__dict__.items():
    +269            if k == "lc_ms":
    +270                print(k, type(v).__name__)
     271
    -272    parameters["mw_low"] = 0
    -273        
    -274    parameters["mw_high"] = 0
    -275
    -276    parameters["qpd_enabled"] = 0
    -277
    -278    parameters["bandwidth"] = 0
    +272        for k2, v2 in self.mass_spectrum.items():
    +273            """Print the MSParameters object"""
    +274            for k3, v3 in v2.__dict__.items():
    +275                print("{} - {}: {}".format(k2, k3, type(v3).__name__))
    +276
    +277                for k4, v4 in v3.__dict__.items():
    +278                    print("    {}: {}".format(k4, v4))
     279
    -280    parameters['analyzer'] = 'Unknown'
    -281
    -282    parameters['acquisition_time'] = None
    -283
    -284    parameters['instrument_label'] = 'Unknown' 
    -285
    -286    parameters['sample_name'] = 'Unknown'
    -287
    -288    parameters["number_data_points"] = 0
    -289
    -290    parameters["polarity"] = 'Unknown'
    +280
    +281def default_parameters(file_location):  # pragma: no cover
    +282    """Generate parameters dictionary with the default parameters for data processing
    +283       To gather parameters from instrument data during the data parsing step, a parameters dictionary with the default parameters needs to be generated.
    +284       This dictionary acts as a placeholder and is later used as an argument for all the class constructor methods during instantiation.
    +285       The data gathered from the instrument is added to the class properties.
    +286
    +287    Parameters
    +288    ----------
    +289    file_location: str
    +290        path to the file
     291
    -292    parameters["filename_path"] = str(file_location)
    -293
    -294    """scan_number and rt will be need to lc ms"""
    -295
    -296    parameters["mobility_scan"] = 0
    +292    Returns
    +293    -------
    +294    parameters: dict
    +295        dictionary with the default parameters for data processing
    +296    """
     297
    -298    parameters["mobility_rt"] = 0
    +298    parameters = dict()
     299
    -300    parameters["scan_number"] = 0
    +300    parameters["Aterm"] = 0
     301
    -302    parameters["rt"] = 0
    +302    parameters["Bterm"] = 0
     303
    -304    return parameters
    +304    parameters["Cterm"] = 0
    +305
    +306    parameters["exc_high_freq"] = 0
    +307
    +308    parameters["exc_low_freq"] = 0
    +309
    +310    parameters["mw_low"] = 0
    +311
    +312    parameters["mw_high"] = 0
    +313
    +314    parameters["qpd_enabled"] = 0
    +315
    +316    parameters["bandwidth"] = 0
    +317
    +318    parameters["analyzer"] = "Unknown"
    +319
    +320    parameters["acquisition_time"] = None
    +321
    +322    parameters["instrument_label"] = "Unknown"
    +323
    +324    parameters["sample_name"] = "Unknown"
    +325
    +326    parameters["number_data_points"] = 0
    +327
    +328    parameters["polarity"] = "Unknown"
    +329
    +330    parameters["filename_path"] = str(file_location)
    +331
    +332    """scan_number and rt will be need to lc ms"""
    +333
    +334    parameters["mobility_scan"] = 0
    +335
    +336    parameters["mobility_rt"] = 0
    +337
    +338    parameters["scan_number"] = 0
    +339
    +340    parameters["rt"] = 0
    +341
    +342    return parameters
     
    @@ -454,13 +492,13 @@

    -
     8def reset_ms_parameters():
    - 9    """Reset the MSParameter class to the default values"""
    -10    MSParameters.molecular_search = MolecularFormulaSearchSettings()
    -11    MSParameters.transient = TransientSetting()
    -12    MSParameters.mass_spectrum = MassSpectrumSetting()
    -13    MSParameters.ms_peak = MassSpecPeakSetting()
    -14    MSParameters.data_input = DataInputSetting()
    +            
    18def reset_ms_parameters():
    +19    """Reset the MSParameter class to the default values"""
    +20    MSParameters.molecular_search = MolecularFormulaSearchSettings()
    +21    MSParameters.transient = TransientSetting()
    +22    MSParameters.mass_spectrum = MassSpectrumSetting()
    +23    MSParameters.ms_peak = MassSpecPeakSetting()
    +24    MSParameters.data_input = DataInputSetting()
     
    @@ -480,10 +518,10 @@

    -
    16def reset_gcms_parameters():
    -17    """Reset the GCMSParameters class to the default values"""
    -18    GCMSParameters.molecular_search = CompoundSearchSettings()
    -19    GCMSParameters.gc_ms = GasChromatographSetting()
    +            
    27def reset_gcms_parameters():
    +28    """Reset the GCMSParameters class to the default values"""
    +29    GCMSParameters.molecular_search = CompoundSearchSettings()
    +30    GCMSParameters.gc_ms = GasChromatographSetting()
     
    @@ -503,10 +541,10 @@

    -
    21def reset_lcms_parameters():
    -22    """Reset the LCMSParameters class to the default values"""
    -23    reset_ms_parameters()
    -24    LCMSParameters.lc_ms = LiquidChromatographSetting()
    +            
    33def reset_lcms_parameters():
    +34    """Reset the LCMSParameters class to the default values"""
    +35    reset_ms_parameters()
    +36    LCMSParameters.lc_ms = LiquidChromatographSetting()
     
    @@ -526,86 +564,86 @@

    -
     26class MSParameters:
    - 27    """MSParameters class is used to store the parameters used for the processing of the mass spectrum
    - 28    
    - 29    Each attibute is a class that contains the parameters for the processing of the mass spectrum, see the corems.encapsulation.factory.processingSetting module for more details.
    - 30
    - 31    Parameters
    - 32    ----------
    - 33    use_defaults: bool, optional
    - 34        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    - 35
    - 36    Attributes
    - 37    -----------
    - 38    molecular_search: MolecularFormulaSearchSettings
    - 39        MolecularFormulaSearchSettings object
    - 40    transient: TransientSetting
    - 41        TransientSetting object
    - 42    mass_spectrum: MassSpectrumSetting
    - 43        MassSpectrumSetting object
    - 44    ms_peak: MassSpecPeakSetting
    - 45        MassSpecPeakSetting object
    - 46    data_input: DataInputSetting
    - 47        DataInputSetting object
    +            
     39class MSParameters:
    + 40    """MSParameters class is used to store the parameters used for the processing of the mass spectrum
    + 41
    + 42    Each attibute is a class that contains the parameters for the processing of the mass spectrum, see the corems.encapsulation.factory.processingSetting module for more details.
    + 43
    + 44    Parameters
    + 45    ----------
    + 46    use_defaults: bool, optional
    + 47        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
      48
    - 49    Notes
    - 50    -----
    - 51    One can use the use_defaults parameter to reset the parameters to the default values.
    - 52    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    - 53    """
    - 54
    - 55    molecular_search = MolecularFormulaSearchSettings()
    - 56    transient = TransientSetting()
    - 57    mass_spectrum = MassSpectrumSetting()
    - 58    ms_peak = MassSpecPeakSetting()
    - 59    data_input = DataInputSetting()
    - 60
    - 61    def __init__(self, use_defaults = False) -> None:
    - 62        if not use_defaults:
    - 63            self.molecular_search = dataclasses.replace(MSParameters.molecular_search)
    - 64            self.transient = dataclasses.replace(MSParameters.transient)
    - 65            self.mass_spectrum = dataclasses.replace(MSParameters.mass_spectrum)
    - 66            self.ms_peak = dataclasses.replace(MSParameters.ms_peak)
    - 67            self.data_input = dataclasses.replace(MSParameters.data_input)
    - 68        else:
    - 69            self.molecular_search = MolecularFormulaSearchSettings()
    - 70            self.transient = TransientSetting()
    - 71            self.mass_spectrum = MassSpectrumSetting()
    - 72            self.ms_peak = MassSpecPeakSetting()
    - 73            self.data_input = DataInputSetting()
    - 74    
    - 75    def copy(self):
    - 76        """Create a copy of the MSParameters object"""
    - 77        new_ms_parameters = MSParameters()
    - 78        new_ms_parameters.molecular_search = dataclasses.replace(self.molecular_search)
    - 79        new_ms_parameters.transient = dataclasses.replace(self.transient)
    - 80        new_ms_parameters.mass_spectrum = dataclasses.replace(self.mass_spectrum)
    - 81        new_ms_parameters.ms_peak = dataclasses.replace(self.ms_peak)
    - 82        new_ms_parameters.data_input = dataclasses.replace(self.data_input)
    - 83
    - 84        return new_ms_parameters
    - 85    
    - 86    def print(self):
    - 87        """Print the MSParameters object"""
    - 88        for k, v in self.__dict__.items():
    - 89            print(k, type(v).__name__)
    - 90
    - 91            for k2, v2 in v.__dict__.items():
    - 92                print("    {}: {}".format(k2, v2))
    - 93    
    - 94    def __eq__(self, value: object) -> bool:
    - 95        # Check that the object is of the same type
    - 96        if not isinstance(value, MSParameters):
    - 97            return False
    - 98        equality_check = []
    - 99        equality_check.append(self.molecular_search == value.molecular_search)
    -100        equality_check.append(self.transient == value.transient)
    -101        equality_check.append(self.mass_spectrum ==value.mass_spectrum)
    -102        equality_check.append(self.ms_peak == value.ms_peak)
    -103        equality_check.append(self.data_input == value.data_input)
    -104
    -105        return all(equality_check)
    + 49    Attributes
    + 50    -----------
    + 51    molecular_search: MolecularFormulaSearchSettings
    + 52        MolecularFormulaSearchSettings object
    + 53    transient: TransientSetting
    + 54        TransientSetting object
    + 55    mass_spectrum: MassSpectrumSetting
    + 56        MassSpectrumSetting object
    + 57    ms_peak: MassSpecPeakSetting
    + 58        MassSpecPeakSetting object
    + 59    data_input: DataInputSetting
    + 60        DataInputSetting object
    + 61
    + 62    Notes
    + 63    -----
    + 64    One can use the use_defaults parameter to reset the parameters to the default values.
    + 65    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    + 66    """
    + 67
    + 68    molecular_search = MolecularFormulaSearchSettings()
    + 69    transient = TransientSetting()
    + 70    mass_spectrum = MassSpectrumSetting()
    + 71    ms_peak = MassSpecPeakSetting()
    + 72    data_input = DataInputSetting()
    + 73
    + 74    def __init__(self, use_defaults=False) -> None:
    + 75        if not use_defaults:
    + 76            self.molecular_search = dataclasses.replace(MSParameters.molecular_search)
    + 77            self.transient = dataclasses.replace(MSParameters.transient)
    + 78            self.mass_spectrum = dataclasses.replace(MSParameters.mass_spectrum)
    + 79            self.ms_peak = dataclasses.replace(MSParameters.ms_peak)
    + 80            self.data_input = dataclasses.replace(MSParameters.data_input)
    + 81        else:
    + 82            self.molecular_search = MolecularFormulaSearchSettings()
    + 83            self.transient = TransientSetting()
    + 84            self.mass_spectrum = MassSpectrumSetting()
    + 85            self.ms_peak = MassSpecPeakSetting()
    + 86            self.data_input = DataInputSetting()
    + 87
    + 88    def copy(self):
    + 89        """Create a copy of the MSParameters object"""
    + 90        new_ms_parameters = MSParameters()
    + 91        new_ms_parameters.molecular_search = dataclasses.replace(self.molecular_search)
    + 92        new_ms_parameters.transient = dataclasses.replace(self.transient)
    + 93        new_ms_parameters.mass_spectrum = dataclasses.replace(self.mass_spectrum)
    + 94        new_ms_parameters.ms_peak = dataclasses.replace(self.ms_peak)
    + 95        new_ms_parameters.data_input = dataclasses.replace(self.data_input)
    + 96
    + 97        return new_ms_parameters
    + 98
    + 99    def print(self):
    +100        """Print the MSParameters object"""
    +101        for k, v in self.__dict__.items():
    +102            print(k, type(v).__name__)
    +103
    +104            for k2, v2 in v.__dict__.items():
    +105                print("    {}: {}".format(k2, v2))
    +106
    +107    def __eq__(self, value: object) -> bool:
    +108        # Check that the object is of the same type
    +109        if not isinstance(value, MSParameters):
    +110            return False
    +111        equality_check = []
    +112        equality_check.append(self.molecular_search == value.molecular_search)
    +113        equality_check.append(self.transient == value.transient)
    +114        equality_check.append(self.mass_spectrum == value.mass_spectrum)
    +115        equality_check.append(self.ms_peak == value.ms_peak)
    +116        equality_check.append(self.data_input == value.data_input)
    +117
    +118        return all(equality_check)
     
    @@ -652,19 +690,19 @@
    Notes
    -
    61    def __init__(self, use_defaults = False) -> None:
    -62        if not use_defaults:
    -63            self.molecular_search = dataclasses.replace(MSParameters.molecular_search)
    -64            self.transient = dataclasses.replace(MSParameters.transient)
    -65            self.mass_spectrum = dataclasses.replace(MSParameters.mass_spectrum)
    -66            self.ms_peak = dataclasses.replace(MSParameters.ms_peak)
    -67            self.data_input = dataclasses.replace(MSParameters.data_input)
    -68        else:
    -69            self.molecular_search = MolecularFormulaSearchSettings()
    -70            self.transient = TransientSetting()
    -71            self.mass_spectrum = MassSpectrumSetting()
    -72            self.ms_peak = MassSpecPeakSetting()
    -73            self.data_input = DataInputSetting()
    +            
    74    def __init__(self, use_defaults=False) -> None:
    +75        if not use_defaults:
    +76            self.molecular_search = dataclasses.replace(MSParameters.molecular_search)
    +77            self.transient = dataclasses.replace(MSParameters.transient)
    +78            self.mass_spectrum = dataclasses.replace(MSParameters.mass_spectrum)
    +79            self.ms_peak = dataclasses.replace(MSParameters.ms_peak)
    +80            self.data_input = dataclasses.replace(MSParameters.data_input)
    +81        else:
    +82            self.molecular_search = MolecularFormulaSearchSettings()
    +83            self.transient = TransientSetting()
    +84            self.mass_spectrum = MassSpectrumSetting()
    +85            self.ms_peak = MassSpecPeakSetting()
    +86            self.data_input = DataInputSetting()
     
    @@ -747,16 +785,16 @@
    Notes
    -
    75    def copy(self):
    -76        """Create a copy of the MSParameters object"""
    -77        new_ms_parameters = MSParameters()
    -78        new_ms_parameters.molecular_search = dataclasses.replace(self.molecular_search)
    -79        new_ms_parameters.transient = dataclasses.replace(self.transient)
    -80        new_ms_parameters.mass_spectrum = dataclasses.replace(self.mass_spectrum)
    -81        new_ms_parameters.ms_peak = dataclasses.replace(self.ms_peak)
    -82        new_ms_parameters.data_input = dataclasses.replace(self.data_input)
    -83
    -84        return new_ms_parameters
    +            
    88    def copy(self):
    +89        """Create a copy of the MSParameters object"""
    +90        new_ms_parameters = MSParameters()
    +91        new_ms_parameters.molecular_search = dataclasses.replace(self.molecular_search)
    +92        new_ms_parameters.transient = dataclasses.replace(self.transient)
    +93        new_ms_parameters.mass_spectrum = dataclasses.replace(self.mass_spectrum)
    +94        new_ms_parameters.ms_peak = dataclasses.replace(self.ms_peak)
    +95        new_ms_parameters.data_input = dataclasses.replace(self.data_input)
    +96
    +97        return new_ms_parameters
     
    @@ -776,13 +814,13 @@
    Notes
    -
    86    def print(self):
    -87        """Print the MSParameters object"""
    -88        for k, v in self.__dict__.items():
    -89            print(k, type(v).__name__)
    -90
    -91            for k2, v2 in v.__dict__.items():
    -92                print("    {}: {}".format(k2, v2))
    +            
     99    def print(self):
    +100        """Print the MSParameters object"""
    +101        for k, v in self.__dict__.items():
    +102            print(k, type(v).__name__)
    +103
    +104            for k2, v2 in v.__dict__.items():
    +105                print("    {}: {}".format(k2, v2))
     
    @@ -803,65 +841,67 @@
    Notes
    -
    107class GCMSParameters:
    -108    """GCMSParameters class is used to store the parameters used for the processing of the gas chromatograph mass spectrum
    -109
    -110    Each attibute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.
    -111
    -112    Parameters
    -113    ----------
    -114    use_defaults: bool, optional
    -115        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    -116
    -117    Attributes
    -118    -----------
    -119    molecular_search: MolecularFormulaSearchSettings
    -120        MolecularFormulaSearchSettings object
    -121    gc_ms: GasChromatographSetting
    -122        GasChromatographSetting object
    +            
    121class GCMSParameters:
    +122    """GCMSParameters class is used to store the parameters used for the processing of the gas chromatograph mass spectrum
     123
    -124    Notes
    -125    -----
    -126    One can use the use_defaults parameter to reset the parameters to the default values.
    -127    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    -128    """
    -129
    -130    molecular_search = CompoundSearchSettings()
    -131    gc_ms = GasChromatographSetting()
    -132
    -133    def __init__(self, use_defaults = False) -> None:
    -134        if not use_defaults:
    -135            self.molecular_search = dataclasses.replace(GCMSParameters.molecular_search)
    -136            self.gc_ms = dataclasses.replace(GCMSParameters.gc_ms)
    -137        else:
    -138            self.molecular_search = CompoundSearchSettings()
    -139            self.gc_ms = GasChromatographSetting()
    -140
    -141    def copy(self):
    -142        """Create a copy of the GCMSParameters object"""
    -143        new_gcms_parameters = GCMSParameters()
    -144        new_gcms_parameters.molecular_search = dataclasses.replace(self.molecular_search)
    -145        new_gcms_parameters.gc_ms = dataclasses.replace(self.gc_ms)
    +124    Each attibute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.
    +125
    +126    Parameters
    +127    ----------
    +128    use_defaults: bool, optional
    +129        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    +130
    +131    Attributes
    +132    -----------
    +133    molecular_search: MolecularFormulaSearchSettings
    +134        MolecularFormulaSearchSettings object
    +135    gc_ms: GasChromatographSetting
    +136        GasChromatographSetting object
    +137
    +138    Notes
    +139    -----
    +140    One can use the use_defaults parameter to reset the parameters to the default values.
    +141    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    +142    """
    +143
    +144    molecular_search = CompoundSearchSettings()
    +145    gc_ms = GasChromatographSetting()
     146
    -147        return new_gcms_parameters
    -148    
    -149    def __eq__(self, value: object) -> bool:
    -150        # Check that the object is of the same type
    -151        if not isinstance(value, GCMSParameters):
    -152            return False
    -153        equality_check = []
    -154        equality_check.append(self.molecular_search == value.molecular_search)
    -155        equality_check.append(self.gc_ms == value.gc_ms)
    -156
    -157        return all(equality_check)
    -158
    -159    def print(self):
    -160        """Print the GCMSParameters object"""
    -161        for k, v in self.__dict__.items():
    -162            print(k, type(v).__name__)
    -163
    -164            for k2, v2 in v.__dict__.items():
    -165                print("    {}: {}".format(k2, v2))
    +147    def __init__(self, use_defaults=False) -> None:
    +148        if not use_defaults:
    +149            self.molecular_search = dataclasses.replace(GCMSParameters.molecular_search)
    +150            self.gc_ms = dataclasses.replace(GCMSParameters.gc_ms)
    +151        else:
    +152            self.molecular_search = CompoundSearchSettings()
    +153            self.gc_ms = GasChromatographSetting()
    +154
    +155    def copy(self):
    +156        """Create a copy of the GCMSParameters object"""
    +157        new_gcms_parameters = GCMSParameters()
    +158        new_gcms_parameters.molecular_search = dataclasses.replace(
    +159            self.molecular_search
    +160        )
    +161        new_gcms_parameters.gc_ms = dataclasses.replace(self.gc_ms)
    +162
    +163        return new_gcms_parameters
    +164
    +165    def __eq__(self, value: object) -> bool:
    +166        # Check that the object is of the same type
    +167        if not isinstance(value, GCMSParameters):
    +168            return False
    +169        equality_check = []
    +170        equality_check.append(self.molecular_search == value.molecular_search)
    +171        equality_check.append(self.gc_ms == value.gc_ms)
    +172
    +173        return all(equality_check)
    +174
    +175    def print(self):
    +176        """Print the GCMSParameters object"""
    +177        for k, v in self.__dict__.items():
    +178            print(k, type(v).__name__)
    +179
    +180            for k2, v2 in v.__dict__.items():
    +181                print("    {}: {}".format(k2, v2))
     
    @@ -902,13 +942,13 @@
    Notes
    -
    133    def __init__(self, use_defaults = False) -> None:
    -134        if not use_defaults:
    -135            self.molecular_search = dataclasses.replace(GCMSParameters.molecular_search)
    -136            self.gc_ms = dataclasses.replace(GCMSParameters.gc_ms)
    -137        else:
    -138            self.molecular_search = CompoundSearchSettings()
    -139            self.gc_ms = GasChromatographSetting()
    +            
    147    def __init__(self, use_defaults=False) -> None:
    +148        if not use_defaults:
    +149            self.molecular_search = dataclasses.replace(GCMSParameters.molecular_search)
    +150            self.gc_ms = dataclasses.replace(GCMSParameters.gc_ms)
    +151        else:
    +152            self.molecular_search = CompoundSearchSettings()
    +153            self.gc_ms = GasChromatographSetting()
     
    @@ -952,13 +992,15 @@
    Notes
    -
    141    def copy(self):
    -142        """Create a copy of the GCMSParameters object"""
    -143        new_gcms_parameters = GCMSParameters()
    -144        new_gcms_parameters.molecular_search = dataclasses.replace(self.molecular_search)
    -145        new_gcms_parameters.gc_ms = dataclasses.replace(self.gc_ms)
    -146
    -147        return new_gcms_parameters
    +            
    155    def copy(self):
    +156        """Create a copy of the GCMSParameters object"""
    +157        new_gcms_parameters = GCMSParameters()
    +158        new_gcms_parameters.molecular_search = dataclasses.replace(
    +159            self.molecular_search
    +160        )
    +161        new_gcms_parameters.gc_ms = dataclasses.replace(self.gc_ms)
    +162
    +163        return new_gcms_parameters
     
    @@ -978,13 +1020,13 @@
    Notes
    -
    159    def print(self):
    -160        """Print the GCMSParameters object"""
    -161        for k, v in self.__dict__.items():
    -162            print(k, type(v).__name__)
    -163
    -164            for k2, v2 in v.__dict__.items():
    -165                print("    {}: {}".format(k2, v2))
    +            
    175    def print(self):
    +176        """Print the GCMSParameters object"""
    +177        for k, v in self.__dict__.items():
    +178            print(k, type(v).__name__)
    +179
    +180            for k2, v2 in v.__dict__.items():
    +181                print("    {}: {}".format(k2, v2))
     
    @@ -1005,82 +1047,102 @@
    Notes
    -
    167class LCMSParameters:
    -168    """LCMSParameters class is used to store the parameters used for the processing of the liquid chromatograph mass spectrum
    -169
    -170    Each attibute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.
    -171
    -172    Parameters
    -173    ----------
    -174    use_defaults: bool, optional
    -175        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    -176
    -177    Attributes
    -178    -----------
    -179    lc_ms: LiquidChromatographSetting
    -180        LiquidChromatographSetting object
    -181    mass_spectrum: dict
    -182        dictionary with the mass spectrum parameters for ms1 and ms2, each value is a MSParameters object
    -183
    -184    Notes
    -185    -----
    -186    One can use the use_defaults parameter to reset the parameters to the default values.
    -187    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    -188    """
    -189    lc_ms = LiquidChromatographSetting()
    -190    mass_spectrum = {"ms1":MSParameters(), "ms2":MSParameters()}
    -191
    -192    def __init__(self, use_defaults = False) -> None:
    -193        if not use_defaults:
    -194            self.lc_ms = dataclasses.replace(LCMSParameters.lc_ms)
    -195            self.mass_spectrum = {"ms1":MSParameters(use_defaults=False), "ms2":MSParameters(use_defaults=False)}
    -196        else:
    -197            self.lc_ms = LiquidChromatographSetting()
    -198            self.mass_spectrum = {"ms1":MSParameters(use_defaults=True), "ms2":MSParameters(use_defaults=True)}
    -199
    -200    def copy(self):
    -201        """Create a copy of the LCMSParameters object"""
    -202        new_lcms_parameters = LCMSParameters()
    -203        new_lcms_parameters.lc_ms = dataclasses.replace(self.lc_ms)
    -204        for key in self.mass_spectrum:
    -205            new_lcms_parameters.mass_spectrum[key] = self.mass_spectrum[key].copy()
    +            
    184class LCMSParameters:
    +185    """LCMSParameters class is used to store the parameters used for the processing of the liquid chromatograph mass spectrum
    +186
    +187    Each attibute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.
    +188
    +189    Parameters
    +190    ----------
    +191    use_defaults: bool, optional
    +192        if True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    +193
    +194    Attributes
    +195    -----------
    +196    lc_ms: LiquidChromatographSetting
    +197        LiquidChromatographSetting object
    +198    mass_spectrum: dict
    +199        dictionary with the mass spectrum parameters for ms1 and ms2, each value is a MSParameters object
    +200
    +201    Notes
    +202    -----
    +203    One can use the use_defaults parameter to reset the parameters to the default values.
    +204    Alternatively, to use the current values - modify the class's contents before instantiating the class.
    +205    """
     206
    -207        return new_lcms_parameters
    -208    
    -209    def __eq__(self, value: object) -> bool:
    -210        # Check that the object is of the same type
    -211        if not isinstance(value, LCMSParameters):
    -212            return False
    -213        equality_check = []
    -214        equality_check.append(self.lc_ms == value.lc_ms)
    -215
    -216        # Check that the mass_spectrum dictionary has the same keys
    -217        equality_check.append(self.mass_spectrum.keys() == value.mass_spectrum.keys())
    -218
    -219        # Check that the values of the mass_spectrum dictionary are equal
    -220        for key in self.mass_spectrum.keys():
    -221            equality_check.append(self.mass_spectrum[key].mass_spectrum == value.mass_spectrum[key].mass_spectrum)
    -222            equality_check.append(self.mass_spectrum[key].ms_peak == value.mass_spectrum[key].ms_peak)
    -223            equality_check.append(self.mass_spectrum[key].molecular_search == value.mass_spectrum[key].molecular_search)
    -224            equality_check.append(self.mass_spectrum[key].transient == value.mass_spectrum[key].transient)
    -225            equality_check.append(self.mass_spectrum[key].data_input == value.mass_spectrum[key].data_input)
    -226
    -227        return all(equality_check)
    -228    
    -229    def print(self):
    -230        """Print the LCMSParameters object"""
    -231        # Print the lcms paramters
    -232        for k, v in self.__dict__.items():
    -233            if k == "lc_ms":
    -234                print(k, type(v).__name__)
    -235
    -236        for k2, v2 in self.mass_spectrum.items():
    -237            """Print the MSParameters object"""
    -238            for k3, v3 in v2.__dict__.items():
    -239                print("{} - {}: {}".format(k2, k3, type(v3).__name__))
    -240
    -241                for k4, v4 in v3.__dict__.items():
    -242                    print("    {}: {}".format(k4, v4))
    +207    lc_ms = LiquidChromatographSetting()
    +208    mass_spectrum = {"ms1": MSParameters(), "ms2": MSParameters()}
    +209
    +210    def __init__(self, use_defaults=False) -> None:
    +211        if not use_defaults:
    +212            self.lc_ms = dataclasses.replace(LCMSParameters.lc_ms)
    +213            self.mass_spectrum = {
    +214                "ms1": MSParameters(use_defaults=False),
    +215                "ms2": MSParameters(use_defaults=False),
    +216            }
    +217        else:
    +218            self.lc_ms = LiquidChromatographSetting()
    +219            self.mass_spectrum = {
    +220                "ms1": MSParameters(use_defaults=True),
    +221                "ms2": MSParameters(use_defaults=True),
    +222            }
    +223
    +224    def copy(self):
    +225        """Create a copy of the LCMSParameters object"""
    +226        new_lcms_parameters = LCMSParameters()
    +227        new_lcms_parameters.lc_ms = dataclasses.replace(self.lc_ms)
    +228        for key in self.mass_spectrum:
    +229            new_lcms_parameters.mass_spectrum[key] = self.mass_spectrum[key].copy()
    +230
    +231        return new_lcms_parameters
    +232
    +233    def __eq__(self, value: object) -> bool:
    +234        # Check that the object is of the same type
    +235        if not isinstance(value, LCMSParameters):
    +236            return False
    +237        equality_check = []
    +238        equality_check.append(self.lc_ms == value.lc_ms)
    +239
    +240        # Check that the mass_spectrum dictionary has the same keys
    +241        equality_check.append(self.mass_spectrum.keys() == value.mass_spectrum.keys())
    +242
    +243        # Check that the values of the mass_spectrum dictionary are equal
    +244        for key in self.mass_spectrum.keys():
    +245            equality_check.append(
    +246                self.mass_spectrum[key].mass_spectrum
    +247                == value.mass_spectrum[key].mass_spectrum
    +248            )
    +249            equality_check.append(
    +250                self.mass_spectrum[key].ms_peak == value.mass_spectrum[key].ms_peak
    +251            )
    +252            equality_check.append(
    +253                self.mass_spectrum[key].molecular_search
    +254                == value.mass_spectrum[key].molecular_search
    +255            )
    +256            equality_check.append(
    +257                self.mass_spectrum[key].transient == value.mass_spectrum[key].transient
    +258            )
    +259            equality_check.append(
    +260                self.mass_spectrum[key].data_input
    +261                == value.mass_spectrum[key].data_input
    +262            )
    +263
    +264        return all(equality_check)
    +265
    +266    def print(self):
    +267        """Print the LCMSParameters object"""
    +268        # Print the lcms paramters
    +269        for k, v in self.__dict__.items():
    +270            if k == "lc_ms":
    +271                print(k, type(v).__name__)
    +272
    +273        for k2, v2 in self.mass_spectrum.items():
    +274            """Print the MSParameters object"""
    +275            for k3, v3 in v2.__dict__.items():
    +276                print("{} - {}: {}".format(k2, k3, type(v3).__name__))
    +277
    +278                for k4, v4 in v3.__dict__.items():
    +279                    print("    {}: {}".format(k4, v4))
     
    @@ -1121,13 +1183,19 @@
    Notes
    -
    192    def __init__(self, use_defaults = False) -> None:
    -193        if not use_defaults:
    -194            self.lc_ms = dataclasses.replace(LCMSParameters.lc_ms)
    -195            self.mass_spectrum = {"ms1":MSParameters(use_defaults=False), "ms2":MSParameters(use_defaults=False)}
    -196        else:
    -197            self.lc_ms = LiquidChromatographSetting()
    -198            self.mass_spectrum = {"ms1":MSParameters(use_defaults=True), "ms2":MSParameters(use_defaults=True)}
    +            
    210    def __init__(self, use_defaults=False) -> None:
    +211        if not use_defaults:
    +212            self.lc_ms = dataclasses.replace(LCMSParameters.lc_ms)
    +213            self.mass_spectrum = {
    +214                "ms1": MSParameters(use_defaults=False),
    +215                "ms2": MSParameters(use_defaults=False),
    +216            }
    +217        else:
    +218            self.lc_ms = LiquidChromatographSetting()
    +219            self.mass_spectrum = {
    +220                "ms1": MSParameters(use_defaults=True),
    +221                "ms2": MSParameters(use_defaults=True),
    +222            }
     
    @@ -1171,14 +1239,14 @@
    Notes
    -
    200    def copy(self):
    -201        """Create a copy of the LCMSParameters object"""
    -202        new_lcms_parameters = LCMSParameters()
    -203        new_lcms_parameters.lc_ms = dataclasses.replace(self.lc_ms)
    -204        for key in self.mass_spectrum:
    -205            new_lcms_parameters.mass_spectrum[key] = self.mass_spectrum[key].copy()
    -206
    -207        return new_lcms_parameters
    +            
    224    def copy(self):
    +225        """Create a copy of the LCMSParameters object"""
    +226        new_lcms_parameters = LCMSParameters()
    +227        new_lcms_parameters.lc_ms = dataclasses.replace(self.lc_ms)
    +228        for key in self.mass_spectrum:
    +229            new_lcms_parameters.mass_spectrum[key] = self.mass_spectrum[key].copy()
    +230
    +231        return new_lcms_parameters
     
    @@ -1198,20 +1266,20 @@
    Notes
    -
    229    def print(self):
    -230        """Print the LCMSParameters object"""
    -231        # Print the lcms paramters
    -232        for k, v in self.__dict__.items():
    -233            if k == "lc_ms":
    -234                print(k, type(v).__name__)
    -235
    -236        for k2, v2 in self.mass_spectrum.items():
    -237            """Print the MSParameters object"""
    -238            for k3, v3 in v2.__dict__.items():
    -239                print("{} - {}: {}".format(k2, k3, type(v3).__name__))
    -240
    -241                for k4, v4 in v3.__dict__.items():
    -242                    print("    {}: {}".format(k4, v4))
    +            
    266    def print(self):
    +267        """Print the LCMSParameters object"""
    +268        # Print the lcms paramters
    +269        for k, v in self.__dict__.items():
    +270            if k == "lc_ms":
    +271                print(k, type(v).__name__)
    +272
    +273        for k2, v2 in self.mass_spectrum.items():
    +274            """Print the MSParameters object"""
    +275            for k3, v3 in v2.__dict__.items():
    +276                print("{} - {}: {}".format(k2, k3, type(v3).__name__))
    +277
    +278                for k4, v4 in v3.__dict__.items():
    +279                    print("    {}: {}".format(k4, v4))
     
    @@ -1232,74 +1300,74 @@
    Notes
    -
    244def default_parameters(file_location):  # pragma: no cover
    -245    """Generate parameters dictionary with the default parameters for data processing
    -246       To gather parameters from instrument data during the data parsing step, a parameters dictionary with the default parameters needs to be generated.
    -247       This dictionary acts as a placeholder and is later used as an argument for all the class constructor methods during instantiation. 
    -248       The data gathered from the instrument is added to the class properties.
    -249
    -250    Parameters
    -251    ----------
    -252    file_location: str
    -253        path to the file
    -254
    -255    Returns
    -256    -------
    -257    parameters: dict
    -258        dictionary with the default parameters for data processing    
    -259    """
    -260
    -261    parameters = dict()
    -262
    -263    parameters["Aterm"] = 0
    -264
    -265    parameters["Bterm"] = 0
    -266
    -267    parameters["Cterm"] = 0
    -268
    -269    parameters["exc_high_freq"] = 0
    -270
    -271    parameters["exc_low_freq"] = 0
    -272
    -273    parameters["mw_low"] = 0
    -274        
    -275    parameters["mw_high"] = 0
    -276
    -277    parameters["qpd_enabled"] = 0
    -278
    -279    parameters["bandwidth"] = 0
    -280
    -281    parameters['analyzer'] = 'Unknown'
    -282
    -283    parameters['acquisition_time'] = None
    -284
    -285    parameters['instrument_label'] = 'Unknown' 
    -286
    -287    parameters['sample_name'] = 'Unknown'
    -288
    -289    parameters["number_data_points"] = 0
    -290
    -291    parameters["polarity"] = 'Unknown'
    +            
    282def default_parameters(file_location):  # pragma: no cover
    +283    """Generate parameters dictionary with the default parameters for data processing
    +284       To gather parameters from instrument data during the data parsing step, a parameters dictionary with the default parameters needs to be generated.
    +285       This dictionary acts as a placeholder and is later used as an argument for all the class constructor methods during instantiation.
    +286       The data gathered from the instrument is added to the class properties.
    +287
    +288    Parameters
    +289    ----------
    +290    file_location: str
    +291        path to the file
     292
    -293    parameters["filename_path"] = str(file_location)
    -294
    -295    """scan_number and rt will be need to lc ms"""
    -296
    -297    parameters["mobility_scan"] = 0
    +293    Returns
    +294    -------
    +295    parameters: dict
    +296        dictionary with the default parameters for data processing
    +297    """
     298
    -299    parameters["mobility_rt"] = 0
    +299    parameters = dict()
     300
    -301    parameters["scan_number"] = 0
    +301    parameters["Aterm"] = 0
     302
    -303    parameters["rt"] = 0
    +303    parameters["Bterm"] = 0
     304
    -305    return parameters
    +305    parameters["Cterm"] = 0
    +306
    +307    parameters["exc_high_freq"] = 0
    +308
    +309    parameters["exc_low_freq"] = 0
    +310
    +311    parameters["mw_low"] = 0
    +312
    +313    parameters["mw_high"] = 0
    +314
    +315    parameters["qpd_enabled"] = 0
    +316
    +317    parameters["bandwidth"] = 0
    +318
    +319    parameters["analyzer"] = "Unknown"
    +320
    +321    parameters["acquisition_time"] = None
    +322
    +323    parameters["instrument_label"] = "Unknown"
    +324
    +325    parameters["sample_name"] = "Unknown"
    +326
    +327    parameters["number_data_points"] = 0
    +328
    +329    parameters["polarity"] = "Unknown"
    +330
    +331    parameters["filename_path"] = str(file_location)
    +332
    +333    """scan_number and rt will be need to lc ms"""
    +334
    +335    parameters["mobility_scan"] = 0
    +336
    +337    parameters["mobility_rt"] = 0
    +338
    +339    parameters["scan_number"] = 0
    +340
    +341    parameters["rt"] = 0
    +342
    +343    return parameters
     

    Generate parameters dictionary with the default parameters for data processing To gather parameters from instrument data during the data parsing step, a parameters dictionary with the default parameters needs to be generated. - This dictionary acts as a placeholder and is later used as an argument for all the class constructor methods during instantiation. + This dictionary acts as a placeholder and is later used as an argument for all the class constructor methods during instantiation. The data gathered from the instrument is added to the class properties.

    Parameters
    diff --git a/docs/corems/encapsulation/factory/processingSetting.html b/docs/corems/encapsulation/factory/processingSetting.html index 9d7238ec..dbf8fcc1 100644 --- a/docs/corems/encapsulation/factory/processingSetting.html +++ b/docs/corems/encapsulation/factory/processingSetting.html @@ -615,8 +615,8 @@

    -
      1__author__ = 'Yuri E. Corilo'
    -  2__date__ = 'Jul 02, 2019'
    +                        
      1__author__ = "Yuri E. Corilo"
    +  2__date__ = "Jul 02, 2019"
       3
       4import dataclasses
       5import os
    @@ -624,887 +624,952 @@ 

    7 8from corems.encapsulation.constant import Atoms, Labels 9 - 10@dataclasses.dataclass - 11class TransientSetting: - 12 """Transient processing settings class - 13 - 14 Attributes - 15 ---------- - 16 implemented_apodization_function : tuple - 17 Available apodization functions - 18 apodization_method : str - 19 Apodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate. - 20 number_of_truncations : int - 21 How many times to truncate the transient prior to Fourier transform - 22 number_of_zero_fills : int - 23 How many times to zero fille the transient prior to Fourier transform. - 24 next_power_of_two : bool - 25 If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)). - 26 kaiser_beta : float - 27 Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular, 5 is similar to Hamming, - 28 6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs) - 29 - 30 """ - 31 implemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman','Full-Sine','Half-Sine','Kaiser','Half-Kaiser') - 32 apodization_method: str = 'Hanning' - 33 number_of_truncations: int = 0 - 34 number_of_zero_fills: int = 1 - 35 next_power_of_two: bool = False - 36 kaiser_beta: float = 8.6 - 37 - 38 def __post_init__(self): - 39 - 40 # enforce datatype - 41 for field in dataclasses.fields(self): - 42 value = getattr(self, field.name) - 43 if not isinstance(value, field.type): - 44 - 45 value = field.type(value) - 46 setattr(self, field.name, value) + 10 + 11@dataclasses.dataclass + 12class TransientSetting: + 13 """Transient processing settings class + 14 + 15 Attributes + 16 ---------- + 17 implemented_apodization_function : tuple + 18 Available apodization functions + 19 apodization_method : str + 20 Apodization function to use. 
Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate. + 21 number_of_truncations : int + 22 How many times to truncate the transient prior to Fourier transform + 23 number_of_zero_fills : int + 24 How many times to zero fille the transient prior to Fourier transform. + 25 next_power_of_two : bool + 26 If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)). + 27 kaiser_beta : float + 28 Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular, 5 is similar to Hamming, + 29 6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs) + 30 + 31 """ + 32 + 33 implemented_apodization_function: tuple = ( + 34 "Hamming", + 35 "Hanning", + 36 "Blackman", + 37 "Full-Sine", + 38 "Half-Sine", + 39 "Kaiser", + 40 "Half-Kaiser", + 41 ) + 42 apodization_method: str = "Hanning" + 43 number_of_truncations: int = 0 + 44 number_of_zero_fills: int = 1 + 45 next_power_of_two: bool = False + 46 kaiser_beta: float = 8.6 47 - 48@dataclasses.dataclass - 49class DataInputSetting: - 50 """Data input settings class - 51 - 52 Attributes - 53 ---------- - 54 header_translate : dict - 55 Dictionary with the header labels to be translated to the corems labels. 
For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'} - 56 """ - 57 # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER - 58 # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"} - 59 header_translate: dict = dataclasses.field(default_factory=dict) + 48 def __post_init__(self): + 49 # enforce datatype + 50 for field in dataclasses.fields(self): + 51 value = getattr(self, field.name) + 52 if not isinstance(value, field.type): + 53 value = field.type(value) + 54 setattr(self, field.name, value) + 55 + 56 + 57@dataclasses.dataclass + 58class DataInputSetting: + 59 """Data input settings class 60 - 61 def __post_init__(self): - 62 - 63 self.header_translate = {'m/z': Labels.mz, - 64 'mOz': Labels.mz, - 65 'Mass': Labels.mz, - 66 'Resolving Power': Labels.rp, - 67 'Res.': Labels.rp, - 68 'resolution': Labels.rp, - 69 'Intensity': Labels.abundance, - 70 'Peak Height': Labels.abundance, - 71 'I': Labels.abundance, - 72 'Abundance': Labels.abundance, - 73 'abs_abu': Labels.abundance, - 74 'Signal/Noise': Labels.s2n, - 75 'S/N': Labels.s2n, - 76 'sn': Labels.s2n} - 77 - 78 def add_mz_label(self, label): - 79 """Add a label to the header_translate dictionary to be translated to the corems label for mz.""" - 80 self.header_translate[label] = Labels.mz - 81 - 82 def add_peak_height_label(self, label): - 83 """Add a label to the header_translate dictionary to be translated to the corems label for peak height.""" - 84 - 85 self.header_translate[label] = Labels.abundance - 86 - 87 def add_sn_label(self, label): - 88 """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.""" - 89 self.header_translate[label] = Labels.s2n - 90 - 91 def add_resolving_power_label(self, label): - 92 """Add a label to the header_translate dictionary to be translated to the corems label for resolving power.""" - 93 
self.header_translate[label] = Labels.rp - 94 - 95@dataclasses.dataclass - 96class LiquidChromatographSetting: - 97 """Liquid chromatograph processing settings class - 98 - 99 Attributes -100 ---------- -101 scans : list or tuple, optional -102 List of select scan to average or a tuple containing the range to average. Default is (0, 1). -103 eic_tolerance_ppm : float, optional -104 Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5. -105 correct_eic_baseline : bool, optional -106 If True, correct the baseline of the extracted ion chromatogram. Default is True. -107 smooth_window : int, optional -108 Window size for smoothing the ion chromatogram (extracted or total). Default is 5. -109 smooth_method : str, optional -110 Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'. -111 implemented_smooth_method : tuple, optional -112 Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'). -113 savgol_pol_order : int, optional -114 Polynomial order for Savitzky-Golay smoothing. Default is 2. -115 peak_height_max_percent : float, optional -116 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10. -117 peak_max_prominence_percent : float, optional -118 1-100 % used for baseline detection. Default is 1. -119 peak_derivative_threshold : float, optional -120 Threshold for defining derivative crossing. Default is 0.0005. -121 min_peak_datapoints : float, optional -122 minimum data point to define a chromatografic peak. Default is 5. -123 noise_threshold_method : str, optional -124 Method for detecting noise threshold. Default is 'manual_relative_abundance'. -125 noise_threshold_methods_implemented : tuple, optional -126 Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'). 
-127 peak_height_min_percent : float, optional -128 0-100 % used for peak detection. Default is 0.1. -129 eic_signal_threshold : float, optional -130 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01. -131 eic_buffer_time : float, optional -132 Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5. -133 ph_smooth_it : int, optional -134 Number of iterations to use for smoothing prior to finding mass features. -135 Called within the PHCalculations.find_mass_features_ph() method. Default is 7. -136 ph_smooth_radius_mz : int, optional -137 Radius in m/z steps (not daltons) for smoothing prior to finding mass features. -138 Called within the PHCalculations.find_mass_features_ph() method. Default is 0. -139 ph_smooth_radius_scan : int, optional -140 Radius in scan steps for smoothing prior to finding mass features. -141 Called within the PHCalculations.find_mass_features_ph() method. Default is 3. -142 ph_inten_min_rel : int, optional -143 Relative minimum intensity to use for finding mass features. -144 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). -145 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. -146 ph_persis_min_rel : int, optional -147 Relative minimum persistence for retaining mass features. -148 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). -149 Should be greater to or equal to ph_inten_min_rel. -150 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. -151 mass_feature_cluster_mz_tolerance_rel : float, optional -152 Relative m/z tolerance to use for clustering mass features. -153 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. -154 Default is 5E-6 (5 ppm). 
-155 mass_feature_cluster_rt_tolerance : float, optional -156 Retention time tolerance to use for clustering mass features, in minutes. -157 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. -158 Default is 0.2. -159 ms1_scans_to_average : int, optional -160 Number of MS1 scans to average for mass-feature associated m/zs. -161 Called within the LCMSBase.add_associated_ms1() method. Default is 1. -162 ms1_deconvolution_corr_min : float, optional -163 Minimum correlation to use for deconvoluting MS1 mass features. -164 Called within the LCCalculations.deconvolute_ms1_mass_features() method. -165 Default is 0.8. -166 ms2_dda_rt_tolerance : float, optional -167 Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15. -168 ms2_dda_mz_tolerance : float, optional -169 Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05. -170 ms2_min_fe_score : float, optional -171 Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2. -172 search_as_lipids : bool, optional -173 If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False. -174 include_fragment_types : bool, optional -175 If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False. -176 verbose_processing : bool, optional -177 If True, print verbose processing information. Default is True. 
-178 """ -179 scans: list | tuple = (-1,-1) -180 -181 # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing -182 eic_tolerance_ppm: float = 5 -183 correct_eic_baseline = True -184 smooth_window: int = 5 -185 smooth_method: str = 'savgol' -186 implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar') -187 savgol_pol_order: int = 2 -188 peak_height_max_percent: float = 10 -189 peak_max_prominence_percent: float = 1 -190 peak_derivative_threshold:float = 0.0005 -191 min_peak_datapoints: float = 5 -192 noise_threshold_method: str = 'manual_relative_abundance' -193 noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative') -194 peak_height_min_percent: float = 0.1 -195 eic_signal_threshold: float = 0.01 -196 eic_buffer_time = 1.5 -197 -198 # Parameters used for 2D peak picking -199 peak_picking_method: str = "persistent homology" -200 implemented_peak_picking_methods: tuple = ('persistent homology',) -201 -202 # Parameters used in persistent homology calculations -203 ph_smooth_it = 1 -204 ph_smooth_radius_mz = 0 -205 ph_smooth_radius_scan = 1 -206 ph_inten_min_rel = 0.001 -207 ph_persis_min_rel = 0.001 -208 -209 # Parameters used to cluster mass features -210 mass_feature_cluster_mz_tolerance_rel: float = 5E-6 -211 mass_feature_cluster_rt_tolerance: float = 0.3 -212 -213 # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features -214 ms1_scans_to_average: int = 1 -215 ms1_deconvolution_corr_min: float = 0.8 -216 ms2_dda_rt_tolerance: float = 0.15 -217 ms2_dda_mz_tolerance: float = 0.05 -218 -219 # Parameters used for flash entropy searching and database preparation -220 ms2_min_fe_score: float = 0.2 -221 search_as_lipids: bool = False -222 include_fragment_types: bool = False -223 -224 # Parameters used for saving the data -225 export_profile_spectra: bool = False -226 
export_eics: bool = True -227 export_unprocessed_ms1: bool = False -228 -229 # Parameters used for verbose processing -230 verbose_processing: bool = True -231 -232 def __post_init__(self): -233 # enforce datatype -234 for field in dataclasses.fields(self): -235 value = getattr(self, field.name) -236 if not isinstance(value, field.type): -237 -238 value = field.type(value) -239 setattr(self, field.name, value) -240 -241@dataclasses.dataclass -242class MassSpectrumSetting: -243 """Mass spectrum processing settings class -244 -245 Attributes -246 ---------- -247 noise_threshold_method : str, optional -248 Method for detecting noise threshold. Default is 'log'. -249 noise_threshold_methods_implemented : tuple, optional -250 Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'). -251 noise_threshold_min_std : int, optional -252 Minumum value for noise thresholding when using 'minima' noise threshold method. Default is 6. -253 noise_threshold_min_s2n : float, optional -254 Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4. -255 noise_threshold_min_relative_abundance : float, optional -256 Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%). -257 noise_threshold_absolute_abundance : float, optional -258 Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000. -259 noise_threshold_log_nsigma : int, optional -260 Number of standard deviations to use when using 'log' noise threshold method. Default is 6. -261 noise_threshold_log_nsigma_corr_factor : float, optional -262 Correction factor for log noise threshold method. Default is 0.463. -263 noise_threshold_log_nsigma_bins : int, optional -264 Number of bins to use for histogram when using 'log' noise threshold method. 
Default is 500. -265 noise_min_mz : float, optional -266 Minimum m/z to use for noise thresholding. Default is 50.0. -267 noise_max_mz : float, optional -268 Maximum m/z to use for noise thresholding. Default is 1200.0. -269 min_picking_mz : float, optional -270 Minimum m/z to use for peak picking. Default is 50.0. -271 max_picking_mz : float, optional -272 Maximum m/z to use for peak picking. Default is 1200.0. -273 picking_point_extrapolate : int, optional -274 How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3. -275 Recommend 3 for reduced profile data or if peak picking faults -276 calib_minimize_method : str, optional -277 Minimization method to use for calibration. Default is 'Powell'. -278 calib_pol_order : int, optional -279 Polynomial order to use for calibration. Default is 2. -280 max_calib_ppm_error : float, optional -281 Maximum ppm error to use for calibration. Default is 1.0. -282 min_calib_ppm_error : float, optional -283 Minimum ppm error to use for calibration. Default is -1.0. -284 calib_sn_threshold : float, optional -285 Signal to noise threshold to use for calibration. Default is 2.0. -286 calibration_ref_match_method: string, optional -287 Method for matching reference masses with measured masses for recalibration. Default is 'legacy'. -288 calibration_ref_match_tolerance: float, optional -289 If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003 -290 do_calibration : bool, optional -291 If True, perform calibration. Default is True. -292 verbose_processing : bool, optional -293 If True, print verbose processing information. Default is True. 
-294 """ -295 noise_threshold_method: str = 'log' -296 -297 noise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log') -298 -299 noise_threshold_min_std: int = 6 # when using 'minima' method -300 -301 noise_threshold_min_s2n: float = 4 # when using 'signal_noise' method -302 -303 noise_threshold_min_relative_abundance: float = 6 # from 0-100, when using 'relative_abundance' method -304 -305 noise_threshold_absolute_abundance: float = 1_000_000 # when using 'absolute_abundance' method -306 -307 noise_threshold_log_nsigma: int = 6 # when using 'log' method -308 noise_threshold_log_nsigma_corr_factor: float = 0.463 #mFT is 0.463, aFT is 1.0 -309 noise_threshold_log_nsigma_bins: int = 500 # bins for the histogram for the noise -310 -311 noise_min_mz: float = 50.0 -312 noise_max_mz: float = 1200.0 -313 -314 min_picking_mz: float = 50.0 -315 max_picking_mz: float = 1200.0 -316 -317 # How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis -318 # This will fix peak picking at spectrum limit issues -319 # 0 to keep normal behaviour, typical value 3 to fix -320 picking_point_extrapolate: int = 3 + 61 Attributes + 62 ---------- + 63 header_translate : dict + 64 Dictionary with the header labels to be translated to the corems labels. 
For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'} + 65 """ + 66 + 67 # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER + 68 # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"} + 69 header_translate: dict = dataclasses.field(default_factory=dict) + 70 + 71 def __post_init__(self): + 72 self.header_translate = { + 73 "m/z": Labels.mz, + 74 "mOz": Labels.mz, + 75 "Mass": Labels.mz, + 76 "Resolving Power": Labels.rp, + 77 "Res.": Labels.rp, + 78 "resolution": Labels.rp, + 79 "Intensity": Labels.abundance, + 80 "Peak Height": Labels.abundance, + 81 "I": Labels.abundance, + 82 "Abundance": Labels.abundance, + 83 "abs_abu": Labels.abundance, + 84 "Signal/Noise": Labels.s2n, + 85 "S/N": Labels.s2n, + 86 "sn": Labels.s2n, + 87 } + 88 + 89 def add_mz_label(self, label): + 90 """Add a label to the header_translate dictionary to be translated to the corems label for mz.""" + 91 self.header_translate[label] = Labels.mz + 92 + 93 def add_peak_height_label(self, label): + 94 """Add a label to the header_translate dictionary to be translated to the corems label for peak height.""" + 95 + 96 self.header_translate[label] = Labels.abundance + 97 + 98 def add_sn_label(self, label): + 99 """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.""" +100 self.header_translate[label] = Labels.s2n +101 +102 def add_resolving_power_label(self, label): +103 """Add a label to the header_translate dictionary to be translated to the corems label for resolving power.""" +104 self.header_translate[label] = Labels.rp +105 +106 +107@dataclasses.dataclass +108class LiquidChromatographSetting: +109 """Liquid chromatograph processing settings class +110 +111 Attributes +112 ---------- +113 scans : list or tuple, optional +114 List of select scan to average or a tuple containing the range to average. 
Default is (0, 1). +115 eic_tolerance_ppm : float, optional +116 Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5. +117 correct_eic_baseline : bool, optional +118 If True, correct the baseline of the extracted ion chromatogram. Default is True. +119 smooth_window : int, optional +120 Window size for smoothing the ion chromatogram (extracted or total). Default is 5. +121 smooth_method : str, optional +122 Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'. +123 implemented_smooth_method : tuple, optional +124 Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'). +125 savgol_pol_order : int, optional +126 Polynomial order for Savitzky-Golay smoothing. Default is 2. +127 peak_height_max_percent : float, optional +128 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10. +129 peak_max_prominence_percent : float, optional +130 1-100 % used for baseline detection. Default is 1. +131 peak_derivative_threshold : float, optional +132 Threshold for defining derivative crossing. Default is 0.0005. +133 min_peak_datapoints : float, optional +134 minimum data point to define a chromatografic peak. Default is 5. +135 noise_threshold_method : str, optional +136 Method for detecting noise threshold. Default is 'manual_relative_abundance'. +137 noise_threshold_methods_implemented : tuple, optional +138 Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'). +139 peak_height_min_percent : float, optional +140 0-100 % used for peak detection. Default is 0.1. +141 eic_signal_threshold : float, optional +142 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01. 
+143 eic_buffer_time : float, optional +144 Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5. +145 ph_smooth_it : int, optional +146 Number of iterations to use for smoothing prior to finding mass features. +147 Called within the PHCalculations.find_mass_features_ph() method. Default is 7. +148 ph_smooth_radius_mz : int, optional +149 Radius in m/z steps (not daltons) for smoothing prior to finding mass features. +150 Called within the PHCalculations.find_mass_features_ph() method. Default is 0. +151 ph_smooth_radius_scan : int, optional +152 Radius in scan steps for smoothing prior to finding mass features. +153 Called within the PHCalculations.find_mass_features_ph() method. Default is 3. +154 ph_inten_min_rel : int, optional +155 Relative minimum intensity to use for finding mass features. +156 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). +157 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. +158 ph_persis_min_rel : int, optional +159 Relative minimum persistence for retaining mass features. +160 Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). +161 Should be greater to or equal to ph_inten_min_rel. +162 Called within the PH_Calculations.find_mass_features() method. Default is 0.001. +163 mass_feature_cluster_mz_tolerance_rel : float, optional +164 Relative m/z tolerance to use for clustering mass features. +165 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. +166 Default is 5E-6 (5 ppm). +167 mass_feature_cluster_rt_tolerance : float, optional +168 Retention time tolerance to use for clustering mass features, in minutes. +169 Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. +170 Default is 0.2. 
+171 ms1_scans_to_average : int, optional +172 Number of MS1 scans to average for mass-feature associated m/zs. +173 Called within the LCMSBase.add_associated_ms1() method. Default is 1. +174 ms1_deconvolution_corr_min : float, optional +175 Minimum correlation to use for deconvoluting MS1 mass features. +176 Called within the LCCalculations.deconvolute_ms1_mass_features() method. +177 Default is 0.8. +178 ms2_dda_rt_tolerance : float, optional +179 Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15. +180 ms2_dda_mz_tolerance : float, optional +181 Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05. +182 ms2_min_fe_score : float, optional +183 Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2. +184 search_as_lipids : bool, optional +185 If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False. +186 include_fragment_types : bool, optional +187 If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False. +188 verbose_processing : bool, optional +189 If True, print verbose processing information. Default is True. 
+190 """ +191 +192 scans: list | tuple = (-1, -1) +193 +194 # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing +195 eic_tolerance_ppm: float = 5 +196 correct_eic_baseline = True +197 smooth_window: int = 5 +198 smooth_method: str = "savgol" +199 implemented_smooth_method: tuple = ( +200 "savgol", +201 "hanning", +202 "blackman", +203 "bartlett", +204 "flat", +205 "boxcar", +206 ) +207 savgol_pol_order: int = 2 +208 peak_height_max_percent: float = 10 +209 peak_max_prominence_percent: float = 1 +210 peak_derivative_threshold: float = 0.0005 +211 min_peak_datapoints: float = 5 +212 noise_threshold_method: str = "manual_relative_abundance" +213 noise_threshold_methods_implemented: tuple = ( +214 "auto_relative_abundance", +215 "manual_relative_abundance", +216 "second_derivative", +217 ) +218 peak_height_min_percent: float = 0.1 +219 eic_signal_threshold: float = 0.01 +220 eic_buffer_time = 1.5 +221 +222 # Parameters used for 2D peak picking +223 peak_picking_method: str = "persistent homology" +224 implemented_peak_picking_methods: tuple = ("persistent homology",) +225 +226 # Parameters used in persistent homology calculations +227 ph_smooth_it = 1 +228 ph_smooth_radius_mz = 0 +229 ph_smooth_radius_scan = 1 +230 ph_inten_min_rel = 0.001 +231 ph_persis_min_rel = 0.001 +232 +233 # Parameters used to cluster mass features +234 mass_feature_cluster_mz_tolerance_rel: float = 5e-6 +235 mass_feature_cluster_rt_tolerance: float = 0.3 +236 +237 # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features +238 ms1_scans_to_average: int = 1 +239 ms1_deconvolution_corr_min: float = 0.8 +240 ms2_dda_rt_tolerance: float = 0.15 +241 ms2_dda_mz_tolerance: float = 0.05 +242 +243 # Parameters used for flash entropy searching and database preparation +244 ms2_min_fe_score: float = 0.2 +245 search_as_lipids: bool = False +246 include_fragment_types: bool = False +247 +248 # Parameters used for 
saving the data +249 export_profile_spectra: bool = False +250 export_eics: bool = True +251 export_unprocessed_ms1: bool = False +252 +253 # Parameters used for verbose processing +254 verbose_processing: bool = True +255 +256 def __post_init__(self): +257 # enforce datatype +258 for field in dataclasses.fields(self): +259 value = getattr(self, field.name) +260 if not isinstance(value, field.type): +261 value = field.type(value) +262 setattr(self, field.name, value) +263 +264 +265@dataclasses.dataclass +266class MassSpectrumSetting: +267 """Mass spectrum processing settings class +268 +269 Attributes +270 ---------- +271 noise_threshold_method : str, optional +272 Method for detecting noise threshold. Default is 'log'. +273 noise_threshold_methods_implemented : tuple, optional +274 Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'). +275 noise_threshold_min_std : int, optional +276 Minumum value for noise thresholding when using 'minima' noise threshold method. Default is 6. +277 noise_threshold_min_s2n : float, optional +278 Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4. +279 noise_threshold_min_relative_abundance : float, optional +280 Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%). +281 noise_threshold_absolute_abundance : float, optional +282 Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000. +283 noise_threshold_log_nsigma : int, optional +284 Number of standard deviations to use when using 'log' noise threshold method. Default is 6. +285 noise_threshold_log_nsigma_corr_factor : float, optional +286 Correction factor for log noise threshold method. Default is 0.463. 
+287 noise_threshold_log_nsigma_bins : int, optional +288 Number of bins to use for histogram when using 'log' noise threshold method. Default is 500. +289 noise_min_mz : float, optional +290 Minimum m/z to use for noise thresholding. Default is 50.0. +291 noise_max_mz : float, optional +292 Maximum m/z to use for noise thresholding. Default is 1200.0. +293 min_picking_mz : float, optional +294 Minimum m/z to use for peak picking. Default is 50.0. +295 max_picking_mz : float, optional +296 Maximum m/z to use for peak picking. Default is 1200.0. +297 picking_point_extrapolate : int, optional +298 How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3. +299 Recommend 3 for reduced profile data or if peak picking faults +300 calib_minimize_method : str, optional +301 Minimization method to use for calibration. Default is 'Powell'. +302 calib_pol_order : int, optional +303 Polynomial order to use for calibration. Default is 2. +304 max_calib_ppm_error : float, optional +305 Maximum ppm error to use for calibration. Default is 1.0. +306 min_calib_ppm_error : float, optional +307 Minimum ppm error to use for calibration. Default is -1.0. +308 calib_sn_threshold : float, optional +309 Signal to noise threshold to use for calibration. Default is 2.0. +310 calibration_ref_match_method: string, optional +311 Method for matching reference masses with measured masses for recalibration. Default is 'legacy'. +312 calibration_ref_match_tolerance: float, optional +313 If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003 +314 do_calibration : bool, optional +315 If True, perform calibration. Default is True. +316 verbose_processing : bool, optional +317 If True, print verbose processing information. Default is True. 
+318 """ +319 +320 noise_threshold_method: str = "log" 321 -322 calib_minimize_method: str = 'Powell' -323 calib_pol_order: int = 2 -324 max_calib_ppm_error: float = 1.0 -325 min_calib_ppm_error: float = -1.0 -326 calib_sn_threshold: float = 2.0 -327 calibration_ref_match_method: str = 'legacy' -328 calibration_ref_match_method_implemented: tuple = ('legacy', 'merged') -329 calibration_ref_match_tolerance: float = 0.003 -330 calibration_ref_match_std_raw_error_limit: float = 1.5 -331 #calib_ref_mzs: list = [0] -332 -333 do_calibration: bool = True -334 verbose_processing: bool = True -335 -336 def __post_init__(self): -337 # enforce datatype -338 for field in dataclasses.fields(self): -339 value = getattr(self, field.name) -340 if not isinstance(value, field.type): +322 noise_threshold_methods_implemented: tuple = ( +323 "minima", +324 "signal_noise", +325 "relative_abundance", +326 "absolute_abundance", +327 "log", +328 ) +329 +330 noise_threshold_min_std: int = 6 # when using 'minima' method +331 +332 noise_threshold_min_s2n: float = 4 # when using 'signal_noise' method +333 +334 noise_threshold_min_relative_abundance: float = ( +335 6 # from 0-100, when using 'relative_abundance' method +336 ) +337 +338 noise_threshold_absolute_abundance: float = ( +339 1_000_000 # when using 'absolute_abundance' method +340 ) 341 -342 value = field.type(value) -343 setattr(self, field.name, value) -344 -345@dataclasses.dataclass -346class MassSpecPeakSetting: -347 """Mass spectrum peak processing settings class +342 noise_threshold_log_nsigma: int = 6 # when using 'log' method +343 noise_threshold_log_nsigma_corr_factor: float = 0.463 # mFT is 0.463, aFT is 1.0 +344 noise_threshold_log_nsigma_bins: int = 500 # bins for the histogram for the noise +345 +346 noise_min_mz: float = 50.0 +347 noise_max_mz: float = 1200.0 348 -349 Attributes -350 ---------- -351 kendrick_base : Dict, optional -352 Dictionary specifying the elements and their counts in the Kendrick base. 
-353 Defaults to {'C': 1, 'H': 2}. -354 kendrick_rounding_method : str, optional -355 Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'. -356 Defaults to 'floor'. -357 implemented_kendrick_rounding_methods : tuple -358 Tuple of valid rounding methods for calculating the nominal Kendrick mass. -359 Defaults to ('floor', 'ceil', 'round'). -360 peak_derivative_threshold : float, optional -361 Threshold for defining derivative crossing. Should be a value between 0 and 1. -362 Defaults to 0.0. -363 peak_min_prominence_percent : float, optional -364 Minimum prominence percentage used for peak detection. Should be a value between 1 and 100. -365 Defaults to 0.1. -366 min_peak_datapoints : float, optional -367 Minimum number of data points used for peak detection. Should be a value between 0 and infinity. -368 Defaults to 5. -369 peak_max_prominence_percent : float, optional -370 Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. -371 Defaults to 0.1. -372 peak_height_max_percent : float, optional -373 Maximum height percentage used for baseline detection. Should be a value between 1 and 100. -374 Defaults to 10. -375 legacy_resolving_power : bool, optional -376 Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation. -377 Defaults to True. -378 legacy_centroid_polyfit : bool, optional -379 Use legacy (numpy polyfit) to fit centroid -380 Default false. 
-381 """ -382 -383 kendrick_base: Dict = dataclasses.field(default_factory=dict) -384 -385 kendrick_rounding_method: str = 'floor' # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass -386 -387 implemented_kendrick_rounding_methods : tuple = ('floor','ceil','round') -388 -389 peak_derivative_threshold: float = 0.0 # define derivative crossing threshould 0-1 -390 -391 peak_min_prominence_percent: float = 0.1 # 1-100 % used for peak detection -392 -393 min_peak_datapoints: float = 5 # 0-inf used for peak detection -394 -395 peak_max_prominence_percent: float = 0.1 # 1-100 % used for baseline detection -396 -397 peak_height_max_percent: float = 10 # 1-100 % used for baseline detection -398 -399 legacy_resolving_power: bool = True # Use the legacy (CoreMS v1) resolving power calculation (True) -400 -401 legacy_centroid_polyfit: bool = False -402 -403 def __post_init__(self): -404 -405 # default to CH2 -406 if not self.kendrick_base: -407 self.kendrick_base = {'C': 1, 'H': 2} -408 # enforce datatype -409 for field in dataclasses.fields(self): -410 value = getattr(self, field.name) -411 if not isinstance(value, field.type): -412 -413 value = field.type(value) -414 setattr(self, field.name, value) -415 -416@dataclasses.dataclass -417class GasChromatographSetting: -418 """Gas chromatograph processing settings class +349 min_picking_mz: float = 50.0 +350 max_picking_mz: float = 1200.0 +351 +352 # How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis +353 # This will fix peak picking at spectrum limit issues +354 # 0 to keep normal behaviour, typical value 3 to fix +355 picking_point_extrapolate: int = 3 +356 +357 calib_minimize_method: str = "Powell" +358 calib_pol_order: int = 2 +359 max_calib_ppm_error: float = 1.0 +360 min_calib_ppm_error: float = -1.0 +361 calib_sn_threshold: float = 2.0 +362 calibration_ref_match_method: str = "legacy" +363 calibration_ref_match_method_implemented: tuple = 
("legacy", "merged") +364 calibration_ref_match_tolerance: float = 0.003 +365 calibration_ref_match_std_raw_error_limit: float = 1.5 +366 # calib_ref_mzs: list = [0] +367 +368 do_calibration: bool = True +369 verbose_processing: bool = True +370 +371 def __post_init__(self): +372 # enforce datatype +373 for field in dataclasses.fields(self): +374 value = getattr(self, field.name) +375 if not isinstance(value, field.type): +376 value = field.type(value) +377 setattr(self, field.name, value) +378 +379 +380@dataclasses.dataclass +381class MassSpecPeakSetting: +382 """Mass spectrum peak processing settings class +383 +384 Attributes +385 ---------- +386 kendrick_base : Dict, optional +387 Dictionary specifying the elements and their counts in the Kendrick base. +388 Defaults to {'C': 1, 'H': 2}. +389 kendrick_rounding_method : str, optional +390 Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'. +391 Defaults to 'floor'. +392 implemented_kendrick_rounding_methods : tuple +393 Tuple of valid rounding methods for calculating the nominal Kendrick mass. +394 Defaults to ('floor', 'ceil', 'round'). +395 peak_derivative_threshold : float, optional +396 Threshold for defining derivative crossing. Should be a value between 0 and 1. +397 Defaults to 0.0. +398 peak_min_prominence_percent : float, optional +399 Minimum prominence percentage used for peak detection. Should be a value between 1 and 100. +400 Defaults to 0.1. +401 min_peak_datapoints : float, optional +402 Minimum number of data points used for peak detection. Should be a value between 0 and infinity. +403 Defaults to 5. +404 peak_max_prominence_percent : float, optional +405 Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. +406 Defaults to 0.1. +407 peak_height_max_percent : float, optional +408 Maximum height percentage used for baseline detection. Should be a value between 1 and 100. +409 Defaults to 10. 
+410 legacy_resolving_power : bool, optional +411 Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation. +412 Defaults to True. +413 legacy_centroid_polyfit : bool, optional +414 Use legacy (numpy polyfit) to fit centroid +415 Default false. +416 """ +417 +418 kendrick_base: Dict = dataclasses.field(default_factory=dict) 419 -420 Attributes -421 ---------- -422 use_deconvolution : bool, optional -423 If True, use deconvolution. Default is False. -424 implemented_smooth_method : tuple, optional -425 Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'). -426 smooth_window : int, optional -427 Window size for smoothing the ion chromatogram. Default is 5. -428 smooth_method : str, optional -429 Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'. -430 savgol_pol_order : int, optional -431 Polynomial order for Savitzky-Golay smoothing. Default is 2. -432 peak_derivative_threshold : float, optional -433 Threshold for defining derivative crossing. Should be a value between 0 and 1. -434 Defaults to 0.0005. -435 peak_height_max_percent : float, optional -436 Maximum height percentage used for baseline detection. Should be a value between 1 and 100. -437 Defaults to 10. -438 peak_max_prominence_percent : float, optional -439 Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. -440 Defaults to 1. -441 min_peak_datapoints : float, optional -442 Minimum number of data points used for peak detection. Should be a value between 0 and infinity. -443 Defaults to 5. -444 max_peak_width : float, optional -445 Maximum peak width used for peak detection. Should be a value between 0 and infinity. -446 Defaults to 0.1. -447 noise_threshold_method : str, optional -448 Method for detecting noise threshold. Default is 'manual_relative_abundance'. 
-449 noise_threshold_methods_implemented : tuple, optional -450 Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'). -451 std_noise_threshold : int, optional -452 Default is 3. -453 peak_height_min_percent : float, optional -454 0-100 % used for peak detection. Default is 0.1. -455 peak_min_prominence_percent : float, optional -456 0-100 % used for peak detection. Default is 0.1. -457 eic_signal_threshold : float, optional -458 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01. -459 max_rt_distance : float, optional -460 Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025. -461 verbose_processing : bool, optional -462 If True, print verbose processing information. Default is True. -463 """ -464 use_deconvolution: bool = False -465 -466 implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar') -467 -468 smooth_window: int = 5 -469 -470 smooth_method: str = 'savgol' -471 -472 savgol_pol_order: int = 2 -473 -474 peak_derivative_threshold:float = 0.0005 -475 -476 peak_height_max_percent: float = 10 # 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods -477 -478 peak_max_prominence_percent: float = 1 # 1-100 % used for baseline detection -479 -480 min_peak_datapoints: float = 5 -481 -482 max_peak_width: float = 0.1 -483 -484 noise_threshold_method: str = 'manual_relative_abundance' -485 -486 noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', -487 'second_derivative') -488 -489 std_noise_threshold: int = 3 -490 -491 peak_height_min_percent: float = 0.1 # 0-100 % used for peak detection -492 -493 peak_min_prominence_percent: float = 0.1 # 0-100 % used for peak detection -494 -495 eic_signal_threshold: float = 0.01 # 0-100 % used for extracted ion chromatogram peak detection -496 -497 
max_rt_distance: float = 0.025 # minutes, max distance allowance hierarchical clutter -498 -499 verbose_processing: bool = True +420 kendrick_rounding_method: str = "floor" # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass +421 +422 implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round") +423 +424 peak_derivative_threshold: float = 0.0 # define derivative crossing threshould 0-1 +425 +426 peak_min_prominence_percent: float = 0.1 # 1-100 % used for peak detection +427 +428 min_peak_datapoints: float = 5 # 0-inf used for peak detection +429 +430 peak_max_prominence_percent: float = 0.1 # 1-100 % used for baseline detection +431 +432 peak_height_max_percent: float = 10 # 1-100 % used for baseline detection +433 +434 legacy_resolving_power: bool = ( +435 True # Use the legacy (CoreMS v1) resolving power calculation (True) +436 ) +437 +438 legacy_centroid_polyfit: bool = False +439 +440 def __post_init__(self): +441 # default to CH2 +442 if not self.kendrick_base: +443 self.kendrick_base = {"C": 1, "H": 2} +444 # enforce datatype +445 for field in dataclasses.fields(self): +446 value = getattr(self, field.name) +447 if not isinstance(value, field.type): +448 value = field.type(value) +449 setattr(self, field.name, value) +450 +451 +452@dataclasses.dataclass +453class GasChromatographSetting: +454 """Gas chromatograph processing settings class +455 +456 Attributes +457 ---------- +458 use_deconvolution : bool, optional +459 If True, use deconvolution. Default is False. +460 implemented_smooth_method : tuple, optional +461 Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'). +462 smooth_window : int, optional +463 Window size for smoothing the ion chromatogram. Default is 5. +464 smooth_method : str, optional +465 Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'. 
+466 savgol_pol_order : int, optional +467 Polynomial order for Savitzky-Golay smoothing. Default is 2. +468 peak_derivative_threshold : float, optional +469 Threshold for defining derivative crossing. Should be a value between 0 and 1. +470 Defaults to 0.0005. +471 peak_height_max_percent : float, optional +472 Maximum height percentage used for baseline detection. Should be a value between 1 and 100. +473 Defaults to 10. +474 peak_max_prominence_percent : float, optional +475 Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100. +476 Defaults to 1. +477 min_peak_datapoints : float, optional +478 Minimum number of data points used for peak detection. Should be a value between 0 and infinity. +479 Defaults to 5. +480 max_peak_width : float, optional +481 Maximum peak width used for peak detection. Should be a value between 0 and infinity. +482 Defaults to 0.1. +483 noise_threshold_method : str, optional +484 Method for detecting noise threshold. Default is 'manual_relative_abundance'. +485 noise_threshold_methods_implemented : tuple, optional +486 Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'). +487 std_noise_threshold : int, optional +488 Default is 3. +489 peak_height_min_percent : float, optional +490 0-100 % used for peak detection. Default is 0.1. +491 peak_min_prominence_percent : float, optional +492 0-100 % used for peak detection. Default is 0.1. +493 eic_signal_threshold : float, optional +494 0-100 % used for extracted ion chromatogram peak detection. Default is 0.01. +495 max_rt_distance : float, optional +496 Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025. +497 verbose_processing : bool, optional +498 If True, print verbose processing information. Default is True. 
+499 """ 500 -501 def __post_init__(self): +501 use_deconvolution: bool = False 502 -503 # enforce datatype -504 for field in dataclasses.fields(self): -505 value = getattr(self, field.name) -506 if not isinstance(value, field.type): -507 -508 value = field.type(value) -509 setattr(self, field.name, value) -510 -511@dataclasses.dataclass -512class CompoundSearchSettings: -513 """Settings for compound search -514 -515 Attributes -516 ---------- -517 url_database : str, optional -518 URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'. -519 ri_search_range : float, optional -520 Retention index search range. Default is 35. -521 rt_search_range : float, optional -522 Retention time search range, in minutes. Default is 1.0. -523 correlation_threshold : float, optional -524 Threshold for correlation for spectral similarity. Default is 0.5. -525 score_threshold : float, optional -526 Threshold for compsite score. Default is 0.0. -527 ri_spacing : float, optional -528 Retention index spacing. Default is 200. -529 ri_std : float, optional -530 Retention index standard deviation. Default is 3. -531 ri_calibration_compound_names : list, optional -532 List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate']. 
-533 -534 """ -535 url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres" # 'postgresql://postgres:labthomson0102@172.22.113.27:5432/GCMS' # 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite' -536 -537 ri_search_range: float = 35 -538 -539 rt_search_range: float = 1.0 # used for retention index calibration -540 -541 correlation_threshold: float = 0.5 # used for calibration, spectral similarity -542 -543 score_threshold: float = 0.0 -544 -545 ri_spacing: float = 200 -546 -547 ri_std: float = 3 # in standard deviation -548 -549 ri_calibration_compound_names: List = dataclasses.field(default_factory=list) -550 -551 # calculates and export all spectral similarity methods -552 exploratory_mode: bool = False -553 -554 score_methods: tuple = ('highest_sim_score', 'highest_ss') -555 -556 output_score_method: str = 'All' -557 -558 +503 implemented_smooth_method: tuple = ( +504 "savgol", +505 "hanning", +506 "blackman", +507 "bartlett", +508 "flat", +509 "boxcar", +510 ) +511 +512 smooth_window: int = 5 +513 +514 smooth_method: str = "savgol" +515 +516 savgol_pol_order: int = 2 +517 +518 peak_derivative_threshold: float = 0.0005 +519 +520 peak_height_max_percent: float = 10 # 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods +521 +522 peak_max_prominence_percent: float = 1 # 1-100 % used for baseline detection +523 +524 min_peak_datapoints: float = 5 +525 +526 max_peak_width: float = 0.1 +527 +528 noise_threshold_method: str = "manual_relative_abundance" +529 +530 noise_threshold_methods_implemented: tuple = ( +531 "auto_relative_abundance", +532 "manual_relative_abundance", +533 "second_derivative", +534 ) +535 +536 std_noise_threshold: int = 3 +537 +538 peak_height_min_percent: float = 0.1 # 0-100 % used for peak detection +539 +540 peak_min_prominence_percent: float = 0.1 # 0-100 % used for peak detection +541 +542 eic_signal_threshold: float = ( +543 0.01 # 0-100 % used for extracted ion 
chromatogram peak detection +544 ) +545 +546 max_rt_distance: float = ( +547 0.025 # minutes, max distance allowance hierarchical clutter +548 ) +549 +550 verbose_processing: bool = True +551 +552 def __post_init__(self): +553 # enforce datatype +554 for field in dataclasses.fields(self): +555 value = getattr(self, field.name) +556 if not isinstance(value, field.type): +557 value = field.type(value) +558 setattr(self, field.name, value) 559 -560 def __post_init__(self): -561 # enforce datatype -562 self.url_database = os.getenv('SPECTRAL_GCMS_DATABASE_URL', 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite') -563 -564 for field in dataclasses.fields(self): -565 value = getattr(self, field.name) -566 if not isinstance(value, field.type): -567 -568 value = field.type(value) -569 setattr(self, field.name, value) -570 -571 self.ri_calibration_compound_names = ['Methyl Caprylate', -572 'Methyl Caprate', -573 'Methyl Pelargonate', -574 'Methyl Laurate', -575 'Methyl Myristate', -576 'Methyl Palmitate', -577 'Methyl Stearate', -578 'Methyl Eicosanoate', -579 'Methyl Docosanoate', -580 'Methyl Linocerate', -581 'Methyl Hexacosanoate', -582 'Methyl Octacosanoate', -583 'Methyl Triacontanoate'] -584 -585class MolecularLookupDictSettings: -586 """Settings for molecular searching +560 +561@dataclasses.dataclass +562class CompoundSearchSettings: +563 """Settings for compound search +564 +565 Attributes +566 ---------- +567 url_database : str, optional +568 URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'. +569 ri_search_range : float, optional +570 Retention index search range. Default is 35. +571 rt_search_range : float, optional +572 Retention time search range, in minutes. Default is 1.0. +573 correlation_threshold : float, optional +574 Threshold for correlation for spectral similarity. Default is 0.5. +575 score_threshold : float, optional +576 Threshold for compsite score. Default is 0.0. 
+577 ri_spacing : float, optional +578 Retention index spacing. Default is 200. +579 ri_std : float, optional +580 Retention index standard deviation. Default is 3. +581 ri_calibration_compound_names : list, optional +582 List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate']. +583 +584 """ +585 +586 url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres" # 'postgresql://postgres:labthomson0102@172.22.113.27:5432/GCMS' # 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite' 587 -588 These are used to generate the database entries, do not change. +588 ri_search_range: float = 35 589 -590 Attributes -591 ---------- -592 usedAtoms : dict, optional -593 Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}. -594 min_mz : float, optional -595 Minimum m/z to use for searching. Default is 50.0. -596 max_mz : float, optional -597 Maximum m/z to use for searching. Default is 1200.0. -598 min_dbe : float, optional -599 Minimum double bond equivalent to use for searching. Default is 0. -600 max_dbe : float, optional -601 Maximum double bond equivalent to use for searching. Default is 50. -602 use_pah_line_rule : bool, optional -603 If True, use the PAH line rule. Default is False. -604 isRadical : bool, optional -605 If True, search for radical ions. Default is True. -606 isProtonated : bool, optional -607 If True, search for protonated ions. Default is True. -608 url_database : str, optional -609 URL for the database. Default is None. -610 db_jobs : int, optional -611 Number of jobs to use for database queries. Default is 1. 
-612 used_atom_valences : dict, optional -613 Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}. -614 -615 """ -616 ### DO NOT CHANGE IT! These are used to generate the database entries -617 -618 ### DO change when creating a new application database -619 -620 ### FOR search settings runtime and database query check use the MolecularFormulaSearchSettings class below +590 rt_search_range: float = 1.0 # used for retention index calibration +591 +592 correlation_threshold: float = 0.5 # used for calibration, spectral similarity +593 +594 score_threshold: float = 0.0 +595 +596 ri_spacing: float = 200 +597 +598 ri_std: float = 3 # in standard deviation +599 +600 ri_calibration_compound_names: List = dataclasses.field(default_factory=list) +601 +602 # calculates and export all spectral similarity methods +603 exploratory_mode: bool = False +604 +605 score_methods: tuple = ("highest_sim_score", "highest_ss") +606 +607 output_score_method: str = "All" +608 +609 def __post_init__(self): +610 # enforce datatype +611 self.url_database = os.getenv( +612 "SPECTRAL_GCMS_DATABASE_URL", +613 "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite", +614 ) +615 +616 for field in dataclasses.fields(self): +617 value = getattr(self, field.name) +618 if not isinstance(value, field.type): +619 value = field.type(value) +620 setattr(self, field.name, value) 621 -622 ### C, H, N, O, S and P atoms are ALWAYS needed at usedAtoms -623 ### if you don't want to include one of those atoms set the max and min at 0 -624 ### you can include any atom listed at Atoms class inside encapsulation.settings.constants module -625 ### make sure to include the selected covalence at the used_atoms_valences when adding new atoms -626 ### NOTE : Adducts atoms have zero covalence -627 ### NOTE : Not using static variable because this class is distributed using multiprocessing -628 def 
__init__(self): -629 -630 self.usedAtoms = {'C': (1, 90), -631 'H': (4, 200), -632 'O': (0, 12), -633 'N': (0, 0), -634 'S': (0, 0), -635 'P': (0, 0), -636 'Cl': (0, 0), -637 } +622 self.ri_calibration_compound_names = [ +623 "Methyl Caprylate", +624 "Methyl Caprate", +625 "Methyl Pelargonate", +626 "Methyl Laurate", +627 "Methyl Myristate", +628 "Methyl Palmitate", +629 "Methyl Stearate", +630 "Methyl Eicosanoate", +631 "Methyl Docosanoate", +632 "Methyl Linocerate", +633 "Methyl Hexacosanoate", +634 "Methyl Octacosanoate", +635 "Methyl Triacontanoate", +636 ] +637 638 -639 self.min_mz = 50 -640 -641 self.max_mz = 1200 -642 -643 self.min_dbe = 0 -644 -645 self.max_dbe = 50 -646 -647 # overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9 -648 self.use_pah_line_rule = False -649 -650 self.isRadical = True -651 -652 self.isProtonated = True -653 -654 self.url_database = None -655 -656 self.db_jobs = 1 -657 -658 self.used_atom_valences = {'C': 4, -659 '13C': 4, -660 'H': 1, -661 'O': 2, -662 '18O': 2, -663 'N': 3, -664 'S': 2, -665 '34S': 2, -666 'P': 3, -667 'Cl': 1, -668 '37Cl': 1, -669 'Br': 1, -670 'Na': 1, -671 'F': 1, -672 'K': 0, -673 } +639class MolecularLookupDictSettings: +640 """Settings for molecular searching +641 +642 These are used to generate the database entries, do not change. +643 +644 Attributes +645 ---------- +646 usedAtoms : dict, optional +647 Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}. +648 min_mz : float, optional +649 Minimum m/z to use for searching. Default is 50.0. +650 max_mz : float, optional +651 Maximum m/z to use for searching. Default is 1200.0. +652 min_dbe : float, optional +653 Minimum double bond equivalent to use for searching. Default is 0. +654 max_dbe : float, optional +655 Maximum double bond equivalent to use for searching. Default is 50. +656 use_pah_line_rule : bool, optional +657 If True, use the PAH line rule. 
Default is False. +658 isRadical : bool, optional +659 If True, search for radical ions. Default is True. +660 isProtonated : bool, optional +661 If True, search for protonated ions. Default is True. +662 url_database : str, optional +663 URL for the database. Default is None. +664 db_jobs : int, optional +665 Number of jobs to use for database queries. Default is 1. +666 used_atom_valences : dict, optional +667 Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}. +668 +669 """ +670 +671 ### DO NOT CHANGE IT! These are used to generate the database entries +672 +673 ### DO change when creating a new application database 674 -675@dataclasses.dataclass -676class MolecularFormulaSearchSettings: -677 """Settings for molecular searching -678 -679 Attributes -680 ---------- -681 use_isotopologue_filter : bool, optional -682 If True, use isotopologue filter. Default is False. -683 isotopologue_filter_threshold : float, optional -684 Threshold for isotopologue filter. Default is 33. -685 isotopologue_filter_atoms : tuple, optional -686 Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br'). -687 use_runtime_kendrick_filter : bool, optional -688 If True, use runtime Kendrick filter. Default is False. -689 use_min_peaks_filter : bool, optional -690 If True, use minimum peaks filter. Default is True. -691 min_peaks_per_class : int, optional -692 Minimum number of peaks per class. Default is 15. -693 url_database : str, optional -694 URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'. -695 db_jobs : int, optional -696 Number of jobs to use for database queries. Default is 3. -697 db_chunk_size : int, optional -698 Chunk size to use for database queries. Default is 300. -699 ion_charge : int, optional -700 Ion charge. Default is -1. 
-701 min_hc_filter : float, optional -702 Minimum hydrogen to carbon ratio. Default is 0.3. -703 max_hc_filter : float, optional -704 Maximum hydrogen to carbon ratio. Default is 3. -705 min_oc_filter : float, optional -706 Minimum oxygen to carbon ratio. Default is 0.0. -707 max_oc_filter : float, optional -708 Maximum oxygen to carbon ratio. Default is 1.2. -709 min_op_filter : float, optional -710 Minimum oxygen to phosphorous ratio. Default is 2. -711 use_pah_line_rule : bool, optional -712 If True, use the PAH line rule. Default is False. -713 min_dbe : float, optional -714 Minimum double bond equivalent to use for searching. Default is 0. -715 max_dbe : float, optional -716 Maximum double bond equivalent to use for searching. Default is 40. -717 mz_error_score_weight : float, optional -718 Weight for m/z error score to contribute to composite score. Default is 0.6. -719 isotopologue_score_weight : float, optional -720 Weight for isotopologue score to contribute to composite score. Default is 0.4. -721 adduct_atoms_neg : tuple, optional -722 Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br'). -723 adduct_atoms_pos : tuple, optional -724 Tuple of atoms to use in positive polarity. Default is ('Na', 'K'). -725 score_methods : tuple, optional -726 Tuple of score method that can be implemented. -727 Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'). -728 score_method : str, optional -729 Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'. -730 output_min_score : float, optional -731 Minimum score for output. Default is 0.1. -732 output_score_method : str, optional -733 Score method to use for output. Default is 'All Candidates'. -734 isRadical : bool, optional -735 If True, search for radical ions. 
Default is False. -736 isProtonated : bool, optional -737 If True, search for protonated ions. Default is True. -738 isAdduct : bool, optional -739 If True, search for adduct ions. Default is False. -740 usedAtoms : dict, optional -741 Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}. -742 ion_types_excluded : list, optional -743 List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-]'] or ['[M+COOH]-'] depending on mobile phase content. Default is []. -744 ionization_type : str, optional -745 Ionization type. Default is 'ESI'. -746 min_ppm_error : float, optional -747 Minimum ppm error. Default is -10.0. -748 max_ppm_error : float, optional -749 Maximum ppm error. Default is 10.0. -750 min_abun_error : float, optional -751 Minimum abundance error for isotolopologue search. Default is -100.0. -752 max_abun_error : float, optional -753 Maximum abundance error for isotolopologue search. Default is 100.0. -754 mz_error_range : float, optional -755 m/z error range. Default is 1.5. -756 error_method : str, optional -757 Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical','average' 'None'. -758 mz_error_average : float, optional -759 m/z error average. Default is 0.0. -760 used_atom_valences : dict, optional -761 Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}. 
-762 """ -763 use_isotopologue_filter: bool = False -764 -765 isotopologue_filter_threshold: float = 33 -766 -767 isotopologue_filter_atoms: tuple = ('Cl', 'Br') -768 -769 use_runtime_kendrick_filter: bool = False -770 -771 use_min_peaks_filter: bool = True -772 -773 min_peaks_per_class: int = 15 -774 -775 url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp' -776 -777 db_jobs: int = 3 -778 -779 db_chunk_size: int = 300 -780 -781 #query setting======== -782 ion_charge: int = -1 -783 -784 min_hc_filter: float = 0.3 -785 -786 max_hc_filter: float = 3 -787 -788 min_oc_filter: float = 0.0 -789 -790 max_oc_filter: float = 1.2 -791 -792 min_op_filter: float = 2 -793 -794 use_pah_line_rule: bool = False -795 -796 min_dbe: float = 0 -797 -798 max_dbe: float = 40 -799 -800 mz_error_score_weight: float = 0.6 -801 -802 isotopologue_score_weight: float = 0.4 -803 -804 # look for close shell ions [M + Adduct]+ only considers metal set in the list adduct_atoms -805 adduct_atoms_neg: tuple = ('Cl', 'Br') -806 -807 adduct_atoms_pos: tuple = ('Na', 'K') -808 -809 score_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', -810 'air_filter_error', 'water_filter_error', 'earth_filter_error') -811 -812 score_method: str = 'prob_score' -813 -814 output_min_score: float = 0.1 -815 -816 output_score_method: str = 'All Candidates' -817 -818 # depending on the polarity mode it looks for [M].+ , [M].- -819 # query and automatically compile add entry if it doesn't exist +675 ### FOR search settings runtime and database query check use the MolecularFormulaSearchSettings class below +676 +677 ### C, H, N, O, S and P atoms are ALWAYS needed at usedAtoms +678 ### if you don't want to include one of those atoms set the max and min at 0 +679 ### you can include any atom listed at Atoms class inside encapsulation.settings.constants module +680 ### make sure to include the selected covalence at the used_atoms_valences when 
adding new atoms +681 ### NOTE : Adducts atoms have zero covalence +682 ### NOTE : Not using static variable because this class is distributed using multiprocessing +683 def __init__(self): +684 self.usedAtoms = { +685 "C": (1, 90), +686 "H": (4, 200), +687 "O": (0, 12), +688 "N": (0, 0), +689 "S": (0, 0), +690 "P": (0, 0), +691 "Cl": (0, 0), +692 } +693 +694 self.min_mz = 50 +695 +696 self.max_mz = 1200 +697 +698 self.min_dbe = 0 +699 +700 self.max_dbe = 50 +701 +702 # overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9 +703 self.use_pah_line_rule = False +704 +705 self.isRadical = True +706 +707 self.isProtonated = True +708 +709 self.url_database = None +710 +711 self.db_jobs = 1 +712 +713 self.used_atom_valences = { +714 "C": 4, +715 "13C": 4, +716 "H": 1, +717 "O": 2, +718 "18O": 2, +719 "N": 3, +720 "S": 2, +721 "34S": 2, +722 "P": 3, +723 "Cl": 1, +724 "37Cl": 1, +725 "Br": 1, +726 "Na": 1, +727 "F": 1, +728 "K": 0, +729 } +730 +731 +732@dataclasses.dataclass +733class MolecularFormulaSearchSettings: +734 """Settings for molecular searching +735 +736 Attributes +737 ---------- +738 use_isotopologue_filter : bool, optional +739 If True, use isotopologue filter. Default is False. +740 isotopologue_filter_threshold : float, optional +741 Threshold for isotopologue filter. Default is 33. +742 isotopologue_filter_atoms : tuple, optional +743 Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br'). +744 use_runtime_kendrick_filter : bool, optional +745 If True, use runtime Kendrick filter. Default is False. +746 use_min_peaks_filter : bool, optional +747 If True, use minimum peaks filter. Default is True. +748 min_peaks_per_class : int, optional +749 Minimum number of peaks per class. Default is 15. +750 url_database : str, optional +751 URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'. +752 db_jobs : int, optional +753 Number of jobs to use for database queries. Default is 3. 
+754 db_chunk_size : int, optional +755 Chunk size to use for database queries. Default is 300. +756 ion_charge : int, optional +757 Ion charge. Default is -1. +758 min_hc_filter : float, optional +759 Minimum hydrogen to carbon ratio. Default is 0.3. +760 max_hc_filter : float, optional +761 Maximum hydrogen to carbon ratio. Default is 3. +762 min_oc_filter : float, optional +763 Minimum oxygen to carbon ratio. Default is 0.0. +764 max_oc_filter : float, optional +765 Maximum oxygen to carbon ratio. Default is 1.2. +766 min_op_filter : float, optional +767 Minimum oxygen to phosphorous ratio. Default is 2. +768 use_pah_line_rule : bool, optional +769 If True, use the PAH line rule. Default is False. +770 min_dbe : float, optional +771 Minimum double bond equivalent to use for searching. Default is 0. +772 max_dbe : float, optional +773 Maximum double bond equivalent to use for searching. Default is 40. +774 mz_error_score_weight : float, optional +775 Weight for m/z error score to contribute to composite score. Default is 0.6. +776 isotopologue_score_weight : float, optional +777 Weight for isotopologue score to contribute to composite score. Default is 0.4. +778 adduct_atoms_neg : tuple, optional +779 Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br'). +780 adduct_atoms_pos : tuple, optional +781 Tuple of atoms to use in positive polarity. Default is ('Na', 'K'). +782 score_methods : tuple, optional +783 Tuple of score method that can be implemented. +784 Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'). +785 score_method : str, optional +786 Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'. +787 output_min_score : float, optional +788 Minimum score for output. Default is 0.1. 
+789 output_score_method : str, optional +790 Score method to use for output. Default is 'All Candidates'. +791 isRadical : bool, optional +792 If True, search for radical ions. Default is False. +793 isProtonated : bool, optional +794 If True, search for protonated ions. Default is True. +795 isAdduct : bool, optional +796 If True, search for adduct ions. Default is False. +797 usedAtoms : dict, optional +798 Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}. +799 ion_types_excluded : list, optional +800 List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-]'] or ['[M+COOH]-'] depending on mobile phase content. Default is []. +801 ionization_type : str, optional +802 Ionization type. Default is 'ESI'. +803 min_ppm_error : float, optional +804 Minimum ppm error. Default is -10.0. +805 max_ppm_error : float, optional +806 Maximum ppm error. Default is 10.0. +807 min_abun_error : float, optional +808 Minimum abundance error for isotolopologue search. Default is -100.0. +809 max_abun_error : float, optional +810 Maximum abundance error for isotolopologue search. Default is 100.0. +811 mz_error_range : float, optional +812 m/z error range. Default is 1.5. +813 error_method : str, optional +814 Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical','average' 'None'. +815 mz_error_average : float, optional +816 m/z error average. Default is 0.0. +817 used_atom_valences : dict, optional +818 Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}. 
+819 """ 820 -821 isRadical: bool = False +821 use_isotopologue_filter: bool = False 822 -823 # depending on the polarity mode it looks for [M + H]+ , [M - H]+ -824 # query and automatically compile and push options if it doesn't exist -825 isProtonated: bool = True +823 isotopologue_filter_threshold: float = 33 +824 +825 isotopologue_filter_atoms: tuple = ("Cl", "Br") 826 -827 isAdduct: bool = False +827 use_runtime_kendrick_filter: bool = False 828 -829 usedAtoms: dict = dataclasses.field(default_factory=dict) -830 ion_types_excluded: list = dataclasses.field(default_factory=list) -831 -832 # search setting ======== -833 -834 ionization_type: str = 'ESI' -835 -836 # empirically set / needs optimization -837 min_ppm_error: float = -10.0 # ppm +829 use_min_peaks_filter: bool = True +830 +831 min_peaks_per_class: int = 15 +832 +833 url_database: str = ( +834 "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp" +835 ) +836 +837 db_jobs: int = 3 838 -839 # empirically set / needs optimization -840 max_ppm_error: float = 10.0 # ppm -841 -842 # empirically set / needs optimization set for isotopologue search -843 min_abun_error: float = -100.0 # percentage -844 -845 # empirically set / needs optimization set for isotopologue search -846 max_abun_error: float = 100.0 # percentage +839 db_chunk_size: int = 300 +840 +841 # query setting======== +842 ion_charge: int = -1 +843 +844 min_hc_filter: float = 0.3 +845 +846 max_hc_filter: float = 3 847 -848 # empirically set / needs optimization -849 mz_error_range: float = 1.5 -850 -851 # 'distance', 'lowest', 'symmetrical','average' 'None' -852 error_method: str = 'None' +848 min_oc_filter: float = 0.0 +849 +850 max_oc_filter: float = 1.2 +851 +852 min_op_filter: float = 2 853 -854 mz_error_average: float = 0.0 +854 use_pah_line_rule: bool = False 855 -856 # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict) -857 used_atom_valences: dict = 
dataclasses.field(default_factory=dict) -858 -859 def __post_init__(self): -860 -861 self.url_database = os.getenv('COREMS_DATABASE_URL', 'sqlite:///db/molformula.db') -862 # enforce datatype -863 for field in dataclasses.fields(self): -864 value = getattr(self, field.name) -865 if not isinstance(value, field.type): +856 min_dbe: float = 0 +857 +858 max_dbe: float = 40 +859 +860 mz_error_score_weight: float = 0.6 +861 +862 isotopologue_score_weight: float = 0.4 +863 +864 # look for close shell ions [M + Adduct]+ only considers metal set in the list adduct_atoms +865 adduct_atoms_neg: tuple = ("Cl", "Br") 866 -867 value = field.type(value) -868 setattr(self, field.name, value) -869 -870 # enforce C and H if either do not exists -871 if 'C' not in self.usedAtoms.keys(): -872 self.usedAtoms['C'] = (1, 100) -873 if 'H' not in self.usedAtoms.keys(): -874 self.usedAtoms['H'] = (1, 200) -875 -876 # add cummon values -877 current_used_atoms = self.used_atom_valences.keys() -878 -879 for atom in Atoms.atoms_covalence.keys(): -880 -881 if atom not in current_used_atoms: -882 -883 covalence = Atoms.atoms_covalence.get(atom) -884 -885 if isinstance(covalence , int): -886 self.used_atom_valences[atom] = covalence -887 -888 else: -889 #will get the first number of all possible covalances, which should be the most commum -890 self.used_atom_valences[atom] = covalence[0] +867 adduct_atoms_pos: tuple = ("Na", "K") +868 +869 score_methods: tuple = ( +870 "S_P_lowest_error", +871 "N_S_P_lowest_error", +872 "lowest_error", +873 "prob_score", +874 "air_filter_error", +875 "water_filter_error", +876 "earth_filter_error", +877 ) +878 +879 score_method: str = "prob_score" +880 +881 output_min_score: float = 0.1 +882 +883 output_score_method: str = "All Candidates" +884 +885 # depending on the polarity mode it looks for [M].+ , [M].- +886 # query and automatically compile add entry if it doesn't exist +887 +888 isRadical: bool = False +889 +890 # depending on the polarity mode it looks for 
[M + H]+ , [M - H]+ +891 # query and automatically compile and push options if it doesn't exist +892 isProtonated: bool = True +893 +894 isAdduct: bool = False +895 +896 usedAtoms: dict = dataclasses.field(default_factory=dict) +897 ion_types_excluded: list = dataclasses.field(default_factory=list) +898 +899 # search setting ======== +900 +901 ionization_type: str = "ESI" +902 +903 # empirically set / needs optimization +904 min_ppm_error: float = -10.0 # ppm +905 +906 # empirically set / needs optimization +907 max_ppm_error: float = 10.0 # ppm +908 +909 # empirically set / needs optimization set for isotopologue search +910 min_abun_error: float = -100.0 # percentage +911 +912 # empirically set / needs optimization set for isotopologue search +913 max_abun_error: float = 100.0 # percentage +914 +915 # empirically set / needs optimization +916 mz_error_range: float = 1.5 +917 +918 # 'distance', 'lowest', 'symmetrical','average' 'None' +919 error_method: str = "None" +920 +921 mz_error_average: float = 0.0 +922 +923 # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict) +924 used_atom_valences: dict = dataclasses.field(default_factory=dict) +925 +926 def __post_init__(self): +927 self.url_database = os.getenv( +928 "COREMS_DATABASE_URL", "sqlite:///db/molformula.db" +929 ) +930 # enforce datatype +931 for field in dataclasses.fields(self): +932 value = getattr(self, field.name) +933 if not isinstance(value, field.type): +934 value = field.type(value) +935 setattr(self, field.name, value) +936 +937 # enforce C and H if either do not exists +938 if "C" not in self.usedAtoms.keys(): +939 self.usedAtoms["C"] = (1, 100) +940 if "H" not in self.usedAtoms.keys(): +941 self.usedAtoms["H"] = (1, 200) +942 +943 # add cummon values +944 current_used_atoms = self.used_atom_valences.keys() +945 +946 for atom in Atoms.atoms_covalence.keys(): +947 if atom not in current_used_atoms: +948 covalence = Atoms.atoms_covalence.get(atom) +949 +950 if 
isinstance(covalence, int): +951 self.used_atom_valences[atom] = covalence +952 +953 else: +954 # will get the first number of all possible covalances, which should be the most commum +955 self.used_atom_valences[atom] = covalence[0]

    @@ -1521,43 +1586,50 @@

    -
    11@dataclasses.dataclass
    -12class TransientSetting:
    -13    """Transient processing settings class
    -14
    -15    Attributes
    -16    ----------
    -17    implemented_apodization_function : tuple
    -18        Available apodization functions
    -19    apodization_method : str
    -20        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
    -21    number_of_truncations : int
    -22        How many times to truncate the transient prior to Fourier transform
    -23    number_of_zero_fills : int
    -24        How many times to zero fille the transient prior to Fourier transform.
    -25    next_power_of_two : bool
    -26        If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)). 
    -27    kaiser_beta : float
    -28        Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular,  5 is similar to Hamming,
    -29        6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)
    -30
    -31    """
    -32    implemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman','Full-Sine','Half-Sine','Kaiser','Half-Kaiser')
    -33    apodization_method: str = 'Hanning'
    -34    number_of_truncations: int = 0
    -35    number_of_zero_fills: int = 1
    -36    next_power_of_two: bool = False
    -37    kaiser_beta: float = 8.6
    -38
    -39    def __post_init__(self):
    -40
    -41        # enforce datatype
    -42        for field in dataclasses.fields(self):
    -43            value = getattr(self, field.name)
    -44            if not isinstance(value, field.type):
    -45
    -46                value = field.type(value)
    -47                setattr(self, field.name, value)
    +            
    12@dataclasses.dataclass
    +13class TransientSetting:
    +14    """Transient processing settings class
    +15
    +16    Attributes
    +17    ----------
    +18    implemented_apodization_function : tuple
    +19        Available apodization functions
    +20    apodization_method : str
    +21        Apodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
    +22    number_of_truncations : int
    +23        How many times to truncate the transient prior to Fourier transform
    +24    number_of_zero_fills : int
    +25        How many times to zero fille the transient prior to Fourier transform.
    +26    next_power_of_two : bool
    +27        If True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
    +28    kaiser_beta : float
    +29        Beta parameter for Kaiser or Half-Kaiser apodisation function. 0 is rectangular,  5 is similar to Hamming,
    +30        6 is similar to hanning, and 8.6 is similar to Blackman (from numpy docs)
    +31
    +32    """
    +33
    +34    implemented_apodization_function: tuple = (
    +35        "Hamming",
    +36        "Hanning",
    +37        "Blackman",
    +38        "Full-Sine",
    +39        "Half-Sine",
    +40        "Kaiser",
    +41        "Half-Kaiser",
    +42    )
    +43    apodization_method: str = "Hanning"
    +44    number_of_truncations: int = 0
    +45    number_of_zero_fills: int = 1
    +46    next_power_of_two: bool = False
    +47    kaiser_beta: float = 8.6
    +48
    +49    def __post_init__(self):
    +50        # enforce datatype
    +51        for field in dataclasses.fields(self):
    +52            value = getattr(self, field.name)
    +53            if not isinstance(value, field.type):
    +54                value = field.type(value)
    +55                setattr(self, field.name, value)
     
    @@ -1680,52 +1752,54 @@
    Attributes
    -
    49@dataclasses.dataclass
    -50class DataInputSetting:
    -51    """Data input settings class
    -52    
    -53    Attributes
    -54    ----------
    -55    header_translate : dict
    -56        Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
    -57    """
    -58    # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER
    -59    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
    -60    header_translate: dict = dataclasses.field(default_factory=dict)
    -61
    -62    def __post_init__(self):
    -63
    -64        self.header_translate = {'m/z': Labels.mz, 
    -65                                 'mOz': Labels.mz,
    -66                                 'Mass': Labels.mz,
    -67                                 'Resolving Power': Labels.rp,
    -68                                 'Res.': Labels.rp,
    -69                                 'resolution': Labels.rp,
    -70                                 'Intensity': Labels.abundance,
    -71                                 'Peak Height': Labels.abundance,
    -72                                 'I': Labels.abundance,
    -73                                 'Abundance': Labels.abundance,
    -74                                 'abs_abu': Labels.abundance,
    -75                                 'Signal/Noise': Labels.s2n,
    -76                                 'S/N': Labels.s2n,
    -77                                 'sn': Labels.s2n}
    -78
    -79    def add_mz_label(self, label):
    -80        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
    -81        self.header_translate[label] = Labels.mz
    -82
    -83    def add_peak_height_label(self, label):
    -84        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
    -85
    -86        self.header_translate[label] = Labels.abundance
    -87
    -88    def add_sn_label(self, label):
    -89        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
    -90        self.header_translate[label] = Labels.s2n
    -91
    -92    def add_resolving_power_label(self, label):
    -93        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
    -94        self.header_translate[label] = Labels.rp
    +            
     58@dataclasses.dataclass
    + 59class DataInputSetting:
    + 60    """Data input settings class
    + 61
    + 62    Attributes
    + 63    ----------
    + 64    header_translate : dict
    + 65        Dictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
    + 66    """
    + 67
    + 68    # add to this dict the VALUES to match your labels, THE ORDER WON"T MATTER
    + 69    # "column_translate" : {"m/z":"m/z", "Resolving Power":"Resolving Power", "Abundance":"Abundance" , "S/N":"S/N"}
    + 70    header_translate: dict = dataclasses.field(default_factory=dict)
    + 71
    + 72    def __post_init__(self):
    + 73        self.header_translate = {
    + 74            "m/z": Labels.mz,
    + 75            "mOz": Labels.mz,
    + 76            "Mass": Labels.mz,
    + 77            "Resolving Power": Labels.rp,
    + 78            "Res.": Labels.rp,
    + 79            "resolution": Labels.rp,
    + 80            "Intensity": Labels.abundance,
    + 81            "Peak Height": Labels.abundance,
    + 82            "I": Labels.abundance,
    + 83            "Abundance": Labels.abundance,
    + 84            "abs_abu": Labels.abundance,
    + 85            "Signal/Noise": Labels.s2n,
    + 86            "S/N": Labels.s2n,
    + 87            "sn": Labels.s2n,
    + 88        }
    + 89
    + 90    def add_mz_label(self, label):
    + 91        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
    + 92        self.header_translate[label] = Labels.mz
    + 93
    + 94    def add_peak_height_label(self, label):
    + 95        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
    + 96
    + 97        self.header_translate[label] = Labels.abundance
    + 98
    + 99    def add_sn_label(self, label):
    +100        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
    +101        self.header_translate[label] = Labels.s2n
    +102
    +103    def add_resolving_power_label(self, label):
    +104        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
    +105        self.header_translate[label] = Labels.rp
     
    @@ -1774,9 +1848,9 @@
    Attributes
    -
    79    def add_mz_label(self, label):
    -80        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
    -81        self.header_translate[label] = Labels.mz
    +            
    90    def add_mz_label(self, label):
    +91        """Add a label to the header_translate dictionary to be translated to the corems label for mz."""
    +92        self.header_translate[label] = Labels.mz
     
    @@ -1796,10 +1870,10 @@
    Attributes
    -
    83    def add_peak_height_label(self, label):
    -84        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
    -85
    -86        self.header_translate[label] = Labels.abundance
    +            
    94    def add_peak_height_label(self, label):
    +95        """Add a label to the header_translate dictionary to be translated to the corems label for peak height."""
    +96
    +97        self.header_translate[label] = Labels.abundance
     
    @@ -1819,9 +1893,9 @@
    Attributes
    -
    88    def add_sn_label(self, label):
    -89        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
    -90        self.header_translate[label] = Labels.s2n
    +            
     99    def add_sn_label(self, label):
    +100        """Add a label to the header_translate dictionary to be translated to the corems label for signal to noise."""
    +101        self.header_translate[label] = Labels.s2n
     
    @@ -1841,9 +1915,9 @@
    Attributes
    -
    92    def add_resolving_power_label(self, label):
    -93        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
    -94        self.header_translate[label] = Labels.rp
    +            
    103    def add_resolving_power_label(self, label):
    +104        """Add a label to the header_translate dictionary to be translated to the corems label for resolving power."""
    +105        self.header_translate[label] = Labels.rp
     
    @@ -1865,151 +1939,162 @@
    Attributes
    -
     96@dataclasses.dataclass
    - 97class LiquidChromatographSetting:
    - 98    """Liquid chromatograph processing settings class
    - 99
    -100    Attributes
    -101    ----------
    -102    scans : list or tuple, optional
    -103        List of select scan to average or a tuple containing the range to average. Default is (0, 1).
    -104    eic_tolerance_ppm : float, optional
    -105        Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
    -106    correct_eic_baseline : bool, optional
    -107        If True, correct the baseline of the extracted ion chromatogram. Default is True.
    -108    smooth_window : int, optional
    -109        Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
    -110    smooth_method : str, optional
    -111        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    -112    implemented_smooth_method : tuple, optional
    -113        Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    -114    savgol_pol_order : int, optional
    -115        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    -116    peak_height_max_percent : float, optional
    -117        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
    -118    peak_max_prominence_percent : float, optional
    -119        1-100 % used for baseline detection. Default is 1.
    -120    peak_derivative_threshold : float, optional
    -121        Threshold for defining derivative crossing. Default is 0.0005.
    -122    min_peak_datapoints : float, optional
    -123        minimum data point to define a chromatografic peak. Default is 5.
    -124    noise_threshold_method : str, optional
    -125        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    -126    noise_threshold_methods_implemented : tuple, optional
    -127        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    -128    peak_height_min_percent : float, optional
    -129        0-100 % used for peak detection. Default is 0.1.
    -130    eic_signal_threshold : float, optional
    -131        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    -132    eic_buffer_time : float, optional
    -133        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
    -134    ph_smooth_it : int, optional
    -135        Number of iterations to use for smoothing prior to finding mass features. 
    -136        Called within the PHCalculations.find_mass_features_ph() method. Default is 7.
    -137    ph_smooth_radius_mz : int, optional
    -138        Radius in m/z steps (not daltons) for smoothing prior to finding mass features. 
    -139        Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
    -140    ph_smooth_radius_scan : int, optional
    -141        Radius in scan steps for smoothing prior to finding mass features. 
    -142        Called within the PHCalculations.find_mass_features_ph() method. Default is 3.
    -143    ph_inten_min_rel : int, optional
    -144        Relative minimum intensity to use for finding mass features. 
    -145        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
    -146        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
    -147    ph_persis_min_rel : int, optional
    -148        Relative minimum persistence for retaining mass features. 
    -149        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
    -150        Should be greater to or equal to ph_inten_min_rel.
    -151        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
    -152    mass_feature_cluster_mz_tolerance_rel : float, optional
    -153        Relative m/z tolerance to use for clustering mass features. 
    -154        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
    -155        Default is 5E-6 (5 ppm).
    -156    mass_feature_cluster_rt_tolerance : float, optional
    -157        Retention time tolerance to use for clustering mass features, in minutes. 
    -158        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. 
    -159        Default is 0.2.
    -160    ms1_scans_to_average : int, optional
    -161        Number of MS1 scans to average for mass-feature associated m/zs. 
    -162        Called within the LCMSBase.add_associated_ms1() method. Default is 1. 
    -163    ms1_deconvolution_corr_min : float, optional
    -164        Minimum correlation to use for deconvoluting MS1 mass features. 
    -165        Called within the LCCalculations.deconvolute_ms1_mass_features() method. 
    -166        Default is 0.8.
    -167    ms2_dda_rt_tolerance : float, optional
    -168        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
    -169    ms2_dda_mz_tolerance : float, optional
    -170        Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
    -171    ms2_min_fe_score : float, optional
    -172        Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
    -173    search_as_lipids : bool, optional
    -174        If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
    -175    include_fragment_types : bool, optional
    -176        If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
    -177    verbose_processing : bool, optional
    -178        If True, print verbose processing information. Default is True.
    -179    """
    -180    scans: list | tuple = (-1,-1)
    -181
    -182    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
    -183    eic_tolerance_ppm: float = 5
    -184    correct_eic_baseline = True
    -185    smooth_window: int = 5
    -186    smooth_method: str = 'savgol'
    -187    implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')
    -188    savgol_pol_order: int = 2
    -189    peak_height_max_percent: float = 10  
    -190    peak_max_prominence_percent: float = 1
    -191    peak_derivative_threshold:float = 0.0005
    -192    min_peak_datapoints: float = 5
    -193    noise_threshold_method: str = 'manual_relative_abundance'
    -194    noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')
    -195    peak_height_min_percent: float = 0.1  
    -196    eic_signal_threshold: float = 0.01  
    -197    eic_buffer_time = 1.5
    -198
    -199    # Parameters used for 2D peak picking
    -200    peak_picking_method: str  = "persistent homology"
    -201    implemented_peak_picking_methods: tuple =  ('persistent homology',)
    -202
    -203    # Parameters used in persistent homology calculations
    -204    ph_smooth_it = 1
    -205    ph_smooth_radius_mz = 0
    -206    ph_smooth_radius_scan = 1
    -207    ph_inten_min_rel = 0.001
    -208    ph_persis_min_rel = 0.001
    -209
    -210    # Parameters used to cluster mass features
    -211    mass_feature_cluster_mz_tolerance_rel: float = 5E-6
    -212    mass_feature_cluster_rt_tolerance: float = 0.3
    -213
    -214    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
    -215    ms1_scans_to_average: int = 1
    -216    ms1_deconvolution_corr_min: float = 0.8
    -217    ms2_dda_rt_tolerance: float = 0.15
    -218    ms2_dda_mz_tolerance: float = 0.05
    -219
    -220    # Parameters used for flash entropy searching and database preparation
    -221    ms2_min_fe_score: float = 0.2
    -222    search_as_lipids: bool = False
    -223    include_fragment_types: bool = False
    -224
    -225    # Parameters used for saving the data
    -226    export_profile_spectra: bool = False
    -227    export_eics: bool = True
    -228    export_unprocessed_ms1: bool = False
    -229
    -230    # Parameters used for verbose processing
    -231    verbose_processing: bool = True
    -232
    -233    def __post_init__(self):
    -234        # enforce datatype
    -235        for field in dataclasses.fields(self):
    -236            value = getattr(self, field.name)
    -237            if not isinstance(value, field.type):
    -238
    -239                value = field.type(value)
    -240                setattr(self, field.name, value)
    +            
    108@dataclasses.dataclass
    +109class LiquidChromatographSetting:
    +110    """Liquid chromatograph processing settings class
    +111
    +112    Attributes
    +113    ----------
    +114    scans : list or tuple, optional
    +115        List of select scan to average or a tuple containing the range to average. Default is (0, 1).
    +116    eic_tolerance_ppm : float, optional
    +117        Mass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
    +118    correct_eic_baseline : bool, optional
    +119        If True, correct the baseline of the extracted ion chromatogram. Default is True.
    +120    smooth_window : int, optional
    +121        Window size for smoothing the ion chromatogram (extracted or total). Default is 5.
    +122    smooth_method : str, optional
    +123        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    +124    implemented_smooth_method : tuple, optional
    +125        Smoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    +126    savgol_pol_order : int, optional
    +127        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    +128    peak_height_max_percent : float, optional
    +129        1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods. Default is 10.
    +130    peak_max_prominence_percent : float, optional
    +131        1-100 % used for baseline detection. Default is 1.
    +132    peak_derivative_threshold : float, optional
    +133        Threshold for defining derivative crossing. Default is 0.0005.
    +134    min_peak_datapoints : float, optional
    +135        minimum data point to define a chromatografic peak. Default is 5.
    +136    noise_threshold_method : str, optional
    +137        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    +138    noise_threshold_methods_implemented : tuple, optional
    +139        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    +140    peak_height_min_percent : float, optional
    +141        0-100 % used for peak detection. Default is 0.1.
    +142    eic_signal_threshold : float, optional
    +143        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    +144    eic_buffer_time : float, optional
    +145        Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
    +146    ph_smooth_it : int, optional
    +147        Number of iterations to use for smoothing prior to finding mass features.
    +148        Called within the PHCalculations.find_mass_features_ph() method. Default is 7.
    +149    ph_smooth_radius_mz : int, optional
    +150        Radius in m/z steps (not daltons) for smoothing prior to finding mass features.
    +151        Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
    +152    ph_smooth_radius_scan : int, optional
    +153        Radius in scan steps for smoothing prior to finding mass features.
    +154        Called within the PHCalculations.find_mass_features_ph() method. Default is 3.
    +155    ph_inten_min_rel : int, optional
    +156        Relative minimum intensity to use for finding mass features.
    +157        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
    +158        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
    +159    ph_persis_min_rel : int, optional
    +160        Relative minimum persistence for retaining mass features.
    +161        Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).
    +162        Should be greater to or equal to ph_inten_min_rel.
    +163        Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
    +164    mass_feature_cluster_mz_tolerance_rel : float, optional
    +165        Relative m/z tolerance to use for clustering mass features.
    +166        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
    +167        Default is 5E-6 (5 ppm).
    +168    mass_feature_cluster_rt_tolerance : float, optional
    +169        Retention time tolerance to use for clustering mass features, in minutes.
    +170        Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.
    +171        Default is 0.2.
    +172    ms1_scans_to_average : int, optional
    +173        Number of MS1 scans to average for mass-feature associated m/zs.
    +174        Called within the LCMSBase.add_associated_ms1() method. Default is 1.
    +175    ms1_deconvolution_corr_min : float, optional
    +176        Minimum correlation to use for deconvoluting MS1 mass features.
    +177        Called within the LCCalculations.deconvolute_ms1_mass_features() method.
    +178        Default is 0.8.
    +179    ms2_dda_rt_tolerance : float, optional
    +180        Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
    +181    ms2_dda_mz_tolerance : float, optional
    +182        Mass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
    +183    ms2_min_fe_score : float, optional
    +184        Minimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
    +185    search_as_lipids : bool, optional
    +186        If True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
    +187    include_fragment_types : bool, optional
    +188        If True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
    +189    verbose_processing : bool, optional
    +190        If True, print verbose processing information. Default is True.
    +191    """
    +192
    +193    scans: list | tuple = (-1, -1)
    +194
    +195    # Parameters used for generating EICs and performing 1D peak picking and EIC/TIC smoothing
    +196    eic_tolerance_ppm: float = 5
    +197    correct_eic_baseline = True
    +198    smooth_window: int = 5
    +199    smooth_method: str = "savgol"
    +200    implemented_smooth_method: tuple = (
    +201        "savgol",
    +202        "hanning",
    +203        "blackman",
    +204        "bartlett",
    +205        "flat",
    +206        "boxcar",
    +207    )
    +208    savgol_pol_order: int = 2
    +209    peak_height_max_percent: float = 10
    +210    peak_max_prominence_percent: float = 1
    +211    peak_derivative_threshold: float = 0.0005
    +212    min_peak_datapoints: float = 5
    +213    noise_threshold_method: str = "manual_relative_abundance"
    +214    noise_threshold_methods_implemented: tuple = (
    +215        "auto_relative_abundance",
    +216        "manual_relative_abundance",
    +217        "second_derivative",
    +218    )
    +219    peak_height_min_percent: float = 0.1
    +220    eic_signal_threshold: float = 0.01
    +221    eic_buffer_time = 1.5
    +222
    +223    # Parameters used for 2D peak picking
    +224    peak_picking_method: str = "persistent homology"
    +225    implemented_peak_picking_methods: tuple = ("persistent homology",)
    +226
    +227    # Parameters used in persistent homology calculations
    +228    ph_smooth_it = 1
    +229    ph_smooth_radius_mz = 0
    +230    ph_smooth_radius_scan = 1
    +231    ph_inten_min_rel = 0.001
    +232    ph_persis_min_rel = 0.001
    +233
    +234    # Parameters used to cluster mass features
    +235    mass_feature_cluster_mz_tolerance_rel: float = 5e-6
    +236    mass_feature_cluster_rt_tolerance: float = 0.3
    +237
    +238    # Parameters used in associating MS1 and MS2 spectra to LCMS mass features and deconvoluting MS1 mass features
    +239    ms1_scans_to_average: int = 1
    +240    ms1_deconvolution_corr_min: float = 0.8
    +241    ms2_dda_rt_tolerance: float = 0.15
    +242    ms2_dda_mz_tolerance: float = 0.05
    +243
    +244    # Parameters used for flash entropy searching and database preparation
    +245    ms2_min_fe_score: float = 0.2
    +246    search_as_lipids: bool = False
    +247    include_fragment_types: bool = False
    +248
    +249    # Parameters used for saving the data
    +250    export_profile_spectra: bool = False
    +251    export_eics: bool = True
    +252    export_unprocessed_ms1: bool = False
    +253
    +254    # Parameters used for verbose processing
    +255    verbose_processing: bool = True
    +256
    +257    def __post_init__(self):
    +258        # enforce datatype
    +259        for field in dataclasses.fields(self):
    +260            value = getattr(self, field.name)
    +261            if not isinstance(value, field.type):
    +262                value = field.type(value)
    +263                setattr(self, field.name, value)
     
    @@ -2051,37 +2136,37 @@
    Attributes
  • eic_buffer_time (float, optional): Buffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
  • ph_smooth_it (int, optional): -Number of iterations to use for smoothing prior to finding mass features. +Number of iterations to use for smoothing prior to finding mass features. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
  • ph_smooth_radius_mz (int, optional): -Radius in m/z steps (not daltons) for smoothing prior to finding mass features. +Radius in m/z steps (not daltons) for smoothing prior to finding mass features. Called within the PHCalculations.find_mass_features_ph() method. Default is 0.
  • ph_smooth_radius_scan (int, optional): -Radius in scan steps for smoothing prior to finding mass features. +Radius in scan steps for smoothing prior to finding mass features. Called within the PHCalculations.find_mass_features_ph() method. Default is 1.
  • ph_inten_min_rel (int, optional): -Relative minimum intensity to use for finding mass features. +Relative minimum intensity to use for finding mass features. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
  • ph_persis_min_rel (int, optional): -Relative minimum persistence for retaining mass features. +Relative minimum persistence for retaining mass features. Calculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan). Should be greater than or equal to ph_inten_min_rel. Called within the PH_Calculations.find_mass_features() method. Default is 0.001.
  • mass_feature_cluster_mz_tolerance_rel (float, optional): -Relative m/z tolerance to use for clustering mass features. +Relative m/z tolerance to use for clustering mass features. Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 5E-6 (5 ppm).
  • mass_feature_cluster_rt_tolerance (float, optional): -Retention time tolerance to use for clustering mass features, in minutes. -Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. +Retention time tolerance to use for clustering mass features, in minutes. +Called with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. Default is 0.3.
  • ms1_scans_to_average (int, optional): -Number of MS1 scans to average for mass-feature associated m/zs. +Number of MS1 scans to average for mass-feature associated m/zs. Called within the LCMSBase.add_associated_ms1() method. Default is 1.
  • ms1_deconvolution_corr_min (float, optional): -Minimum correlation to use for deconvoluting MS1 mass features. -Called within the LCCalculations.deconvolute_ms1_mass_features() method. +Minimum correlation to use for deconvoluting MS1 mass features. +Called within the LCCalculations.deconvolute_ms1_mass_features() method. Default is 0.8.
  • ms2_dda_rt_tolerance (float, optional): Retention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
  • @@ -2556,109 +2641,119 @@
    Attributes
    -
    242@dataclasses.dataclass
    -243class MassSpectrumSetting:
    -244    """Mass spectrum processing settings class
    -245
    -246    Attributes
    -247    ----------
    -248    noise_threshold_method : str, optional
    -249        Method for detecting noise threshold. Default is 'log'.
    -250    noise_threshold_methods_implemented : tuple, optional
    -251        Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    -252    noise_threshold_min_std : int, optional
    -253        Minumum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    -254    noise_threshold_min_s2n : float, optional
    -255        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    -256    noise_threshold_min_relative_abundance : float, optional
    -257        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
    -258    noise_threshold_absolute_abundance : float, optional
    -259        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
    -260    noise_threshold_log_nsigma : int, optional
    -261        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
    -262    noise_threshold_log_nsigma_corr_factor : float, optional
    -263        Correction factor for log noise threshold method. Default is 0.463.
    -264    noise_threshold_log_nsigma_bins : int, optional
    -265        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    -266    noise_min_mz : float, optional
    -267        Minimum m/z to use for noise thresholding. Default is 50.0.
    -268    noise_max_mz : float, optional
    -269        Maximum m/z to use for noise thresholding. Default is 1200.0.
    -270    min_picking_mz : float, optional
    -271        Minimum m/z to use for peak picking. Default is 50.0.
    -272    max_picking_mz : float, optional
    -273        Maximum m/z to use for peak picking. Default is 1200.0.
    -274    picking_point_extrapolate : int, optional
    -275        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.
    -276        Recommend 3 for reduced profile data or if peak picking faults
    -277    calib_minimize_method : str, optional
    -278        Minimization method to use for calibration. Default is 'Powell'.
    -279    calib_pol_order : int, optional
    -280        Polynomial order to use for calibration. Default is 2.
    -281    max_calib_ppm_error : float, optional
    -282        Maximum ppm error to use for calibration. Default is 1.0.
    -283    min_calib_ppm_error : float, optional
    -284        Minimum ppm error to use for calibration. Default is -1.0.
    -285    calib_sn_threshold : float, optional
    -286        Signal to noise threshold to use for calibration. Default is 2.0.
    -287    calibration_ref_match_method: string, optional
    -288        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'. 
    -289    calibration_ref_match_tolerance: float, optional
    -290        If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
    -291    do_calibration : bool, optional
    -292        If True, perform calibration. Default is True.
    -293    verbose_processing : bool, optional
    -294        If True, print verbose processing information. Default is True.    
    -295    """
    -296    noise_threshold_method: str = 'log'
    -297
    -298    noise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log')
    -299
    -300    noise_threshold_min_std: int = 6 # when using 'minima' method
    -301
    -302    noise_threshold_min_s2n: float = 4 # when using 'signal_noise' method
    -303
    -304    noise_threshold_min_relative_abundance: float = 6  # from 0-100, when using 'relative_abundance' method
    -305
    -306    noise_threshold_absolute_abundance: float = 1_000_000 # when using 'absolute_abundance' method
    -307
    -308    noise_threshold_log_nsigma: int = 6 # when using 'log' method
    -309    noise_threshold_log_nsigma_corr_factor: float = 0.463 #mFT is 0.463, aFT is 1.0
    -310    noise_threshold_log_nsigma_bins: int = 500 # bins for the histogram for the noise
    -311
    -312    noise_min_mz: float = 50.0
    -313    noise_max_mz: float = 1200.0
    -314
    -315    min_picking_mz: float = 50.0
    -316    max_picking_mz: float = 1200.0
    -317
    -318    # How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis
    -319    # This will fix peak picking at spectrum limit issues
    -320    #  0 to keep normal behaviour, typical value 3 to fix
    -321    picking_point_extrapolate: int = 3 
    +            
    266@dataclasses.dataclass
    +267class MassSpectrumSetting:
    +268    """Mass spectrum processing settings class
    +269
    +270    Attributes
    +271    ----------
    +272    noise_threshold_method : str, optional
    +273        Method for detecting noise threshold. Default is 'log'.
    +274    noise_threshold_methods_implemented : tuple, optional
    +275        Methods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    +276    noise_threshold_min_std : int, optional
    +277        Minimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    +278    noise_threshold_min_s2n : float, optional
    +279        Minimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    +280    noise_threshold_min_relative_abundance : float, optional
    +281        Minimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
    +282    noise_threshold_absolute_abundance : float, optional
    +283        Minimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
    +284    noise_threshold_log_nsigma : int, optional
    +285        Number of standard deviations to use when using 'log' noise threshold method. Default is 6.
    +286    noise_threshold_log_nsigma_corr_factor : float, optional
    +287        Correction factor for log noise threshold method. Default is 0.463.
    +288    noise_threshold_log_nsigma_bins : int, optional
    +289        Number of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    +290    noise_min_mz : float, optional
    +291        Minimum m/z to use for noise thresholding. Default is 50.0.
    +292    noise_max_mz : float, optional
    +293        Maximum m/z to use for noise thresholding. Default is 1200.0.
    +294    min_picking_mz : float, optional
    +295        Minimum m/z to use for peak picking. Default is 50.0.
    +296    max_picking_mz : float, optional
    +297        Maximum m/z to use for peak picking. Default is 1200.0.
    +298    picking_point_extrapolate : int, optional
    +299        How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.
    +300        Recommend 3 for reduced profile data or if peak picking faults
    +301    calib_minimize_method : str, optional
    +302        Minimization method to use for calibration. Default is 'Powell'.
    +303    calib_pol_order : int, optional
    +304        Polynomial order to use for calibration. Default is 2.
    +305    max_calib_ppm_error : float, optional
    +306        Maximum ppm error to use for calibration. Default is 1.0.
    +307    min_calib_ppm_error : float, optional
    +308        Minimum ppm error to use for calibration. Default is -1.0.
    +309    calib_sn_threshold : float, optional
    +310        Signal to noise threshold to use for calibration. Default is 2.0.
    +311    calibration_ref_match_method: string, optional
    +312        Method for matching reference masses with measured masses for recalibration. Default is 'legacy'.
    +313    calibration_ref_match_tolerance: float, optional
    +314        If using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
    +315    do_calibration : bool, optional
    +316        If True, perform calibration. Default is True.
    +317    verbose_processing : bool, optional
    +318        If True, print verbose processing information. Default is True.
    +319    """
    +320
    +321    noise_threshold_method: str = "log"
     322
    -323    calib_minimize_method: str = 'Powell'
    -324    calib_pol_order: int = 2
    -325    max_calib_ppm_error: float = 1.0
    -326    min_calib_ppm_error: float = -1.0
    -327    calib_sn_threshold: float = 2.0
    -328    calibration_ref_match_method: str = 'legacy'
    -329    calibration_ref_match_method_implemented: tuple = ('legacy', 'merged')
    -330    calibration_ref_match_tolerance: float = 0.003
    -331    calibration_ref_match_std_raw_error_limit: float = 1.5
    -332    #calib_ref_mzs: list = [0]
    -333
    -334    do_calibration: bool = True
    -335    verbose_processing: bool = True
    -336
    -337    def __post_init__(self):
    -338        # enforce datatype
    -339        for field in dataclasses.fields(self):
    -340            value = getattr(self, field.name)
    -341            if not isinstance(value, field.type):
    +323    noise_threshold_methods_implemented: tuple = (
    +324        "minima",
    +325        "signal_noise",
    +326        "relative_abundance",
    +327        "absolute_abundance",
    +328        "log",
    +329    )
    +330
    +331    noise_threshold_min_std: int = 6  # when using 'minima' method
    +332
    +333    noise_threshold_min_s2n: float = 4  # when using 'signal_noise' method
    +334
    +335    noise_threshold_min_relative_abundance: float = (
    +336        6  # from 0-100, when using 'relative_abundance' method
    +337    )
    +338
    +339    noise_threshold_absolute_abundance: float = (
    +340        1_000_000  # when using 'absolute_abundance' method
    +341    )
     342
    -343                value = field.type(value)
    -344                setattr(self, field.name, value)
    +343    noise_threshold_log_nsigma: int = 6  # when using 'log' method
    +344    noise_threshold_log_nsigma_corr_factor: float = 0.463  # mFT is 0.463, aFT is 1.0
    +345    noise_threshold_log_nsigma_bins: int = 500  # bins for the histogram for the noise
    +346
    +347    noise_min_mz: float = 50.0
    +348    noise_max_mz: float = 1200.0
    +349
    +350    min_picking_mz: float = 50.0
    +351    max_picking_mz: float = 1200.0
    +352
    +353    # How many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis
    +354    # This will fix peak picking at spectrum limit issues
    +355    #  0 to keep normal behaviour, typical value 3 to fix
    +356    picking_point_extrapolate: int = 3
    +357
    +358    calib_minimize_method: str = "Powell"
    +359    calib_pol_order: int = 2
    +360    max_calib_ppm_error: float = 1.0
    +361    min_calib_ppm_error: float = -1.0
    +362    calib_sn_threshold: float = 2.0
    +363    calibration_ref_match_method: str = "legacy"
    +364    calibration_ref_match_method_implemented: tuple = ("legacy", "merged")
    +365    calibration_ref_match_tolerance: float = 0.003
    +366    calibration_ref_match_std_raw_error_limit: float = 1.5
    +367    # calib_ref_mzs: list = [0]
    +368
    +369    do_calibration: bool = True
    +370    verbose_processing: bool = True
    +371
    +372    def __post_init__(self):
    +373        # enforce datatype
    +374        for field in dataclasses.fields(self):
    +375            value = getattr(self, field.name)
    +376            if not isinstance(value, field.type):
    +377                value = field.type(value)
    +378                setattr(self, field.name, value)
     
    @@ -3043,76 +3138,76 @@
    Attributes
    -
    346@dataclasses.dataclass
    -347class MassSpecPeakSetting:
    -348    """Mass spectrum peak processing settings class
    -349
    -350    Attributes
    -351    ----------
    -352    kendrick_base : Dict, optional
    -353        Dictionary specifying the elements and their counts in the Kendrick base.
    -354        Defaults to {'C': 1, 'H': 2}.
    -355    kendrick_rounding_method : str, optional
    -356        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
    -357        Defaults to 'floor'.
    -358    implemented_kendrick_rounding_methods : tuple
    -359        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
    -360        Defaults to ('floor', 'ceil', 'round').
    -361    peak_derivative_threshold : float, optional
    -362        Threshold for defining derivative crossing. Should be a value between 0 and 1.
    -363        Defaults to 0.0.
    -364    peak_min_prominence_percent : float, optional
    -365        Minimum prominence percentage used for peak detection. Should be a value between 1 and 100.
    -366        Defaults to 0.1.
    -367    min_peak_datapoints : float, optional
    -368        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
    -369        Defaults to 5.
    -370    peak_max_prominence_percent : float, optional
    -371        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
    -372        Defaults to 0.1.
    -373    peak_height_max_percent : float, optional
    -374        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
    -375        Defaults to 10.
    -376    legacy_resolving_power : bool, optional
    -377        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
    -378        Defaults to True.
    -379    legacy_centroid_polyfit : bool, optional
    -380        Use legacy (numpy polyfit) to fit centroid
    -381        Default false.
    -382    """
    -383
    -384    kendrick_base: Dict = dataclasses.field(default_factory=dict)
    -385    
    -386    kendrick_rounding_method: str = 'floor' # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass
    -387    
    -388    implemented_kendrick_rounding_methods : tuple = ('floor','ceil','round')
    -389
    -390    peak_derivative_threshold: float =  0.0 # define derivative crossing threshould 0-1
    -391
    -392    peak_min_prominence_percent: float = 0.1  # 1-100 % used for peak detection
    -393
    -394    min_peak_datapoints: float = 5 # 0-inf used for peak detection
    -395
    -396    peak_max_prominence_percent: float = 0.1  # 1-100 % used for baseline detection
    -397
    -398    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection
    -399
    -400    legacy_resolving_power: bool = True # Use the legacy (CoreMS v1) resolving power calculation (True)
    -401
    -402    legacy_centroid_polyfit: bool = False
    -403
    -404    def __post_init__(self):
    -405
    -406        # default to CH2
    -407        if not self.kendrick_base:
    -408            self.kendrick_base = {'C': 1, 'H': 2}
    -409        # enforce datatype
    -410        for field in dataclasses.fields(self):
    -411            value = getattr(self, field.name)
    -412            if not isinstance(value, field.type):
    -413
    -414                value = field.type(value)
    -415                setattr(self, field.name, value)
    +            
    381@dataclasses.dataclass
    +382class MassSpecPeakSetting:
    +383    """Mass spectrum peak processing settings class
    +384
    +385    Attributes
    +386    ----------
    +387    kendrick_base : Dict, optional
    +388        Dictionary specifying the elements and their counts in the Kendrick base.
    +389        Defaults to {'C': 1, 'H': 2}.
    +390    kendrick_rounding_method : str, optional
    +391        Method for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.
    +392        Defaults to 'floor'.
    +393    implemented_kendrick_rounding_methods : tuple
    +394        Tuple of valid rounding methods for calculating the nominal Kendrick mass.
    +395        Defaults to ('floor', 'ceil', 'round').
    +396    peak_derivative_threshold : float, optional
    +397        Threshold for defining derivative crossing. Should be a value between 0 and 1.
    +398        Defaults to 0.0.
    +399    peak_min_prominence_percent : float, optional
    +400        Minimum prominence percentage used for peak detection. Should be a value between 1 and 100.
    +401        Defaults to 0.1.
    +402    min_peak_datapoints : float, optional
    +403        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
    +404        Defaults to 5.
    +405    peak_max_prominence_percent : float, optional
    +406        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
    +407        Defaults to 0.1.
    +408    peak_height_max_percent : float, optional
    +409        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
    +410        Defaults to 10.
    +411    legacy_resolving_power : bool, optional
    +412        Flag indicating whether to use the legacy (CoreMS v1) resolving power calculation.
    +413        Defaults to True.
    +414    legacy_centroid_polyfit : bool, optional
    +415        Use legacy (numpy polyfit) to fit centroid
    +416        Default false.
    +417    """
    +418
    +419    kendrick_base: Dict = dataclasses.field(default_factory=dict)
    +420
    +421    kendrick_rounding_method: str = "floor"  # 'floor', 'ceil' or 'round' are valid methods for calculating nominal kendrick mass
    +422
    +423    implemented_kendrick_rounding_methods: tuple = ("floor", "ceil", "round")
    +424
    +425    peak_derivative_threshold: float = 0.0  # define derivative crossing threshold 0-1
    +426
    +427    peak_min_prominence_percent: float = 0.1  # 1-100 % used for peak detection
    +428
    +429    min_peak_datapoints: float = 5  # 0-inf used for peak detection
    +430
    +431    peak_max_prominence_percent: float = 0.1  # 1-100 % used for baseline detection
    +432
    +433    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection
    +434
    +435    legacy_resolving_power: bool = (
    +436        True  # Use the legacy (CoreMS v1) resolving power calculation (True)
    +437    )
    +438
    +439    legacy_centroid_polyfit: bool = False
    +440
    +441    def __post_init__(self):
    +442        # default to CH2
    +443        if not self.kendrick_base:
    +444            self.kendrick_base = {"C": 1, "H": 2}
    +445        # enforce datatype
    +446        for field in dataclasses.fields(self):
    +447            value = getattr(self, field.name)
    +448            if not isinstance(value, field.type):
    +449                value = field.type(value)
    +450                setattr(self, field.name, value)
     
    @@ -3299,100 +3394,113 @@
    Attributes
    -
    417@dataclasses.dataclass
    -418class GasChromatographSetting:
    -419    """Gas chromatograph processing settings class
    -420
    -421    Attributes
    -422    ----------
    -423    use_deconvolution : bool, optional
    -424        If True, use deconvolution. Default is False.
    -425    implemented_smooth_method : tuple, optional
    -426        Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    -427    smooth_window : int, optional
    -428        Window size for smoothing the ion chromatogram. Default is 5.
    -429    smooth_method : str, optional
    -430        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    -431    savgol_pol_order : int, optional
    -432        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    -433    peak_derivative_threshold : float, optional
    -434        Threshold for defining derivative crossing. Should be a value between 0 and 1.
    -435        Defaults to 0.0005.
    -436    peak_height_max_percent : float, optional
    -437        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
    -438        Defaults to 10.
    -439    peak_max_prominence_percent : float, optional
    -440        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
    -441        Defaults to 1.
    -442    min_peak_datapoints : float, optional
    -443        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
    -444        Defaults to 5.
    -445    max_peak_width : float, optional
    -446        Maximum peak width used for peak detection. Should be a value between 0 and infinity.
    -447        Defaults to 0.1.
    -448    noise_threshold_method : str, optional
    -449        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    -450    noise_threshold_methods_implemented : tuple, optional
    -451        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    -452    std_noise_threshold : int, optional
    -453        Default is 3.
    -454    peak_height_min_percent : float, optional
    -455        0-100 % used for peak detection. Default is 0.1.
    -456    peak_min_prominence_percent : float, optional
    -457        0-100 % used for peak detection. Default is 0.1.
    -458    eic_signal_threshold : float, optional
    -459        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    -460    max_rt_distance : float, optional
    -461        Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
    -462    verbose_processing : bool, optional
    -463        If True, print verbose processing information. Default is True.
    -464    """
    -465    use_deconvolution: bool = False
    -466
    -467    implemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')
    -468
    -469    smooth_window: int = 5
    -470
    -471    smooth_method: str = 'savgol'
    -472
    -473    savgol_pol_order: int = 2
    -474
    -475    peak_derivative_threshold:float = 0.0005
    -476
    -477    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods
    -478
    -479    peak_max_prominence_percent: float = 1  # 1-100 % used for baseline detection
    -480
    -481    min_peak_datapoints: float = 5
    -482
    -483    max_peak_width: float = 0.1
    -484
    -485    noise_threshold_method: str = 'manual_relative_abundance'
    -486
    -487    noise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 
    -488                                                  'second_derivative')
    -489
    -490    std_noise_threshold: int = 3
    -491
    -492    peak_height_min_percent: float = 0.1  # 0-100 % used for peak detection
    -493
    -494    peak_min_prominence_percent: float = 0.1  # 0-100 % used for peak detection
    -495
    -496    eic_signal_threshold: float = 0.01  # 0-100 % used for extracted ion chromatogram peak detection
    -497
    -498    max_rt_distance: float = 0.025  # minutes, max distance allowance hierarchical clutter
    -499
    -500    verbose_processing: bool = True
    +            
    453@dataclasses.dataclass
    +454class GasChromatographSetting:
    +455    """Gas chromatograph processing settings class
    +456
    +457    Attributes
    +458    ----------
    +459    use_deconvolution : bool, optional
    +460        If True, use deconvolution. Default is False.
    +461    implemented_smooth_method : tuple, optional
    +462        Smoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    +463    smooth_window : int, optional
    +464        Window size for smoothing the ion chromatogram. Default is 5.
    +465    smooth_method : str, optional
    +466        Smoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    +467    savgol_pol_order : int, optional
    +468        Polynomial order for Savitzky-Golay smoothing. Default is 2.
    +469    peak_derivative_threshold : float, optional
    +470        Threshold for defining derivative crossing. Should be a value between 0 and 1.
    +471        Defaults to 0.0005.
    +472    peak_height_max_percent : float, optional
    +473        Maximum height percentage used for baseline detection. Should be a value between 1 and 100.
    +474        Defaults to 10.
    +475    peak_max_prominence_percent : float, optional
    +476        Maximum prominence percentage used for baseline detection. Should be a value between 1 and 100.
    +477        Defaults to 1.
    +478    min_peak_datapoints : float, optional
    +479        Minimum number of data points used for peak detection. Should be a value between 0 and infinity.
    +480        Defaults to 5.
    +481    max_peak_width : float, optional
    +482        Maximum peak width used for peak detection. Should be a value between 0 and infinity.
    +483        Defaults to 0.1.
    +484    noise_threshold_method : str, optional
    +485        Method for detecting noise threshold. Default is 'manual_relative_abundance'.
    +486    noise_threshold_methods_implemented : tuple, optional
    +487        Methods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    +488    std_noise_threshold : int, optional
    +489        Default is 3.
    +490    peak_height_min_percent : float, optional
    +491        0-100 % used for peak detection. Default is 0.1.
    +492    peak_min_prominence_percent : float, optional
    +493        0-100 % used for peak detection. Default is 0.1.
    +494    eic_signal_threshold : float, optional
    +495        0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    +496    max_rt_distance : float, optional
    +497        Maximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
    +498    verbose_processing : bool, optional
    +499        If True, print verbose processing information. Default is True.
    +500    """
     501
    -502    def __post_init__(self):
    +502    use_deconvolution: bool = False
     503
    -504        # enforce datatype
    -505        for field in dataclasses.fields(self):
    -506            value = getattr(self, field.name)
    -507            if not isinstance(value, field.type):
    -508
    -509                value = field.type(value)
    -510                setattr(self, field.name, value)
    +504    implemented_smooth_method: tuple = (
    +505        "savgol",
    +506        "hanning",
    +507        "blackman",
    +508        "bartlett",
    +509        "flat",
    +510        "boxcar",
    +511    )
    +512
    +513    smooth_window: int = 5
    +514
    +515    smooth_method: str = "savgol"
    +516
    +517    savgol_pol_order: int = 2
    +518
    +519    peak_derivative_threshold: float = 0.0005
    +520
    +521    peak_height_max_percent: float = 10  # 1-100 % used for baseline detection use 0.1 for second_derivative and 10 for other methods
    +522
    +523    peak_max_prominence_percent: float = 1  # 1-100 % used for baseline detection
    +524
    +525    min_peak_datapoints: float = 5
    +526
    +527    max_peak_width: float = 0.1
    +528
    +529    noise_threshold_method: str = "manual_relative_abundance"
    +530
    +531    noise_threshold_methods_implemented: tuple = (
    +532        "auto_relative_abundance",
    +533        "manual_relative_abundance",
    +534        "second_derivative",
    +535    )
    +536
    +537    std_noise_threshold: int = 3
    +538
    +539    peak_height_min_percent: float = 0.1  # 0-100 % used for peak detection
    +540
    +541    peak_min_prominence_percent: float = 0.1  # 0-100 % used for peak detection
    +542
    +543    eic_signal_threshold: float = (
    +544        0.01  # 0-100 % used for extracted ion chromatogram peak detection
    +545    )
    +546
    +547    max_rt_distance: float = (
    +548        0.025  # minutes, max distance allowance for hierarchical cluster
    +549    )
    +550
    +551    verbose_processing: bool = True
    +552
    +553    def __post_init__(self):
    +554        # enforce datatype
    +555        for field in dataclasses.fields(self):
    +556            value = getattr(self, field.name)
    +557            if not isinstance(value, field.type):
    +558                value = field.type(value)
    +559                setattr(self, field.name, value)
     
    @@ -3687,79 +3795,82 @@
    Attributes
    -
    512@dataclasses.dataclass
    -513class CompoundSearchSettings:
    -514    """Settings for compound search
    -515
    -516    Attributes
    -517    ----------
    -518    url_database : str, optional
    -519        URL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
    -520    ri_search_range : float, optional
    -521        Retention index search range. Default is 35.
    -522    rt_search_range : float, optional
    -523        Retention time search range, in minutes. Default is 1.0.
    -524    correlation_threshold : float, optional
    -525        Threshold for correlation for spectral similarity. Default is 0.5.
    -526    score_threshold : float, optional
    -527        Threshold for compsite score. Default is 0.0.
    -528    ri_spacing : float, optional
    -529        Retention index spacing. Default is 200.
    -530    ri_std : float, optional
    -531        Retention index standard deviation. Default is 3.
    -532    ri_calibration_compound_names : list, optional
    -533        List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    -534    
    -535    """
    -536    url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres" # 'postgresql://postgres:labthomson0102@172.22.113.27:5432/GCMS' # 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'
    -537
    -538    ri_search_range: float = 35
    -539
    -540    rt_search_range: float = 1.0  # used for retention index calibration
    -541
    -542    correlation_threshold: float = 0.5  # used for calibration, spectral similarity 
    -543
    -544    score_threshold: float = 0.0
    -545
    -546    ri_spacing: float = 200
    -547
    -548    ri_std: float = 3  # in standard deviation
    -549
    -550    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)
    -551
    -552    # calculates and export all spectral similarity methods
    -553    exploratory_mode: bool = False
    -554
    -555    score_methods: tuple = ('highest_sim_score', 'highest_ss')
    -556
    -557    output_score_method: str = 'All'
    -558
    -559    
    -560
    -561    def __post_init__(self):
    -562        # enforce datatype
    -563        self.url_database = os.getenv('SPECTRAL_GCMS_DATABASE_URL', 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite')
    -564
    -565        for field in dataclasses.fields(self):
    -566            value = getattr(self, field.name)
    -567            if not isinstance(value, field.type):
    -568
    -569                value = field.type(value)
    -570                setattr(self, field.name, value)
    -571
    -572        self.ri_calibration_compound_names = ['Methyl Caprylate', 
    -573                                              'Methyl Caprate', 
    -574                                              'Methyl Pelargonate', 
    -575                                              'Methyl Laurate', 
    -576                                              'Methyl Myristate', 
    -577                                              'Methyl Palmitate', 
    -578                                              'Methyl Stearate', 
    -579                                              'Methyl Eicosanoate', 
    -580                                              'Methyl Docosanoate', 
    -581                                              'Methyl Linocerate', 
    -582                                              'Methyl Hexacosanoate', 
    -583                                              'Methyl Octacosanoate', 
    -584                                              'Methyl Triacontanoate']
    +            
    562@dataclasses.dataclass
    +563class CompoundSearchSettings:
    +564    """Settings for compound search
    +565
    +566    Attributes
    +567    ----------
    +568    url_database : str, optional
    +569        URL for the database. Always read from the SPECTRAL_GCMS_DATABASE_URL environment variable in __post_init__; default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
    +570    ri_search_range : float, optional
    +571        Retention index search range. Default is 35.
    +572    rt_search_range : float, optional
    +573        Retention time search range, in minutes. Default is 1.0.
    +574    correlation_threshold : float, optional
    +575        Threshold for correlation for spectral similarity. Default is 0.5.
    +576    score_threshold : float, optional
    +577        Threshold for composite score. Default is 0.0.
    +578    ri_spacing : float, optional
    +579        Retention index spacing. Default is 200.
    +580    ri_std : float, optional
    +581        Retention index standard deviation. Default is 3.
    +582    ri_calibration_compound_names : list, optional
    +583        List of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    +584
    +585    """
    +586
    +587    url_database: str = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres"  # 'postgresql://postgres:labthomson0102@172.22.113.27:5432/GCMS' # 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'
    +588
    +589    ri_search_range: float = 35
    +590
    +591    rt_search_range: float = 1.0  # used for retention index calibration
    +592
    +593    correlation_threshold: float = 0.5  # used for calibration, spectral similarity
    +594
    +595    score_threshold: float = 0.0
    +596
    +597    ri_spacing: float = 200
    +598
    +599    ri_std: float = 3  # in standard deviation
    +600
    +601    ri_calibration_compound_names: List = dataclasses.field(default_factory=list)
    +602
    +603    # calculates and export all spectral similarity methods
    +604    exploratory_mode: bool = False
    +605
    +606    score_methods: tuple = ("highest_sim_score", "highest_ss")
    +607
    +608    output_score_method: str = "All"
    +609
    +610    def __post_init__(self):
    +611        # enforce datatype
    +612        self.url_database = os.getenv(
    +613            "SPECTRAL_GCMS_DATABASE_URL",
    +614            "sqlite:///db/pnnl_lowres_gcms_compounds.sqlite",
    +615        )
    +616
    +617        for field in dataclasses.fields(self):
    +618            value = getattr(self, field.name)
    +619            if not isinstance(value, field.type):
    +620                value = field.type(value)
    +621                setattr(self, field.name, value)
    +622
    +623        self.ri_calibration_compound_names = [
    +624            "Methyl Caprylate",
    +625            "Methyl Caprate",
    +626            "Methyl Pelargonate",
    +627            "Methyl Laurate",
    +628            "Methyl Myristate",
    +629            "Methyl Palmitate",
    +630            "Methyl Stearate",
    +631            "Methyl Eicosanoate",
    +632            "Methyl Docosanoate",
    +633            "Methyl Linocerate",
    +634            "Methyl Hexacosanoate",
    +635            "Methyl Octacosanoate",
    +636            "Methyl Triacontanoate",
    +637        ]
     
    @@ -3943,95 +4054,97 @@
    Attributes
    -
    586class MolecularLookupDictSettings:
    -587    """Settings for molecular searching
    -588
    -589    These are used to generate the database entries, do not change.
    -590
    -591    Attributes
    -592    ----------
    -593    usedAtoms : dict, optional
    -594        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    -595    min_mz : float, optional
    -596        Minimum m/z to use for searching. Default is 50.0.
    -597    max_mz : float, optional
    -598        Maximum m/z to use for searching. Default is 1200.0.
    -599    min_dbe : float, optional
    -600        Minimum double bond equivalent to use for searching. Default is 0.
    -601    max_dbe : float, optional
    -602        Maximum double bond equivalent to use for searching. Default is 50.
    -603    use_pah_line_rule : bool, optional
    -604        If True, use the PAH line rule. Default is False.
    -605    isRadical : bool, optional
    -606        If True, search for radical ions. Default is True.
    -607    isProtonated : bool, optional
    -608        If True, search for protonated ions. Default is True.
    -609    url_database : str, optional
    -610        URL for the database. Default is None.
    -611    db_jobs : int, optional
    -612        Number of jobs to use for database queries. Default is 1.
    -613    used_atom_valences : dict, optional
    -614        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    -615        
    -616    """
    -617    ### DO NOT CHANGE IT! These are used to generate the database entries 
    -618
    -619    ### DO change when creating a new application database 
    -620
    -621    ### FOR search settings runtime and database query check use the MolecularFormulaSearchSettings class below
    -622
    -623    ### C, H, N, O, S and P atoms are ALWAYS needed at usedAtoms
    -624    ### if you don't want to include one of those atoms set the max and min at 0
    -625    ### you can include any atom listed at Atoms class inside encapsulation.settings.constants module
    -626    ### make sure to include the selected covalence at the used_atoms_valences when adding new atoms 
    -627    ### NOTE : Adducts atoms have zero covalence
    -628    ### NOTE : Not using static variable because this class is distributed using multiprocessing
    -629    def __init__(self):
    -630
    -631        self.usedAtoms = {'C': (1, 90),
    -632                          'H': (4, 200),
    -633                          'O': (0, 12),
    -634                          'N': (0, 0),
    -635                          'S': (0, 0),
    -636                          'P': (0, 0),
    -637                          'Cl': (0, 0),
    -638                          }
    -639
    -640        self.min_mz = 50
    -641
    -642        self.max_mz = 1200
    -643
    -644        self.min_dbe = 0
    -645
    -646        self.max_dbe = 50
    -647
    -648        # overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9
    -649        self.use_pah_line_rule = False
    -650
    -651        self.isRadical = True
    -652
    -653        self.isProtonated = True
    -654
    -655        self.url_database = None
    -656
    -657        self.db_jobs = 1
    -658
    -659        self.used_atom_valences = {'C': 4,
    -660                                   '13C': 4,
    -661                                   'H': 1,
    -662                                   'O': 2,
    -663                                   '18O': 2,
    -664                                   'N': 3,
    -665                                   'S': 2,
    -666                                   '34S': 2,
    -667                                   'P': 3,
    -668                                   'Cl': 1,
    -669                                   '37Cl': 1,
    -670                                   'Br': 1,
    -671                                   'Na': 1,
    -672                                   'F': 1,
    -673                                   'K': 0,
    -674                                   }
    +            
    640class MolecularLookupDictSettings:
    +641    """Settings for molecular searching
    +642
    +643    These are used to generate the database entries, do not change.
    +644
    +645    Attributes
    +646    ----------
    +647    usedAtoms : dict, optional
    +648        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    +649    min_mz : float, optional
    +650        Minimum m/z to use for searching. Default is 50.0.
    +651    max_mz : float, optional
    +652        Maximum m/z to use for searching. Default is 1200.0.
    +653    min_dbe : float, optional
    +654        Minimum double bond equivalent to use for searching. Default is 0.
    +655    max_dbe : float, optional
    +656        Maximum double bond equivalent to use for searching. Default is 50.
    +657    use_pah_line_rule : bool, optional
    +658        If True, use the PAH line rule. Default is False.
    +659    isRadical : bool, optional
    +660        If True, search for radical ions. Default is True.
    +661    isProtonated : bool, optional
    +662        If True, search for protonated ions. Default is True.
    +663    url_database : str, optional
    +664        URL for the database. Default is None.
    +665    db_jobs : int, optional
    +666        Number of jobs to use for database queries. Default is 1.
    +667    used_atom_valences : dict, optional
    +668        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    +669
    +670    """
    +671
    +672    ### DO NOT CHANGE IT! These are used to generate the database entries
    +673
    +674    ### DO change when creating a new application database
    +675
    +676    ### FOR search settings runtime and database query check use the MolecularFormulaSearchSettings class below
    +677
    +678    ### C, H, N, O, S and P atoms are ALWAYS needed at usedAtoms
    +679    ### if you don't want to include one of those atoms set the max and min at 0
    +680    ### you can include any atom listed at Atoms class inside encapsulation.settings.constants module
    +681    ### make sure to include the selected covalence in used_atom_valences when adding new atoms
    +682    ### NOTE : Adducts atoms have zero covalence
    +683    ### NOTE : Not using static variable because this class is distributed using multiprocessing
    +684    def __init__(self):
    +685        self.usedAtoms = {
    +686            "C": (1, 90),
    +687            "H": (4, 200),
    +688            "O": (0, 12),
    +689            "N": (0, 0),
    +690            "S": (0, 0),
    +691            "P": (0, 0),
    +692            "Cl": (0, 0),
    +693        }
    +694
    +695        self.min_mz = 50
    +696
    +697        self.max_mz = 1200
    +698
    +699        self.min_dbe = 0
    +700
    +701        self.max_dbe = 50
    +702
    +703        # overwrites the dbe limits above to DBE = (C + heteroatoms) * 0.9
    +704        self.use_pah_line_rule = False
    +705
    +706        self.isRadical = True
    +707
    +708        self.isProtonated = True
    +709
    +710        self.url_database = None
    +711
    +712        self.db_jobs = 1
    +713
    +714        self.used_atom_valences = {
    +715            "C": 4,
    +716            "13C": 4,
    +717            "H": 1,
    +718            "O": 2,
    +719            "18O": 2,
    +720            "N": 3,
    +721            "S": 2,
    +722            "34S": 2,
    +723            "P": 3,
    +724            "Cl": 1,
    +725            "37Cl": 1,
    +726            "Br": 1,
    +727            "Na": 1,
    +728            "F": 1,
    +729            "K": 0,
    +730        }
     
    @@ -4202,222 +4315,230 @@
    Attributes
    -
    676@dataclasses.dataclass
    -677class MolecularFormulaSearchSettings:
    -678    """Settings for molecular searching
    -679
    -680    Attributes
    -681    ----------
    -682    use_isotopologue_filter : bool, optional
    -683        If True, use isotopologue filter. Default is False.
    -684    isotopologue_filter_threshold : float, optional
    -685        Threshold for isotopologue filter. Default is 33.
    -686    isotopologue_filter_atoms : tuple, optional
    -687        Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    -688    use_runtime_kendrick_filter : bool, optional
    -689        If True, use runtime Kendrick filter. Default is False.
    -690    use_min_peaks_filter : bool, optional
    -691        If True, use minimum peaks filter. Default is True.
    -692    min_peaks_per_class : int, optional
    -693        Minimum number of peaks per class. Default is 15.
    -694    url_database : str, optional
    -695        URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
    -696    db_jobs : int, optional
    -697        Number of jobs to use for database queries. Default is 3.
    -698    db_chunk_size : int, optional
    -699        Chunk size to use for database queries. Default is 300.
    -700    ion_charge : int, optional
    -701        Ion charge. Default is -1.
    -702    min_hc_filter : float, optional
    -703        Minimum hydrogen to carbon ratio. Default is 0.3.
    -704    max_hc_filter : float, optional
    -705        Maximum hydrogen to carbon ratio. Default is 3.
    -706    min_oc_filter : float, optional
    -707        Minimum oxygen to carbon ratio. Default is 0.0.
    -708    max_oc_filter : float, optional
    -709        Maximum oxygen to carbon ratio. Default is 1.2.
    -710    min_op_filter : float, optional
    -711        Minimum oxygen to phosphorous ratio. Default is 2.
    -712    use_pah_line_rule : bool, optional
    -713        If True, use the PAH line rule. Default is False.
    -714    min_dbe : float, optional
    -715        Minimum double bond equivalent to use for searching. Default is 0.
    -716    max_dbe : float, optional
    -717        Maximum double bond equivalent to use for searching. Default is 40.
    -718    mz_error_score_weight : float, optional
    -719        Weight for m/z error score to contribute to composite score. Default is 0.6.
    -720    isotopologue_score_weight : float, optional
    -721        Weight for isotopologue score to contribute to composite score. Default is 0.4.
    -722    adduct_atoms_neg : tuple, optional
    -723        Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
    -724    adduct_atoms_pos : tuple, optional
    -725        Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
    -726    score_methods : tuple, optional
    -727        Tuple of score method that can be implemented. 
    -728        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
    -729    score_method : str, optional
    -730        Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.   
    -731    output_min_score : float, optional
    -732        Minimum score for output. Default is 0.1.
    -733    output_score_method : str, optional
    -734        Score method to use for output. Default is 'All Candidates'.
    -735    isRadical : bool, optional
    -736        If True, search for radical ions. Default is False.
    -737    isProtonated : bool, optional
    -738        If True, search for protonated ions. Default is True.
    -739    isAdduct : bool, optional
    -740        If True, search for adduct ions. Default is False.
    -741    usedAtoms : dict, optional
    -742        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    -743    ion_types_excluded : list, optional
    -744        List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-]'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
    -745    ionization_type : str, optional
    -746        Ionization type. Default is 'ESI'.
    -747    min_ppm_error : float, optional
    -748        Minimum ppm error. Default is -10.0.
    -749    max_ppm_error : float, optional
    -750        Maximum ppm error. Default is 10.0.
    -751    min_abun_error : float, optional
    -752        Minimum abundance error for isotolopologue search. Default is -100.0.
    -753    max_abun_error : float, optional
    -754        Maximum abundance error for isotolopologue search. Default is 100.0.
    -755    mz_error_range : float, optional
    -756        m/z error range. Default is 1.5.
    -757    error_method : str, optional
    -758        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical','average' 'None'.
    -759    mz_error_average : float, optional
    -760        m/z error average. Default is 0.0.
    -761    used_atom_valences : dict, optional
    -762        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    -763    """
    -764    use_isotopologue_filter: bool = False
    -765
    -766    isotopologue_filter_threshold: float = 33
    -767
    -768    isotopologue_filter_atoms: tuple = ('Cl', 'Br')
    -769
    -770    use_runtime_kendrick_filter: bool = False
    -771
    -772    use_min_peaks_filter: bool = True
    -773
    -774    min_peaks_per_class: int = 15
    -775
    -776    url_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'
    -777
    -778    db_jobs: int = 3
    -779
    -780    db_chunk_size: int = 300
    -781
    -782    #query setting========
    -783    ion_charge: int = -1
    -784
    -785    min_hc_filter: float = 0.3
    -786
    -787    max_hc_filter: float = 3
    -788
    -789    min_oc_filter: float = 0.0
    -790
    -791    max_oc_filter: float = 1.2
    -792
    -793    min_op_filter: float = 2
    -794
    -795    use_pah_line_rule: bool = False
    -796
    -797    min_dbe: float = 0
    -798
    -799    max_dbe: float = 40
    -800
    -801    mz_error_score_weight: float = 0.6
    -802
    -803    isotopologue_score_weight: float = 0.4
    -804
    -805    # look for close shell ions [M + Adduct]+ only considers metal set in the list adduct_atoms  
    -806    adduct_atoms_neg: tuple = ('Cl', 'Br')
    -807
    -808    adduct_atoms_pos: tuple = ('Na', 'K')
    -809
    -810    score_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score',
    -811                            'air_filter_error', 'water_filter_error', 'earth_filter_error')
    -812
    -813    score_method: str = 'prob_score'
    -814
    -815    output_min_score: float = 0.1
    -816
    -817    output_score_method: str = 'All Candidates'
    -818
    -819    # depending on the polarity mode it looks for [M].+ , [M].-
    -820    # query and automatically compile add entry if it doesn't exist
    +            
    733@dataclasses.dataclass
    +734class MolecularFormulaSearchSettings:
    +735    """Settings for molecular searching
    +736
    +737    Attributes
    +738    ----------
    +739    use_isotopologue_filter : bool, optional
    +740        If True, use isotopologue filter. Default is False.
    +741    isotopologue_filter_threshold : float, optional
    +742        Threshold for isotopologue filter. Default is 33.
    +743    isotopologue_filter_atoms : tuple, optional
    +744        Tuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    +745    use_runtime_kendrick_filter : bool, optional
    +746        If True, use runtime Kendrick filter. Default is False.
    +747    use_min_peaks_filter : bool, optional
    +748        If True, use minimum peaks filter. Default is True.
    +749    min_peaks_per_class : int, optional
    +750        Minimum number of peaks per class. Default is 15.
    +751    url_database : str, optional
    +752        URL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
    +753    db_jobs : int, optional
    +754        Number of jobs to use for database queries. Default is 3.
    +755    db_chunk_size : int, optional
    +756        Chunk size to use for database queries. Default is 300.
    +757    ion_charge : int, optional
    +758        Ion charge. Default is -1.
    +759    min_hc_filter : float, optional
    +760        Minimum hydrogen to carbon ratio. Default is 0.3.
    +761    max_hc_filter : float, optional
    +762        Maximum hydrogen to carbon ratio. Default is 3.
    +763    min_oc_filter : float, optional
    +764        Minimum oxygen to carbon ratio. Default is 0.0.
    +765    max_oc_filter : float, optional
    +766        Maximum oxygen to carbon ratio. Default is 1.2.
    +767    min_op_filter : float, optional
    +768        Minimum oxygen to phosphorus ratio. Default is 2.
    +769    use_pah_line_rule : bool, optional
    +770        If True, use the PAH line rule. Default is False.
    +771    min_dbe : float, optional
    +772        Minimum double bond equivalent to use for searching. Default is 0.
    +773    max_dbe : float, optional
    +774        Maximum double bond equivalent to use for searching. Default is 40.
    +775    mz_error_score_weight : float, optional
    +776        Weight for m/z error score to contribute to composite score. Default is 0.6.
    +777    isotopologue_score_weight : float, optional
    +778        Weight for isotopologue score to contribute to composite score. Default is 0.4.
    +779    adduct_atoms_neg : tuple, optional
    +780        Tuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
    +781    adduct_atoms_pos : tuple, optional
    +782        Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
    +783    score_methods : tuple, optional
    +784        Tuple of score method that can be implemented.
    +785        Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
    +786    score_method : str, optional
    +787        Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
    +788    output_min_score : float, optional
    +789        Minimum score for output. Default is 0.1.
    +790    output_score_method : str, optional
    +791        Score method to use for output. Default is 'All Candidates'.
    +792    isRadical : bool, optional
    +793        If True, search for radical ions. Default is False.
    +794    isProtonated : bool, optional
    +795        If True, search for protonated ions. Default is True.
    +796    isAdduct : bool, optional
    +797        If True, search for adduct ions. Default is False.
    +798    usedAtoms : dict, optional
    +799        Dictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    +800    ion_types_excluded : list, optional
    +801        List of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
    +802    ionization_type : str, optional
    +803        Ionization type. Default is 'ESI'.
    +804    min_ppm_error : float, optional
    +805        Minimum ppm error. Default is -10.0.
    +806    max_ppm_error : float, optional
    +807        Maximum ppm error. Default is 10.0.
    +808    min_abun_error : float, optional
    +809        Minimum abundance error for isotopologue search. Default is -100.0.
    +810    max_abun_error : float, optional
    +811        Maximum abundance error for isotopologue search. Default is 100.0.
    +812    mz_error_range : float, optional
    +813        m/z error range. Default is 1.5.
    +814    error_method : str, optional
    +815        Error method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
    +816    mz_error_average : float, optional
    +817        m/z error average. Default is 0.0.
    +818    used_atom_valences : dict, optional
    +819        Dictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    +820    """
     821
    -822    isRadical: bool = False
    +822    use_isotopologue_filter: bool = False
     823
    -824    # depending on the polarity mode it looks for [M + H]+ , [M - H]+
    -825    # query and automatically compile and push options if it doesn't exist
    -826    isProtonated: bool = True
    +824    isotopologue_filter_threshold: float = 33
    +825
    +826    isotopologue_filter_atoms: tuple = ("Cl", "Br")
     827
    -828    isAdduct: bool = False
    +828    use_runtime_kendrick_filter: bool = False
     829
    -830    usedAtoms: dict = dataclasses.field(default_factory=dict)
    -831    ion_types_excluded: list = dataclasses.field(default_factory=list)
    -832
    -833    # search setting ========
    -834
    -835    ionization_type: str = 'ESI'
    -836
    -837    # empirically set / needs optimization
    -838    min_ppm_error: float = -10.0  # ppm
    +830    use_min_peaks_filter: bool = True
    +831
    +832    min_peaks_per_class: int = 15
    +833
    +834    url_database: str = (
    +835        "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    +836    )
    +837
    +838    db_jobs: int = 3
     839
    -840    # empirically set / needs optimization
    -841    max_ppm_error: float = 10.0  # ppm
    -842
    -843    # empirically set / needs optimization set for isotopologue search
    -844    min_abun_error: float = -100.0  # percentage
    -845
    -846    # empirically set / needs optimization set for isotopologue search
    -847    max_abun_error: float = 100.0  # percentage
    +840    db_chunk_size: int = 300
    +841
    +842    # query setting========
    +843    ion_charge: int = -1
    +844
    +845    min_hc_filter: float = 0.3
    +846
    +847    max_hc_filter: float = 3
     848
    -849    # empirically set / needs optimization
    -850    mz_error_range: float = 1.5
    -851
    -852    # 'distance', 'lowest', 'symmetrical','average' 'None'
    -853    error_method: str = 'None'
    +849    min_oc_filter: float = 0.0
    +850
    +851    max_oc_filter: float = 1.2
    +852
    +853    min_op_filter: float = 2
     854
    -855    mz_error_average: float = 0.0
    +855    use_pah_line_rule: bool = False
     856
    -857    # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict)
    -858    used_atom_valences: dict = dataclasses.field(default_factory=dict)
    -859
    -860    def __post_init__(self):
    -861
    -862        self.url_database = os.getenv('COREMS_DATABASE_URL', 'sqlite:///db/molformula.db')
    -863        # enforce datatype
    -864        for field in dataclasses.fields(self):
    -865            value = getattr(self, field.name)
    -866            if not isinstance(value, field.type):
    +857    min_dbe: float = 0
    +858
    +859    max_dbe: float = 40
    +860
    +861    mz_error_score_weight: float = 0.6
    +862
    +863    isotopologue_score_weight: float = 0.4
    +864
    +865    # look for close shell ions [M + Adduct]+ only considers metal set in the list adduct_atoms
    +866    adduct_atoms_neg: tuple = ("Cl", "Br")
     867
    -868                value = field.type(value)
    -869                setattr(self, field.name, value)
    -870
    -871        # enforce C and H if either do not exists
    -872        if 'C' not in self.usedAtoms.keys():
    -873            self.usedAtoms['C'] = (1, 100)
    -874        if 'H' not in self.usedAtoms.keys():
    -875            self.usedAtoms['H'] = (1, 200)
    -876
    -877        # add cummon values
    -878        current_used_atoms = self.used_atom_valences.keys()
    -879        
    -880        for atom in Atoms.atoms_covalence.keys():
    -881            
    -882            if atom not in current_used_atoms:
    -883                
    -884                covalence = Atoms.atoms_covalence.get(atom)
    -885                
    -886                if isinstance(covalence , int):
    -887                    self.used_atom_valences[atom] = covalence
    -888                
    -889                else:
    -890                    #will get the first number of all possible covalances, which should be the most commum 
    -891                    self.used_atom_valences[atom] = covalence[0]
    +868    adduct_atoms_pos: tuple = ("Na", "K")
    +869
    +870    score_methods: tuple = (
    +871        "S_P_lowest_error",
    +872        "N_S_P_lowest_error",
    +873        "lowest_error",
    +874        "prob_score",
    +875        "air_filter_error",
    +876        "water_filter_error",
    +877        "earth_filter_error",
    +878    )
    +879
    +880    score_method: str = "prob_score"
    +881
    +882    output_min_score: float = 0.1
    +883
    +884    output_score_method: str = "All Candidates"
    +885
    +886    # depending on the polarity mode it looks for [M].+ , [M].-
    +887    # query and automatically compile add entry if it doesn't exist
    +888
    +889    isRadical: bool = False
    +890
    +891    # depending on the polarity mode it looks for [M + H]+ , [M - H]+
    +892    # query and automatically compile and push options if it doesn't exist
    +893    isProtonated: bool = True
    +894
    +895    isAdduct: bool = False
    +896
    +897    usedAtoms: dict = dataclasses.field(default_factory=dict)
    +898    ion_types_excluded: list = dataclasses.field(default_factory=list)
    +899
    +900    # search setting ========
    +901
    +902    ionization_type: str = "ESI"
    +903
    +904    # empirically set / needs optimization
    +905    min_ppm_error: float = -10.0  # ppm
    +906
    +907    # empirically set / needs optimization
    +908    max_ppm_error: float = 10.0  # ppm
    +909
    +910    # empirically set / needs optimization set for isotopologue search
    +911    min_abun_error: float = -100.0  # percentage
    +912
    +913    # empirically set / needs optimization set for isotopologue search
    +914    max_abun_error: float = 100.0  # percentage
    +915
    +916    # empirically set / needs optimization
    +917    mz_error_range: float = 1.5
    +918
    +919    # 'distance', 'lowest', 'symmetrical','average' 'None'
    +920    error_method: str = "None"
    +921
    +922    mz_error_average: float = 0.0
    +923
    +924    # used_atom_valences: {'C': 4, 'H':1, etc} = dataclasses.field(default_factory=dict)
    +925    used_atom_valences: dict = dataclasses.field(default_factory=dict)
    +926
    +927    def __post_init__(self):
    +928        self.url_database = os.getenv(
    +929            "COREMS_DATABASE_URL", "sqlite:///db/molformula.db"
    +930        )
    +931        # enforce datatype
    +932        for field in dataclasses.fields(self):
    +933            value = getattr(self, field.name)
    +934            if not isinstance(value, field.type):
    +935                value = field.type(value)
    +936                setattr(self, field.name, value)
    +937
    +938        # enforce C and H if either do not exists
    +939        if "C" not in self.usedAtoms.keys():
    +940            self.usedAtoms["C"] = (1, 100)
    +941        if "H" not in self.usedAtoms.keys():
    +942            self.usedAtoms["H"] = (1, 200)
    +943
    +944        # add cummon values
    +945        current_used_atoms = self.used_atom_valences.keys()
    +946
    +947        for atom in Atoms.atoms_covalence.keys():
    +948            if atom not in current_used_atoms:
    +949                covalence = Atoms.atoms_covalence.get(atom)
    +950
    +951                if isinstance(covalence, int):
    +952                    self.used_atom_valences[atom] = covalence
    +953
    +954                else:
    +955                    # will get the first number of all possible covalances, which should be the most commum
    +956                    self.used_atom_valences[atom] = covalence[0]
     
    @@ -4471,7 +4592,7 @@
    Attributes
  • adduct_atoms_pos (tuple, optional): Tuple of atoms to use in positive polarity. Default is ('Na', 'K').
  • score_methods (tuple, optional): -Tuple of score method that can be implemented. +Tuple of score method that can be implemented. Default is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
  • score_method (str, optional): Score method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
  • diff --git a/docs/corems/encapsulation/input/parameter_from_json.html b/docs/corems/encapsulation/input/parameter_from_json.html index a30e4af6..98af3563 100644 --- a/docs/corems/encapsulation/input/parameter_from_json.html +++ b/docs/corems/encapsulation/input/parameter_from_json.html @@ -76,464 +76,504 @@

      1from pathlib import Path
    -  2import json, toml
    -  3
    -  4from corems.encapsulation.factory.parameters import MSParameters, LCMSParameters
    -  5from corems.encapsulation.factory.processingSetting  import MolecularFormulaSearchSettings, TransientSetting
    -  6from corems.encapsulation.factory.processingSetting  import MassSpectrumSetting, DataInputSetting
    -  7from corems.encapsulation.factory.processingSetting  import MassSpecPeakSetting
    -  8from corems.encapsulation.factory.processingSetting  import GasChromatographSetting, LiquidChromatographSetting
    -  9from corems.encapsulation.factory.processingSetting import CompoundSearchSettings
    - 10
    - 11def load_and_set_toml_parameters_ms(mass_spec_obj, parameters_path=False):
    - 12    """Load parameters from a toml file and set the parameters in the mass_spec_obj
    - 13    
    - 14    Parameters
    - 15    ----------
    - 16    mass_spec_obj : MassSpectrum
    - 17        corems MassSpectrum object
    - 18        
    - 19    parameters_path : str, optional
    - 20        path to the parameters file, by default False
    - 21        
    - 22    Raises
    - 23    ------
    - 24    FileNotFoundError
    - 25        if the file is not found
    - 26    """
    - 27    
    - 28    if parameters_path:
    - 29        
    - 30        file_path = Path(parameters_path)
    - 31
    - 32    else:
    - 33        
    - 34        filename='SettingsCoreMS.toml'
    - 35        file_path = Path.cwd() / filename 
    - 36
    - 37    if file_path.exists():  
    +  2import json
    +  3import toml
    +  4
    +  5from corems.encapsulation.factory.parameters import MSParameters, LCMSParameters
    +  6from corems.encapsulation.factory.processingSetting import (
    +  7    MolecularFormulaSearchSettings,
    +  8    TransientSetting,
    +  9)
    + 10from corems.encapsulation.factory.processingSetting import (
    + 11    MassSpectrumSetting,
    + 12    DataInputSetting,
    + 13)
    + 14from corems.encapsulation.factory.processingSetting import MassSpecPeakSetting
    + 15from corems.encapsulation.factory.processingSetting import GasChromatographSetting
    + 16from corems.encapsulation.factory.processingSetting import CompoundSearchSettings
    + 17
    + 18
    + 19def load_and_set_toml_parameters_ms(mass_spec_obj, parameters_path=False):
    + 20    """Load parameters from a toml file and set the parameters in the mass_spec_obj
    + 21
    + 22    Parameters
    + 23    ----------
    + 24    mass_spec_obj : MassSpectrum
    + 25        corems MassSpectrum object
    + 26
    + 27    parameters_path : str, optional
    + 28        path to the parameters file, by default False
    + 29
    + 30    Raises
    + 31    ------
    + 32    FileNotFoundError
    + 33        if the file is not found
    + 34    """
    + 35
    + 36    if parameters_path:
    + 37        file_path = Path(parameters_path)
      38
    - 39            with open(file_path, 'r', encoding='utf8',) as stream:
    - 40                data_loaded = toml.load(stream)
    - 41                _set_dict_data_ms(data_loaded, mass_spec_obj)
    - 42    else:
    - 43        
    - 44        raise FileNotFoundError("Could not locate %s", file_path)   
    - 45
    - 46def load_and_set_parameters_ms(mass_spec_obj, parameters_path=False):
    - 47    """Load parameters from a json file and set the parameters in the mass_spec_obj
    - 48
    - 49    Parameters
    - 50    ----------
    - 51    mass_spec_obj : MassSpectrum
    - 52        corems MassSpectrum object
    - 53    parameters_path : str, optional
    - 54        path to the parameters file, by default False
    - 55
    - 56    Raises
    - 57    ------
    - 58    FileNotFoundError
    - 59        if the file is not found    
    - 60    """
    - 61    
    - 62    if parameters_path:
    - 63        
    - 64        file_path = Path(parameters_path)
    - 65
    - 66    else:
    - 67        
    - 68        filename='SettingsCoreMS.json'
    - 69        file_path = Path.cwd() / filename 
    + 39    else:
    + 40        filename = "SettingsCoreMS.toml"
    + 41        file_path = Path.cwd() / filename
    + 42
    + 43    if file_path.exists():
    + 44        with open(
    + 45            file_path,
    + 46            "r",
    + 47            encoding="utf8",
    + 48        ) as stream:
    + 49            data_loaded = toml.load(stream)
    + 50            _set_dict_data_ms(data_loaded, mass_spec_obj)
    + 51    else:
    + 52        raise FileNotFoundError("Could not locate %s", file_path)
    + 53
    + 54
    + 55def load_and_set_parameters_ms(mass_spec_obj, parameters_path=False):
    + 56    """Load parameters from a json file and set the parameters in the mass_spec_obj
    + 57
    + 58    Parameters
    + 59    ----------
    + 60    mass_spec_obj : MassSpectrum
    + 61        corems MassSpectrum object
    + 62    parameters_path : str, optional
    + 63        path to the parameters file, by default False
    + 64
    + 65    Raises
    + 66    ------
    + 67    FileNotFoundError
    + 68        if the file is not found
    + 69    """
      70
    - 71    if file_path.exists():  
    - 72
    - 73            with open(file_path, 'r', encoding='utf8',) as stream:
    - 74                data_loaded = json.load(stream)
    - 75                _set_dict_data_ms(data_loaded, mass_spec_obj)
    - 76    else:
    - 77        
    - 78        raise FileNotFoundError("Could not locate %s", file_path)   
    - 79
    - 80def load_and_set_toml_parameters_gcms(gcms_obj, parameters_path=False):   
    - 81    """Load parameters from a toml file and set the parameters in the GCMS object
    - 82    
    - 83    Parameters
    - 84    ----------
    - 85    gcms_obj : GCMSBase
    - 86        corems GCMSBase object
    - 87    parameters_path : str, optional
    - 88        path to the parameters file, by default False
    - 89        
    - 90    Raises
    - 91    ------
    - 92    FileNotFoundError
    - 93        if the file is not found
    - 94    """
    - 95    
    - 96    if parameters_path:
    - 97        
    - 98        file_path = Path(parameters_path)
    + 71    if parameters_path:
    + 72        file_path = Path(parameters_path)
    + 73
    + 74    else:
    + 75        filename = "SettingsCoreMS.json"
    + 76        file_path = Path.cwd() / filename
    + 77
    + 78    if file_path.exists():
    + 79        with open(
    + 80            file_path,
    + 81            "r",
    + 82            encoding="utf8",
    + 83        ) as stream:
    + 84            data_loaded = json.load(stream)
    + 85            _set_dict_data_ms(data_loaded, mass_spec_obj)
    + 86    else:
    + 87        raise FileNotFoundError("Could not locate %s", file_path)
    + 88
    + 89
    + 90def load_and_set_toml_parameters_gcms(gcms_obj, parameters_path=False):
    + 91    """Load parameters from a toml file and set the parameters in the GCMS object
    + 92
    + 93    Parameters
    + 94    ----------
    + 95    gcms_obj : GCMSBase
    + 96        corems GCMSBase object
    + 97    parameters_path : str, optional
    + 98        path to the parameters file, by default False
      99
    -100    else:
    -101        
    -102        filename='SettingsCoreMS.toml'
    -103        file_path = Path.cwd() / filename 
    -104
    -105    if file_path.exists():  
    -106
    -107            with open(file_path, 'r', encoding='utf8',) as stream:
    -108                data_loaded = toml.load(stream)
    -109                _set_dict_data_gcms(data_loaded, gcms_obj)
    -110    else:
    -111        
    -112        raise FileNotFoundError("Could not locate %s", file_path) 
    -113
    -114def load_and_set_parameters_gcms(gcms_obj, parameters_path=False):   
    -115    """Load parameters from a json file and set the parameters in the GCMS object
    -116
    -117    Parameters
    -118    ----------
    -119    gcms_obj : GCMSBase
    -120        corems GCMSBase object
    -121    parameters_path : str, optional
    -122        path to the parameters file, by default False
    +100    Raises
    +101    ------
    +102    FileNotFoundError
    +103        if the file is not found
    +104    """
    +105
    +106    if parameters_path:
    +107        file_path = Path(parameters_path)
    +108
    +109    else:
    +110        filename = "SettingsCoreMS.toml"
    +111        file_path = Path.cwd() / filename
    +112
    +113    if file_path.exists():
    +114        with open(
    +115            file_path,
    +116            "r",
    +117            encoding="utf8",
    +118        ) as stream:
    +119            data_loaded = toml.load(stream)
    +120            _set_dict_data_gcms(data_loaded, gcms_obj)
    +121    else:
    +122        raise FileNotFoundError("Could not locate %s", file_path)
     123
    -124    Raises
    -125    ------
    -126    FileNotFoundError
    -127        if the file is not found
    -128    """        
    -129    
    -130    if parameters_path:
    -131        
    -132        file_path = Path(parameters_path)
    -133
    -134    else:
    -135        
    -136        filename='SettingsCoreMS.json'
    -137        file_path = Path.cwd() / filename 
    -138
    -139    if file_path.exists():  
    +124
    +125def load_and_set_parameters_gcms(gcms_obj, parameters_path=False):
    +126    """Load parameters from a json file and set the parameters in the GCMS object
    +127
    +128    Parameters
    +129    ----------
    +130    gcms_obj : GCMSBase
    +131        corems GCMSBase object
    +132    parameters_path : str, optional
    +133        path to the parameters file, by default False
    +134
    +135    Raises
    +136    ------
    +137    FileNotFoundError
    +138        if the file is not found
    +139    """
     140
    -141            with open(file_path, 'r', encoding='utf8',) as stream:
    -142                data_loaded = json.load(stream)
    -143                _set_dict_data_gcms(data_loaded, gcms_obj)
    +141    if parameters_path:
    +142        file_path = Path(parameters_path)
    +143
     144    else:
    -145        
    -146        raise FileNotFoundError("Could not locate %s", file_path)   
    +145        filename = "SettingsCoreMS.json"
    +146        file_path = Path.cwd() / filename
     147
    -148def load_and_set_json_parameters_lcms(lcms_obj, parameters_path=False):   
    -149    """Load parameters from a json file and set the parameters in the LCMS object
    -150
    -151    Parameters
    -152    ----------
    -153    lcms_obj : LCMSBase
    -154        corems LCMSBase object
    -155    parameters_path : str
    -156        path to the parameters file saved as a .json, by default False
    -157
    -158    Raises
    -159    ------
    -160    FileNotFoundError
    -161        if the file is not found
    -162    """        
    -163    
    -164    if parameters_path:
    -165        file_path = Path(parameters_path)
    -166
    -167    if file_path.exists():  
    -168            with open(file_path, 'r', encoding='utf8',) as stream:
    -169                data_loaded = json.load(stream)
    -170                _set_dict_data_lcms(data_loaded, lcms_obj)
    -171    else:
    -172        raise FileNotFoundError("Could not locate %s", file_path)
    -173
    -174
    -175def load_and_set_toml_parameters_lcms(lcms_obj, parameters_path=False):   
    -176    """Load parameters from a toml file and set the parameters in the LCMS object
    -177
    -178    Parameters
    -179    ----------
    -180    lcms_obj : LCMSBase
    -181        corems LCMSBase object
    -182    parameters_path : str
    -183        path to the parameters file saved as a .toml, by default False
    -184
    -185    Raises
    -186    ------
    -187    FileNotFoundError
    -188        if the file is not found
    -189    """        
    -190    
    -191    if parameters_path:
    -192        file_path = Path(parameters_path)
    +148    if file_path.exists():
    +149        with open(
    +150            file_path,
    +151            "r",
    +152            encoding="utf8",
    +153        ) as stream:
    +154            data_loaded = json.load(stream)
    +155            _set_dict_data_gcms(data_loaded, gcms_obj)
    +156    else:
    +157        raise FileNotFoundError("Could not locate %s", file_path)
    +158
    +159
    +160def load_and_set_json_parameters_lcms(lcms_obj, parameters_path=False):
    +161    """Load parameters from a json file and set the parameters in the LCMS object
    +162
    +163    Parameters
    +164    ----------
    +165    lcms_obj : LCMSBase
    +166        corems LCMSBase object
    +167    parameters_path : str
    +168        path to the parameters file saved as a .json, by default False
    +169
    +170    Raises
    +171    ------
    +172    FileNotFoundError
    +173        if the file is not found
    +174    """
    +175
    +176    if parameters_path:
    +177        file_path = Path(parameters_path)
    +178
    +179    if file_path.exists():
    +180        with open(
    +181            file_path,
    +182            "r",
    +183            encoding="utf8",
    +184        ) as stream:
    +185            data_loaded = json.load(stream)
    +186            _set_dict_data_lcms(data_loaded, lcms_obj)
    +187    else:
    +188        raise FileNotFoundError("Could not locate %s", file_path)
    +189
    +190
    +191def load_and_set_toml_parameters_lcms(lcms_obj, parameters_path=False):
    +192    """Load parameters from a toml file and set the parameters in the LCMS object
     193
    -194    if file_path.exists():  
    -195            with open(file_path, 'r', encoding='utf8',) as stream:
    -196                data_loaded = toml.load(stream)
    -197                _set_dict_data_lcms(data_loaded, lcms_obj)
    -198    else:
    -199        raise FileNotFoundError("Could not locate %s", file_path)
    +194    Parameters
    +195    ----------
    +196    lcms_obj : LCMSBase
    +197        corems LCMSBase object
    +198    parameters_path : str
    +199        path to the parameters file saved as a .toml, by default False
     200
    -201def _set_dict_data_gcms(data_loaded, gcms_obj):
    -202    """Set the parameters in the GCMS object from a dict
    -203    
    -204    This function is called by load_and_set_parameters_gcms and load_and_set_toml_parameters_gcms and should not be called directly.
    -205
    -206    Parameters
    -207    ----------
    -208    data_loaded : dict
    -209        dict with the parameters
    -210    gcms_obj : GCMSBase
    -211        corems GCMSBase object
    -212    """
    -213
    -214    classes = [GasChromatographSetting(),
    -215               CompoundSearchSettings(),
    -216              ]
    -217
    -218    labels = ["GasChromatograph", "MolecularSearch"]
    -219    
    -220    label_class = zip(labels, classes)
    +201    Raises
    +202    ------
    +203    FileNotFoundError
    +204        if the file is not found
    +205    """
    +206
    +207    if parameters_path:
    +208        file_path = Path(parameters_path)
    +209
    +210    if file_path.exists():
    +211        with open(
    +212            file_path,
    +213            "r",
    +214            encoding="utf8",
    +215        ) as stream:
    +216            data_loaded = toml.load(stream)
    +217            _set_dict_data_lcms(data_loaded, lcms_obj)
    +218    else:
    +219        raise FileNotFoundError("Could not locate %s", file_path)
    +220
     221
    -222    if data_loaded:
    -223    
    -224        for label, classe in label_class:
    -225            class_data = data_loaded.get(label)
    -226            # not always we will not all the settings
    -227            # this allow a class data to be none and continue
    -228            # to import the other classes
    -229            if class_data:
    -230                for item, value in class_data.items():
    -231                    setattr(classe, item, value)
    -232
    -233    gcms_obj.chromatogram_settings = classes[0]
    -234    gcms_obj.molecular_search_settings = classes[1]
    -235
    -236
    -237def _set_dict_data_lcms(data_loaded, lcms_obj):
    -238    """Set the parameters on a LCMS object from a dict
    -239    
    -240    This function is called by load_and_set_parameters_lcms and load_and_set_toml_parameters_lcms and should not be called directly.
    +222def _set_dict_data_gcms(data_loaded, gcms_obj):
    +223    """Set the parameters in the GCMS object from a dict
    +224
    +225    This function is called by load_and_set_parameters_gcms and load_and_set_toml_parameters_gcms and should not be called directly.
    +226
    +227    Parameters
    +228    ----------
    +229    data_loaded : dict
    +230        dict with the parameters
    +231    gcms_obj : GCMSBase
    +232        corems GCMSBase object
    +233    """
    +234
    +235    classes = [
    +236        GasChromatographSetting(),
    +237        CompoundSearchSettings(),
    +238    ]
    +239
    +240    labels = ["GasChromatograph", "MolecularSearch"]
     241
    -242    Parameters
    -243    ----------
    -244    data_loaded : dict
    -245        dict with the parameters
    -246    lcms_obj : LCMSBase
    -247        corems LCMSBase object
    -248    """
    -249
    -250    # Load the lcms parameters
    -251    default_params = LCMSParameters(use_defaults=True)
    -252    lcms_params = data_loaded.get("LiquidChromatograph")
    -253    for item, value in lcms_params.items():
    -254        # If the original value is a tuple but the new one is a list we need to convert the list to a tuple
    -255        if isinstance(value, list) and isinstance(getattr(default_params.lc_ms, item), tuple):
    -256            setattr(lcms_obj.parameters.lc_ms, item, tuple(value))
    -257        else:
    -258            setattr(lcms_obj.parameters.lc_ms, item, value)
    -259    
    -260    def set_ms_params_by_key(ms_key):
    -261        classes = [
    -262                MassSpectrumSetting,
    -263                MassSpecPeakSetting,
    -264                MolecularFormulaSearchSettings,
    -265                DataInputSetting,
    -266                TransientSetting
    -267              ]
    -268
    -269        labels = [
    -270            "mass_spectrum",
    -271            "ms_peak",
    -272            "molecular_search",
    -273            "data_input",
    -274            "transient"
    -275                ]
    -276        
    -277        label_class = zip(labels, classes)
    -278
    -279        for label, classe in label_class:
    -280            class_data = data_loaded.get("mass_spectrum").get(ms_key).get(label)
    -281            param_instance = classe()
    -282            if class_data is not None:
    -283                # Set the attributes of the nested class
    -284                for item, value in class_data.items():
    -285                    if item == "usedAtoms":
    -286                        # Convert the lists to tuples
    -287                        for atom, atom_value in value.items():
    -288                            value[atom] = tuple(atom_value)
    -289                    if isinstance(value, list) and isinstance(getattr(param_instance, item), tuple):
    -290                        setattr(param_instance, item, tuple(value))
    -291                    else:
    -292                        setattr(param_instance, item, value)
    -293            setattr(lcms_obj.parameters.mass_spectrum[ms_key], label, param_instance)
    -294
    -295    # Load the mass spectrum parameters
    -296    ms_keys = data_loaded["mass_spectrum"].keys()
    -297    for ms_key in ms_keys:
    -298        lcms_obj.parameters.mass_spectrum[ms_key] = MSParameters()
    -299        set_ms_params_by_key(ms_key)
    -300
    +242    label_class = zip(labels, classes)
    +243
    +244    if data_loaded:
    +245        for label, classe in label_class:
    +246            class_data = data_loaded.get(label)
    +247            # not always we will not all the settings
    +248            # this allow a class data to be none and continue
    +249            # to import the other classes
    +250            if class_data:
    +251                for item, value in class_data.items():
    +252                    setattr(classe, item, value)
    +253
    +254    gcms_obj.chromatogram_settings = classes[0]
    +255    gcms_obj.molecular_search_settings = classes[1]
    +256
    +257
    +258def _set_dict_data_lcms(data_loaded, lcms_obj):
    +259    """Set the parameters on a LCMS object from a dict
    +260
    +261    This function is called by load_and_set_parameters_lcms and load_and_set_toml_parameters_lcms and should not be called directly.
    +262
    +263    Parameters
    +264    ----------
    +265    data_loaded : dict
    +266        dict with the parameters
    +267    lcms_obj : LCMSBase
    +268        corems LCMSBase object
    +269    """
    +270
    +271    # Load the lcms parameters
    +272    default_params = LCMSParameters(use_defaults=True)
    +273    lcms_params = data_loaded.get("LiquidChromatograph")
    +274    for item, value in lcms_params.items():
    +275        # If the original value is a tuple but the new one is a list we need to convert the list to a tuple
    +276        if isinstance(value, list) and isinstance(
    +277            getattr(default_params.lc_ms, item), tuple
    +278        ):
    +279            setattr(lcms_obj.parameters.lc_ms, item, tuple(value))
    +280        else:
    +281            setattr(lcms_obj.parameters.lc_ms, item, value)
    +282
    +283    def set_ms_params_by_key(ms_key):
    +284        classes = [
    +285            MassSpectrumSetting,
    +286            MassSpecPeakSetting,
    +287            MolecularFormulaSearchSettings,
    +288            DataInputSetting,
    +289            TransientSetting,
    +290        ]
    +291
    +292        labels = [
    +293            "mass_spectrum",
    +294            "ms_peak",
    +295            "molecular_search",
    +296            "data_input",
    +297            "transient",
    +298        ]
    +299
    +300        label_class = zip(labels, classes)
     301
    -302def _set_dict_data_ms(data_loaded, mass_spec_obj):
    -303    """Set the parameters in the MassSpectrum object from a dict
    -304
    -305    This function is called by load_and_set_parameters_ms and load_and_set_toml_parameters_ms and should not be called directly.
    -306
    -307    Parameters
    -308    ----------
    -309    data_loaded : dict
    -310        dict with the parameters
    -311    mass_spec_obj : MassSpectrum
    -312        corems MassSpectrum object
    -313    """
    -314        
    -315    from copy import deepcopy
    -316
    -317    classes = [MolecularFormulaSearchSettings(), 
    -318               TransientSetting(),
    -319               MassSpectrumSetting(),
    -320               MassSpecPeakSetting()
    -321               ]
    -322               
    -323    labels = ["MolecularFormulaSearch", "Transient", "MassSpectrum", "MassSpecPeak"]
    -324    
    -325    label_class = zip(labels, classes)
    +302        for label, classe in label_class:
    +303            class_data = data_loaded.get("mass_spectrum").get(ms_key).get(label)
    +304            param_instance = classe()
    +305            if class_data is not None:
    +306                # Set the attributes of the nested class
    +307                for item, value in class_data.items():
    +308                    if item == "usedAtoms":
    +309                        # Convert the lists to tuples
    +310                        for atom, atom_value in value.items():
    +311                            value[atom] = tuple(atom_value)
    +312                    if isinstance(value, list) and isinstance(
    +313                        getattr(param_instance, item), tuple
    +314                    ):
    +315                        setattr(param_instance, item, tuple(value))
    +316                    else:
    +317                        setattr(param_instance, item, value)
    +318            setattr(lcms_obj.parameters.mass_spectrum[ms_key], label, param_instance)
    +319
    +320    # Load the mass spectrum parameters
    +321    ms_keys = data_loaded["mass_spectrum"].keys()
    +322    for ms_key in ms_keys:
    +323        lcms_obj.parameters.mass_spectrum[ms_key] = MSParameters()
    +324        set_ms_params_by_key(ms_key)
    +325
     326
    -327    if data_loaded:
    -328    
    -329        for label, classe in label_class:
    -330            class_data = data_loaded.get(label)
    -331            # not always we will have all the settings classes
    -332            # this allow a class data to be none and continue
    -333            # to import the other classes
    -334            if class_data:
    -335                for item, value in class_data.items():
    -336                    setattr(classe, item, value)
    -337    
    -338    mass_spec_obj.molecular_search_settings = classes[0]
    -339    mass_spec_obj.transient_settings = classes[1]
    -340    mass_spec_obj.settings = classes[2]
    -341    mass_spec_obj.mspeaks_settings = classes[3]
    -342
    -343
    -344def load_and_set_toml_parameters_class(parameter_label, instance_parameters_class, parameters_path=False):
    -345    """Load parameters from a toml file and set the parameters in the instance_parameters_class
    -346
    -347    Parameters
    -348    ----------
    -349    parameter_label : str
    -350        label of the parameters in the toml file
    -351    instance_parameters_class : object
    -352        instance of the parameters class
    -353    parameters_path : str, optional
    -354        path to the parameters file, by default False
    -355
    -356    Raises
    -357    ------
    -358    FileNotFoundError
    -359        if the file is not found
    -360
    -361    Returns
    -362    -------
    -363    object
    -364        instance of the parameters class
    -365    """
    -366    
    -367    if parameters_path: file_path = Path(parameters_path)
    +327def _set_dict_data_ms(data_loaded, mass_spec_obj):
    +328    """Set the parameters in the MassSpectrum object from a dict
    +329
    +330    This function is called by load_and_set_parameters_ms and load_and_set_toml_parameters_ms and should not be called directly.
    +331
    +332    Parameters
    +333    ----------
    +334    data_loaded : dict
    +335        dict with the parameters
    +336    mass_spec_obj : MassSpectrum
    +337        corems MassSpectrum object
    +338    """
    +339
    +340    from copy import deepcopy
    +341
    +342    classes = [
    +343        MolecularFormulaSearchSettings(),
    +344        TransientSetting(),
    +345        MassSpectrumSetting(),
    +346        MassSpecPeakSetting(),
    +347    ]
    +348
    +349    labels = ["MolecularFormulaSearch", "Transient", "MassSpectrum", "MassSpecPeak"]
    +350
    +351    label_class = zip(labels, classes)
    +352
    +353    if data_loaded:
    +354        for label, classe in label_class:
    +355            class_data = data_loaded.get(label)
    +356            # not always we will have all the settings classes
    +357            # this allow a class data to be none and continue
    +358            # to import the other classes
    +359            if class_data:
    +360                for item, value in class_data.items():
    +361                    setattr(classe, item, value)
    +362
    +363    mass_spec_obj.molecular_search_settings = classes[0]
    +364    mass_spec_obj.transient_settings = classes[1]
    +365    mass_spec_obj.settings = classes[2]
    +366    mass_spec_obj.mspeaks_settings = classes[3]
    +367
     368
    -369    else: file_path = Path.cwd() / 'SettingsCoreMS.toml' 
    -370        
    -371    if file_path.exists():
    -372        
    -373        with open(file_path, 'r', encoding='utf8',) as stream:
    -374            
    -375            data_loaded = toml.load(stream)
    -376            parameter_class = _set_dict_data(data_loaded, parameter_label, instance_parameters_class)
    -377            
    -378            return parameter_class
    -379    else:
    -380        
    -381        raise FileNotFoundError("Could not locate %s", file_path)  
    +369def load_and_set_toml_parameters_class(
    +370    parameter_label, instance_parameters_class, parameters_path=False
    +371):
    +372    """Load parameters from a toml file and set the parameters in the instance_parameters_class
    +373
    +374    Parameters
    +375    ----------
    +376    parameter_label : str
    +377        label of the parameters in the toml file
    +378    instance_parameters_class : object
    +379        instance of the parameters class
    +380    parameters_path : str, optional
    +381        path to the parameters file, by default False
     382
    -383def load_and_set_parameters_class(parameter_label, instance_parameters_class, parameters_path=False):  
    -384    """Load parameters from a json file and set the parameters in the instance_parameters_class
    -385
    -386    Parameters
    -387    ----------
    -388    parameter_label : str
    -389        label of the parameters in the json file
    -390    instance_parameters_class : object
    +383    Raises
    +384    ------
    +385    FileNotFoundError
    +386        if the file is not found
    +387
    +388    Returns
    +389    -------
    +390    object
     391        instance of the parameters class
    -392    parameters_path : str, optional
    -393        path to the parameters file, by default False
    -394
    -395    Raises
    -396    ------
    -397    FileNotFoundError
    -398        if the file is not found
    +392    """
    +393
    +394    if parameters_path:
    +395        file_path = Path(parameters_path)
    +396
    +397    else:
    +398        file_path = Path.cwd() / "SettingsCoreMS.toml"
     399
    -400    Returns
    -401    -------
    -402    object
    -403        instance of the parameters class
    -404    """
    -405    
    -406    if parameters_path: file_path = Path(parameters_path)
    -407
    -408    else: file_path = Path.cwd() / 'SettingsCoreMS.json' 
    -409        
    -410    if file_path.exists():
    -411        
    -412        with open(file_path, 'r', encoding='utf8',) as stream:
    -413            
    -414            data_loaded = json.load(stream)
    -415            parameter_class = _set_dict_data(data_loaded, parameter_label, instance_parameters_class)
    -416            
    -417            return parameter_class
    -418    else:
    -419        
    -420        raise FileNotFoundError("Could not locate %s", file_path)  
    -421    
    -422def _set_dict_data(data_loaded, parameter_label, instance_ParameterClass):
    -423    """Set the parameters in an instance of a parameter class from a dict
    -424
    -425    This function is called by load_and_set_parameters_class and load_and_set_toml_parameters_class and should not be called directly.
    -426
    -427    Parameters
    -428    ----------
    -429    data_loaded : dict
    -430        dict with the parameters
    -431    parameter_label : str
    -432        label of the parameters in the json file
    -433    instance_ParameterClass : object
    -434        instance of the parameters class
    -435
    -436    Returns
    -437    -------
    -438    object
    -439        instance of the parameters class
    -440    """
    -441    
    -442    classes = [instance_ParameterClass]
    -443               
    -444    labels = [parameter_label]
    -445    
    -446    label_class = zip(labels, classes)
    -447
    -448    if data_loaded:
    -449    
    -450        for label, classe in label_class:
    -451            class_data = data_loaded.get(label)
    -452            # not always we will have all the settings classes
    -453            # this allow a class data to be none and continue
    -454            # to import the other classes
    -455            if class_data:
    -456                for item, value in class_data.items():
    -457                    setattr(classe, item, value)
    -458    
    -459    return classes[0]
    +400    if file_path.exists():
    +401        with open(
    +402            file_path,
    +403            "r",
    +404            encoding="utf8",
    +405        ) as stream:
    +406            data_loaded = toml.load(stream)
    +407            parameter_class = _set_dict_data(
    +408                data_loaded, parameter_label, instance_parameters_class
    +409            )
    +410
    +411            return parameter_class
    +412    else:
    +413        raise FileNotFoundError("Could not locate %s", file_path)
    +414
    +415
    +416def load_and_set_parameters_class(
    +417    parameter_label, instance_parameters_class, parameters_path=False
    +418):
    +419    """Load parameters from a json file and set the parameters in the instance_parameters_class
    +420
    +421    Parameters
    +422    ----------
    +423    parameter_label : str
    +424        label of the parameters in the json file
    +425    instance_parameters_class : object
    +426        instance of the parameters class
    +427    parameters_path : str, optional
    +428        path to the parameters file, by default False
    +429
    +430    Raises
    +431    ------
    +432    FileNotFoundError
    +433        if the file is not found
    +434
    +435    Returns
    +436    -------
    +437    object
    +438        instance of the parameters class
    +439    """
    +440
    +441    if parameters_path:
    +442        file_path = Path(parameters_path)
    +443
    +444    else:
    +445        file_path = Path.cwd() / "SettingsCoreMS.json"
    +446
    +447    if file_path.exists():
    +448        with open(
    +449            file_path,
    +450            "r",
    +451            encoding="utf8",
    +452        ) as stream:
    +453            data_loaded = json.load(stream)
    +454            parameter_class = _set_dict_data(
    +455                data_loaded, parameter_label, instance_parameters_class
    +456            )
    +457
    +458            return parameter_class
    +459    else:
    +460        raise FileNotFoundError("Could not locate %s", file_path)
    +461
    +462
    +463def _set_dict_data(data_loaded, parameter_label, instance_ParameterClass):
    +464    """Set the parameters in an instance of a parameter class from a dict
    +465
    +466    This function is called by load_and_set_parameters_class and load_and_set_toml_parameters_class and should not be called directly.
    +467
    +468    Parameters
    +469    ----------
    +470    data_loaded : dict
    +471        dict with the parameters
    +472    parameter_label : str
    +473        label of the parameters in the json file
    +474    instance_ParameterClass : object
    +475        instance of the parameters class
    +476
    +477    Returns
    +478    -------
    +479    object
    +480        instance of the parameters class
    +481    """
    +482
    +483    classes = [instance_ParameterClass]
    +484
    +485    labels = [parameter_label]
    +486
    +487    label_class = zip(labels, classes)
    +488
    +489    if data_loaded:
    +490        for label, classe in label_class:
    +491            class_data = data_loaded.get(label)
    +492            # not always we will have all the settings classes
    +493            # this allow a class data to be none and continue
    +494            # to import the other classes
    +495            if class_data:
    +496                for item, value in class_data.items():
    +497                    setattr(classe, item, value)
    +498
    +499    return classes[0]
     
    @@ -549,40 +589,40 @@

    -
    12def load_and_set_toml_parameters_ms(mass_spec_obj, parameters_path=False):
    -13    """Load parameters from a toml file and set the parameters in the mass_spec_obj
    -14    
    -15    Parameters
    -16    ----------
    -17    mass_spec_obj : MassSpectrum
    -18        corems MassSpectrum object
    -19        
    -20    parameters_path : str, optional
    -21        path to the parameters file, by default False
    -22        
    -23    Raises
    -24    ------
    -25    FileNotFoundError
    -26        if the file is not found
    -27    """
    -28    
    -29    if parameters_path:
    -30        
    -31        file_path = Path(parameters_path)
    -32
    -33    else:
    -34        
    -35        filename='SettingsCoreMS.toml'
    -36        file_path = Path.cwd() / filename 
    -37
    -38    if file_path.exists():  
    +            
    20def load_and_set_toml_parameters_ms(mass_spec_obj, parameters_path=False):
    +21    """Load parameters from a toml file and set the parameters in the mass_spec_obj
    +22
    +23    Parameters
    +24    ----------
    +25    mass_spec_obj : MassSpectrum
    +26        corems MassSpectrum object
    +27
    +28    parameters_path : str, optional
    +29        path to the parameters file, by default False
    +30
    +31    Raises
    +32    ------
    +33    FileNotFoundError
    +34        if the file is not found
    +35    """
    +36
    +37    if parameters_path:
    +38        file_path = Path(parameters_path)
     39
    -40            with open(file_path, 'r', encoding='utf8',) as stream:
    -41                data_loaded = toml.load(stream)
    -42                _set_dict_data_ms(data_loaded, mass_spec_obj)
    -43    else:
    -44        
    -45        raise FileNotFoundError("Could not locate %s", file_path)   
    +40    else:
    +41        filename = "SettingsCoreMS.toml"
    +42        file_path = Path.cwd() / filename
    +43
    +44    if file_path.exists():
    +45        with open(
    +46            file_path,
    +47            "r",
    +48            encoding="utf8",
    +49        ) as stream:
    +50            data_loaded = toml.load(stream)
    +51            _set_dict_data_ms(data_loaded, mass_spec_obj)
    +52    else:
    +53        raise FileNotFoundError("Could not locate %s", file_path)
     
    @@ -617,39 +657,39 @@
    Raises
    -
    47def load_and_set_parameters_ms(mass_spec_obj, parameters_path=False):
    -48    """Load parameters from a json file and set the parameters in the mass_spec_obj
    -49
    -50    Parameters
    -51    ----------
    -52    mass_spec_obj : MassSpectrum
    -53        corems MassSpectrum object
    -54    parameters_path : str, optional
    -55        path to the parameters file, by default False
    -56
    -57    Raises
    -58    ------
    -59    FileNotFoundError
    -60        if the file is not found    
    -61    """
    -62    
    -63    if parameters_path:
    -64        
    -65        file_path = Path(parameters_path)
    -66
    -67    else:
    -68        
    -69        filename='SettingsCoreMS.json'
    -70        file_path = Path.cwd() / filename 
    +            
    56def load_and_set_parameters_ms(mass_spec_obj, parameters_path=False):
    +57    """Load parameters from a json file and set the parameters in the mass_spec_obj
    +58
    +59    Parameters
    +60    ----------
    +61    mass_spec_obj : MassSpectrum
    +62        corems MassSpectrum object
    +63    parameters_path : str, optional
    +64        path to the parameters file, by default False
    +65
    +66    Raises
    +67    ------
    +68    FileNotFoundError
    +69        if the file is not found
    +70    """
     71
    -72    if file_path.exists():  
    -73
    -74            with open(file_path, 'r', encoding='utf8',) as stream:
    -75                data_loaded = json.load(stream)
    -76                _set_dict_data_ms(data_loaded, mass_spec_obj)
    -77    else:
    -78        
    -79        raise FileNotFoundError("Could not locate %s", file_path)   
    +72    if parameters_path:
    +73        file_path = Path(parameters_path)
    +74
    +75    else:
    +76        filename = "SettingsCoreMS.json"
    +77        file_path = Path.cwd() / filename
    +78
    +79    if file_path.exists():
    +80        with open(
    +81            file_path,
    +82            "r",
    +83            encoding="utf8",
    +84        ) as stream:
    +85            data_loaded = json.load(stream)
    +86            _set_dict_data_ms(data_loaded, mass_spec_obj)
    +87    else:
    +88        raise FileNotFoundError("Could not locate %s", file_path)
     
    @@ -684,39 +724,39 @@
    Raises
    -
     81def load_and_set_toml_parameters_gcms(gcms_obj, parameters_path=False):   
    - 82    """Load parameters from a toml file and set the parameters in the GCMS object
    - 83    
    - 84    Parameters
    - 85    ----------
    - 86    gcms_obj : GCMSBase
    - 87        corems GCMSBase object
    - 88    parameters_path : str, optional
    - 89        path to the parameters file, by default False
    - 90        
    - 91    Raises
    - 92    ------
    - 93    FileNotFoundError
    - 94        if the file is not found
    - 95    """
    - 96    
    - 97    if parameters_path:
    - 98        
    - 99        file_path = Path(parameters_path)
    +            
     91def load_and_set_toml_parameters_gcms(gcms_obj, parameters_path=False):
    + 92    """Load parameters from a toml file and set the parameters in the GCMS object
    + 93
    + 94    Parameters
    + 95    ----------
    + 96    gcms_obj : GCMSBase
    + 97        corems GCMSBase object
    + 98    parameters_path : str, optional
    + 99        path to the parameters file, by default False
     100
    -101    else:
    -102        
    -103        filename='SettingsCoreMS.toml'
    -104        file_path = Path.cwd() / filename 
    -105
    -106    if file_path.exists():  
    -107
    -108            with open(file_path, 'r', encoding='utf8',) as stream:
    -109                data_loaded = toml.load(stream)
    -110                _set_dict_data_gcms(data_loaded, gcms_obj)
    -111    else:
    -112        
    -113        raise FileNotFoundError("Could not locate %s", file_path) 
    +101    Raises
    +102    ------
    +103    FileNotFoundError
    +104        if the file is not found
    +105    """
    +106
    +107    if parameters_path:
    +108        file_path = Path(parameters_path)
    +109
    +110    else:
    +111        filename = "SettingsCoreMS.toml"
    +112        file_path = Path.cwd() / filename
    +113
    +114    if file_path.exists():
    +115        with open(
    +116            file_path,
    +117            "r",
    +118            encoding="utf8",
    +119        ) as stream:
    +120            data_loaded = toml.load(stream)
    +121            _set_dict_data_gcms(data_loaded, gcms_obj)
    +122    else:
    +123        raise FileNotFoundError("Could not locate %s", file_path)
     
    @@ -751,39 +791,39 @@
    Raises
    -
    115def load_and_set_parameters_gcms(gcms_obj, parameters_path=False):   
    -116    """Load parameters from a json file and set the parameters in the GCMS object
    -117
    -118    Parameters
    -119    ----------
    -120    gcms_obj : GCMSBase
    -121        corems GCMSBase object
    -122    parameters_path : str, optional
    -123        path to the parameters file, by default False
    -124
    -125    Raises
    -126    ------
    -127    FileNotFoundError
    -128        if the file is not found
    -129    """        
    -130    
    -131    if parameters_path:
    -132        
    -133        file_path = Path(parameters_path)
    -134
    -135    else:
    -136        
    -137        filename='SettingsCoreMS.json'
    -138        file_path = Path.cwd() / filename 
    -139
    -140    if file_path.exists():  
    +            
    126def load_and_set_parameters_gcms(gcms_obj, parameters_path=False):
    +127    """Load parameters from a json file and set the parameters in the GCMS object
    +128
    +129    Parameters
    +130    ----------
    +131    gcms_obj : GCMSBase
    +132        corems GCMSBase object
    +133    parameters_path : str, optional
    +134        path to the parameters file, by default False
    +135
    +136    Raises
    +137    ------
    +138    FileNotFoundError
    +139        if the file is not found
    +140    """
     141
    -142            with open(file_path, 'r', encoding='utf8',) as stream:
    -143                data_loaded = json.load(stream)
    -144                _set_dict_data_gcms(data_loaded, gcms_obj)
    +142    if parameters_path:
    +143        file_path = Path(parameters_path)
    +144
     145    else:
    -146        
    -147        raise FileNotFoundError("Could not locate %s", file_path)   
    +146        filename = "SettingsCoreMS.json"
    +147        file_path = Path.cwd() / filename
    +148
    +149    if file_path.exists():
    +150        with open(
    +151            file_path,
    +152            "r",
    +153            encoding="utf8",
    +154        ) as stream:
    +155            data_loaded = json.load(stream)
    +156            _set_dict_data_gcms(data_loaded, gcms_obj)
    +157    else:
    +158        raise FileNotFoundError("Could not locate %s", file_path)
     
    @@ -818,31 +858,35 @@
    Raises
    -
    149def load_and_set_json_parameters_lcms(lcms_obj, parameters_path=False):   
    -150    """Load parameters from a json file and set the parameters in the LCMS object
    -151
    -152    Parameters
    -153    ----------
    -154    lcms_obj : LCMSBase
    -155        corems LCMSBase object
    -156    parameters_path : str
    -157        path to the parameters file saved as a .json, by default False
    -158
    -159    Raises
    -160    ------
    -161    FileNotFoundError
    -162        if the file is not found
    -163    """        
    -164    
    -165    if parameters_path:
    -166        file_path = Path(parameters_path)
    -167
    -168    if file_path.exists():  
    -169            with open(file_path, 'r', encoding='utf8',) as stream:
    -170                data_loaded = json.load(stream)
    -171                _set_dict_data_lcms(data_loaded, lcms_obj)
    -172    else:
    -173        raise FileNotFoundError("Could not locate %s", file_path)
    +            
    161def load_and_set_json_parameters_lcms(lcms_obj, parameters_path=False):
    +162    """Load parameters from a json file and set the parameters in the LCMS object
    +163
    +164    Parameters
    +165    ----------
    +166    lcms_obj : LCMSBase
    +167        corems LCMSBase object
    +168    parameters_path : str
    +169        path to the parameters file saved as a .json, by default False
    +170
    +171    Raises
    +172    ------
    +173    FileNotFoundError
    +174        if the file is not found
    +175    """
    +176
    +177    if parameters_path:
    +178        file_path = Path(parameters_path)
    +179
    +180    if file_path.exists():
    +181        with open(
    +182            file_path,
    +183            "r",
    +184            encoding="utf8",
    +185        ) as stream:
    +186            data_loaded = json.load(stream)
    +187            _set_dict_data_lcms(data_loaded, lcms_obj)
    +188    else:
    +189        raise FileNotFoundError("Could not locate %s", file_path)
     
    @@ -877,31 +921,35 @@
    Raises
    -
    176def load_and_set_toml_parameters_lcms(lcms_obj, parameters_path=False):   
    -177    """Load parameters from a toml file and set the parameters in the LCMS object
    -178
    -179    Parameters
    -180    ----------
    -181    lcms_obj : LCMSBase
    -182        corems LCMSBase object
    -183    parameters_path : str
    -184        path to the parameters file saved as a .toml, by default False
    -185
    -186    Raises
    -187    ------
    -188    FileNotFoundError
    -189        if the file is not found
    -190    """        
    -191    
    -192    if parameters_path:
    -193        file_path = Path(parameters_path)
    +            
    192def load_and_set_toml_parameters_lcms(lcms_obj, parameters_path=False):
    +193    """Load parameters from a toml file and set the parameters in the LCMS object
     194
    -195    if file_path.exists():  
    -196            with open(file_path, 'r', encoding='utf8',) as stream:
    -197                data_loaded = toml.load(stream)
    -198                _set_dict_data_lcms(data_loaded, lcms_obj)
    -199    else:
    -200        raise FileNotFoundError("Could not locate %s", file_path)
    +195    Parameters
    +196    ----------
    +197    lcms_obj : LCMSBase
    +198        corems LCMSBase object
    +199    parameters_path : str
    +200        path to the parameters file saved as a .toml, by default False
    +201
    +202    Raises
    +203    ------
    +204    FileNotFoundError
    +205        if the file is not found
    +206    """
    +207
    +208    if parameters_path:
    +209        file_path = Path(parameters_path)
    +210
    +211    if file_path.exists():
    +212        with open(
    +213            file_path,
    +214            "r",
    +215            encoding="utf8",
    +216        ) as stream:
    +217            data_loaded = toml.load(stream)
    +218            _set_dict_data_lcms(data_loaded, lcms_obj)
    +219    else:
    +220        raise FileNotFoundError("Could not locate %s", file_path)
     
    @@ -936,44 +984,51 @@
    Raises
    -
    345def load_and_set_toml_parameters_class(parameter_label, instance_parameters_class, parameters_path=False):
    -346    """Load parameters from a toml file and set the parameters in the instance_parameters_class
    -347
    -348    Parameters
    -349    ----------
    -350    parameter_label : str
    -351        label of the parameters in the toml file
    -352    instance_parameters_class : object
    -353        instance of the parameters class
    -354    parameters_path : str, optional
    -355        path to the parameters file, by default False
    -356
    -357    Raises
    -358    ------
    -359    FileNotFoundError
    -360        if the file is not found
    -361
    -362    Returns
    -363    -------
    -364    object
    -365        instance of the parameters class
    -366    """
    -367    
    -368    if parameters_path: file_path = Path(parameters_path)
    -369
    -370    else: file_path = Path.cwd() / 'SettingsCoreMS.toml' 
    -371        
    -372    if file_path.exists():
    -373        
    -374        with open(file_path, 'r', encoding='utf8',) as stream:
    -375            
    -376            data_loaded = toml.load(stream)
    -377            parameter_class = _set_dict_data(data_loaded, parameter_label, instance_parameters_class)
    -378            
    -379            return parameter_class
    -380    else:
    -381        
    -382        raise FileNotFoundError("Could not locate %s", file_path)  
    +            
    370def load_and_set_toml_parameters_class(
    +371    parameter_label, instance_parameters_class, parameters_path=False
    +372):
    +373    """Load parameters from a toml file and set the parameters in the instance_parameters_class
    +374
    +375    Parameters
    +376    ----------
    +377    parameter_label : str
    +378        label of the parameters in the toml file
    +379    instance_parameters_class : object
    +380        instance of the parameters class
    +381    parameters_path : str, optional
    +382        path to the parameters file, by default False
    +383
    +384    Raises
    +385    ------
    +386    FileNotFoundError
    +387        if the file is not found
    +388
    +389    Returns
    +390    -------
    +391    object
    +392        instance of the parameters class
    +393    """
    +394
    +395    if parameters_path:
    +396        file_path = Path(parameters_path)
    +397
    +398    else:
    +399        file_path = Path.cwd() / "SettingsCoreMS.toml"
    +400
    +401    if file_path.exists():
    +402        with open(
    +403            file_path,
    +404            "r",
    +405            encoding="utf8",
    +406        ) as stream:
    +407            data_loaded = toml.load(stream)
    +408            parameter_class = _set_dict_data(
    +409                data_loaded, parameter_label, instance_parameters_class
    +410            )
    +411
    +412            return parameter_class
    +413    else:
    +414        raise FileNotFoundError("Could not locate %s", file_path)
     
    @@ -1016,44 +1071,51 @@
    Returns
    -
    384def load_and_set_parameters_class(parameter_label, instance_parameters_class, parameters_path=False):  
    -385    """Load parameters from a json file and set the parameters in the instance_parameters_class
    -386
    -387    Parameters
    -388    ----------
    -389    parameter_label : str
    -390        label of the parameters in the json file
    -391    instance_parameters_class : object
    -392        instance of the parameters class
    -393    parameters_path : str, optional
    -394        path to the parameters file, by default False
    -395
    -396    Raises
    -397    ------
    -398    FileNotFoundError
    -399        if the file is not found
    -400
    -401    Returns
    -402    -------
    -403    object
    -404        instance of the parameters class
    -405    """
    -406    
    -407    if parameters_path: file_path = Path(parameters_path)
    -408
    -409    else: file_path = Path.cwd() / 'SettingsCoreMS.json' 
    -410        
    -411    if file_path.exists():
    -412        
    -413        with open(file_path, 'r', encoding='utf8',) as stream:
    -414            
    -415            data_loaded = json.load(stream)
    -416            parameter_class = _set_dict_data(data_loaded, parameter_label, instance_parameters_class)
    -417            
    -418            return parameter_class
    -419    else:
    -420        
    -421        raise FileNotFoundError("Could not locate %s", file_path)  
    +            
    417def load_and_set_parameters_class(
    +418    parameter_label, instance_parameters_class, parameters_path=False
    +419):
    +420    """Load parameters from a json file and set the parameters in the instance_parameters_class
    +421
    +422    Parameters
    +423    ----------
    +424    parameter_label : str
    +425        label of the parameters in the json file
    +426    instance_parameters_class : object
    +427        instance of the parameters class
    +428    parameters_path : str, optional
    +429        path to the parameters file, by default False
    +430
    +431    Raises
    +432    ------
    +433    FileNotFoundError
    +434        if the file is not found
    +435
    +436    Returns
    +437    -------
    +438    object
    +439        instance of the parameters class
    +440    """
    +441
    +442    if parameters_path:
    +443        file_path = Path(parameters_path)
    +444
    +445    else:
    +446        file_path = Path.cwd() / "SettingsCoreMS.json"
    +447
    +448    if file_path.exists():
    +449        with open(
    +450            file_path,
    +451            "r",
    +452            encoding="utf8",
    +453        ) as stream:
    +454            data_loaded = json.load(stream)
    +455            parameter_class = _set_dict_data(
    +456                data_loaded, parameter_label, instance_parameters_class
    +457            )
    +458
    +459            return parameter_class
    +460    else:
    +461        raise FileNotFoundError("Could not locate %s", file_path)
     
    diff --git a/docs/corems/encapsulation/output/parameter_to_dict.html b/docs/corems/encapsulation/output/parameter_to_dict.html index 028ce0af..5e80e647 100644 --- a/docs/corems/encapsulation/output/parameter_to_dict.html +++ b/docs/corems/encapsulation/output/parameter_to_dict.html @@ -72,114 +72,119 @@

    -
      1from corems.encapsulation.factory.parameters import MSParameters, GCMSParameters, LCMSParameters
    -  2
    -  3def get_dict_all_default_data():
    -  4    """ Return a dictionary with all default parameters for MS and GCMS
    -  5    
    -  6    """
    -  7    ms_params = MSParameters(use_defaults=True)
    -  8    gcms_params = GCMSParameters(use_defaults=True)
    -  9
    - 10    return { "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    - 11             "Transient": ms_params.transient.__dict__,
    - 12             "MassSpectrum": ms_params.mass_spectrum.__dict__,
    - 13             "MassSpecPeak": ms_params.ms_peak.__dict__,
    - 14             "DataInput": ms_params.data_input.__dict__,
    - 15             "MolecularSearch": gcms_params.molecular_search.__dict__,
    - 16             "GasChromatograph": gcms_params.gc_ms.__dict__,
    - 17            }
    - 18
    - 19def get_dict_data_lcms(lcms_obj):
    - 20    """ Return a dictionary with all parameters for LCMSBase object
    - 21
    - 22    Parameters
    - 23    ----------
    - 24    lcms_obj: LCMSBase
    - 25        LCMSBase object
    +                        
      1from corems.encapsulation.factory.parameters import (
    +  2    MSParameters,
    +  3    GCMSParameters,
    +  4    LCMSParameters,
    +  5)
    +  6
    +  7
    +  8def get_dict_all_default_data():
    +  9    """Return a dictionary with all default parameters for MS and GCMS"""
    + 10    ms_params = MSParameters(use_defaults=True)
    + 11    gcms_params = GCMSParameters(use_defaults=True)
    + 12
    + 13    return {
    + 14        "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    + 15        "Transient": ms_params.transient.__dict__,
    + 16        "MassSpectrum": ms_params.mass_spectrum.__dict__,
    + 17        "MassSpecPeak": ms_params.ms_peak.__dict__,
    + 18        "DataInput": ms_params.data_input.__dict__,
    + 19        "MolecularSearch": gcms_params.molecular_search.__dict__,
    + 20        "GasChromatograph": gcms_params.gc_ms.__dict__,
    + 21    }
    + 22
    + 23
    + 24def get_dict_data_lcms(lcms_obj):
    + 25    """Return a dictionary with all parameters for LCMSBase object
      26
    - 27    Returns
    - 28    -------
    - 29    dict
    - 30        dictionary with all parameters for LCMSBase object    
    - 31    """
    - 32    output_dict = {}
    - 33    output_dict["LiquidChromatograph"] = lcms_obj.parameters.lc_ms.__dict__
    - 34    output_dict["mass_spectrum"] = {}
    - 35    for key, value in lcms_obj.parameters.mass_spectrum.items():
    - 36        output_dict["mass_spectrum"][key] = {}
    - 37        for k, v in value.__dict__.items():
    - 38            output_dict["mass_spectrum"][key][k] = v.__dict__
    - 39    return output_dict
    - 40
    - 41def get_dict_lcms_default_data():
    - 42    """ Return a dictionary with all default parameters for LCMS
    - 43    
    - 44    """
    - 45    default_params = LCMSParameters(use_defaults=True)
    + 27    Parameters
    + 28    ----------
    + 29    lcms_obj: LCMSBase
    + 30        LCMSBase object
    + 31
    + 32    Returns
    + 33    -------
    + 34    dict
    + 35        dictionary with all parameters for LCMSBase object
    + 36    """
    + 37    output_dict = {}
    + 38    output_dict["LiquidChromatograph"] = lcms_obj.parameters.lc_ms.__dict__
    + 39    output_dict["mass_spectrum"] = {}
    + 40    for key, value in lcms_obj.parameters.mass_spectrum.items():
    + 41        output_dict["mass_spectrum"][key] = {}
    + 42        for k, v in value.__dict__.items():
    + 43            output_dict["mass_spectrum"][key][k] = v.__dict__
    + 44    return output_dict
    + 45
      46
    - 47    output_dict = {}
    - 48    output_dict["LiquidChromatograph"] = default_params.lc_ms.__dict__
    - 49    output_dict["mass_spectrum"] = {}
    - 50    for key, value in default_params.mass_spectrum.items():
    - 51        output_dict["mass_spectrum"][key] = {}
    - 52        for k, v in value.__dict__.items():
    - 53            output_dict["mass_spectrum"][key][k] = v.__dict__
    - 54    return output_dict
    - 55
    - 56def get_dict_data_ms(mass_spec):
    - 57    """ Return a dictionary with all parameters for MassSpectrum object
    - 58
    - 59    Parameters
    - 60    ----------
    - 61    mass_spec: MassSpectrum
    - 62        MassSpectrum object
    - 63    
    - 64    Returns
    - 65    -------
    - 66    dict
    - 67        dictionary with all parameters for MassSpectrum object    
    - 68    """
    - 69    ms_params = mass_spec.parameters
    - 70    return { "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    - 71            "Transient": ms_params.transient.__dict__,
    - 72            "MassSpectrum": ms_params.mass_spectrum.__dict__,
    - 73            "MassSpecPeak": ms_params.ms_peak.__dict__,
    - 74            "DataInput": ms_params.data_input.__dict__,
    - 75            }
    - 76
    - 77                
    - 78def get_dict_ms_default_data():
    - 79    """ Return a dictionary with all default parameters for MS including data input
    - 80    
    - 81    """
    - 82    ms_params = MSParameters(use_defaults=True)
    - 83    
    - 84    return { "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    - 85            "Transient": ms_params.transient.__dict__,
    - 86            "MassSpectrum": ms_params.mass_spectrum.__dict__,
    - 87            "MassSpecPeak": ms_params.ms_peak.__dict__,
    - 88            "DataInput": ms_params.data_input.__dict__,
    - 89            }
    - 90
    - 91def get_dict_gcms_default_data():
    - 92    """ Return a dictionary with all default parameters for GCMS
    - 93    
    - 94    """
    - 95    default_gcms_params = GCMSParameters(use_defaults=True)
    + 47def get_dict_lcms_default_data():
    + 48    """Return a dictionary with all default parameters for LCMS"""
    + 49    default_params = LCMSParameters(use_defaults=True)
    + 50
    + 51    output_dict = {}
    + 52    output_dict["LiquidChromatograph"] = default_params.lc_ms.__dict__
    + 53    output_dict["mass_spectrum"] = {}
    + 54    for key, value in default_params.mass_spectrum.items():
    + 55        output_dict["mass_spectrum"][key] = {}
    + 56        for k, v in value.__dict__.items():
    + 57            output_dict["mass_spectrum"][key][k] = v.__dict__
    + 58    return output_dict
    + 59
    + 60
    + 61def get_dict_data_ms(mass_spec):
    + 62    """Return a dictionary with all parameters for MassSpectrum object
    + 63
    + 64    Parameters
    + 65    ----------
    + 66    mass_spec: MassSpectrum
    + 67        MassSpectrum object
    + 68
    + 69    Returns
    + 70    -------
    + 71    dict
    + 72        dictionary with all parameters for MassSpectrum object
    + 73    """
    + 74    ms_params = mass_spec.parameters
    + 75    return {
    + 76        "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    + 77        "Transient": ms_params.transient.__dict__,
    + 78        "MassSpectrum": ms_params.mass_spectrum.__dict__,
    + 79        "MassSpecPeak": ms_params.ms_peak.__dict__,
    + 80        "DataInput": ms_params.data_input.__dict__,
    + 81    }
    + 82
    + 83
    + 84def get_dict_ms_default_data():
    + 85    """Return a dictionary with all default parameters for MS including data input"""
    + 86    ms_params = MSParameters(use_defaults=True)
    + 87
    + 88    return {
    + 89        "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    + 90        "Transient": ms_params.transient.__dict__,
    + 91        "MassSpectrum": ms_params.mass_spectrum.__dict__,
    + 92        "MassSpecPeak": ms_params.ms_peak.__dict__,
    + 93        "DataInput": ms_params.data_input.__dict__,
    + 94    }
    + 95
      96
    - 97    return { "MolecularSearch": default_gcms_params.molecular_search.__dict__,
    - 98             "GasChromatograph": default_gcms_params.gc_ms.__dict__,
    - 99            }
    + 97def get_dict_gcms_default_data():
    + 98    """Return a dictionary with all default parameters for GCMS"""
    + 99    default_gcms_params = GCMSParameters(use_defaults=True)
     100
    -101def get_dict_data_gcms(gcms):
    -102    """ Return a dictionary with all parameters for GCMS
    -103    
    -104    """
    +101    return {
    +102        "MolecularSearch": default_gcms_params.molecular_search.__dict__,
    +103        "GasChromatograph": default_gcms_params.gc_ms.__dict__,
    +104    }
     105
    -106    return { "MolecularSearch": gcms.molecular_search_settings.__dict__,
    -107             "GasChromatograph":  gcms.chromatogram_settings.__dict__,
    -108            }          
    +106
    +107def get_dict_data_gcms(gcms):
    +108    """Return a dictionary with all parameters for GCMS"""
    +109
    +110    return {
    +111        "MolecularSearch": gcms.molecular_search_settings.__dict__,
    +112        "GasChromatograph": gcms.chromatogram_settings.__dict__,
    +113    }
     
    @@ -195,21 +200,20 @@

    -
     4def get_dict_all_default_data():
    - 5    """ Return a dictionary with all default parameters for MS and GCMS
    - 6    
    - 7    """
    - 8    ms_params = MSParameters(use_defaults=True)
    - 9    gcms_params = GCMSParameters(use_defaults=True)
    -10
    -11    return { "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    -12             "Transient": ms_params.transient.__dict__,
    -13             "MassSpectrum": ms_params.mass_spectrum.__dict__,
    -14             "MassSpecPeak": ms_params.ms_peak.__dict__,
    -15             "DataInput": ms_params.data_input.__dict__,
    -16             "MolecularSearch": gcms_params.molecular_search.__dict__,
    -17             "GasChromatograph": gcms_params.gc_ms.__dict__,
    -18            }
    +            
     9def get_dict_all_default_data():
    +10    """Return a dictionary with all default parameters for MS and GCMS"""
    +11    ms_params = MSParameters(use_defaults=True)
    +12    gcms_params = GCMSParameters(use_defaults=True)
    +13
    +14    return {
    +15        "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    +16        "Transient": ms_params.transient.__dict__,
    +17        "MassSpectrum": ms_params.mass_spectrum.__dict__,
    +18        "MassSpecPeak": ms_params.ms_peak.__dict__,
    +19        "DataInput": ms_params.data_input.__dict__,
    +20        "MolecularSearch": gcms_params.molecular_search.__dict__,
    +21        "GasChromatograph": gcms_params.gc_ms.__dict__,
    +22    }
     
    @@ -229,27 +233,27 @@

    -
    20def get_dict_data_lcms(lcms_obj):
    -21    """ Return a dictionary with all parameters for LCMSBase object
    -22
    -23    Parameters
    -24    ----------
    -25    lcms_obj: LCMSBase
    -26        LCMSBase object
    +            
    25def get_dict_data_lcms(lcms_obj):
    +26    """Return a dictionary with all parameters for LCMSBase object
     27
    -28    Returns
    -29    -------
    -30    dict
    -31        dictionary with all parameters for LCMSBase object    
    -32    """
    -33    output_dict = {}
    -34    output_dict["LiquidChromatograph"] = lcms_obj.parameters.lc_ms.__dict__
    -35    output_dict["mass_spectrum"] = {}
    -36    for key, value in lcms_obj.parameters.mass_spectrum.items():
    -37        output_dict["mass_spectrum"][key] = {}
    -38        for k, v in value.__dict__.items():
    -39            output_dict["mass_spectrum"][key][k] = v.__dict__
    -40    return output_dict
    +28    Parameters
    +29    ----------
    +30    lcms_obj: LCMSBase
    +31        LCMSBase object
    +32
    +33    Returns
    +34    -------
    +35    dict
    +36        dictionary with all parameters for LCMSBase object
    +37    """
    +38    output_dict = {}
    +39    output_dict["LiquidChromatograph"] = lcms_obj.parameters.lc_ms.__dict__
    +40    output_dict["mass_spectrum"] = {}
    +41    for key, value in lcms_obj.parameters.mass_spectrum.items():
    +42        output_dict["mass_spectrum"][key] = {}
    +43        for k, v in value.__dict__.items():
    +44            output_dict["mass_spectrum"][key][k] = v.__dict__
    +45    return output_dict
     
    @@ -282,20 +286,18 @@
    Returns
    -
    42def get_dict_lcms_default_data():
    -43    """ Return a dictionary with all default parameters for LCMS
    -44    
    -45    """
    -46    default_params = LCMSParameters(use_defaults=True)
    -47
    -48    output_dict = {}
    -49    output_dict["LiquidChromatograph"] = default_params.lc_ms.__dict__
    -50    output_dict["mass_spectrum"] = {}
    -51    for key, value in default_params.mass_spectrum.items():
    -52        output_dict["mass_spectrum"][key] = {}
    -53        for k, v in value.__dict__.items():
    -54            output_dict["mass_spectrum"][key][k] = v.__dict__
    -55    return output_dict
    +            
    48def get_dict_lcms_default_data():
    +49    """Return a dictionary with all default parameters for LCMS"""
    +50    default_params = LCMSParameters(use_defaults=True)
    +51
    +52    output_dict = {}
    +53    output_dict["LiquidChromatograph"] = default_params.lc_ms.__dict__
    +54    output_dict["mass_spectrum"] = {}
    +55    for key, value in default_params.mass_spectrum.items():
    +56        output_dict["mass_spectrum"][key] = {}
    +57        for k, v in value.__dict__.items():
    +58            output_dict["mass_spectrum"][key][k] = v.__dict__
    +59    return output_dict
     
    @@ -315,26 +317,27 @@
    Returns
    -
    57def get_dict_data_ms(mass_spec):
    -58    """ Return a dictionary with all parameters for MassSpectrum object
    -59
    -60    Parameters
    -61    ----------
    -62    mass_spec: MassSpectrum
    -63        MassSpectrum object
    -64    
    -65    Returns
    -66    -------
    -67    dict
    -68        dictionary with all parameters for MassSpectrum object    
    -69    """
    -70    ms_params = mass_spec.parameters
    -71    return { "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    -72            "Transient": ms_params.transient.__dict__,
    -73            "MassSpectrum": ms_params.mass_spectrum.__dict__,
    -74            "MassSpecPeak": ms_params.ms_peak.__dict__,
    -75            "DataInput": ms_params.data_input.__dict__,
    -76            }
    +            
    62def get_dict_data_ms(mass_spec):
    +63    """Return a dictionary with all parameters for MassSpectrum object
    +64
    +65    Parameters
    +66    ----------
    +67    mass_spec: MassSpectrum
    +68        MassSpectrum object
    +69
    +70    Returns
    +71    -------
    +72    dict
    +73        dictionary with all parameters for MassSpectrum object
    +74    """
    +75    ms_params = mass_spec.parameters
    +76    return {
    +77        "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    +78        "Transient": ms_params.transient.__dict__,
    +79        "MassSpectrum": ms_params.mass_spectrum.__dict__,
    +80        "MassSpecPeak": ms_params.ms_peak.__dict__,
    +81        "DataInput": ms_params.data_input.__dict__,
    +82    }
     
    @@ -367,18 +370,17 @@
    Returns
    -
    79def get_dict_ms_default_data():
    -80    """ Return a dictionary with all default parameters for MS including data input
    -81    
    -82    """
    -83    ms_params = MSParameters(use_defaults=True)
    -84    
    -85    return { "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    -86            "Transient": ms_params.transient.__dict__,
    -87            "MassSpectrum": ms_params.mass_spectrum.__dict__,
    -88            "MassSpecPeak": ms_params.ms_peak.__dict__,
    -89            "DataInput": ms_params.data_input.__dict__,
    -90            }
    +            
    85def get_dict_ms_default_data():
    +86    """Return a dictionary with all default parameters for MS including data input"""
    +87    ms_params = MSParameters(use_defaults=True)
    +88
    +89    return {
    +90        "MolecularFormulaSearch": ms_params.molecular_search.__dict__,
    +91        "Transient": ms_params.transient.__dict__,
    +92        "MassSpectrum": ms_params.mass_spectrum.__dict__,
    +93        "MassSpecPeak": ms_params.ms_peak.__dict__,
    +94        "DataInput": ms_params.data_input.__dict__,
    +95    }
     
    @@ -398,15 +400,14 @@
    Returns
    -
     92def get_dict_gcms_default_data():
    - 93    """ Return a dictionary with all default parameters for GCMS
    - 94    
    - 95    """
    - 96    default_gcms_params = GCMSParameters(use_defaults=True)
    - 97
    - 98    return { "MolecularSearch": default_gcms_params.molecular_search.__dict__,
    - 99             "GasChromatograph": default_gcms_params.gc_ms.__dict__,
    -100            }
    +            
     98def get_dict_gcms_default_data():
    + 99    """Return a dictionary with all default parameters for GCMS"""
    +100    default_gcms_params = GCMSParameters(use_defaults=True)
    +101
    +102    return {
    +103        "MolecularSearch": default_gcms_params.molecular_search.__dict__,
    +104        "GasChromatograph": default_gcms_params.gc_ms.__dict__,
    +105    }
     
    @@ -426,14 +427,13 @@
    Returns
    -
    102def get_dict_data_gcms(gcms):
    -103    """ Return a dictionary with all parameters for GCMS
    -104    
    -105    """
    -106
    -107    return { "MolecularSearch": gcms.molecular_search_settings.__dict__,
    -108             "GasChromatograph":  gcms.chromatogram_settings.__dict__,
    -109            }          
    +            
    108def get_dict_data_gcms(gcms):
    +109    """Return a dictionary with all parameters for GCMS"""
    +110
    +111    return {
    +112        "MolecularSearch": gcms.molecular_search_settings.__dict__,
    +113        "GasChromatograph": gcms.chromatogram_settings.__dict__,
    +114    }
     
    diff --git a/docs/corems/encapsulation/output/parameter_to_json.html b/docs/corems/encapsulation/output/parameter_to_json.html index 60f6b65a..63f19a9b 100644 --- a/docs/corems/encapsulation/output/parameter_to_json.html +++ b/docs/corems/encapsulation/output/parameter_to_json.html @@ -81,207 +81,261 @@

    4from pathlib import Path 5 6from corems.encapsulation.output import parameter_to_dict - 7from corems.encapsulation.output.parameter_to_dict import get_dict_data_lcms, get_dict_lcms_default_data + 7from corems.encapsulation.output.parameter_to_dict import get_dict_data_lcms 8 - 9def dump_all_settings_json(filename='SettingsCoreMS.json', file_path=None): - 10 """ - 11 Write JSON file into current directory with all the default settings for the CoreMS package. - 12 - 13 Parameters: - 14 ---------- - 15 filename : str, optional - 16 The name of the JSON file to be created. Default is 'SettingsCoreMS.json'. - 17 file_path : str or Path, optional - 18 The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory. - 19 """ - 20 - 21 data_dict_all = parameter_to_dict.get_dict_all_default_data() - 22 - 23 if not file_path: - 24 file_path = Path.cwd() / filename - 25 - 26 with open(file_path, 'w', encoding='utf8', ) as outfile: - 27 - 28 import re - 29 #pretty print - 30 output = json.dumps(data_dict_all, sort_keys=False, indent=4, separators=(',', ': ')) - 31 output = re.sub(r'",\s+', '", ', output) - 32 - 33 outfile.write(output) - 34 - 35def dump_ms_settings_json(filename='SettingsCoreMS.json', file_path=None): - 36 """ - 37 Write JSON file into current directory with all the mass spectrum default settings for the CoreMS package. - 38 - 39 Parameters - 40 ---------- - 41 filename : str, optional - 42 The name of the JSON file to be created. Default is 'SettingsCoreMS.json'. - 43 file_path : str or Path, optional - 44 The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory. 
- 45 - 46 """ - 47 data_dict = parameter_to_dict.get_dict_ms_default_data() - 48 if not file_path: - 49 file_path = Path.cwd() / filename - 50 - 51 with open(file_path, 'w', encoding='utf8', ) as outfile: - 52 import re - 53 #pretty print - 54 output = json.dumps(data_dict, sort_keys=False, indent=4, separators=(',', ': ')) - 55 output = re.sub(r'",\s+', '", ', output) - 56 - 57 outfile.write(output) + 9 + 10def dump_all_settings_json(filename="SettingsCoreMS.json", file_path=None): + 11 """ + 12 Write JSON file into current directory with all the default settings for the CoreMS package. + 13 + 14 Parameters: + 15 ---------- + 16 filename : str, optional + 17 The name of the JSON file to be created. Default is 'SettingsCoreMS.json'. + 18 file_path : str or Path, optional + 19 The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory. + 20 """ + 21 + 22 data_dict_all = parameter_to_dict.get_dict_all_default_data() + 23 + 24 if not file_path: + 25 file_path = Path.cwd() / filename + 26 + 27 with open( + 28 file_path, + 29 "w", + 30 encoding="utf8", + 31 ) as outfile: + 32 import re + 33 + 34 # pretty print + 35 output = json.dumps( + 36 data_dict_all, sort_keys=False, indent=4, separators=(",", ": ") + 37 ) + 38 output = re.sub(r'",\s+', '", ', output) + 39 + 40 outfile.write(output) + 41 + 42 + 43def dump_ms_settings_json(filename="SettingsCoreMS.json", file_path=None): + 44 """ + 45 Write JSON file into current directory with all the mass spectrum default settings for the CoreMS package. + 46 + 47 Parameters + 48 ---------- + 49 filename : str, optional + 50 The name of the JSON file to be created. Default is 'SettingsCoreMS.json'. + 51 file_path : str or Path, optional + 52 The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory. 
+ 53 + 54 """ + 55 data_dict = parameter_to_dict.get_dict_ms_default_data() + 56 if not file_path: + 57 file_path = Path.cwd() / filename 58 - 59def dump_gcms_settings_json(filename='SettingsCoreMS.json', file_path=None): - 60 """ - 61 Write JSON file into current directory containing the default GCMS settings data. - 62 - 63 Parameters - 64 ---------- - 65 filename : str, optional - 66 The name of the JSON file to be created. Default is 'SettingsCoreMS.json'. - 67 file_path : str or Path-like object, optional - 68 The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory. - 69 """ - 70 - 71 from pathlib import Path - 72 import json - 73 - 74 data_dict = parameter_to_dict.get_dict_gcms_default_data() - 75 - 76 if not file_path: - 77 file_path = Path.cwd() / filename - 78 - 79 with open(file_path, 'w', encoding='utf8', ) as outfile: - 80 import re - 81 #pretty print - 82 output = json.dumps(data_dict, sort_keys=False, indent=4, separators=(',', ': ')) - 83 output = re.sub(r'",\s+', '", ', output) - 84 - 85 outfile.write(output) + 59 with open( + 60 file_path, + 61 "w", + 62 encoding="utf8", + 63 ) as outfile: + 64 import re + 65 + 66 # pretty print + 67 output = json.dumps( + 68 data_dict, sort_keys=False, indent=4, separators=(",", ": ") + 69 ) + 70 output = re.sub(r'",\s+', '", ', output) + 71 + 72 outfile.write(output) + 73 + 74 + 75def dump_gcms_settings_json(filename="SettingsCoreMS.json", file_path=None): + 76 """ + 77 Write JSON file into current directory containing the default GCMS settings data. + 78 + 79 Parameters + 80 ---------- + 81 filename : str, optional + 82 The name of the JSON file to be created. Default is 'SettingsCoreMS.json'. + 83 file_path : str or Path-like object, optional + 84 The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory. 
+ 85 """ 86 - 87def dump_all_settings_toml(filename='SettingsCoreMS.toml', file_path=None): - 88 """ - 89 Write TOML file into the specified file path or the current directory with all the default settings for the CoreMS package. - 90 - 91 Parameters - 92 ---------- - 93 filename : str, optional - 94 The name of the TOML file. Defaults to 'SettingsCoreMS.toml'. - 95 file_path : str or Path, optional - 96 The path where the TOML file will be saved. If not provided, the file will be saved in the current directory. - 97 - 98 """ - 99 from pathlib import Path -100 -101 data_dict_all = parameter_to_dict.get_dict_all_default_data() -102 -103 if not file_path: -104 file_path = Path.cwd() / filename -105 -106 with open(file_path, 'w', encoding='utf8', ) as outfile: -107 import re -108 output = toml.dumps(data_dict_all) -109 outfile.write(output) + 87 from pathlib import Path + 88 import json + 89 + 90 data_dict = parameter_to_dict.get_dict_gcms_default_data() + 91 + 92 if not file_path: + 93 file_path = Path.cwd() / filename + 94 + 95 with open( + 96 file_path, + 97 "w", + 98 encoding="utf8", + 99 ) as outfile: +100 import re +101 +102 # pretty print +103 output = json.dumps( +104 data_dict, sort_keys=False, indent=4, separators=(",", ": ") +105 ) +106 output = re.sub(r'",\s+', '", ', output) +107 +108 outfile.write(output) +109 110 -111def dump_ms_settings_toml(filename='SettingsCoreMS.toml', file_path=None): +111def dump_all_settings_toml(filename="SettingsCoreMS.toml", file_path=None): 112 """ -113 Write TOML file into the current directory with all the mass spectrum default settings for the CoreMS package. +113 Write TOML file into the specified file path or the current directory with all the default settings for the CoreMS package. 114 115 Parameters 116 ---------- 117 filename : str, optional -118 The name of the TOML file to be created. Default is 'SettingsCoreMS.toml'. +118 The name of the TOML file. Defaults to 'SettingsCoreMS.toml'. 
119 file_path : str or Path, optional -120 The path where the TOML file should be saved. If not provided, the file will be saved in the current working directory. +120 The path where the TOML file will be saved. If not provided, the file will be saved in the current directory. 121 122 """ -123 data_dict = parameter_to_dict.get_dict_ms_default_data() +123 from pathlib import Path 124 -125 if not file_path: -126 file_path = Path.cwd() / filename -127 -128 with open(file_path, 'w', encoding='utf8', ) as outfile: -129 import re -130 # pretty print -131 output = toml.dumps(data_dict) -132 outfile.write(output) -133 -134def dump_gcms_settings_toml(filename='SettingsCoreMS.toml', file_path=None): -135 """ -136 Write TOML file into current directory containing the default GCMS settings data. -137 -138 Parameters -139 ---------- -140 filename : str, optional -141 The name of the TOML file. Defaults to 'SettingsCoreMS.toml'. -142 file_path : str or Path, optional -143 The path where the TOML file will be saved. If not provided, the file will be saved in the current working directory. -144 -145 """ -146 -147 data_dict = parameter_to_dict.get_dict_gcms_default_data() -148 -149 if not file_path: -150 file_path = Path.cwd() / filename -151 -152 with open(file_path, 'w', encoding='utf8', ) as outfile: -153 output = toml.dumps(data_dict) -154 outfile.write(output) -155 -156def dump_lcms_settings_json(filename='SettingsCoreMS.json', file_path=None, lcms_obj=None): -157 """ -158 Write JSON file into current directory with all the LCMS settings data for the CoreMS package. -159 -160 Parameters -161 ---------- -162 filename : str, optional -163 The name of the JSON file. Defaults to 'SettingsCoreMS.json'. -164 file_path : str or Path, optional -165 The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory. -166 lcms_obj : object, optional -167 The LCMS object containing the settings data. 
If not provided, the settings data will be retrieved from the default settings. +125 data_dict_all = parameter_to_dict.get_dict_all_default_data() +126 +127 if not file_path: +128 file_path = Path.cwd() / filename +129 +130 with open( +131 file_path, +132 "w", +133 encoding="utf8", +134 ) as outfile: +135 import re +136 +137 output = toml.dumps(data_dict_all) +138 outfile.write(output) +139 +140 +141def dump_ms_settings_toml(filename="SettingsCoreMS.toml", file_path=None): +142 """ +143 Write TOML file into the current directory with all the mass spectrum default settings for the CoreMS package. +144 +145 Parameters +146 ---------- +147 filename : str, optional +148 The name of the TOML file to be created. Default is 'SettingsCoreMS.toml'. +149 file_path : str or Path, optional +150 The path where the TOML file should be saved. If not provided, the file will be saved in the current working directory. +151 +152 """ +153 data_dict = parameter_to_dict.get_dict_ms_default_data() +154 +155 if not file_path: +156 file_path = Path.cwd() / filename +157 +158 with open( +159 file_path, +160 "w", +161 encoding="utf8", +162 ) as outfile: +163 import re +164 +165 # pretty print +166 output = toml.dumps(data_dict) +167 outfile.write(output) 168 -169 """ -170 -171 if lcms_obj is None: -172 data_dict = parameter_to_dict.get_dict_lcms_default_data() -173 else: -174 data_dict = get_dict_data_lcms(lcms_obj) -175 -176 if not file_path: -177 file_path = Path.cwd() / filename -178 -179 with open(file_path, 'w', encoding='utf8', ) as outfile: -180 outfile.write(json.dumps(data_dict, indent=4)) -181 -182def dump_lcms_settings_toml(filename='SettingsCoreMS.toml', file_path=None, lcms_obj=None): -183 """ -184 Write TOML file into current directory with all the LCMS settings data for the CoreMS package. -185 -186 Parameters -187 ---------- -188 filename : str, optional -189 The name of the TOML file. Defaults to 'SettingsCoreMS.toml'. 
-190 file_path : str or Path, optional -191 The path where the TOML file will be saved. If not provided, the file will be saved in the current working directory. -192 lcms_obj : object, optional -193 The LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings. -194 -195 """ -196 -197 if lcms_obj is None: -198 data_dict = parameter_to_dict.get_dict_lcms_default_data() -199 else: -200 data_dict = get_dict_data_lcms(lcms_obj) -201 -202 if not file_path: -203 file_path = Path.cwd() / filename -204 -205 with open(file_path, 'w', encoding='utf8', ) as outfile: -206 output = toml.dumps(data_dict) -207 outfile.write(output) +169 +170def dump_gcms_settings_toml(filename="SettingsCoreMS.toml", file_path=None): +171 """ +172 Write TOML file into current directory containing the default GCMS settings data. +173 +174 Parameters +175 ---------- +176 filename : str, optional +177 The name of the TOML file. Defaults to 'SettingsCoreMS.toml'. +178 file_path : str or Path, optional +179 The path where the TOML file will be saved. If not provided, the file will be saved in the current working directory. +180 +181 """ +182 +183 data_dict = parameter_to_dict.get_dict_gcms_default_data() +184 +185 if not file_path: +186 file_path = Path.cwd() / filename +187 +188 with open( +189 file_path, +190 "w", +191 encoding="utf8", +192 ) as outfile: +193 output = toml.dumps(data_dict) +194 outfile.write(output) +195 +196 +197def dump_lcms_settings_json( +198 filename="SettingsCoreMS.json", file_path=None, lcms_obj=None +199): +200 """ +201 Write JSON file into current directory with all the LCMS settings data for the CoreMS package. +202 +203 Parameters +204 ---------- +205 filename : str, optional +206 The name of the JSON file. Defaults to 'SettingsCoreMS.json'. +207 file_path : str or Path, optional +208 The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory. 
+209 lcms_obj : object, optional +210 The LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings. +211 +212 """ +213 +214 if lcms_obj is None: +215 data_dict = parameter_to_dict.get_dict_lcms_default_data() +216 else: +217 data_dict = get_dict_data_lcms(lcms_obj) +218 +219 if not file_path: +220 file_path = Path.cwd() / filename +221 +222 with open( +223 file_path, +224 "w", +225 encoding="utf8", +226 ) as outfile: +227 outfile.write(json.dumps(data_dict, indent=4)) +228 +229 +230def dump_lcms_settings_toml( +231 filename="SettingsCoreMS.toml", file_path=None, lcms_obj=None +232): +233 """ +234 Write TOML file into current directory with all the LCMS settings data for the CoreMS package. +235 +236 Parameters +237 ---------- +238 filename : str, optional +239 The name of the TOML file. Defaults to 'SettingsCoreMS.toml'. +240 file_path : str or Path, optional +241 The path where the TOML file will be saved. If not provided, the file will be saved in the current working directory. +242 lcms_obj : object, optional +243 The LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings. +244 +245 """ +246 +247 if lcms_obj is None: +248 data_dict = parameter_to_dict.get_dict_lcms_default_data() +249 else: +250 data_dict = get_dict_data_lcms(lcms_obj) +251 +252 if not file_path: +253 file_path = Path.cwd() / filename +254 +255 with open( +256 file_path, +257 "w", +258 encoding="utf8", +259 ) as outfile: +260 output = toml.dumps(data_dict) +261 outfile.write(output)

    @@ -297,31 +351,37 @@

    -
    10def dump_all_settings_json(filename='SettingsCoreMS.json', file_path=None):
    -11    """
    -12    Write JSON file into current directory with all the default settings for the CoreMS package.
    -13    
    -14    Parameters:
    -15    ----------
    -16    filename : str, optional
    -17        The name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    -18    file_path : str or Path, optional
    -19        The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    -20    """
    -21    
    -22    data_dict_all = parameter_to_dict.get_dict_all_default_data()
    -23    
    -24    if not file_path:
    -25        file_path = Path.cwd() / filename 
    -26    
    -27    with open(file_path, 'w', encoding='utf8', ) as outfile:
    -28            
    -29        import re
    -30        #pretty print 
    -31        output = json.dumps(data_dict_all, sort_keys=False, indent=4, separators=(',', ': '))
    -32        output = re.sub(r'",\s+', '", ', output)
    -33        
    -34        outfile.write(output)
    +            
    11def dump_all_settings_json(filename="SettingsCoreMS.json", file_path=None):
    +12    """
    +13    Write JSON file into current directory with all the default settings for the CoreMS package.
    +14
    +15    Parameters:
    +16    ----------
    +17    filename : str, optional
    +18        The name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    +19    file_path : str or Path, optional
    +20        The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    +21    """
    +22
    +23    data_dict_all = parameter_to_dict.get_dict_all_default_data()
    +24
    +25    if not file_path:
    +26        file_path = Path.cwd() / filename
    +27
    +28    with open(
    +29        file_path,
    +30        "w",
    +31        encoding="utf8",
    +32    ) as outfile:
    +33        import re
    +34
    +35        # pretty print
    +36        output = json.dumps(
    +37            data_dict_all, sort_keys=False, indent=4, separators=(",", ": ")
    +38        )
    +39        output = re.sub(r'",\s+', '", ', output)
    +40
    +41        outfile.write(output)
     
    @@ -348,29 +408,36 @@

    Parameters:

    -
    36def dump_ms_settings_json(filename='SettingsCoreMS.json', file_path=None):
    -37    """
    -38    Write JSON file into current directory with all the mass spectrum default settings for the CoreMS package.
    -39
    -40    Parameters
    -41    ----------
    -42    filename : str, optional
    -43        The name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    -44    file_path : str or Path, optional
    -45        The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    -46
    -47    """
    -48    data_dict = parameter_to_dict.get_dict_ms_default_data()
    -49    if not file_path:
    -50        file_path = Path.cwd() / filename 
    -51    
    -52    with open(file_path, 'w', encoding='utf8', ) as outfile:
    -53        import re
    -54        #pretty print 
    -55        output = json.dumps(data_dict, sort_keys=False, indent=4, separators=(',', ': '))
    -56        output = re.sub(r'",\s+', '", ', output)
    -57        
    -58        outfile.write(output)
    +            
    44def dump_ms_settings_json(filename="SettingsCoreMS.json", file_path=None):
    +45    """
    +46    Write JSON file into current directory with all the mass spectrum default settings for the CoreMS package.
    +47
    +48    Parameters
    +49    ----------
    +50    filename : str, optional
    +51        The name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    +52    file_path : str or Path, optional
    +53        The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    +54
    +55    """
    +56    data_dict = parameter_to_dict.get_dict_ms_default_data()
    +57    if not file_path:
    +58        file_path = Path.cwd() / filename
    +59
    +60    with open(
    +61        file_path,
    +62        "w",
    +63        encoding="utf8",
    +64    ) as outfile:
    +65        import re
    +66
    +67        # pretty print
    +68        output = json.dumps(
    +69            data_dict, sort_keys=False, indent=4, separators=(",", ": ")
    +70        )
    +71        output = re.sub(r'",\s+', '", ', output)
    +72
    +73        outfile.write(output)
     
    @@ -399,33 +466,40 @@
    Parameters
    -
    60def dump_gcms_settings_json(filename='SettingsCoreMS.json', file_path=None):
    -61    """
    -62    Write JSON file into current directory containing the default GCMS settings data.
    -63
    -64    Parameters
    -65    ----------
    -66    filename : str, optional
    -67        The name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    -68    file_path : str or Path-like object, optional
    -69        The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    -70    """
    -71
    -72    from pathlib import Path
    -73    import json
    -74    
    -75    data_dict = parameter_to_dict.get_dict_gcms_default_data()
    -76
    -77    if not file_path:
    -78        file_path = Path.cwd() / filename 
    -79    
    -80    with open(file_path, 'w', encoding='utf8', ) as outfile:
    -81        import re
    -82        #pretty print 
    -83        output = json.dumps(data_dict, sort_keys=False, indent=4, separators=(',', ': '))
    -84        output = re.sub(r'",\s+', '", ', output)
    -85        
    -86        outfile.write(output)
    +            
     76def dump_gcms_settings_json(filename="SettingsCoreMS.json", file_path=None):
    + 77    """
    + 78    Write JSON file into current directory containing the default GCMS settings data.
    + 79
    + 80    Parameters
    + 81    ----------
    + 82    filename : str, optional
    + 83        The name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    + 84    file_path : str or Path-like object, optional
    + 85        The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    + 86    """
    + 87
    + 88    from pathlib import Path
    + 89    import json
    + 90
    + 91    data_dict = parameter_to_dict.get_dict_gcms_default_data()
    + 92
    + 93    if not file_path:
    + 94        file_path = Path.cwd() / filename
    + 95
    + 96    with open(
    + 97        file_path,
    + 98        "w",
    + 99        encoding="utf8",
    +100    ) as outfile:
    +101        import re
    +102
    +103        # pretty print
    +104        output = json.dumps(
    +105            data_dict, sort_keys=False, indent=4, separators=(",", ": ")
    +106        )
    +107        output = re.sub(r'",\s+', '", ', output)
    +108
    +109        outfile.write(output)
     
    @@ -454,29 +528,34 @@
    Parameters
    -
     88def dump_all_settings_toml(filename='SettingsCoreMS.toml', file_path=None):
    - 89    """
    - 90    Write TOML file into the specified file path or the current directory with all the default settings for the CoreMS package.
    - 91
    - 92    Parameters
    - 93    ----------
    - 94    filename : str, optional
    - 95        The name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    - 96    file_path : str or Path, optional
    - 97        The path where the TOML file will be saved. If not provided, the file will be saved in the current directory.
    - 98
    - 99    """
    -100    from pathlib import Path
    -101    
    -102    data_dict_all = parameter_to_dict.get_dict_all_default_data()
    -103    
    -104    if not file_path:
    -105        file_path = Path.cwd() / filename
    -106    
    -107    with open(file_path, 'w', encoding='utf8', ) as outfile:
    -108        import re
    -109        output = toml.dumps(data_dict_all)
    -110        outfile.write(output)
    +            
    112def dump_all_settings_toml(filename="SettingsCoreMS.toml", file_path=None):
    +113    """
    +114    Write TOML file into the specified file path or the current directory with all the default settings for the CoreMS package.
    +115
    +116    Parameters
    +117    ----------
    +118    filename : str, optional
    +119        The name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    +120    file_path : str or Path, optional
    +121        The path where the TOML file will be saved. If not provided, the file will be saved in the current directory.
    +122
    +123    """
    +124    from pathlib import Path
    +125
    +126    data_dict_all = parameter_to_dict.get_dict_all_default_data()
    +127
    +128    if not file_path:
    +129        file_path = Path.cwd() / filename
    +130
    +131    with open(
    +132        file_path,
    +133        "w",
    +134        encoding="utf8",
    +135    ) as outfile:
    +136        import re
    +137
    +138        output = toml.dumps(data_dict_all)
    +139        outfile.write(output)
     
    @@ -505,28 +584,33 @@
    Parameters
    -
    112def dump_ms_settings_toml(filename='SettingsCoreMS.toml', file_path=None):
    -113    """
    -114    Write TOML file into the current directory with all the mass spectrum default settings for the CoreMS package.
    -115
    -116    Parameters
    -117    ----------
    -118    filename : str, optional
    -119        The name of the TOML file to be created. Default is 'SettingsCoreMS.toml'.
    -120    file_path : str or Path, optional
    -121        The path where the TOML file should be saved. If not provided, the file will be saved in the current working directory.
    -122
    -123    """
    -124    data_dict = parameter_to_dict.get_dict_ms_default_data()
    -125
    -126    if not file_path:
    -127        file_path = Path.cwd() / filename 
    -128    
    -129    with open(file_path, 'w', encoding='utf8', ) as outfile:
    -130        import re
    -131        # pretty print 
    -132        output = toml.dumps(data_dict)
    -133        outfile.write(output)
    +            
    142def dump_ms_settings_toml(filename="SettingsCoreMS.toml", file_path=None):
    +143    """
    +144    Write TOML file into the current directory with all the mass spectrum default settings for the CoreMS package.
    +145
    +146    Parameters
    +147    ----------
    +148    filename : str, optional
    +149        The name of the TOML file to be created. Default is 'SettingsCoreMS.toml'.
    +150    file_path : str or Path, optional
    +151        The path where the TOML file should be saved. If not provided, the file will be saved in the current working directory.
    +152
    +153    """
    +154    data_dict = parameter_to_dict.get_dict_ms_default_data()
    +155
    +156    if not file_path:
    +157        file_path = Path.cwd() / filename
    +158
    +159    with open(
    +160        file_path,
    +161        "w",
    +162        encoding="utf8",
    +163    ) as outfile:
    +164        import re
    +165
    +166        # pretty print
    +167        output = toml.dumps(data_dict)
    +168        outfile.write(output)
     
    @@ -555,27 +639,31 @@
    Parameters
    -
    135def dump_gcms_settings_toml(filename='SettingsCoreMS.toml', file_path=None):
    -136    """
    -137    Write TOML file into current directory containing the default GCMS settings data.
    -138
    -139    Parameters
    -140    ----------
    -141    filename : str, optional
    -142        The name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    -143    file_path : str or Path, optional
    -144        The path where the TOML file will be saved. If not provided, the file will be saved in the current working directory.
    -145    
    -146    """
    -147
    -148    data_dict = parameter_to_dict.get_dict_gcms_default_data()
    -149
    -150    if not file_path:
    -151        file_path = Path.cwd() / filename 
    -152    
    -153    with open(file_path, 'w', encoding='utf8', ) as outfile:
    -154        output = toml.dumps(data_dict)
    -155        outfile.write(output)
    +            
    171def dump_gcms_settings_toml(filename="SettingsCoreMS.toml", file_path=None):
    +172    """
    +173    Write TOML file into current directory containing the default GCMS settings data.
    +174
    +175    Parameters
    +176    ----------
    +177    filename : str, optional
    +178        The name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    +179    file_path : str or Path, optional
    +180        The path where the TOML file will be saved. If not provided, the file will be saved in the current working directory.
    +181
    +182    """
    +183
    +184    data_dict = parameter_to_dict.get_dict_gcms_default_data()
    +185
    +186    if not file_path:
    +187        file_path = Path.cwd() / filename
    +188
    +189    with open(
    +190        file_path,
    +191        "w",
    +192        encoding="utf8",
    +193    ) as outfile:
    +194        output = toml.dumps(data_dict)
    +195        outfile.write(output)
     
    @@ -604,31 +692,37 @@
    Parameters
    -
    157def dump_lcms_settings_json(filename='SettingsCoreMS.json', file_path=None, lcms_obj=None):
    -158    """
    -159    Write JSON file into current directory with all the LCMS settings data for the CoreMS package.
    -160
    -161    Parameters
    -162    ----------
    -163    filename : str, optional
    -164        The name of the JSON file. Defaults to 'SettingsCoreMS.json'.
    -165    file_path : str or Path, optional
    -166        The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    -167    lcms_obj : object, optional
    -168        The LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings.
    -169
    -170    """
    -171   
    -172    if lcms_obj is None:
    -173        data_dict = parameter_to_dict.get_dict_lcms_default_data()
    -174    else:
    -175        data_dict = get_dict_data_lcms(lcms_obj)
    -176    
    -177    if not file_path:
    -178        file_path = Path.cwd() / filename 
    -179    
    -180    with open(file_path, 'w', encoding='utf8', ) as outfile:
    -181        outfile.write(json.dumps(data_dict, indent=4))
    +            
    198def dump_lcms_settings_json(
    +199    filename="SettingsCoreMS.json", file_path=None, lcms_obj=None
    +200):
    +201    """
    +202    Write JSON file into current directory with all the LCMS settings data for the CoreMS package.
    +203
    +204    Parameters
    +205    ----------
    +206    filename : str, optional
    +207        The name of the JSON file. Defaults to 'SettingsCoreMS.json'.
    +208    file_path : str or Path, optional
    +209        The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    +210    lcms_obj : object, optional
    +211        The LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings.
    +212
    +213    """
    +214
    +215    if lcms_obj is None:
    +216        data_dict = parameter_to_dict.get_dict_lcms_default_data()
    +217    else:
    +218        data_dict = get_dict_data_lcms(lcms_obj)
    +219
    +220    if not file_path:
    +221        file_path = Path.cwd() / filename
    +222
    +223    with open(
    +224        file_path,
    +225        "w",
    +226        encoding="utf8",
    +227    ) as outfile:
    +228        outfile.write(json.dumps(data_dict, indent=4))
     
    @@ -659,32 +753,38 @@
    Parameters
    -
    183def dump_lcms_settings_toml(filename='SettingsCoreMS.toml', file_path=None, lcms_obj=None):
    -184    """
    -185    Write TOML file into current directory with all the LCMS settings data for the CoreMS package.
    -186
    -187    Parameters
    -188    ----------
    -189    filename : str, optional
    -190        The name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    -191    file_path : str or Path, optional
    -192        The path where the TOML file will be saved. If not provided, the file will be saved in the current working directory.
    -193    lcms_obj : object, optional
    -194        The LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings.
    -195
    -196    """
    -197    
    -198    if lcms_obj is None:
    -199        data_dict = parameter_to_dict.get_dict_lcms_default_data()
    -200    else:
    -201        data_dict = get_dict_data_lcms(lcms_obj)
    -202    
    -203    if not file_path:
    -204        file_path = Path.cwd() / filename 
    -205    
    -206    with open(file_path, 'w', encoding='utf8', ) as outfile:
    -207        output = toml.dumps(data_dict)
    -208        outfile.write(output)
    +            
    231def dump_lcms_settings_toml(
    +232    filename="SettingsCoreMS.toml", file_path=None, lcms_obj=None
    +233):
    +234    """
    +235    Write TOML file into current directory with all the LCMS settings data for the CoreMS package.
    +236
    +237    Parameters
    +238    ----------
    +239    filename : str, optional
    +240        The name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    +241    file_path : str or Path, optional
    +242        The path where the TOML file will be saved. If not provided, the file will be saved in the current working directory.
    +243    lcms_obj : object, optional
    +244        The LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings.
    +245
    +246    """
    +247
    +248    if lcms_obj is None:
    +249        data_dict = parameter_to_dict.get_dict_lcms_default_data()
    +250    else:
    +251        data_dict = get_dict_data_lcms(lcms_obj)
    +252
    +253    if not file_path:
    +254        file_path = Path.cwd() / filename
    +255
    +256    with open(
    +257        file_path,
    +258        "w",
    +259        encoding="utf8",
    +260    ) as outfile:
    +261        output = toml.dumps(data_dict)
    +262        outfile.write(output)
     
    diff --git a/docs/corems/mass_spectra/calc/GC_Calc.html b/docs/corems/mass_spectra/calc/GC_Calc.html index 57a343d5..0b907b1b 100644 --- a/docs/corems/mass_spectra/calc/GC_Calc.html +++ b/docs/corems/mass_spectra/calc/GC_Calc.html @@ -76,79 +76,83 @@

    5import numpy as np 6from pandas import Series 7 - 8from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecCentroidLowRes - 9from corems.mass_spectra.calc import SignalProcessing as sp + 8from corems.mass_spectra.calc import SignalProcessing as sp + 9 10 -11 -12class GC_Calculations: -13 -14 def calibrate_ri(self, ref_dict, cal_file_path): -15 -16 if not self: -17 -18 self.process_chromatogram() -19 -20 for gcms_peak in self: -21 -22 gcms_peak.calc_ri(ref_dict) -23 -24 self.ri_pairs_ref = ref_dict -25 if isinstance(cal_file_path, str): -26 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed -27 self.cal_file_path = Path(cal_file_path) -28 else: -29 self.cal_file_path = cal_file_path -30 -31 def smooth_tic(self, tic): -32 -33 implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method -34 -35 pol_order = self.chromatogram_settings.savgol_pol_order -36 -37 window_len = self.chromatogram_settings.smooth_window +11class GC_Calculations: +12 def calibrate_ri(self, ref_dict, cal_file_path): +13 if not self: +14 self.process_chromatogram() +15 +16 for gcms_peak in self: +17 gcms_peak.calc_ri(ref_dict) +18 +19 self.ri_pairs_ref = ref_dict +20 if isinstance(cal_file_path, str): +21 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed +22 self.cal_file_path = Path(cal_file_path) +23 else: +24 self.cal_file_path = cal_file_path +25 +26 def smooth_tic(self, tic): +27 implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method +28 +29 pol_order = self.chromatogram_settings.savgol_pol_order +30 +31 window_len = self.chromatogram_settings.smooth_window +32 +33 window = self.chromatogram_settings.smooth_method +34 +35 return sp.smooth_signal( +36 tic, window_len, window, pol_order, implemented_smooth_method +37 ) 38 -39 window = self.chromatogram_settings.smooth_method -40 -41 return sp.smooth_signal(tic, window_len, window, pol_order, implemented_smooth_method) -42 
-43 def centroid_detector(self, tic, rt): -44 -45 noise_std = self.chromatogram_settings.std_noise_threshold -46 -47 method = self.chromatogram_settings.noise_threshold_method -48 -49 #peak picking -50 min_height = self.chromatogram_settings.peak_height_min_percent -51 min_datapoints = self.chromatogram_settings.min_peak_datapoints -52 -53 # baseline detection -54 max_prominence = self.chromatogram_settings.peak_max_prominence_percent -55 max_height = self.chromatogram_settings.peak_height_max_percent -56 -57 peak_indexes_generator = sp.peak_detector_generator(tic, noise_std, method, rt, max_height, min_height, max_prominence, min_datapoints) -58 -59 return peak_indexes_generator -60 -61 def remove_outliers(self, data): -62 -63 from numpy import percentile -64 q25, q75 = percentile(data, 25), percentile(data, 75) -65 iqr = q75 - q25 -66 if self.parameters.verbose_processing: -67 print('Percentiles: 25th=%.3f, 75th=%.3f, IQR=%.3f' % (q25, q75, iqr)) -68 # calculate the outlier cutoff -69 cut_off = iqr * 1.5 -70 lower, upper = q25 - cut_off, q75 + cut_off -71 # identify outliers -72 outliers = [x for x in data if x < lower or x > upper] -73 if self.parameters.verbose_processing: -74 print('Identified outliers: %d' % len(outliers)) -75 # remove outliers -76 nanfilled_outliers = Series([x if lower <= x <= upper else np.nan for x in data]) -77 -78 return nanfilled_outliers -79 -80 +39 def centroid_detector(self, tic, rt): +40 noise_std = self.chromatogram_settings.std_noise_threshold +41 +42 method = self.chromatogram_settings.noise_threshold_method +43 +44 # peak picking +45 min_height = self.chromatogram_settings.peak_height_min_percent +46 min_datapoints = self.chromatogram_settings.min_peak_datapoints +47 +48 # baseline detection +49 max_prominence = self.chromatogram_settings.peak_max_prominence_percent +50 max_height = self.chromatogram_settings.peak_height_max_percent +51 +52 peak_indexes_generator = sp.peak_detector_generator( +53 tic, +54 noise_std, +55 method, 
+56 rt, +57 max_height, +58 min_height, +59 max_prominence, +60 min_datapoints, +61 ) +62 +63 return peak_indexes_generator +64 +65 def remove_outliers(self, data): +66 from numpy import percentile +67 +68 q25, q75 = percentile(data, 25), percentile(data, 75) +69 iqr = q75 - q25 +70 if self.parameters.verbose_processing: +71 print("Percentiles: 25th=%.3f, 75th=%.3f, IQR=%.3f" % (q25, q75, iqr)) +72 # calculate the outlier cutoff +73 cut_off = iqr * 1.5 +74 lower, upper = q25 - cut_off, q75 + cut_off +75 # identify outliers +76 outliers = [x for x in data if x < lower or x > upper] +77 if self.parameters.verbose_processing: +78 print("Identified outliers: %d" % len(outliers)) +79 # remove outliers +80 nanfilled_outliers = Series( +81 [x if lower <= x <= upper else np.nan for x in data] +82 ) +83 +84 return nanfilled_outliers

    @@ -164,73 +168,80 @@

    -
    13class GC_Calculations:
    -14    
    -15    def calibrate_ri(self, ref_dict, cal_file_path):
    -16        
    -17        if not self:
    -18            
    -19            self.process_chromatogram()
    -20
    -21        for gcms_peak in self:
    -22        
    -23            gcms_peak.calc_ri(ref_dict)
    -24
    -25        self.ri_pairs_ref = ref_dict
    -26        if  isinstance(cal_file_path, str):
    -27			# if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    -28            self.cal_file_path = Path(cal_file_path)
    -29        else:
    -30            self.cal_file_path = cal_file_path
    -31        
    -32    def smooth_tic(self, tic):
    -33            
    -34        implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method
    -35        
    -36        pol_order = self.chromatogram_settings.savgol_pol_order
    -37
    -38        window_len = self.chromatogram_settings.smooth_window
    +            
    12class GC_Calculations:
    +13    def calibrate_ri(self, ref_dict, cal_file_path):
    +14        if not self:
    +15            self.process_chromatogram()
    +16
    +17        for gcms_peak in self:
    +18            gcms_peak.calc_ri(ref_dict)
    +19
    +20        self.ri_pairs_ref = ref_dict
    +21        if isinstance(cal_file_path, str):
    +22            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    +23            self.cal_file_path = Path(cal_file_path)
    +24        else:
    +25            self.cal_file_path = cal_file_path
    +26
    +27    def smooth_tic(self, tic):
    +28        implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method
    +29
    +30        pol_order = self.chromatogram_settings.savgol_pol_order
    +31
    +32        window_len = self.chromatogram_settings.smooth_window
    +33
    +34        window = self.chromatogram_settings.smooth_method
    +35
    +36        return sp.smooth_signal(
    +37            tic, window_len, window, pol_order, implemented_smooth_method
    +38        )
     39
    -40        window = self.chromatogram_settings.smooth_method
    -41
    -42        return sp.smooth_signal(tic, window_len, window, pol_order, implemented_smooth_method)
    -43    
    -44    def centroid_detector(self, tic, rt):
    -45        
    -46        noise_std = self.chromatogram_settings.std_noise_threshold
    -47
    -48        method = self.chromatogram_settings.noise_threshold_method
    -49        
    -50        #peak picking
    -51        min_height = self.chromatogram_settings.peak_height_min_percent 
    -52        min_datapoints = self.chromatogram_settings.min_peak_datapoints   
    -53        
    -54        # baseline detection
    -55        max_prominence = self.chromatogram_settings.peak_max_prominence_percent 
    -56        max_height = self.chromatogram_settings.peak_height_max_percent 
    -57        
    -58        peak_indexes_generator = sp.peak_detector_generator(tic, noise_std, method, rt, max_height, min_height, max_prominence, min_datapoints)
    -59
    -60        return peak_indexes_generator
    -61  
    -62    def remove_outliers(self, data):
    -63        
    -64        from numpy import percentile
    -65        q25, q75 = percentile(data, 25), percentile(data, 75)
    -66        iqr = q75 - q25
    -67        if self.parameters.verbose_processing:
    -68            print('Percentiles: 25th=%.3f, 75th=%.3f, IQR=%.3f' % (q25, q75, iqr))
    -69        # calculate the outlier cutoff
    -70        cut_off = iqr * 1.5
    -71        lower, upper = q25 - cut_off, q75 + cut_off
    -72        # identify outliers
    -73        outliers = [x for x in data if x < lower or x > upper]
    -74        if self.parameters.verbose_processing:
    -75            print('Identified outliers: %d' % len(outliers))
    -76        # remove outliers
    -77        nanfilled_outliers = Series([x if lower <= x <= upper else np.nan for x in data])
    -78
    -79        return nanfilled_outliers
    +40    def centroid_detector(self, tic, rt):
    +41        noise_std = self.chromatogram_settings.std_noise_threshold
    +42
    +43        method = self.chromatogram_settings.noise_threshold_method
    +44
    +45        # peak picking
    +46        min_height = self.chromatogram_settings.peak_height_min_percent
    +47        min_datapoints = self.chromatogram_settings.min_peak_datapoints
    +48
    +49        # baseline detection
    +50        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    +51        max_height = self.chromatogram_settings.peak_height_max_percent
    +52
    +53        peak_indexes_generator = sp.peak_detector_generator(
    +54            tic,
    +55            noise_std,
    +56            method,
    +57            rt,
    +58            max_height,
    +59            min_height,
    +60            max_prominence,
    +61            min_datapoints,
    +62        )
    +63
    +64        return peak_indexes_generator
    +65
    +66    def remove_outliers(self, data):
    +67        from numpy import percentile
    +68
    +69        q25, q75 = percentile(data, 25), percentile(data, 75)
    +70        iqr = q75 - q25
    +71        if self.parameters.verbose_processing:
    +72            print("Percentiles: 25th=%.3f, 75th=%.3f, IQR=%.3f" % (q25, q75, iqr))
    +73        # calculate the outlier cutoff
    +74        cut_off = iqr * 1.5
    +75        lower, upper = q25 - cut_off, q75 + cut_off
    +76        # identify outliers
    +77        outliers = [x for x in data if x < lower or x > upper]
    +78        if self.parameters.verbose_processing:
    +79            print("Identified outliers: %d" % len(outliers))
    +80        # remove outliers
    +81        nanfilled_outliers = Series(
    +82            [x if lower <= x <= upper else np.nan for x in data]
    +83        )
    +84
    +85        return nanfilled_outliers
     
    @@ -247,22 +258,19 @@

    -
    15    def calibrate_ri(self, ref_dict, cal_file_path):
    -16        
    -17        if not self:
    -18            
    -19            self.process_chromatogram()
    -20
    -21        for gcms_peak in self:
    -22        
    -23            gcms_peak.calc_ri(ref_dict)
    -24
    -25        self.ri_pairs_ref = ref_dict
    -26        if  isinstance(cal_file_path, str):
    -27			# if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    -28            self.cal_file_path = Path(cal_file_path)
    -29        else:
    -30            self.cal_file_path = cal_file_path
    +            
    13    def calibrate_ri(self, ref_dict, cal_file_path):
    +14        if not self:
    +15            self.process_chromatogram()
    +16
    +17        for gcms_peak in self:
    +18            gcms_peak.calc_ri(ref_dict)
    +19
    +20        self.ri_pairs_ref = ref_dict
    +21        if isinstance(cal_file_path, str):
    +22            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    +23            self.cal_file_path = Path(cal_file_path)
    +24        else:
    +25            self.cal_file_path = cal_file_path
     
    @@ -280,17 +288,18 @@

    -
    32    def smooth_tic(self, tic):
    -33            
    -34        implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method
    -35        
    -36        pol_order = self.chromatogram_settings.savgol_pol_order
    -37
    -38        window_len = self.chromatogram_settings.smooth_window
    -39
    -40        window = self.chromatogram_settings.smooth_method
    -41
    -42        return sp.smooth_signal(tic, window_len, window, pol_order, implemented_smooth_method)
    +            
    27    def smooth_tic(self, tic):
    +28        implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method
    +29
    +30        pol_order = self.chromatogram_settings.savgol_pol_order
    +31
    +32        window_len = self.chromatogram_settings.smooth_window
    +33
    +34        window = self.chromatogram_settings.smooth_method
    +35
    +36        return sp.smooth_signal(
    +37            tic, window_len, window, pol_order, implemented_smooth_method
    +38        )
     
    @@ -308,23 +317,31 @@

    -
    44    def centroid_detector(self, tic, rt):
    -45        
    -46        noise_std = self.chromatogram_settings.std_noise_threshold
    -47
    -48        method = self.chromatogram_settings.noise_threshold_method
    -49        
    -50        #peak picking
    -51        min_height = self.chromatogram_settings.peak_height_min_percent 
    -52        min_datapoints = self.chromatogram_settings.min_peak_datapoints   
    -53        
    -54        # baseline detection
    -55        max_prominence = self.chromatogram_settings.peak_max_prominence_percent 
    -56        max_height = self.chromatogram_settings.peak_height_max_percent 
    -57        
    -58        peak_indexes_generator = sp.peak_detector_generator(tic, noise_std, method, rt, max_height, min_height, max_prominence, min_datapoints)
    -59
    -60        return peak_indexes_generator
    +            
    40    def centroid_detector(self, tic, rt):
    +41        noise_std = self.chromatogram_settings.std_noise_threshold
    +42
    +43        method = self.chromatogram_settings.noise_threshold_method
    +44
    +45        # peak picking
    +46        min_height = self.chromatogram_settings.peak_height_min_percent
    +47        min_datapoints = self.chromatogram_settings.min_peak_datapoints
    +48
    +49        # baseline detection
    +50        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    +51        max_height = self.chromatogram_settings.peak_height_max_percent
    +52
    +53        peak_indexes_generator = sp.peak_detector_generator(
    +54            tic,
    +55            noise_std,
    +56            method,
    +57            rt,
    +58            max_height,
    +59            min_height,
    +60            max_prominence,
    +61            min_datapoints,
    +62        )
    +63
    +64        return peak_indexes_generator
     
    @@ -342,24 +359,26 @@

    -
    62    def remove_outliers(self, data):
    -63        
    -64        from numpy import percentile
    -65        q25, q75 = percentile(data, 25), percentile(data, 75)
    -66        iqr = q75 - q25
    -67        if self.parameters.verbose_processing:
    -68            print('Percentiles: 25th=%.3f, 75th=%.3f, IQR=%.3f' % (q25, q75, iqr))
    -69        # calculate the outlier cutoff
    -70        cut_off = iqr * 1.5
    -71        lower, upper = q25 - cut_off, q75 + cut_off
    -72        # identify outliers
    -73        outliers = [x for x in data if x < lower or x > upper]
    -74        if self.parameters.verbose_processing:
    -75            print('Identified outliers: %d' % len(outliers))
    -76        # remove outliers
    -77        nanfilled_outliers = Series([x if lower <= x <= upper else np.nan for x in data])
    -78
    -79        return nanfilled_outliers
    +            
    66    def remove_outliers(self, data):
    +67        from numpy import percentile
    +68
    +69        q25, q75 = percentile(data, 25), percentile(data, 75)
    +70        iqr = q75 - q25
    +71        if self.parameters.verbose_processing:
    +72            print("Percentiles: 25th=%.3f, 75th=%.3f, IQR=%.3f" % (q25, q75, iqr))
    +73        # calculate the outlier cutoff
    +74        cut_off = iqr * 1.5
    +75        lower, upper = q25 - cut_off, q75 + cut_off
    +76        # identify outliers
    +77        outliers = [x for x in data if x < lower or x > upper]
    +78        if self.parameters.verbose_processing:
    +79            print("Identified outliers: %d" % len(outliers))
    +80        # remove outliers
    +81        nanfilled_outliers = Series(
    +82            [x if lower <= x <= upper else np.nan for x in data]
    +83        )
    +84
    +85        return nanfilled_outliers
     
    diff --git a/docs/corems/mass_spectra/calc/GC_Deconvolution.html b/docs/corems/mass_spectra/calc/GC_Deconvolution.html index 2c95095f..b3f4ec71 100644 --- a/docs/corems/mass_spectra/calc/GC_Deconvolution.html +++ b/docs/corems/mass_spectra/calc/GC_Deconvolution.html @@ -96,105 +96,105 @@

    7from corems.chroma_peak.factory.chroma_peak_classes import GCPeakDeconvolved 8from corems.mass_spectra.calc import SignalProcessing as sp 9 - 10class MassDeconvolution: - 11 + 10 + 11class MassDeconvolution: 12 def run_deconvolution(self, plot_res=False): - 13 - 14 eic_dict = self.ion_extracted_chroma(self._ms) - 15 - 16 peaks_entity_data = self.find_peaks_entity(eic_dict) - 17 - 18 ''' select model peaks, create Mass Spectrum objs, GCPeak objs, store results in GC_Class gcpeaks obj''' - 19 self.deconvolution(peaks_entity_data, plot_res) - 20 - 21 def centroid_detector(self, tic, rt): - 22 ''' this function has been replaced with sp.peak_picking_first_derivative - 23 and it not used - 24 ''' - 25 noise_std = self.chromatogram_settings.std_noise_threshold - 26 - 27 method = self.chromatogram_settings.noise_threshold_method - 28 - 29 ''' peak picking''' - 30 min_height = self.chromatogram_settings.peak_height_min_percent - 31 min_datapoints = self.chromatogram_settings.min_peak_datapoints - 32 - 33 ''' baseline detection''' - 34 max_prominence = self.chromatogram_settings.peak_max_prominence_percent - 35 max_height = self.chromatogram_settings.peak_height_max_percent - 36 - 37 peak_indexes_generator = sp.peak_detector_generator(tic, noise_std, method, rt, max_height, min_height, max_prominence, min_datapoints) - 38 - 39 return peak_indexes_generator - 40 - 41 def ion_extracted_chroma(self, mass_spectra_obj): - 42 - 43 eic_dict = {} - 44 - 45 for scan_number, ms_obj in mass_spectra_obj.items(): + 13 eic_dict = self.ion_extracted_chroma(self._ms) + 14 + 15 peaks_entity_data = self.find_peaks_entity(eic_dict) + 16 + 17 """ select model peaks, create Mass Spectrum objs, GCPeak objs, store results in GC_Class gcpeaks obj""" + 18 self.deconvolution(peaks_entity_data, plot_res) + 19 + 20 def centroid_detector(self, tic, rt): + 21 """this function has been replaced with sp.peak_picking_first_derivative + 22 and it not used + 23 """ + 24 noise_std = 
self.chromatogram_settings.std_noise_threshold + 25 + 26 method = self.chromatogram_settings.noise_threshold_method + 27 + 28 """ peak picking""" + 29 min_height = self.chromatogram_settings.peak_height_min_percent + 30 min_datapoints = self.chromatogram_settings.min_peak_datapoints + 31 + 32 """ baseline detection""" + 33 max_prominence = self.chromatogram_settings.peak_max_prominence_percent + 34 max_height = self.chromatogram_settings.peak_height_max_percent + 35 + 36 peak_indexes_generator = sp.peak_detector_generator( + 37 tic, + 38 noise_std, + 39 method, + 40 rt, + 41 max_height, + 42 min_height, + 43 max_prominence, + 44 min_datapoints, + 45 ) 46 - 47 mz_list = ms_obj.mz_exp - 48 abundance_list = ms_obj.abundance - 49 # add list of scan numbers - 50 for index, mz in enumerate(mz_list): + 47 return peak_indexes_generator + 48 + 49 def ion_extracted_chroma(self, mass_spectra_obj): + 50 eic_dict = {} 51 - 52 # dict of mz and tuple (mass spectrum abundances index, and scan number) - 53 if mz not in eic_dict.keys(): - 54 - 55 eic_dict[mz] = [[abundance_list[index]], [ms_obj.retention_time] ] - 56 - 57 else: - 58 - 59 eic_dict[mz][0].append(ms_obj.abundance[index]) - 60 eic_dict[mz][1].append(ms_obj.retention_time) - 61 - 62 return eic_dict - 63 - 64 def hc(self, X, Y, max_rt_distance=0.025): - 65 - 66 from scipy.cluster.hierarchy import dendrogram, linkage - 67 from scipy.cluster.hierarchy import fcluster - 68 # from matplotlib import pyplot as plt - 69 - 70 Z = linkage(np.reshape(X, (len(X), 1)), method="ward") - 71 # Z = linkage(X, method = "ward") - 72 - 73 max_d = max_rt_distance - 74 distance_clusters = fcluster(Z, max_d, criterion='distance') - 75 # print("distance") - 76 # print(distance_clusters) - 77 - 78 # inconsistency_cluster = fcluster(Z, 2, depth=2) - 79 # max_cluster = fcluster(Z, 2, criterion='maxclust') - 80 - 81 grouped_rt = {} + 52 for scan_number, ms_obj in mass_spectra_obj.items(): + 53 mz_list = ms_obj.mz_exp + 54 abundance_list = 
ms_obj.abundance + 55 # add list of scan numbers + 56 for index, mz in enumerate(mz_list): + 57 # dict of mz and tuple (mass spectrum abundances index, and scan number) + 58 if mz not in eic_dict.keys(): + 59 eic_dict[mz] = [[abundance_list[index]], [ms_obj.retention_time]] + 60 + 61 else: + 62 eic_dict[mz][0].append(ms_obj.abundance[index]) + 63 eic_dict[mz][1].append(ms_obj.retention_time) + 64 + 65 return eic_dict + 66 + 67 def hc(self, X, Y, max_rt_distance=0.025): + 68 from scipy.cluster.hierarchy import dendrogram, linkage + 69 from scipy.cluster.hierarchy import fcluster + 70 # from matplotlib import pyplot as plt + 71 + 72 Z = linkage(np.reshape(X, (len(X), 1)), method="ward") + 73 # Z = linkage(X, method = "ward") + 74 + 75 max_d = max_rt_distance + 76 distance_clusters = fcluster(Z, max_d, criterion="distance") + 77 # print("distance") + 78 # print(distance_clusters) + 79 + 80 # inconsistency_cluster = fcluster(Z, 2, depth=2) + 81 # max_cluster = fcluster(Z, 2, criterion='maxclust') 82 - 83 for index_obj, group in enumerate(distance_clusters): + 83 grouped_rt = {} 84 - 85 if group not in grouped_rt.keys(): - 86 grouped_rt[group] = [X[index_obj]] - 87 else: - 88 grouped_rt[group].append(X[index_obj]) - 89 - 90 # print(distance_clusters, grouped_rt) - 91 return grouped_rt - 92 - 93 # plt.figure(figsize=(10, 8)) - 94 # plt.scatter(X, Y, c=distance_clusters, cmap='prism') # plot points with cluster dependent colors - 95 # plt.show() - 96 # labelList = range(int(min(X)), int(max(X))) - 97 - 98 # plt.figure(figsize=(10, 7)) - 99 # dendrogram(Z, -100 # orientation='top', -101 # distance_sort='descending', -102 # show_leaf_counts=True) -103 # plt.show() -104 # print(Z) -105 -106 def find_peaks_entity(self, eic_dict): -107 -108 ''' combine eic with mathing rt apexes''' + 85 for index_obj, group in enumerate(distance_clusters): + 86 if group not in grouped_rt.keys(): + 87 grouped_rt[group] = [X[index_obj]] + 88 else: + 89 grouped_rt[group].append(X[index_obj]) + 90 
+ 91 # print(distance_clusters, grouped_rt) + 92 return grouped_rt + 93 + 94 # plt.figure(figsize=(10, 8)) + 95 # plt.scatter(X, Y, c=distance_clusters, cmap='prism') # plot points with cluster dependent colors + 96 # plt.show() + 97 # labelList = range(int(min(X)), int(max(X))) + 98 + 99 # plt.figure(figsize=(10, 7)) +100 # dendrogram(Z, +101 # orientation='top', +102 # distance_sort='descending', +103 # show_leaf_counts=True) +104 # plt.show() +105 # print(Z) +106 +107 def find_peaks_entity(self, eic_dict): +108 """combine eic with mathing rt apexes""" 109 max_prominence = self.chromatogram_settings.peak_max_prominence_percent 110 111 max_height = self.chromatogram_settings.peak_height_max_percent @@ -202,7 +202,7 @@

    113 signal_threshold = self.chromatogram_settings.eic_signal_threshold 114 115 min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints -116 +116 117 peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold 118 119 correct_baseline = False @@ -210,353 +210,441 @@

    121 122 max_eic = 0 123 for mz, eic_scan_index_rt in eic_dict.items(): -124 -125 ind_max_eic = max(eic_scan_index_rt[0]) -126 max_eic = ind_max_eic if ind_max_eic > max_eic else max_eic -127 -128 for mz, eic_scan_index_rt in eic_dict.items(): -129 -130 eic = eic_scan_index_rt[0] -131 rt_list = eic_scan_index_rt[1] -132 -133 if len(eic) >= min_peak_datapoints: -134 -135 smooth_eic = self.smooth_tic(eic) -136 -137 include_indexes = sp.peak_picking_first_derivative(rt_list, smooth_eic, max_height, max_prominence, max_eic, min_peak_datapoints, -138 peak_derivative_threshold, -139 signal_threshold=signal_threshold, correct_baseline=correct_baseline) -140 -141 for initial_scan, apex_scan, final_scan in include_indexes: -142 -143 rt_corrected_therm = self.quadratic_interpolation(rt_list, smooth_eic, apex_scan) -144 -145 ref_apex_rt = round(rt_list[apex_scan] + rt_corrected_therm, 4) -146 -147 apex_rt = rt_list[apex_scan] -148 # apex_abundance = smooth_eic[apex_scan] -149 -150 # maximum_tic = apex_abundance if apex_abundance > maximum_tic else maximum_tic -151 -152 for scan_index in range(initial_scan, final_scan): -153 -154 peak_rt = rt_list[scan_index] -155 peak_abundance = smooth_eic[scan_index] -156 -157 if peak_abundance > 0: -158 -159 dict_data = {peak_rt: {'mz': [mz], -160 'abundance': [peak_abundance], -161 'scan_number': [scan_index]}, -162 'ref_apex_rt': ref_apex_rt -163 } -164 -165 if apex_rt not in peaks_entity_data.keys(): -166 -167 peaks_entity_data[apex_rt] = dict_data -168 -169 else: -170 -171 if peak_rt not in peaks_entity_data[apex_rt].keys(): -172 -173 peaks_entity_data[apex_rt][peak_rt] = dict_data.get(peak_rt) +124 ind_max_eic = max(eic_scan_index_rt[0]) +125 max_eic = ind_max_eic if ind_max_eic > max_eic else max_eic +126 +127 for mz, eic_scan_index_rt in eic_dict.items(): +128 eic = eic_scan_index_rt[0] +129 rt_list = eic_scan_index_rt[1] +130 +131 if len(eic) >= min_peak_datapoints: +132 smooth_eic = self.smooth_tic(eic) +133 +134 
include_indexes = sp.peak_picking_first_derivative( +135 rt_list, +136 smooth_eic, +137 max_height, +138 max_prominence, +139 max_eic, +140 min_peak_datapoints, +141 peak_derivative_threshold, +142 signal_threshold=signal_threshold, +143 correct_baseline=correct_baseline, +144 ) +145 +146 for initial_scan, apex_scan, final_scan in include_indexes: +147 rt_corrected_therm = self.quadratic_interpolation( +148 rt_list, smooth_eic, apex_scan +149 ) +150 +151 ref_apex_rt = round(rt_list[apex_scan] + rt_corrected_therm, 4) +152 +153 apex_rt = rt_list[apex_scan] +154 # apex_abundance = smooth_eic[apex_scan] +155 +156 # maximum_tic = apex_abundance if apex_abundance > maximum_tic else maximum_tic +157 +158 for scan_index in range(initial_scan, final_scan): +159 peak_rt = rt_list[scan_index] +160 peak_abundance = smooth_eic[scan_index] +161 +162 if peak_abundance > 0: +163 dict_data = { +164 peak_rt: { +165 "mz": [mz], +166 "abundance": [peak_abundance], +167 "scan_number": [scan_index], +168 }, +169 "ref_apex_rt": ref_apex_rt, +170 } +171 +172 if apex_rt not in peaks_entity_data.keys(): +173 peaks_entity_data[apex_rt] = dict_data 174 -175 else: -176 -177 existing_data = peaks_entity_data[apex_rt].get(peak_rt) -178 -179 existing_data['mz'].append(mz) -180 existing_data['abundance'].append(peak_abundance) -181 existing_data['scan_number'].append(scan_index) -182 -183 return peaks_entity_data -184 -185 def mass_spec_factory(self, rt, datadict): -186 -187 # tic = sum(datadict.get('abundance')) -188 -189 scan_index = datadict['scan_number'][0] -190 -191 mz_list, abundance_list = zip(*sorted(zip(datadict['mz'], datadict['abundance']))) -192 -193 data_dict = {Labels.mz: mz_list, Labels.abundance: abundance_list} +175 else: +176 if peak_rt not in peaks_entity_data[apex_rt].keys(): +177 peaks_entity_data[apex_rt][peak_rt] = dict_data.get( +178 peak_rt +179 ) +180 +181 else: +182 existing_data = peaks_entity_data[apex_rt].get( +183 peak_rt +184 ) +185 +186 
existing_data["mz"].append(mz) +187 existing_data["abundance"].append(peak_abundance) +188 existing_data["scan_number"].append(scan_index) +189 +190 return peaks_entity_data +191 +192 def mass_spec_factory(self, rt, datadict): +193 # tic = sum(datadict.get('abundance')) 194 -195 d_params = default_parameters(self._ms[scan_index]._filename) +195 scan_index = datadict["scan_number"][0] 196 -197 d_params["rt"] = rt -198 -199 d_params["scan_number"] = scan_index +197 mz_list, abundance_list = zip( +198 *sorted(zip(datadict["mz"], datadict["abundance"])) +199 ) 200 -201 d_params['label'] = Labels.gcms_centroid +201 data_dict = {Labels.mz: mz_list, Labels.abundance: abundance_list} 202 -203 d_params["polarity"] = self._ms[scan_index].polarity +203 d_params = default_parameters(self._ms[scan_index]._filename) 204 -205 d_params['analyzer'] = self._ms[scan_index].analyzer +205 d_params["rt"] = rt 206 -207 d_params['instrument_label'] = self._ms[scan_index].instrument_label +207 d_params["scan_number"] = scan_index 208 -209 d_params["filename_path"] = self._ms[scan_index].instrument_label +209 d_params["label"] = Labels.gcms_centroid 210 -211 ms = MassSpecCentroidLowRes(data_dict, d_params) +211 d_params["polarity"] = self._ms[scan_index].polarity 212 -213 return ms +213 d_params["analyzer"] = self._ms[scan_index].analyzer 214 -215 def smooth_signal(self, signal): +215 d_params["instrument_label"] = self._ms[scan_index].instrument_label 216 -217 implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method +217 d_params["filename_path"] = self._ms[scan_index].instrument_label 218 -219 pol_order = self.chromatogram_settings.savgol_pol_order +219 ms = MassSpecCentroidLowRes(data_dict, d_params) 220 -221 window_len = self.chromatogram_settings.smooth_window +221 return ms 222 -223 window = self.chromatogram_settings.smooth_method -224 -225 return sp.smooth_signal(signal, window_len, window, pol_order, implemented_smooth_method) -226 -227 def add_gcpeak(self, 
new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, datadict, plot_res): -228 -229 if start_rt <= peak_rt[new_apex_index[1]] <= final_rt: -230 -231 rt_list = peak_rt[new_apex_index[0]:new_apex_index[2]] -232 tic_list = smoothed_tic[new_apex_index[0]:new_apex_index[2]] -233 -234 apex_rt = peak_rt[new_apex_index[1]] -235 apex_i = rt_list.index(apex_rt) -236 -237 '''workaround for peak picking missing some local minimas''' -238 if apex_rt not in self.processed_appexes: -239 -240 self.processed_appexes.append(apex_rt) -241 -242 mass_spectra = (self.mass_spec_factory(rt, datadict.get(rt)) for rt in rt_list) -243 -244 gc_peak = GCPeakDeconvolved(self, mass_spectra, apex_i, rt_list, tic_list) -245 -246 gc_peak.calc_area(tic_list, 1) -247 -248 self.gcpeaks.append(gc_peak) +223 def smooth_signal(self, signal): +224 implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method +225 +226 pol_order = self.chromatogram_settings.savgol_pol_order +227 +228 window_len = self.chromatogram_settings.smooth_window +229 +230 window = self.chromatogram_settings.smooth_method +231 +232 return sp.smooth_signal( +233 signal, window_len, window, pol_order, implemented_smooth_method +234 ) +235 +236 def add_gcpeak( +237 self, +238 new_apex_index, +239 start_rt, +240 final_rt, +241 peak_rt, +242 smoothed_tic, +243 datadict, +244 plot_res, +245 ): +246 if start_rt <= peak_rt[new_apex_index[1]] <= final_rt: +247 rt_list = peak_rt[new_apex_index[0] : new_apex_index[2]] +248 tic_list = smoothed_tic[new_apex_index[0] : new_apex_index[2]] 249 -250 if plot_res: -251 -252 plt.plot(gc_peak.rt_list, gc_peak.tic_list) -253 plt.plot(gc_peak.retention_time, gc_peak.tic, c='black', marker= '^', linewidth=0) -254 -255 def deconvolution(self, peaks_entity_data, plot_res): +250 apex_rt = peak_rt[new_apex_index[1]] +251 apex_i = rt_list.index(apex_rt) +252 +253 """workaround for peak picking missing some local minimas""" +254 if apex_rt not in self.processed_appexes: +255 
self.processed_appexes.append(apex_rt) 256 -257 # plot_res = True -258 domain = self.retention_time -259 signal = self._processed_tic -260 max_height = self.chromatogram_settings.peak_height_max_percent -261 max_prominence = self.chromatogram_settings.peak_max_prominence_percent -262 min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints -263 signal_threshold = self.chromatogram_settings.peak_height_min_percent -264 max_rt_distance = self.chromatogram_settings.max_rt_distance -265 peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold +257 mass_spectra = ( +258 self.mass_spec_factory(rt, datadict.get(rt)) for rt in rt_list +259 ) +260 +261 gc_peak = GCPeakDeconvolved( +262 self, mass_spectra, apex_i, rt_list, tic_list +263 ) +264 +265 gc_peak.calc_area(tic_list, 1) 266 -267 max_signal = max(signal) -268 correct_baseline = False -269 -270 include_indexes = sp.peak_picking_first_derivative(domain, signal, max_height, max_prominence, max_signal, min_peak_datapoints, -271 peak_derivative_threshold, signal_threshold=signal_threshold, -272 correct_baseline=correct_baseline, plot_res=False) -273 -274 ''' deconvolution window is defined by the TIC peak region''' -275 all_apexes_rt = np.array(list(peaks_entity_data.keys())) -276 -277 '''workaround for peak picking missing some local minimas''' -278 self.processed_appexes = [] -279 -280 for indexes_tuple in include_indexes: -281 -282 start_rt = self.retention_time[indexes_tuple[0]] -283 # apex_rt = self.retention_time[indexes_tuple[1]] -284 final_rt = self.retention_time[indexes_tuple[2]] -285 -286 ''' find all features within TIC peak window''' -287 peak_features_indexes = np.where((all_apexes_rt > start_rt) & (all_apexes_rt < final_rt))[0] -288 peak_features_rts = all_apexes_rt[peak_features_indexes] +267 self.gcpeaks.append(gc_peak) +268 +269 if plot_res: +270 plt.plot(gc_peak.rt_list, gc_peak.tic_list) +271 plt.plot( +272 gc_peak.retention_time, +273 gc_peak.tic, +274 c="black", 
+275 marker="^", +276 linewidth=0, +277 ) +278 +279 def deconvolution(self, peaks_entity_data, plot_res): +280 # plot_res = True +281 domain = self.retention_time +282 signal = self._processed_tic +283 max_height = self.chromatogram_settings.peak_height_max_percent +284 max_prominence = self.chromatogram_settings.peak_max_prominence_percent +285 min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints +286 signal_threshold = self.chromatogram_settings.peak_height_min_percent +287 max_rt_distance = self.chromatogram_settings.max_rt_distance +288 peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold 289 -290 # print(start_rt, apex_rt, final_rt ) -291 -292 filtered_features_rt = [] -293 filtered_features_abundance = [] -294 -295 for each_apex_rt in peak_features_rts: -296 -297 apex_data = peaks_entity_data.get(each_apex_rt).get(each_apex_rt) -298 -299 peak_features_tic = sum(peaks_entity_data.get(each_apex_rt).get(each_apex_rt).get('abundance')) -300 -301 norm_smooth_tic = (peak_features_tic / max_signal) * 100 -302 -303 ''' TODO: -304 Improve Peak Filtering +290 max_signal = max(signal) +291 correct_baseline = False +292 +293 include_indexes = sp.peak_picking_first_derivative( +294 domain, +295 signal, +296 max_height, +297 max_prominence, +298 max_signal, +299 min_peak_datapoints, +300 peak_derivative_threshold, +301 signal_threshold=signal_threshold, +302 correct_baseline=correct_baseline, +303 plot_res=False, +304 ) 305 -306 Calculate peaks sharpness here and filter it out (Amax - An /n)? -307 Peak Fit and Calculate Peak Gaussian Similarity? 
-308 Currentely using flat % tic relative abundance threshold and min 3 m/z per mass spectrum -309 ''' -310 if norm_smooth_tic > signal_threshold and len(apex_data['mz']) > 1: +306 """ deconvolution window is defined by the TIC peak region""" +307 all_apexes_rt = np.array(list(peaks_entity_data.keys())) +308 +309 """workaround for peak picking missing some local minimas""" +310 self.processed_appexes = [] 311 -312 # print(len(apex_data['mz'])) -313 filtered_features_rt.append(each_apex_rt) -314 filtered_features_abundance.append(peak_features_tic) -315 -316 if len(filtered_features_rt) > 1: -317 ''' more than one peak feature identified inside a TIC peak ''' -318 # plt.plot(self.retention_time[indexes_tuple[0]:indexes_tuple[2]], signal[indexes_tuple[0]:indexes_tuple[2]], c='black') -319 -320 # print(filtered_features_rt) -321 grouped_rt = self.hc(filtered_features_rt, filtered_features_abundance, max_rt_distance=max_rt_distance) -322 # print(grouped_rt) -323 -324 for group, apex_rt_list in grouped_rt.items(): -325 ''' each group is a peak feature defined by the hierarchical clutter algorithm -326 -327 ''' -328 group_datadict = {} -329 group_datadict['ref_apex_rt'] = [] +312 for indexes_tuple in include_indexes: +313 start_rt = self.retention_time[indexes_tuple[0]] +314 # apex_rt = self.retention_time[indexes_tuple[1]] +315 final_rt = self.retention_time[indexes_tuple[2]] +316 +317 """ find all features within TIC peak window""" +318 peak_features_indexes = np.where( +319 (all_apexes_rt > start_rt) & (all_apexes_rt < final_rt) +320 )[0] +321 peak_features_rts = all_apexes_rt[peak_features_indexes] +322 +323 # print(start_rt, apex_rt, final_rt ) +324 +325 filtered_features_rt = [] +326 filtered_features_abundance = [] +327 +328 for each_apex_rt in peak_features_rts: +329 apex_data = peaks_entity_data.get(each_apex_rt).get(each_apex_rt) 330 -331 for each_group_apex_rt in apex_rt_list: -332 -333 datadict = peaks_entity_data.get(each_group_apex_rt) -334 -335 for rt, 
each_datadict in datadict.items(): +331 peak_features_tic = sum( +332 peaks_entity_data.get(each_apex_rt) +333 .get(each_apex_rt) +334 .get("abundance") +335 ) 336 -337 if rt == "ref_apex_rt": +337 norm_smooth_tic = (peak_features_tic / max_signal) * 100 338 -339 group_datadict['ref_apex_rt'].append(each_datadict) -340 -341 else: -342 -343 if rt in group_datadict.keys(): -344 -345 mz_list = each_datadict.get("mz") -346 abundance_list = each_datadict.get("abundance") -347 -348 each_mz_abun = dict(zip(mz_list, abundance_list)) -349 -350 for index_mz, mz in enumerate(group_datadict[rt].get("mz")): -351 if mz in each_mz_abun.keys(): -352 -353 each_mz_abun[mz] = each_mz_abun[mz] + group_datadict[rt].get("abundance")[index_mz] +339 """ TODO: +340 Improve Peak Filtering +341 +342 Calculate peaks sharpness here and filter it out (Amax - An /n)? +343 Peak Fit and Calculate Peak Gaussian Similarity? +344 Currentely using flat % tic relative abundance threshold and min 3 m/z per mass spectrum +345 """ +346 if norm_smooth_tic > signal_threshold and len(apex_data["mz"]) > 1: +347 # print(len(apex_data['mz'])) +348 filtered_features_rt.append(each_apex_rt) +349 filtered_features_abundance.append(peak_features_tic) +350 +351 if len(filtered_features_rt) > 1: +352 """ more than one peak feature identified inside a TIC peak """ +353 # plt.plot(self.retention_time[indexes_tuple[0]:indexes_tuple[2]], signal[indexes_tuple[0]:indexes_tuple[2]], c='black') 354 -355 else: -356 -357 each_mz_abun[mz] = group_datadict[rt].get("abundance")[index_mz] -358 -359 group_datadict[rt] = {'mz': list(each_mz_abun.keys()), -360 'abundance': list(each_mz_abun.values()), -361 'scan_number': each_datadict.get('scan_number')} +355 # print(filtered_features_rt) +356 grouped_rt = self.hc( +357 filtered_features_rt, +358 filtered_features_abundance, +359 max_rt_distance=max_rt_distance, +360 ) +361 # print(grouped_rt) 362 -363 else: -364 -365 group_datadict[rt] = each_datadict -366 -367 peak_rt = [] -368 
peak_tic = [] +363 for group, apex_rt_list in grouped_rt.items(): +364 """ each group is a peak feature defined by the hierarchical clutter algorithm +365 +366 """ +367 group_datadict = {} +368 group_datadict["ref_apex_rt"] = [] 369 -370 # print(group_datadict.get('ref_apex_rt')) -371 for rt, each_datadict in group_datadict.items(): -372 if rt != "ref_apex_rt": -373 peak_rt.append(rt) -374 peak_tic.append(sum(each_datadict["abundance"])) -375 -376 peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic))) -377 -378 smoothed_tic = self.smooth_signal(peak_tic) -379 -380 include_indexes = sp.peak_picking_first_derivative(peak_rt, smoothed_tic, max_height, max_prominence, max_signal, min_peak_datapoints, -381 peak_derivative_threshold, -382 signal_threshold=signal_threshold, correct_baseline=False, plot_res=False) +370 for each_group_apex_rt in apex_rt_list: +371 datadict = peaks_entity_data.get(each_group_apex_rt) +372 +373 for rt, each_datadict in datadict.items(): +374 if rt == "ref_apex_rt": +375 group_datadict["ref_apex_rt"].append(each_datadict) +376 +377 else: +378 if rt in group_datadict.keys(): +379 mz_list = each_datadict.get("mz") +380 abundance_list = each_datadict.get("abundance") +381 +382 each_mz_abun = dict(zip(mz_list, abundance_list)) 383 -384 include_indexes = list(include_indexes) -385 -386 if include_indexes: -387 -388 if len(include_indexes) > 1: -389 ''' after sum there are two apexes -390 check if it is inside the deconvolution window, otherwise ignores it -391 ''' -392 -393 for new_apex_index in include_indexes: -394 # pass -395 self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, group_datadict, plot_res) -396 -397 else: -398 ''' after sum there is on apex -399 save it -400 ''' -401 new_apex_index = include_indexes[0] -402 # print(include_indexes, group, apex_rt_list) -403 self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, group_datadict, plot_res) -404 -405 elif len(filtered_features_rt) == 1: -406 
''' only one peak feature inside deconvolution window ''' -407 -408 each_apex_rt = filtered_features_rt[0] -409 -410 datadict = peaks_entity_data.get(each_apex_rt) +384 for index_mz, mz in enumerate( +385 group_datadict[rt].get("mz") +386 ): +387 if mz in each_mz_abun.keys(): +388 each_mz_abun[mz] = ( +389 each_mz_abun[mz] +390 + group_datadict[rt].get("abundance")[ +391 index_mz +392 ] +393 ) +394 +395 else: +396 each_mz_abun[mz] = group_datadict[rt].get( +397 "abundance" +398 )[index_mz] +399 +400 group_datadict[rt] = { +401 "mz": list(each_mz_abun.keys()), +402 "abundance": list(each_mz_abun.values()), +403 "scan_number": each_datadict.get("scan_number"), +404 } +405 +406 else: +407 group_datadict[rt] = each_datadict +408 +409 peak_rt = [] +410 peak_tic = [] 411 -412 peak_rt = [] -413 peak_tic = [] -414 -415 for rt, each_datadict in datadict.items(): -416 -417 if rt != "ref_apex_rt": -418 peak_rt.append(rt) -419 peak_tic.append(sum(each_datadict["abundance"])) -420 -421 peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic))) -422 -423 smoothed_tic = self.smooth_signal(peak_tic) -424 -425 include_indexes = sp.peak_picking_first_derivative(peak_rt, smoothed_tic, max_height, max_prominence, max_signal, min_peak_datapoints, -426 peak_derivative_threshold, -427 signal_threshold=signal_threshold, correct_baseline=False, plot_res=False) -428 include_indexes = list(include_indexes) -429 -430 if include_indexes: -431 -432 ''' after sum there are two apexes -433 check if it is inside the deconvolution window, otherwise ignores it''' -434 if len(include_indexes) > 1: -435 -436 for new_apex_index in include_indexes: -437 # pass -438 self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, datadict, plot_res) -439 -440 else: -441 ''' after sum there is one apex -442 save it -443 includes_indexes = (start, apex, final )''' -444 -445 new_apex_index = include_indexes[0] -446 -447 self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, 
datadict, plot_res) -448 -449 else: -450 -451 # print('no data after filter') -452 pass -453 if plot_res: -454 plt.plot(self.retention_time, self._processed_tic, c='black') -455 plt.show() -456 -457 def quadratic_interpolation(self, rt_list, tic_list, apex_index): -458 -459 rt_list = np.array(rt_list) -460 tic_list = np.array(tic_list) -461 three_highest_i = [i for i in range(apex_index - 1, apex_index + 2)] -462 -463 z = np.poly1d(np.polyfit(rt_list[three_highest_i], tic_list[three_highest_i], 2)) -464 a = z[2] -465 b = z[1] -466 -467 corrected_apex_rt = -b / (2 * a) -468 initial_rt = rt_list[apex_index] -469 -470 return initial_rt - corrected_apex_rt +412 # print(group_datadict.get('ref_apex_rt')) +413 for rt, each_datadict in group_datadict.items(): +414 if rt != "ref_apex_rt": +415 peak_rt.append(rt) +416 peak_tic.append(sum(each_datadict["abundance"])) +417 +418 peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic))) +419 +420 smoothed_tic = self.smooth_signal(peak_tic) +421 +422 include_indexes = sp.peak_picking_first_derivative( +423 peak_rt, +424 smoothed_tic, +425 max_height, +426 max_prominence, +427 max_signal, +428 min_peak_datapoints, +429 peak_derivative_threshold, +430 signal_threshold=signal_threshold, +431 correct_baseline=False, +432 plot_res=False, +433 ) +434 +435 include_indexes = list(include_indexes) +436 +437 if include_indexes: +438 if len(include_indexes) > 1: +439 """ after sum there are two apexes +440 check if it is inside the deconvolution window, otherwise ignores it +441 """ +442 +443 for new_apex_index in include_indexes: +444 # pass +445 self.add_gcpeak( +446 new_apex_index, +447 start_rt, +448 final_rt, +449 peak_rt, +450 smoothed_tic, +451 group_datadict, +452 plot_res, +453 ) +454 +455 else: +456 """ after sum there is on apex +457 save it +458 """ +459 new_apex_index = include_indexes[0] +460 # print(include_indexes, group, apex_rt_list) +461 self.add_gcpeak( +462 new_apex_index, +463 start_rt, +464 final_rt, +465 peak_rt, 
+466 smoothed_tic, +467 group_datadict, +468 plot_res, +469 ) +470 +471 elif len(filtered_features_rt) == 1: +472 """ only one peak feature inside deconvolution window """ +473 +474 each_apex_rt = filtered_features_rt[0] +475 +476 datadict = peaks_entity_data.get(each_apex_rt) +477 +478 peak_rt = [] +479 peak_tic = [] +480 +481 for rt, each_datadict in datadict.items(): +482 if rt != "ref_apex_rt": +483 peak_rt.append(rt) +484 peak_tic.append(sum(each_datadict["abundance"])) +485 +486 peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic))) +487 +488 smoothed_tic = self.smooth_signal(peak_tic) +489 +490 include_indexes = sp.peak_picking_first_derivative( +491 peak_rt, +492 smoothed_tic, +493 max_height, +494 max_prominence, +495 max_signal, +496 min_peak_datapoints, +497 peak_derivative_threshold, +498 signal_threshold=signal_threshold, +499 correct_baseline=False, +500 plot_res=False, +501 ) +502 include_indexes = list(include_indexes) +503 +504 if include_indexes: +505 """ after sum there are two apexes +506 check if it is inside the deconvolution window, otherwise ignores it""" +507 if len(include_indexes) > 1: +508 for new_apex_index in include_indexes: +509 # pass +510 self.add_gcpeak( +511 new_apex_index, +512 start_rt, +513 final_rt, +514 peak_rt, +515 smoothed_tic, +516 datadict, +517 plot_res, +518 ) +519 +520 else: +521 """ after sum there is one apex +522 save it +523 includes_indexes = (start, apex, final )""" +524 +525 new_apex_index = include_indexes[0] +526 +527 self.add_gcpeak( +528 new_apex_index, +529 start_rt, +530 final_rt, +531 peak_rt, +532 smoothed_tic, +533 datadict, +534 plot_res, +535 ) +536 +537 else: +538 # print('no data after filter') +539 pass +540 if plot_res: +541 plt.plot(self.retention_time, self._processed_tic, c="black") +542 plt.show() +543 +544 def quadratic_interpolation(self, rt_list, tic_list, apex_index): +545 rt_list = np.array(rt_list) +546 tic_list = np.array(tic_list) +547 three_highest_i = [i for i in range(apex_index 
- 1, apex_index + 2)] +548 +549 z = np.poly1d( +550 np.polyfit(rt_list[three_highest_i], tic_list[three_highest_i], 2) +551 ) +552 a = z[2] +553 b = z[1] +554 +555 corrected_apex_rt = -b / (2 * a) +556 initial_rt = rt_list[apex_index] +557 +558 return initial_rt - corrected_apex_rt

    @@ -572,62 +660,63 @@

    -
     13class MassDeconvolution:
    - 14
    - 15    def run_deconvolution(self, plot_res=False):
    - 16
    - 17        eic_dict = self.ion_extracted_chroma(self._ms)
    - 18
    - 19        peaks_entity_data = self.find_peaks_entity(eic_dict)
    +            
     12class MassDeconvolution:
    + 13    def run_deconvolution(self, plot_res=False):
    + 14        eic_dict = self.ion_extracted_chroma(self._ms)
    + 15
    + 16        peaks_entity_data = self.find_peaks_entity(eic_dict)
    + 17
    + 18        """ select model peaks, create Mass Spectrum objs, GCPeak objs, store results in GC_Class gcpeaks obj"""
    + 19        self.deconvolution(peaks_entity_data, plot_res)
      20
    - 21        ''' select model peaks, create Mass Spectrum objs, GCPeak objs, store results in GC_Class gcpeaks obj'''
    - 22        self.deconvolution(peaks_entity_data, plot_res)
    - 23
    - 24    def centroid_detector(self, tic, rt):
    - 25        ''' this function has been replaced with sp.peak_picking_first_derivative
    - 26            and it not used
    - 27        '''
    - 28        noise_std = self.chromatogram_settings.std_noise_threshold
    - 29
    - 30        method = self.chromatogram_settings.noise_threshold_method
    - 31
    - 32        ''' peak picking'''
    - 33        min_height = self.chromatogram_settings.peak_height_min_percent
    - 34        min_datapoints = self.chromatogram_settings.min_peak_datapoints
    - 35
    - 36        ''' baseline detection'''
    - 37        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    - 38        max_height = self.chromatogram_settings.peak_height_max_percent
    - 39
    - 40        peak_indexes_generator = sp.peak_detector_generator(tic, noise_std, method, rt, max_height, min_height, max_prominence, min_datapoints)
    - 41
    - 42        return peak_indexes_generator
    - 43
    - 44    def ion_extracted_chroma(self, mass_spectra_obj):
    - 45
    - 46        eic_dict = {}
    + 21    def centroid_detector(self, tic, rt):
    + 22        """this function has been replaced with sp.peak_picking_first_derivative
    + 23        and it not used
    + 24        """
    + 25        noise_std = self.chromatogram_settings.std_noise_threshold
    + 26
    + 27        method = self.chromatogram_settings.noise_threshold_method
    + 28
    + 29        """ peak picking"""
    + 30        min_height = self.chromatogram_settings.peak_height_min_percent
    + 31        min_datapoints = self.chromatogram_settings.min_peak_datapoints
    + 32
    + 33        """ baseline detection"""
    + 34        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    + 35        max_height = self.chromatogram_settings.peak_height_max_percent
    + 36
    + 37        peak_indexes_generator = sp.peak_detector_generator(
    + 38            tic,
    + 39            noise_std,
    + 40            method,
    + 41            rt,
    + 42            max_height,
    + 43            min_height,
    + 44            max_prominence,
    + 45            min_datapoints,
    + 46        )
      47
    - 48        for scan_number, ms_obj in mass_spectra_obj.items():
    + 48        return peak_indexes_generator
      49
    - 50            mz_list = ms_obj.mz_exp
    - 51            abundance_list = ms_obj.abundance
    - 52            # add list of scan numbers
    - 53            for index, mz in enumerate(mz_list):
    - 54
    - 55                # dict of mz and tuple (mass spectrum abundances index, and scan number)
    - 56                if mz not in eic_dict.keys():
    - 57
    - 58                    eic_dict[mz] = [[abundance_list[index]], [ms_obj.retention_time] ]
    - 59
    - 60                else:
    + 50    def ion_extracted_chroma(self, mass_spectra_obj):
    + 51        eic_dict = {}
    + 52
    + 53        for scan_number, ms_obj in mass_spectra_obj.items():
    + 54            mz_list = ms_obj.mz_exp
    + 55            abundance_list = ms_obj.abundance
    + 56            # add list of scan numbers
    + 57            for index, mz in enumerate(mz_list):
    + 58                # dict of mz and tuple (mass spectrum abundances index, and scan number)
    + 59                if mz not in eic_dict.keys():
    + 60                    eic_dict[mz] = [[abundance_list[index]], [ms_obj.retention_time]]
      61
    - 62                    eic_dict[mz][0].append(ms_obj.abundance[index])
    - 63                    eic_dict[mz][1].append(ms_obj.retention_time)
    - 64
    - 65        return eic_dict
    - 66
    - 67    def hc(self, X, Y, max_rt_distance=0.025):
    - 68
    + 62                else:
    + 63                    eic_dict[mz][0].append(ms_obj.abundance[index])
    + 64                    eic_dict[mz][1].append(ms_obj.retention_time)
    + 65
    + 66        return eic_dict
    + 67
    + 68    def hc(self, X, Y, max_rt_distance=0.025):
      69        from scipy.cluster.hierarchy import dendrogram, linkage
      70        from scipy.cluster.hierarchy import fcluster
      71        # from matplotlib import pyplot as plt
    @@ -636,7 +725,7 @@ 

    74 # Z = linkage(X, method = "ward") 75 76 max_d = max_rt_distance - 77 distance_clusters = fcluster(Z, max_d, criterion='distance') + 77 distance_clusters = fcluster(Z, max_d, criterion="distance") 78 # print("distance") 79 # print(distance_clusters) 80 @@ -646,393 +735,479 @@

    84 grouped_rt = {} 85 86 for index_obj, group in enumerate(distance_clusters): - 87 - 88 if group not in grouped_rt.keys(): - 89 grouped_rt[group] = [X[index_obj]] - 90 else: - 91 grouped_rt[group].append(X[index_obj]) - 92 - 93 # print(distance_clusters, grouped_rt) - 94 return grouped_rt - 95 - 96 # plt.figure(figsize=(10, 8)) - 97 # plt.scatter(X, Y, c=distance_clusters, cmap='prism') # plot points with cluster dependent colors - 98 # plt.show() - 99 # labelList = range(int(min(X)), int(max(X))) -100 -101 # plt.figure(figsize=(10, 7)) -102 # dendrogram(Z, -103 # orientation='top', -104 # distance_sort='descending', -105 # show_leaf_counts=True) -106 # plt.show() -107 # print(Z) -108 -109 def find_peaks_entity(self, eic_dict): -110 -111 ''' combine eic with mathing rt apexes''' -112 max_prominence = self.chromatogram_settings.peak_max_prominence_percent + 87 if group not in grouped_rt.keys(): + 88 grouped_rt[group] = [X[index_obj]] + 89 else: + 90 grouped_rt[group].append(X[index_obj]) + 91 + 92 # print(distance_clusters, grouped_rt) + 93 return grouped_rt + 94 + 95 # plt.figure(figsize=(10, 8)) + 96 # plt.scatter(X, Y, c=distance_clusters, cmap='prism') # plot points with cluster dependent colors + 97 # plt.show() + 98 # labelList = range(int(min(X)), int(max(X))) + 99 +100 # plt.figure(figsize=(10, 7)) +101 # dendrogram(Z, +102 # orientation='top', +103 # distance_sort='descending', +104 # show_leaf_counts=True) +105 # plt.show() +106 # print(Z) +107 +108 def find_peaks_entity(self, eic_dict): +109 """combine eic with mathing rt apexes""" +110 max_prominence = self.chromatogram_settings.peak_max_prominence_percent +111 +112 max_height = self.chromatogram_settings.peak_height_max_percent 113 -114 max_height = self.chromatogram_settings.peak_height_max_percent +114 signal_threshold = self.chromatogram_settings.eic_signal_threshold 115 -116 signal_threshold = self.chromatogram_settings.eic_signal_threshold +116 min_peak_datapoints = 
self.chromatogram_settings.min_peak_datapoints 117 -118 min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints -119 -120 peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold -121 -122 correct_baseline = False -123 peaks_entity_data = {} -124 -125 max_eic = 0 -126 for mz, eic_scan_index_rt in eic_dict.items(): +118 peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold +119 +120 correct_baseline = False +121 peaks_entity_data = {} +122 +123 max_eic = 0 +124 for mz, eic_scan_index_rt in eic_dict.items(): +125 ind_max_eic = max(eic_scan_index_rt[0]) +126 max_eic = ind_max_eic if ind_max_eic > max_eic else max_eic 127 -128 ind_max_eic = max(eic_scan_index_rt[0]) -129 max_eic = ind_max_eic if ind_max_eic > max_eic else max_eic -130 -131 for mz, eic_scan_index_rt in eic_dict.items(): -132 -133 eic = eic_scan_index_rt[0] -134 rt_list = eic_scan_index_rt[1] -135 -136 if len(eic) >= min_peak_datapoints: -137 -138 smooth_eic = self.smooth_tic(eic) -139 -140 include_indexes = sp.peak_picking_first_derivative(rt_list, smooth_eic, max_height, max_prominence, max_eic, min_peak_datapoints, -141 peak_derivative_threshold, -142 signal_threshold=signal_threshold, correct_baseline=correct_baseline) -143 -144 for initial_scan, apex_scan, final_scan in include_indexes: -145 -146 rt_corrected_therm = self.quadratic_interpolation(rt_list, smooth_eic, apex_scan) -147 -148 ref_apex_rt = round(rt_list[apex_scan] + rt_corrected_therm, 4) -149 -150 apex_rt = rt_list[apex_scan] -151 # apex_abundance = smooth_eic[apex_scan] -152 -153 # maximum_tic = apex_abundance if apex_abundance > maximum_tic else maximum_tic -154 -155 for scan_index in range(initial_scan, final_scan): +128 for mz, eic_scan_index_rt in eic_dict.items(): +129 eic = eic_scan_index_rt[0] +130 rt_list = eic_scan_index_rt[1] +131 +132 if len(eic) >= min_peak_datapoints: +133 smooth_eic = self.smooth_tic(eic) +134 +135 include_indexes = 
sp.peak_picking_first_derivative( +136 rt_list, +137 smooth_eic, +138 max_height, +139 max_prominence, +140 max_eic, +141 min_peak_datapoints, +142 peak_derivative_threshold, +143 signal_threshold=signal_threshold, +144 correct_baseline=correct_baseline, +145 ) +146 +147 for initial_scan, apex_scan, final_scan in include_indexes: +148 rt_corrected_therm = self.quadratic_interpolation( +149 rt_list, smooth_eic, apex_scan +150 ) +151 +152 ref_apex_rt = round(rt_list[apex_scan] + rt_corrected_therm, 4) +153 +154 apex_rt = rt_list[apex_scan] +155 # apex_abundance = smooth_eic[apex_scan] 156 -157 peak_rt = rt_list[scan_index] -158 peak_abundance = smooth_eic[scan_index] -159 -160 if peak_abundance > 0: -161 -162 dict_data = {peak_rt: {'mz': [mz], -163 'abundance': [peak_abundance], -164 'scan_number': [scan_index]}, -165 'ref_apex_rt': ref_apex_rt -166 } -167 -168 if apex_rt not in peaks_entity_data.keys(): -169 -170 peaks_entity_data[apex_rt] = dict_data -171 -172 else: -173 -174 if peak_rt not in peaks_entity_data[apex_rt].keys(): +157 # maximum_tic = apex_abundance if apex_abundance > maximum_tic else maximum_tic +158 +159 for scan_index in range(initial_scan, final_scan): +160 peak_rt = rt_list[scan_index] +161 peak_abundance = smooth_eic[scan_index] +162 +163 if peak_abundance > 0: +164 dict_data = { +165 peak_rt: { +166 "mz": [mz], +167 "abundance": [peak_abundance], +168 "scan_number": [scan_index], +169 }, +170 "ref_apex_rt": ref_apex_rt, +171 } +172 +173 if apex_rt not in peaks_entity_data.keys(): +174 peaks_entity_data[apex_rt] = dict_data 175 -176 peaks_entity_data[apex_rt][peak_rt] = dict_data.get(peak_rt) -177 -178 else: -179 -180 existing_data = peaks_entity_data[apex_rt].get(peak_rt) +176 else: +177 if peak_rt not in peaks_entity_data[apex_rt].keys(): +178 peaks_entity_data[apex_rt][peak_rt] = dict_data.get( +179 peak_rt +180 ) 181 -182 existing_data['mz'].append(mz) -183 existing_data['abundance'].append(peak_abundance) -184 
existing_data['scan_number'].append(scan_index) -185 -186 return peaks_entity_data -187 -188 def mass_spec_factory(self, rt, datadict): -189 -190 # tic = sum(datadict.get('abundance')) -191 -192 scan_index = datadict['scan_number'][0] -193 -194 mz_list, abundance_list = zip(*sorted(zip(datadict['mz'], datadict['abundance']))) +182 else: +183 existing_data = peaks_entity_data[apex_rt].get( +184 peak_rt +185 ) +186 +187 existing_data["mz"].append(mz) +188 existing_data["abundance"].append(peak_abundance) +189 existing_data["scan_number"].append(scan_index) +190 +191 return peaks_entity_data +192 +193 def mass_spec_factory(self, rt, datadict): +194 # tic = sum(datadict.get('abundance')) 195 -196 data_dict = {Labels.mz: mz_list, Labels.abundance: abundance_list} +196 scan_index = datadict["scan_number"][0] 197 -198 d_params = default_parameters(self._ms[scan_index]._filename) -199 -200 d_params["rt"] = rt +198 mz_list, abundance_list = zip( +199 *sorted(zip(datadict["mz"], datadict["abundance"])) +200 ) 201 -202 d_params["scan_number"] = scan_index +202 data_dict = {Labels.mz: mz_list, Labels.abundance: abundance_list} 203 -204 d_params['label'] = Labels.gcms_centroid +204 d_params = default_parameters(self._ms[scan_index]._filename) 205 -206 d_params["polarity"] = self._ms[scan_index].polarity +206 d_params["rt"] = rt 207 -208 d_params['analyzer'] = self._ms[scan_index].analyzer +208 d_params["scan_number"] = scan_index 209 -210 d_params['instrument_label'] = self._ms[scan_index].instrument_label +210 d_params["label"] = Labels.gcms_centroid 211 -212 d_params["filename_path"] = self._ms[scan_index].instrument_label +212 d_params["polarity"] = self._ms[scan_index].polarity 213 -214 ms = MassSpecCentroidLowRes(data_dict, d_params) +214 d_params["analyzer"] = self._ms[scan_index].analyzer 215 -216 return ms +216 d_params["instrument_label"] = self._ms[scan_index].instrument_label 217 -218 def smooth_signal(self, signal): +218 d_params["filename_path"] = 
self._ms[scan_index].instrument_label 219 -220 implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method +220 ms = MassSpecCentroidLowRes(data_dict, d_params) 221 -222 pol_order = self.chromatogram_settings.savgol_pol_order +222 return ms 223 -224 window_len = self.chromatogram_settings.smooth_window -225 -226 window = self.chromatogram_settings.smooth_method -227 -228 return sp.smooth_signal(signal, window_len, window, pol_order, implemented_smooth_method) -229 -230 def add_gcpeak(self, new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, datadict, plot_res): -231 -232 if start_rt <= peak_rt[new_apex_index[1]] <= final_rt: -233 -234 rt_list = peak_rt[new_apex_index[0]:new_apex_index[2]] -235 tic_list = smoothed_tic[new_apex_index[0]:new_apex_index[2]] +224 def smooth_signal(self, signal): +225 implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method +226 +227 pol_order = self.chromatogram_settings.savgol_pol_order +228 +229 window_len = self.chromatogram_settings.smooth_window +230 +231 window = self.chromatogram_settings.smooth_method +232 +233 return sp.smooth_signal( +234 signal, window_len, window, pol_order, implemented_smooth_method +235 ) 236 -237 apex_rt = peak_rt[new_apex_index[1]] -238 apex_i = rt_list.index(apex_rt) -239 -240 '''workaround for peak picking missing some local minimas''' -241 if apex_rt not in self.processed_appexes: -242 -243 self.processed_appexes.append(apex_rt) -244 -245 mass_spectra = (self.mass_spec_factory(rt, datadict.get(rt)) for rt in rt_list) -246 -247 gc_peak = GCPeakDeconvolved(self, mass_spectra, apex_i, rt_list, tic_list) -248 -249 gc_peak.calc_area(tic_list, 1) +237 def add_gcpeak( +238 self, +239 new_apex_index, +240 start_rt, +241 final_rt, +242 peak_rt, +243 smoothed_tic, +244 datadict, +245 plot_res, +246 ): +247 if start_rt <= peak_rt[new_apex_index[1]] <= final_rt: +248 rt_list = peak_rt[new_apex_index[0] : new_apex_index[2]] +249 tic_list = 
smoothed_tic[new_apex_index[0] : new_apex_index[2]] 250 -251 self.gcpeaks.append(gc_peak) -252 -253 if plot_res: -254 -255 plt.plot(gc_peak.rt_list, gc_peak.tic_list) -256 plt.plot(gc_peak.retention_time, gc_peak.tic, c='black', marker= '^', linewidth=0) +251 apex_rt = peak_rt[new_apex_index[1]] +252 apex_i = rt_list.index(apex_rt) +253 +254 """workaround for peak picking missing some local minimas""" +255 if apex_rt not in self.processed_appexes: +256 self.processed_appexes.append(apex_rt) 257 -258 def deconvolution(self, peaks_entity_data, plot_res): -259 -260 # plot_res = True -261 domain = self.retention_time -262 signal = self._processed_tic -263 max_height = self.chromatogram_settings.peak_height_max_percent -264 max_prominence = self.chromatogram_settings.peak_max_prominence_percent -265 min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints -266 signal_threshold = self.chromatogram_settings.peak_height_min_percent -267 max_rt_distance = self.chromatogram_settings.max_rt_distance -268 peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold +258 mass_spectra = ( +259 self.mass_spec_factory(rt, datadict.get(rt)) for rt in rt_list +260 ) +261 +262 gc_peak = GCPeakDeconvolved( +263 self, mass_spectra, apex_i, rt_list, tic_list +264 ) +265 +266 gc_peak.calc_area(tic_list, 1) +267 +268 self.gcpeaks.append(gc_peak) 269 -270 max_signal = max(signal) -271 correct_baseline = False -272 -273 include_indexes = sp.peak_picking_first_derivative(domain, signal, max_height, max_prominence, max_signal, min_peak_datapoints, -274 peak_derivative_threshold, signal_threshold=signal_threshold, -275 correct_baseline=correct_baseline, plot_res=False) -276 -277 ''' deconvolution window is defined by the TIC peak region''' -278 all_apexes_rt = np.array(list(peaks_entity_data.keys())) +270 if plot_res: +271 plt.plot(gc_peak.rt_list, gc_peak.tic_list) +272 plt.plot( +273 gc_peak.retention_time, +274 gc_peak.tic, +275 c="black", +276 marker="^", 
+277 linewidth=0, +278 ) 279 -280 '''workaround for peak picking missing some local minimas''' -281 self.processed_appexes = [] -282 -283 for indexes_tuple in include_indexes: -284 -285 start_rt = self.retention_time[indexes_tuple[0]] -286 # apex_rt = self.retention_time[indexes_tuple[1]] -287 final_rt = self.retention_time[indexes_tuple[2]] -288 -289 ''' find all features within TIC peak window''' -290 peak_features_indexes = np.where((all_apexes_rt > start_rt) & (all_apexes_rt < final_rt))[0] -291 peak_features_rts = all_apexes_rt[peak_features_indexes] -292 -293 # print(start_rt, apex_rt, final_rt ) -294 -295 filtered_features_rt = [] -296 filtered_features_abundance = [] -297 -298 for each_apex_rt in peak_features_rts: -299 -300 apex_data = peaks_entity_data.get(each_apex_rt).get(each_apex_rt) -301 -302 peak_features_tic = sum(peaks_entity_data.get(each_apex_rt).get(each_apex_rt).get('abundance')) -303 -304 norm_smooth_tic = (peak_features_tic / max_signal) * 100 -305 -306 ''' TODO: -307 Improve Peak Filtering -308 -309 Calculate peaks sharpness here and filter it out (Amax - An /n)? -310 Peak Fit and Calculate Peak Gaussian Similarity? 
-311 Currentely using flat % tic relative abundance threshold and min 3 m/z per mass spectrum -312 ''' -313 if norm_smooth_tic > signal_threshold and len(apex_data['mz']) > 1: -314 -315 # print(len(apex_data['mz'])) -316 filtered_features_rt.append(each_apex_rt) -317 filtered_features_abundance.append(peak_features_tic) -318 -319 if len(filtered_features_rt) > 1: -320 ''' more than one peak feature identified inside a TIC peak ''' -321 # plt.plot(self.retention_time[indexes_tuple[0]:indexes_tuple[2]], signal[indexes_tuple[0]:indexes_tuple[2]], c='black') -322 -323 # print(filtered_features_rt) -324 grouped_rt = self.hc(filtered_features_rt, filtered_features_abundance, max_rt_distance=max_rt_distance) -325 # print(grouped_rt) -326 -327 for group, apex_rt_list in grouped_rt.items(): -328 ''' each group is a peak feature defined by the hierarchical clutter algorithm -329 -330 ''' -331 group_datadict = {} -332 group_datadict['ref_apex_rt'] = [] -333 -334 for each_group_apex_rt in apex_rt_list: -335 -336 datadict = peaks_entity_data.get(each_group_apex_rt) +280 def deconvolution(self, peaks_entity_data, plot_res): +281 # plot_res = True +282 domain = self.retention_time +283 signal = self._processed_tic +284 max_height = self.chromatogram_settings.peak_height_max_percent +285 max_prominence = self.chromatogram_settings.peak_max_prominence_percent +286 min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints +287 signal_threshold = self.chromatogram_settings.peak_height_min_percent +288 max_rt_distance = self.chromatogram_settings.max_rt_distance +289 peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold +290 +291 max_signal = max(signal) +292 correct_baseline = False +293 +294 include_indexes = sp.peak_picking_first_derivative( +295 domain, +296 signal, +297 max_height, +298 max_prominence, +299 max_signal, +300 min_peak_datapoints, +301 peak_derivative_threshold, +302 signal_threshold=signal_threshold, +303 
correct_baseline=correct_baseline, +304 plot_res=False, +305 ) +306 +307 """ deconvolution window is defined by the TIC peak region""" +308 all_apexes_rt = np.array(list(peaks_entity_data.keys())) +309 +310 """workaround for peak picking missing some local minimas""" +311 self.processed_appexes = [] +312 +313 for indexes_tuple in include_indexes: +314 start_rt = self.retention_time[indexes_tuple[0]] +315 # apex_rt = self.retention_time[indexes_tuple[1]] +316 final_rt = self.retention_time[indexes_tuple[2]] +317 +318 """ find all features within TIC peak window""" +319 peak_features_indexes = np.where( +320 (all_apexes_rt > start_rt) & (all_apexes_rt < final_rt) +321 )[0] +322 peak_features_rts = all_apexes_rt[peak_features_indexes] +323 +324 # print(start_rt, apex_rt, final_rt ) +325 +326 filtered_features_rt = [] +327 filtered_features_abundance = [] +328 +329 for each_apex_rt in peak_features_rts: +330 apex_data = peaks_entity_data.get(each_apex_rt).get(each_apex_rt) +331 +332 peak_features_tic = sum( +333 peaks_entity_data.get(each_apex_rt) +334 .get(each_apex_rt) +335 .get("abundance") +336 ) 337 -338 for rt, each_datadict in datadict.items(): +338 norm_smooth_tic = (peak_features_tic / max_signal) * 100 339 -340 if rt == "ref_apex_rt": -341 -342 group_datadict['ref_apex_rt'].append(each_datadict) -343 -344 else: -345 -346 if rt in group_datadict.keys(): -347 -348 mz_list = each_datadict.get("mz") -349 abundance_list = each_datadict.get("abundance") -350 -351 each_mz_abun = dict(zip(mz_list, abundance_list)) -352 -353 for index_mz, mz in enumerate(group_datadict[rt].get("mz")): -354 if mz in each_mz_abun.keys(): +340 """ TODO: +341 Improve Peak Filtering +342 +343 Calculate peaks sharpness here and filter it out (Amax - An /n)? +344 Peak Fit and Calculate Peak Gaussian Similarity? 
+345 Currentely using flat % tic relative abundance threshold and min 3 m/z per mass spectrum +346 """ +347 if norm_smooth_tic > signal_threshold and len(apex_data["mz"]) > 1: +348 # print(len(apex_data['mz'])) +349 filtered_features_rt.append(each_apex_rt) +350 filtered_features_abundance.append(peak_features_tic) +351 +352 if len(filtered_features_rt) > 1: +353 """ more than one peak feature identified inside a TIC peak """ +354 # plt.plot(self.retention_time[indexes_tuple[0]:indexes_tuple[2]], signal[indexes_tuple[0]:indexes_tuple[2]], c='black') 355 -356 each_mz_abun[mz] = each_mz_abun[mz] + group_datadict[rt].get("abundance")[index_mz] -357 -358 else: -359 -360 each_mz_abun[mz] = group_datadict[rt].get("abundance")[index_mz] -361 -362 group_datadict[rt] = {'mz': list(each_mz_abun.keys()), -363 'abundance': list(each_mz_abun.values()), -364 'scan_number': each_datadict.get('scan_number')} -365 -366 else: -367 -368 group_datadict[rt] = each_datadict -369 -370 peak_rt = [] -371 peak_tic = [] -372 -373 # print(group_datadict.get('ref_apex_rt')) -374 for rt, each_datadict in group_datadict.items(): -375 if rt != "ref_apex_rt": -376 peak_rt.append(rt) -377 peak_tic.append(sum(each_datadict["abundance"])) -378 -379 peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic))) -380 -381 smoothed_tic = self.smooth_signal(peak_tic) +356 # print(filtered_features_rt) +357 grouped_rt = self.hc( +358 filtered_features_rt, +359 filtered_features_abundance, +360 max_rt_distance=max_rt_distance, +361 ) +362 # print(grouped_rt) +363 +364 for group, apex_rt_list in grouped_rt.items(): +365 """ each group is a peak feature defined by the hierarchical clutter algorithm +366 +367 """ +368 group_datadict = {} +369 group_datadict["ref_apex_rt"] = [] +370 +371 for each_group_apex_rt in apex_rt_list: +372 datadict = peaks_entity_data.get(each_group_apex_rt) +373 +374 for rt, each_datadict in datadict.items(): +375 if rt == "ref_apex_rt": +376 
group_datadict["ref_apex_rt"].append(each_datadict) +377 +378 else: +379 if rt in group_datadict.keys(): +380 mz_list = each_datadict.get("mz") +381 abundance_list = each_datadict.get("abundance") 382 -383 include_indexes = sp.peak_picking_first_derivative(peak_rt, smoothed_tic, max_height, max_prominence, max_signal, min_peak_datapoints, -384 peak_derivative_threshold, -385 signal_threshold=signal_threshold, correct_baseline=False, plot_res=False) -386 -387 include_indexes = list(include_indexes) -388 -389 if include_indexes: -390 -391 if len(include_indexes) > 1: -392 ''' after sum there are two apexes -393 check if it is inside the deconvolution window, otherwise ignores it -394 ''' +383 each_mz_abun = dict(zip(mz_list, abundance_list)) +384 +385 for index_mz, mz in enumerate( +386 group_datadict[rt].get("mz") +387 ): +388 if mz in each_mz_abun.keys(): +389 each_mz_abun[mz] = ( +390 each_mz_abun[mz] +391 + group_datadict[rt].get("abundance")[ +392 index_mz +393 ] +394 ) 395 -396 for new_apex_index in include_indexes: -397 # pass -398 self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, group_datadict, plot_res) -399 -400 else: -401 ''' after sum there is on apex -402 save it -403 ''' -404 new_apex_index = include_indexes[0] -405 # print(include_indexes, group, apex_rt_list) -406 self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, group_datadict, plot_res) -407 -408 elif len(filtered_features_rt) == 1: -409 ''' only one peak feature inside deconvolution window ''' -410 -411 each_apex_rt = filtered_features_rt[0] +396 else: +397 each_mz_abun[mz] = group_datadict[rt].get( +398 "abundance" +399 )[index_mz] +400 +401 group_datadict[rt] = { +402 "mz": list(each_mz_abun.keys()), +403 "abundance": list(each_mz_abun.values()), +404 "scan_number": each_datadict.get("scan_number"), +405 } +406 +407 else: +408 group_datadict[rt] = each_datadict +409 +410 peak_rt = [] +411 peak_tic = [] 412 -413 datadict = 
peaks_entity_data.get(each_apex_rt) -414 -415 peak_rt = [] -416 peak_tic = [] -417 -418 for rt, each_datadict in datadict.items(): -419 -420 if rt != "ref_apex_rt": -421 peak_rt.append(rt) -422 peak_tic.append(sum(each_datadict["abundance"])) -423 -424 peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic))) -425 -426 smoothed_tic = self.smooth_signal(peak_tic) -427 -428 include_indexes = sp.peak_picking_first_derivative(peak_rt, smoothed_tic, max_height, max_prominence, max_signal, min_peak_datapoints, -429 peak_derivative_threshold, -430 signal_threshold=signal_threshold, correct_baseline=False, plot_res=False) -431 include_indexes = list(include_indexes) -432 -433 if include_indexes: -434 -435 ''' after sum there are two apexes -436 check if it is inside the deconvolution window, otherwise ignores it''' -437 if len(include_indexes) > 1: -438 -439 for new_apex_index in include_indexes: -440 # pass -441 self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, datadict, plot_res) -442 -443 else: -444 ''' after sum there is one apex -445 save it -446 includes_indexes = (start, apex, final )''' -447 -448 new_apex_index = include_indexes[0] -449 -450 self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, datadict, plot_res) -451 -452 else: -453 -454 # print('no data after filter') -455 pass -456 if plot_res: -457 plt.plot(self.retention_time, self._processed_tic, c='black') -458 plt.show() -459 -460 def quadratic_interpolation(self, rt_list, tic_list, apex_index): -461 -462 rt_list = np.array(rt_list) -463 tic_list = np.array(tic_list) -464 three_highest_i = [i for i in range(apex_index - 1, apex_index + 2)] -465 -466 z = np.poly1d(np.polyfit(rt_list[three_highest_i], tic_list[three_highest_i], 2)) -467 a = z[2] -468 b = z[1] -469 -470 corrected_apex_rt = -b / (2 * a) -471 initial_rt = rt_list[apex_index] -472 -473 return initial_rt - corrected_apex_rt +413 # print(group_datadict.get('ref_apex_rt')) +414 for rt, each_datadict in 
group_datadict.items(): +415 if rt != "ref_apex_rt": +416 peak_rt.append(rt) +417 peak_tic.append(sum(each_datadict["abundance"])) +418 +419 peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic))) +420 +421 smoothed_tic = self.smooth_signal(peak_tic) +422 +423 include_indexes = sp.peak_picking_first_derivative( +424 peak_rt, +425 smoothed_tic, +426 max_height, +427 max_prominence, +428 max_signal, +429 min_peak_datapoints, +430 peak_derivative_threshold, +431 signal_threshold=signal_threshold, +432 correct_baseline=False, +433 plot_res=False, +434 ) +435 +436 include_indexes = list(include_indexes) +437 +438 if include_indexes: +439 if len(include_indexes) > 1: +440 """ after sum there are two apexes +441 check if it is inside the deconvolution window, otherwise ignores it +442 """ +443 +444 for new_apex_index in include_indexes: +445 # pass +446 self.add_gcpeak( +447 new_apex_index, +448 start_rt, +449 final_rt, +450 peak_rt, +451 smoothed_tic, +452 group_datadict, +453 plot_res, +454 ) +455 +456 else: +457 """ after sum there is on apex +458 save it +459 """ +460 new_apex_index = include_indexes[0] +461 # print(include_indexes, group, apex_rt_list) +462 self.add_gcpeak( +463 new_apex_index, +464 start_rt, +465 final_rt, +466 peak_rt, +467 smoothed_tic, +468 group_datadict, +469 plot_res, +470 ) +471 +472 elif len(filtered_features_rt) == 1: +473 """ only one peak feature inside deconvolution window """ +474 +475 each_apex_rt = filtered_features_rt[0] +476 +477 datadict = peaks_entity_data.get(each_apex_rt) +478 +479 peak_rt = [] +480 peak_tic = [] +481 +482 for rt, each_datadict in datadict.items(): +483 if rt != "ref_apex_rt": +484 peak_rt.append(rt) +485 peak_tic.append(sum(each_datadict["abundance"])) +486 +487 peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic))) +488 +489 smoothed_tic = self.smooth_signal(peak_tic) +490 +491 include_indexes = sp.peak_picking_first_derivative( +492 peak_rt, +493 smoothed_tic, +494 max_height, +495 max_prominence, +496 
max_signal, +497 min_peak_datapoints, +498 peak_derivative_threshold, +499 signal_threshold=signal_threshold, +500 correct_baseline=False, +501 plot_res=False, +502 ) +503 include_indexes = list(include_indexes) +504 +505 if include_indexes: +506 """ after sum there are two apexes +507 check if it is inside the deconvolution window, otherwise ignores it""" +508 if len(include_indexes) > 1: +509 for new_apex_index in include_indexes: +510 # pass +511 self.add_gcpeak( +512 new_apex_index, +513 start_rt, +514 final_rt, +515 peak_rt, +516 smoothed_tic, +517 datadict, +518 plot_res, +519 ) +520 +521 else: +522 """ after sum there is one apex +523 save it +524 includes_indexes = (start, apex, final )""" +525 +526 new_apex_index = include_indexes[0] +527 +528 self.add_gcpeak( +529 new_apex_index, +530 start_rt, +531 final_rt, +532 peak_rt, +533 smoothed_tic, +534 datadict, +535 plot_res, +536 ) +537 +538 else: +539 # print('no data after filter') +540 pass +541 if plot_res: +542 plt.plot(self.retention_time, self._processed_tic, c="black") +543 plt.show() +544 +545 def quadratic_interpolation(self, rt_list, tic_list, apex_index): +546 rt_list = np.array(rt_list) +547 tic_list = np.array(tic_list) +548 three_highest_i = [i for i in range(apex_index - 1, apex_index + 2)] +549 +550 z = np.poly1d( +551 np.polyfit(rt_list[three_highest_i], tic_list[three_highest_i], 2) +552 ) +553 a = z[2] +554 b = z[1] +555 +556 corrected_apex_rt = -b / (2 * a) +557 initial_rt = rt_list[apex_index] +558 +559 return initial_rt - corrected_apex_rt

    @@ -1049,14 +1224,13 @@

    -
    15    def run_deconvolution(self, plot_res=False):
    -16
    -17        eic_dict = self.ion_extracted_chroma(self._ms)
    -18
    -19        peaks_entity_data = self.find_peaks_entity(eic_dict)
    -20
    -21        ''' select model peaks, create Mass Spectrum objs, GCPeak objs, store results in GC_Class gcpeaks obj'''
    -22        self.deconvolution(peaks_entity_data, plot_res)
    +            
    13    def run_deconvolution(self, plot_res=False):
    +14        eic_dict = self.ion_extracted_chroma(self._ms)
    +15
    +16        peaks_entity_data = self.find_peaks_entity(eic_dict)
    +17
    +18        """ select model peaks, create Mass Spectrum objs, GCPeak objs, store results in GC_Class gcpeaks obj"""
    +19        self.deconvolution(peaks_entity_data, plot_res)
     
    @@ -1074,25 +1248,34 @@

    -
    24    def centroid_detector(self, tic, rt):
    -25        ''' this function has been replaced with sp.peak_picking_first_derivative
    -26            and it not used
    -27        '''
    -28        noise_std = self.chromatogram_settings.std_noise_threshold
    -29
    -30        method = self.chromatogram_settings.noise_threshold_method
    -31
    -32        ''' peak picking'''
    -33        min_height = self.chromatogram_settings.peak_height_min_percent
    -34        min_datapoints = self.chromatogram_settings.min_peak_datapoints
    -35
    -36        ''' baseline detection'''
    -37        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    -38        max_height = self.chromatogram_settings.peak_height_max_percent
    -39
    -40        peak_indexes_generator = sp.peak_detector_generator(tic, noise_std, method, rt, max_height, min_height, max_prominence, min_datapoints)
    -41
    -42        return peak_indexes_generator
    +            
    21    def centroid_detector(self, tic, rt):
    +22        """this function has been replaced with sp.peak_picking_first_derivative
    +23        and it not used
    +24        """
    +25        noise_std = self.chromatogram_settings.std_noise_threshold
    +26
    +27        method = self.chromatogram_settings.noise_threshold_method
    +28
    +29        """ peak picking"""
    +30        min_height = self.chromatogram_settings.peak_height_min_percent
    +31        min_datapoints = self.chromatogram_settings.min_peak_datapoints
    +32
    +33        """ baseline detection"""
    +34        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    +35        max_height = self.chromatogram_settings.peak_height_max_percent
    +36
    +37        peak_indexes_generator = sp.peak_detector_generator(
    +38            tic,
    +39            noise_std,
    +40            method,
    +41            rt,
    +42            max_height,
    +43            min_height,
    +44            max_prominence,
    +45            min_datapoints,
    +46        )
    +47
    +48        return peak_indexes_generator
     
    @@ -1113,28 +1296,23 @@

    -
    44    def ion_extracted_chroma(self, mass_spectra_obj):
    -45
    -46        eic_dict = {}
    -47
    -48        for scan_number, ms_obj in mass_spectra_obj.items():
    -49
    -50            mz_list = ms_obj.mz_exp
    -51            abundance_list = ms_obj.abundance
    -52            # add list of scan numbers
    -53            for index, mz in enumerate(mz_list):
    -54
    -55                # dict of mz and tuple (mass spectrum abundances index, and scan number)
    -56                if mz not in eic_dict.keys():
    -57
    -58                    eic_dict[mz] = [[abundance_list[index]], [ms_obj.retention_time] ]
    -59
    -60                else:
    +            
    50    def ion_extracted_chroma(self, mass_spectra_obj):
    +51        eic_dict = {}
    +52
    +53        for scan_number, ms_obj in mass_spectra_obj.items():
    +54            mz_list = ms_obj.mz_exp
    +55            abundance_list = ms_obj.abundance
    +56            # add list of scan numbers
    +57            for index, mz in enumerate(mz_list):
    +58                # dict of mz and tuple (mass spectrum abundances index, and scan number)
    +59                if mz not in eic_dict.keys():
    +60                    eic_dict[mz] = [[abundance_list[index]], [ms_obj.retention_time]]
     61
    -62                    eic_dict[mz][0].append(ms_obj.abundance[index])
    -63                    eic_dict[mz][1].append(ms_obj.retention_time)
    -64
    -65        return eic_dict
    +62                else:
    +63                    eic_dict[mz][0].append(ms_obj.abundance[index])
    +64                    eic_dict[mz][1].append(ms_obj.retention_time)
    +65
    +66        return eic_dict
     
    @@ -1152,8 +1330,7 @@

    -
     67    def hc(self, X, Y, max_rt_distance=0.025):
    - 68
    +            
     68    def hc(self, X, Y, max_rt_distance=0.025):
      69        from scipy.cluster.hierarchy import dendrogram, linkage
      70        from scipy.cluster.hierarchy import fcluster
      71        # from matplotlib import pyplot as plt
    @@ -1162,7 +1339,7 @@ 

    74 # Z = linkage(X, method = "ward") 75 76 max_d = max_rt_distance - 77 distance_clusters = fcluster(Z, max_d, criterion='distance') + 77 distance_clusters = fcluster(Z, max_d, criterion="distance") 78 # print("distance") 79 # print(distance_clusters) 80 @@ -1172,27 +1349,26 @@

    84 grouped_rt = {} 85 86 for index_obj, group in enumerate(distance_clusters): - 87 - 88 if group not in grouped_rt.keys(): - 89 grouped_rt[group] = [X[index_obj]] - 90 else: - 91 grouped_rt[group].append(X[index_obj]) - 92 - 93 # print(distance_clusters, grouped_rt) - 94 return grouped_rt - 95 - 96 # plt.figure(figsize=(10, 8)) - 97 # plt.scatter(X, Y, c=distance_clusters, cmap='prism') # plot points with cluster dependent colors - 98 # plt.show() - 99 # labelList = range(int(min(X)), int(max(X))) -100 -101 # plt.figure(figsize=(10, 7)) -102 # dendrogram(Z, -103 # orientation='top', -104 # distance_sort='descending', -105 # show_leaf_counts=True) -106 # plt.show() -107 # print(Z) + 87 if group not in grouped_rt.keys(): + 88 grouped_rt[group] = [X[index_obj]] + 89 else: + 90 grouped_rt[group].append(X[index_obj]) + 91 + 92 # print(distance_clusters, grouped_rt) + 93 return grouped_rt + 94 + 95 # plt.figure(figsize=(10, 8)) + 96 # plt.scatter(X, Y, c=distance_clusters, cmap='prism') # plot points with cluster dependent colors + 97 # plt.show() + 98 # labelList = range(int(min(X)), int(max(X))) + 99 +100 # plt.figure(figsize=(10, 7)) +101 # dendrogram(Z, +102 # orientation='top', +103 # distance_sort='descending', +104 # show_leaf_counts=True) +105 # plt.show() +106 # print(Z)

    @@ -1210,84 +1386,90 @@

    -
    109    def find_peaks_entity(self, eic_dict):
    -110
    -111        ''' combine eic with mathing rt apexes'''
    -112        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    +            
    108    def find_peaks_entity(self, eic_dict):
    +109        """combine eic with mathing rt apexes"""
    +110        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    +111
    +112        max_height = self.chromatogram_settings.peak_height_max_percent
     113
    -114        max_height = self.chromatogram_settings.peak_height_max_percent
    +114        signal_threshold = self.chromatogram_settings.eic_signal_threshold
     115
    -116        signal_threshold = self.chromatogram_settings.eic_signal_threshold
    +116        min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints
     117
    -118        min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints
    -119        
    -120        peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold
    -121
    -122        correct_baseline = False
    -123        peaks_entity_data = {}
    -124
    -125        max_eic = 0
    -126        for mz, eic_scan_index_rt in eic_dict.items():
    +118        peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold
    +119
    +120        correct_baseline = False
    +121        peaks_entity_data = {}
    +122
    +123        max_eic = 0
    +124        for mz, eic_scan_index_rt in eic_dict.items():
    +125            ind_max_eic = max(eic_scan_index_rt[0])
    +126            max_eic = ind_max_eic if ind_max_eic > max_eic else max_eic
     127
    -128            ind_max_eic = max(eic_scan_index_rt[0])
    -129            max_eic = ind_max_eic if ind_max_eic > max_eic else max_eic
    -130
    -131        for mz, eic_scan_index_rt in eic_dict.items():
    -132
    -133            eic = eic_scan_index_rt[0]
    -134            rt_list = eic_scan_index_rt[1]
    -135
    -136            if len(eic) >= min_peak_datapoints:
    -137
    -138                smooth_eic = self.smooth_tic(eic)
    -139
    -140                include_indexes = sp.peak_picking_first_derivative(rt_list, smooth_eic, max_height, max_prominence, max_eic, min_peak_datapoints,
    -141                                                                   peak_derivative_threshold, 
    -142                                                                   signal_threshold=signal_threshold, correct_baseline=correct_baseline)
    -143
    -144                for initial_scan, apex_scan, final_scan in include_indexes:
    -145
    -146                    rt_corrected_therm = self.quadratic_interpolation(rt_list, smooth_eic, apex_scan)
    -147
    -148                    ref_apex_rt = round(rt_list[apex_scan] + rt_corrected_therm, 4)
    -149
    -150                    apex_rt = rt_list[apex_scan]
    -151                    # apex_abundance = smooth_eic[apex_scan]
    -152
    -153                    # maximum_tic = apex_abundance if apex_abundance > maximum_tic else maximum_tic
    -154
    -155                    for scan_index in range(initial_scan, final_scan):
    +128        for mz, eic_scan_index_rt in eic_dict.items():
    +129            eic = eic_scan_index_rt[0]
    +130            rt_list = eic_scan_index_rt[1]
    +131
    +132            if len(eic) >= min_peak_datapoints:
    +133                smooth_eic = self.smooth_tic(eic)
    +134
    +135                include_indexes = sp.peak_picking_first_derivative(
    +136                    rt_list,
    +137                    smooth_eic,
    +138                    max_height,
    +139                    max_prominence,
    +140                    max_eic,
    +141                    min_peak_datapoints,
    +142                    peak_derivative_threshold,
    +143                    signal_threshold=signal_threshold,
    +144                    correct_baseline=correct_baseline,
    +145                )
    +146
    +147                for initial_scan, apex_scan, final_scan in include_indexes:
    +148                    rt_corrected_therm = self.quadratic_interpolation(
    +149                        rt_list, smooth_eic, apex_scan
    +150                    )
    +151
    +152                    ref_apex_rt = round(rt_list[apex_scan] + rt_corrected_therm, 4)
    +153
    +154                    apex_rt = rt_list[apex_scan]
    +155                    # apex_abundance = smooth_eic[apex_scan]
     156
    -157                        peak_rt = rt_list[scan_index]
    -158                        peak_abundance = smooth_eic[scan_index]
    -159
    -160                        if peak_abundance > 0:
    -161
    -162                            dict_data = {peak_rt: {'mz': [mz],
    -163                                                   'abundance': [peak_abundance],
    -164                                                   'scan_number': [scan_index]},
    -165                                                   'ref_apex_rt': ref_apex_rt
    -166                                                   }
    -167
    -168                            if apex_rt not in peaks_entity_data.keys():
    -169
    -170                                peaks_entity_data[apex_rt] = dict_data
    -171
    -172                            else:
    -173
    -174                                if peak_rt not in peaks_entity_data[apex_rt].keys():
    +157                    # maximum_tic = apex_abundance if apex_abundance > maximum_tic else maximum_tic
    +158
    +159                    for scan_index in range(initial_scan, final_scan):
    +160                        peak_rt = rt_list[scan_index]
    +161                        peak_abundance = smooth_eic[scan_index]
    +162
    +163                        if peak_abundance > 0:
    +164                            dict_data = {
    +165                                peak_rt: {
    +166                                    "mz": [mz],
    +167                                    "abundance": [peak_abundance],
    +168                                    "scan_number": [scan_index],
    +169                                },
    +170                                "ref_apex_rt": ref_apex_rt,
    +171                            }
    +172
    +173                            if apex_rt not in peaks_entity_data.keys():
    +174                                peaks_entity_data[apex_rt] = dict_data
     175
    -176                                    peaks_entity_data[apex_rt][peak_rt] = dict_data.get(peak_rt)
    -177
    -178                                else:
    -179
    -180                                    existing_data = peaks_entity_data[apex_rt].get(peak_rt)
    +176                            else:
    +177                                if peak_rt not in peaks_entity_data[apex_rt].keys():
    +178                                    peaks_entity_data[apex_rt][peak_rt] = dict_data.get(
    +179                                        peak_rt
    +180                                    )
     181
    -182                                    existing_data['mz'].append(mz)
    -183                                    existing_data['abundance'].append(peak_abundance)
    -184                                    existing_data['scan_number'].append(scan_index)
    -185
    -186        return peaks_entity_data
    +182                                else:
    +183                                    existing_data = peaks_entity_data[apex_rt].get(
    +184                                        peak_rt
    +185                                    )
    +186
    +187                                    existing_data["mz"].append(mz)
    +188                                    existing_data["abundance"].append(peak_abundance)
    +189                                    existing_data["scan_number"].append(scan_index)
    +190
    +191        return peaks_entity_data
     
    @@ -1307,35 +1489,36 @@

    -
    188    def mass_spec_factory(self, rt, datadict):
    -189
    -190        # tic = sum(datadict.get('abundance'))
    -191
    -192        scan_index = datadict['scan_number'][0]
    -193
    -194        mz_list, abundance_list = zip(*sorted(zip(datadict['mz'], datadict['abundance'])))
    +            
    193    def mass_spec_factory(self, rt, datadict):
    +194        # tic = sum(datadict.get('abundance'))
     195
    -196        data_dict = {Labels.mz: mz_list, Labels.abundance: abundance_list}
    +196        scan_index = datadict["scan_number"][0]
     197
    -198        d_params = default_parameters(self._ms[scan_index]._filename)
    -199
    -200        d_params["rt"] = rt
    +198        mz_list, abundance_list = zip(
    +199            *sorted(zip(datadict["mz"], datadict["abundance"]))
    +200        )
     201
    -202        d_params["scan_number"] = scan_index
    +202        data_dict = {Labels.mz: mz_list, Labels.abundance: abundance_list}
     203
    -204        d_params['label'] = Labels.gcms_centroid
    +204        d_params = default_parameters(self._ms[scan_index]._filename)
     205
    -206        d_params["polarity"] = self._ms[scan_index].polarity
    +206        d_params["rt"] = rt
     207
    -208        d_params['analyzer'] = self._ms[scan_index].analyzer
    +208        d_params["scan_number"] = scan_index
     209
    -210        d_params['instrument_label'] = self._ms[scan_index].instrument_label
    +210        d_params["label"] = Labels.gcms_centroid
     211
    -212        d_params["filename_path"] = self._ms[scan_index].instrument_label
    +212        d_params["polarity"] = self._ms[scan_index].polarity
     213
    -214        ms = MassSpecCentroidLowRes(data_dict, d_params)
    +214        d_params["analyzer"] = self._ms[scan_index].analyzer
     215
    -216        return ms
    +216        d_params["instrument_label"] = self._ms[scan_index].instrument_label
    +217
    +218        d_params["filename_path"] = self._ms[scan_index].instrument_label
    +219
    +220        ms = MassSpecCentroidLowRes(data_dict, d_params)
    +221
    +222        return ms
     
    @@ -1353,17 +1536,18 @@

    -
    218    def smooth_signal(self, signal):
    -219
    -220        implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method
    -221
    -222        pol_order = self.chromatogram_settings.savgol_pol_order
    -223
    -224        window_len = self.chromatogram_settings.smooth_window
    -225
    -226        window = self.chromatogram_settings.smooth_method
    -227
    -228        return sp.smooth_signal(signal, window_len, window, pol_order, implemented_smooth_method)
    +            
    224    def smooth_signal(self, signal):
    +225        implemented_smooth_method = self.chromatogram_settings.implemented_smooth_method
    +226
    +227        pol_order = self.chromatogram_settings.savgol_pol_order
    +228
    +229        window_len = self.chromatogram_settings.smooth_window
    +230
    +231        window = self.chromatogram_settings.smooth_method
    +232
    +233        return sp.smooth_signal(
    +234            signal, window_len, window, pol_order, implemented_smooth_method
    +235        )
     
    @@ -1381,33 +1565,48 @@

    -
    230    def add_gcpeak(self, new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, datadict, plot_res):
    -231
    -232        if start_rt <= peak_rt[new_apex_index[1]] <= final_rt:
    -233
    -234            rt_list = peak_rt[new_apex_index[0]:new_apex_index[2]]
    -235            tic_list = smoothed_tic[new_apex_index[0]:new_apex_index[2]]
    -236
    -237            apex_rt = peak_rt[new_apex_index[1]]
    -238            apex_i = rt_list.index(apex_rt)
    -239
    -240            '''workaround for peak picking missing some local minimas'''
    -241            if apex_rt not in self.processed_appexes:
    -242
    -243                self.processed_appexes.append(apex_rt)
    -244
    -245                mass_spectra = (self.mass_spec_factory(rt, datadict.get(rt)) for rt in rt_list)
    -246
    -247                gc_peak = GCPeakDeconvolved(self, mass_spectra, apex_i, rt_list, tic_list)
    -248
    -249                gc_peak.calc_area(tic_list, 1)
    +            
    237    def add_gcpeak(
    +238        self,
    +239        new_apex_index,
    +240        start_rt,
    +241        final_rt,
    +242        peak_rt,
    +243        smoothed_tic,
    +244        datadict,
    +245        plot_res,
    +246    ):
    +247        if start_rt <= peak_rt[new_apex_index[1]] <= final_rt:
    +248            rt_list = peak_rt[new_apex_index[0] : new_apex_index[2]]
    +249            tic_list = smoothed_tic[new_apex_index[0] : new_apex_index[2]]
     250
    -251                self.gcpeaks.append(gc_peak)
    -252
    -253                if plot_res:
    -254
    -255                    plt.plot(gc_peak.rt_list, gc_peak.tic_list)
    -256                    plt.plot(gc_peak.retention_time, gc_peak.tic, c='black', marker= '^', linewidth=0)
    +251            apex_rt = peak_rt[new_apex_index[1]]
    +252            apex_i = rt_list.index(apex_rt)
    +253
    +254            """workaround for peak picking missing some local minimas"""
    +255            if apex_rt not in self.processed_appexes:
    +256                self.processed_appexes.append(apex_rt)
    +257
    +258                mass_spectra = (
    +259                    self.mass_spec_factory(rt, datadict.get(rt)) for rt in rt_list
    +260                )
    +261
    +262                gc_peak = GCPeakDeconvolved(
    +263                    self, mass_spectra, apex_i, rt_list, tic_list
    +264                )
    +265
    +266                gc_peak.calc_area(tic_list, 1)
    +267
    +268                self.gcpeaks.append(gc_peak)
    +269
    +270                if plot_res:
    +271                    plt.plot(gc_peak.rt_list, gc_peak.tic_list)
    +272                    plt.plot(
    +273                        gc_peak.retention_time,
    +274                        gc_peak.tic,
    +275                        c="black",
    +276                        marker="^",
    +277                        linewidth=0,
    +278                    )
     
    @@ -1425,207 +1624,270 @@

    -
    258    def deconvolution(self, peaks_entity_data, plot_res):
    -259
    -260        # plot_res = True 
    -261        domain = self.retention_time
    -262        signal = self._processed_tic
    -263        max_height = self.chromatogram_settings.peak_height_max_percent
    -264        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    -265        min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints
    -266        signal_threshold = self.chromatogram_settings.peak_height_min_percent
    -267        max_rt_distance = self.chromatogram_settings.max_rt_distance
    -268        peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold
    -269
    -270        max_signal = max(signal)
    -271        correct_baseline = False
    -272
    -273        include_indexes = sp.peak_picking_first_derivative(domain, signal, max_height, max_prominence, max_signal, min_peak_datapoints,
    -274                                                           peak_derivative_threshold, signal_threshold=signal_threshold, 
    -275                                                            correct_baseline=correct_baseline, plot_res=False)
    -276
    -277        ''' deconvolution window is defined by the TIC peak region'''
    -278        all_apexes_rt = np.array(list(peaks_entity_data.keys()))
    -279
    -280        '''workaround for peak picking missing some local minimas'''
    -281        self.processed_appexes = []
    -282
    -283        for indexes_tuple in include_indexes:
    -284
    -285            start_rt = self.retention_time[indexes_tuple[0]]
    -286            # apex_rt = self.retention_time[indexes_tuple[1]]
    -287            final_rt = self.retention_time[indexes_tuple[2]]
    -288
    -289            ''' find all features within TIC peak window'''
    -290            peak_features_indexes = np.where((all_apexes_rt > start_rt) & (all_apexes_rt < final_rt))[0]
    -291            peak_features_rts = all_apexes_rt[peak_features_indexes]
    -292
    -293            # print(start_rt, apex_rt, final_rt )
    -294
    -295            filtered_features_rt = []
    -296            filtered_features_abundance = []
    -297
    -298            for each_apex_rt in peak_features_rts:
    -299
    -300                apex_data = peaks_entity_data.get(each_apex_rt).get(each_apex_rt)
    -301
    -302                peak_features_tic = sum(peaks_entity_data.get(each_apex_rt).get(each_apex_rt).get('abundance'))
    -303
    -304                norm_smooth_tic = (peak_features_tic / max_signal) * 100
    -305
    -306                ''' TODO: 
    -307                    Improve Peak Filtering
    -308
    -309                    Calculate peaks sharpness here and filter it out (Amax - An /n)?
    -310                    Peak Fit and Calculate Peak Gaussian Similarity?
    -311                    Currentely using flat % tic relative abundance threshold and min 3 m/z per mass spectrum
    -312                '''
    -313                if norm_smooth_tic > signal_threshold and len(apex_data['mz']) > 1:
    -314
    -315                    # print(len(apex_data['mz']))
    -316                    filtered_features_rt.append(each_apex_rt)
    -317                    filtered_features_abundance.append(peak_features_tic)
    -318
    -319            if len(filtered_features_rt) > 1:
    -320                ''' more than one peak feature identified inside a TIC peak  '''
    -321                # plt.plot(self.retention_time[indexes_tuple[0]:indexes_tuple[2]], signal[indexes_tuple[0]:indexes_tuple[2]], c='black')
    -322
    -323                # print(filtered_features_rt)
    -324                grouped_rt = self.hc(filtered_features_rt, filtered_features_abundance, max_rt_distance=max_rt_distance)
    -325                # print(grouped_rt)
    -326
    -327                for group, apex_rt_list in grouped_rt.items():
    -328                    ''' each group is a peak feature defined by the hierarchical clutter algorithm
    -329
    -330                    '''
    -331                    group_datadict = {}
    -332                    group_datadict['ref_apex_rt'] = []
    -333
    -334                    for each_group_apex_rt in apex_rt_list:
    -335
    -336                        datadict = peaks_entity_data.get(each_group_apex_rt)
    +            
    280    def deconvolution(self, peaks_entity_data, plot_res):
    +281        # plot_res = True
    +282        domain = self.retention_time
    +283        signal = self._processed_tic
    +284        max_height = self.chromatogram_settings.peak_height_max_percent
    +285        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    +286        min_peak_datapoints = self.chromatogram_settings.min_peak_datapoints
    +287        signal_threshold = self.chromatogram_settings.peak_height_min_percent
    +288        max_rt_distance = self.chromatogram_settings.max_rt_distance
    +289        peak_derivative_threshold = self.chromatogram_settings.peak_derivative_threshold
    +290
    +291        max_signal = max(signal)
    +292        correct_baseline = False
    +293
    +294        include_indexes = sp.peak_picking_first_derivative(
    +295            domain,
    +296            signal,
    +297            max_height,
    +298            max_prominence,
    +299            max_signal,
    +300            min_peak_datapoints,
    +301            peak_derivative_threshold,
    +302            signal_threshold=signal_threshold,
    +303            correct_baseline=correct_baseline,
    +304            plot_res=False,
    +305        )
    +306
    +307        """ deconvolution window is defined by the TIC peak region"""
    +308        all_apexes_rt = np.array(list(peaks_entity_data.keys()))
    +309
    +310        """workaround for peak picking missing some local minimas"""
    +311        self.processed_appexes = []
    +312
    +313        for indexes_tuple in include_indexes:
    +314            start_rt = self.retention_time[indexes_tuple[0]]
    +315            # apex_rt = self.retention_time[indexes_tuple[1]]
    +316            final_rt = self.retention_time[indexes_tuple[2]]
    +317
    +318            """ find all features within TIC peak window"""
    +319            peak_features_indexes = np.where(
    +320                (all_apexes_rt > start_rt) & (all_apexes_rt < final_rt)
    +321            )[0]
    +322            peak_features_rts = all_apexes_rt[peak_features_indexes]
    +323
    +324            # print(start_rt, apex_rt, final_rt )
    +325
    +326            filtered_features_rt = []
    +327            filtered_features_abundance = []
    +328
    +329            for each_apex_rt in peak_features_rts:
    +330                apex_data = peaks_entity_data.get(each_apex_rt).get(each_apex_rt)
    +331
    +332                peak_features_tic = sum(
    +333                    peaks_entity_data.get(each_apex_rt)
    +334                    .get(each_apex_rt)
    +335                    .get("abundance")
    +336                )
     337
    -338                        for rt, each_datadict in datadict.items():
    +338                norm_smooth_tic = (peak_features_tic / max_signal) * 100
     339
    -340                            if rt == "ref_apex_rt":
    -341
    -342                                group_datadict['ref_apex_rt'].append(each_datadict)
    -343
    -344                            else:
    -345
    -346                                if rt in group_datadict.keys():
    -347
    -348                                    mz_list = each_datadict.get("mz")
    -349                                    abundance_list = each_datadict.get("abundance")
    -350
    -351                                    each_mz_abun = dict(zip(mz_list, abundance_list)) 
    -352
    -353                                    for index_mz, mz in enumerate(group_datadict[rt].get("mz")):
    -354                                        if mz in each_mz_abun.keys():
    +340                """ TODO: 
    +341                    Improve Peak Filtering
    +342
    +343                    Calculate peaks sharpness here and filter it out (Amax - An /n)?
    +344                    Peak Fit and Calculate Peak Gaussian Similarity?
    +345                    Currentely using flat % tic relative abundance threshold and min 3 m/z per mass spectrum
    +346                """
    +347                if norm_smooth_tic > signal_threshold and len(apex_data["mz"]) > 1:
    +348                    # print(len(apex_data['mz']))
    +349                    filtered_features_rt.append(each_apex_rt)
    +350                    filtered_features_abundance.append(peak_features_tic)
    +351
    +352            if len(filtered_features_rt) > 1:
    +353                """ more than one peak feature identified inside a TIC peak  """
    +354                # plt.plot(self.retention_time[indexes_tuple[0]:indexes_tuple[2]], signal[indexes_tuple[0]:indexes_tuple[2]], c='black')
     355
    -356                                            each_mz_abun[mz] = each_mz_abun[mz] + group_datadict[rt].get("abundance")[index_mz]
    -357
    -358                                        else:
    -359
    -360                                            each_mz_abun[mz] = group_datadict[rt].get("abundance")[index_mz]
    -361
    -362                                    group_datadict[rt] = {'mz': list(each_mz_abun.keys()),
    -363                                                          'abundance': list(each_mz_abun.values()),
    -364                                                          'scan_number': each_datadict.get('scan_number')}
    -365
    -366                                else:
    -367
    -368                                    group_datadict[rt] = each_datadict
    -369
    -370                    peak_rt = []
    -371                    peak_tic = []
    -372
    -373                    # print(group_datadict.get('ref_apex_rt'))
    -374                    for rt, each_datadict in group_datadict.items():
    -375                        if rt != "ref_apex_rt":
    -376                            peak_rt.append(rt)
    -377                            peak_tic.append(sum(each_datadict["abundance"]))
    -378
    -379                    peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic)))
    -380
    -381                    smoothed_tic = self.smooth_signal(peak_tic)
    +356                # print(filtered_features_rt)
    +357                grouped_rt = self.hc(
    +358                    filtered_features_rt,
    +359                    filtered_features_abundance,
    +360                    max_rt_distance=max_rt_distance,
    +361                )
    +362                # print(grouped_rt)
    +363
    +364                for group, apex_rt_list in grouped_rt.items():
    +365                    """ each group is a peak feature defined by the hierarchical clutter algorithm
    +366
    +367                    """
    +368                    group_datadict = {}
    +369                    group_datadict["ref_apex_rt"] = []
    +370
    +371                    for each_group_apex_rt in apex_rt_list:
    +372                        datadict = peaks_entity_data.get(each_group_apex_rt)
    +373
    +374                        for rt, each_datadict in datadict.items():
    +375                            if rt == "ref_apex_rt":
    +376                                group_datadict["ref_apex_rt"].append(each_datadict)
    +377
    +378                            else:
    +379                                if rt in group_datadict.keys():
    +380                                    mz_list = each_datadict.get("mz")
    +381                                    abundance_list = each_datadict.get("abundance")
     382
    -383                    include_indexes = sp.peak_picking_first_derivative(peak_rt, smoothed_tic, max_height, max_prominence, max_signal, min_peak_datapoints,
    -384                                                                       peak_derivative_threshold,
    -385                                                                       signal_threshold=signal_threshold, correct_baseline=False, plot_res=False)
    -386
    -387                    include_indexes = list(include_indexes)
    -388
    -389                    if include_indexes:
    -390
    -391                        if len(include_indexes) > 1:
    -392                            ''' after sum there are two apexes
    -393                                check if it is inside the deconvolution window, otherwise ignores it
    -394                            '''
    +383                                    each_mz_abun = dict(zip(mz_list, abundance_list))
    +384
    +385                                    for index_mz, mz in enumerate(
    +386                                        group_datadict[rt].get("mz")
    +387                                    ):
    +388                                        if mz in each_mz_abun.keys():
    +389                                            each_mz_abun[mz] = (
    +390                                                each_mz_abun[mz]
    +391                                                + group_datadict[rt].get("abundance")[
    +392                                                    index_mz
    +393                                                ]
    +394                                            )
     395
    -396                            for new_apex_index in include_indexes:
    -397                                # pass
    -398                                self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, group_datadict, plot_res)
    -399
    -400                        else:
    -401                            ''' after sum there is on apex
    -402                                save it
    -403                            ''' 
    -404                            new_apex_index = include_indexes[0]
    -405                            # print(include_indexes, group, apex_rt_list)
    -406                            self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, group_datadict, plot_res)
    -407
    -408            elif len(filtered_features_rt) == 1:
    -409                ''' only one peak feature inside deconvolution window '''
    -410
    -411                each_apex_rt = filtered_features_rt[0]
    +396                                        else:
    +397                                            each_mz_abun[mz] = group_datadict[rt].get(
    +398                                                "abundance"
    +399                                            )[index_mz]
    +400
    +401                                    group_datadict[rt] = {
    +402                                        "mz": list(each_mz_abun.keys()),
    +403                                        "abundance": list(each_mz_abun.values()),
    +404                                        "scan_number": each_datadict.get("scan_number"),
    +405                                    }
    +406
    +407                                else:
    +408                                    group_datadict[rt] = each_datadict
    +409
    +410                    peak_rt = []
    +411                    peak_tic = []
     412
    -413                datadict = peaks_entity_data.get(each_apex_rt)
    -414
    -415                peak_rt = []
    -416                peak_tic = []
    -417
    -418                for rt, each_datadict in datadict.items():
    -419
    -420                    if rt != "ref_apex_rt":
    -421                        peak_rt.append(rt)
    -422                        peak_tic.append(sum(each_datadict["abundance"]))
    -423
    -424                peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic)))
    -425
    -426                smoothed_tic = self.smooth_signal(peak_tic)
    -427
    -428                include_indexes = sp.peak_picking_first_derivative(peak_rt, smoothed_tic, max_height, max_prominence, max_signal, min_peak_datapoints,
    -429                                                                   peak_derivative_threshold,
    -430                                                                   signal_threshold=signal_threshold, correct_baseline=False, plot_res=False)
    -431                include_indexes = list(include_indexes)
    -432
    -433                if include_indexes:
    -434
    -435                        ''' after sum there are two apexes
    -436                            check if it is inside the deconvolution window, otherwise ignores it'''
    -437                        if len(include_indexes) > 1:
    -438
    -439                            for new_apex_index in include_indexes:
    -440                                # pass
    -441                                self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, datadict, plot_res)
    -442
    -443                        else:
    -444                            ''' after sum there is one apex
    -445                            save it
    -446                            includes_indexes = (start, apex, final )'''
    -447
    -448                            new_apex_index = include_indexes[0]
    -449
    -450                            self.add_gcpeak(new_apex_index, start_rt, final_rt, peak_rt, smoothed_tic, datadict, plot_res)
    -451
    -452            else:
    -453
    -454                # print('no data after filter')
    -455                pass
    -456        if plot_res:
    -457            plt.plot(self.retention_time, self._processed_tic, c='black')
    -458            plt.show()
    +413                    # print(group_datadict.get('ref_apex_rt'))
    +414                    for rt, each_datadict in group_datadict.items():
    +415                        if rt != "ref_apex_rt":
    +416                            peak_rt.append(rt)
    +417                            peak_tic.append(sum(each_datadict["abundance"]))
    +418
    +419                    peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic)))
    +420
    +421                    smoothed_tic = self.smooth_signal(peak_tic)
    +422
    +423                    include_indexes = sp.peak_picking_first_derivative(
    +424                        peak_rt,
    +425                        smoothed_tic,
    +426                        max_height,
    +427                        max_prominence,
    +428                        max_signal,
    +429                        min_peak_datapoints,
    +430                        peak_derivative_threshold,
    +431                        signal_threshold=signal_threshold,
    +432                        correct_baseline=False,
    +433                        plot_res=False,
    +434                    )
    +435
    +436                    include_indexes = list(include_indexes)
    +437
    +438                    if include_indexes:
    +439                        if len(include_indexes) > 1:
    +440                            """ after sum there are two apexes
    +441                                check if it is inside the deconvolution window, otherwise ignores it
    +442                            """
    +443
    +444                            for new_apex_index in include_indexes:
    +445                                # pass
    +446                                self.add_gcpeak(
    +447                                    new_apex_index,
    +448                                    start_rt,
    +449                                    final_rt,
    +450                                    peak_rt,
    +451                                    smoothed_tic,
    +452                                    group_datadict,
    +453                                    plot_res,
    +454                                )
    +455
    +456                        else:
    +457                            """ after sum there is on apex
    +458                                save it
    +459                            """
    +460                            new_apex_index = include_indexes[0]
    +461                            # print(include_indexes, group, apex_rt_list)
    +462                            self.add_gcpeak(
    +463                                new_apex_index,
    +464                                start_rt,
    +465                                final_rt,
    +466                                peak_rt,
    +467                                smoothed_tic,
    +468                                group_datadict,
    +469                                plot_res,
    +470                            )
    +471
    +472            elif len(filtered_features_rt) == 1:
    +473                """ only one peak feature inside deconvolution window """
    +474
    +475                each_apex_rt = filtered_features_rt[0]
    +476
    +477                datadict = peaks_entity_data.get(each_apex_rt)
    +478
    +479                peak_rt = []
    +480                peak_tic = []
    +481
    +482                for rt, each_datadict in datadict.items():
    +483                    if rt != "ref_apex_rt":
    +484                        peak_rt.append(rt)
    +485                        peak_tic.append(sum(each_datadict["abundance"]))
    +486
    +487                peak_rt, peak_tic = zip(*sorted(zip(peak_rt, peak_tic)))
    +488
    +489                smoothed_tic = self.smooth_signal(peak_tic)
    +490
    +491                include_indexes = sp.peak_picking_first_derivative(
    +492                    peak_rt,
    +493                    smoothed_tic,
    +494                    max_height,
    +495                    max_prominence,
    +496                    max_signal,
    +497                    min_peak_datapoints,
    +498                    peak_derivative_threshold,
    +499                    signal_threshold=signal_threshold,
    +500                    correct_baseline=False,
    +501                    plot_res=False,
    +502                )
    +503                include_indexes = list(include_indexes)
    +504
    +505                if include_indexes:
    +506                    """ after sum there are two apexes
    +507                            check if it is inside the deconvolution window, otherwise ignores it"""
    +508                    if len(include_indexes) > 1:
    +509                        for new_apex_index in include_indexes:
    +510                            # pass
    +511                            self.add_gcpeak(
    +512                                new_apex_index,
    +513                                start_rt,
    +514                                final_rt,
    +515                                peak_rt,
    +516                                smoothed_tic,
    +517                                datadict,
    +518                                plot_res,
    +519                            )
    +520
    +521                    else:
    +522                        """ after sum there is one apex
    +523                            save it
    +524                            includes_indexes = (start, apex, final )"""
    +525
    +526                        new_apex_index = include_indexes[0]
    +527
    +528                        self.add_gcpeak(
    +529                            new_apex_index,
    +530                            start_rt,
    +531                            final_rt,
    +532                            peak_rt,
    +533                            smoothed_tic,
    +534                            datadict,
    +535                            plot_res,
    +536                        )
    +537
    +538            else:
    +539                # print('no data after filter')
    +540                pass
    +541        if plot_res:
    +542            plt.plot(self.retention_time, self._processed_tic, c="black")
    +543            plt.show()
     
    @@ -1643,20 +1905,21 @@

    -
    460    def quadratic_interpolation(self, rt_list, tic_list, apex_index):
    -461
    -462        rt_list = np.array(rt_list)
    -463        tic_list = np.array(tic_list)
    -464        three_highest_i = [i for i in range(apex_index - 1, apex_index + 2)]
    -465
    -466        z = np.poly1d(np.polyfit(rt_list[three_highest_i], tic_list[three_highest_i], 2))
    -467        a = z[2]
    -468        b = z[1]
    -469
    -470        corrected_apex_rt = -b / (2 * a)
    -471        initial_rt = rt_list[apex_index]
    -472
    -473        return initial_rt - corrected_apex_rt
    +            
    545    def quadratic_interpolation(self, rt_list, tic_list, apex_index):
    +546        rt_list = np.array(rt_list)
    +547        tic_list = np.array(tic_list)
    +548        three_highest_i = [i for i in range(apex_index - 1, apex_index + 2)]
    +549
    +550        z = np.poly1d(
    +551            np.polyfit(rt_list[three_highest_i], tic_list[three_highest_i], 2)
    +552        )
    +553        a = z[2]
    +554        b = z[1]
    +555
    +556        corrected_apex_rt = -b / (2 * a)
    +557        initial_rt = rt_list[apex_index]
    +558
    +559        return initial_rt - corrected_apex_rt
     
    diff --git a/docs/corems/mass_spectra/calc/GC_RI_Calibration.html b/docs/corems/mass_spectra/calc/GC_RI_Calibration.html index b72d313c..4bc8f7d5 100644 --- a/docs/corems/mass_spectra/calc/GC_RI_Calibration.html +++ b/docs/corems/mass_spectra/calc/GC_RI_Calibration.html @@ -58,38 +58,46 @@

    2 3 4def get_rt_ri_pairs(gcms_ref_obj, sql_obj=None): - 5 - 6 lowResSearch = LowResMassSpectralMatch(gcms_ref_obj,sql_obj=sql_obj, calibration=True) - 7 - 8 lowResSearch.run() - 9 -10 dict_ri_rt = {} -11 -12 list_of_compound_obj = {} -13 -14 for gcms_peak in gcms_ref_obj: -15 + 5 lowResSearch = LowResMassSpectralMatch( + 6 gcms_ref_obj, sql_obj=sql_obj, calibration=True + 7 ) + 8 + 9 lowResSearch.run() +10 +11 dict_ri_rt = {} +12 +13 list_of_compound_obj = {} +14 +15 for gcms_peak in gcms_ref_obj: 16 # has a compound matched 17 if gcms_peak: -18 -19 compound_obj = gcms_peak.highest_ss_compound -20 -21 if not compound_obj.ri in dict_ri_rt.keys(): -22 -23 dict_ri_rt[compound_obj.ri] = [(gcms_peak.mass_spectrum.retention_time, compound_obj)] +18 compound_obj = gcms_peak.highest_ss_compound +19 +20 if not compound_obj.ri in dict_ri_rt.keys(): +21 dict_ri_rt[compound_obj.ri] = [ +22 (gcms_peak.mass_spectrum.retention_time, compound_obj) +23 ] 24 25 else: -26 -27 dict_ri_rt[compound_obj.ri].append((gcms_peak.mass_spectrum.retention_time, compound_obj)) -28 if gcms_ref_obj.parameters.gc_ms.verbose_processing: -29 print(compound_obj.name, gcms_peak.mass_spectrum.retention_time, compound_obj.spectral_similarity_score) -30 -31 ris = [i for i in dict_ri_rt.keys()] -32 rts = [max(i, key = lambda c: c[1].spectral_similarity_score)[0] for i in dict_ri_rt.values()] -33 -34 rt_ri_pairs = list(zip(rts, ris)) -35 -36 return rt_ri_pairs +26 dict_ri_rt[compound_obj.ri].append( +27 (gcms_peak.mass_spectrum.retention_time, compound_obj) +28 ) +29 if gcms_ref_obj.parameters.gc_ms.verbose_processing: +30 print( +31 compound_obj.name, +32 gcms_peak.mass_spectrum.retention_time, +33 compound_obj.spectral_similarity_score, +34 ) +35 +36 ris = [i for i in dict_ri_rt.keys()] +37 rts = [ +38 max(i, key=lambda c: c[1].spectral_similarity_score)[0] +39 for i in dict_ri_rt.values() +40 ] +41 +42 rt_ri_pairs = list(zip(rts, ris)) +43 +44 return rt_ri_pairs

    @@ -106,38 +114,46 @@

     5def get_rt_ri_pairs(gcms_ref_obj, sql_obj=None):
    - 6    
    - 7    lowResSearch = LowResMassSpectralMatch(gcms_ref_obj,sql_obj=sql_obj, calibration=True)
    - 8
    - 9    lowResSearch.run()
    -10
    -11    dict_ri_rt = {}
    -12
    -13    list_of_compound_obj = {}
    -14
    -15    for gcms_peak in gcms_ref_obj:
    -16
    + 6    lowResSearch = LowResMassSpectralMatch(
    + 7        gcms_ref_obj, sql_obj=sql_obj, calibration=True
    + 8    )
    + 9
    +10    lowResSearch.run()
    +11
    +12    dict_ri_rt = {}
    +13
    +14    list_of_compound_obj = {}
    +15
    +16    for gcms_peak in gcms_ref_obj:
     17        # has a compound matched
     18        if gcms_peak:
    -19            
    -20            compound_obj = gcms_peak.highest_ss_compound
    -21            
    -22            if not compound_obj.ri in dict_ri_rt.keys():
    -23                
    -24                dict_ri_rt[compound_obj.ri] = [(gcms_peak.mass_spectrum.retention_time, compound_obj)]
    +19            compound_obj = gcms_peak.highest_ss_compound
    +20
    +21            if not compound_obj.ri in dict_ri_rt.keys():
    +22                dict_ri_rt[compound_obj.ri] = [
    +23                    (gcms_peak.mass_spectrum.retention_time, compound_obj)
    +24                ]
     25
     26            else:
    -27                
    -28                dict_ri_rt[compound_obj.ri].append((gcms_peak.mass_spectrum.retention_time, compound_obj))
    -29            if gcms_ref_obj.parameters.gc_ms.verbose_processing:
    -30                print(compound_obj.name, gcms_peak.mass_spectrum.retention_time, compound_obj.spectral_similarity_score)
    -31    
    -32    ris = [i for i in  dict_ri_rt.keys()]
    -33    rts = [max(i, key = lambda c: c[1].spectral_similarity_score)[0] for i in dict_ri_rt.values()]
    -34    
    -35    rt_ri_pairs = list(zip(rts, ris)) 
    -36    
    -37    return rt_ri_pairs
    +27                dict_ri_rt[compound_obj.ri].append(
    +28                    (gcms_peak.mass_spectrum.retention_time, compound_obj)
    +29                )
    +30            if gcms_ref_obj.parameters.gc_ms.verbose_processing:
    +31                print(
    +32                    compound_obj.name,
    +33                    gcms_peak.mass_spectrum.retention_time,
    +34                    compound_obj.spectral_similarity_score,
    +35                )
    +36
    +37    ris = [i for i in dict_ri_rt.keys()]
    +38    rts = [
    +39        max(i, key=lambda c: c[1].spectral_similarity_score)[0]
    +40        for i in dict_ri_rt.values()
    +41    ]
    +42
    +43    rt_ri_pairs = list(zip(rts, ris))
    +44
    +45    return rt_ri_pairs
     
    diff --git a/docs/corems/mass_spectra/calc/LC_Calc.html b/docs/corems/mass_spectra/calc/LC_Calc.html index f4a2348a..854af957 100644 --- a/docs/corems/mass_spectra/calc/LC_Calc.html +++ b/docs/corems/mass_spectra/calc/LC_Calc.html @@ -350,7 +350,7 @@

    231 polarity : int, optional 232 The polarity of the mass spectra (1 or -1). If not set, the polarity will be determined from the dataset. Defaults to None. (fastest if set to -1 or 1) 233 ms_params : MSParameters, optional - 234 The mass spectrum parameters to use. If not set (None), the globally set parameters will be used. Defaults to None. + 234 The mass spectrum parameters to use. If not set (None), the globally set parameters will be used. Defaults to None. 235 236 Returns 237 ------- @@ -441,7 +441,7 @@

    322 if auto_process: 323 ms.process_mass_spec() 324 return ms - 325 + 325 326 def find_mass_features(self, ms_level=1, grid=True): 327 """Find mass features within an LCMSBase object 328 @@ -472,1074 +472,1073 @@

    353 if pp_method == "persistent homology": 354 msx_scan_df = self.scan_df[self.scan_df["ms_level"] == ms_level] 355 if all(msx_scan_df["ms_format"] == "profile"): - 356 self.find_mass_features_ph( - 357 ms_level=ms_level, grid=grid - 358 ) - 359 self.cluster_mass_features( - 360 drop_children=True, sort_by="persistence" - 361 ) - 362 else: - 363 raise ValueError( - 364 "MS{} scans are not profile mode, which is required for persistent homology peak picking.".format( - 365 ms_level - 366 ) - 367 ) - 368 else: - 369 raise ValueError("Peak picking method not implemented") - 370 - 371 def integrate_mass_features(self, drop_if_fail=True, drop_duplicates=True, ms_level=1): - 372 """Integrate mass features and extract EICs. + 356 self.find_mass_features_ph(ms_level=ms_level, grid=grid) + 357 self.cluster_mass_features(drop_children=True, sort_by="persistence") + 358 else: + 359 raise ValueError( + 360 "MS{} scans are not profile mode, which is required for persistent homology peak picking.".format( + 361 ms_level + 362 ) + 363 ) + 364 else: + 365 raise ValueError("Peak picking method not implemented") + 366 + 367 def integrate_mass_features( + 368 self, drop_if_fail=True, drop_duplicates=True, ms_level=1 + 369 ): + 370 """Integrate mass features and extract EICs. + 371 + 372 Populates the _eics attribute on the LCMSBase object for each unique mz in the mass_features dataframe and adds data (start_scan, final_scan, area) to the mass_features attribute. 373 - 374 Populates the _eics attribute on the LCMSBase object for each unique mz in the mass_features dataframe and adds data (start_scan, final_scan, area) to the mass_features attribute. - 375 - 376 Parameters - 377 ---------- - 378 drop_if_fail : bool, optional - 379 Whether to drop mass features if the EIC limit calculations fail. - 380 Default is True. 
- 381 drop_duplicates : bool, optional - 382 Whether to mass features that appear to be duplicates - 383 (i.e., mz is similar to another mass feature and limits of the EIC are similar or encapsulating). - 384 Default is True. - 385 ms_level : int, optional - 386 The MS level to use. Default is 1. - 387 - 388 Raises - 389 ------ - 390 ValueError - 391 If no mass features are found. - 392 If no MS level data is found for the given MS level (either in data or in the scan data) - 393 - 394 Returns - 395 ------- - 396 None, but populates the eics attribute on the LCMSBase object and adds data (start_scan, final_scan, area) to the mass_features attribute. - 397 - 398 Notes - 399 ----- - 400 drop_if_fail is useful for discarding mass features that do not have good shapes, usually due to a detection on a shoulder of a peak or a noisy region (especially if minimal smoothing is used during mass feature detection). - 401 """ - 402 # Check if there is data - 403 if ms_level in self._ms_unprocessed.keys(): - 404 raw_data = self._ms_unprocessed[ms_level].copy() - 405 else: - 406 raise ValueError("No MS level " + str(ms_level) + " data found") - 407 if self.mass_features is not None: - 408 mf_df = self.mass_features_to_df().copy() - 409 else: - 410 raise ValueError( - 411 "No mass features found, did you run find_mass_features() first?" - 412 ) - 413 # Check if mass_spectrum exists on each mass feature - 414 if not all( - 415 [mf.mass_spectrum is not None for mf in self.mass_features.values()] - 416 ): - 417 raise ValueError( - 418 "Mass spectrum must be associated with each mass feature, did you run add_associated_ms1() first?" 
- 419 ) - 420 - 421 # Subset scan data to only include correct ms_level - 422 scan_df_sub = self.scan_df[ - 423 self.scan_df["ms_level"] == int(ms_level) - 424 ].reset_index(drop=True) - 425 if scan_df_sub.empty: - 426 raise ValueError("No MS level " + ms_level + " data found in scan data") - 427 scan_df_sub = scan_df_sub[["scan", "scan_time"]].copy() - 428 - 429 mzs_to_extract = np.unique(mf_df["mz"].values) - 430 mzs_to_extract.sort() - 431 - 432 # Get EICs for each unique mz in mass features list - 433 for mz in mzs_to_extract: - 434 mz_max = mz + self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6 - 435 mz_min = mz - self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6 - 436 raw_data_sub = raw_data[ - 437 (raw_data["mz"] >= mz_min) & (raw_data["mz"] <= mz_max) - 438 ].reset_index(drop=True) - 439 raw_data_sub = ( - 440 raw_data_sub.groupby(["scan"])["intensity"].sum().reset_index() - 441 ) - 442 raw_data_sub = scan_df_sub.merge(raw_data_sub, on="scan", how="left") - 443 raw_data_sub["intensity"] = raw_data_sub["intensity"].fillna(0) - 444 myEIC = EIC_Data( - 445 scans=raw_data_sub["scan"].values, - 446 time=raw_data_sub["scan_time"].values, - 447 eic=raw_data_sub["intensity"].values, - 448 ) - 449 # Smooth EIC - 450 smoothed_eic = self.smooth_tic(myEIC.eic) - 451 smoothed_eic[smoothed_eic < 0] = 0 - 452 myEIC.eic_smoothed = smoothed_eic - 453 self.eics[mz] = myEIC - 454 - 455 # Get limits of mass features using EIC centroid detector and integrate - 456 mf_df["area"] = np.nan - 457 for idx, mass_feature in mf_df.iterrows(): - 458 mz = mass_feature.mz - 459 apex_scan = mass_feature.apex_scan - 460 - 461 # Pull EIC data and find apex scan index - 462 myEIC = self.eics[mz] - 463 self.mass_features[idx]._eic_data = myEIC - 464 apex_index = np.where(myEIC.scans == apex_scan)[0][0] - 465 - 466 # Find left and right limits of peak using EIC centroid detector, add to EICData - 467 centroid_eics = self.eic_centroid_detector( - 468 myEIC.time, - 469 myEIC.eic_smoothed, - 470 
mass_feature.intensity * 1.1, - 471 apex_indexes=[int(apex_index)], - 472 ) - 473 l_a_r_scan_idx = [i for i in centroid_eics] - 474 if len(l_a_r_scan_idx) > 0: - 475 # Add start and final scan to mass_features and EICData - 476 left_scan, right_scan = ( - 477 myEIC.scans[l_a_r_scan_idx[0][0]], - 478 myEIC.scans[l_a_r_scan_idx[0][2]], - 479 ) - 480 mf_scan_apex = [(left_scan, int(apex_scan), right_scan)] - 481 myEIC.apexes = myEIC.apexes + mf_scan_apex - 482 self.mass_features[idx].start_scan = left_scan - 483 self.mass_features[idx].final_scan = right_scan - 484 - 485 # Find area under peak using limits from EIC centroid detector, add to mass_features and EICData - 486 area = np.trapz( - 487 myEIC.eic_smoothed[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1], - 488 myEIC.time[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1], - 489 ) - 490 mf_df.at[idx, "area"] = area - 491 myEIC.areas = myEIC.areas + [area] - 492 self.eics[mz] = myEIC - 493 self.mass_features[idx]._area = area - 494 else: - 495 if drop_if_fail is True: - 496 self.mass_features.pop(idx) - 497 - 498 if drop_duplicates: - 499 # Prepare mass feature dataframe - 500 mf_df = self.mass_features_to_df().copy() - 501 - 502 # For each mass feature, find all mass features within the clustering tolerance ppm and drop if their start and end times are within another mass feature - 503 # Kepp the first mass fea - 504 for idx, mass_feature in mf_df.iterrows(): - 505 mz = mass_feature.mz - 506 apex_scan = mass_feature.apex_scan - 507 - 508 mf_df["mz_diff_ppm"] = np.abs(mf_df["mz"] - mz) / mz * 10**6 - 509 mf_df_sub = mf_df[mf_df["mz_diff_ppm"] < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel * 10**6].copy() - 510 - 511 # For all mass features within the clustering tolerance, check if the start and end times are within the start and end times of the mass feature - 512 for idx2, mass_feature2 in mf_df_sub.iterrows(): - 513 if idx2 != idx: - 514 if mass_feature2.start_scan >= mass_feature.start_scan and 
mass_feature2.final_scan <= mass_feature.final_scan: - 515 if idx2 in self.mass_features.keys(): - 516 self.mass_features.pop(idx2) - 517 - 518 - 519 - 520 def find_c13_mass_features(self): - 521 """Mark likely C13 isotopes and connect to monoisoitopic mass features. + 374 Parameters + 375 ---------- + 376 drop_if_fail : bool, optional + 377 Whether to drop mass features if the EIC limit calculations fail. + 378 Default is True. + 379 drop_duplicates : bool, optional + 380 Whether to mass features that appear to be duplicates + 381 (i.e., mz is similar to another mass feature and limits of the EIC are similar or encapsulating). + 382 Default is True. + 383 ms_level : int, optional + 384 The MS level to use. Default is 1. + 385 + 386 Raises + 387 ------ + 388 ValueError + 389 If no mass features are found. + 390 If no MS level data is found for the given MS level (either in data or in the scan data) + 391 + 392 Returns + 393 ------- + 394 None, but populates the eics attribute on the LCMSBase object and adds data (start_scan, final_scan, area) to the mass_features attribute. + 395 + 396 Notes + 397 ----- + 398 drop_if_fail is useful for discarding mass features that do not have good shapes, usually due to a detection on a shoulder of a peak or a noisy region (especially if minimal smoothing is used during mass feature detection). + 399 """ + 400 # Check if there is data + 401 if ms_level in self._ms_unprocessed.keys(): + 402 raw_data = self._ms_unprocessed[ms_level].copy() + 403 else: + 404 raise ValueError("No MS level " + str(ms_level) + " data found") + 405 if self.mass_features is not None: + 406 mf_df = self.mass_features_to_df().copy() + 407 else: + 408 raise ValueError( + 409 "No mass features found, did you run find_mass_features() first?" 
+ 410 ) + 411 # Check if mass_spectrum exists on each mass feature + 412 if not all( + 413 [mf.mass_spectrum is not None for mf in self.mass_features.values()] + 414 ): + 415 raise ValueError( + 416 "Mass spectrum must be associated with each mass feature, did you run add_associated_ms1() first?" + 417 ) + 418 + 419 # Subset scan data to only include correct ms_level + 420 scan_df_sub = self.scan_df[ + 421 self.scan_df["ms_level"] == int(ms_level) + 422 ].reset_index(drop=True) + 423 if scan_df_sub.empty: + 424 raise ValueError("No MS level " + ms_level + " data found in scan data") + 425 scan_df_sub = scan_df_sub[["scan", "scan_time"]].copy() + 426 + 427 mzs_to_extract = np.unique(mf_df["mz"].values) + 428 mzs_to_extract.sort() + 429 + 430 # Get EICs for each unique mz in mass features list + 431 for mz in mzs_to_extract: + 432 mz_max = mz + self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6 + 433 mz_min = mz - self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6 + 434 raw_data_sub = raw_data[ + 435 (raw_data["mz"] >= mz_min) & (raw_data["mz"] <= mz_max) + 436 ].reset_index(drop=True) + 437 raw_data_sub = ( + 438 raw_data_sub.groupby(["scan"])["intensity"].sum().reset_index() + 439 ) + 440 raw_data_sub = scan_df_sub.merge(raw_data_sub, on="scan", how="left") + 441 raw_data_sub["intensity"] = raw_data_sub["intensity"].fillna(0) + 442 myEIC = EIC_Data( + 443 scans=raw_data_sub["scan"].values, + 444 time=raw_data_sub["scan_time"].values, + 445 eic=raw_data_sub["intensity"].values, + 446 ) + 447 # Smooth EIC + 448 smoothed_eic = self.smooth_tic(myEIC.eic) + 449 smoothed_eic[smoothed_eic < 0] = 0 + 450 myEIC.eic_smoothed = smoothed_eic + 451 self.eics[mz] = myEIC + 452 + 453 # Get limits of mass features using EIC centroid detector and integrate + 454 mf_df["area"] = np.nan + 455 for idx, mass_feature in mf_df.iterrows(): + 456 mz = mass_feature.mz + 457 apex_scan = mass_feature.apex_scan + 458 + 459 # Pull EIC data and find apex scan index + 460 myEIC = self.eics[mz] + 
461 self.mass_features[idx]._eic_data = myEIC + 462 apex_index = np.where(myEIC.scans == apex_scan)[0][0] + 463 + 464 # Find left and right limits of peak using EIC centroid detector, add to EICData + 465 centroid_eics = self.eic_centroid_detector( + 466 myEIC.time, + 467 myEIC.eic_smoothed, + 468 mass_feature.intensity * 1.1, + 469 apex_indexes=[int(apex_index)], + 470 ) + 471 l_a_r_scan_idx = [i for i in centroid_eics] + 472 if len(l_a_r_scan_idx) > 0: + 473 # Add start and final scan to mass_features and EICData + 474 left_scan, right_scan = ( + 475 myEIC.scans[l_a_r_scan_idx[0][0]], + 476 myEIC.scans[l_a_r_scan_idx[0][2]], + 477 ) + 478 mf_scan_apex = [(left_scan, int(apex_scan), right_scan)] + 479 myEIC.apexes = myEIC.apexes + mf_scan_apex + 480 self.mass_features[idx].start_scan = left_scan + 481 self.mass_features[idx].final_scan = right_scan + 482 + 483 # Find area under peak using limits from EIC centroid detector, add to mass_features and EICData + 484 area = np.trapz( + 485 myEIC.eic_smoothed[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1], + 486 myEIC.time[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1], + 487 ) + 488 mf_df.at[idx, "area"] = area + 489 myEIC.areas = myEIC.areas + [area] + 490 self.eics[mz] = myEIC + 491 self.mass_features[idx]._area = area + 492 else: + 493 if drop_if_fail is True: + 494 self.mass_features.pop(idx) + 495 + 496 if drop_duplicates: + 497 # Prepare mass feature dataframe + 498 mf_df = self.mass_features_to_df().copy() + 499 + 500 # For each mass feature, find all mass features within the clustering tolerance ppm and drop if their start and end times are within another mass feature + 501 # Kepp the first mass fea + 502 for idx, mass_feature in mf_df.iterrows(): + 503 mz = mass_feature.mz + 504 apex_scan = mass_feature.apex_scan + 505 + 506 mf_df["mz_diff_ppm"] = np.abs(mf_df["mz"] - mz) / mz * 10**6 + 507 mf_df_sub = mf_df[ + 508 mf_df["mz_diff_ppm"] + 509 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel + 
510 * 10**6 + 511 ].copy() + 512 + 513 # For all mass features within the clustering tolerance, check if the start and end times are within the start and end times of the mass feature + 514 for idx2, mass_feature2 in mf_df_sub.iterrows(): + 515 if idx2 != idx: + 516 if ( + 517 mass_feature2.start_scan >= mass_feature.start_scan + 518 and mass_feature2.final_scan <= mass_feature.final_scan + 519 ): + 520 if idx2 in self.mass_features.keys(): + 521 self.mass_features.pop(idx2) 522 - 523 Returns - 524 ------- - 525 None, but populates the monoisotopic_mf_id and isotopologue_type attributes to the indivual LCMSMassFeatures within the mass_features attribute of the LCMSBase object. - 526 - 527 Raises - 528 ------ - 529 ValueError - 530 If no mass features are found. - 531 """ - 532 verbose = self.parameters.lc_ms.verbose_processing - 533 if verbose: - 534 print("evaluating mass features for C13 isotopes") - 535 if self.mass_features is None: - 536 raise ValueError("No mass features found, run find_mass_features() first") - 537 - 538 # Data prep fo sparse distance matrix - 539 dims = ["mz", "scan_time"] - 540 mf_df = self.mass_features_to_df().copy() - 541 # Drop mass features that have no area (these are likely to be noise) - 542 mf_df = mf_df[mf_df["area"].notnull()] - 543 mf_df["mf_id"] = mf_df.index.values - 544 dims = ["mz", "scan_time"] - 545 - 546 # Sort my ascending mz so we always get the monoisotopic mass first, regardless of the order/intensity of the mass features - 547 mf_df = mf_df.sort_values(by=["mz"]).reset_index(drop=True).copy() + 523 def find_c13_mass_features(self): + 524 """Mark likely C13 isotopes and connect to monoisoitopic mass features. + 525 + 526 Returns + 527 ------- + 528 None, but populates the monoisotopic_mf_id and isotopologue_type attributes to the indivual LCMSMassFeatures within the mass_features attribute of the LCMSBase object. + 529 + 530 Raises + 531 ------ + 532 ValueError + 533 If no mass features are found. 
+ 534 """ + 535 verbose = self.parameters.lc_ms.verbose_processing + 536 if verbose: + 537 print("evaluating mass features for C13 isotopes") + 538 if self.mass_features is None: + 539 raise ValueError("No mass features found, run find_mass_features() first") + 540 + 541 # Data prep fo sparse distance matrix + 542 dims = ["mz", "scan_time"] + 543 mf_df = self.mass_features_to_df().copy() + 544 # Drop mass features that have no area (these are likely to be noise) + 545 mf_df = mf_df[mf_df["area"].notnull()] + 546 mf_df["mf_id"] = mf_df.index.values + 547 dims = ["mz", "scan_time"] 548 - 549 mz_diff = 1.003355 # C13-C12 mass difference - 550 tol = [ - 551 mf_df["mz"].median() - 552 * self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel, - 553 self.parameters.lc_ms.mass_feature_cluster_rt_tolerance * 0.5, - 554 ] # mz, in relative; scan_time in minutes - 555 - 556 # Compute inter-feature distances - 557 distances = None - 558 for i in range(len(dims)): - 559 # Construct k-d tree - 560 values = mf_df[dims[i]].values - 561 tree = KDTree(values.reshape(-1, 1)) - 562 - 563 max_tol = tol[i] - 564 if dims[i] == "mz": - 565 # Maximum absolute tolerance - 566 max_tol = mz_diff + tol[i] - 567 - 568 # Compute sparse distance matrix - 569 # the larger the max_tol, the slower this operation is - 570 sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix") - 571 - 572 # Only consider forward case, exclude diagonal - 573 sdm = sparse.triu(sdm, k=1) + 549 # Sort my ascending mz so we always get the monoisotopic mass first, regardless of the order/intensity of the mass features + 550 mf_df = mf_df.sort_values(by=["mz"]).reset_index(drop=True).copy() + 551 + 552 mz_diff = 1.003355 # C13-C12 mass difference + 553 tol = [ + 554 mf_df["mz"].median() + 555 * self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel, + 556 self.parameters.lc_ms.mass_feature_cluster_rt_tolerance * 0.5, + 557 ] # mz, in relative; scan_time in minutes + 558 + 559 # Compute 
inter-feature distances + 560 distances = None + 561 for i in range(len(dims)): + 562 # Construct k-d tree + 563 values = mf_df[dims[i]].values + 564 tree = KDTree(values.reshape(-1, 1)) + 565 + 566 max_tol = tol[i] + 567 if dims[i] == "mz": + 568 # Maximum absolute tolerance + 569 max_tol = mz_diff + tol[i] + 570 + 571 # Compute sparse distance matrix + 572 # the larger the max_tol, the slower this operation is + 573 sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix") 574 - 575 if dims[i] == "mz": - 576 min_tol = mz_diff - tol[i] - 577 # Get only the ones that are above the min tol - 578 idx = sdm.data > min_tol - 579 - 580 # Reconstruct sparse distance matrix - 581 sdm = sparse.coo_matrix( - 582 (sdm.data[idx], (sdm.row[idx], sdm.col[idx])), - 583 shape=(len(values), len(values)), - 584 ) - 585 - 586 # Cast as binary matrix - 587 sdm.data = np.ones_like(sdm.data) + 575 # Only consider forward case, exclude diagonal + 576 sdm = sparse.triu(sdm, k=1) + 577 + 578 if dims[i] == "mz": + 579 min_tol = mz_diff - tol[i] + 580 # Get only the ones that are above the min tol + 581 idx = sdm.data > min_tol + 582 + 583 # Reconstruct sparse distance matrix + 584 sdm = sparse.coo_matrix( + 585 (sdm.data[idx], (sdm.row[idx], sdm.col[idx])), + 586 shape=(len(values), len(values)), + 587 ) 588 - 589 # Stack distances - 590 if distances is None: - 591 distances = sdm - 592 else: - 593 distances = distances.multiply(sdm) - 594 - 595 # Extract indices of within-tolerance points - 596 distances = distances.tocoo() - 597 pairs = np.stack((distances.row, distances.col), axis=1) # C12 to C13 pairs - 598 - 599 # Turn pairs (which are index of mf_df) into mf_id and then into two dataframes to join to mf_df - 600 pairs_mf = pairs.copy() - 601 pairs_mf[:, 0] = mf_df.iloc[pairs[:, 0]].mf_id.values - 602 pairs_mf[:, 1] = mf_df.iloc[pairs[:, 1]].mf_id.values - 603 - 604 # Connect monoisotopic masses with isotopologes within mass_features - 605 monos = 
np.setdiff1d(np.unique(pairs_mf[:, 0]), np.unique(pairs_mf[:, 1])) - 606 for mono in monos: - 607 self.mass_features[mono].monoisotopic_mf_id = mono - 608 pairs_iso_df = pd.DataFrame(pairs_mf, columns=["parent", "child"]) - 609 while not pairs_iso_df.empty: - 610 pairs_iso_df = pairs_iso_df.set_index("parent", drop=False) - 611 m1_isos = pairs_iso_df.loc[monos, "child"].unique() - 612 for iso in m1_isos: - 613 # Set monoisotopic_mf_id and isotopologue_type for isotopologues - 614 parent = pairs_mf[pairs_mf[:, 1] == iso, 0] - 615 if len(parent) > 1: - 616 # Choose the parent that is closest in time to the isotopologue - 617 parent_time = [self.mass_features[p].retention_time for p in parent] - 618 time_diff = [ - 619 np.abs(self.mass_features[iso].retention_time - x) - 620 for x in parent_time - 621 ] - 622 parent = parent[np.argmin(time_diff)] - 623 else: - 624 parent = parent[0] - 625 self.mass_features[iso].monoisotopic_mf_id = self.mass_features[ - 626 parent - 627 ].monoisotopic_mf_id - 628 if self.mass_features[iso].monoisotopic_mf_id is not None: - 629 mass_diff = ( - 630 self.mass_features[iso].mz - 631 - self.mass_features[ - 632 self.mass_features[iso].monoisotopic_mf_id - 633 ].mz - 634 ) - 635 self.mass_features[iso].isotopologue_type = "13C" + str( - 636 int(round(mass_diff, 0)) + 589 # Cast as binary matrix + 590 sdm.data = np.ones_like(sdm.data) + 591 + 592 # Stack distances + 593 if distances is None: + 594 distances = sdm + 595 else: + 596 distances = distances.multiply(sdm) + 597 + 598 # Extract indices of within-tolerance points + 599 distances = distances.tocoo() + 600 pairs = np.stack((distances.row, distances.col), axis=1) # C12 to C13 pairs + 601 + 602 # Turn pairs (which are index of mf_df) into mf_id and then into two dataframes to join to mf_df + 603 pairs_mf = pairs.copy() + 604 pairs_mf[:, 0] = mf_df.iloc[pairs[:, 0]].mf_id.values + 605 pairs_mf[:, 1] = mf_df.iloc[pairs[:, 1]].mf_id.values + 606 + 607 # Connect monoisotopic masses with 
isotopologes within mass_features + 608 monos = np.setdiff1d(np.unique(pairs_mf[:, 0]), np.unique(pairs_mf[:, 1])) + 609 for mono in monos: + 610 self.mass_features[mono].monoisotopic_mf_id = mono + 611 pairs_iso_df = pd.DataFrame(pairs_mf, columns=["parent", "child"]) + 612 while not pairs_iso_df.empty: + 613 pairs_iso_df = pairs_iso_df.set_index("parent", drop=False) + 614 m1_isos = pairs_iso_df.loc[monos, "child"].unique() + 615 for iso in m1_isos: + 616 # Set monoisotopic_mf_id and isotopologue_type for isotopologues + 617 parent = pairs_mf[pairs_mf[:, 1] == iso, 0] + 618 if len(parent) > 1: + 619 # Choose the parent that is closest in time to the isotopologue + 620 parent_time = [self.mass_features[p].retention_time for p in parent] + 621 time_diff = [ + 622 np.abs(self.mass_features[iso].retention_time - x) + 623 for x in parent_time + 624 ] + 625 parent = parent[np.argmin(time_diff)] + 626 else: + 627 parent = parent[0] + 628 self.mass_features[iso].monoisotopic_mf_id = self.mass_features[ + 629 parent + 630 ].monoisotopic_mf_id + 631 if self.mass_features[iso].monoisotopic_mf_id is not None: + 632 mass_diff = ( + 633 self.mass_features[iso].mz + 634 - self.mass_features[ + 635 self.mass_features[iso].monoisotopic_mf_id + 636 ].mz 637 ) - 638 - 639 # Drop the mono and iso from the pairs_iso_df - 640 pairs_iso_df = pairs_iso_df.drop( - 641 index=monos, errors="ignore" - 642 ) # Drop pairs where the parent is a child that is a child of a root - 643 pairs_iso_df = pairs_iso_df.set_index("child", drop=False) - 644 pairs_iso_df = pairs_iso_df.drop(index=m1_isos, errors="ignore") - 645 - 646 if not pairs_iso_df.empty: - 647 # Get new monos, recognizing that these are just 13C isotopologues that are connected to other 13C isotopologues to repeat the process - 648 monos = np.setdiff1d( - 649 np.unique(pairs_iso_df.parent), np.unique(pairs_iso_df.child) - 650 ) - 651 if verbose: - 652 # Report fraction of compounds annotated with isotopes - 653 mf_df["c13_flag"] = 
np.where( - 654 np.logical_or( - 655 np.isin(mf_df["mf_id"], pairs_mf[:, 0]), - 656 np.isin(mf_df["mf_id"], pairs_mf[:, 1]), - 657 ), - 658 1, - 659 0, - 660 ) - 661 print( - 662 str(round(len(mf_df[mf_df["c13_flag"] == 1]) / len(mf_df), ndigits=3)) - 663 + " of mass features have or are C13 isotopes" - 664 ) - 665 - 666 def deconvolute_ms1_mass_features(self): - 667 """Deconvolute MS1 mass features + 638 self.mass_features[iso].isotopologue_type = "13C" + str( + 639 int(round(mass_diff, 0)) + 640 ) + 641 + 642 # Drop the mono and iso from the pairs_iso_df + 643 pairs_iso_df = pairs_iso_df.drop( + 644 index=monos, errors="ignore" + 645 ) # Drop pairs where the parent is a child that is a child of a root + 646 pairs_iso_df = pairs_iso_df.set_index("child", drop=False) + 647 pairs_iso_df = pairs_iso_df.drop(index=m1_isos, errors="ignore") + 648 + 649 if not pairs_iso_df.empty: + 650 # Get new monos, recognizing that these are just 13C isotopologues that are connected to other 13C isotopologues to repeat the process + 651 monos = np.setdiff1d( + 652 np.unique(pairs_iso_df.parent), np.unique(pairs_iso_df.child) + 653 ) + 654 if verbose: + 655 # Report fraction of compounds annotated with isotopes + 656 mf_df["c13_flag"] = np.where( + 657 np.logical_or( + 658 np.isin(mf_df["mf_id"], pairs_mf[:, 0]), + 659 np.isin(mf_df["mf_id"], pairs_mf[:, 1]), + 660 ), + 661 1, + 662 0, + 663 ) + 664 print( + 665 str(round(len(mf_df[mf_df["c13_flag"] == 1]) / len(mf_df), ndigits=3)) + 666 + " of mass features have or are C13 isotopes" + 667 ) 668 - 669 Deconvolute mass features ms1 spectrum based on the correlation of all masses within a spectrum over the EIC of the mass features - 670 - 671 Parameters - 672 ---------- - 673 None - 674 - 675 Returns - 676 ------- - 677 None, but assigns the _ms_deconvoluted_idx, mass_spectrum_deconvoluted_parent, - 678 and associated_mass_features_deconvoluted attributes to the mass features in the - 679 mass_features attribute of the LCMSBase object. 
- 680 - 681 Raises - 682 ------ - 683 ValueError - 684 If no mass features are found, must run find_mass_features() first. - 685 If no EICs are found, did you run integrate_mass_features() first? - 686 - 687 """ - 688 # Checks for set mass_features and eics - 689 if self.mass_features is None: - 690 raise ValueError( - 691 "No mass features found, did you run find_mass_features() first?" - 692 ) - 693 - 694 if self.eics == {}: - 695 raise ValueError( - 696 "No EICs found, did you run integrate_mass_features() first?" - 697 ) - 698 - 699 if 1 not in self._ms_unprocessed.keys(): - 700 raise ValueError("No unprocessed MS1 spectra found.") + 669 def deconvolute_ms1_mass_features(self): + 670 """Deconvolute MS1 mass features + 671 + 672 Deconvolute mass features ms1 spectrum based on the correlation of all masses within a spectrum over the EIC of the mass features + 673 + 674 Parameters + 675 ---------- + 676 None + 677 + 678 Returns + 679 ------- + 680 None, but assigns the _ms_deconvoluted_idx, mass_spectrum_deconvoluted_parent, + 681 and associated_mass_features_deconvoluted attributes to the mass features in the + 682 mass_features attribute of the LCMSBase object. + 683 + 684 Raises + 685 ------ + 686 ValueError + 687 If no mass features are found, must run find_mass_features() first. + 688 If no EICs are found, did you run integrate_mass_features() first? + 689 + 690 """ + 691 # Checks for set mass_features and eics + 692 if self.mass_features is None: + 693 raise ValueError( + 694 "No mass features found, did you run find_mass_features() first?" + 695 ) + 696 + 697 if self.eics == {}: + 698 raise ValueError( + 699 "No EICs found, did you run integrate_mass_features() first?" 
+ 700 ) 701 - 702 # Prep ms1 data - 703 ms1_data = self._ms_unprocessed[1].copy() - 704 ms1_data = ms1_data.set_index("scan") - 705 - 706 # Prep mass feature summary - 707 mass_feature_df = self.mass_features_to_df() + 702 if 1 not in self._ms_unprocessed.keys(): + 703 raise ValueError("No unprocessed MS1 spectra found.") + 704 + 705 # Prep ms1 data + 706 ms1_data = self._ms_unprocessed[1].copy() + 707 ms1_data = ms1_data.set_index("scan") 708 - 709 # Loop through each mass feature - 710 for mf_id, mass_feature in self.mass_features.items(): - 711 - 712 # Check that the mass_feature.mz attribute == the mz of the mass feature in the mass_feature_df - 713 if mass_feature.mz != mass_feature.ms1_peak.mz_exp: - 714 continue - 715 - 716 # Get the left and right limits of the EIC of the mass feature - 717 l_scan, _, r_scan = mass_feature._eic_data.apexes[0] - 718 - 719 # Pull from the _ms1_unprocessed data the scan range of interest and sort by mz - 720 ms1_data_sub = ms1_data.loc[l_scan:r_scan].copy() - 721 ms1_data_sub = ms1_data_sub.sort_values(by=["mz"]).reset_index(drop=False) - 722 - 723 # Get the centroided masses of the mass feature - 724 mf_mspeak_mzs = mass_feature.mass_spectrum.mz_exp - 725 - 726 # Find the closest mz in the ms1 data to the centroided masses of the mass feature - 727 ms1_data_sub["mass_feature_mz"] = mf_mspeak_mzs[ - 728 find_closest(mf_mspeak_mzs, ms1_data_sub.mz.values) - 729 ] - 730 - 731 # Drop rows with mz_diff > 0.01 between the mass feature mz and the ms1 data mz - 732 ms1_data_sub["mz_diff_rel"] = ( - 733 np.abs(ms1_data_sub["mass_feature_mz"] - ms1_data_sub["mz"]) - 734 / ms1_data_sub["mz"] - 735 ) - 736 ms1_data_sub = ms1_data_sub[ - 737 ms1_data_sub["mz_diff_rel"] - 738 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel - 739 ].reset_index(drop=True) - 740 - 741 # Group by mass_feature_mz and scan and sum intensity - 742 ms1_data_sub_group = ( - 743 ms1_data_sub.groupby(["mass_feature_mz", "scan"])["intensity"] - 744 
.sum() - 745 .reset_index() - 746 ) - 747 - 748 # Calculate the correlation of the intensities of the mass feature and the ms1 data (set to 0 if no intensity) - 749 corr = ( - 750 ms1_data_sub_group.pivot( - 751 index="scan", columns="mass_feature_mz", values="intensity" - 752 ) - 753 .fillna(0) - 754 .corr() - 755 ) - 756 - 757 # Subset the correlation matrix to only include the masses of the mass feature and those with a correlation > 0.8 - 758 decon_corr_min = self.parameters.lc_ms.ms1_deconvolution_corr_min - 759 decon_corr_min = 0.9 - 760 corr_subset = corr.loc[mass_feature.mz,] - 761 corr_subset = corr_subset[corr_subset > decon_corr_min] - 762 - 763 # Get the masses from the mass spectrum that are the result of the deconvolution - 764 mzs_decon = corr_subset.index.values - 765 - 766 # Get the indices of the mzs_decon in mass_feature.mass_spectrum.mz_exp and assign to the mass feature - 767 mzs_decon_idx = [ - 768 id - 769 for id, mz in enumerate(mass_feature.mass_spectrum.mz_exp) - 770 if mz in mzs_decon - 771 ] - 772 mass_feature._ms_deconvoluted_idx = mzs_decon_idx - 773 - 774 # Check if the mass feature's ms1 peak is the largest in the deconvoluted mass spectrum - 775 if ( - 776 mass_feature.ms1_peak.abundance - 777 == mass_feature.mass_spectrum.abundance[mzs_decon_idx].max() - 778 ): - 779 mass_feature.mass_spectrum_deconvoluted_parent = True - 780 else: - 781 mass_feature.mass_spectrum_deconvoluted_parent = False - 782 - 783 # Check for other mass features that are in the deconvoluted mass spectrum and add the deconvoluted mass spectrum to the mass feature - 784 # Subset mass_feature_df to only include mass features that are within the clustering tolerance - 785 mass_feature_df_sub = mass_feature_df[ - 786 abs(mass_feature.retention_time - mass_feature_df["scan_time"]) - 787 < self.parameters.lc_ms.mass_feature_cluster_rt_tolerance - 788 ].copy() - 789 # Calculate the mz difference in ppm between the mass feature and the peaks in the deconvoluted mass 
spectrum - 790 mass_feature_df_sub["mz_diff_ppm"] = [ - 791 np.abs(mzs_decon - mz).min() / mz * 10**6 - 792 for mz in mass_feature_df_sub["mz"] - 793 ] - 794 # Subset mass_feature_df to only include mass features that are within 1 ppm of the deconvoluted masses - 795 mfs_associated_decon = mass_feature_df_sub[ - 796 mass_feature_df_sub["mz_diff_ppm"] - 797 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel * 10**6 - 798 ].index.values - 799 - 800 mass_feature.associated_mass_features_deconvoluted = mfs_associated_decon + 709 # Prep mass feature summary + 710 mass_feature_df = self.mass_features_to_df() + 711 + 712 # Loop through each mass feature + 713 for mf_id, mass_feature in self.mass_features.items(): + 714 # Check that the mass_feature.mz attribute == the mz of the mass feature in the mass_feature_df + 715 if mass_feature.mz != mass_feature.ms1_peak.mz_exp: + 716 continue + 717 + 718 # Get the left and right limits of the EIC of the mass feature + 719 l_scan, _, r_scan = mass_feature._eic_data.apexes[0] + 720 + 721 # Pull from the _ms1_unprocessed data the scan range of interest and sort by mz + 722 ms1_data_sub = ms1_data.loc[l_scan:r_scan].copy() + 723 ms1_data_sub = ms1_data_sub.sort_values(by=["mz"]).reset_index(drop=False) + 724 + 725 # Get the centroided masses of the mass feature + 726 mf_mspeak_mzs = mass_feature.mass_spectrum.mz_exp + 727 + 728 # Find the closest mz in the ms1 data to the centroided masses of the mass feature + 729 ms1_data_sub["mass_feature_mz"] = mf_mspeak_mzs[ + 730 find_closest(mf_mspeak_mzs, ms1_data_sub.mz.values) + 731 ] + 732 + 733 # Drop rows with mz_diff > 0.01 between the mass feature mz and the ms1 data mz + 734 ms1_data_sub["mz_diff_rel"] = ( + 735 np.abs(ms1_data_sub["mass_feature_mz"] - ms1_data_sub["mz"]) + 736 / ms1_data_sub["mz"] + 737 ) + 738 ms1_data_sub = ms1_data_sub[ + 739 ms1_data_sub["mz_diff_rel"] + 740 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel + 741 ].reset_index(drop=True) + 
742 + 743 # Group by mass_feature_mz and scan and sum intensity + 744 ms1_data_sub_group = ( + 745 ms1_data_sub.groupby(["mass_feature_mz", "scan"])["intensity"] + 746 .sum() + 747 .reset_index() + 748 ) + 749 + 750 # Calculate the correlation of the intensities of the mass feature and the ms1 data (set to 0 if no intensity) + 751 corr = ( + 752 ms1_data_sub_group.pivot( + 753 index="scan", columns="mass_feature_mz", values="intensity" + 754 ) + 755 .fillna(0) + 756 .corr() + 757 ) + 758 + 759 # Subset the correlation matrix to only include the masses of the mass feature and those with a correlation > 0.8 + 760 decon_corr_min = self.parameters.lc_ms.ms1_deconvolution_corr_min + 761 decon_corr_min = 0.9 + 762 corr_subset = corr.loc[mass_feature.mz,] + 763 corr_subset = corr_subset[corr_subset > decon_corr_min] + 764 + 765 # Get the masses from the mass spectrum that are the result of the deconvolution + 766 mzs_decon = corr_subset.index.values + 767 + 768 # Get the indices of the mzs_decon in mass_feature.mass_spectrum.mz_exp and assign to the mass feature + 769 mzs_decon_idx = [ + 770 id + 771 for id, mz in enumerate(mass_feature.mass_spectrum.mz_exp) + 772 if mz in mzs_decon + 773 ] + 774 mass_feature._ms_deconvoluted_idx = mzs_decon_idx + 775 + 776 # Check if the mass feature's ms1 peak is the largest in the deconvoluted mass spectrum + 777 if ( + 778 mass_feature.ms1_peak.abundance + 779 == mass_feature.mass_spectrum.abundance[mzs_decon_idx].max() + 780 ): + 781 mass_feature.mass_spectrum_deconvoluted_parent = True + 782 else: + 783 mass_feature.mass_spectrum_deconvoluted_parent = False + 784 + 785 # Check for other mass features that are in the deconvoluted mass spectrum and add the deconvoluted mass spectrum to the mass feature + 786 # Subset mass_feature_df to only include mass features that are within the clustering tolerance + 787 mass_feature_df_sub = mass_feature_df[ + 788 abs(mass_feature.retention_time - mass_feature_df["scan_time"]) + 789 < 
self.parameters.lc_ms.mass_feature_cluster_rt_tolerance + 790 ].copy() + 791 # Calculate the mz difference in ppm between the mass feature and the peaks in the deconvoluted mass spectrum + 792 mass_feature_df_sub["mz_diff_ppm"] = [ + 793 np.abs(mzs_decon - mz).min() / mz * 10**6 + 794 for mz in mass_feature_df_sub["mz"] + 795 ] + 796 # Subset mass_feature_df to only include mass features that are within 1 ppm of the deconvoluted masses + 797 mfs_associated_decon = mass_feature_df_sub[ + 798 mass_feature_df_sub["mz_diff_ppm"] + 799 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel * 10**6 + 800 ].index.values 801 - 802 - 803class PHCalculations: - 804 """Methods for performing calculations related to 2D peak picking via persistent homology on LCMS data. - 805 - 806 Notes - 807 ----- - 808 This class is intended to be used as a mixin for the LCMSBase class. - 809 - 810 Methods - 811 ------- - 812 * sparse_mean_filter(idx, V, radius=[0, 1, 1]). - 813 Sparse implementation of a mean filter. - 814 * embed_unique_indices(a). - 815 Creates an array of indices, sorted by unique element. - 816 * sparse_upper_star(idx, V). - 817 Sparse implementation of an upper star filtration. - 818 * check_if_grid(data). - 819 Check if the data is gridded in mz space. - 820 * grid_data(data). - 821 Grid the data in the mz dimension. - 822 * find_mass_features_ph(ms_level=1, grid=True). - 823 Find mass features within an LCMSBase object using persistent homology. - 824 * cluster_mass_features(drop_children=True). - 825 Cluster regions of interest. - 826 """ - 827 - 828 @staticmethod - 829 def sparse_mean_filter(idx, V, radius=[0, 1, 1]): - 830 """Sparse implementation of a mean filter. - 831 - 832 Parameters - 833 ---------- - 834 idx : :obj:`~numpy.array` - 835 Edge indices for each dimension (MxN). - 836 V : :obj:`~numpy.array` - 837 Array of intensity data (Mx1). - 838 radius : float or list - 839 Radius of the sparse filter in each dimension. 
Values less than - 840 zero indicate no connectivity in that dimension. - 841 - 842 Returns - 843 ------- - 844 :obj:`~numpy.array` - 845 Filtered intensities (Mx1). - 846 - 847 Notes - 848 ----- - 849 This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos. - 850 This is a static method. - 851 """ - 852 - 853 # Copy indices - 854 idx = idx.copy().astype(V.dtype) - 855 - 856 # Scale - 857 for i, r in enumerate(radius): - 858 # Increase inter-index distance - 859 if r < 1: - 860 idx[:, i] *= 2 - 861 - 862 # Do nothing - 863 elif r == 1: - 864 pass - 865 - 866 # Decrease inter-index distance - 867 else: - 868 idx[:, i] /= r - 869 - 870 # Connectivity matrix - 871 cmat = KDTree(idx) - 872 cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix") - 873 cmat.setdiag(1) - 874 - 875 # Pair indices - 876 I, J = cmat.nonzero() - 877 - 878 # Delete cmat - 879 cmat_shape = cmat.shape - 880 del cmat - 881 - 882 # Sum over columns - 883 V_sum = sparse.bsr_matrix( - 884 (V[J], (I, I)), shape=cmat_shape, dtype=V.dtype - 885 ).diagonal(0) - 886 - 887 # Count over columns - 888 V_count = sparse.bsr_matrix( - 889 (np.ones_like(J), (I, I)), shape=cmat_shape, dtype=V.dtype - 890 ).diagonal(0) - 891 - 892 return V_sum / V_count + 802 mass_feature.associated_mass_features_deconvoluted = mfs_associated_decon + 803 + 804 + 805class PHCalculations: + 806 """Methods for performing calculations related to 2D peak picking via persistent homology on LCMS data. + 807 + 808 Notes + 809 ----- + 810 This class is intended to be used as a mixin for the LCMSBase class. + 811 + 812 Methods + 813 ------- + 814 * sparse_mean_filter(idx, V, radius=[0, 1, 1]). + 815 Sparse implementation of a mean filter. + 816 * embed_unique_indices(a). + 817 Creates an array of indices, sorted by unique element. + 818 * sparse_upper_star(idx, V). + 819 Sparse implementation of an upper star filtration. + 820 * check_if_grid(data). 
+ 821 Check if the data is gridded in mz space. + 822 * grid_data(data). + 823 Grid the data in the mz dimension. + 824 * find_mass_features_ph(ms_level=1, grid=True). + 825 Find mass features within an LCMSBase object using persistent homology. + 826 * cluster_mass_features(drop_children=True). + 827 Cluster regions of interest. + 828 """ + 829 + 830 @staticmethod + 831 def sparse_mean_filter(idx, V, radius=[0, 1, 1]): + 832 """Sparse implementation of a mean filter. + 833 + 834 Parameters + 835 ---------- + 836 idx : :obj:`~numpy.array` + 837 Edge indices for each dimension (MxN). + 838 V : :obj:`~numpy.array` + 839 Array of intensity data (Mx1). + 840 radius : float or list + 841 Radius of the sparse filter in each dimension. Values less than + 842 zero indicate no connectivity in that dimension. + 843 + 844 Returns + 845 ------- + 846 :obj:`~numpy.array` + 847 Filtered intensities (Mx1). + 848 + 849 Notes + 850 ----- + 851 This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos. + 852 This is a static method. 
+ 853 """ + 854 + 855 # Copy indices + 856 idx = idx.copy().astype(V.dtype) + 857 + 858 # Scale + 859 for i, r in enumerate(radius): + 860 # Increase inter-index distance + 861 if r < 1: + 862 idx[:, i] *= 2 + 863 + 864 # Do nothing + 865 elif r == 1: + 866 pass + 867 + 868 # Decrease inter-index distance + 869 else: + 870 idx[:, i] /= r + 871 + 872 # Connectivity matrix + 873 cmat = KDTree(idx) + 874 cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix") + 875 cmat.setdiag(1) + 876 + 877 # Pair indices + 878 I, J = cmat.nonzero() + 879 + 880 # Delete cmat + 881 cmat_shape = cmat.shape + 882 del cmat + 883 + 884 # Sum over columns + 885 V_sum = sparse.bsr_matrix( + 886 (V[J], (I, I)), shape=cmat_shape, dtype=V.dtype + 887 ).diagonal(0) + 888 + 889 # Count over columns + 890 V_count = sparse.bsr_matrix( + 891 (np.ones_like(J), (I, I)), shape=cmat_shape, dtype=V.dtype + 892 ).diagonal(0) 893 - 894 @staticmethod - 895 def embed_unique_indices(a): - 896 """Creates an array of indices, sorted by unique element. - 897 - 898 Parameters - 899 ---------- - 900 a : :obj:`~numpy.array` - 901 Array of unique elements (Mx1). - 902 - 903 Returns - 904 ------- - 905 :obj:`~numpy.array` - 906 Array of indices (Mx1). - 907 - 908 Notes - 909 ----- - 910 This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos - 911 This is a static method. - 912 """ - 913 - 914 def count_tens(n): - 915 # Count tens - 916 ntens = (n - 1) // 10 - 917 - 918 while True: - 919 ntens_test = (ntens + n - 1) // 10 - 920 - 921 if ntens_test == ntens: - 922 return ntens - 923 else: - 924 ntens = ntens_test - 925 - 926 def arange_exclude_10s(n): - 927 # How many 10s will there be? 
- 928 ntens = count_tens(n) - 929 - 930 # Base array - 931 arr = np.arange(0, n + ntens) - 932 - 933 # Exclude 10s - 934 arr = arr[(arr == 0) | (arr % 10 != 0)][:n] - 935 - 936 return arr + 894 return V_sum / V_count + 895 + 896 @staticmethod + 897 def embed_unique_indices(a): + 898 """Creates an array of indices, sorted by unique element. + 899 + 900 Parameters + 901 ---------- + 902 a : :obj:`~numpy.array` + 903 Array of unique elements (Mx1). + 904 + 905 Returns + 906 ------- + 907 :obj:`~numpy.array` + 908 Array of indices (Mx1). + 909 + 910 Notes + 911 ----- + 912 This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos + 913 This is a static method. + 914 """ + 915 + 916 def count_tens(n): + 917 # Count tens + 918 ntens = (n - 1) // 10 + 919 + 920 while True: + 921 ntens_test = (ntens + n - 1) // 10 + 922 + 923 if ntens_test == ntens: + 924 return ntens + 925 else: + 926 ntens = ntens_test + 927 + 928 def arange_exclude_10s(n): + 929 # How many 10s will there be? 
+ 930 ntens = count_tens(n) + 931 + 932 # Base array + 933 arr = np.arange(0, n + ntens) + 934 + 935 # Exclude 10s + 936 arr = arr[(arr == 0) | (arr % 10 != 0)][:n] 937 - 938 # Creates an array of indices, sorted by unique element - 939 idx_sort = np.argsort(a) - 940 idx_unsort = np.argsort(idx_sort) - 941 - 942 # Sorts records array so all unique elements are together - 943 sorted_a = a[idx_sort] - 944 - 945 # Returns the unique values, the index of the first occurrence, - 946 # and the count for each element - 947 vals, idx_start, count = np.unique( - 948 sorted_a, return_index=True, return_counts=True - 949 ) - 950 - 951 # Splits the indices into separate arrays - 952 splits = np.split(idx_sort, idx_start[1:]) - 953 - 954 # Creates unique indices for each split - 955 idx_unq = np.concatenate([arange_exclude_10s(len(x)) for x in splits]) - 956 - 957 # Reorders according to input array - 958 idx_unq = idx_unq[idx_unsort] - 959 - 960 # Magnitude of each index - 961 exp = np.log10( - 962 idx_unq, where=idx_unq > 0, out=np.zeros_like(idx_unq, dtype=np.float64) - 963 ) - 964 idx_unq_mag = np.power(10, np.floor(exp) + 1) - 965 - 966 # Result - 967 return a + idx_unq / idx_unq_mag - 968 - 969 def sparse_upper_star(self, idx, V): - 970 """Sparse implementation of an upper star filtration. - 971 - 972 Parameters - 973 ---------- - 974 idx : :obj:`~numpy.array` - 975 Edge indices for each dimension (MxN). - 976 V : :obj:`~numpy.array` - 977 Array of intensity data (Mx1). - 978 Returns - 979 ------- - 980 idx : :obj:`~numpy.array` - 981 Index of filtered points (Mx1). - 982 persistence : :obj:`~numpy.array` - 983 Persistence of each filtered point (Mx1). 
- 984 - 985 Notes - 986 ----- - 987 This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos - 988 """ - 989 - 990 # Invert - 991 V = -1 * V.copy().astype(int) - 992 - 993 # Embed indices - 994 V = self.embed_unique_indices(V) - 995 - 996 # Connectivity matrix - 997 cmat = KDTree(idx) - 998 cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix") - 999 cmat.setdiag(1) -1000 cmat = sparse.triu(cmat) -1001 -1002 # Pairwise minimums -1003 I, J = cmat.nonzero() -1004 d = np.maximum(V[I], V[J]) -1005 -1006 # Delete connectiity matrix -1007 cmat_shape = cmat.shape -1008 del cmat -1009 -1010 # Sparse distance matrix -1011 sdm = sparse.coo_matrix((d, (I, J)), shape=cmat_shape) -1012 -1013 # Delete pairwise mins -1014 del d, I, J -1015 -1016 # Persistence homology -1017 ph = ripser(sdm, distance_matrix=True, maxdim=0)["dgms"][0] -1018 -1019 # Bound death values -1020 ph[ph[:, 1] == np.inf, 1] = np.max(V) -1021 -1022 # Construct tree to query against -1023 tree = KDTree(V.reshape((-1, 1))) -1024 -1025 # Get the indexes of the first nearest neighbor by birth -1026 _, nn = tree.query(ph[:, 0].reshape((-1, 1)), k=1, workers=-1) -1027 -1028 return nn, -(ph[:, 0] // 1 - ph[:, 1] // 1) + 938 return arr + 939 + 940 # Creates an array of indices, sorted by unique element + 941 idx_sort = np.argsort(a) + 942 idx_unsort = np.argsort(idx_sort) + 943 + 944 # Sorts records array so all unique elements are together + 945 sorted_a = a[idx_sort] + 946 + 947 # Returns the unique values, the index of the first occurrence, + 948 # and the count for each element + 949 vals, idx_start, count = np.unique( + 950 sorted_a, return_index=True, return_counts=True + 951 ) + 952 + 953 # Splits the indices into separate arrays + 954 splits = np.split(idx_sort, idx_start[1:]) + 955 + 956 # Creates unique indices for each split + 957 idx_unq = np.concatenate([arange_exclude_10s(len(x)) for x in splits]) + 958 + 959 # 
Reorders according to input array + 960 idx_unq = idx_unq[idx_unsort] + 961 + 962 # Magnitude of each index + 963 exp = np.log10( + 964 idx_unq, where=idx_unq > 0, out=np.zeros_like(idx_unq, dtype=np.float64) + 965 ) + 966 idx_unq_mag = np.power(10, np.floor(exp) + 1) + 967 + 968 # Result + 969 return a + idx_unq / idx_unq_mag + 970 + 971 def sparse_upper_star(self, idx, V): + 972 """Sparse implementation of an upper star filtration. + 973 + 974 Parameters + 975 ---------- + 976 idx : :obj:`~numpy.array` + 977 Edge indices for each dimension (MxN). + 978 V : :obj:`~numpy.array` + 979 Array of intensity data (Mx1). + 980 Returns + 981 ------- + 982 idx : :obj:`~numpy.array` + 983 Index of filtered points (Mx1). + 984 persistence : :obj:`~numpy.array` + 985 Persistence of each filtered point (Mx1). + 986 + 987 Notes + 988 ----- + 989 This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos + 990 """ + 991 + 992 # Invert + 993 V = -1 * V.copy().astype(int) + 994 + 995 # Embed indices + 996 V = self.embed_unique_indices(V) + 997 + 998 # Connectivity matrix + 999 cmat = KDTree(idx) +1000 cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix") +1001 cmat.setdiag(1) +1002 cmat = sparse.triu(cmat) +1003 +1004 # Pairwise minimums +1005 I, J = cmat.nonzero() +1006 d = np.maximum(V[I], V[J]) +1007 +1008 # Delete connectiity matrix +1009 cmat_shape = cmat.shape +1010 del cmat +1011 +1012 # Sparse distance matrix +1013 sdm = sparse.coo_matrix((d, (I, J)), shape=cmat_shape) +1014 +1015 # Delete pairwise mins +1016 del d, I, J +1017 +1018 # Persistence homology +1019 ph = ripser(sdm, distance_matrix=True, maxdim=0)["dgms"][0] +1020 +1021 # Bound death values +1022 ph[ph[:, 1] == np.inf, 1] = np.max(V) +1023 +1024 # Construct tree to query against +1025 tree = KDTree(V.reshape((-1, 1))) +1026 +1027 # Get the indexes of the first nearest neighbor by birth +1028 _, nn = tree.query(ph[:, 
0].reshape((-1, 1)), k=1, workers=-1) 1029 -1030 def check_if_grid(self, data): -1031 """Check if the data are gridded in mz space. -1032 -1033 Parameters -1034 ---------- -1035 data : DataFrame -1036 DataFrame containing the mass spectrometry data. Needs to have mz and scan columns. -1037 -1038 Returns -1039 ------- -1040 bool -1041 True if the data is gridded in the mz direction, False otherwise. -1042 -1043 Notes -1044 ----- -1045 This function is used within the grid_data function and the find_mass_features function and is not intended to be called directly. -1046 """ -1047 # Calculate the difference between consecutive mz values in a single scan -1048 dat_check = data.copy().reset_index(drop=True) -1049 dat_check["mz_diff"] = np.abs(dat_check["mz"].diff()) -1050 mz_diff_min = ( -1051 dat_check.groupby("scan")["mz_diff"].min().min() -1052 ) # within each scan, what is the smallest mz difference between consecutive mz values -1053 -1054 # Find the mininum mz difference between mz values in the data; regardless of scan -1055 dat_check_mz = dat_check[["mz"]].drop_duplicates().copy() -1056 dat_check_mz = dat_check_mz.sort_values(by=["mz"]).reset_index(drop=True) -1057 dat_check_mz["mz_diff"] = np.abs(dat_check_mz["mz"].diff()) -1058 -1059 # Get minimum mz_diff between mz values in the data -1060 mz_diff_min_raw = dat_check_mz["mz_diff"].min() -1061 -1062 # If the minimum mz difference between mz values in the data is less than the minimum mz difference between mz values within a single scan, then the data is not gridded -1063 if mz_diff_min_raw < mz_diff_min: -1064 return False -1065 else: -1066 return True -1067 -1068 def grid_data(self, data): -1069 """Grid the data in the mz dimension. -1070 -1071 Data must be gridded prior to persistent homology calculations. +1030 return nn, -(ph[:, 0] // 1 - ph[:, 1] // 1) +1031 +1032 def check_if_grid(self, data): +1033 """Check if the data are gridded in mz space. 
+1034 +1035 Parameters +1036 ---------- +1037 data : DataFrame +1038 DataFrame containing the mass spectrometry data. Needs to have mz and scan columns. +1039 +1040 Returns +1041 ------- +1042 bool +1043 True if the data is gridded in the mz direction, False otherwise. +1044 +1045 Notes +1046 ----- +1047 This function is used within the grid_data function and the find_mass_features function and is not intended to be called directly. +1048 """ +1049 # Calculate the difference between consecutive mz values in a single scan +1050 dat_check = data.copy().reset_index(drop=True) +1051 dat_check["mz_diff"] = np.abs(dat_check["mz"].diff()) +1052 mz_diff_min = ( +1053 dat_check.groupby("scan")["mz_diff"].min().min() +1054 ) # within each scan, what is the smallest mz difference between consecutive mz values +1055 +1056 # Find the mininum mz difference between mz values in the data; regardless of scan +1057 dat_check_mz = dat_check[["mz"]].drop_duplicates().copy() +1058 dat_check_mz = dat_check_mz.sort_values(by=["mz"]).reset_index(drop=True) +1059 dat_check_mz["mz_diff"] = np.abs(dat_check_mz["mz"].diff()) +1060 +1061 # Get minimum mz_diff between mz values in the data +1062 mz_diff_min_raw = dat_check_mz["mz_diff"].min() +1063 +1064 # If the minimum mz difference between mz values in the data is less than the minimum mz difference between mz values within a single scan, then the data is not gridded +1065 if mz_diff_min_raw < mz_diff_min: +1066 return False +1067 else: +1068 return True +1069 +1070 def grid_data(self, data): +1071 """Grid the data in the mz dimension. 1072 -1073 Parameters -1074 ---------- -1075 data : DataFrame -1076 The input data containing mz, scan, scan_time, and intensity columns. -1077 -1078 Returns -1079 ------- -1080 DataFrame -1081 The gridded data with mz, scan, scan_time, and intensity columns. -1082 -1083 Raises -1084 ------ -1085 ValueError -1086 If gridding fails. 
-1087 """ -1088 -1089 # Calculate the difference between consecutive mz values in a single scan for grid spacing -1090 data_w = data.copy().reset_index(drop=True) -1091 data_w["mz_diff"] = np.abs(data_w["mz"].diff()) -1092 mz_diff_min = data_w.groupby("scan")["mz_diff"].min().min() * 0.99999 -1093 -1094 # Need high intensity mz values first so they are parents in the output pairs stack -1095 dat_mz = data_w[["mz", "intensity"]].sort_values( -1096 by=["intensity"], ascending=False -1097 ) -1098 dat_mz = dat_mz[["mz"]].drop_duplicates().reset_index(drop=True).copy() -1099 -1100 # Construct KD tree -1101 tree = KDTree(dat_mz.mz.values.reshape(-1, 1)) -1102 sdm = tree.sparse_distance_matrix(tree, mz_diff_min, output_type="coo_matrix") -1103 sdm = sparse.triu(sdm, k=1) -1104 sdm.data = np.ones_like(sdm.data) -1105 distances = sdm.tocoo() -1106 pairs = np.stack((distances.row, distances.col), axis=1) -1107 -1108 # Cull pairs to just get root -1109 to_drop = [] -1110 while len(pairs) > 0: -1111 root_parents = np.setdiff1d(np.unique(pairs[:, 0]), np.unique(pairs[:, 1])) -1112 id_root_parents = np.isin(pairs[:, 0], root_parents) -1113 children_of_roots = np.unique(pairs[id_root_parents, 1]) -1114 to_drop = np.append(to_drop, children_of_roots) -1115 -1116 # Set up pairs array for next iteration by removing pairs with children or parents already dropped -1117 pairs = pairs[~np.isin(pairs[:, 1], to_drop), :] -1118 pairs = pairs[~np.isin(pairs[:, 0], to_drop), :] -1119 dat_mz = dat_mz.reset_index(drop=True).drop(index=np.array(to_drop)) -1120 mz_dat_np = ( -1121 dat_mz[["mz"]] -1122 .sort_values(by=["mz"]) -1123 .reset_index(drop=True) -1124 .values.flatten() -1125 ) -1126 -1127 # Sort data by mz and recast mz to nearest value in mz_dat_np -1128 data_w = data_w.sort_values(by=["mz"]).reset_index(drop=True).copy() -1129 data_w["mz_new"] = mz_dat_np[find_closest(mz_dat_np, data_w["mz"].values)] -1130 data_w["mz_diff"] = np.abs(data_w["mz"] - data_w["mz_new"]) -1131 -1132 # 
Rename mz_new as mz; drop mz_diff; groupby scan and mz and sum intensity -1133 new_data_w = data_w.rename(columns={"mz": "mz_orig", "mz_new": "mz"}).copy() -1134 new_data_w = ( -1135 new_data_w.drop(columns=["mz_diff", "mz_orig"]) -1136 .groupby(["scan", "mz"])["intensity"] -1137 .sum() -1138 .reset_index() -1139 ) -1140 new_data_w = ( -1141 new_data_w.sort_values(by=["scan", "mz"], ascending=[True, True]) -1142 .reset_index(drop=True) -1143 .copy() -1144 ) -1145 -1146 # Check if grid worked and return -1147 if self.check_if_grid(new_data_w): -1148 return new_data_w -1149 else: -1150 raise ValueError("Gridding failed") -1151 -1152 def find_mass_features_ph(self, ms_level=1, grid=True): -1153 """Find mass features within an LCMSBase object using persistent homology. -1154 -1155 Assigns the mass_features attribute to the object (a dictionary of LCMSMassFeature objects, keyed by mass feature id) +1073 Data must be gridded prior to persistent homology calculations. +1074 +1075 Parameters +1076 ---------- +1077 data : DataFrame +1078 The input data containing mz, scan, scan_time, and intensity columns. +1079 +1080 Returns +1081 ------- +1082 DataFrame +1083 The gridded data with mz, scan, scan_time, and intensity columns. +1084 +1085 Raises +1086 ------ +1087 ValueError +1088 If gridding fails. 
+1089 """ +1090 +1091 # Calculate the difference between consecutive mz values in a single scan for grid spacing +1092 data_w = data.copy().reset_index(drop=True) +1093 data_w["mz_diff"] = np.abs(data_w["mz"].diff()) +1094 mz_diff_min = data_w.groupby("scan")["mz_diff"].min().min() * 0.99999 +1095 +1096 # Need high intensity mz values first so they are parents in the output pairs stack +1097 dat_mz = data_w[["mz", "intensity"]].sort_values( +1098 by=["intensity"], ascending=False +1099 ) +1100 dat_mz = dat_mz[["mz"]].drop_duplicates().reset_index(drop=True).copy() +1101 +1102 # Construct KD tree +1103 tree = KDTree(dat_mz.mz.values.reshape(-1, 1)) +1104 sdm = tree.sparse_distance_matrix(tree, mz_diff_min, output_type="coo_matrix") +1105 sdm = sparse.triu(sdm, k=1) +1106 sdm.data = np.ones_like(sdm.data) +1107 distances = sdm.tocoo() +1108 pairs = np.stack((distances.row, distances.col), axis=1) +1109 +1110 # Cull pairs to just get root +1111 to_drop = [] +1112 while len(pairs) > 0: +1113 root_parents = np.setdiff1d(np.unique(pairs[:, 0]), np.unique(pairs[:, 1])) +1114 id_root_parents = np.isin(pairs[:, 0], root_parents) +1115 children_of_roots = np.unique(pairs[id_root_parents, 1]) +1116 to_drop = np.append(to_drop, children_of_roots) +1117 +1118 # Set up pairs array for next iteration by removing pairs with children or parents already dropped +1119 pairs = pairs[~np.isin(pairs[:, 1], to_drop), :] +1120 pairs = pairs[~np.isin(pairs[:, 0], to_drop), :] +1121 dat_mz = dat_mz.reset_index(drop=True).drop(index=np.array(to_drop)) +1122 mz_dat_np = ( +1123 dat_mz[["mz"]] +1124 .sort_values(by=["mz"]) +1125 .reset_index(drop=True) +1126 .values.flatten() +1127 ) +1128 +1129 # Sort data by mz and recast mz to nearest value in mz_dat_np +1130 data_w = data_w.sort_values(by=["mz"]).reset_index(drop=True).copy() +1131 data_w["mz_new"] = mz_dat_np[find_closest(mz_dat_np, data_w["mz"].values)] +1132 data_w["mz_diff"] = np.abs(data_w["mz"] - data_w["mz_new"]) +1133 +1134 # 
Rename mz_new as mz; drop mz_diff; groupby scan and mz and sum intensity +1135 new_data_w = data_w.rename(columns={"mz": "mz_orig", "mz_new": "mz"}).copy() +1136 new_data_w = ( +1137 new_data_w.drop(columns=["mz_diff", "mz_orig"]) +1138 .groupby(["scan", "mz"])["intensity"] +1139 .sum() +1140 .reset_index() +1141 ) +1142 new_data_w = ( +1143 new_data_w.sort_values(by=["scan", "mz"], ascending=[True, True]) +1144 .reset_index(drop=True) +1145 .copy() +1146 ) +1147 +1148 # Check if grid worked and return +1149 if self.check_if_grid(new_data_w): +1150 return new_data_w +1151 else: +1152 raise ValueError("Gridding failed") +1153 +1154 def find_mass_features_ph(self, ms_level=1, grid=True): +1155 """Find mass features within an LCMSBase object using persistent homology. 1156 -1157 Parameters -1158 ---------- -1159 ms_level : int, optional -1160 The MS level to use. Default is 1. -1161 grid : bool, optional -1162 If True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded). Default is True. -1163 -1164 Raises -1165 ------ -1166 ValueError -1167 If no MS level data is found on the object. -1168 If data is not gridded and grid is False. -1169 -1170 Returns -1171 ------- -1172 None, but assigns the mass_features attribute to the object. -1173 -1174 Notes -1175 ----- -1176 This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos -1177 """ -1178 # Check that ms_level is a key in self._ms_uprocessed -1179 if ms_level not in self._ms_unprocessed.keys(): -1180 raise ValueError( -1181 "No MS level " -1182 + str(ms_level) -1183 + " data found, did you instantiate with parser specific to MS level?" 
-1184 ) -1185 -1186 # Get ms data -1187 data = self._ms_unprocessed[ms_level].copy() -1188 -1189 # Drop rows with missing intensity values and reset index -1190 data = data.dropna(subset=["intensity"]).reset_index(drop=True) -1191 -1192 -1193 # Threshold data -1194 dims = ["mz", "scan_time"] -1195 threshold = self.parameters.lc_ms.ph_inten_min_rel * data.intensity.max() -1196 data_thres = data[data["intensity"] > threshold].reset_index(drop=True).copy() -1197 -1198 # Check if gridded, if not, grid -1199 gridded_mz = self.check_if_grid(data_thres) -1200 if gridded_mz is False: -1201 if grid is False: -1202 raise ValueError( -1203 "Data are not gridded in mz dimension, try reprocessing with a different params or grid data before running this function" -1204 ) -1205 else: -1206 data_thres = self.grid_data(data_thres) -1207 -1208 # Add build factors and add scan_time -1209 data_thres = data_thres.merge(self.scan_df[["scan", "scan_time"]], on="scan") -1210 factors = { -1211 dim: pd.factorize(data_thres[dim], sort=True)[1].astype(np.float32) -1212 for dim in dims -1213 } # this is return a float64 index -1214 -1215 # Build indexes -1216 index = { -1217 dim: np.searchsorted(factors[dim], data_thres[dim]).astype(np.float32) -1218 for dim in factors -1219 } -1220 -1221 # Smooth data -1222 iterations = self.parameters.lc_ms.ph_smooth_it -1223 smooth_radius = [ -1224 self.parameters.lc_ms.ph_smooth_radius_mz, -1225 self.parameters.lc_ms.ph_smooth_radius_scan, -1226 ] # mz, scan_time smoothing radius (in steps) -1227 -1228 index = np.vstack([index[dim] for dim in dims]).T -1229 V = data_thres["intensity"].values -1230 resid = np.inf -1231 for i in range(iterations): -1232 # Previous iteration -1233 V_prev = V.copy() -1234 resid_prev = resid -1235 V = self.sparse_mean_filter(index, V, radius=smooth_radius) -1236 -1237 # Calculate residual with previous iteration -1238 resid = np.sqrt(np.mean(np.square(V - V_prev))) -1239 -1240 # Evaluate convergence -1241 if i > 0: -1242 # 
Percent change in residual -1243 test = np.abs(resid - resid_prev) / resid_prev -1244 -1245 # Exit criteria -1246 if test <= 0: -1247 break -1248 -1249 # Overwrite values -1250 data_thres["intensity"] = V -1251 -1252 # Use persistent homology to find regions of interest -1253 pidx, pers = self.sparse_upper_star(index, V) -1254 pidx = pidx[pers > 1] -1255 pers = pers[pers > 1] -1256 -1257 # Get peaks -1258 peaks = data_thres.iloc[pidx, :].reset_index(drop=True) -1259 -1260 # Add persistence column -1261 peaks["persistence"] = pers -1262 mass_features = peaks.sort_values( -1263 by="persistence", ascending=False -1264 ).reset_index(drop=True) -1265 -1266 # Filter by persistence threshold -1267 persistence_threshold = ( -1268 self.parameters.lc_ms.ph_persis_min_rel * data.intensity.max() -1269 ) -1270 mass_features = mass_features.loc[ -1271 mass_features["persistence"] > persistence_threshold, : -1272 ].reset_index(drop=True) -1273 -1274 # Rename scan column to apex_scan -1275 mass_features = mass_features.rename( -1276 columns={"scan": "apex_scan", "scan_time": "retention_time"} -1277 ) -1278 -1279 # Populate mass_features attribute -1280 self.mass_features = {} -1281 for row in mass_features.itertuples(): -1282 row_dict = mass_features.iloc[row.Index].to_dict() -1283 lcms_feature = LCMSMassFeature(self, **row_dict) -1284 self.mass_features[lcms_feature.id] = lcms_feature -1285 -1286 if self.parameters.lc_ms.verbose_processing: -1287 print("Found " + str(len(mass_features)) + " initial mass features") -1288 -1289 def cluster_mass_features( -1290 self, drop_children=True, sort_by="persistence" -1291 ): -1292 """Cluster mass features -1293 -1294 Based on their proximity in the mz and scan_time dimensions, priorizies the mass features with the highest persistence. -1295 -1296 Parameters -1297 ---------- -1298 drop_children : bool, optional -1299 Whether to drop the mass features that are not cluster parents. Default is True. 
-1300 sort_by : str, optional -1301 The column to sort the mass features by, this will determine which mass features get rolled up into a parent mass feature. Default is "persistence". -1302 -1303 Raises -1304 ------ -1305 ValueError -1306 If no mass features are found. -1307 If too many mass features are found. -1308 -1309 Returns -1310 ------- -1311 None if drop_children is True, otherwise returns a list of mass feature ids that are not cluster parents. -1312 """ -1313 verbose = self.parameters.lc_ms.verbose_processing -1314 -1315 if self.mass_features is None: -1316 raise ValueError("No mass features found, run find_mass_features() first") -1317 if len(self.mass_features) > 400000: -1318 raise ValueError( -1319 "Too many mass featuers of interest found, run find_mass_features() with a higher intensity threshold" -1320 ) -1321 dims = ["mz", "scan_time"] -1322 mf_df_og = self.mass_features_to_df() -1323 mf_df = mf_df_og.copy() -1324 -1325 # Sort mass features by sort_by column, make mf_id its own column for easier bookkeeping -1326 mf_df = mf_df.sort_values(by=sort_by, ascending=False).reset_index(drop=False) -1327 -1328 tol = [ -1329 self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel, -1330 self.parameters.lc_ms.mass_feature_cluster_rt_tolerance, -1331 ] # mz, in relative; scan_time in minutes -1332 relative = [True, False] -1333 -1334 # Compute inter-feature distances -1335 distances = None -1336 for i in range(len(dims)): -1337 # Construct k-d tree -1338 values = mf_df[dims[i]].values -1339 tree = KDTree(values.reshape(-1, 1)) -1340 -1341 max_tol = tol[i] -1342 if relative[i] is True: -1343 # Maximum absolute tolerance -1344 max_tol = tol[i] * values.max() -1345 -1346 # Compute sparse distance matrix -1347 # the larger the max_tol, the slower this operation is -1348 sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix") -1349 -1350 # Only consider forward case, exclude diagonal -1351 sdm = sparse.triu(sdm, k=1) -1352 -1353 # Filter 
relative distances -1354 if relative[i] is True: -1355 # Compute relative distances -1356 rel_dists = sdm.data / values[sdm.row] # or col? -1357 -1358 # Indices of relative distances less than tolerance -1359 idx = rel_dists <= tol[i] -1360 -1361 # Reconstruct sparse distance matrix -1362 sdm = sparse.coo_matrix( -1363 (rel_dists[idx], (sdm.row[idx], sdm.col[idx])), -1364 shape=(len(values), len(values)), -1365 ) -1366 -1367 # Cast as binary matrix -1368 sdm.data = np.ones_like(sdm.data) -1369 -1370 # Stack distances -1371 if distances is None: -1372 distances = sdm -1373 else: -1374 distances = distances.multiply(sdm) -1375 -1376 # Extract indices of within-tolerance points -1377 distances = distances.tocoo() -1378 pairs = np.stack((distances.row, distances.col), axis=1) -1379 pairs_df = pd.DataFrame(pairs, columns=["parent", "child"]) -1380 pairs_df = pairs_df.set_index("parent") -1381 -1382 to_drop = [] -1383 while not pairs_df.empty: -1384 # Find root_parents and their children -1385 root_parents = np.setdiff1d(np.unique(pairs_df.index.values), np.unique(pairs_df.child.values)) -1386 children_of_roots = pairs_df.loc[root_parents, "child"].unique() -1387 to_drop = np.append(to_drop, children_of_roots) -1388 -1389 # Remove root_children as possible parents from pairs_df for next iteration -1390 pairs_df = pairs_df.drop( -1391 index=children_of_roots, errors="ignore" -1392 ) -1393 pairs_df = pairs_df.reset_index().set_index("child") -1394 # Remove root_children as possible children from pairs_df for next iteration -1395 pairs_df = pairs_df.drop(index=children_of_roots) -1396 -1397 # Prepare for next iteration -1398 pairs_df = pairs_df.reset_index().set_index("parent") -1399 -1400 # Drop mass features that are not cluster parents -1401 mf_df = mf_df.drop(index=np.array(to_drop)) -1402 -1403 # Set index back to mf_id -1404 mf_df = mf_df.set_index("mf_id") -1405 if verbose: -1406 print(str(len(mf_df)) + " mass features remaining") -1407 -1408 mf_df_new = 
mf_df_og.copy() -1409 mf_df_new["cluster_parent"] = np.where( -1410 np.isin(mf_df_new.index, mf_df.index), True, False -1411 ) -1412 -1413 # get mass feature ids of features that are not cluster parents -1414 cluster_daughters = mf_df_new[mf_df_new["cluster_parent"] == False].index.values -1415 if drop_children is True: -1416 # Drop mass features that are not cluster parents from self -1417 self.mass_features = { -1418 k: v -1419 for k, v in self.mass_features.items() -1420 if k not in cluster_daughters -1421 } -1422 else: -1423 return cluster_daughters +1157 Assigns the mass_features attribute to the object (a dictionary of LCMSMassFeature objects, keyed by mass feature id) +1158 +1159 Parameters +1160 ---------- +1161 ms_level : int, optional +1162 The MS level to use. Default is 1. +1163 grid : bool, optional +1164 If True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded). Default is True. +1165 +1166 Raises +1167 ------ +1168 ValueError +1169 If no MS level data is found on the object. +1170 If data is not gridded and grid is False. +1171 +1172 Returns +1173 ------- +1174 None, but assigns the mass_features attribute to the object. +1175 +1176 Notes +1177 ----- +1178 This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos +1179 """ +1180 # Check that ms_level is a key in self._ms_uprocessed +1181 if ms_level not in self._ms_unprocessed.keys(): +1182 raise ValueError( +1183 "No MS level " +1184 + str(ms_level) +1185 + " data found, did you instantiate with parser specific to MS level?" 
+1186 ) +1187 +1188 # Get ms data +1189 data = self._ms_unprocessed[ms_level].copy() +1190 +1191 # Drop rows with missing intensity values and reset index +1192 data = data.dropna(subset=["intensity"]).reset_index(drop=True) +1193 +1194 # Threshold data +1195 dims = ["mz", "scan_time"] +1196 threshold = self.parameters.lc_ms.ph_inten_min_rel * data.intensity.max() +1197 data_thres = data[data["intensity"] > threshold].reset_index(drop=True).copy() +1198 +1199 # Check if gridded, if not, grid +1200 gridded_mz = self.check_if_grid(data_thres) +1201 if gridded_mz is False: +1202 if grid is False: +1203 raise ValueError( +1204 "Data are not gridded in mz dimension, try reprocessing with a different params or grid data before running this function" +1205 ) +1206 else: +1207 data_thres = self.grid_data(data_thres) +1208 +1209 # Add build factors and add scan_time +1210 data_thres = data_thres.merge(self.scan_df[["scan", "scan_time"]], on="scan") +1211 factors = { +1212 dim: pd.factorize(data_thres[dim], sort=True)[1].astype(np.float32) +1213 for dim in dims +1214 } # this is return a float64 index +1215 +1216 # Build indexes +1217 index = { +1218 dim: np.searchsorted(factors[dim], data_thres[dim]).astype(np.float32) +1219 for dim in factors +1220 } +1221 +1222 # Smooth data +1223 iterations = self.parameters.lc_ms.ph_smooth_it +1224 smooth_radius = [ +1225 self.parameters.lc_ms.ph_smooth_radius_mz, +1226 self.parameters.lc_ms.ph_smooth_radius_scan, +1227 ] # mz, scan_time smoothing radius (in steps) +1228 +1229 index = np.vstack([index[dim] for dim in dims]).T +1230 V = data_thres["intensity"].values +1231 resid = np.inf +1232 for i in range(iterations): +1233 # Previous iteration +1234 V_prev = V.copy() +1235 resid_prev = resid +1236 V = self.sparse_mean_filter(index, V, radius=smooth_radius) +1237 +1238 # Calculate residual with previous iteration +1239 resid = np.sqrt(np.mean(np.square(V - V_prev))) +1240 +1241 # Evaluate convergence +1242 if i > 0: +1243 # Percent 
change in residual +1244 test = np.abs(resid - resid_prev) / resid_prev +1245 +1246 # Exit criteria +1247 if test <= 0: +1248 break +1249 +1250 # Overwrite values +1251 data_thres["intensity"] = V +1252 +1253 # Use persistent homology to find regions of interest +1254 pidx, pers = self.sparse_upper_star(index, V) +1255 pidx = pidx[pers > 1] +1256 pers = pers[pers > 1] +1257 +1258 # Get peaks +1259 peaks = data_thres.iloc[pidx, :].reset_index(drop=True) +1260 +1261 # Add persistence column +1262 peaks["persistence"] = pers +1263 mass_features = peaks.sort_values( +1264 by="persistence", ascending=False +1265 ).reset_index(drop=True) +1266 +1267 # Filter by persistence threshold +1268 persistence_threshold = ( +1269 self.parameters.lc_ms.ph_persis_min_rel * data.intensity.max() +1270 ) +1271 mass_features = mass_features.loc[ +1272 mass_features["persistence"] > persistence_threshold, : +1273 ].reset_index(drop=True) +1274 +1275 # Rename scan column to apex_scan +1276 mass_features = mass_features.rename( +1277 columns={"scan": "apex_scan", "scan_time": "retention_time"} +1278 ) +1279 +1280 # Populate mass_features attribute +1281 self.mass_features = {} +1282 for row in mass_features.itertuples(): +1283 row_dict = mass_features.iloc[row.Index].to_dict() +1284 lcms_feature = LCMSMassFeature(self, **row_dict) +1285 self.mass_features[lcms_feature.id] = lcms_feature +1286 +1287 if self.parameters.lc_ms.verbose_processing: +1288 print("Found " + str(len(mass_features)) + " initial mass features") +1289 +1290 def cluster_mass_features(self, drop_children=True, sort_by="persistence"): +1291 """Cluster mass features +1292 +1293 Based on their proximity in the mz and scan_time dimensions, priorizies the mass features with the highest persistence. +1294 +1295 Parameters +1296 ---------- +1297 drop_children : bool, optional +1298 Whether to drop the mass features that are not cluster parents. Default is True. 
+1299 sort_by : str, optional +1300 The column to sort the mass features by, this will determine which mass features get rolled up into a parent mass feature. Default is "persistence". +1301 +1302 Raises +1303 ------ +1304 ValueError +1305 If no mass features are found. +1306 If too many mass features are found. +1307 +1308 Returns +1309 ------- +1310 None if drop_children is True, otherwise returns a list of mass feature ids that are not cluster parents. +1311 """ +1312 verbose = self.parameters.lc_ms.verbose_processing +1313 +1314 if self.mass_features is None: +1315 raise ValueError("No mass features found, run find_mass_features() first") +1316 if len(self.mass_features) > 400000: +1317 raise ValueError( +1318 "Too many mass featuers of interest found, run find_mass_features() with a higher intensity threshold" +1319 ) +1320 dims = ["mz", "scan_time"] +1321 mf_df_og = self.mass_features_to_df() +1322 mf_df = mf_df_og.copy() +1323 +1324 # Sort mass features by sort_by column, make mf_id its own column for easier bookkeeping +1325 mf_df = mf_df.sort_values(by=sort_by, ascending=False).reset_index(drop=False) +1326 +1327 tol = [ +1328 self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel, +1329 self.parameters.lc_ms.mass_feature_cluster_rt_tolerance, +1330 ] # mz, in relative; scan_time in minutes +1331 relative = [True, False] +1332 +1333 # Compute inter-feature distances +1334 distances = None +1335 for i in range(len(dims)): +1336 # Construct k-d tree +1337 values = mf_df[dims[i]].values +1338 tree = KDTree(values.reshape(-1, 1)) +1339 +1340 max_tol = tol[i] +1341 if relative[i] is True: +1342 # Maximum absolute tolerance +1343 max_tol = tol[i] * values.max() +1344 +1345 # Compute sparse distance matrix +1346 # the larger the max_tol, the slower this operation is +1347 sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix") +1348 +1349 # Only consider forward case, exclude diagonal +1350 sdm = sparse.triu(sdm, k=1) +1351 +1352 # Filter 
relative distances +1353 if relative[i] is True: +1354 # Compute relative distances +1355 rel_dists = sdm.data / values[sdm.row] # or col? +1356 +1357 # Indices of relative distances less than tolerance +1358 idx = rel_dists <= tol[i] +1359 +1360 # Reconstruct sparse distance matrix +1361 sdm = sparse.coo_matrix( +1362 (rel_dists[idx], (sdm.row[idx], sdm.col[idx])), +1363 shape=(len(values), len(values)), +1364 ) +1365 +1366 # Cast as binary matrix +1367 sdm.data = np.ones_like(sdm.data) +1368 +1369 # Stack distances +1370 if distances is None: +1371 distances = sdm +1372 else: +1373 distances = distances.multiply(sdm) +1374 +1375 # Extract indices of within-tolerance points +1376 distances = distances.tocoo() +1377 pairs = np.stack((distances.row, distances.col), axis=1) +1378 pairs_df = pd.DataFrame(pairs, columns=["parent", "child"]) +1379 pairs_df = pairs_df.set_index("parent") +1380 +1381 to_drop = [] +1382 while not pairs_df.empty: +1383 # Find root_parents and their children +1384 root_parents = np.setdiff1d( +1385 np.unique(pairs_df.index.values), np.unique(pairs_df.child.values) +1386 ) +1387 children_of_roots = pairs_df.loc[root_parents, "child"].unique() +1388 to_drop = np.append(to_drop, children_of_roots) +1389 +1390 # Remove root_children as possible parents from pairs_df for next iteration +1391 pairs_df = pairs_df.drop(index=children_of_roots, errors="ignore") +1392 pairs_df = pairs_df.reset_index().set_index("child") +1393 # Remove root_children as possible children from pairs_df for next iteration +1394 pairs_df = pairs_df.drop(index=children_of_roots) +1395 +1396 # Prepare for next iteration +1397 pairs_df = pairs_df.reset_index().set_index("parent") +1398 +1399 # Drop mass features that are not cluster parents +1400 mf_df = mf_df.drop(index=np.array(to_drop)) +1401 +1402 # Set index back to mf_id +1403 mf_df = mf_df.set_index("mf_id") +1404 if verbose: +1405 print(str(len(mf_df)) + " mass features remaining") +1406 +1407 mf_df_new = 
mf_df_og.copy() +1408 mf_df_new["cluster_parent"] = np.where( +1409 np.isin(mf_df_new.index, mf_df.index), True, False +1410 ) +1411 +1412 # get mass feature ids of features that are not cluster parents +1413 cluster_daughters = mf_df_new[mf_df_new["cluster_parent"] == False].index.values +1414 if drop_children is True: +1415 # Drop mass features that are not cluster parents from self +1416 self.mass_features = { +1417 k: v +1418 for k, v in self.mass_features.items() +1419 if k not in cluster_daughters +1420 } +1421 else: +1422 return cluster_daughters

    @@ -1808,7 +1807,7 @@
    Returns
    232 polarity : int, optional 233 The polarity of the mass spectra (1 or -1). If not set, the polarity will be determined from the dataset. Defaults to None. (fastest if set to -1 or 1) 234 ms_params : MSParameters, optional -235 The mass spectrum parameters to use. If not set (None), the globally set parameters will be used. Defaults to None. +235 The mass spectrum parameters to use. If not set (None), the globally set parameters will be used. Defaults to None. 236 237 Returns 238 ------- @@ -1899,7 +1898,7 @@
    Returns
    323 if auto_process: 324 ms.process_mass_spec() 325 return ms -326 +326 327 def find_mass_features(self, ms_level=1, grid=True): 328 """Find mass features within an LCMSBase object 329 @@ -1930,451 +1929,453 @@
    Returns
    354 if pp_method == "persistent homology": 355 msx_scan_df = self.scan_df[self.scan_df["ms_level"] == ms_level] 356 if all(msx_scan_df["ms_format"] == "profile"): -357 self.find_mass_features_ph( -358 ms_level=ms_level, grid=grid -359 ) -360 self.cluster_mass_features( -361 drop_children=True, sort_by="persistence" -362 ) -363 else: -364 raise ValueError( -365 "MS{} scans are not profile mode, which is required for persistent homology peak picking.".format( -366 ms_level -367 ) -368 ) -369 else: -370 raise ValueError("Peak picking method not implemented") -371 -372 def integrate_mass_features(self, drop_if_fail=True, drop_duplicates=True, ms_level=1): -373 """Integrate mass features and extract EICs. +357 self.find_mass_features_ph(ms_level=ms_level, grid=grid) +358 self.cluster_mass_features(drop_children=True, sort_by="persistence") +359 else: +360 raise ValueError( +361 "MS{} scans are not profile mode, which is required for persistent homology peak picking.".format( +362 ms_level +363 ) +364 ) +365 else: +366 raise ValueError("Peak picking method not implemented") +367 +368 def integrate_mass_features( +369 self, drop_if_fail=True, drop_duplicates=True, ms_level=1 +370 ): +371 """Integrate mass features and extract EICs. +372 +373 Populates the _eics attribute on the LCMSBase object for each unique mz in the mass_features dataframe and adds data (start_scan, final_scan, area) to the mass_features attribute. 374 -375 Populates the _eics attribute on the LCMSBase object for each unique mz in the mass_features dataframe and adds data (start_scan, final_scan, area) to the mass_features attribute. -376 -377 Parameters -378 ---------- -379 drop_if_fail : bool, optional -380 Whether to drop mass features if the EIC limit calculations fail. -381 Default is True. -382 drop_duplicates : bool, optional -383 Whether to mass features that appear to be duplicates -384 (i.e., mz is similar to another mass feature and limits of the EIC are similar or encapsulating). 
-385 Default is True. -386 ms_level : int, optional -387 The MS level to use. Default is 1. -388 -389 Raises -390 ------ -391 ValueError -392 If no mass features are found. -393 If no MS level data is found for the given MS level (either in data or in the scan data) -394 -395 Returns -396 ------- -397 None, but populates the eics attribute on the LCMSBase object and adds data (start_scan, final_scan, area) to the mass_features attribute. -398 -399 Notes -400 ----- -401 drop_if_fail is useful for discarding mass features that do not have good shapes, usually due to a detection on a shoulder of a peak or a noisy region (especially if minimal smoothing is used during mass feature detection). -402 """ -403 # Check if there is data -404 if ms_level in self._ms_unprocessed.keys(): -405 raw_data = self._ms_unprocessed[ms_level].copy() -406 else: -407 raise ValueError("No MS level " + str(ms_level) + " data found") -408 if self.mass_features is not None: -409 mf_df = self.mass_features_to_df().copy() -410 else: -411 raise ValueError( -412 "No mass features found, did you run find_mass_features() first?" -413 ) -414 # Check if mass_spectrum exists on each mass feature -415 if not all( -416 [mf.mass_spectrum is not None for mf in self.mass_features.values()] -417 ): -418 raise ValueError( -419 "Mass spectrum must be associated with each mass feature, did you run add_associated_ms1() first?" 
-420 ) -421 -422 # Subset scan data to only include correct ms_level -423 scan_df_sub = self.scan_df[ -424 self.scan_df["ms_level"] == int(ms_level) -425 ].reset_index(drop=True) -426 if scan_df_sub.empty: -427 raise ValueError("No MS level " + ms_level + " data found in scan data") -428 scan_df_sub = scan_df_sub[["scan", "scan_time"]].copy() -429 -430 mzs_to_extract = np.unique(mf_df["mz"].values) -431 mzs_to_extract.sort() -432 -433 # Get EICs for each unique mz in mass features list -434 for mz in mzs_to_extract: -435 mz_max = mz + self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6 -436 mz_min = mz - self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6 -437 raw_data_sub = raw_data[ -438 (raw_data["mz"] >= mz_min) & (raw_data["mz"] <= mz_max) -439 ].reset_index(drop=True) -440 raw_data_sub = ( -441 raw_data_sub.groupby(["scan"])["intensity"].sum().reset_index() -442 ) -443 raw_data_sub = scan_df_sub.merge(raw_data_sub, on="scan", how="left") -444 raw_data_sub["intensity"] = raw_data_sub["intensity"].fillna(0) -445 myEIC = EIC_Data( -446 scans=raw_data_sub["scan"].values, -447 time=raw_data_sub["scan_time"].values, -448 eic=raw_data_sub["intensity"].values, -449 ) -450 # Smooth EIC -451 smoothed_eic = self.smooth_tic(myEIC.eic) -452 smoothed_eic[smoothed_eic < 0] = 0 -453 myEIC.eic_smoothed = smoothed_eic -454 self.eics[mz] = myEIC -455 -456 # Get limits of mass features using EIC centroid detector and integrate -457 mf_df["area"] = np.nan -458 for idx, mass_feature in mf_df.iterrows(): -459 mz = mass_feature.mz -460 apex_scan = mass_feature.apex_scan -461 -462 # Pull EIC data and find apex scan index -463 myEIC = self.eics[mz] -464 self.mass_features[idx]._eic_data = myEIC -465 apex_index = np.where(myEIC.scans == apex_scan)[0][0] -466 -467 # Find left and right limits of peak using EIC centroid detector, add to EICData -468 centroid_eics = self.eic_centroid_detector( -469 myEIC.time, -470 myEIC.eic_smoothed, -471 mass_feature.intensity * 1.1, -472 
apex_indexes=[int(apex_index)], -473 ) -474 l_a_r_scan_idx = [i for i in centroid_eics] -475 if len(l_a_r_scan_idx) > 0: -476 # Add start and final scan to mass_features and EICData -477 left_scan, right_scan = ( -478 myEIC.scans[l_a_r_scan_idx[0][0]], -479 myEIC.scans[l_a_r_scan_idx[0][2]], -480 ) -481 mf_scan_apex = [(left_scan, int(apex_scan), right_scan)] -482 myEIC.apexes = myEIC.apexes + mf_scan_apex -483 self.mass_features[idx].start_scan = left_scan -484 self.mass_features[idx].final_scan = right_scan -485 -486 # Find area under peak using limits from EIC centroid detector, add to mass_features and EICData -487 area = np.trapz( -488 myEIC.eic_smoothed[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1], -489 myEIC.time[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1], -490 ) -491 mf_df.at[idx, "area"] = area -492 myEIC.areas = myEIC.areas + [area] -493 self.eics[mz] = myEIC -494 self.mass_features[idx]._area = area -495 else: -496 if drop_if_fail is True: -497 self.mass_features.pop(idx) -498 -499 if drop_duplicates: -500 # Prepare mass feature dataframe -501 mf_df = self.mass_features_to_df().copy() -502 -503 # For each mass feature, find all mass features within the clustering tolerance ppm and drop if their start and end times are within another mass feature -504 # Kepp the first mass fea -505 for idx, mass_feature in mf_df.iterrows(): -506 mz = mass_feature.mz -507 apex_scan = mass_feature.apex_scan -508 -509 mf_df["mz_diff_ppm"] = np.abs(mf_df["mz"] - mz) / mz * 10**6 -510 mf_df_sub = mf_df[mf_df["mz_diff_ppm"] < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel * 10**6].copy() -511 -512 # For all mass features within the clustering tolerance, check if the start and end times are within the start and end times of the mass feature -513 for idx2, mass_feature2 in mf_df_sub.iterrows(): -514 if idx2 != idx: -515 if mass_feature2.start_scan >= mass_feature.start_scan and mass_feature2.final_scan <= mass_feature.final_scan: -516 if idx2 in 
self.mass_features.keys(): -517 self.mass_features.pop(idx2) -518 -519 -520 -521 def find_c13_mass_features(self): -522 """Mark likely C13 isotopes and connect to monoisoitopic mass features. +375 Parameters +376 ---------- +377 drop_if_fail : bool, optional +378 Whether to drop mass features if the EIC limit calculations fail. +379 Default is True. +380 drop_duplicates : bool, optional +381 Whether to mass features that appear to be duplicates +382 (i.e., mz is similar to another mass feature and limits of the EIC are similar or encapsulating). +383 Default is True. +384 ms_level : int, optional +385 The MS level to use. Default is 1. +386 +387 Raises +388 ------ +389 ValueError +390 If no mass features are found. +391 If no MS level data is found for the given MS level (either in data or in the scan data) +392 +393 Returns +394 ------- +395 None, but populates the eics attribute on the LCMSBase object and adds data (start_scan, final_scan, area) to the mass_features attribute. +396 +397 Notes +398 ----- +399 drop_if_fail is useful for discarding mass features that do not have good shapes, usually due to a detection on a shoulder of a peak or a noisy region (especially if minimal smoothing is used during mass feature detection). +400 """ +401 # Check if there is data +402 if ms_level in self._ms_unprocessed.keys(): +403 raw_data = self._ms_unprocessed[ms_level].copy() +404 else: +405 raise ValueError("No MS level " + str(ms_level) + " data found") +406 if self.mass_features is not None: +407 mf_df = self.mass_features_to_df().copy() +408 else: +409 raise ValueError( +410 "No mass features found, did you run find_mass_features() first?" +411 ) +412 # Check if mass_spectrum exists on each mass feature +413 if not all( +414 [mf.mass_spectrum is not None for mf in self.mass_features.values()] +415 ): +416 raise ValueError( +417 "Mass spectrum must be associated with each mass feature, did you run add_associated_ms1() first?" 
+418 ) +419 +420 # Subset scan data to only include correct ms_level +421 scan_df_sub = self.scan_df[ +422 self.scan_df["ms_level"] == int(ms_level) +423 ].reset_index(drop=True) +424 if scan_df_sub.empty: +425 raise ValueError("No MS level " + ms_level + " data found in scan data") +426 scan_df_sub = scan_df_sub[["scan", "scan_time"]].copy() +427 +428 mzs_to_extract = np.unique(mf_df["mz"].values) +429 mzs_to_extract.sort() +430 +431 # Get EICs for each unique mz in mass features list +432 for mz in mzs_to_extract: +433 mz_max = mz + self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6 +434 mz_min = mz - self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6 +435 raw_data_sub = raw_data[ +436 (raw_data["mz"] >= mz_min) & (raw_data["mz"] <= mz_max) +437 ].reset_index(drop=True) +438 raw_data_sub = ( +439 raw_data_sub.groupby(["scan"])["intensity"].sum().reset_index() +440 ) +441 raw_data_sub = scan_df_sub.merge(raw_data_sub, on="scan", how="left") +442 raw_data_sub["intensity"] = raw_data_sub["intensity"].fillna(0) +443 myEIC = EIC_Data( +444 scans=raw_data_sub["scan"].values, +445 time=raw_data_sub["scan_time"].values, +446 eic=raw_data_sub["intensity"].values, +447 ) +448 # Smooth EIC +449 smoothed_eic = self.smooth_tic(myEIC.eic) +450 smoothed_eic[smoothed_eic < 0] = 0 +451 myEIC.eic_smoothed = smoothed_eic +452 self.eics[mz] = myEIC +453 +454 # Get limits of mass features using EIC centroid detector and integrate +455 mf_df["area"] = np.nan +456 for idx, mass_feature in mf_df.iterrows(): +457 mz = mass_feature.mz +458 apex_scan = mass_feature.apex_scan +459 +460 # Pull EIC data and find apex scan index +461 myEIC = self.eics[mz] +462 self.mass_features[idx]._eic_data = myEIC +463 apex_index = np.where(myEIC.scans == apex_scan)[0][0] +464 +465 # Find left and right limits of peak using EIC centroid detector, add to EICData +466 centroid_eics = self.eic_centroid_detector( +467 myEIC.time, +468 myEIC.eic_smoothed, +469 mass_feature.intensity * 1.1, +470 
apex_indexes=[int(apex_index)], +471 ) +472 l_a_r_scan_idx = [i for i in centroid_eics] +473 if len(l_a_r_scan_idx) > 0: +474 # Add start and final scan to mass_features and EICData +475 left_scan, right_scan = ( +476 myEIC.scans[l_a_r_scan_idx[0][0]], +477 myEIC.scans[l_a_r_scan_idx[0][2]], +478 ) +479 mf_scan_apex = [(left_scan, int(apex_scan), right_scan)] +480 myEIC.apexes = myEIC.apexes + mf_scan_apex +481 self.mass_features[idx].start_scan = left_scan +482 self.mass_features[idx].final_scan = right_scan +483 +484 # Find area under peak using limits from EIC centroid detector, add to mass_features and EICData +485 area = np.trapz( +486 myEIC.eic_smoothed[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1], +487 myEIC.time[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1], +488 ) +489 mf_df.at[idx, "area"] = area +490 myEIC.areas = myEIC.areas + [area] +491 self.eics[mz] = myEIC +492 self.mass_features[idx]._area = area +493 else: +494 if drop_if_fail is True: +495 self.mass_features.pop(idx) +496 +497 if drop_duplicates: +498 # Prepare mass feature dataframe +499 mf_df = self.mass_features_to_df().copy() +500 +501 # For each mass feature, find all mass features within the clustering tolerance ppm and drop if their start and end times are within another mass feature +502 # Kepp the first mass fea +503 for idx, mass_feature in mf_df.iterrows(): +504 mz = mass_feature.mz +505 apex_scan = mass_feature.apex_scan +506 +507 mf_df["mz_diff_ppm"] = np.abs(mf_df["mz"] - mz) / mz * 10**6 +508 mf_df_sub = mf_df[ +509 mf_df["mz_diff_ppm"] +510 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel +511 * 10**6 +512 ].copy() +513 +514 # For all mass features within the clustering tolerance, check if the start and end times are within the start and end times of the mass feature +515 for idx2, mass_feature2 in mf_df_sub.iterrows(): +516 if idx2 != idx: +517 if ( +518 mass_feature2.start_scan >= mass_feature.start_scan +519 and mass_feature2.final_scan <= 
mass_feature.final_scan +520 ): +521 if idx2 in self.mass_features.keys(): +522 self.mass_features.pop(idx2) 523 -524 Returns -525 ------- -526 None, but populates the monoisotopic_mf_id and isotopologue_type attributes to the indivual LCMSMassFeatures within the mass_features attribute of the LCMSBase object. -527 -528 Raises -529 ------ -530 ValueError -531 If no mass features are found. -532 """ -533 verbose = self.parameters.lc_ms.verbose_processing -534 if verbose: -535 print("evaluating mass features for C13 isotopes") -536 if self.mass_features is None: -537 raise ValueError("No mass features found, run find_mass_features() first") -538 -539 # Data prep fo sparse distance matrix -540 dims = ["mz", "scan_time"] -541 mf_df = self.mass_features_to_df().copy() -542 # Drop mass features that have no area (these are likely to be noise) -543 mf_df = mf_df[mf_df["area"].notnull()] -544 mf_df["mf_id"] = mf_df.index.values -545 dims = ["mz", "scan_time"] -546 -547 # Sort my ascending mz so we always get the monoisotopic mass first, regardless of the order/intensity of the mass features -548 mf_df = mf_df.sort_values(by=["mz"]).reset_index(drop=True).copy() +524 def find_c13_mass_features(self): +525 """Mark likely C13 isotopes and connect to monoisoitopic mass features. +526 +527 Returns +528 ------- +529 None, but populates the monoisotopic_mf_id and isotopologue_type attributes to the indivual LCMSMassFeatures within the mass_features attribute of the LCMSBase object. +530 +531 Raises +532 ------ +533 ValueError +534 If no mass features are found. 
+535 """ +536 verbose = self.parameters.lc_ms.verbose_processing +537 if verbose: +538 print("evaluating mass features for C13 isotopes") +539 if self.mass_features is None: +540 raise ValueError("No mass features found, run find_mass_features() first") +541 +542 # Data prep fo sparse distance matrix +543 dims = ["mz", "scan_time"] +544 mf_df = self.mass_features_to_df().copy() +545 # Drop mass features that have no area (these are likely to be noise) +546 mf_df = mf_df[mf_df["area"].notnull()] +547 mf_df["mf_id"] = mf_df.index.values +548 dims = ["mz", "scan_time"] 549 -550 mz_diff = 1.003355 # C13-C12 mass difference -551 tol = [ -552 mf_df["mz"].median() -553 * self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel, -554 self.parameters.lc_ms.mass_feature_cluster_rt_tolerance * 0.5, -555 ] # mz, in relative; scan_time in minutes -556 -557 # Compute inter-feature distances -558 distances = None -559 for i in range(len(dims)): -560 # Construct k-d tree -561 values = mf_df[dims[i]].values -562 tree = KDTree(values.reshape(-1, 1)) -563 -564 max_tol = tol[i] -565 if dims[i] == "mz": -566 # Maximum absolute tolerance -567 max_tol = mz_diff + tol[i] -568 -569 # Compute sparse distance matrix -570 # the larger the max_tol, the slower this operation is -571 sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix") -572 -573 # Only consider forward case, exclude diagonal -574 sdm = sparse.triu(sdm, k=1) +550 # Sort my ascending mz so we always get the monoisotopic mass first, regardless of the order/intensity of the mass features +551 mf_df = mf_df.sort_values(by=["mz"]).reset_index(drop=True).copy() +552 +553 mz_diff = 1.003355 # C13-C12 mass difference +554 tol = [ +555 mf_df["mz"].median() +556 * self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel, +557 self.parameters.lc_ms.mass_feature_cluster_rt_tolerance * 0.5, +558 ] # mz, in relative; scan_time in minutes +559 +560 # Compute inter-feature distances +561 distances = None +562 for i in 
range(len(dims)): +563 # Construct k-d tree +564 values = mf_df[dims[i]].values +565 tree = KDTree(values.reshape(-1, 1)) +566 +567 max_tol = tol[i] +568 if dims[i] == "mz": +569 # Maximum absolute tolerance +570 max_tol = mz_diff + tol[i] +571 +572 # Compute sparse distance matrix +573 # the larger the max_tol, the slower this operation is +574 sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix") 575 -576 if dims[i] == "mz": -577 min_tol = mz_diff - tol[i] -578 # Get only the ones that are above the min tol -579 idx = sdm.data > min_tol -580 -581 # Reconstruct sparse distance matrix -582 sdm = sparse.coo_matrix( -583 (sdm.data[idx], (sdm.row[idx], sdm.col[idx])), -584 shape=(len(values), len(values)), -585 ) -586 -587 # Cast as binary matrix -588 sdm.data = np.ones_like(sdm.data) +576 # Only consider forward case, exclude diagonal +577 sdm = sparse.triu(sdm, k=1) +578 +579 if dims[i] == "mz": +580 min_tol = mz_diff - tol[i] +581 # Get only the ones that are above the min tol +582 idx = sdm.data > min_tol +583 +584 # Reconstruct sparse distance matrix +585 sdm = sparse.coo_matrix( +586 (sdm.data[idx], (sdm.row[idx], sdm.col[idx])), +587 shape=(len(values), len(values)), +588 ) 589 -590 # Stack distances -591 if distances is None: -592 distances = sdm -593 else: -594 distances = distances.multiply(sdm) -595 -596 # Extract indices of within-tolerance points -597 distances = distances.tocoo() -598 pairs = np.stack((distances.row, distances.col), axis=1) # C12 to C13 pairs -599 -600 # Turn pairs (which are index of mf_df) into mf_id and then into two dataframes to join to mf_df -601 pairs_mf = pairs.copy() -602 pairs_mf[:, 0] = mf_df.iloc[pairs[:, 0]].mf_id.values -603 pairs_mf[:, 1] = mf_df.iloc[pairs[:, 1]].mf_id.values -604 -605 # Connect monoisotopic masses with isotopologes within mass_features -606 monos = np.setdiff1d(np.unique(pairs_mf[:, 0]), np.unique(pairs_mf[:, 1])) -607 for mono in monos: -608 
self.mass_features[mono].monoisotopic_mf_id = mono -609 pairs_iso_df = pd.DataFrame(pairs_mf, columns=["parent", "child"]) -610 while not pairs_iso_df.empty: -611 pairs_iso_df = pairs_iso_df.set_index("parent", drop=False) -612 m1_isos = pairs_iso_df.loc[monos, "child"].unique() -613 for iso in m1_isos: -614 # Set monoisotopic_mf_id and isotopologue_type for isotopologues -615 parent = pairs_mf[pairs_mf[:, 1] == iso, 0] -616 if len(parent) > 1: -617 # Choose the parent that is closest in time to the isotopologue -618 parent_time = [self.mass_features[p].retention_time for p in parent] -619 time_diff = [ -620 np.abs(self.mass_features[iso].retention_time - x) -621 for x in parent_time -622 ] -623 parent = parent[np.argmin(time_diff)] -624 else: -625 parent = parent[0] -626 self.mass_features[iso].monoisotopic_mf_id = self.mass_features[ -627 parent -628 ].monoisotopic_mf_id -629 if self.mass_features[iso].monoisotopic_mf_id is not None: -630 mass_diff = ( -631 self.mass_features[iso].mz -632 - self.mass_features[ -633 self.mass_features[iso].monoisotopic_mf_id -634 ].mz -635 ) -636 self.mass_features[iso].isotopologue_type = "13C" + str( -637 int(round(mass_diff, 0)) +590 # Cast as binary matrix +591 sdm.data = np.ones_like(sdm.data) +592 +593 # Stack distances +594 if distances is None: +595 distances = sdm +596 else: +597 distances = distances.multiply(sdm) +598 +599 # Extract indices of within-tolerance points +600 distances = distances.tocoo() +601 pairs = np.stack((distances.row, distances.col), axis=1) # C12 to C13 pairs +602 +603 # Turn pairs (which are index of mf_df) into mf_id and then into two dataframes to join to mf_df +604 pairs_mf = pairs.copy() +605 pairs_mf[:, 0] = mf_df.iloc[pairs[:, 0]].mf_id.values +606 pairs_mf[:, 1] = mf_df.iloc[pairs[:, 1]].mf_id.values +607 +608 # Connect monoisotopic masses with isotopologes within mass_features +609 monos = np.setdiff1d(np.unique(pairs_mf[:, 0]), np.unique(pairs_mf[:, 1])) +610 for mono in monos: +611 
self.mass_features[mono].monoisotopic_mf_id = mono +612 pairs_iso_df = pd.DataFrame(pairs_mf, columns=["parent", "child"]) +613 while not pairs_iso_df.empty: +614 pairs_iso_df = pairs_iso_df.set_index("parent", drop=False) +615 m1_isos = pairs_iso_df.loc[monos, "child"].unique() +616 for iso in m1_isos: +617 # Set monoisotopic_mf_id and isotopologue_type for isotopologues +618 parent = pairs_mf[pairs_mf[:, 1] == iso, 0] +619 if len(parent) > 1: +620 # Choose the parent that is closest in time to the isotopologue +621 parent_time = [self.mass_features[p].retention_time for p in parent] +622 time_diff = [ +623 np.abs(self.mass_features[iso].retention_time - x) +624 for x in parent_time +625 ] +626 parent = parent[np.argmin(time_diff)] +627 else: +628 parent = parent[0] +629 self.mass_features[iso].monoisotopic_mf_id = self.mass_features[ +630 parent +631 ].monoisotopic_mf_id +632 if self.mass_features[iso].monoisotopic_mf_id is not None: +633 mass_diff = ( +634 self.mass_features[iso].mz +635 - self.mass_features[ +636 self.mass_features[iso].monoisotopic_mf_id +637 ].mz 638 ) -639 -640 # Drop the mono and iso from the pairs_iso_df -641 pairs_iso_df = pairs_iso_df.drop( -642 index=monos, errors="ignore" -643 ) # Drop pairs where the parent is a child that is a child of a root -644 pairs_iso_df = pairs_iso_df.set_index("child", drop=False) -645 pairs_iso_df = pairs_iso_df.drop(index=m1_isos, errors="ignore") -646 -647 if not pairs_iso_df.empty: -648 # Get new monos, recognizing that these are just 13C isotopologues that are connected to other 13C isotopologues to repeat the process -649 monos = np.setdiff1d( -650 np.unique(pairs_iso_df.parent), np.unique(pairs_iso_df.child) -651 ) -652 if verbose: -653 # Report fraction of compounds annotated with isotopes -654 mf_df["c13_flag"] = np.where( -655 np.logical_or( -656 np.isin(mf_df["mf_id"], pairs_mf[:, 0]), -657 np.isin(mf_df["mf_id"], pairs_mf[:, 1]), -658 ), -659 1, -660 0, -661 ) -662 print( -663 
str(round(len(mf_df[mf_df["c13_flag"] == 1]) / len(mf_df), ndigits=3)) -664 + " of mass features have or are C13 isotopes" -665 ) -666 -667 def deconvolute_ms1_mass_features(self): -668 """Deconvolute MS1 mass features +639 self.mass_features[iso].isotopologue_type = "13C" + str( +640 int(round(mass_diff, 0)) +641 ) +642 +643 # Drop the mono and iso from the pairs_iso_df +644 pairs_iso_df = pairs_iso_df.drop( +645 index=monos, errors="ignore" +646 ) # Drop pairs where the parent is a child that is a child of a root +647 pairs_iso_df = pairs_iso_df.set_index("child", drop=False) +648 pairs_iso_df = pairs_iso_df.drop(index=m1_isos, errors="ignore") +649 +650 if not pairs_iso_df.empty: +651 # Get new monos, recognizing that these are just 13C isotopologues that are connected to other 13C isotopologues to repeat the process +652 monos = np.setdiff1d( +653 np.unique(pairs_iso_df.parent), np.unique(pairs_iso_df.child) +654 ) +655 if verbose: +656 # Report fraction of compounds annotated with isotopes +657 mf_df["c13_flag"] = np.where( +658 np.logical_or( +659 np.isin(mf_df["mf_id"], pairs_mf[:, 0]), +660 np.isin(mf_df["mf_id"], pairs_mf[:, 1]), +661 ), +662 1, +663 0, +664 ) +665 print( +666 str(round(len(mf_df[mf_df["c13_flag"] == 1]) / len(mf_df), ndigits=3)) +667 + " of mass features have or are C13 isotopes" +668 ) 669 -670 Deconvolute mass features ms1 spectrum based on the correlation of all masses within a spectrum over the EIC of the mass features -671 -672 Parameters -673 ---------- -674 None -675 -676 Returns -677 ------- -678 None, but assigns the _ms_deconvoluted_idx, mass_spectrum_deconvoluted_parent, -679 and associated_mass_features_deconvoluted attributes to the mass features in the -680 mass_features attribute of the LCMSBase object. -681 -682 Raises -683 ------ -684 ValueError -685 If no mass features are found, must run find_mass_features() first. -686 If no EICs are found, did you run integrate_mass_features() first? 
-687 -688 """ -689 # Checks for set mass_features and eics -690 if self.mass_features is None: -691 raise ValueError( -692 "No mass features found, did you run find_mass_features() first?" -693 ) -694 -695 if self.eics == {}: -696 raise ValueError( -697 "No EICs found, did you run integrate_mass_features() first?" -698 ) -699 -700 if 1 not in self._ms_unprocessed.keys(): -701 raise ValueError("No unprocessed MS1 spectra found.") +670 def deconvolute_ms1_mass_features(self): +671 """Deconvolute MS1 mass features +672 +673 Deconvolute mass features ms1 spectrum based on the correlation of all masses within a spectrum over the EIC of the mass features +674 +675 Parameters +676 ---------- +677 None +678 +679 Returns +680 ------- +681 None, but assigns the _ms_deconvoluted_idx, mass_spectrum_deconvoluted_parent, +682 and associated_mass_features_deconvoluted attributes to the mass features in the +683 mass_features attribute of the LCMSBase object. +684 +685 Raises +686 ------ +687 ValueError +688 If no mass features are found, must run find_mass_features() first. +689 If no EICs are found, did you run integrate_mass_features() first? +690 +691 """ +692 # Checks for set mass_features and eics +693 if self.mass_features is None: +694 raise ValueError( +695 "No mass features found, did you run find_mass_features() first?" +696 ) +697 +698 if self.eics == {}: +699 raise ValueError( +700 "No EICs found, did you run integrate_mass_features() first?" 
+701 ) 702 -703 # Prep ms1 data -704 ms1_data = self._ms_unprocessed[1].copy() -705 ms1_data = ms1_data.set_index("scan") -706 -707 # Prep mass feature summary -708 mass_feature_df = self.mass_features_to_df() +703 if 1 not in self._ms_unprocessed.keys(): +704 raise ValueError("No unprocessed MS1 spectra found.") +705 +706 # Prep ms1 data +707 ms1_data = self._ms_unprocessed[1].copy() +708 ms1_data = ms1_data.set_index("scan") 709 -710 # Loop through each mass feature -711 for mf_id, mass_feature in self.mass_features.items(): -712 -713 # Check that the mass_feature.mz attribute == the mz of the mass feature in the mass_feature_df -714 if mass_feature.mz != mass_feature.ms1_peak.mz_exp: -715 continue -716 -717 # Get the left and right limits of the EIC of the mass feature -718 l_scan, _, r_scan = mass_feature._eic_data.apexes[0] -719 -720 # Pull from the _ms1_unprocessed data the scan range of interest and sort by mz -721 ms1_data_sub = ms1_data.loc[l_scan:r_scan].copy() -722 ms1_data_sub = ms1_data_sub.sort_values(by=["mz"]).reset_index(drop=False) -723 -724 # Get the centroided masses of the mass feature -725 mf_mspeak_mzs = mass_feature.mass_spectrum.mz_exp -726 -727 # Find the closest mz in the ms1 data to the centroided masses of the mass feature -728 ms1_data_sub["mass_feature_mz"] = mf_mspeak_mzs[ -729 find_closest(mf_mspeak_mzs, ms1_data_sub.mz.values) -730 ] -731 -732 # Drop rows with mz_diff > 0.01 between the mass feature mz and the ms1 data mz -733 ms1_data_sub["mz_diff_rel"] = ( -734 np.abs(ms1_data_sub["mass_feature_mz"] - ms1_data_sub["mz"]) -735 / ms1_data_sub["mz"] -736 ) -737 ms1_data_sub = ms1_data_sub[ -738 ms1_data_sub["mz_diff_rel"] -739 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel -740 ].reset_index(drop=True) -741 -742 # Group by mass_feature_mz and scan and sum intensity -743 ms1_data_sub_group = ( -744 ms1_data_sub.groupby(["mass_feature_mz", "scan"])["intensity"] -745 .sum() -746 .reset_index() -747 ) -748 -749 # 
Calculate the correlation of the intensities of the mass feature and the ms1 data (set to 0 if no intensity) -750 corr = ( -751 ms1_data_sub_group.pivot( -752 index="scan", columns="mass_feature_mz", values="intensity" -753 ) -754 .fillna(0) -755 .corr() -756 ) -757 -758 # Subset the correlation matrix to only include the masses of the mass feature and those with a correlation > 0.8 -759 decon_corr_min = self.parameters.lc_ms.ms1_deconvolution_corr_min -760 decon_corr_min = 0.9 -761 corr_subset = corr.loc[mass_feature.mz,] -762 corr_subset = corr_subset[corr_subset > decon_corr_min] -763 -764 # Get the masses from the mass spectrum that are the result of the deconvolution -765 mzs_decon = corr_subset.index.values -766 -767 # Get the indices of the mzs_decon in mass_feature.mass_spectrum.mz_exp and assign to the mass feature -768 mzs_decon_idx = [ -769 id -770 for id, mz in enumerate(mass_feature.mass_spectrum.mz_exp) -771 if mz in mzs_decon -772 ] -773 mass_feature._ms_deconvoluted_idx = mzs_decon_idx -774 -775 # Check if the mass feature's ms1 peak is the largest in the deconvoluted mass spectrum -776 if ( -777 mass_feature.ms1_peak.abundance -778 == mass_feature.mass_spectrum.abundance[mzs_decon_idx].max() -779 ): -780 mass_feature.mass_spectrum_deconvoluted_parent = True -781 else: -782 mass_feature.mass_spectrum_deconvoluted_parent = False -783 -784 # Check for other mass features that are in the deconvoluted mass spectrum and add the deconvoluted mass spectrum to the mass feature -785 # Subset mass_feature_df to only include mass features that are within the clustering tolerance -786 mass_feature_df_sub = mass_feature_df[ -787 abs(mass_feature.retention_time - mass_feature_df["scan_time"]) -788 < self.parameters.lc_ms.mass_feature_cluster_rt_tolerance -789 ].copy() -790 # Calculate the mz difference in ppm between the mass feature and the peaks in the deconvoluted mass spectrum -791 mass_feature_df_sub["mz_diff_ppm"] = [ -792 np.abs(mzs_decon - mz).min() / mz 
* 10**6 -793 for mz in mass_feature_df_sub["mz"] -794 ] -795 # Subset mass_feature_df to only include mass features that are within 1 ppm of the deconvoluted masses -796 mfs_associated_decon = mass_feature_df_sub[ -797 mass_feature_df_sub["mz_diff_ppm"] -798 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel * 10**6 -799 ].index.values -800 -801 mass_feature.associated_mass_features_deconvoluted = mfs_associated_decon +710 # Prep mass feature summary +711 mass_feature_df = self.mass_features_to_df() +712 +713 # Loop through each mass feature +714 for mf_id, mass_feature in self.mass_features.items(): +715 # Check that the mass_feature.mz attribute == the mz of the mass feature in the mass_feature_df +716 if mass_feature.mz != mass_feature.ms1_peak.mz_exp: +717 continue +718 +719 # Get the left and right limits of the EIC of the mass feature +720 l_scan, _, r_scan = mass_feature._eic_data.apexes[0] +721 +722 # Pull from the _ms1_unprocessed data the scan range of interest and sort by mz +723 ms1_data_sub = ms1_data.loc[l_scan:r_scan].copy() +724 ms1_data_sub = ms1_data_sub.sort_values(by=["mz"]).reset_index(drop=False) +725 +726 # Get the centroided masses of the mass feature +727 mf_mspeak_mzs = mass_feature.mass_spectrum.mz_exp +728 +729 # Find the closest mz in the ms1 data to the centroided masses of the mass feature +730 ms1_data_sub["mass_feature_mz"] = mf_mspeak_mzs[ +731 find_closest(mf_mspeak_mzs, ms1_data_sub.mz.values) +732 ] +733 +734 # Drop rows with mz_diff > 0.01 between the mass feature mz and the ms1 data mz +735 ms1_data_sub["mz_diff_rel"] = ( +736 np.abs(ms1_data_sub["mass_feature_mz"] - ms1_data_sub["mz"]) +737 / ms1_data_sub["mz"] +738 ) +739 ms1_data_sub = ms1_data_sub[ +740 ms1_data_sub["mz_diff_rel"] +741 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel +742 ].reset_index(drop=True) +743 +744 # Group by mass_feature_mz and scan and sum intensity +745 ms1_data_sub_group = ( +746 ms1_data_sub.groupby(["mass_feature_mz", 
"scan"])["intensity"] +747 .sum() +748 .reset_index() +749 ) +750 +751 # Calculate the correlation of the intensities of the mass feature and the ms1 data (set to 0 if no intensity) +752 corr = ( +753 ms1_data_sub_group.pivot( +754 index="scan", columns="mass_feature_mz", values="intensity" +755 ) +756 .fillna(0) +757 .corr() +758 ) +759 +760 # Subset the correlation matrix to only include the masses of the mass feature and those with a correlation > 0.8 +761 decon_corr_min = self.parameters.lc_ms.ms1_deconvolution_corr_min +762 decon_corr_min = 0.9 +763 corr_subset = corr.loc[mass_feature.mz,] +764 corr_subset = corr_subset[corr_subset > decon_corr_min] +765 +766 # Get the masses from the mass spectrum that are the result of the deconvolution +767 mzs_decon = corr_subset.index.values +768 +769 # Get the indices of the mzs_decon in mass_feature.mass_spectrum.mz_exp and assign to the mass feature +770 mzs_decon_idx = [ +771 id +772 for id, mz in enumerate(mass_feature.mass_spectrum.mz_exp) +773 if mz in mzs_decon +774 ] +775 mass_feature._ms_deconvoluted_idx = mzs_decon_idx +776 +777 # Check if the mass feature's ms1 peak is the largest in the deconvoluted mass spectrum +778 if ( +779 mass_feature.ms1_peak.abundance +780 == mass_feature.mass_spectrum.abundance[mzs_decon_idx].max() +781 ): +782 mass_feature.mass_spectrum_deconvoluted_parent = True +783 else: +784 mass_feature.mass_spectrum_deconvoluted_parent = False +785 +786 # Check for other mass features that are in the deconvoluted mass spectrum and add the deconvoluted mass spectrum to the mass feature +787 # Subset mass_feature_df to only include mass features that are within the clustering tolerance +788 mass_feature_df_sub = mass_feature_df[ +789 abs(mass_feature.retention_time - mass_feature_df["scan_time"]) +790 < self.parameters.lc_ms.mass_feature_cluster_rt_tolerance +791 ].copy() +792 # Calculate the mz difference in ppm between the mass feature and the peaks in the deconvoluted mass spectrum +793 
mass_feature_df_sub["mz_diff_ppm"] = [ +794 np.abs(mzs_decon - mz).min() / mz * 10**6 +795 for mz in mass_feature_df_sub["mz"] +796 ] +797 # Subset mass_feature_df to only include mass features that are within 1 ppm of the deconvoluted masses +798 mfs_associated_decon = mass_feature_df_sub[ +799 mass_feature_df_sub["mz_diff_ppm"] +800 < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel * 10**6 +801 ].index.values +802 +803 mass_feature.associated_mass_features_deconvoluted = mfs_associated_decon @@ -2744,7 +2745,7 @@
    Returns
    232 polarity : int, optional 233 The polarity of the mass spectra (1 or -1). If not set, the polarity will be determined from the dataset. Defaults to None. (fastest if set to -1 or 1) 234 ms_params : MSParameters, optional -235 The mass spectrum parameters to use. If not set (None), the globally set parameters will be used. Defaults to None. +235 The mass spectrum parameters to use. If not set (None), the globally set parameters will be used. Defaults to None. 236 237 Returns 238 ------- @@ -2921,20 +2922,16 @@
    Raises
    354 if pp_method == "persistent homology": 355 msx_scan_df = self.scan_df[self.scan_df["ms_level"] == ms_level] 356 if all(msx_scan_df["ms_format"] == "profile"): -357 self.find_mass_features_ph( -358 ms_level=ms_level, grid=grid -359 ) -360 self.cluster_mass_features( -361 drop_children=True, sort_by="persistence" -362 ) -363 else: -364 raise ValueError( -365 "MS{} scans are not profile mode, which is required for persistent homology peak picking.".format( -366 ms_level -367 ) -368 ) -369 else: -370 raise ValueError("Peak picking method not implemented") +357 self.find_mass_features_ph(ms_level=ms_level, grid=grid) +358 self.cluster_mass_features(drop_children=True, sort_by="persistence") +359 else: +360 raise ValueError( +361 "MS{} scans are not profile mode, which is required for persistent homology peak picking.".format( +362 ms_level +363 ) +364 ) +365 else: +366 raise ValueError("Peak picking method not implemented") @@ -2980,152 +2977,161 @@
    Returns
    -
    372    def integrate_mass_features(self, drop_if_fail=True, drop_duplicates=True, ms_level=1):
    -373        """Integrate mass features and extract EICs.
    +            
    368    def integrate_mass_features(
    +369        self, drop_if_fail=True, drop_duplicates=True, ms_level=1
    +370    ):
    +371        """Integrate mass features and extract EICs.
    +372
    +373        Populates the _eics attribute on the LCMSBase object for each unique mz in the mass_features dataframe and adds data (start_scan, final_scan, area) to the mass_features attribute.
     374
    -375        Populates the _eics attribute on the LCMSBase object for each unique mz in the mass_features dataframe and adds data (start_scan, final_scan, area) to the mass_features attribute.
    -376
    -377        Parameters
    -378        ----------
    -379        drop_if_fail : bool, optional
    -380            Whether to drop mass features if the EIC limit calculations fail.
    -381            Default is True.
    -382        drop_duplicates : bool, optional
    -383            Whether to mass features that appear to be duplicates 
    -384            (i.e., mz is similar to another mass feature and limits of the EIC are similar or encapsulating).
    -385            Default is True.
    -386        ms_level : int, optional
    -387            The MS level to use. Default is 1.
    -388
    -389        Raises
    -390        ------
    -391        ValueError
    -392            If no mass features are found.
    -393            If no MS level data is found for the given MS level (either in data or in the scan data)
    -394
    -395        Returns
    -396        -------
    -397        None, but populates the eics attribute on the LCMSBase object and adds data (start_scan, final_scan, area) to the mass_features attribute.
    -398
    -399        Notes
    -400        -----
    -401        drop_if_fail is useful for discarding mass features that do not have good shapes, usually due to a detection on a shoulder of a peak or a noisy region (especially if minimal smoothing is used during mass feature detection).
    -402        """
    -403        # Check if there is data
    -404        if ms_level in self._ms_unprocessed.keys():
    -405            raw_data = self._ms_unprocessed[ms_level].copy()
    -406        else:
    -407            raise ValueError("No MS level " + str(ms_level) + " data found")
    -408        if self.mass_features is not None:
    -409            mf_df = self.mass_features_to_df().copy()
    -410        else:
    -411            raise ValueError(
    -412                "No mass features found, did you run find_mass_features() first?"
    -413            )
    -414        # Check if mass_spectrum exists on each mass feature
    -415        if not all(
    -416            [mf.mass_spectrum is not None for mf in self.mass_features.values()]
    -417        ):
    -418            raise ValueError(
    -419                "Mass spectrum must be associated with each mass feature, did you run add_associated_ms1() first?"
    -420            )
    -421
    -422        # Subset scan data to only include correct ms_level
    -423        scan_df_sub = self.scan_df[
    -424            self.scan_df["ms_level"] == int(ms_level)
    -425        ].reset_index(drop=True)
    -426        if scan_df_sub.empty:
    -427            raise ValueError("No MS level " + ms_level + " data found in scan data")
    -428        scan_df_sub = scan_df_sub[["scan", "scan_time"]].copy()
    -429
    -430        mzs_to_extract = np.unique(mf_df["mz"].values)
    -431        mzs_to_extract.sort()
    -432
    -433        # Get EICs for each unique mz in mass features list
    -434        for mz in mzs_to_extract:
    -435            mz_max = mz + self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6
    -436            mz_min = mz - self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6
    -437            raw_data_sub = raw_data[
    -438                (raw_data["mz"] >= mz_min) & (raw_data["mz"] <= mz_max)
    -439            ].reset_index(drop=True)
    -440            raw_data_sub = (
    -441                raw_data_sub.groupby(["scan"])["intensity"].sum().reset_index()
    -442            )
    -443            raw_data_sub = scan_df_sub.merge(raw_data_sub, on="scan", how="left")
    -444            raw_data_sub["intensity"] = raw_data_sub["intensity"].fillna(0)
    -445            myEIC = EIC_Data(
    -446                scans=raw_data_sub["scan"].values,
    -447                time=raw_data_sub["scan_time"].values,
    -448                eic=raw_data_sub["intensity"].values,
    -449            )
    -450            # Smooth EIC
    -451            smoothed_eic = self.smooth_tic(myEIC.eic)
    -452            smoothed_eic[smoothed_eic < 0] = 0
    -453            myEIC.eic_smoothed = smoothed_eic
    -454            self.eics[mz] = myEIC
    -455
    -456        # Get limits of mass features using EIC centroid detector and integrate
    -457        mf_df["area"] = np.nan
    -458        for idx, mass_feature in mf_df.iterrows():
    -459            mz = mass_feature.mz
    -460            apex_scan = mass_feature.apex_scan
    -461
    -462            # Pull EIC data and find apex scan index
    -463            myEIC = self.eics[mz]
    -464            self.mass_features[idx]._eic_data = myEIC
    -465            apex_index = np.where(myEIC.scans == apex_scan)[0][0]
    -466
    -467            # Find left and right limits of peak using EIC centroid detector, add to EICData
    -468            centroid_eics = self.eic_centroid_detector(
    -469                myEIC.time,
    -470                myEIC.eic_smoothed,
    -471                mass_feature.intensity * 1.1,
    -472                apex_indexes=[int(apex_index)],
    -473            )
    -474            l_a_r_scan_idx = [i for i in centroid_eics]
    -475            if len(l_a_r_scan_idx) > 0:
    -476                # Add start and final scan to mass_features and EICData
    -477                left_scan, right_scan = (
    -478                    myEIC.scans[l_a_r_scan_idx[0][0]],
    -479                    myEIC.scans[l_a_r_scan_idx[0][2]],
    -480                )
    -481                mf_scan_apex = [(left_scan, int(apex_scan), right_scan)]
    -482                myEIC.apexes = myEIC.apexes + mf_scan_apex
    -483                self.mass_features[idx].start_scan = left_scan
    -484                self.mass_features[idx].final_scan = right_scan
    -485
    -486                # Find area under peak using limits from EIC centroid detector, add to mass_features and EICData
    -487                area = np.trapz(
    -488                    myEIC.eic_smoothed[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1],
    -489                    myEIC.time[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1],
    -490                )
    -491                mf_df.at[idx, "area"] = area
    -492                myEIC.areas = myEIC.areas + [area]
    -493                self.eics[mz] = myEIC
    -494                self.mass_features[idx]._area = area
    -495            else:
    -496                if drop_if_fail is True:
    -497                    self.mass_features.pop(idx)
    -498        
    -499        if drop_duplicates:
    -500            # Prepare mass feature dataframe
    -501            mf_df = self.mass_features_to_df().copy()
    -502
    -503            # For each mass feature, find all mass features within the clustering tolerance ppm and drop if their start and end times are within another mass feature
    -504            # Kepp the first mass fea
    -505            for idx, mass_feature in mf_df.iterrows():
    -506                mz = mass_feature.mz
    -507                apex_scan = mass_feature.apex_scan
    -508
    -509                mf_df["mz_diff_ppm"] = np.abs(mf_df["mz"] - mz) / mz * 10**6
    -510                mf_df_sub = mf_df[mf_df["mz_diff_ppm"] < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel * 10**6].copy()
    -511
    -512                # For all mass features within the clustering tolerance, check if the start and end times are within the start and end times of the mass feature
    -513                for idx2, mass_feature2 in mf_df_sub.iterrows():
    -514                    if idx2 != idx:
    -515                        if mass_feature2.start_scan >= mass_feature.start_scan and mass_feature2.final_scan <= mass_feature.final_scan:
    -516                            if idx2 in self.mass_features.keys():
    -517                                self.mass_features.pop(idx2)
    +375        Parameters
    +376        ----------
    +377        drop_if_fail : bool, optional
    +378            Whether to drop mass features if the EIC limit calculations fail.
    +379            Default is True.
    +380        drop_duplicates : bool, optional
    +381            Whether to mass features that appear to be duplicates
    +382            (i.e., mz is similar to another mass feature and limits of the EIC are similar or encapsulating).
    +383            Default is True.
    +384        ms_level : int, optional
    +385            The MS level to use. Default is 1.
    +386
    +387        Raises
    +388        ------
    +389        ValueError
    +390            If no mass features are found.
    +391            If no MS level data is found for the given MS level (either in data or in the scan data)
    +392
    +393        Returns
    +394        -------
    +395        None, but populates the eics attribute on the LCMSBase object and adds data (start_scan, final_scan, area) to the mass_features attribute.
    +396
    +397        Notes
    +398        -----
    +399        drop_if_fail is useful for discarding mass features that do not have good shapes, usually due to a detection on a shoulder of a peak or a noisy region (especially if minimal smoothing is used during mass feature detection).
    +400        """
    +401        # Check if there is data
    +402        if ms_level in self._ms_unprocessed.keys():
    +403            raw_data = self._ms_unprocessed[ms_level].copy()
    +404        else:
    +405            raise ValueError("No MS level " + str(ms_level) + " data found")
    +406        if self.mass_features is not None:
    +407            mf_df = self.mass_features_to_df().copy()
    +408        else:
    +409            raise ValueError(
    +410                "No mass features found, did you run find_mass_features() first?"
    +411            )
    +412        # Check if mass_spectrum exists on each mass feature
    +413        if not all(
    +414            [mf.mass_spectrum is not None for mf in self.mass_features.values()]
    +415        ):
    +416            raise ValueError(
    +417                "Mass spectrum must be associated with each mass feature, did you run add_associated_ms1() first?"
    +418            )
    +419
    +420        # Subset scan data to only include correct ms_level
    +421        scan_df_sub = self.scan_df[
    +422            self.scan_df["ms_level"] == int(ms_level)
    +423        ].reset_index(drop=True)
    +424        if scan_df_sub.empty:
    +425            raise ValueError("No MS level " + ms_level + " data found in scan data")
    +426        scan_df_sub = scan_df_sub[["scan", "scan_time"]].copy()
    +427
    +428        mzs_to_extract = np.unique(mf_df["mz"].values)
    +429        mzs_to_extract.sort()
    +430
    +431        # Get EICs for each unique mz in mass features list
    +432        for mz in mzs_to_extract:
    +433            mz_max = mz + self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6
    +434            mz_min = mz - self.parameters.lc_ms.eic_tolerance_ppm * mz / 1e6
    +435            raw_data_sub = raw_data[
    +436                (raw_data["mz"] >= mz_min) & (raw_data["mz"] <= mz_max)
    +437            ].reset_index(drop=True)
    +438            raw_data_sub = (
    +439                raw_data_sub.groupby(["scan"])["intensity"].sum().reset_index()
    +440            )
    +441            raw_data_sub = scan_df_sub.merge(raw_data_sub, on="scan", how="left")
    +442            raw_data_sub["intensity"] = raw_data_sub["intensity"].fillna(0)
    +443            myEIC = EIC_Data(
    +444                scans=raw_data_sub["scan"].values,
    +445                time=raw_data_sub["scan_time"].values,
    +446                eic=raw_data_sub["intensity"].values,
    +447            )
    +448            # Smooth EIC
    +449            smoothed_eic = self.smooth_tic(myEIC.eic)
    +450            smoothed_eic[smoothed_eic < 0] = 0
    +451            myEIC.eic_smoothed = smoothed_eic
    +452            self.eics[mz] = myEIC
    +453
    +454        # Get limits of mass features using EIC centroid detector and integrate
    +455        mf_df["area"] = np.nan
    +456        for idx, mass_feature in mf_df.iterrows():
    +457            mz = mass_feature.mz
    +458            apex_scan = mass_feature.apex_scan
    +459
    +460            # Pull EIC data and find apex scan index
    +461            myEIC = self.eics[mz]
    +462            self.mass_features[idx]._eic_data = myEIC
    +463            apex_index = np.where(myEIC.scans == apex_scan)[0][0]
    +464
    +465            # Find left and right limits of peak using EIC centroid detector, add to EICData
    +466            centroid_eics = self.eic_centroid_detector(
    +467                myEIC.time,
    +468                myEIC.eic_smoothed,
    +469                mass_feature.intensity * 1.1,
    +470                apex_indexes=[int(apex_index)],
    +471            )
    +472            l_a_r_scan_idx = [i for i in centroid_eics]
    +473            if len(l_a_r_scan_idx) > 0:
    +474                # Add start and final scan to mass_features and EICData
    +475                left_scan, right_scan = (
    +476                    myEIC.scans[l_a_r_scan_idx[0][0]],
    +477                    myEIC.scans[l_a_r_scan_idx[0][2]],
    +478                )
    +479                mf_scan_apex = [(left_scan, int(apex_scan), right_scan)]
    +480                myEIC.apexes = myEIC.apexes + mf_scan_apex
    +481                self.mass_features[idx].start_scan = left_scan
    +482                self.mass_features[idx].final_scan = right_scan
    +483
    +484                # Find area under peak using limits from EIC centroid detector, add to mass_features and EICData
    +485                area = np.trapz(
    +486                    myEIC.eic_smoothed[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1],
    +487                    myEIC.time[l_a_r_scan_idx[0][0] : l_a_r_scan_idx[0][2] + 1],
    +488                )
    +489                mf_df.at[idx, "area"] = area
    +490                myEIC.areas = myEIC.areas + [area]
    +491                self.eics[mz] = myEIC
    +492                self.mass_features[idx]._area = area
    +493            else:
    +494                if drop_if_fail is True:
    +495                    self.mass_features.pop(idx)
    +496
    +497        if drop_duplicates:
    +498            # Prepare mass feature dataframe
    +499            mf_df = self.mass_features_to_df().copy()
    +500
    +501            # For each mass feature, find all mass features within the clustering tolerance ppm and drop if their start and end times are within another mass feature
    +502            # Kepp the first mass fea
    +503            for idx, mass_feature in mf_df.iterrows():
    +504                mz = mass_feature.mz
    +505                apex_scan = mass_feature.apex_scan
    +506
    +507                mf_df["mz_diff_ppm"] = np.abs(mf_df["mz"] - mz) / mz * 10**6
    +508                mf_df_sub = mf_df[
    +509                    mf_df["mz_diff_ppm"]
    +510                    < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel
    +511                    * 10**6
    +512                ].copy()
    +513
    +514                # For all mass features within the clustering tolerance, check if the start and end times are within the start and end times of the mass feature
    +515                for idx2, mass_feature2 in mf_df_sub.iterrows():
    +516                    if idx2 != idx:
    +517                        if (
    +518                            mass_feature2.start_scan >= mass_feature.start_scan
    +519                            and mass_feature2.final_scan <= mass_feature.final_scan
    +520                        ):
    +521                            if idx2 in self.mass_features.keys():
    +522                                self.mass_features.pop(idx2)
     
    @@ -3140,7 +3146,7 @@
    Parameters
    Whether to drop mass features if the EIC limit calculations fail. Default is True.
  • drop_duplicates (bool, optional): -Whether to mass features that appear to be duplicates +Whether to mass features that appear to be duplicates (i.e., mz is similar to another mass feature and limits of the EIC are similar or encapsulating). Default is True.
  • ms_level (int, optional): @@ -3178,151 +3184,151 @@
    Notes
  • -
    521    def find_c13_mass_features(self):
    -522        """Mark likely C13 isotopes and connect to monoisoitopic mass features.
    -523
    -524        Returns
    -525        -------
    -526        None, but populates the monoisotopic_mf_id and isotopologue_type attributes to the indivual LCMSMassFeatures within the mass_features attribute of the LCMSBase object.
    -527
    -528        Raises
    -529        ------
    -530        ValueError
    -531            If no mass features are found.
    -532        """
    -533        verbose = self.parameters.lc_ms.verbose_processing
    -534        if verbose:
    -535            print("evaluating mass features for C13 isotopes")
    -536        if self.mass_features is None:
    -537            raise ValueError("No mass features found, run find_mass_features() first")
    -538
    -539        # Data prep fo sparse distance matrix
    -540        dims = ["mz", "scan_time"]
    -541        mf_df = self.mass_features_to_df().copy()
    -542        # Drop mass features that have no area (these are likely to be noise)
    -543        mf_df = mf_df[mf_df["area"].notnull()]
    -544        mf_df["mf_id"] = mf_df.index.values
    -545        dims = ["mz", "scan_time"]
    -546
    -547        # Sort my ascending mz so we always get the monoisotopic mass first, regardless of the order/intensity of the mass features
    -548        mf_df = mf_df.sort_values(by=["mz"]).reset_index(drop=True).copy()
    +            
    524    def find_c13_mass_features(self):
    +525        """Mark likely C13 isotopes and connect to monoisoitopic mass features.
    +526
    +527        Returns
    +528        -------
    +529        None, but populates the monoisotopic_mf_id and isotopologue_type attributes to the indivual LCMSMassFeatures within the mass_features attribute of the LCMSBase object.
    +530
    +531        Raises
    +532        ------
    +533        ValueError
    +534            If no mass features are found.
    +535        """
    +536        verbose = self.parameters.lc_ms.verbose_processing
    +537        if verbose:
    +538            print("evaluating mass features for C13 isotopes")
    +539        if self.mass_features is None:
    +540            raise ValueError("No mass features found, run find_mass_features() first")
    +541
    +542        # Data prep fo sparse distance matrix
    +543        dims = ["mz", "scan_time"]
    +544        mf_df = self.mass_features_to_df().copy()
    +545        # Drop mass features that have no area (these are likely to be noise)
    +546        mf_df = mf_df[mf_df["area"].notnull()]
    +547        mf_df["mf_id"] = mf_df.index.values
    +548        dims = ["mz", "scan_time"]
     549
    -550        mz_diff = 1.003355  # C13-C12 mass difference
    -551        tol = [
    -552            mf_df["mz"].median()
    -553            * self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel,
    -554            self.parameters.lc_ms.mass_feature_cluster_rt_tolerance * 0.5,
    -555        ]  # mz, in relative; scan_time in minutes
    -556
    -557        # Compute inter-feature distances
    -558        distances = None
    -559        for i in range(len(dims)):
    -560            # Construct k-d tree
    -561            values = mf_df[dims[i]].values
    -562            tree = KDTree(values.reshape(-1, 1))
    -563
    -564            max_tol = tol[i]
    -565            if dims[i] == "mz":
    -566                # Maximum absolute tolerance
    -567                max_tol = mz_diff + tol[i]
    -568
    -569            # Compute sparse distance matrix
    -570            # the larger the max_tol, the slower this operation is
    -571            sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix")
    -572
    -573            # Only consider forward case, exclude diagonal
    -574            sdm = sparse.triu(sdm, k=1)
    +550        # Sort my ascending mz so we always get the monoisotopic mass first, regardless of the order/intensity of the mass features
    +551        mf_df = mf_df.sort_values(by=["mz"]).reset_index(drop=True).copy()
    +552
    +553        mz_diff = 1.003355  # C13-C12 mass difference
    +554        tol = [
    +555            mf_df["mz"].median()
    +556            * self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel,
    +557            self.parameters.lc_ms.mass_feature_cluster_rt_tolerance * 0.5,
    +558        ]  # mz, in relative; scan_time in minutes
    +559
    +560        # Compute inter-feature distances
    +561        distances = None
    +562        for i in range(len(dims)):
    +563            # Construct k-d tree
    +564            values = mf_df[dims[i]].values
    +565            tree = KDTree(values.reshape(-1, 1))
    +566
    +567            max_tol = tol[i]
    +568            if dims[i] == "mz":
    +569                # Maximum absolute tolerance
    +570                max_tol = mz_diff + tol[i]
    +571
    +572            # Compute sparse distance matrix
    +573            # the larger the max_tol, the slower this operation is
    +574            sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix")
     575
    -576            if dims[i] == "mz":
    -577                min_tol = mz_diff - tol[i]
    -578                # Get only the ones that are above the min tol
    -579                idx = sdm.data > min_tol
    -580
    -581                # Reconstruct sparse distance matrix
    -582                sdm = sparse.coo_matrix(
    -583                    (sdm.data[idx], (sdm.row[idx], sdm.col[idx])),
    -584                    shape=(len(values), len(values)),
    -585                )
    -586
    -587            # Cast as binary matrix
    -588            sdm.data = np.ones_like(sdm.data)
    +576            # Only consider forward case, exclude diagonal
    +577            sdm = sparse.triu(sdm, k=1)
    +578
    +579            if dims[i] == "mz":
    +580                min_tol = mz_diff - tol[i]
    +581                # Get only the ones that are above the min tol
    +582                idx = sdm.data > min_tol
    +583
    +584                # Reconstruct sparse distance matrix
    +585                sdm = sparse.coo_matrix(
    +586                    (sdm.data[idx], (sdm.row[idx], sdm.col[idx])),
    +587                    shape=(len(values), len(values)),
    +588                )
     589
    -590            # Stack distances
    -591            if distances is None:
    -592                distances = sdm
    -593            else:
    -594                distances = distances.multiply(sdm)
    -595
    -596        # Extract indices of within-tolerance points
    -597        distances = distances.tocoo()
    -598        pairs = np.stack((distances.row, distances.col), axis=1)  # C12 to C13 pairs
    -599
    -600        # Turn pairs (which are index of mf_df) into mf_id and then into two dataframes to join to mf_df
    -601        pairs_mf = pairs.copy()
    -602        pairs_mf[:, 0] = mf_df.iloc[pairs[:, 0]].mf_id.values
    -603        pairs_mf[:, 1] = mf_df.iloc[pairs[:, 1]].mf_id.values
    -604
    -605        # Connect monoisotopic masses with isotopologes within mass_features
    -606        monos = np.setdiff1d(np.unique(pairs_mf[:, 0]), np.unique(pairs_mf[:, 1]))
    -607        for mono in monos:
    -608            self.mass_features[mono].monoisotopic_mf_id = mono
    -609        pairs_iso_df = pd.DataFrame(pairs_mf, columns=["parent", "child"])
    -610        while not pairs_iso_df.empty:
    -611            pairs_iso_df = pairs_iso_df.set_index("parent", drop=False)
    -612            m1_isos = pairs_iso_df.loc[monos, "child"].unique()
    -613            for iso in m1_isos:
    -614                # Set monoisotopic_mf_id and isotopologue_type for isotopologues
    -615                parent = pairs_mf[pairs_mf[:, 1] == iso, 0]
    -616                if len(parent) > 1:
    -617                    # Choose the parent that is closest in time to the isotopologue
    -618                    parent_time = [self.mass_features[p].retention_time for p in parent]
    -619                    time_diff = [
    -620                        np.abs(self.mass_features[iso].retention_time - x)
    -621                        for x in parent_time
    -622                    ]
    -623                    parent = parent[np.argmin(time_diff)]
    -624                else:
    -625                    parent = parent[0]
    -626                self.mass_features[iso].monoisotopic_mf_id = self.mass_features[
    -627                    parent
    -628                ].monoisotopic_mf_id
    -629                if self.mass_features[iso].monoisotopic_mf_id is not None:
    -630                    mass_diff = (
    -631                        self.mass_features[iso].mz
    -632                        - self.mass_features[
    -633                            self.mass_features[iso].monoisotopic_mf_id
    -634                        ].mz
    -635                    )
    -636                    self.mass_features[iso].isotopologue_type = "13C" + str(
    -637                        int(round(mass_diff, 0))
    +590            # Cast as binary matrix
    +591            sdm.data = np.ones_like(sdm.data)
    +592
    +593            # Stack distances
    +594            if distances is None:
    +595                distances = sdm
    +596            else:
    +597                distances = distances.multiply(sdm)
    +598
    +599        # Extract indices of within-tolerance points
    +600        distances = distances.tocoo()
    +601        pairs = np.stack((distances.row, distances.col), axis=1)  # C12 to C13 pairs
    +602
    +603        # Turn pairs (which are index of mf_df) into mf_id and then into two dataframes to join to mf_df
    +604        pairs_mf = pairs.copy()
    +605        pairs_mf[:, 0] = mf_df.iloc[pairs[:, 0]].mf_id.values
    +606        pairs_mf[:, 1] = mf_df.iloc[pairs[:, 1]].mf_id.values
    +607
    +608        # Connect monoisotopic masses with isotopologes within mass_features
    +609        monos = np.setdiff1d(np.unique(pairs_mf[:, 0]), np.unique(pairs_mf[:, 1]))
    +610        for mono in monos:
    +611            self.mass_features[mono].monoisotopic_mf_id = mono
    +612        pairs_iso_df = pd.DataFrame(pairs_mf, columns=["parent", "child"])
    +613        while not pairs_iso_df.empty:
    +614            pairs_iso_df = pairs_iso_df.set_index("parent", drop=False)
    +615            m1_isos = pairs_iso_df.loc[monos, "child"].unique()
    +616            for iso in m1_isos:
    +617                # Set monoisotopic_mf_id and isotopologue_type for isotopologues
    +618                parent = pairs_mf[pairs_mf[:, 1] == iso, 0]
    +619                if len(parent) > 1:
    +620                    # Choose the parent that is closest in time to the isotopologue
    +621                    parent_time = [self.mass_features[p].retention_time for p in parent]
    +622                    time_diff = [
    +623                        np.abs(self.mass_features[iso].retention_time - x)
    +624                        for x in parent_time
    +625                    ]
    +626                    parent = parent[np.argmin(time_diff)]
    +627                else:
    +628                    parent = parent[0]
    +629                self.mass_features[iso].monoisotopic_mf_id = self.mass_features[
    +630                    parent
    +631                ].monoisotopic_mf_id
    +632                if self.mass_features[iso].monoisotopic_mf_id is not None:
    +633                    mass_diff = (
    +634                        self.mass_features[iso].mz
    +635                        - self.mass_features[
    +636                            self.mass_features[iso].monoisotopic_mf_id
    +637                        ].mz
     638                    )
    -639
    -640            # Drop the mono and iso from the pairs_iso_df
    -641            pairs_iso_df = pairs_iso_df.drop(
    -642                index=monos, errors="ignore"
    -643            )  # Drop pairs where the parent is a child that is a child of a root
    -644            pairs_iso_df = pairs_iso_df.set_index("child", drop=False)
    -645            pairs_iso_df = pairs_iso_df.drop(index=m1_isos, errors="ignore")
    -646
    -647            if not pairs_iso_df.empty:
    -648                # Get new monos, recognizing that these are just 13C isotopologues that are connected to other 13C isotopologues to repeat the process
    -649                monos = np.setdiff1d(
    -650                    np.unique(pairs_iso_df.parent), np.unique(pairs_iso_df.child)
    -651                )
    -652        if verbose:
    -653            # Report fraction of compounds annotated with isotopes
    -654            mf_df["c13_flag"] = np.where(
    -655                np.logical_or(
    -656                    np.isin(mf_df["mf_id"], pairs_mf[:, 0]),
    -657                    np.isin(mf_df["mf_id"], pairs_mf[:, 1]),
    -658                ),
    -659                1,
    -660                0,
    -661            )
    -662            print(
    -663                str(round(len(mf_df[mf_df["c13_flag"] == 1]) / len(mf_df), ndigits=3))
    -664                + " of mass features have or are C13 isotopes"
    -665            )
    +639                    self.mass_features[iso].isotopologue_type = "13C" + str(
    +640                        int(round(mass_diff, 0))
    +641                    )
    +642
    +643            # Drop the mono and iso from the pairs_iso_df
    +644            pairs_iso_df = pairs_iso_df.drop(
    +645                index=monos, errors="ignore"
    +646            )  # Drop pairs where the parent is a child that is a child of a root
    +647            pairs_iso_df = pairs_iso_df.set_index("child", drop=False)
    +648            pairs_iso_df = pairs_iso_df.drop(index=m1_isos, errors="ignore")
    +649
    +650            if not pairs_iso_df.empty:
    +651                # Get new monos, recognizing that these are just 13C isotopologues that are connected to other 13C isotopologues to repeat the process
    +652                monos = np.setdiff1d(
    +653                    np.unique(pairs_iso_df.parent), np.unique(pairs_iso_df.child)
    +654                )
    +655        if verbose:
    +656            # Report fraction of compounds annotated with isotopes
    +657            mf_df["c13_flag"] = np.where(
    +658                np.logical_or(
    +659                    np.isin(mf_df["mf_id"], pairs_mf[:, 0]),
    +660                    np.isin(mf_df["mf_id"], pairs_mf[:, 1]),
    +661                ),
    +662                1,
    +663                0,
    +664            )
    +665            print(
    +666                str(round(len(mf_df[mf_df["c13_flag"] == 1]) / len(mf_df), ndigits=3))
    +667                + " of mass features have or are C13 isotopes"
    +668            )
     
    @@ -3354,141 +3360,140 @@
    Raises
    -
    667    def deconvolute_ms1_mass_features(self):
    -668        """Deconvolute MS1 mass features
    -669
    -670        Deconvolute mass features ms1 spectrum based on the correlation of all masses within a spectrum over the EIC of the mass features
    -671
    -672        Parameters
    -673        ----------
    -674        None
    -675
    -676        Returns
    -677        -------
    -678        None, but assigns the _ms_deconvoluted_idx, mass_spectrum_deconvoluted_parent,
    -679        and associated_mass_features_deconvoluted attributes to the mass features in the
    -680        mass_features attribute of the LCMSBase object.
    -681
    -682        Raises
    -683        ------
    -684        ValueError
    -685            If no mass features are found, must run find_mass_features() first.
    -686            If no EICs are found, did you run integrate_mass_features() first?
    -687
    -688        """
    -689        # Checks for set mass_features and eics
    -690        if self.mass_features is None:
    -691            raise ValueError(
    -692                "No mass features found, did you run find_mass_features() first?"
    -693            )
    -694
    -695        if self.eics == {}:
    -696            raise ValueError(
    -697                "No EICs found, did you run integrate_mass_features() first?"
    -698            )
    -699
    -700        if 1 not in self._ms_unprocessed.keys():
    -701            raise ValueError("No unprocessed MS1 spectra found.")
    +            
    670    def deconvolute_ms1_mass_features(self):
    +671        """Deconvolute MS1 mass features
    +672
    +673        Deconvolute mass features ms1 spectrum based on the correlation of all masses within a spectrum over the EIC of the mass features
    +674
    +675        Parameters
    +676        ----------
    +677        None
    +678
    +679        Returns
    +680        -------
    +681        None, but assigns the _ms_deconvoluted_idx, mass_spectrum_deconvoluted_parent,
    +682        and associated_mass_features_deconvoluted attributes to the mass features in the
    +683        mass_features attribute of the LCMSBase object.
    +684
    +685        Raises
    +686        ------
    +687        ValueError
    +688            If no mass features are found, must run find_mass_features() first.
    +689            If no EICs are found, did you run integrate_mass_features() first?
    +690
    +691        """
    +692        # Checks for set mass_features and eics
    +693        if self.mass_features is None:
    +694            raise ValueError(
    +695                "No mass features found, did you run find_mass_features() first?"
    +696            )
    +697
    +698        if self.eics == {}:
    +699            raise ValueError(
    +700                "No EICs found, did you run integrate_mass_features() first?"
    +701            )
     702
    -703        # Prep ms1 data
    -704        ms1_data = self._ms_unprocessed[1].copy()
    -705        ms1_data = ms1_data.set_index("scan")
    -706
    -707        # Prep mass feature summary
    -708        mass_feature_df = self.mass_features_to_df()
    +703        if 1 not in self._ms_unprocessed.keys():
    +704            raise ValueError("No unprocessed MS1 spectra found.")
    +705
    +706        # Prep ms1 data
    +707        ms1_data = self._ms_unprocessed[1].copy()
    +708        ms1_data = ms1_data.set_index("scan")
     709
    -710        # Loop through each mass feature
    -711        for mf_id, mass_feature in self.mass_features.items():
    -712            
    -713            # Check that the mass_feature.mz attribute == the mz of the mass feature in the mass_feature_df
    -714            if mass_feature.mz != mass_feature.ms1_peak.mz_exp:
    -715                continue
    -716
    -717            # Get the left and right limits of the EIC of the mass feature
    -718            l_scan, _, r_scan = mass_feature._eic_data.apexes[0]
    -719
    -720            # Pull from the _ms1_unprocessed data the scan range of interest and sort by mz
    -721            ms1_data_sub = ms1_data.loc[l_scan:r_scan].copy()
    -722            ms1_data_sub = ms1_data_sub.sort_values(by=["mz"]).reset_index(drop=False)
    -723
    -724            # Get the centroided masses of the mass feature
    -725            mf_mspeak_mzs = mass_feature.mass_spectrum.mz_exp
    -726
    -727            # Find the closest mz in the ms1 data to the centroided masses of the mass feature
    -728            ms1_data_sub["mass_feature_mz"] = mf_mspeak_mzs[
    -729                find_closest(mf_mspeak_mzs, ms1_data_sub.mz.values)
    -730            ]
    -731
    -732            # Drop rows with mz_diff > 0.01 between the mass feature mz and the ms1 data mz
    -733            ms1_data_sub["mz_diff_rel"] = (
    -734                np.abs(ms1_data_sub["mass_feature_mz"] - ms1_data_sub["mz"])
    -735                / ms1_data_sub["mz"]
    -736            )
    -737            ms1_data_sub = ms1_data_sub[
    -738                ms1_data_sub["mz_diff_rel"]
    -739                < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel
    -740            ].reset_index(drop=True)
    -741
    -742            # Group by mass_feature_mz and scan and sum intensity
    -743            ms1_data_sub_group = (
    -744                ms1_data_sub.groupby(["mass_feature_mz", "scan"])["intensity"]
    -745                .sum()
    -746                .reset_index()
    -747            )
    -748
    -749            # Calculate the correlation of the intensities of the mass feature and the ms1 data (set to 0 if no intensity)
    -750            corr = (
    -751                ms1_data_sub_group.pivot(
    -752                    index="scan", columns="mass_feature_mz", values="intensity"
    -753                )
    -754                .fillna(0)
    -755                .corr()
    -756            )
    -757
    -758            # Subset the correlation matrix to only include the masses of the mass feature and those with a correlation > 0.8
    -759            decon_corr_min = self.parameters.lc_ms.ms1_deconvolution_corr_min
    -760            decon_corr_min = 0.9
    -761            corr_subset = corr.loc[mass_feature.mz,]
    -762            corr_subset = corr_subset[corr_subset > decon_corr_min]
    -763
    -764            # Get the masses from the mass spectrum that are the result of the deconvolution
    -765            mzs_decon = corr_subset.index.values
    -766
    -767            # Get the indices of the mzs_decon in mass_feature.mass_spectrum.mz_exp and assign to the mass feature
    -768            mzs_decon_idx = [
    -769                id
    -770                for id, mz in enumerate(mass_feature.mass_spectrum.mz_exp)
    -771                if mz in mzs_decon
    -772            ]
    -773            mass_feature._ms_deconvoluted_idx = mzs_decon_idx
    -774
    -775            # Check if the mass feature's ms1 peak is the largest in the deconvoluted mass spectrum
    -776            if (
    -777                mass_feature.ms1_peak.abundance
    -778                == mass_feature.mass_spectrum.abundance[mzs_decon_idx].max()
    -779            ):
    -780                mass_feature.mass_spectrum_deconvoluted_parent = True
    -781            else:
    -782                mass_feature.mass_spectrum_deconvoluted_parent = False
    -783
    -784            # Check for other mass features that are in the deconvoluted mass spectrum and add the deconvoluted mass spectrum to the mass feature
    -785            # Subset mass_feature_df to only include mass features that are within the clustering tolerance
    -786            mass_feature_df_sub = mass_feature_df[
    -787                abs(mass_feature.retention_time - mass_feature_df["scan_time"])
    -788                < self.parameters.lc_ms.mass_feature_cluster_rt_tolerance
    -789            ].copy()
    -790            # Calculate the mz difference in ppm between the mass feature and the peaks in the deconvoluted mass spectrum
    -791            mass_feature_df_sub["mz_diff_ppm"] = [
    -792                np.abs(mzs_decon - mz).min() / mz * 10**6
    -793                for mz in mass_feature_df_sub["mz"]
    -794            ]
    -795            # Subset mass_feature_df to only include mass features that are within 1 ppm of the deconvoluted masses
    -796            mfs_associated_decon = mass_feature_df_sub[
    -797                mass_feature_df_sub["mz_diff_ppm"]
    -798                < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel * 10**6
    -799            ].index.values
    -800
    -801            mass_feature.associated_mass_features_deconvoluted = mfs_associated_decon
    +710        # Prep mass feature summary
    +711        mass_feature_df = self.mass_features_to_df()
    +712
    +713        # Loop through each mass feature
    +714        for mf_id, mass_feature in self.mass_features.items():
    +715            # Check that the mass_feature.mz attribute == the mz of the mass feature in the mass_feature_df
    +716            if mass_feature.mz != mass_feature.ms1_peak.mz_exp:
    +717                continue
    +718
    +719            # Get the left and right limits of the EIC of the mass feature
    +720            l_scan, _, r_scan = mass_feature._eic_data.apexes[0]
    +721
    +722            # Pull from the _ms1_unprocessed data the scan range of interest and sort by mz
    +723            ms1_data_sub = ms1_data.loc[l_scan:r_scan].copy()
    +724            ms1_data_sub = ms1_data_sub.sort_values(by=["mz"]).reset_index(drop=False)
    +725
    +726            # Get the centroided masses of the mass feature
    +727            mf_mspeak_mzs = mass_feature.mass_spectrum.mz_exp
    +728
    +729            # Find the closest mz in the ms1 data to the centroided masses of the mass feature
    +730            ms1_data_sub["mass_feature_mz"] = mf_mspeak_mzs[
    +731                find_closest(mf_mspeak_mzs, ms1_data_sub.mz.values)
    +732            ]
    +733
    +734            # Drop rows with mz_diff > 0.01 between the mass feature mz and the ms1 data mz
    +735            ms1_data_sub["mz_diff_rel"] = (
    +736                np.abs(ms1_data_sub["mass_feature_mz"] - ms1_data_sub["mz"])
    +737                / ms1_data_sub["mz"]
    +738            )
    +739            ms1_data_sub = ms1_data_sub[
    +740                ms1_data_sub["mz_diff_rel"]
    +741                < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel
    +742            ].reset_index(drop=True)
    +743
    +744            # Group by mass_feature_mz and scan and sum intensity
    +745            ms1_data_sub_group = (
    +746                ms1_data_sub.groupby(["mass_feature_mz", "scan"])["intensity"]
    +747                .sum()
    +748                .reset_index()
    +749            )
    +750
    +751            # Calculate the correlation of the intensities of the mass feature and the ms1 data (set to 0 if no intensity)
    +752            corr = (
    +753                ms1_data_sub_group.pivot(
    +754                    index="scan", columns="mass_feature_mz", values="intensity"
    +755                )
    +756                .fillna(0)
    +757                .corr()
    +758            )
    +759
    +760            # Subset the correlation matrix to only include the masses of the mass feature and those with a correlation > 0.8
    +761            decon_corr_min = self.parameters.lc_ms.ms1_deconvolution_corr_min
    +762            decon_corr_min = 0.9
    +763            corr_subset = corr.loc[mass_feature.mz,]
    +764            corr_subset = corr_subset[corr_subset > decon_corr_min]
    +765
    +766            # Get the masses from the mass spectrum that are the result of the deconvolution
    +767            mzs_decon = corr_subset.index.values
    +768
    +769            # Get the indices of the mzs_decon in mass_feature.mass_spectrum.mz_exp and assign to the mass feature
    +770            mzs_decon_idx = [
    +771                id
    +772                for id, mz in enumerate(mass_feature.mass_spectrum.mz_exp)
    +773                if mz in mzs_decon
    +774            ]
    +775            mass_feature._ms_deconvoluted_idx = mzs_decon_idx
    +776
    +777            # Check if the mass feature's ms1 peak is the largest in the deconvoluted mass spectrum
    +778            if (
    +779                mass_feature.ms1_peak.abundance
    +780                == mass_feature.mass_spectrum.abundance[mzs_decon_idx].max()
    +781            ):
    +782                mass_feature.mass_spectrum_deconvoluted_parent = True
    +783            else:
    +784                mass_feature.mass_spectrum_deconvoluted_parent = False
    +785
    +786            # Check for other mass features that are in the deconvoluted mass spectrum and add the deconvoluted mass spectrum to the mass feature
    +787            # Subset mass_feature_df to only include mass features that are within the clustering tolerance
    +788            mass_feature_df_sub = mass_feature_df[
    +789                abs(mass_feature.retention_time - mass_feature_df["scan_time"])
    +790                < self.parameters.lc_ms.mass_feature_cluster_rt_tolerance
    +791            ].copy()
    +792            # Calculate the mz difference in ppm between the mass feature and the peaks in the deconvoluted mass spectrum
    +793            mass_feature_df_sub["mz_diff_ppm"] = [
    +794                np.abs(mzs_decon - mz).min() / mz * 10**6
    +795                for mz in mass_feature_df_sub["mz"]
    +796            ]
    +797            # Subset mass_feature_df to only include mass features that are within 1 ppm of the deconvoluted masses
    +798            mfs_associated_decon = mass_feature_df_sub[
    +799                mass_feature_df_sub["mz_diff_ppm"]
    +800                < self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel * 10**6
    +801            ].index.values
    +802
    +803            mass_feature.associated_mass_features_deconvoluted = mfs_associated_decon
     
    @@ -3532,627 +3537,624 @@
    Raises
    -
     804class PHCalculations:
    - 805    """Methods for performing calculations related to 2D peak picking via persistent homology on LCMS data.
    - 806
    - 807    Notes
    - 808    -----
    - 809    This class is intended to be used as a mixin for the LCMSBase class.
    - 810
    - 811    Methods
    - 812    -------
    - 813    * sparse_mean_filter(idx, V, radius=[0, 1, 1]).
    - 814        Sparse implementation of a mean filter.
    - 815    * embed_unique_indices(a).
    - 816        Creates an array of indices, sorted by unique element.
    - 817    * sparse_upper_star(idx, V).
    - 818        Sparse implementation of an upper star filtration.
    - 819    * check_if_grid(data).
    - 820        Check if the data is gridded in mz space.
    - 821    * grid_data(data).
    - 822        Grid the data in the mz dimension.
    - 823    * find_mass_features_ph(ms_level=1, grid=True).
    - 824        Find mass features within an LCMSBase object using persistent homology.
    - 825    * cluster_mass_features(drop_children=True).
    - 826        Cluster regions of interest.
    - 827    """
    - 828
    - 829    @staticmethod
    - 830    def sparse_mean_filter(idx, V, radius=[0, 1, 1]):
    - 831        """Sparse implementation of a mean filter.
    - 832
    - 833        Parameters
    - 834        ----------
    - 835        idx : :obj:`~numpy.array`
    - 836            Edge indices for each dimension (MxN).
    - 837        V : :obj:`~numpy.array`
    - 838            Array of intensity data (Mx1).
    - 839        radius : float or list
    - 840            Radius of the sparse filter in each dimension. Values less than
    - 841            zero indicate no connectivity in that dimension.
    - 842
    - 843        Returns
    - 844        -------
    - 845        :obj:`~numpy.array`
    - 846            Filtered intensities (Mx1).
    - 847
    - 848        Notes
    - 849        -----
    - 850        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos.
    - 851        This is a static method.
    - 852        """
    - 853
    - 854        # Copy indices
    - 855        idx = idx.copy().astype(V.dtype)
    - 856
    - 857        # Scale
    - 858        for i, r in enumerate(radius):
    - 859            # Increase inter-index distance
    - 860            if r < 1:
    - 861                idx[:, i] *= 2
    - 862
    - 863            # Do nothing
    - 864            elif r == 1:
    - 865                pass
    - 866
    - 867            # Decrease inter-index distance
    - 868            else:
    - 869                idx[:, i] /= r
    - 870
    - 871        # Connectivity matrix
    - 872        cmat = KDTree(idx)
    - 873        cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix")
    - 874        cmat.setdiag(1)
    - 875
    - 876        # Pair indices
    - 877        I, J = cmat.nonzero()
    - 878
    - 879        # Delete cmat
    - 880        cmat_shape = cmat.shape
    - 881        del cmat
    - 882
    - 883        # Sum over columns
    - 884        V_sum = sparse.bsr_matrix(
    - 885            (V[J], (I, I)), shape=cmat_shape, dtype=V.dtype
    - 886        ).diagonal(0)
    - 887
    - 888        # Count over columns
    - 889        V_count = sparse.bsr_matrix(
    - 890            (np.ones_like(J), (I, I)), shape=cmat_shape, dtype=V.dtype
    - 891        ).diagonal(0)
    - 892
    - 893        return V_sum / V_count
    +            
     806class PHCalculations:
    + 807    """Methods for performing calculations related to 2D peak picking via persistent homology on LCMS data.
    + 808
    + 809    Notes
    + 810    -----
    + 811    This class is intended to be used as a mixin for the LCMSBase class.
    + 812
    + 813    Methods
    + 814    -------
    + 815    * sparse_mean_filter(idx, V, radius=[0, 1, 1]).
    + 816        Sparse implementation of a mean filter.
    + 817    * embed_unique_indices(a).
    + 818        Creates an array of indices, sorted by unique element.
    + 819    * sparse_upper_star(idx, V).
    + 820        Sparse implementation of an upper star filtration.
    + 821    * check_if_grid(data).
    + 822        Check if the data is gridded in mz space.
    + 823    * grid_data(data).
    + 824        Grid the data in the mz dimension.
    + 825    * find_mass_features_ph(ms_level=1, grid=True).
    + 826        Find mass features within an LCMSBase object using persistent homology.
    + 827    * cluster_mass_features(drop_children=True).
    + 828        Cluster regions of interest.
    + 829    """
    + 830
    + 831    @staticmethod
    + 832    def sparse_mean_filter(idx, V, radius=[0, 1, 1]):
    + 833        """Sparse implementation of a mean filter.
    + 834
    + 835        Parameters
    + 836        ----------
    + 837        idx : :obj:`~numpy.array`
    + 838            Edge indices for each dimension (MxN).
    + 839        V : :obj:`~numpy.array`
    + 840            Array of intensity data (Mx1).
    + 841        radius : float or list
    + 842            Radius of the sparse filter in each dimension. Values less than
    + 843            zero indicate no connectivity in that dimension.
    + 844
    + 845        Returns
    + 846        -------
    + 847        :obj:`~numpy.array`
    + 848            Filtered intensities (Mx1).
    + 849
    + 850        Notes
    + 851        -----
    + 852        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos.
    + 853        This is a static method.
    + 854        """
    + 855
    + 856        # Copy indices
    + 857        idx = idx.copy().astype(V.dtype)
    + 858
    + 859        # Scale
    + 860        for i, r in enumerate(radius):
    + 861            # Increase inter-index distance
    + 862            if r < 1:
    + 863                idx[:, i] *= 2
    + 864
    + 865            # Do nothing
    + 866            elif r == 1:
    + 867                pass
    + 868
    + 869            # Decrease inter-index distance
    + 870            else:
    + 871                idx[:, i] /= r
    + 872
    + 873        # Connectivity matrix
    + 874        cmat = KDTree(idx)
    + 875        cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix")
    + 876        cmat.setdiag(1)
    + 877
    + 878        # Pair indices
    + 879        I, J = cmat.nonzero()
    + 880
    + 881        # Delete cmat
    + 882        cmat_shape = cmat.shape
    + 883        del cmat
    + 884
    + 885        # Sum over columns
    + 886        V_sum = sparse.bsr_matrix(
    + 887            (V[J], (I, I)), shape=cmat_shape, dtype=V.dtype
    + 888        ).diagonal(0)
    + 889
    + 890        # Count over columns
    + 891        V_count = sparse.bsr_matrix(
    + 892            (np.ones_like(J), (I, I)), shape=cmat_shape, dtype=V.dtype
    + 893        ).diagonal(0)
      894
    - 895    @staticmethod
    - 896    def embed_unique_indices(a):
    - 897        """Creates an array of indices, sorted by unique element.
    - 898
    - 899        Parameters
    - 900        ----------
    - 901        a : :obj:`~numpy.array`
    - 902            Array of unique elements (Mx1).
    - 903
    - 904        Returns
    - 905        -------
    - 906        :obj:`~numpy.array`
    - 907            Array of indices (Mx1).
    - 908
    - 909        Notes
    - 910        -----
    - 911        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    - 912        This is a static method.
    - 913        """
    - 914
    - 915        def count_tens(n):
    - 916            # Count tens
    - 917            ntens = (n - 1) // 10
    - 918
    - 919            while True:
    - 920                ntens_test = (ntens + n - 1) // 10
    - 921
    - 922                if ntens_test == ntens:
    - 923                    return ntens
    - 924                else:
    - 925                    ntens = ntens_test
    - 926
    - 927        def arange_exclude_10s(n):
    - 928            # How many 10s will there be?
    - 929            ntens = count_tens(n)
    - 930
    - 931            # Base array
    - 932            arr = np.arange(0, n + ntens)
    - 933
    - 934            # Exclude 10s
    - 935            arr = arr[(arr == 0) | (arr % 10 != 0)][:n]
    - 936
    - 937            return arr
    + 895        return V_sum / V_count
    + 896
    + 897    @staticmethod
    + 898    def embed_unique_indices(a):
    + 899        """Creates an array of indices, sorted by unique element.
    + 900
    + 901        Parameters
    + 902        ----------
    + 903        a : :obj:`~numpy.array`
    + 904            Array of unique elements (Mx1).
    + 905
    + 906        Returns
    + 907        -------
    + 908        :obj:`~numpy.array`
    + 909            Array of indices (Mx1).
    + 910
    + 911        Notes
    + 912        -----
    + 913        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    + 914        This is a static method.
    + 915        """
    + 916
    + 917        def count_tens(n):
    + 918            # Count tens
    + 919            ntens = (n - 1) // 10
    + 920
    + 921            while True:
    + 922                ntens_test = (ntens + n - 1) // 10
    + 923
    + 924                if ntens_test == ntens:
    + 925                    return ntens
    + 926                else:
    + 927                    ntens = ntens_test
    + 928
    + 929        def arange_exclude_10s(n):
    + 930            # How many 10s will there be?
    + 931            ntens = count_tens(n)
    + 932
    + 933            # Base array
    + 934            arr = np.arange(0, n + ntens)
    + 935
    + 936            # Exclude 10s
    + 937            arr = arr[(arr == 0) | (arr % 10 != 0)][:n]
      938
    - 939        # Creates an array of indices, sorted by unique element
    - 940        idx_sort = np.argsort(a)
    - 941        idx_unsort = np.argsort(idx_sort)
    - 942
    - 943        # Sorts records array so all unique elements are together
    - 944        sorted_a = a[idx_sort]
    - 945
    - 946        # Returns the unique values, the index of the first occurrence,
    - 947        # and the count for each element
    - 948        vals, idx_start, count = np.unique(
    - 949            sorted_a, return_index=True, return_counts=True
    - 950        )
    - 951
    - 952        # Splits the indices into separate arrays
    - 953        splits = np.split(idx_sort, idx_start[1:])
    - 954
    - 955        # Creates unique indices for each split
    - 956        idx_unq = np.concatenate([arange_exclude_10s(len(x)) for x in splits])
    - 957
    - 958        # Reorders according to input array
    - 959        idx_unq = idx_unq[idx_unsort]
    - 960
    - 961        # Magnitude of each index
    - 962        exp = np.log10(
    - 963            idx_unq, where=idx_unq > 0, out=np.zeros_like(idx_unq, dtype=np.float64)
    - 964        )
    - 965        idx_unq_mag = np.power(10, np.floor(exp) + 1)
    - 966
    - 967        # Result
    - 968        return a + idx_unq / idx_unq_mag
    - 969
    - 970    def sparse_upper_star(self, idx, V):
    - 971        """Sparse implementation of an upper star filtration.
    - 972
    - 973        Parameters
    - 974        ----------
    - 975        idx : :obj:`~numpy.array`
    - 976            Edge indices for each dimension (MxN).
    - 977        V : :obj:`~numpy.array`
    - 978            Array of intensity data (Mx1).
    - 979        Returns
    - 980        -------
    - 981        idx : :obj:`~numpy.array`
    - 982            Index of filtered points (Mx1).
    - 983        persistence : :obj:`~numpy.array`
    - 984            Persistence of each filtered point (Mx1).
    - 985
    - 986        Notes
    - 987        -----
    - 988        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    - 989        """
    - 990
    - 991        # Invert
    - 992        V = -1 * V.copy().astype(int)
    - 993
    - 994        # Embed indices
    - 995        V = self.embed_unique_indices(V)
    - 996
    - 997        # Connectivity matrix
    - 998        cmat = KDTree(idx)
    - 999        cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix")
    -1000        cmat.setdiag(1)
    -1001        cmat = sparse.triu(cmat)
    -1002
    -1003        # Pairwise minimums
    -1004        I, J = cmat.nonzero()
    -1005        d = np.maximum(V[I], V[J])
    -1006
    -1007        # Delete connectiity matrix
    -1008        cmat_shape = cmat.shape
    -1009        del cmat
    -1010
    -1011        # Sparse distance matrix
    -1012        sdm = sparse.coo_matrix((d, (I, J)), shape=cmat_shape)
    -1013
    -1014        # Delete pairwise mins
    -1015        del d, I, J
    -1016
    -1017        # Persistence homology
    -1018        ph = ripser(sdm, distance_matrix=True, maxdim=0)["dgms"][0]
    -1019
    -1020        # Bound death values
    -1021        ph[ph[:, 1] == np.inf, 1] = np.max(V)
    -1022
    -1023        # Construct tree to query against
    -1024        tree = KDTree(V.reshape((-1, 1)))
    -1025
    -1026        # Get the indexes of the first nearest neighbor by birth
    -1027        _, nn = tree.query(ph[:, 0].reshape((-1, 1)), k=1, workers=-1)
    -1028
    -1029        return nn, -(ph[:, 0] // 1 - ph[:, 1] // 1)
    + 939            return arr
    + 940
    + 941        # Creates an array of indices, sorted by unique element
    + 942        idx_sort = np.argsort(a)
    + 943        idx_unsort = np.argsort(idx_sort)
    + 944
    + 945        # Sorts records array so all unique elements are together
    + 946        sorted_a = a[idx_sort]
    + 947
    + 948        # Returns the unique values, the index of the first occurrence,
    + 949        # and the count for each element
    + 950        vals, idx_start, count = np.unique(
    + 951            sorted_a, return_index=True, return_counts=True
    + 952        )
    + 953
    + 954        # Splits the indices into separate arrays
    + 955        splits = np.split(idx_sort, idx_start[1:])
    + 956
    + 957        # Creates unique indices for each split
    + 958        idx_unq = np.concatenate([arange_exclude_10s(len(x)) for x in splits])
    + 959
    + 960        # Reorders according to input array
    + 961        idx_unq = idx_unq[idx_unsort]
    + 962
    + 963        # Magnitude of each index
    + 964        exp = np.log10(
    + 965            idx_unq, where=idx_unq > 0, out=np.zeros_like(idx_unq, dtype=np.float64)
    + 966        )
    + 967        idx_unq_mag = np.power(10, np.floor(exp) + 1)
    + 968
    + 969        # Result
    + 970        return a + idx_unq / idx_unq_mag
    + 971
    + 972    def sparse_upper_star(self, idx, V):
    + 973        """Sparse implementation of an upper star filtration.
    + 974
    + 975        Parameters
    + 976        ----------
    + 977        idx : :obj:`~numpy.array`
    + 978            Edge indices for each dimension (MxN).
    + 979        V : :obj:`~numpy.array`
    + 980            Array of intensity data (Mx1).
    + 981        Returns
    + 982        -------
    + 983        idx : :obj:`~numpy.array`
    + 984            Index of filtered points (Mx1).
    + 985        persistence : :obj:`~numpy.array`
    + 986            Persistence of each filtered point (Mx1).
    + 987
    + 988        Notes
    + 989        -----
    + 990        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    + 991        """
    + 992
    + 993        # Invert
    + 994        V = -1 * V.copy().astype(int)
    + 995
    + 996        # Embed indices
    + 997        V = self.embed_unique_indices(V)
    + 998
    + 999        # Connectivity matrix
    +1000        cmat = KDTree(idx)
    +1001        cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix")
    +1002        cmat.setdiag(1)
    +1003        cmat = sparse.triu(cmat)
    +1004
    +1005        # Pairwise minimums
    +1006        I, J = cmat.nonzero()
    +1007        d = np.maximum(V[I], V[J])
    +1008
    +1009        # Delete connectiity matrix
    +1010        cmat_shape = cmat.shape
    +1011        del cmat
    +1012
    +1013        # Sparse distance matrix
    +1014        sdm = sparse.coo_matrix((d, (I, J)), shape=cmat_shape)
    +1015
    +1016        # Delete pairwise mins
    +1017        del d, I, J
    +1018
    +1019        # Persistence homology
    +1020        ph = ripser(sdm, distance_matrix=True, maxdim=0)["dgms"][0]
    +1021
    +1022        # Bound death values
    +1023        ph[ph[:, 1] == np.inf, 1] = np.max(V)
    +1024
    +1025        # Construct tree to query against
    +1026        tree = KDTree(V.reshape((-1, 1)))
    +1027
    +1028        # Get the indexes of the first nearest neighbor by birth
    +1029        _, nn = tree.query(ph[:, 0].reshape((-1, 1)), k=1, workers=-1)
     1030
    -1031    def check_if_grid(self, data):
    -1032        """Check if the data are gridded in mz space.
    -1033
    -1034        Parameters
    -1035        ----------
    -1036        data : DataFrame
    -1037            DataFrame containing the mass spectrometry data.  Needs to have mz and scan columns.
    -1038
    -1039        Returns
    -1040        -------
    -1041        bool
    -1042            True if the data is gridded in the mz direction, False otherwise.
    -1043
    -1044        Notes
    -1045        -----
    -1046        This function is used within the grid_data function and the find_mass_features function and is not intended to be called directly.
    -1047        """
    -1048        # Calculate the difference between consecutive mz values in a single scan
    -1049        dat_check = data.copy().reset_index(drop=True)
    -1050        dat_check["mz_diff"] = np.abs(dat_check["mz"].diff())
    -1051        mz_diff_min = (
    -1052            dat_check.groupby("scan")["mz_diff"].min().min()
    -1053        )  # within each scan, what is the smallest mz difference between consecutive mz values
    -1054
    -1055        # Find the mininum mz difference between mz values in the data; regardless of scan
    -1056        dat_check_mz = dat_check[["mz"]].drop_duplicates().copy()
    -1057        dat_check_mz = dat_check_mz.sort_values(by=["mz"]).reset_index(drop=True)
    -1058        dat_check_mz["mz_diff"] = np.abs(dat_check_mz["mz"].diff())
    -1059
    -1060        # Get minimum mz_diff between mz values in the data
    -1061        mz_diff_min_raw = dat_check_mz["mz_diff"].min()
    -1062
    -1063        # If the minimum mz difference between mz values in the data is less than the minimum mz difference between mz values within a single scan, then the data is not gridded
    -1064        if mz_diff_min_raw < mz_diff_min:
    -1065            return False
    -1066        else:
    -1067            return True
    -1068
    -1069    def grid_data(self, data):
    -1070        """Grid the data in the mz dimension.
    -1071
    -1072        Data must be gridded prior to persistent homology calculations.
    +1031        return nn, -(ph[:, 0] // 1 - ph[:, 1] // 1)
    +1032
    +1033    def check_if_grid(self, data):
    +1034        """Check if the data are gridded in mz space.
    +1035
    +1036        Parameters
    +1037        ----------
    +1038        data : DataFrame
    +1039            DataFrame containing the mass spectrometry data.  Needs to have mz and scan columns.
    +1040
    +1041        Returns
    +1042        -------
    +1043        bool
    +1044            True if the data is gridded in the mz direction, False otherwise.
    +1045
    +1046        Notes
    +1047        -----
    +1048        This function is used within the grid_data function and the find_mass_features function and is not intended to be called directly.
    +1049        """
    +1050        # Calculate the difference between consecutive mz values in a single scan
    +1051        dat_check = data.copy().reset_index(drop=True)
    +1052        dat_check["mz_diff"] = np.abs(dat_check["mz"].diff())
    +1053        mz_diff_min = (
    +1054            dat_check.groupby("scan")["mz_diff"].min().min()
    +1055        )  # within each scan, what is the smallest mz difference between consecutive mz values
    +1056
    +1057        # Find the mininum mz difference between mz values in the data; regardless of scan
    +1058        dat_check_mz = dat_check[["mz"]].drop_duplicates().copy()
    +1059        dat_check_mz = dat_check_mz.sort_values(by=["mz"]).reset_index(drop=True)
    +1060        dat_check_mz["mz_diff"] = np.abs(dat_check_mz["mz"].diff())
    +1061
    +1062        # Get minimum mz_diff between mz values in the data
    +1063        mz_diff_min_raw = dat_check_mz["mz_diff"].min()
    +1064
    +1065        # If the minimum mz difference between mz values in the data is less than the minimum mz difference between mz values within a single scan, then the data is not gridded
    +1066        if mz_diff_min_raw < mz_diff_min:
    +1067            return False
    +1068        else:
    +1069            return True
    +1070
    +1071    def grid_data(self, data):
    +1072        """Grid the data in the mz dimension.
     1073
    -1074        Parameters
    -1075        ----------
    -1076        data : DataFrame
    -1077            The input data containing mz, scan, scan_time, and intensity columns.
    -1078
    -1079        Returns
    -1080        -------
    -1081        DataFrame
    -1082            The gridded data with mz, scan, scan_time, and intensity columns.
    -1083
    -1084        Raises
    -1085        ------
    -1086        ValueError
    -1087            If gridding fails.
    -1088        """
    -1089
    -1090        # Calculate the difference between consecutive mz values in a single scan for grid spacing
    -1091        data_w = data.copy().reset_index(drop=True)
    -1092        data_w["mz_diff"] = np.abs(data_w["mz"].diff())
    -1093        mz_diff_min = data_w.groupby("scan")["mz_diff"].min().min() * 0.99999
    -1094
    -1095        # Need high intensity mz values first so they are parents in the output pairs stack
    -1096        dat_mz = data_w[["mz", "intensity"]].sort_values(
    -1097            by=["intensity"], ascending=False
    -1098        )
    -1099        dat_mz = dat_mz[["mz"]].drop_duplicates().reset_index(drop=True).copy()
    -1100
    -1101        # Construct KD tree
    -1102        tree = KDTree(dat_mz.mz.values.reshape(-1, 1))
    -1103        sdm = tree.sparse_distance_matrix(tree, mz_diff_min, output_type="coo_matrix")
    -1104        sdm = sparse.triu(sdm, k=1)
    -1105        sdm.data = np.ones_like(sdm.data)
    -1106        distances = sdm.tocoo()
    -1107        pairs = np.stack((distances.row, distances.col), axis=1)
    -1108
    -1109        # Cull pairs to just get root
    -1110        to_drop = []
    -1111        while len(pairs) > 0:
    -1112            root_parents = np.setdiff1d(np.unique(pairs[:, 0]), np.unique(pairs[:, 1]))
    -1113            id_root_parents = np.isin(pairs[:, 0], root_parents)
    -1114            children_of_roots = np.unique(pairs[id_root_parents, 1])
    -1115            to_drop = np.append(to_drop, children_of_roots)
    -1116
    -1117            # Set up pairs array for next iteration by removing pairs with children or parents already dropped
    -1118            pairs = pairs[~np.isin(pairs[:, 1], to_drop), :]
    -1119            pairs = pairs[~np.isin(pairs[:, 0], to_drop), :]
    -1120        dat_mz = dat_mz.reset_index(drop=True).drop(index=np.array(to_drop))
    -1121        mz_dat_np = (
    -1122            dat_mz[["mz"]]
    -1123            .sort_values(by=["mz"])
    -1124            .reset_index(drop=True)
    -1125            .values.flatten()
    -1126        )
    -1127
    -1128        # Sort data by mz and recast mz to nearest value in mz_dat_np
    -1129        data_w = data_w.sort_values(by=["mz"]).reset_index(drop=True).copy()
    -1130        data_w["mz_new"] = mz_dat_np[find_closest(mz_dat_np, data_w["mz"].values)]
    -1131        data_w["mz_diff"] = np.abs(data_w["mz"] - data_w["mz_new"])
    -1132
    -1133        # Rename mz_new as mz; drop mz_diff; groupby scan and mz and sum intensity
    -1134        new_data_w = data_w.rename(columns={"mz": "mz_orig", "mz_new": "mz"}).copy()
    -1135        new_data_w = (
    -1136            new_data_w.drop(columns=["mz_diff", "mz_orig"])
    -1137            .groupby(["scan", "mz"])["intensity"]
    -1138            .sum()
    -1139            .reset_index()
    -1140        )
    -1141        new_data_w = (
    -1142            new_data_w.sort_values(by=["scan", "mz"], ascending=[True, True])
    -1143            .reset_index(drop=True)
    -1144            .copy()
    -1145        )
    -1146
    -1147        # Check if grid worked and return
    -1148        if self.check_if_grid(new_data_w):
    -1149            return new_data_w
    -1150        else:
    -1151            raise ValueError("Gridding failed")
    -1152
    -1153    def find_mass_features_ph(self, ms_level=1, grid=True):
    -1154        """Find mass features within an LCMSBase object using persistent homology.
    -1155
    -1156        Assigns the mass_features attribute to the object (a dictionary of LCMSMassFeature objects, keyed by mass feature id)
    +1074        Data must be gridded prior to persistent homology calculations.
    +1075
    +1076        Parameters
    +1077        ----------
    +1078        data : DataFrame
    +1079            The input data containing mz, scan, scan_time, and intensity columns.
    +1080
    +1081        Returns
    +1082        -------
    +1083        DataFrame
    +1084            The gridded data with mz, scan, scan_time, and intensity columns.
    +1085
    +1086        Raises
    +1087        ------
    +1088        ValueError
    +1089            If gridding fails.
    +1090        """
    +1091
    +1092        # Calculate the difference between consecutive mz values in a single scan for grid spacing
    +1093        data_w = data.copy().reset_index(drop=True)
    +1094        data_w["mz_diff"] = np.abs(data_w["mz"].diff())
    +1095        mz_diff_min = data_w.groupby("scan")["mz_diff"].min().min() * 0.99999
    +1096
    +1097        # Need high intensity mz values first so they are parents in the output pairs stack
    +1098        dat_mz = data_w[["mz", "intensity"]].sort_values(
    +1099            by=["intensity"], ascending=False
    +1100        )
    +1101        dat_mz = dat_mz[["mz"]].drop_duplicates().reset_index(drop=True).copy()
    +1102
    +1103        # Construct KD tree
    +1104        tree = KDTree(dat_mz.mz.values.reshape(-1, 1))
    +1105        sdm = tree.sparse_distance_matrix(tree, mz_diff_min, output_type="coo_matrix")
    +1106        sdm = sparse.triu(sdm, k=1)
    +1107        sdm.data = np.ones_like(sdm.data)
    +1108        distances = sdm.tocoo()
    +1109        pairs = np.stack((distances.row, distances.col), axis=1)
    +1110
    +1111        # Cull pairs to just get root
    +1112        to_drop = []
    +1113        while len(pairs) > 0:
    +1114            root_parents = np.setdiff1d(np.unique(pairs[:, 0]), np.unique(pairs[:, 1]))
    +1115            id_root_parents = np.isin(pairs[:, 0], root_parents)
    +1116            children_of_roots = np.unique(pairs[id_root_parents, 1])
    +1117            to_drop = np.append(to_drop, children_of_roots)
    +1118
    +1119            # Set up pairs array for next iteration by removing pairs with children or parents already dropped
    +1120            pairs = pairs[~np.isin(pairs[:, 1], to_drop), :]
    +1121            pairs = pairs[~np.isin(pairs[:, 0], to_drop), :]
    +1122        dat_mz = dat_mz.reset_index(drop=True).drop(index=np.array(to_drop))
    +1123        mz_dat_np = (
    +1124            dat_mz[["mz"]]
    +1125            .sort_values(by=["mz"])
    +1126            .reset_index(drop=True)
    +1127            .values.flatten()
    +1128        )
    +1129
    +1130        # Sort data by mz and recast mz to nearest value in mz_dat_np
    +1131        data_w = data_w.sort_values(by=["mz"]).reset_index(drop=True).copy()
    +1132        data_w["mz_new"] = mz_dat_np[find_closest(mz_dat_np, data_w["mz"].values)]
    +1133        data_w["mz_diff"] = np.abs(data_w["mz"] - data_w["mz_new"])
    +1134
    +1135        # Rename mz_new as mz; drop mz_diff; groupby scan and mz and sum intensity
    +1136        new_data_w = data_w.rename(columns={"mz": "mz_orig", "mz_new": "mz"}).copy()
    +1137        new_data_w = (
    +1138            new_data_w.drop(columns=["mz_diff", "mz_orig"])
    +1139            .groupby(["scan", "mz"])["intensity"]
    +1140            .sum()
    +1141            .reset_index()
    +1142        )
    +1143        new_data_w = (
    +1144            new_data_w.sort_values(by=["scan", "mz"], ascending=[True, True])
    +1145            .reset_index(drop=True)
    +1146            .copy()
    +1147        )
    +1148
    +1149        # Check if grid worked and return
    +1150        if self.check_if_grid(new_data_w):
    +1151            return new_data_w
    +1152        else:
    +1153            raise ValueError("Gridding failed")
    +1154
    +1155    def find_mass_features_ph(self, ms_level=1, grid=True):
    +1156        """Find mass features within an LCMSBase object using persistent homology.
     1157
    -1158        Parameters
    -1159        ----------
    -1160        ms_level : int, optional
    -1161            The MS level to use. Default is 1.
    -1162        grid : bool, optional
    -1163            If True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded). Default is True.
    -1164
    -1165        Raises
    -1166        ------
    -1167        ValueError
    -1168            If no MS level data is found on the object.
    -1169            If data is not gridded and grid is False.
    -1170
    -1171        Returns
    -1172        -------
    -1173        None, but assigns the mass_features attribute to the object.
    -1174
    -1175        Notes
    -1176        -----
    -1177        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    -1178        """
    -1179        # Check that ms_level is a key in self._ms_uprocessed
    -1180        if ms_level not in self._ms_unprocessed.keys():
    -1181            raise ValueError(
    -1182                "No MS level "
    -1183                + str(ms_level)
    -1184                + " data found, did you instantiate with parser specific to MS level?"
    -1185            )
    -1186
    -1187        # Get ms data
    -1188        data = self._ms_unprocessed[ms_level].copy()
    -1189
    -1190        # Drop rows with missing intensity values and reset index
    -1191        data = data.dropna(subset=["intensity"]).reset_index(drop=True)
    -1192
    -1193
    -1194        # Threshold data
    -1195        dims = ["mz", "scan_time"]
    -1196        threshold = self.parameters.lc_ms.ph_inten_min_rel * data.intensity.max()
    -1197        data_thres = data[data["intensity"] > threshold].reset_index(drop=True).copy()
    -1198
    -1199        # Check if gridded, if not, grid
    -1200        gridded_mz = self.check_if_grid(data_thres)
    -1201        if gridded_mz is False:
    -1202            if grid is False:
    -1203                raise ValueError(
    -1204                    "Data are not gridded in mz dimension, try reprocessing with a different params or grid data before running this function"
    -1205                )
    -1206            else:
    -1207                data_thres = self.grid_data(data_thres)
    -1208
    -1209        # Add build factors and add scan_time
    -1210        data_thres = data_thres.merge(self.scan_df[["scan", "scan_time"]], on="scan")
    -1211        factors = {
    -1212            dim: pd.factorize(data_thres[dim], sort=True)[1].astype(np.float32)
    -1213            for dim in dims
    -1214        }  # this is return a float64 index
    -1215
    -1216        # Build indexes
    -1217        index = {
    -1218            dim: np.searchsorted(factors[dim], data_thres[dim]).astype(np.float32)
    -1219            for dim in factors
    -1220        }
    -1221
    -1222        # Smooth data
    -1223        iterations = self.parameters.lc_ms.ph_smooth_it
    -1224        smooth_radius = [
    -1225            self.parameters.lc_ms.ph_smooth_radius_mz,
    -1226            self.parameters.lc_ms.ph_smooth_radius_scan,
    -1227        ]  # mz, scan_time smoothing radius (in steps)
    -1228
    -1229        index = np.vstack([index[dim] for dim in dims]).T
    -1230        V = data_thres["intensity"].values
    -1231        resid = np.inf
    -1232        for i in range(iterations):
    -1233            # Previous iteration
    -1234            V_prev = V.copy()
    -1235            resid_prev = resid
    -1236            V = self.sparse_mean_filter(index, V, radius=smooth_radius)
    -1237
    -1238            # Calculate residual with previous iteration
    -1239            resid = np.sqrt(np.mean(np.square(V - V_prev)))
    -1240
    -1241            # Evaluate convergence
    -1242            if i > 0:
    -1243                # Percent change in residual
    -1244                test = np.abs(resid - resid_prev) / resid_prev
    -1245
    -1246                # Exit criteria
    -1247                if test <= 0:
    -1248                    break
    -1249
    -1250        # Overwrite values
    -1251        data_thres["intensity"] = V
    -1252
    -1253        # Use persistent homology to find regions of interest
    -1254        pidx, pers = self.sparse_upper_star(index, V)
    -1255        pidx = pidx[pers > 1]
    -1256        pers = pers[pers > 1]
    -1257
    -1258        # Get peaks
    -1259        peaks = data_thres.iloc[pidx, :].reset_index(drop=True)
    -1260
    -1261        # Add persistence column
    -1262        peaks["persistence"] = pers
    -1263        mass_features = peaks.sort_values(
    -1264            by="persistence", ascending=False
    -1265        ).reset_index(drop=True)
    -1266
    -1267        # Filter by persistence threshold
    -1268        persistence_threshold = (
    -1269            self.parameters.lc_ms.ph_persis_min_rel * data.intensity.max()
    -1270        )
    -1271        mass_features = mass_features.loc[
    -1272            mass_features["persistence"] > persistence_threshold, :
    -1273        ].reset_index(drop=True)
    -1274
    -1275        # Rename scan column to apex_scan
    -1276        mass_features = mass_features.rename(
    -1277            columns={"scan": "apex_scan", "scan_time": "retention_time"}
    -1278        )
    -1279
    -1280        # Populate mass_features attribute
    -1281        self.mass_features = {}
    -1282        for row in mass_features.itertuples():
    -1283            row_dict = mass_features.iloc[row.Index].to_dict()
    -1284            lcms_feature = LCMSMassFeature(self, **row_dict)
    -1285            self.mass_features[lcms_feature.id] = lcms_feature
    -1286
    -1287        if self.parameters.lc_ms.verbose_processing:
    -1288            print("Found " + str(len(mass_features)) + " initial mass features")
    -1289
    -1290    def cluster_mass_features(
    -1291        self, drop_children=True, sort_by="persistence"
    -1292    ):
    -1293        """Cluster mass features
    -1294
    -1295        Based on their proximity in the mz and scan_time dimensions, priorizies the mass features with the highest persistence.
    -1296
    -1297        Parameters
    -1298        ----------
    -1299        drop_children : bool, optional
    -1300            Whether to drop the mass features that are not cluster parents. Default is True.
    -1301        sort_by : str, optional
    -1302            The column to sort the mass features by, this will determine which mass features get rolled up into a parent mass feature. Default is "persistence".
    -1303
    -1304        Raises
    -1305        ------
    -1306        ValueError
    -1307            If no mass features are found.
    -1308            If too many mass features are found.
    -1309
    -1310        Returns
    -1311        -------
    -1312        None if drop_children is True, otherwise returns a list of mass feature ids that are not cluster parents.
    -1313        """
    -1314        verbose = self.parameters.lc_ms.verbose_processing
    -1315
    -1316        if self.mass_features is None:
    -1317            raise ValueError("No mass features found, run find_mass_features() first")
    -1318        if len(self.mass_features) > 400000:
    -1319            raise ValueError(
    -1320                "Too many mass featuers of interest found, run find_mass_features() with a higher intensity threshold"
    -1321            )
    -1322        dims = ["mz", "scan_time"]
    -1323        mf_df_og = self.mass_features_to_df()
    -1324        mf_df = mf_df_og.copy()
    -1325
    -1326        # Sort mass features by sort_by column, make mf_id its own column for easier bookkeeping
    -1327        mf_df = mf_df.sort_values(by=sort_by, ascending=False).reset_index(drop=False)
    -1328
    -1329        tol = [
    -1330            self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel,
    -1331            self.parameters.lc_ms.mass_feature_cluster_rt_tolerance,
    -1332        ]  # mz, in relative; scan_time in minutes
    -1333        relative = [True, False]
    -1334
    -1335        # Compute inter-feature distances
    -1336        distances = None
    -1337        for i in range(len(dims)):
    -1338            # Construct k-d tree
    -1339            values = mf_df[dims[i]].values
    -1340            tree = KDTree(values.reshape(-1, 1))
    -1341
    -1342            max_tol = tol[i]
    -1343            if relative[i] is True:
    -1344                # Maximum absolute tolerance
    -1345                max_tol = tol[i] * values.max()
    -1346
    -1347            # Compute sparse distance matrix
    -1348            # the larger the max_tol, the slower this operation is
    -1349            sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix")
    -1350
    -1351            # Only consider forward case, exclude diagonal
    -1352            sdm = sparse.triu(sdm, k=1)
    -1353
    -1354            # Filter relative distances
    -1355            if relative[i] is True:
    -1356                # Compute relative distances
    -1357                rel_dists = sdm.data / values[sdm.row]  # or col?
    -1358
    -1359                # Indices of relative distances less than tolerance
    -1360                idx = rel_dists <= tol[i]
    -1361
    -1362                # Reconstruct sparse distance matrix
    -1363                sdm = sparse.coo_matrix(
    -1364                    (rel_dists[idx], (sdm.row[idx], sdm.col[idx])),
    -1365                    shape=(len(values), len(values)),
    -1366                )
    -1367
    -1368            # Cast as binary matrix
    -1369            sdm.data = np.ones_like(sdm.data)
    -1370
    -1371            # Stack distances
    -1372            if distances is None:
    -1373                distances = sdm
    -1374            else:
    -1375                distances = distances.multiply(sdm)
    -1376
    -1377        # Extract indices of within-tolerance points
    -1378        distances = distances.tocoo()
    -1379        pairs = np.stack((distances.row, distances.col), axis=1)
    -1380        pairs_df = pd.DataFrame(pairs, columns=["parent", "child"])
    -1381        pairs_df = pairs_df.set_index("parent")
    -1382
    -1383        to_drop = []
    -1384        while not pairs_df.empty:
    -1385            # Find root_parents and their children
    -1386            root_parents = np.setdiff1d(np.unique(pairs_df.index.values), np.unique(pairs_df.child.values))
    -1387            children_of_roots = pairs_df.loc[root_parents, "child"].unique()
    -1388            to_drop = np.append(to_drop, children_of_roots)
    -1389
    -1390            # Remove root_children as possible parents from pairs_df for next iteration
    -1391            pairs_df = pairs_df.drop(
    -1392                index=children_of_roots, errors="ignore"
    -1393            )  
    -1394            pairs_df = pairs_df.reset_index().set_index("child")
    -1395            # Remove root_children as possible children from pairs_df for next iteration
    -1396            pairs_df = pairs_df.drop(index=children_of_roots)
    -1397
    -1398            # Prepare for next iteration
    -1399            pairs_df = pairs_df.reset_index().set_index("parent")
    -1400
    -1401        # Drop mass features that are not cluster parents
    -1402        mf_df = mf_df.drop(index=np.array(to_drop))
    -1403
    -1404        # Set index back to mf_id
    -1405        mf_df = mf_df.set_index("mf_id")
    -1406        if verbose:
    -1407            print(str(len(mf_df)) + " mass features remaining")
    -1408
    -1409        mf_df_new = mf_df_og.copy()
    -1410        mf_df_new["cluster_parent"] = np.where(
    -1411            np.isin(mf_df_new.index, mf_df.index), True, False
    -1412        )
    -1413
    -1414        # get mass feature ids of features that are not cluster parents
    -1415        cluster_daughters = mf_df_new[mf_df_new["cluster_parent"] == False].index.values
    -1416        if drop_children is True:
    -1417            # Drop mass features that are not cluster parents from self
    -1418            self.mass_features = {
    -1419                k: v
    -1420                for k, v in self.mass_features.items()
    -1421                if k not in cluster_daughters
    -1422            }
    -1423        else:
    -1424            return cluster_daughters
    +1158        Assigns the mass_features attribute to the object (a dictionary of LCMSMassFeature objects, keyed by mass feature id)
    +1159
    +1160        Parameters
    +1161        ----------
    +1162        ms_level : int, optional
    +1163            The MS level to use. Default is 1.
    +1164        grid : bool, optional
    +1165            If True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded). Default is True.
    +1166
    +1167        Raises
    +1168        ------
    +1169        ValueError
    +1170            If no MS level data is found on the object.
    +1171            If data is not gridded and grid is False.
    +1172
    +1173        Returns
    +1174        -------
    +1175        None, but assigns the mass_features attribute to the object.
    +1176
    +1177        Notes
    +1178        -----
    +1179        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    +1180        """
    +1181        # Check that ms_level is a key in self._ms_uprocessed
    +1182        if ms_level not in self._ms_unprocessed.keys():
    +1183            raise ValueError(
    +1184                "No MS level "
    +1185                + str(ms_level)
    +1186                + " data found, did you instantiate with parser specific to MS level?"
    +1187            )
    +1188
    +1189        # Get ms data
    +1190        data = self._ms_unprocessed[ms_level].copy()
    +1191
    +1192        # Drop rows with missing intensity values and reset index
    +1193        data = data.dropna(subset=["intensity"]).reset_index(drop=True)
    +1194
    +1195        # Threshold data
    +1196        dims = ["mz", "scan_time"]
    +1197        threshold = self.parameters.lc_ms.ph_inten_min_rel * data.intensity.max()
    +1198        data_thres = data[data["intensity"] > threshold].reset_index(drop=True).copy()
    +1199
    +1200        # Check if gridded, if not, grid
    +1201        gridded_mz = self.check_if_grid(data_thres)
    +1202        if gridded_mz is False:
    +1203            if grid is False:
    +1204                raise ValueError(
    +1205                    "Data are not gridded in mz dimension, try reprocessing with a different params or grid data before running this function"
    +1206                )
    +1207            else:
    +1208                data_thres = self.grid_data(data_thres)
    +1209
    +1210        # Add build factors and add scan_time
    +1211        data_thres = data_thres.merge(self.scan_df[["scan", "scan_time"]], on="scan")
    +1212        factors = {
    +1213            dim: pd.factorize(data_thres[dim], sort=True)[1].astype(np.float32)
    +1214            for dim in dims
    +1215        }  # this is return a float64 index
    +1216
    +1217        # Build indexes
    +1218        index = {
    +1219            dim: np.searchsorted(factors[dim], data_thres[dim]).astype(np.float32)
    +1220            for dim in factors
    +1221        }
    +1222
    +1223        # Smooth data
    +1224        iterations = self.parameters.lc_ms.ph_smooth_it
    +1225        smooth_radius = [
    +1226            self.parameters.lc_ms.ph_smooth_radius_mz,
    +1227            self.parameters.lc_ms.ph_smooth_radius_scan,
    +1228        ]  # mz, scan_time smoothing radius (in steps)
    +1229
    +1230        index = np.vstack([index[dim] for dim in dims]).T
    +1231        V = data_thres["intensity"].values
    +1232        resid = np.inf
    +1233        for i in range(iterations):
    +1234            # Previous iteration
    +1235            V_prev = V.copy()
    +1236            resid_prev = resid
    +1237            V = self.sparse_mean_filter(index, V, radius=smooth_radius)
    +1238
    +1239            # Calculate residual with previous iteration
    +1240            resid = np.sqrt(np.mean(np.square(V - V_prev)))
    +1241
    +1242            # Evaluate convergence
    +1243            if i > 0:
    +1244                # Percent change in residual
    +1245                test = np.abs(resid - resid_prev) / resid_prev
    +1246
    +1247                # Exit criteria
    +1248                if test <= 0:
    +1249                    break
    +1250
    +1251        # Overwrite values
    +1252        data_thres["intensity"] = V
    +1253
    +1254        # Use persistent homology to find regions of interest
    +1255        pidx, pers = self.sparse_upper_star(index, V)
    +1256        pidx = pidx[pers > 1]
    +1257        pers = pers[pers > 1]
    +1258
    +1259        # Get peaks
    +1260        peaks = data_thres.iloc[pidx, :].reset_index(drop=True)
    +1261
    +1262        # Add persistence column
    +1263        peaks["persistence"] = pers
    +1264        mass_features = peaks.sort_values(
    +1265            by="persistence", ascending=False
    +1266        ).reset_index(drop=True)
    +1267
    +1268        # Filter by persistence threshold
    +1269        persistence_threshold = (
    +1270            self.parameters.lc_ms.ph_persis_min_rel * data.intensity.max()
    +1271        )
    +1272        mass_features = mass_features.loc[
    +1273            mass_features["persistence"] > persistence_threshold, :
    +1274        ].reset_index(drop=True)
    +1275
    +1276        # Rename scan column to apex_scan
    +1277        mass_features = mass_features.rename(
    +1278            columns={"scan": "apex_scan", "scan_time": "retention_time"}
    +1279        )
    +1280
    +1281        # Populate mass_features attribute
    +1282        self.mass_features = {}
    +1283        for row in mass_features.itertuples():
    +1284            row_dict = mass_features.iloc[row.Index].to_dict()
    +1285            lcms_feature = LCMSMassFeature(self, **row_dict)
    +1286            self.mass_features[lcms_feature.id] = lcms_feature
    +1287
    +1288        if self.parameters.lc_ms.verbose_processing:
    +1289            print("Found " + str(len(mass_features)) + " initial mass features")
    +1290
    +1291    def cluster_mass_features(self, drop_children=True, sort_by="persistence"):
    +1292        """Cluster mass features
    +1293
    +1294        Based on their proximity in the mz and scan_time dimensions, priorizies the mass features with the highest persistence.
    +1295
    +1296        Parameters
    +1297        ----------
    +1298        drop_children : bool, optional
    +1299            Whether to drop the mass features that are not cluster parents. Default is True.
    +1300        sort_by : str, optional
    +1301            The column to sort the mass features by, this will determine which mass features get rolled up into a parent mass feature. Default is "persistence".
    +1302
    +1303        Raises
    +1304        ------
    +1305        ValueError
    +1306            If no mass features are found.
    +1307            If too many mass features are found.
    +1308
    +1309        Returns
    +1310        -------
    +1311        None if drop_children is True, otherwise returns a list of mass feature ids that are not cluster parents.
    +1312        """
    +1313        verbose = self.parameters.lc_ms.verbose_processing
    +1314
    +1315        if self.mass_features is None:
    +1316            raise ValueError("No mass features found, run find_mass_features() first")
    +1317        if len(self.mass_features) > 400000:
    +1318            raise ValueError(
    +1319                "Too many mass featuers of interest found, run find_mass_features() with a higher intensity threshold"
    +1320            )
    +1321        dims = ["mz", "scan_time"]
    +1322        mf_df_og = self.mass_features_to_df()
    +1323        mf_df = mf_df_og.copy()
    +1324
    +1325        # Sort mass features by sort_by column, make mf_id its own column for easier bookkeeping
    +1326        mf_df = mf_df.sort_values(by=sort_by, ascending=False).reset_index(drop=False)
    +1327
    +1328        tol = [
    +1329            self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel,
    +1330            self.parameters.lc_ms.mass_feature_cluster_rt_tolerance,
    +1331        ]  # mz, in relative; scan_time in minutes
    +1332        relative = [True, False]
    +1333
    +1334        # Compute inter-feature distances
    +1335        distances = None
    +1336        for i in range(len(dims)):
    +1337            # Construct k-d tree
    +1338            values = mf_df[dims[i]].values
    +1339            tree = KDTree(values.reshape(-1, 1))
    +1340
    +1341            max_tol = tol[i]
    +1342            if relative[i] is True:
    +1343                # Maximum absolute tolerance
    +1344                max_tol = tol[i] * values.max()
    +1345
    +1346            # Compute sparse distance matrix
    +1347            # the larger the max_tol, the slower this operation is
    +1348            sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix")
    +1349
    +1350            # Only consider forward case, exclude diagonal
    +1351            sdm = sparse.triu(sdm, k=1)
    +1352
    +1353            # Filter relative distances
    +1354            if relative[i] is True:
    +1355                # Compute relative distances
    +1356                rel_dists = sdm.data / values[sdm.row]  # or col?
    +1357
    +1358                # Indices of relative distances less than tolerance
    +1359                idx = rel_dists <= tol[i]
    +1360
    +1361                # Reconstruct sparse distance matrix
    +1362                sdm = sparse.coo_matrix(
    +1363                    (rel_dists[idx], (sdm.row[idx], sdm.col[idx])),
    +1364                    shape=(len(values), len(values)),
    +1365                )
    +1366
    +1367            # Cast as binary matrix
    +1368            sdm.data = np.ones_like(sdm.data)
    +1369
    +1370            # Stack distances
    +1371            if distances is None:
    +1372                distances = sdm
    +1373            else:
    +1374                distances = distances.multiply(sdm)
    +1375
    +1376        # Extract indices of within-tolerance points
    +1377        distances = distances.tocoo()
    +1378        pairs = np.stack((distances.row, distances.col), axis=1)
    +1379        pairs_df = pd.DataFrame(pairs, columns=["parent", "child"])
    +1380        pairs_df = pairs_df.set_index("parent")
    +1381
    +1382        to_drop = []
    +1383        while not pairs_df.empty:
    +1384            # Find root_parents and their children
    +1385            root_parents = np.setdiff1d(
    +1386                np.unique(pairs_df.index.values), np.unique(pairs_df.child.values)
    +1387            )
    +1388            children_of_roots = pairs_df.loc[root_parents, "child"].unique()
    +1389            to_drop = np.append(to_drop, children_of_roots)
    +1390
    +1391            # Remove root_children as possible parents from pairs_df for next iteration
    +1392            pairs_df = pairs_df.drop(index=children_of_roots, errors="ignore")
    +1393            pairs_df = pairs_df.reset_index().set_index("child")
    +1394            # Remove root_children as possible children from pairs_df for next iteration
    +1395            pairs_df = pairs_df.drop(index=children_of_roots)
    +1396
    +1397            # Prepare for next iteration
    +1398            pairs_df = pairs_df.reset_index().set_index("parent")
    +1399
    +1400        # Drop mass features that are not cluster parents
    +1401        mf_df = mf_df.drop(index=np.array(to_drop))
    +1402
    +1403        # Set index back to mf_id
    +1404        mf_df = mf_df.set_index("mf_id")
    +1405        if verbose:
    +1406            print(str(len(mf_df)) + " mass features remaining")
    +1407
    +1408        mf_df_new = mf_df_og.copy()
    +1409        mf_df_new["cluster_parent"] = np.where(
    +1410            np.isin(mf_df_new.index, mf_df.index), True, False
    +1411        )
    +1412
    +1413        # get mass feature ids of features that are not cluster parents
    +1414        cluster_daughters = mf_df_new[mf_df_new["cluster_parent"] == False].index.values
    +1415        if drop_children is True:
    +1416            # Drop mass features that are not cluster parents from self
    +1417            self.mass_features = {
    +1418                k: v
    +1419                for k, v in self.mass_features.items()
    +1420                if k not in cluster_daughters
    +1421            }
    +1422        else:
    +1423            return cluster_daughters
     
    @@ -4195,71 +4197,71 @@
    Methods
    -
    829    @staticmethod
    -830    def sparse_mean_filter(idx, V, radius=[0, 1, 1]):
    -831        """Sparse implementation of a mean filter.
    -832
    -833        Parameters
    -834        ----------
    -835        idx : :obj:`~numpy.array`
    -836            Edge indices for each dimension (MxN).
    -837        V : :obj:`~numpy.array`
    -838            Array of intensity data (Mx1).
    -839        radius : float or list
    -840            Radius of the sparse filter in each dimension. Values less than
    -841            zero indicate no connectivity in that dimension.
    -842
    -843        Returns
    -844        -------
    -845        :obj:`~numpy.array`
    -846            Filtered intensities (Mx1).
    -847
    -848        Notes
    -849        -----
    -850        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos.
    -851        This is a static method.
    -852        """
    -853
    -854        # Copy indices
    -855        idx = idx.copy().astype(V.dtype)
    -856
    -857        # Scale
    -858        for i, r in enumerate(radius):
    -859            # Increase inter-index distance
    -860            if r < 1:
    -861                idx[:, i] *= 2
    -862
    -863            # Do nothing
    -864            elif r == 1:
    -865                pass
    -866
    -867            # Decrease inter-index distance
    -868            else:
    -869                idx[:, i] /= r
    -870
    -871        # Connectivity matrix
    -872        cmat = KDTree(idx)
    -873        cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix")
    -874        cmat.setdiag(1)
    -875
    -876        # Pair indices
    -877        I, J = cmat.nonzero()
    -878
    -879        # Delete cmat
    -880        cmat_shape = cmat.shape
    -881        del cmat
    -882
    -883        # Sum over columns
    -884        V_sum = sparse.bsr_matrix(
    -885            (V[J], (I, I)), shape=cmat_shape, dtype=V.dtype
    -886        ).diagonal(0)
    -887
    -888        # Count over columns
    -889        V_count = sparse.bsr_matrix(
    -890            (np.ones_like(J), (I, I)), shape=cmat_shape, dtype=V.dtype
    -891        ).diagonal(0)
    -892
    -893        return V_sum / V_count
    +            
    831    @staticmethod
    +832    def sparse_mean_filter(idx, V, radius=[0, 1, 1]):
    +833        """Sparse implementation of a mean filter.
    +834
    +835        Parameters
    +836        ----------
    +837        idx : :obj:`~numpy.array`
    +838            Edge indices for each dimension (MxN).
    +839        V : :obj:`~numpy.array`
    +840            Array of intensity data (Mx1).
    +841        radius : float or list
    +842            Radius of the sparse filter in each dimension. Values less than
    +843            zero indicate no connectivity in that dimension.
    +844
    +845        Returns
    +846        -------
    +847        :obj:`~numpy.array`
    +848            Filtered intensities (Mx1).
    +849
    +850        Notes
    +851        -----
    +852        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos.
    +853        This is a static method.
    +854        """
    +855
    +856        # Copy indices
    +857        idx = idx.copy().astype(V.dtype)
    +858
    +859        # Scale
    +860        for i, r in enumerate(radius):
    +861            # Increase inter-index distance
    +862            if r < 1:
    +863                idx[:, i] *= 2
    +864
    +865            # Do nothing
    +866            elif r == 1:
    +867                pass
    +868
    +869            # Decrease inter-index distance
    +870            else:
    +871                idx[:, i] /= r
    +872
    +873        # Connectivity matrix
    +874        cmat = KDTree(idx)
    +875        cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix")
    +876        cmat.setdiag(1)
    +877
    +878        # Pair indices
    +879        I, J = cmat.nonzero()
    +880
    +881        # Delete cmat
    +882        cmat_shape = cmat.shape
    +883        del cmat
    +884
    +885        # Sum over columns
    +886        V_sum = sparse.bsr_matrix(
    +887            (V[J], (I, I)), shape=cmat_shape, dtype=V.dtype
    +888        ).diagonal(0)
    +889
    +890        # Count over columns
    +891        V_count = sparse.bsr_matrix(
    +892            (np.ones_like(J), (I, I)), shape=cmat_shape, dtype=V.dtype
    +893        ).diagonal(0)
    +894
    +895        return V_sum / V_count
     
    @@ -4303,80 +4305,80 @@
    Notes
    -
    895    @staticmethod
    -896    def embed_unique_indices(a):
    -897        """Creates an array of indices, sorted by unique element.
    -898
    -899        Parameters
    -900        ----------
    -901        a : :obj:`~numpy.array`
    -902            Array of unique elements (Mx1).
    -903
    -904        Returns
    -905        -------
    -906        :obj:`~numpy.array`
    -907            Array of indices (Mx1).
    -908
    -909        Notes
    -910        -----
    -911        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    -912        This is a static method.
    -913        """
    -914
    -915        def count_tens(n):
    -916            # Count tens
    -917            ntens = (n - 1) // 10
    -918
    -919            while True:
    -920                ntens_test = (ntens + n - 1) // 10
    -921
    -922                if ntens_test == ntens:
    -923                    return ntens
    -924                else:
    -925                    ntens = ntens_test
    -926
    -927        def arange_exclude_10s(n):
    -928            # How many 10s will there be?
    -929            ntens = count_tens(n)
    -930
    -931            # Base array
    -932            arr = np.arange(0, n + ntens)
    -933
    -934            # Exclude 10s
    -935            arr = arr[(arr == 0) | (arr % 10 != 0)][:n]
    -936
    -937            return arr
    +            
    897    @staticmethod
    +898    def embed_unique_indices(a):
    +899        """Creates an array of indices, sorted by unique element.
    +900
    +901        Parameters
    +902        ----------
    +903        a : :obj:`~numpy.array`
    +904            Array of unique elements (Mx1).
    +905
    +906        Returns
    +907        -------
    +908        :obj:`~numpy.array`
    +909            Array of indices (Mx1).
    +910
    +911        Notes
    +912        -----
    +913        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    +914        This is a static method.
    +915        """
    +916
    +917        def count_tens(n):
    +918            # Count tens
    +919            ntens = (n - 1) // 10
    +920
    +921            while True:
    +922                ntens_test = (ntens + n - 1) // 10
    +923
    +924                if ntens_test == ntens:
    +925                    return ntens
    +926                else:
    +927                    ntens = ntens_test
    +928
    +929        def arange_exclude_10s(n):
    +930            # How many 10s will there be?
    +931            ntens = count_tens(n)
    +932
    +933            # Base array
    +934            arr = np.arange(0, n + ntens)
    +935
    +936            # Exclude 10s
    +937            arr = arr[(arr == 0) | (arr % 10 != 0)][:n]
     938
    -939        # Creates an array of indices, sorted by unique element
    -940        idx_sort = np.argsort(a)
    -941        idx_unsort = np.argsort(idx_sort)
    -942
    -943        # Sorts records array so all unique elements are together
    -944        sorted_a = a[idx_sort]
    -945
    -946        # Returns the unique values, the index of the first occurrence,
    -947        # and the count for each element
    -948        vals, idx_start, count = np.unique(
    -949            sorted_a, return_index=True, return_counts=True
    -950        )
    -951
    -952        # Splits the indices into separate arrays
    -953        splits = np.split(idx_sort, idx_start[1:])
    -954
    -955        # Creates unique indices for each split
    -956        idx_unq = np.concatenate([arange_exclude_10s(len(x)) for x in splits])
    -957
    -958        # Reorders according to input array
    -959        idx_unq = idx_unq[idx_unsort]
    -960
    -961        # Magnitude of each index
    -962        exp = np.log10(
    -963            idx_unq, where=idx_unq > 0, out=np.zeros_like(idx_unq, dtype=np.float64)
    -964        )
    -965        idx_unq_mag = np.power(10, np.floor(exp) + 1)
    -966
    -967        # Result
    -968        return a + idx_unq / idx_unq_mag
    +939            return arr
    +940
    +941        # Creates an array of indices, sorted by unique element
    +942        idx_sort = np.argsort(a)
    +943        idx_unsort = np.argsort(idx_sort)
    +944
    +945        # Sorts records array so all unique elements are together
    +946        sorted_a = a[idx_sort]
    +947
    +948        # Returns the unique values, the index of the first occurrence,
    +949        # and the count for each element
    +950        vals, idx_start, count = np.unique(
    +951            sorted_a, return_index=True, return_counts=True
    +952        )
    +953
    +954        # Splits the indices into separate arrays
    +955        splits = np.split(idx_sort, idx_start[1:])
    +956
    +957        # Creates unique indices for each split
    +958        idx_unq = np.concatenate([arange_exclude_10s(len(x)) for x in splits])
    +959
    +960        # Reorders according to input array
    +961        idx_unq = idx_unq[idx_unsort]
    +962
    +963        # Magnitude of each index
    +964        exp = np.log10(
    +965            idx_unq, where=idx_unq > 0, out=np.zeros_like(idx_unq, dtype=np.float64)
    +966        )
    +967        idx_unq_mag = np.power(10, np.floor(exp) + 1)
    +968
    +969        # Result
    +970        return a + idx_unq / idx_unq_mag
     
    @@ -4414,66 +4416,66 @@
    Notes
    -
     970    def sparse_upper_star(self, idx, V):
    - 971        """Sparse implementation of an upper star filtration.
    - 972
    - 973        Parameters
    - 974        ----------
    - 975        idx : :obj:`~numpy.array`
    - 976            Edge indices for each dimension (MxN).
    - 977        V : :obj:`~numpy.array`
    - 978            Array of intensity data (Mx1).
    - 979        Returns
    - 980        -------
    - 981        idx : :obj:`~numpy.array`
    - 982            Index of filtered points (Mx1).
    - 983        persistence : :obj:`~numpy.array`
    - 984            Persistence of each filtered point (Mx1).
    - 985
    - 986        Notes
    - 987        -----
    - 988        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    - 989        """
    - 990
    - 991        # Invert
    - 992        V = -1 * V.copy().astype(int)
    - 993
    - 994        # Embed indices
    - 995        V = self.embed_unique_indices(V)
    - 996
    - 997        # Connectivity matrix
    - 998        cmat = KDTree(idx)
    - 999        cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix")
    -1000        cmat.setdiag(1)
    -1001        cmat = sparse.triu(cmat)
    -1002
    -1003        # Pairwise minimums
    -1004        I, J = cmat.nonzero()
    -1005        d = np.maximum(V[I], V[J])
    -1006
    -1007        # Delete connectiity matrix
    -1008        cmat_shape = cmat.shape
    -1009        del cmat
    -1010
    -1011        # Sparse distance matrix
    -1012        sdm = sparse.coo_matrix((d, (I, J)), shape=cmat_shape)
    -1013
    -1014        # Delete pairwise mins
    -1015        del d, I, J
    -1016
    -1017        # Persistence homology
    -1018        ph = ripser(sdm, distance_matrix=True, maxdim=0)["dgms"][0]
    -1019
    -1020        # Bound death values
    -1021        ph[ph[:, 1] == np.inf, 1] = np.max(V)
    -1022
    -1023        # Construct tree to query against
    -1024        tree = KDTree(V.reshape((-1, 1)))
    -1025
    -1026        # Get the indexes of the first nearest neighbor by birth
    -1027        _, nn = tree.query(ph[:, 0].reshape((-1, 1)), k=1, workers=-1)
    -1028
    -1029        return nn, -(ph[:, 0] // 1 - ph[:, 1] // 1)
    +            
     972    def sparse_upper_star(self, idx, V):
    + 973        """Sparse implementation of an upper star filtration.
    + 974
    + 975        Parameters
    + 976        ----------
    + 977        idx : :obj:`~numpy.array`
    + 978            Edge indices for each dimension (MxN).
    + 979        V : :obj:`~numpy.array`
    + 980            Array of intensity data (Mx1).
    + 981        Returns
    + 982        -------
    + 983        idx : :obj:`~numpy.array`
    + 984            Index of filtered points (Mx1).
    + 985        persistence : :obj:`~numpy.array`
    + 986            Persistence of each filtered point (Mx1).
    + 987
    + 988        Notes
    + 989        -----
    + 990        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    + 991        """
    + 992
    + 993        # Invert
    + 994        V = -1 * V.copy().astype(int)
    + 995
    + 996        # Embed indices
    + 997        V = self.embed_unique_indices(V)
    + 998
    + 999        # Connectivity matrix
    +1000        cmat = KDTree(idx)
    +1001        cmat = cmat.sparse_distance_matrix(cmat, 1, p=np.inf, output_type="coo_matrix")
    +1002        cmat.setdiag(1)
    +1003        cmat = sparse.triu(cmat)
    +1004
    +1005        # Pairwise minimums
    +1006        I, J = cmat.nonzero()
    +1007        d = np.maximum(V[I], V[J])
    +1008
    +1009        # Delete connectiity matrix
    +1010        cmat_shape = cmat.shape
    +1011        del cmat
    +1012
    +1013        # Sparse distance matrix
    +1014        sdm = sparse.coo_matrix((d, (I, J)), shape=cmat_shape)
    +1015
    +1016        # Delete pairwise mins
    +1017        del d, I, J
    +1018
    +1019        # Persistence homology
    +1020        ph = ripser(sdm, distance_matrix=True, maxdim=0)["dgms"][0]
    +1021
    +1022        # Bound death values
    +1023        ph[ph[:, 1] == np.inf, 1] = np.max(V)
    +1024
    +1025        # Construct tree to query against
    +1026        tree = KDTree(V.reshape((-1, 1)))
    +1027
    +1028        # Get the indexes of the first nearest neighbor by birth
    +1029        _, nn = tree.query(ph[:, 0].reshape((-1, 1)), k=1, workers=-1)
    +1030
    +1031        return nn, -(ph[:, 0] // 1 - ph[:, 1] // 1)
     
    @@ -4515,43 +4517,43 @@
    Notes
    -
    1031    def check_if_grid(self, data):
    -1032        """Check if the data are gridded in mz space.
    -1033
    -1034        Parameters
    -1035        ----------
    -1036        data : DataFrame
    -1037            DataFrame containing the mass spectrometry data.  Needs to have mz and scan columns.
    -1038
    -1039        Returns
    -1040        -------
    -1041        bool
    -1042            True if the data is gridded in the mz direction, False otherwise.
    -1043
    -1044        Notes
    -1045        -----
    -1046        This function is used within the grid_data function and the find_mass_features function and is not intended to be called directly.
    -1047        """
    -1048        # Calculate the difference between consecutive mz values in a single scan
    -1049        dat_check = data.copy().reset_index(drop=True)
    -1050        dat_check["mz_diff"] = np.abs(dat_check["mz"].diff())
    -1051        mz_diff_min = (
    -1052            dat_check.groupby("scan")["mz_diff"].min().min()
    -1053        )  # within each scan, what is the smallest mz difference between consecutive mz values
    -1054
    -1055        # Find the mininum mz difference between mz values in the data; regardless of scan
    -1056        dat_check_mz = dat_check[["mz"]].drop_duplicates().copy()
    -1057        dat_check_mz = dat_check_mz.sort_values(by=["mz"]).reset_index(drop=True)
    -1058        dat_check_mz["mz_diff"] = np.abs(dat_check_mz["mz"].diff())
    -1059
    -1060        # Get minimum mz_diff between mz values in the data
    -1061        mz_diff_min_raw = dat_check_mz["mz_diff"].min()
    -1062
    -1063        # If the minimum mz difference between mz values in the data is less than the minimum mz difference between mz values within a single scan, then the data is not gridded
    -1064        if mz_diff_min_raw < mz_diff_min:
    -1065            return False
    -1066        else:
    -1067            return True
    +            
    1033    def check_if_grid(self, data):
    +1034        """Check if the data are gridded in mz space.
    +1035
    +1036        Parameters
    +1037        ----------
    +1038        data : DataFrame
    +1039            DataFrame containing the mass spectrometry data.  Needs to have mz and scan columns.
    +1040
    +1041        Returns
    +1042        -------
    +1043        bool
    +1044            True if the data is gridded in the mz direction, False otherwise.
    +1045
    +1046        Notes
    +1047        -----
    +1048        This function is used within the grid_data function and the find_mass_features function and is not intended to be called directly.
    +1049        """
    +1050        # Calculate the difference between consecutive mz values in a single scan
    +1051        dat_check = data.copy().reset_index(drop=True)
    +1052        dat_check["mz_diff"] = np.abs(dat_check["mz"].diff())
    +1053        mz_diff_min = (
    +1054            dat_check.groupby("scan")["mz_diff"].min().min()
    +1055        )  # within each scan, what is the smallest mz difference between consecutive mz values
    +1056
    +1057        # Find the mininum mz difference between mz values in the data; regardless of scan
    +1058        dat_check_mz = dat_check[["mz"]].drop_duplicates().copy()
    +1059        dat_check_mz = dat_check_mz.sort_values(by=["mz"]).reset_index(drop=True)
    +1060        dat_check_mz["mz_diff"] = np.abs(dat_check_mz["mz"].diff())
    +1061
    +1062        # Get minimum mz_diff between mz values in the data
    +1063        mz_diff_min_raw = dat_check_mz["mz_diff"].min()
    +1064
    +1065        # If the minimum mz difference between mz values in the data is less than the minimum mz difference between mz values within a single scan, then the data is not gridded
    +1066        if mz_diff_min_raw < mz_diff_min:
    +1067            return False
    +1068        else:
    +1069            return True
     
    @@ -4588,89 +4590,89 @@
    Notes
    -
    1069    def grid_data(self, data):
    -1070        """Grid the data in the mz dimension.
    -1071
    -1072        Data must be gridded prior to persistent homology calculations.
    +            
    1071    def grid_data(self, data):
    +1072        """Grid the data in the mz dimension.
     1073
    -1074        Parameters
    -1075        ----------
    -1076        data : DataFrame
    -1077            The input data containing mz, scan, scan_time, and intensity columns.
    -1078
    -1079        Returns
    -1080        -------
    -1081        DataFrame
    -1082            The gridded data with mz, scan, scan_time, and intensity columns.
    -1083
    -1084        Raises
    -1085        ------
    -1086        ValueError
    -1087            If gridding fails.
    -1088        """
    -1089
    -1090        # Calculate the difference between consecutive mz values in a single scan for grid spacing
    -1091        data_w = data.copy().reset_index(drop=True)
    -1092        data_w["mz_diff"] = np.abs(data_w["mz"].diff())
    -1093        mz_diff_min = data_w.groupby("scan")["mz_diff"].min().min() * 0.99999
    -1094
    -1095        # Need high intensity mz values first so they are parents in the output pairs stack
    -1096        dat_mz = data_w[["mz", "intensity"]].sort_values(
    -1097            by=["intensity"], ascending=False
    -1098        )
    -1099        dat_mz = dat_mz[["mz"]].drop_duplicates().reset_index(drop=True).copy()
    -1100
    -1101        # Construct KD tree
    -1102        tree = KDTree(dat_mz.mz.values.reshape(-1, 1))
    -1103        sdm = tree.sparse_distance_matrix(tree, mz_diff_min, output_type="coo_matrix")
    -1104        sdm = sparse.triu(sdm, k=1)
    -1105        sdm.data = np.ones_like(sdm.data)
    -1106        distances = sdm.tocoo()
    -1107        pairs = np.stack((distances.row, distances.col), axis=1)
    -1108
    -1109        # Cull pairs to just get root
    -1110        to_drop = []
    -1111        while len(pairs) > 0:
    -1112            root_parents = np.setdiff1d(np.unique(pairs[:, 0]), np.unique(pairs[:, 1]))
    -1113            id_root_parents = np.isin(pairs[:, 0], root_parents)
    -1114            children_of_roots = np.unique(pairs[id_root_parents, 1])
    -1115            to_drop = np.append(to_drop, children_of_roots)
    -1116
    -1117            # Set up pairs array for next iteration by removing pairs with children or parents already dropped
    -1118            pairs = pairs[~np.isin(pairs[:, 1], to_drop), :]
    -1119            pairs = pairs[~np.isin(pairs[:, 0], to_drop), :]
    -1120        dat_mz = dat_mz.reset_index(drop=True).drop(index=np.array(to_drop))
    -1121        mz_dat_np = (
    -1122            dat_mz[["mz"]]
    -1123            .sort_values(by=["mz"])
    -1124            .reset_index(drop=True)
    -1125            .values.flatten()
    -1126        )
    -1127
    -1128        # Sort data by mz and recast mz to nearest value in mz_dat_np
    -1129        data_w = data_w.sort_values(by=["mz"]).reset_index(drop=True).copy()
    -1130        data_w["mz_new"] = mz_dat_np[find_closest(mz_dat_np, data_w["mz"].values)]
    -1131        data_w["mz_diff"] = np.abs(data_w["mz"] - data_w["mz_new"])
    -1132
    -1133        # Rename mz_new as mz; drop mz_diff; groupby scan and mz and sum intensity
    -1134        new_data_w = data_w.rename(columns={"mz": "mz_orig", "mz_new": "mz"}).copy()
    -1135        new_data_w = (
    -1136            new_data_w.drop(columns=["mz_diff", "mz_orig"])
    -1137            .groupby(["scan", "mz"])["intensity"]
    -1138            .sum()
    -1139            .reset_index()
    -1140        )
    -1141        new_data_w = (
    -1142            new_data_w.sort_values(by=["scan", "mz"], ascending=[True, True])
    -1143            .reset_index(drop=True)
    -1144            .copy()
    -1145        )
    -1146
    -1147        # Check if grid worked and return
    -1148        if self.check_if_grid(new_data_w):
    -1149            return new_data_w
    -1150        else:
    -1151            raise ValueError("Gridding failed")
    +1074        Data must be gridded prior to persistent homology calculations.
    +1075
    +1076        Parameters
    +1077        ----------
    +1078        data : DataFrame
    +1079            The input data containing mz, scan, scan_time, and intensity columns.
    +1080
    +1081        Returns
    +1082        -------
    +1083        DataFrame
    +1084            The gridded data with mz, scan, scan_time, and intensity columns.
    +1085
    +1086        Raises
    +1087        ------
    +1088        ValueError
    +1089            If gridding fails.
    +1090        """
    +1091
    +1092        # Calculate the difference between consecutive mz values in a single scan for grid spacing
    +1093        data_w = data.copy().reset_index(drop=True)
    +1094        data_w["mz_diff"] = np.abs(data_w["mz"].diff())
    +1095        mz_diff_min = data_w.groupby("scan")["mz_diff"].min().min() * 0.99999
    +1096
    +1097        # Need high intensity mz values first so they are parents in the output pairs stack
    +1098        dat_mz = data_w[["mz", "intensity"]].sort_values(
    +1099            by=["intensity"], ascending=False
    +1100        )
    +1101        dat_mz = dat_mz[["mz"]].drop_duplicates().reset_index(drop=True).copy()
    +1102
    +1103        # Construct KD tree
    +1104        tree = KDTree(dat_mz.mz.values.reshape(-1, 1))
    +1105        sdm = tree.sparse_distance_matrix(tree, mz_diff_min, output_type="coo_matrix")
    +1106        sdm = sparse.triu(sdm, k=1)
    +1107        sdm.data = np.ones_like(sdm.data)
    +1108        distances = sdm.tocoo()
    +1109        pairs = np.stack((distances.row, distances.col), axis=1)
    +1110
    +1111        # Cull pairs to just get root
    +1112        to_drop = []
    +1113        while len(pairs) > 0:
    +1114            root_parents = np.setdiff1d(np.unique(pairs[:, 0]), np.unique(pairs[:, 1]))
    +1115            id_root_parents = np.isin(pairs[:, 0], root_parents)
    +1116            children_of_roots = np.unique(pairs[id_root_parents, 1])
    +1117            to_drop = np.append(to_drop, children_of_roots)
    +1118
    +1119            # Set up pairs array for next iteration by removing pairs with children or parents already dropped
    +1120            pairs = pairs[~np.isin(pairs[:, 1], to_drop), :]
    +1121            pairs = pairs[~np.isin(pairs[:, 0], to_drop), :]
    +1122        dat_mz = dat_mz.reset_index(drop=True).drop(index=np.array(to_drop))
    +1123        mz_dat_np = (
    +1124            dat_mz[["mz"]]
    +1125            .sort_values(by=["mz"])
    +1126            .reset_index(drop=True)
    +1127            .values.flatten()
    +1128        )
    +1129
    +1130        # Sort data by mz and recast mz to nearest value in mz_dat_np
    +1131        data_w = data_w.sort_values(by=["mz"]).reset_index(drop=True).copy()
    +1132        data_w["mz_new"] = mz_dat_np[find_closest(mz_dat_np, data_w["mz"].values)]
    +1133        data_w["mz_diff"] = np.abs(data_w["mz"] - data_w["mz_new"])
    +1134
    +1135        # Rename mz_new as mz; drop mz_diff; groupby scan and mz and sum intensity
    +1136        new_data_w = data_w.rename(columns={"mz": "mz_orig", "mz_new": "mz"}).copy()
    +1137        new_data_w = (
    +1138            new_data_w.drop(columns=["mz_diff", "mz_orig"])
    +1139            .groupby(["scan", "mz"])["intensity"]
    +1140            .sum()
    +1141            .reset_index()
    +1142        )
    +1143        new_data_w = (
    +1144            new_data_w.sort_values(by=["scan", "mz"], ascending=[True, True])
    +1145            .reset_index(drop=True)
    +1146            .copy()
    +1147        )
    +1148
    +1149        # Check if grid worked and return
    +1150        if self.check_if_grid(new_data_w):
    +1151            return new_data_w
    +1152        else:
    +1153            raise ValueError("Gridding failed")
     
    @@ -4711,142 +4713,141 @@
    Raises
    -
    1153    def find_mass_features_ph(self, ms_level=1, grid=True):
    -1154        """Find mass features within an LCMSBase object using persistent homology.
    -1155
    -1156        Assigns the mass_features attribute to the object (a dictionary of LCMSMassFeature objects, keyed by mass feature id)
    +            
    1155    def find_mass_features_ph(self, ms_level=1, grid=True):
    +1156        """Find mass features within an LCMSBase object using persistent homology.
     1157
    -1158        Parameters
    -1159        ----------
    -1160        ms_level : int, optional
    -1161            The MS level to use. Default is 1.
    -1162        grid : bool, optional
    -1163            If True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded). Default is True.
    -1164
    -1165        Raises
    -1166        ------
    -1167        ValueError
    -1168            If no MS level data is found on the object.
    -1169            If data is not gridded and grid is False.
    -1170
    -1171        Returns
    -1172        -------
    -1173        None, but assigns the mass_features attribute to the object.
    -1174
    -1175        Notes
    -1176        -----
    -1177        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    -1178        """
    -1179        # Check that ms_level is a key in self._ms_uprocessed
    -1180        if ms_level not in self._ms_unprocessed.keys():
    -1181            raise ValueError(
    -1182                "No MS level "
    -1183                + str(ms_level)
    -1184                + " data found, did you instantiate with parser specific to MS level?"
    -1185            )
    -1186
    -1187        # Get ms data
    -1188        data = self._ms_unprocessed[ms_level].copy()
    -1189
    -1190        # Drop rows with missing intensity values and reset index
    -1191        data = data.dropna(subset=["intensity"]).reset_index(drop=True)
    -1192
    -1193
    -1194        # Threshold data
    -1195        dims = ["mz", "scan_time"]
    -1196        threshold = self.parameters.lc_ms.ph_inten_min_rel * data.intensity.max()
    -1197        data_thres = data[data["intensity"] > threshold].reset_index(drop=True).copy()
    -1198
    -1199        # Check if gridded, if not, grid
    -1200        gridded_mz = self.check_if_grid(data_thres)
    -1201        if gridded_mz is False:
    -1202            if grid is False:
    -1203                raise ValueError(
    -1204                    "Data are not gridded in mz dimension, try reprocessing with a different params or grid data before running this function"
    -1205                )
    -1206            else:
    -1207                data_thres = self.grid_data(data_thres)
    -1208
    -1209        # Add build factors and add scan_time
    -1210        data_thres = data_thres.merge(self.scan_df[["scan", "scan_time"]], on="scan")
    -1211        factors = {
    -1212            dim: pd.factorize(data_thres[dim], sort=True)[1].astype(np.float32)
    -1213            for dim in dims
    -1214        }  # this is return a float64 index
    -1215
    -1216        # Build indexes
    -1217        index = {
    -1218            dim: np.searchsorted(factors[dim], data_thres[dim]).astype(np.float32)
    -1219            for dim in factors
    -1220        }
    -1221
    -1222        # Smooth data
    -1223        iterations = self.parameters.lc_ms.ph_smooth_it
    -1224        smooth_radius = [
    -1225            self.parameters.lc_ms.ph_smooth_radius_mz,
    -1226            self.parameters.lc_ms.ph_smooth_radius_scan,
    -1227        ]  # mz, scan_time smoothing radius (in steps)
    -1228
    -1229        index = np.vstack([index[dim] for dim in dims]).T
    -1230        V = data_thres["intensity"].values
    -1231        resid = np.inf
    -1232        for i in range(iterations):
    -1233            # Previous iteration
    -1234            V_prev = V.copy()
    -1235            resid_prev = resid
    -1236            V = self.sparse_mean_filter(index, V, radius=smooth_radius)
    -1237
    -1238            # Calculate residual with previous iteration
    -1239            resid = np.sqrt(np.mean(np.square(V - V_prev)))
    -1240
    -1241            # Evaluate convergence
    -1242            if i > 0:
    -1243                # Percent change in residual
    -1244                test = np.abs(resid - resid_prev) / resid_prev
    -1245
    -1246                # Exit criteria
    -1247                if test <= 0:
    -1248                    break
    -1249
    -1250        # Overwrite values
    -1251        data_thres["intensity"] = V
    -1252
    -1253        # Use persistent homology to find regions of interest
    -1254        pidx, pers = self.sparse_upper_star(index, V)
    -1255        pidx = pidx[pers > 1]
    -1256        pers = pers[pers > 1]
    -1257
    -1258        # Get peaks
    -1259        peaks = data_thres.iloc[pidx, :].reset_index(drop=True)
    -1260
    -1261        # Add persistence column
    -1262        peaks["persistence"] = pers
    -1263        mass_features = peaks.sort_values(
    -1264            by="persistence", ascending=False
    -1265        ).reset_index(drop=True)
    -1266
    -1267        # Filter by persistence threshold
    -1268        persistence_threshold = (
    -1269            self.parameters.lc_ms.ph_persis_min_rel * data.intensity.max()
    -1270        )
    -1271        mass_features = mass_features.loc[
    -1272            mass_features["persistence"] > persistence_threshold, :
    -1273        ].reset_index(drop=True)
    -1274
    -1275        # Rename scan column to apex_scan
    -1276        mass_features = mass_features.rename(
    -1277            columns={"scan": "apex_scan", "scan_time": "retention_time"}
    -1278        )
    -1279
    -1280        # Populate mass_features attribute
    -1281        self.mass_features = {}
    -1282        for row in mass_features.itertuples():
    -1283            row_dict = mass_features.iloc[row.Index].to_dict()
    -1284            lcms_feature = LCMSMassFeature(self, **row_dict)
    -1285            self.mass_features[lcms_feature.id] = lcms_feature
    -1286
    -1287        if self.parameters.lc_ms.verbose_processing:
    -1288            print("Found " + str(len(mass_features)) + " initial mass features")
    +1158        Assigns the mass_features attribute to the object (a dictionary of LCMSMassFeature objects, keyed by mass feature id)
    +1159
    +1160        Parameters
    +1161        ----------
    +1162        ms_level : int, optional
    +1163            The MS level to use. Default is 1.
    +1164        grid : bool, optional
    +1165            If True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded). Default is True.
    +1166
    +1167        Raises
    +1168        ------
    +1169        ValueError
    +1170            If no MS level data is found on the object.
    +1171            If data is not gridded and grid is False.
    +1172
    +1173        Returns
    +1174        -------
    +1175        None, but assigns the mass_features attribute to the object.
    +1176
    +1177        Notes
    +1178        -----
    +1179        This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos
    +1180        """
    +1181        # Check that ms_level is a key in self._ms_uprocessed
    +1182        if ms_level not in self._ms_unprocessed.keys():
    +1183            raise ValueError(
    +1184                "No MS level "
    +1185                + str(ms_level)
    +1186                + " data found, did you instantiate with parser specific to MS level?"
    +1187            )
    +1188
    +1189        # Get ms data
    +1190        data = self._ms_unprocessed[ms_level].copy()
    +1191
    +1192        # Drop rows with missing intensity values and reset index
    +1193        data = data.dropna(subset=["intensity"]).reset_index(drop=True)
    +1194
    +1195        # Threshold data
    +1196        dims = ["mz", "scan_time"]
    +1197        threshold = self.parameters.lc_ms.ph_inten_min_rel * data.intensity.max()
    +1198        data_thres = data[data["intensity"] > threshold].reset_index(drop=True).copy()
    +1199
    +1200        # Check if gridded, if not, grid
    +1201        gridded_mz = self.check_if_grid(data_thres)
    +1202        if gridded_mz is False:
    +1203            if grid is False:
    +1204                raise ValueError(
    +1205                    "Data are not gridded in mz dimension, try reprocessing with a different params or grid data before running this function"
    +1206                )
    +1207            else:
    +1208                data_thres = self.grid_data(data_thres)
    +1209
    +1210        # Add build factors and add scan_time
    +1211        data_thres = data_thres.merge(self.scan_df[["scan", "scan_time"]], on="scan")
    +1212        factors = {
    +1213            dim: pd.factorize(data_thres[dim], sort=True)[1].astype(np.float32)
    +1214            for dim in dims
    +1215        }  # this is return a float64 index
    +1216
    +1217        # Build indexes
    +1218        index = {
    +1219            dim: np.searchsorted(factors[dim], data_thres[dim]).astype(np.float32)
    +1220            for dim in factors
    +1221        }
    +1222
    +1223        # Smooth data
    +1224        iterations = self.parameters.lc_ms.ph_smooth_it
    +1225        smooth_radius = [
    +1226            self.parameters.lc_ms.ph_smooth_radius_mz,
    +1227            self.parameters.lc_ms.ph_smooth_radius_scan,
    +1228        ]  # mz, scan_time smoothing radius (in steps)
    +1229
    +1230        index = np.vstack([index[dim] for dim in dims]).T
    +1231        V = data_thres["intensity"].values
    +1232        resid = np.inf
    +1233        for i in range(iterations):
    +1234            # Previous iteration
    +1235            V_prev = V.copy()
    +1236            resid_prev = resid
    +1237            V = self.sparse_mean_filter(index, V, radius=smooth_radius)
    +1238
    +1239            # Calculate residual with previous iteration
    +1240            resid = np.sqrt(np.mean(np.square(V - V_prev)))
    +1241
    +1242            # Evaluate convergence
    +1243            if i > 0:
    +1244                # Percent change in residual
    +1245                test = np.abs(resid - resid_prev) / resid_prev
    +1246
    +1247                # Exit criteria
    +1248                if test <= 0:
    +1249                    break
    +1250
    +1251        # Overwrite values
    +1252        data_thres["intensity"] = V
    +1253
    +1254        # Use persistent homology to find regions of interest
    +1255        pidx, pers = self.sparse_upper_star(index, V)
    +1256        pidx = pidx[pers > 1]
    +1257        pers = pers[pers > 1]
    +1258
    +1259        # Get peaks
    +1260        peaks = data_thres.iloc[pidx, :].reset_index(drop=True)
    +1261
    +1262        # Add persistence column
    +1263        peaks["persistence"] = pers
    +1264        mass_features = peaks.sort_values(
    +1265            by="persistence", ascending=False
    +1266        ).reset_index(drop=True)
    +1267
    +1268        # Filter by persistence threshold
    +1269        persistence_threshold = (
    +1270            self.parameters.lc_ms.ph_persis_min_rel * data.intensity.max()
    +1271        )
    +1272        mass_features = mass_features.loc[
    +1273            mass_features["persistence"] > persistence_threshold, :
    +1274        ].reset_index(drop=True)
    +1275
    +1276        # Rename scan column to apex_scan
    +1277        mass_features = mass_features.rename(
    +1278            columns={"scan": "apex_scan", "scan_time": "retention_time"}
    +1279        )
    +1280
    +1281        # Populate mass_features attribute
    +1282        self.mass_features = {}
    +1283        for row in mass_features.itertuples():
    +1284            row_dict = mass_features.iloc[row.Index].to_dict()
    +1285            lcms_feature = LCMSMassFeature(self, **row_dict)
    +1286            self.mass_features[lcms_feature.id] = lcms_feature
    +1287
    +1288        if self.parameters.lc_ms.verbose_processing:
    +1289            print("Found " + str(len(mass_features)) + " initial mass features")
     
    @@ -4894,141 +4895,139 @@
    Notes
    -
    1290    def cluster_mass_features(
    -1291        self, drop_children=True, sort_by="persistence"
    -1292    ):
    -1293        """Cluster mass features
    -1294
    -1295        Based on their proximity in the mz and scan_time dimensions, priorizies the mass features with the highest persistence.
    -1296
    -1297        Parameters
    -1298        ----------
    -1299        drop_children : bool, optional
    -1300            Whether to drop the mass features that are not cluster parents. Default is True.
    -1301        sort_by : str, optional
    -1302            The column to sort the mass features by, this will determine which mass features get rolled up into a parent mass feature. Default is "persistence".
    -1303
    -1304        Raises
    -1305        ------
    -1306        ValueError
    -1307            If no mass features are found.
    -1308            If too many mass features are found.
    -1309
    -1310        Returns
    -1311        -------
    -1312        None if drop_children is True, otherwise returns a list of mass feature ids that are not cluster parents.
    -1313        """
    -1314        verbose = self.parameters.lc_ms.verbose_processing
    -1315
    -1316        if self.mass_features is None:
    -1317            raise ValueError("No mass features found, run find_mass_features() first")
    -1318        if len(self.mass_features) > 400000:
    -1319            raise ValueError(
    -1320                "Too many mass featuers of interest found, run find_mass_features() with a higher intensity threshold"
    -1321            )
    -1322        dims = ["mz", "scan_time"]
    -1323        mf_df_og = self.mass_features_to_df()
    -1324        mf_df = mf_df_og.copy()
    -1325
    -1326        # Sort mass features by sort_by column, make mf_id its own column for easier bookkeeping
    -1327        mf_df = mf_df.sort_values(by=sort_by, ascending=False).reset_index(drop=False)
    -1328
    -1329        tol = [
    -1330            self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel,
    -1331            self.parameters.lc_ms.mass_feature_cluster_rt_tolerance,
    -1332        ]  # mz, in relative; scan_time in minutes
    -1333        relative = [True, False]
    -1334
    -1335        # Compute inter-feature distances
    -1336        distances = None
    -1337        for i in range(len(dims)):
    -1338            # Construct k-d tree
    -1339            values = mf_df[dims[i]].values
    -1340            tree = KDTree(values.reshape(-1, 1))
    -1341
    -1342            max_tol = tol[i]
    -1343            if relative[i] is True:
    -1344                # Maximum absolute tolerance
    -1345                max_tol = tol[i] * values.max()
    -1346
    -1347            # Compute sparse distance matrix
    -1348            # the larger the max_tol, the slower this operation is
    -1349            sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix")
    -1350
    -1351            # Only consider forward case, exclude diagonal
    -1352            sdm = sparse.triu(sdm, k=1)
    -1353
    -1354            # Filter relative distances
    -1355            if relative[i] is True:
    -1356                # Compute relative distances
    -1357                rel_dists = sdm.data / values[sdm.row]  # or col?
    -1358
    -1359                # Indices of relative distances less than tolerance
    -1360                idx = rel_dists <= tol[i]
    -1361
    -1362                # Reconstruct sparse distance matrix
    -1363                sdm = sparse.coo_matrix(
    -1364                    (rel_dists[idx], (sdm.row[idx], sdm.col[idx])),
    -1365                    shape=(len(values), len(values)),
    -1366                )
    -1367
    -1368            # Cast as binary matrix
    -1369            sdm.data = np.ones_like(sdm.data)
    -1370
    -1371            # Stack distances
    -1372            if distances is None:
    -1373                distances = sdm
    -1374            else:
    -1375                distances = distances.multiply(sdm)
    -1376
    -1377        # Extract indices of within-tolerance points
    -1378        distances = distances.tocoo()
    -1379        pairs = np.stack((distances.row, distances.col), axis=1)
    -1380        pairs_df = pd.DataFrame(pairs, columns=["parent", "child"])
    -1381        pairs_df = pairs_df.set_index("parent")
    -1382
    -1383        to_drop = []
    -1384        while not pairs_df.empty:
    -1385            # Find root_parents and their children
    -1386            root_parents = np.setdiff1d(np.unique(pairs_df.index.values), np.unique(pairs_df.child.values))
    -1387            children_of_roots = pairs_df.loc[root_parents, "child"].unique()
    -1388            to_drop = np.append(to_drop, children_of_roots)
    -1389
    -1390            # Remove root_children as possible parents from pairs_df for next iteration
    -1391            pairs_df = pairs_df.drop(
    -1392                index=children_of_roots, errors="ignore"
    -1393            )  
    -1394            pairs_df = pairs_df.reset_index().set_index("child")
    -1395            # Remove root_children as possible children from pairs_df for next iteration
    -1396            pairs_df = pairs_df.drop(index=children_of_roots)
    -1397
    -1398            # Prepare for next iteration
    -1399            pairs_df = pairs_df.reset_index().set_index("parent")
    -1400
    -1401        # Drop mass features that are not cluster parents
    -1402        mf_df = mf_df.drop(index=np.array(to_drop))
    -1403
    -1404        # Set index back to mf_id
    -1405        mf_df = mf_df.set_index("mf_id")
    -1406        if verbose:
    -1407            print(str(len(mf_df)) + " mass features remaining")
    -1408
    -1409        mf_df_new = mf_df_og.copy()
    -1410        mf_df_new["cluster_parent"] = np.where(
    -1411            np.isin(mf_df_new.index, mf_df.index), True, False
    -1412        )
    -1413
    -1414        # get mass feature ids of features that are not cluster parents
    -1415        cluster_daughters = mf_df_new[mf_df_new["cluster_parent"] == False].index.values
    -1416        if drop_children is True:
    -1417            # Drop mass features that are not cluster parents from self
    -1418            self.mass_features = {
    -1419                k: v
    -1420                for k, v in self.mass_features.items()
    -1421                if k not in cluster_daughters
    -1422            }
    -1423        else:
    -1424            return cluster_daughters
    +            
    1291    def cluster_mass_features(self, drop_children=True, sort_by="persistence"):
    +1292        """Cluster mass features
    +1293
    +1294        Based on their proximity in the mz and scan_time dimensions, priorizies the mass features with the highest persistence.
    +1295
    +1296        Parameters
    +1297        ----------
    +1298        drop_children : bool, optional
    +1299            Whether to drop the mass features that are not cluster parents. Default is True.
    +1300        sort_by : str, optional
    +1301            The column to sort the mass features by, this will determine which mass features get rolled up into a parent mass feature. Default is "persistence".
    +1302
    +1303        Raises
    +1304        ------
    +1305        ValueError
    +1306            If no mass features are found.
    +1307            If too many mass features are found.
    +1308
    +1309        Returns
    +1310        -------
    +1311        None if drop_children is True, otherwise returns a list of mass feature ids that are not cluster parents.
    +1312        """
    +1313        verbose = self.parameters.lc_ms.verbose_processing
    +1314
    +1315        if self.mass_features is None:
    +1316            raise ValueError("No mass features found, run find_mass_features() first")
    +1317        if len(self.mass_features) > 400000:
    +1318            raise ValueError(
    +1319                "Too many mass featuers of interest found, run find_mass_features() with a higher intensity threshold"
    +1320            )
    +1321        dims = ["mz", "scan_time"]
    +1322        mf_df_og = self.mass_features_to_df()
    +1323        mf_df = mf_df_og.copy()
    +1324
    +1325        # Sort mass features by sort_by column, make mf_id its own column for easier bookkeeping
    +1326        mf_df = mf_df.sort_values(by=sort_by, ascending=False).reset_index(drop=False)
    +1327
    +1328        tol = [
    +1329            self.parameters.lc_ms.mass_feature_cluster_mz_tolerance_rel,
    +1330            self.parameters.lc_ms.mass_feature_cluster_rt_tolerance,
    +1331        ]  # mz, in relative; scan_time in minutes
    +1332        relative = [True, False]
    +1333
    +1334        # Compute inter-feature distances
    +1335        distances = None
    +1336        for i in range(len(dims)):
    +1337            # Construct k-d tree
    +1338            values = mf_df[dims[i]].values
    +1339            tree = KDTree(values.reshape(-1, 1))
    +1340
    +1341            max_tol = tol[i]
    +1342            if relative[i] is True:
    +1343                # Maximum absolute tolerance
    +1344                max_tol = tol[i] * values.max()
    +1345
    +1346            # Compute sparse distance matrix
    +1347            # the larger the max_tol, the slower this operation is
    +1348            sdm = tree.sparse_distance_matrix(tree, max_tol, output_type="coo_matrix")
    +1349
    +1350            # Only consider forward case, exclude diagonal
    +1351            sdm = sparse.triu(sdm, k=1)
    +1352
    +1353            # Filter relative distances
    +1354            if relative[i] is True:
    +1355                # Compute relative distances
    +1356                rel_dists = sdm.data / values[sdm.row]  # or col?
    +1357
    +1358                # Indices of relative distances less than tolerance
    +1359                idx = rel_dists <= tol[i]
    +1360
    +1361                # Reconstruct sparse distance matrix
    +1362                sdm = sparse.coo_matrix(
    +1363                    (rel_dists[idx], (sdm.row[idx], sdm.col[idx])),
    +1364                    shape=(len(values), len(values)),
    +1365                )
    +1366
    +1367            # Cast as binary matrix
    +1368            sdm.data = np.ones_like(sdm.data)
    +1369
    +1370            # Stack distances
    +1371            if distances is None:
    +1372                distances = sdm
    +1373            else:
    +1374                distances = distances.multiply(sdm)
    +1375
    +1376        # Extract indices of within-tolerance points
    +1377        distances = distances.tocoo()
    +1378        pairs = np.stack((distances.row, distances.col), axis=1)
    +1379        pairs_df = pd.DataFrame(pairs, columns=["parent", "child"])
    +1380        pairs_df = pairs_df.set_index("parent")
    +1381
    +1382        to_drop = []
    +1383        while not pairs_df.empty:
    +1384            # Find root_parents and their children
    +1385            root_parents = np.setdiff1d(
    +1386                np.unique(pairs_df.index.values), np.unique(pairs_df.child.values)
    +1387            )
    +1388            children_of_roots = pairs_df.loc[root_parents, "child"].unique()
    +1389            to_drop = np.append(to_drop, children_of_roots)
    +1390
    +1391            # Remove root_children as possible parents from pairs_df for next iteration
    +1392            pairs_df = pairs_df.drop(index=children_of_roots, errors="ignore")
    +1393            pairs_df = pairs_df.reset_index().set_index("child")
    +1394            # Remove root_children as possible children from pairs_df for next iteration
    +1395            pairs_df = pairs_df.drop(index=children_of_roots)
    +1396
    +1397            # Prepare for next iteration
    +1398            pairs_df = pairs_df.reset_index().set_index("parent")
    +1399
    +1400        # Drop mass features that are not cluster parents
    +1401        mf_df = mf_df.drop(index=np.array(to_drop))
    +1402
    +1403        # Set index back to mf_id
    +1404        mf_df = mf_df.set_index("mf_id")
    +1405        if verbose:
    +1406            print(str(len(mf_df)) + " mass features remaining")
    +1407
    +1408        mf_df_new = mf_df_og.copy()
    +1409        mf_df_new["cluster_parent"] = np.where(
    +1410            np.isin(mf_df_new.index, mf_df.index), True, False
    +1411        )
    +1412
    +1413        # get mass feature ids of features that are not cluster parents
    +1414        cluster_daughters = mf_df_new[mf_df_new["cluster_parent"] == False].index.values
    +1415        if drop_children is True:
    +1416            # Drop mass features that are not cluster parents from self
    +1417            self.mass_features = {
    +1418                k: v
    +1419                for k, v in self.mass_features.items()
    +1420                if k not in cluster_daughters
    +1421            }
    +1422        else:
    +1423            return cluster_daughters
     
    diff --git a/docs/corems/mass_spectra/calc/MZSearch.html b/docs/corems/mass_spectra/calc/MZSearch.html index 1b7e101d..ebfd7928 100644 --- a/docs/corems/mass_spectra/calc/MZSearch.html +++ b/docs/corems/mass_spectra/calc/MZSearch.html @@ -124,258 +124,248 @@

    5from dataclasses import dataclass 6from typing import List 7 - 8@dataclass - 9class SearchResults: - 10 + 8 + 9@dataclass + 10class SearchResults: 11 calculated_mz: float 12 exp_mz: float 13 error: float 14 tolerance: float 15 - 16class MZSearch(Thread): - 17 - 18 def __init__(self, exp_mzs: List[float], calculated_mzs: List[float], tolerance, method="ppm", average_target_mz=True): - 19 ''' - 20 Parameters - 21 ---------- - 22 calculated_mzs: [float] calculated m/z - 23 exp_mzs: [float] experimental m/z - 24 method: string, - 25 ppm or ppb - 26 call run to trigger the m/z search algorithm - 27 or start if using it as thread - 28 ''' - 29 Thread.__init__(self) - 30 # placeholder for the results - 31 self._matched_mz = {} - 32 - 33 self._calculated_mzs = calculated_mzs - 34 - 35 self._matched_mz = {} - 36 - 37 self._averaged_target_mz = [] - 38 - 39 self._exp_mzs = exp_mzs - 40 - 41 self._tolerance = tolerance - 42 self.method = method + 16 + 17class MZSearch(Thread): + 18 def __init__( + 19 self, + 20 exp_mzs: List[float], + 21 calculated_mzs: List[float], + 22 tolerance, + 23 method="ppm", + 24 average_target_mz=True, + 25 ): + 26 """ + 27 Parameters + 28 ---------- + 29 calculated_mzs: [float] calculated m/z + 30 exp_mzs: [float] experimental m/z + 31 method: string, + 32 ppm or ppb + 33 call run to trigger the m/z search algorithm + 34 or start if using it as thread + 35 """ + 36 Thread.__init__(self) + 37 # placeholder for the results + 38 self._matched_mz = {} + 39 + 40 self._calculated_mzs = calculated_mzs + 41 + 42 self._matched_mz = {} 43 - 44 if average_target_mz: - 45 self.colapse_calculated() - 46 - 47 @property - 48 def results(self): - 49 ''' {calculated_mz: [SearchResults]} - 50 contains the results of the search - 51 ''' - 52 return self._matched_mz + 44 self._averaged_target_mz = [] + 45 + 46 self._exp_mzs = exp_mzs + 47 + 48 self._tolerance = tolerance + 49 self.method = method + 50 + 51 if average_target_mz: + 52 self.colapse_calculated() 53 
54 @property - 55 def averaged_target_mz(self): - 56 ''' [float] - 57 contains the average target m/z to be searched against - 58 ''' - 59 return self._averaged_target_mz + 55 def results(self): + 56 """{calculated_mz: [SearchResults]} + 57 contains the results of the search + 58 """ + 59 return self._matched_mz 60 61 @property - 62 def calculated_mzs(self): - 63 ''' [float] - 64 contains the mz target to be searched against - 65 ''' - 66 if self.averaged_target_mz: - 67 return sorted(self.averaged_target_mz) - 68 else: - 69 - 70 return sorted(list(self._calculated_mzs)) - 71 - 72 @property - 73 def exp_mzs(self): - 74 ''' [float] - 75 contains the exp mz to be searched against - 76 ''' - 77 return self._exp_mzs - 78 - 79 @property - 80 def method(self): - 81 return self._method - 82 - 83 @method.setter - 84 def method(self, method): - 85 ''' - 86 method: string, - 87 ppm or ppb - 88 ''' - 89 if method not in ['ppm' or 'ppb']: - 90 raise ValueError("Method should be ppm or ppb") - 91 self._method = method - 92 - 93 @property - 94 def tolerance(self): - 95 return self._tolerance - 96 - 97 @tolerance.setter - 98 def tolerance(self, tolerance): - 99 ''' -100 method: string, -101 ppm or ppb -102 ''' -103 if tolerance < 0: -104 raise ValueError("Tolerance needs to be a positive number") -105 self._tolerance = tolerance -106 -107 def colapse_calculated(self): -108 -109 if len(self.calculated_mzs) > 1: -110 all_mz = [] -111 subset = set() -112 -113 i = -1 -114 while True: -115 -116 i = i +1 -117 -118 if i == len(self.calculated_mzs)-1: -119 all_mz.append({i}) -120 #print(i, 'break1') -121 break -122 -123 if i >= len(self.calculated_mzs)-1: -124 #print(i, 'break2') + 62 def averaged_target_mz(self): + 63 """[float] + 64 contains the average target m/z to be searched against + 65 """ + 66 return self._averaged_target_mz + 67 + 68 @property + 69 def calculated_mzs(self): + 70 """[float] + 71 contains the mz target to be searched against + 72 """ + 73 if 
self.averaged_target_mz: + 74 return sorted(self.averaged_target_mz) + 75 else: + 76 return sorted(list(self._calculated_mzs)) + 77 + 78 @property + 79 def exp_mzs(self): + 80 """[float] + 81 contains the exp mz to be searched against + 82 """ + 83 return self._exp_mzs + 84 + 85 @property + 86 def method(self): + 87 return self._method + 88 + 89 @method.setter + 90 def method(self, method): + 91 """ + 92 method: string, + 93 ppm or ppb + 94 """ + 95 if method not in ["ppm" or "ppb"]: + 96 raise ValueError("Method should be ppm or ppb") + 97 self._method = method + 98 + 99 @property +100 def tolerance(self): +101 return self._tolerance +102 +103 @tolerance.setter +104 def tolerance(self, tolerance): +105 """ +106 method: string, +107 ppm or ppb +108 """ +109 if tolerance < 0: +110 raise ValueError("Tolerance needs to be a positive number") +111 self._tolerance = tolerance +112 +113 def colapse_calculated(self): +114 if len(self.calculated_mzs) > 1: +115 all_mz = [] +116 subset = set() +117 +118 i = -1 +119 while True: +120 i = i + 1 +121 +122 if i == len(self.calculated_mzs) - 1: +123 all_mz.append({i}) +124 # print(i, 'break1') 125 break 126 -127 error = self.calc_mz_error(self.calculated_mzs[i], self.calculated_mzs[i+1]) -128 -129 #print(self.tolerance) +127 if i >= len(self.calculated_mzs) - 1: +128 # print(i, 'break2') +129 break 130 -131 check_error = self.check_ppm_error(self.tolerance, error) -132 -133 if not check_error: -134 start_list = {i} -135 -136 else: -137 -138 start_list = set() -139 -140 while check_error: -141 -142 start_list.add(i) -143 start_list.add(i+1) +131 error = self.calc_mz_error( +132 self.calculated_mzs[i], self.calculated_mzs[i + 1] +133 ) +134 +135 # print(self.tolerance) +136 +137 check_error = self.check_ppm_error(self.tolerance, error) +138 +139 if not check_error: +140 start_list = {i} +141 +142 else: +143 start_list = set() 144 -145 i = i + 1 -146 -147 if i == len(self.calculated_mzs)-1: -148 start_list.add(i) -149 #print(i, 
'break3') -150 break -151 -152 -153 error = self.calc_mz_error(self.calculated_mzs[i], self.calculated_mzs[i+1]) -154 check_error = self.check_ppm_error(self.tolerance, error) -155 -156 if start_list: -157 all_mz.append(start_list) -158 -159 results = [] -160 for each in all_mz: -161 #print(each) -162 mzs = [self.calculated_mzs[i] for i in each] -163 results.append(sum(mzs)/len(mzs)) -164 -165 #print(results) -166 self._averaged_target_mz = results -167 -168 -169 def run(self): -170 -171 dict_nominal_exp_mz = self.get_nominal_exp(self.exp_mzs) +145 while check_error: +146 start_list.add(i) +147 start_list.add(i + 1) +148 +149 i = i + 1 +150 +151 if i == len(self.calculated_mzs) - 1: +152 start_list.add(i) +153 # print(i, 'break3') +154 break +155 +156 error = self.calc_mz_error( +157 self.calculated_mzs[i], self.calculated_mzs[i + 1] +158 ) +159 check_error = self.check_ppm_error(self.tolerance, error) +160 +161 if start_list: +162 all_mz.append(start_list) +163 +164 results = [] +165 for each in all_mz: +166 # print(each) +167 mzs = [self.calculated_mzs[i] for i in each] +168 results.append(sum(mzs) / len(mzs)) +169 +170 # print(results) +171 self._averaged_target_mz = results 172 -173 for calculated_mz in self.calculated_mzs: -174 -175 nominal_selected_mz = int(calculated_mz) -176 -177 if nominal_selected_mz in dict_nominal_exp_mz.keys(): +173 def run(self): +174 dict_nominal_exp_mz = self.get_nominal_exp(self.exp_mzs) +175 +176 for calculated_mz in self.calculated_mzs: +177 nominal_selected_mz = int(calculated_mz) 178 -179 self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, 0) -180 -181 elif nominal_selected_mz - 1 in dict_nominal_exp_mz.keys(): -182 +179 if nominal_selected_mz in dict_nominal_exp_mz.keys(): +180 self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, 0) +181 +182 elif nominal_selected_mz - 1 in dict_nominal_exp_mz.keys(): 183 self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, -1) 184 185 elif 
nominal_selected_mz + 1 in dict_nominal_exp_mz.keys(): -186 -187 self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, +1) -188 -189 else: +186 self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, +1) +187 +188 else: +189 continue 190 -191 continue -192 -193 @staticmethod -194 def calc_mz_error(calculated_mz, exp_mz, method='ppm'): -195 ''' -196 Parameters -197 ---------- -198 calculated_mz: float, -199 exp_mz:float -200 method: string, -201 ppm or ppb -202 ''' -203 if method == 'ppm': -204 multi_factor = 1000000 -205 -206 elif method == 'ppb': -207 multi_factor = 1000000 -208 -209 else: -210 raise Exception("method needs to be ppm or ppb, \ -211 you have entered %s" % method) -212 -213 return ((exp_mz - calculated_mz) / calculated_mz) * multi_factor -214 -215 @staticmethod -216 def check_ppm_error(tolerance, error): -217 return True if -tolerance <= error <= tolerance else False -218 -219 def get_nominal_exp(self, exp_mzs) -> dict: -220 +191 @staticmethod +192 def calc_mz_error(calculated_mz, exp_mz, method="ppm"): +193 """ +194 Parameters +195 ---------- +196 calculated_mz: float, +197 exp_mz:float +198 method: string, +199 ppm or ppb +200 """ +201 if method == "ppm": +202 multi_factor = 1000000 +203 +204 elif method == "ppb": +205 multi_factor = 1000000 +206 +207 else: +208 raise Exception( +209 "method needs to be ppm or ppb, \ +210 you have entered %s" +211 % method +212 ) +213 +214 return ((exp_mz - calculated_mz) / calculated_mz) * multi_factor +215 +216 @staticmethod +217 def check_ppm_error(tolerance, error): +218 return True if -tolerance <= error <= tolerance else False +219 +220 def get_nominal_exp(self, exp_mzs) -> dict: 221 dict_nominal_exp_mz = {} 222 223 for exp_mz in exp_mzs: -224 -225 nominal_mz = int(exp_mz) -226 -227 if nominal_mz not in dict_nominal_exp_mz.keys(): -228 dict_nominal_exp_mz[int(exp_mz)] = [exp_mz] -229 else: -230 dict_nominal_exp_mz[int(exp_mz)].append(exp_mz) -231 -232 return dict_nominal_exp_mz -233 -234 
def search_mz(self, results, dict_nominal_exp_mz, calculated_mz, offset) -> None: -235 -236 nominal_calculated_mz = int(calculated_mz) + offset -237 matched_n_precursors = dict_nominal_exp_mz.get(nominal_calculated_mz) -238 -239 for precursor_mz in matched_n_precursors: -240 -241 error = self.calc_mz_error(calculated_mz, precursor_mz, -242 method=self.method) -243 -244 if self.check_ppm_error(self.tolerance, error): -245 -246 new_match = SearchResults(calculated_mz, -247 precursor_mz, -248 error, -249 self.tolerance) -250 -251 if calculated_mz not in results.keys(): -252 -253 results[calculated_mz] = [new_match] -254 -255 else: -256 -257 results[calculated_mz].append(new_match) -258 -259 +224 nominal_mz = int(exp_mz) +225 +226 if nominal_mz not in dict_nominal_exp_mz.keys(): +227 dict_nominal_exp_mz[int(exp_mz)] = [exp_mz] +228 else: +229 dict_nominal_exp_mz[int(exp_mz)].append(exp_mz) +230 +231 return dict_nominal_exp_mz +232 +233 def search_mz(self, results, dict_nominal_exp_mz, calculated_mz, offset) -> None: +234 nominal_calculated_mz = int(calculated_mz) + offset +235 matched_n_precursors = dict_nominal_exp_mz.get(nominal_calculated_mz) +236 +237 for precursor_mz in matched_n_precursors: +238 error = self.calc_mz_error(calculated_mz, precursor_mz, method=self.method) +239 +240 if self.check_ppm_error(self.tolerance, error): +241 new_match = SearchResults( +242 calculated_mz, precursor_mz, error, self.tolerance +243 ) +244 +245 if calculated_mz not in results.keys(): +246 results[calculated_mz] = [new_match] +247 +248 else: +249 results[calculated_mz].append(new_match)

    @@ -392,9 +382,8 @@

    -
     9@dataclass
    -10class SearchResults:
    -11
    +            
    10@dataclass
    +11class SearchResults:
     12    calculated_mz: float
     13    exp_mz: float
     14    error: float
    @@ -472,248 +461,239 @@ 

    -
     17class MZSearch(Thread):
    - 18
    - 19    def __init__(self, exp_mzs: List[float], calculated_mzs: List[float], tolerance, method="ppm", average_target_mz=True):
    - 20        '''
    - 21        Parameters
    - 22        ----------
    - 23        calculated_mzs: [float] calculated m/z
    - 24        exp_mzs: [float] experimental m/z
    - 25        method: string,
    - 26            ppm or ppb
    - 27        call run to trigger the m/z search algorithm
    - 28        or start if using it as thread
    - 29        '''
    - 30        Thread.__init__(self)
    - 31        # placeholder for the results
    - 32        self._matched_mz = {}
    - 33
    - 34        self._calculated_mzs = calculated_mzs
    - 35
    - 36        self._matched_mz = {}
    - 37
    - 38        self._averaged_target_mz = []
    - 39
    - 40        self._exp_mzs = exp_mzs
    - 41        
    - 42        self._tolerance = tolerance
    - 43        self.method = method
    +            
     18class MZSearch(Thread):
    + 19    def __init__(
    + 20        self,
    + 21        exp_mzs: List[float],
    + 22        calculated_mzs: List[float],
    + 23        tolerance,
    + 24        method="ppm",
    + 25        average_target_mz=True,
    + 26    ):
    + 27        """
    + 28        Parameters
    + 29        ----------
    + 30        calculated_mzs: [float] calculated m/z
    + 31        exp_mzs: [float] experimental m/z
    + 32        method: string,
    + 33            ppm or ppb
    + 34        call run to trigger the m/z search algorithm
    + 35        or start if using it as thread
    + 36        """
    + 37        Thread.__init__(self)
    + 38        # placeholder for the results
    + 39        self._matched_mz = {}
    + 40
    + 41        self._calculated_mzs = calculated_mzs
    + 42
    + 43        self._matched_mz = {}
      44
    - 45        if average_target_mz:
    - 46            self.colapse_calculated()
    - 47
    - 48    @property
    - 49    def results(self):
    - 50        ''' {calculated_mz: [SearchResults]}
    - 51            contains the results of the search
    - 52        '''
    - 53        return self._matched_mz
    + 45        self._averaged_target_mz = []
    + 46
    + 47        self._exp_mzs = exp_mzs
    + 48
    + 49        self._tolerance = tolerance
    + 50        self.method = method
    + 51
    + 52        if average_target_mz:
    + 53            self.colapse_calculated()
      54
      55    @property
    - 56    def averaged_target_mz(self):
    - 57        ''' [float]
    - 58            contains the average target m/z to be searched against
    - 59        '''
    - 60        return self._averaged_target_mz
    + 56    def results(self):
    + 57        """{calculated_mz: [SearchResults]}
    + 58        contains the results of the search
    + 59        """
    + 60        return self._matched_mz
      61
      62    @property
    - 63    def calculated_mzs(self):
    - 64        ''' [float]
    - 65            contains the mz target to be searched against
    - 66        '''
    - 67        if self.averaged_target_mz:
    - 68            return sorted(self.averaged_target_mz)
    - 69        else:    
    - 70            
    - 71            return sorted(list(self._calculated_mzs))
    - 72
    - 73    @property
    - 74    def exp_mzs(self):
    - 75        ''' [float]
    - 76            contains the exp mz to be searched against
    - 77        '''
    - 78        return self._exp_mzs
    - 79
    - 80    @property
    - 81    def method(self):
    - 82        return self._method
    - 83
    - 84    @method.setter
    - 85    def method(self, method):
    - 86        '''
    - 87         method: string,
    - 88            ppm or ppb
    - 89        '''
    - 90        if method not in ['ppm' or 'ppb']:
    - 91            raise ValueError("Method should be ppm or ppb")
    - 92        self._method = method
    - 93
    - 94    @property
    - 95    def tolerance(self):
    - 96        return self._tolerance
    - 97
    - 98    @tolerance.setter
    - 99    def tolerance(self, tolerance):
    -100        '''
    -101         method: string,
    -102            ppm or ppb
    -103        '''
    -104        if tolerance < 0:
    -105            raise ValueError("Tolerance needs to be a positive number")
    -106        self._tolerance = tolerance
    -107
    -108    def colapse_calculated(self):
    -109        
    -110        if len(self.calculated_mzs) > 1:
    -111            all_mz = []
    -112            subset = set()
    -113            
    -114            i = -1
    -115            while True:
    -116                
    -117                i = i +1
    -118                
    -119                if i == len(self.calculated_mzs)-1:
    -120                    all_mz.append({i})
    -121                    #print(i, 'break1')
    -122                    break
    -123                
    -124                if i >= len(self.calculated_mzs)-1:
    -125                    #print(i, 'break2')
    + 63    def averaged_target_mz(self):
    + 64        """[float]
    + 65        contains the average target m/z to be searched against
    + 66        """
    + 67        return self._averaged_target_mz
    + 68
    + 69    @property
    + 70    def calculated_mzs(self):
    + 71        """[float]
    + 72        contains the mz target to be searched against
    + 73        """
    + 74        if self.averaged_target_mz:
    + 75            return sorted(self.averaged_target_mz)
    + 76        else:
    + 77            return sorted(list(self._calculated_mzs))
    + 78
    + 79    @property
    + 80    def exp_mzs(self):
    + 81        """[float]
    + 82        contains the exp mz to be searched against
    + 83        """
    + 84        return self._exp_mzs
    + 85
    + 86    @property
    + 87    def method(self):
    + 88        return self._method
    + 89
    + 90    @method.setter
    + 91    def method(self, method):
    + 92        """
    + 93        method: string,
    + 94           ppm or ppb
    + 95        """
    + 96        if method not in ["ppm" or "ppb"]:
    + 97            raise ValueError("Method should be ppm or ppb")
    + 98        self._method = method
    + 99
    +100    @property
    +101    def tolerance(self):
    +102        return self._tolerance
    +103
    +104    @tolerance.setter
    +105    def tolerance(self, tolerance):
    +106        """
    +107        method: string,
    +108           ppm or ppb
    +109        """
    +110        if tolerance < 0:
    +111            raise ValueError("Tolerance needs to be a positive number")
    +112        self._tolerance = tolerance
    +113
    +114    def colapse_calculated(self):
    +115        if len(self.calculated_mzs) > 1:
    +116            all_mz = []
    +117            subset = set()
    +118
    +119            i = -1
    +120            while True:
    +121                i = i + 1
    +122
    +123                if i == len(self.calculated_mzs) - 1:
    +124                    all_mz.append({i})
    +125                    # print(i, 'break1')
     126                    break
     127
    -128                error = self.calc_mz_error(self.calculated_mzs[i], self.calculated_mzs[i+1])
    -129                
    -130                #print(self.tolerance)
    +128                if i >= len(self.calculated_mzs) - 1:
    +129                    # print(i, 'break2')
    +130                    break
     131
    -132                check_error = self.check_ppm_error(self.tolerance, error)
    -133                
    -134                if not check_error:
    -135                    start_list = {i}
    -136
    -137                else:
    -138
    -139                    start_list = set()
    -140
    -141                while check_error:
    -142                    
    -143                    start_list.add(i)
    -144                    start_list.add(i+1)
    +132                error = self.calc_mz_error(
    +133                    self.calculated_mzs[i], self.calculated_mzs[i + 1]
    +134                )
    +135
    +136                # print(self.tolerance)
    +137
    +138                check_error = self.check_ppm_error(self.tolerance, error)
    +139
    +140                if not check_error:
    +141                    start_list = {i}
    +142
    +143                else:
    +144                    start_list = set()
     145
    -146                    i = i + 1    
    -147                    
    -148                    if i == len(self.calculated_mzs)-1:
    -149                        start_list.add(i)
    -150                        #print(i, 'break3')
    -151                        break
    -152
    -153                    
    -154                    error = self.calc_mz_error(self.calculated_mzs[i], self.calculated_mzs[i+1])
    -155                    check_error = self.check_ppm_error(self.tolerance, error)
    -156                
    -157                if start_list:
    -158                    all_mz.append(start_list)
    -159            
    -160            results = []
    -161            for each in all_mz:
    -162                #print(each)
    -163                mzs = [self.calculated_mzs[i] for i in each]
    -164                results.append(sum(mzs)/len(mzs))
    -165            
    -166            #print(results)
    -167            self._averaged_target_mz = results
    -168            
    -169
    -170    def run(self):
    -171
    -172        dict_nominal_exp_mz = self.get_nominal_exp(self.exp_mzs)
    +146                while check_error:
    +147                    start_list.add(i)
    +148                    start_list.add(i + 1)
    +149
    +150                    i = i + 1
    +151
    +152                    if i == len(self.calculated_mzs) - 1:
    +153                        start_list.add(i)
    +154                        # print(i, 'break3')
    +155                        break
    +156
    +157                    error = self.calc_mz_error(
    +158                        self.calculated_mzs[i], self.calculated_mzs[i + 1]
    +159                    )
    +160                    check_error = self.check_ppm_error(self.tolerance, error)
    +161
    +162                if start_list:
    +163                    all_mz.append(start_list)
    +164
    +165            results = []
    +166            for each in all_mz:
    +167                # print(each)
    +168                mzs = [self.calculated_mzs[i] for i in each]
    +169                results.append(sum(mzs) / len(mzs))
    +170
    +171            # print(results)
    +172            self._averaged_target_mz = results
     173
    -174        for calculated_mz in self.calculated_mzs:
    -175
    -176            nominal_selected_mz = int(calculated_mz)
    -177
    -178            if nominal_selected_mz in dict_nominal_exp_mz.keys():
    +174    def run(self):
    +175        dict_nominal_exp_mz = self.get_nominal_exp(self.exp_mzs)
    +176
    +177        for calculated_mz in self.calculated_mzs:
    +178            nominal_selected_mz = int(calculated_mz)
     179
    -180                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, 0)
    -181
    -182            elif nominal_selected_mz - 1 in dict_nominal_exp_mz.keys():
    -183
    +180            if nominal_selected_mz in dict_nominal_exp_mz.keys():
    +181                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, 0)
    +182
    +183            elif nominal_selected_mz - 1 in dict_nominal_exp_mz.keys():
     184                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, -1)
     185
     186            elif nominal_selected_mz + 1 in dict_nominal_exp_mz.keys():
    -187
    -188                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, +1)
    -189
    -190            else:
    +187                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, +1)
    +188
    +189            else:
    +190                continue
     191
    -192                continue
    -193
    -194    @staticmethod
    -195    def calc_mz_error(calculated_mz, exp_mz, method='ppm'):
    -196        '''
    -197        Parameters
    -198        ----------
    -199        calculated_mz: float,
    -200        exp_mz:float
    -201        method: string,
    -202            ppm or ppb
    -203        '''
    -204        if method == 'ppm':
    -205            multi_factor = 1000000
    -206
    -207        elif method == 'ppb':
    -208            multi_factor = 1000000
    -209
    -210        else:
    -211            raise Exception("method needs to be ppm or ppb, \
    -212                             you have entered %s" % method)
    -213
    -214        return ((exp_mz - calculated_mz) / calculated_mz) * multi_factor
    -215
    -216    @staticmethod
    -217    def check_ppm_error(tolerance, error):
    -218        return True if -tolerance <= error <= tolerance else False
    -219
    -220    def get_nominal_exp(self, exp_mzs) -> dict:
    -221
    +192    @staticmethod
    +193    def calc_mz_error(calculated_mz, exp_mz, method="ppm"):
    +194        """
    +195        Parameters
    +196        ----------
    +197        calculated_mz: float,
    +198        exp_mz:float
    +199        method: string,
    +200            ppm or ppb
    +201        """
    +202        if method == "ppm":
    +203            multi_factor = 1000000
    +204
    +205        elif method == "ppb":
    +206            multi_factor = 1000000
    +207
    +208        else:
    +209            raise Exception(
    +210                "method needs to be ppm or ppb, \
    +211                             you have entered %s"
    +212                % method
    +213            )
    +214
    +215        return ((exp_mz - calculated_mz) / calculated_mz) * multi_factor
    +216
    +217    @staticmethod
    +218    def check_ppm_error(tolerance, error):
    +219        return True if -tolerance <= error <= tolerance else False
    +220
    +221    def get_nominal_exp(self, exp_mzs) -> dict:
     222        dict_nominal_exp_mz = {}
     223
     224        for exp_mz in exp_mzs:
    -225
    -226            nominal_mz = int(exp_mz)
    -227
    -228            if nominal_mz not in dict_nominal_exp_mz.keys():
    -229                dict_nominal_exp_mz[int(exp_mz)] = [exp_mz]
    -230            else:
    -231                dict_nominal_exp_mz[int(exp_mz)].append(exp_mz)
    -232
    -233        return dict_nominal_exp_mz
    -234
    -235    def search_mz(self, results, dict_nominal_exp_mz, calculated_mz, offset) -> None:
    -236
    -237        nominal_calculated_mz = int(calculated_mz) + offset
    -238        matched_n_precursors = dict_nominal_exp_mz.get(nominal_calculated_mz)
    -239
    -240        for precursor_mz in matched_n_precursors:
    -241            
    -242            error = self.calc_mz_error(calculated_mz, precursor_mz,
    -243                                       method=self.method)
    -244
    -245            if self.check_ppm_error(self.tolerance, error):
    -246
    -247                new_match = SearchResults(calculated_mz,
    -248                                          precursor_mz,
    -249                                          error,
    -250                                          self.tolerance)
    -251                
    -252                if calculated_mz not in results.keys():
    -253                    
    -254                    results[calculated_mz] = [new_match]
    -255
    -256                else:
    -257
    -258                    results[calculated_mz].append(new_match)
    +225            nominal_mz = int(exp_mz)
    +226
    +227            if nominal_mz not in dict_nominal_exp_mz.keys():
    +228                dict_nominal_exp_mz[int(exp_mz)] = [exp_mz]
    +229            else:
    +230                dict_nominal_exp_mz[int(exp_mz)].append(exp_mz)
    +231
    +232        return dict_nominal_exp_mz
    +233
    +234    def search_mz(self, results, dict_nominal_exp_mz, calculated_mz, offset) -> None:
    +235        nominal_calculated_mz = int(calculated_mz) + offset
    +236        matched_n_precursors = dict_nominal_exp_mz.get(nominal_calculated_mz)
    +237
    +238        for precursor_mz in matched_n_precursors:
    +239            error = self.calc_mz_error(calculated_mz, precursor_mz, method=self.method)
    +240
    +241            if self.check_ppm_error(self.tolerance, error):
    +242                new_match = SearchResults(
    +243                    calculated_mz, precursor_mz, error, self.tolerance
    +244                )
    +245
    +246                if calculated_mz not in results.keys():
    +247                    results[calculated_mz] = [new_match]
    +248
    +249                else:
    +250                    results[calculated_mz].append(new_match)
     
    @@ -735,34 +715,41 @@

    -
    19    def __init__(self, exp_mzs: List[float], calculated_mzs: List[float], tolerance, method="ppm", average_target_mz=True):
    -20        '''
    -21        Parameters
    -22        ----------
    -23        calculated_mzs: [float] calculated m/z
    -24        exp_mzs: [float] experimental m/z
    -25        method: string,
    -26            ppm or ppb
    -27        call run to trigger the m/z search algorithm
    -28        or start if using it as thread
    -29        '''
    -30        Thread.__init__(self)
    -31        # placeholder for the results
    -32        self._matched_mz = {}
    -33
    -34        self._calculated_mzs = calculated_mzs
    -35
    -36        self._matched_mz = {}
    -37
    -38        self._averaged_target_mz = []
    -39
    -40        self._exp_mzs = exp_mzs
    -41        
    -42        self._tolerance = tolerance
    -43        self.method = method
    +            
    19    def __init__(
    +20        self,
    +21        exp_mzs: List[float],
    +22        calculated_mzs: List[float],
    +23        tolerance,
    +24        method="ppm",
    +25        average_target_mz=True,
    +26    ):
    +27        """
    +28        Parameters
    +29        ----------
    +30        calculated_mzs: [float] calculated m/z
    +31        exp_mzs: [float] experimental m/z
    +32        method: string,
    +33            ppm or ppb
    +34        call run to trigger the m/z search algorithm
    +35        or start if using it as thread
    +36        """
    +37        Thread.__init__(self)
    +38        # placeholder for the results
    +39        self._matched_mz = {}
    +40
    +41        self._calculated_mzs = calculated_mzs
    +42
    +43        self._matched_mz = {}
     44
    -45        if average_target_mz:
    -46            self.colapse_calculated()
    +45        self._averaged_target_mz = []
    +46
    +47        self._exp_mzs = exp_mzs
    +48
    +49        self._tolerance = tolerance
    +50        self.method = method
    +51
    +52        if average_target_mz:
    +53            self.colapse_calculated()
     
    @@ -875,66 +862,65 @@

    -
    108    def colapse_calculated(self):
    -109        
    -110        if len(self.calculated_mzs) > 1:
    -111            all_mz = []
    -112            subset = set()
    -113            
    -114            i = -1
    -115            while True:
    -116                
    -117                i = i +1
    -118                
    -119                if i == len(self.calculated_mzs)-1:
    -120                    all_mz.append({i})
    -121                    #print(i, 'break1')
    -122                    break
    -123                
    -124                if i >= len(self.calculated_mzs)-1:
    -125                    #print(i, 'break2')
    +            
    114    def colapse_calculated(self):
    +115        if len(self.calculated_mzs) > 1:
    +116            all_mz = []
    +117            subset = set()
    +118
    +119            i = -1
    +120            while True:
    +121                i = i + 1
    +122
    +123                if i == len(self.calculated_mzs) - 1:
    +124                    all_mz.append({i})
    +125                    # print(i, 'break1')
     126                    break
     127
    -128                error = self.calc_mz_error(self.calculated_mzs[i], self.calculated_mzs[i+1])
    -129                
    -130                #print(self.tolerance)
    +128                if i >= len(self.calculated_mzs) - 1:
    +129                    # print(i, 'break2')
    +130                    break
     131
    -132                check_error = self.check_ppm_error(self.tolerance, error)
    -133                
    -134                if not check_error:
    -135                    start_list = {i}
    -136
    -137                else:
    -138
    -139                    start_list = set()
    -140
    -141                while check_error:
    -142                    
    -143                    start_list.add(i)
    -144                    start_list.add(i+1)
    +132                error = self.calc_mz_error(
    +133                    self.calculated_mzs[i], self.calculated_mzs[i + 1]
    +134                )
    +135
    +136                # print(self.tolerance)
    +137
    +138                check_error = self.check_ppm_error(self.tolerance, error)
    +139
    +140                if not check_error:
    +141                    start_list = {i}
    +142
    +143                else:
    +144                    start_list = set()
     145
    -146                    i = i + 1    
    -147                    
    -148                    if i == len(self.calculated_mzs)-1:
    -149                        start_list.add(i)
    -150                        #print(i, 'break3')
    -151                        break
    -152
    -153                    
    -154                    error = self.calc_mz_error(self.calculated_mzs[i], self.calculated_mzs[i+1])
    -155                    check_error = self.check_ppm_error(self.tolerance, error)
    -156                
    -157                if start_list:
    -158                    all_mz.append(start_list)
    -159            
    -160            results = []
    -161            for each in all_mz:
    -162                #print(each)
    -163                mzs = [self.calculated_mzs[i] for i in each]
    -164                results.append(sum(mzs)/len(mzs))
    -165            
    -166            #print(results)
    -167            self._averaged_target_mz = results
    +146                while check_error:
    +147                    start_list.add(i)
    +148                    start_list.add(i + 1)
    +149
    +150                    i = i + 1
    +151
    +152                    if i == len(self.calculated_mzs) - 1:
    +153                        start_list.add(i)
    +154                        # print(i, 'break3')
    +155                        break
    +156
    +157                    error = self.calc_mz_error(
    +158                        self.calculated_mzs[i], self.calculated_mzs[i + 1]
    +159                    )
    +160                    check_error = self.check_ppm_error(self.tolerance, error)
    +161
    +162                if start_list:
    +163                    all_mz.append(start_list)
    +164
    +165            results = []
    +166            for each in all_mz:
    +167                # print(each)
    +168                mzs = [self.calculated_mzs[i] for i in each]
    +169                results.append(sum(mzs) / len(mzs))
    +170
    +171            # print(results)
    +172            self._averaged_target_mz = results
     
    @@ -952,29 +938,23 @@

    -
    170    def run(self):
    -171
    -172        dict_nominal_exp_mz = self.get_nominal_exp(self.exp_mzs)
    -173
    -174        for calculated_mz in self.calculated_mzs:
    -175
    -176            nominal_selected_mz = int(calculated_mz)
    -177
    -178            if nominal_selected_mz in dict_nominal_exp_mz.keys():
    +            
    174    def run(self):
    +175        dict_nominal_exp_mz = self.get_nominal_exp(self.exp_mzs)
    +176
    +177        for calculated_mz in self.calculated_mzs:
    +178            nominal_selected_mz = int(calculated_mz)
     179
    -180                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, 0)
    -181
    -182            elif nominal_selected_mz - 1 in dict_nominal_exp_mz.keys():
    -183
    +180            if nominal_selected_mz in dict_nominal_exp_mz.keys():
    +181                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, 0)
    +182
    +183            elif nominal_selected_mz - 1 in dict_nominal_exp_mz.keys():
     184                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, -1)
     185
     186            elif nominal_selected_mz + 1 in dict_nominal_exp_mz.keys():
    -187
    -188                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, +1)
    -189
    -190            else:
    -191
    -192                continue
    +187                self.search_mz(self.results, dict_nominal_exp_mz, calculated_mz, +1)
    +188
    +189            else:
    +190                continue
     
    @@ -1000,27 +980,30 @@

    -
    194    @staticmethod
    -195    def calc_mz_error(calculated_mz, exp_mz, method='ppm'):
    -196        '''
    -197        Parameters
    -198        ----------
    -199        calculated_mz: float,
    -200        exp_mz:float
    -201        method: string,
    -202            ppm or ppb
    -203        '''
    -204        if method == 'ppm':
    -205            multi_factor = 1000000
    -206
    -207        elif method == 'ppb':
    -208            multi_factor = 1000000
    -209
    -210        else:
    -211            raise Exception("method needs to be ppm or ppb, \
    -212                             you have entered %s" % method)
    -213
    -214        return ((exp_mz - calculated_mz) / calculated_mz) * multi_factor
    +            
    192    @staticmethod
    +193    def calc_mz_error(calculated_mz, exp_mz, method="ppm"):
    +194        """
    +195        Parameters
    +196        ----------
    +197        calculated_mz: float,
    +198        exp_mz:float
    +199        method: string,
    +200            ppm or ppb
    +201        """
    +202        if method == "ppm":
    +203            multi_factor = 1000000
    +204
    +205        elif method == "ppb":
    +206            multi_factor = 1000000
    +207
    +208        else:
    +209            raise Exception(
    +210                "method needs to be ppm or ppb, \
    +211                             you have entered %s"
    +212                % method
    +213            )
    +214
    +215        return ((exp_mz - calculated_mz) / calculated_mz) * multi_factor
     
    @@ -1048,9 +1031,9 @@

    -
    216    @staticmethod
    -217    def check_ppm_error(tolerance, error):
    -218        return True if -tolerance <= error <= tolerance else False
    +            
    217    @staticmethod
    +218    def check_ppm_error(tolerance, error):
    +219        return True if -tolerance <= error <= tolerance else False
     
    @@ -1068,20 +1051,18 @@

    -
    220    def get_nominal_exp(self, exp_mzs) -> dict:
    -221
    +            
    221    def get_nominal_exp(self, exp_mzs) -> dict:
     222        dict_nominal_exp_mz = {}
     223
     224        for exp_mz in exp_mzs:
    -225
    -226            nominal_mz = int(exp_mz)
    -227
    -228            if nominal_mz not in dict_nominal_exp_mz.keys():
    -229                dict_nominal_exp_mz[int(exp_mz)] = [exp_mz]
    -230            else:
    -231                dict_nominal_exp_mz[int(exp_mz)].append(exp_mz)
    -232
    -233        return dict_nominal_exp_mz
    +225            nominal_mz = int(exp_mz)
    +226
    +227            if nominal_mz not in dict_nominal_exp_mz.keys():
    +228                dict_nominal_exp_mz[int(exp_mz)] = [exp_mz]
    +229            else:
    +230                dict_nominal_exp_mz[int(exp_mz)].append(exp_mz)
    +231
    +232        return dict_nominal_exp_mz
     
    @@ -1099,30 +1080,23 @@

    -
    235    def search_mz(self, results, dict_nominal_exp_mz, calculated_mz, offset) -> None:
    -236
    -237        nominal_calculated_mz = int(calculated_mz) + offset
    -238        matched_n_precursors = dict_nominal_exp_mz.get(nominal_calculated_mz)
    -239
    -240        for precursor_mz in matched_n_precursors:
    -241            
    -242            error = self.calc_mz_error(calculated_mz, precursor_mz,
    -243                                       method=self.method)
    -244
    -245            if self.check_ppm_error(self.tolerance, error):
    -246
    -247                new_match = SearchResults(calculated_mz,
    -248                                          precursor_mz,
    -249                                          error,
    -250                                          self.tolerance)
    -251                
    -252                if calculated_mz not in results.keys():
    -253                    
    -254                    results[calculated_mz] = [new_match]
    -255
    -256                else:
    -257
    -258                    results[calculated_mz].append(new_match)
    +            
    234    def search_mz(self, results, dict_nominal_exp_mz, calculated_mz, offset) -> None:
    +235        nominal_calculated_mz = int(calculated_mz) + offset
    +236        matched_n_precursors = dict_nominal_exp_mz.get(nominal_calculated_mz)
    +237
    +238        for precursor_mz in matched_n_precursors:
    +239            error = self.calc_mz_error(calculated_mz, precursor_mz, method=self.method)
    +240
    +241            if self.check_ppm_error(self.tolerance, error):
    +242                new_match = SearchResults(
    +243                    calculated_mz, precursor_mz, error, self.tolerance
    +244                )
    +245
    +246                if calculated_mz not in results.keys():
    +247                    results[calculated_mz] = [new_match]
    +248
    +249                else:
    +250                    results[calculated_mz].append(new_match)
     
    diff --git a/docs/corems/mass_spectra/calc/SignalProcessing.html b/docs/corems/mass_spectra/calc/SignalProcessing.html index 52ec48ea..9ec85bf6 100644 --- a/docs/corems/mass_spectra/calc/SignalProcessing.html +++ b/docs/corems/mass_spectra/calc/SignalProcessing.html @@ -86,611 +86,660 @@

      1import numpy as np
       2
    -  3from pandas import Series, DataFrame
    -  4
    -  5from scipy.signal import savgol_filter
    -  6from scipy.signal.windows import boxcar
    -  7from scipy import interpolate
    -  8from matplotlib import pyplot as plt
    -  9from numpy import abs
    - 10from numpy import array, polyfit, asarray
    - 11
    - 12def peak_detector(tic, max_tic): #TODO remove max_tic argument?
    - 13    """
    - 14    Find peaks by detecting minima in the first derivative of the data
    - 15    Used in LC/GC data processing
    - 16
    - 17    Parameters
    - 18    ----------
    - 19    tic : array
    - 20        array of data points to find the peaks
    - 21    max_tic : float
    - 22        maximum value of the data points
    - 23    
    - 24    Returns
    - 25    -------
    - 26    tuple
    - 27        tuple of indexes of the start, apex and final points of the peak
    - 28    
    - 29    """
    - 30    dy = derivate(tic)
    - 31
    - 32    indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    - 33
    - 34    for index in indexes:
    - 35
    - 36        start_index = find_minima(index, tic, right=False)
    - 37        final_index = find_minima(index, tic)
    +  3from scipy.signal import savgol_filter
    +  4from scipy.signal.windows import boxcar
    +  5from scipy import interpolate
    +  6from matplotlib import pyplot as plt
    +  7from numpy import abs
    +  8from numpy import array, polyfit, asarray
    +  9
    + 10
    + 11def peak_detector(tic, max_tic):  # TODO remove max_tic argument?
    + 12    """
    + 13    Find peaks by detecting minima in the first derivative of the data
    + 14    Used in LC/GC data processing
    + 15
    + 16    Parameters
    + 17    ----------
    + 18    tic : array
    + 19        array of data points to find the peaks
    + 20    max_tic : float
    + 21        maximum value of the data points
    + 22
    + 23    Returns
    + 24    -------
    + 25    tuple
    + 26        tuple of indexes of the start, apex and final points of the peak
    + 27
    + 28    """
    + 29    dy = derivate(tic)
    + 30
    + 31    indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    + 32
    + 33    for index in indexes:
    + 34        start_index = find_minima(index, tic, right=False)
    + 35        final_index = find_minima(index, tic)
    + 36
    + 37        yield (start_index, index, final_index)
      38
    - 39        yield (start_index, index, final_index)
    - 40
    - 41def find_nearest_scan(data, nodes):
    - 42    """
    - 43    Find nearest data point in a list of nodes (derivated data)
    - 44    in LC/GC this is 'scan', in MS this is 'm/z' data point
    - 45
    - 46    Parameters
    - 47    ----------
    - 48    data : float
    - 49        data point to find the nearest node
    - 50    nodes : array
    - 51        array of nodes to search for the nearest node
    - 52    
    - 53    Returns
    - 54    -------
    - 55    float
    - 56        nearest node to the data point
    - 57    """
    - 58
    - 59    array_data = asarray(nodes)
    - 60
    - 61    scan_index = (abs(array_data - data)).argmin()
    - 62
    - 63    return nodes[scan_index]
    + 39
    + 40def find_nearest_scan(data, nodes):
    + 41    """
    + 42    Find nearest data point in a list of nodes (derivated data)
    + 43    in LC/GC this is 'scan', in MS this is 'm/z' data point
    + 44
    + 45    Parameters
    + 46    ----------
    + 47    data : float
    + 48        data point to find the nearest node
    + 49    nodes : array
    + 50        array of nodes to search for the nearest node
    + 51
    + 52    Returns
    + 53    -------
    + 54    float
    + 55        nearest node to the data point
    + 56    """
    + 57
    + 58    array_data = asarray(nodes)
    + 59
    + 60    scan_index = (abs(array_data - data)).argmin()
    + 61
    + 62    return nodes[scan_index]
    + 63
      64
    - 65
    - 66def check_corrected_abundance(closest_left, closest_right, apex_index, signal, max_signal, signal_threshold, abun_norm):
    - 67    """
    - 68    Check the corrected abundance of the peak
    - 69
    - 70    Parameters
    - 71    ----------
    - 72    closest_left : int
    - 73        index of the closest left node
    - 74    closest_right : int
    - 75        index of the closest right node
    - 76    apex_index : int
    - 77        index of the apex node
    - 78    signal : array
    - 79        array of data points to find the peaks
    - 80    max_signal : float
    - 81        maximum value of the data points
    - 82    signal_threshold : float
    - 83        threshold for the signal
    - 84    abun_norm : float
    - 85        abundance normalization factor
    - 86    
    - 87    Returns
    - 88    -------
    - 89    float
    - 90        corrected abundance of the peak
    - 91    
    - 92
    - 93    """
    - 94    x = [closest_left, closest_right]
    - 95    y = [signal[closest_left], signal[closest_right]]
    - 96    
    - 97    pol = polyfit(x, y, 1) #TODO replace with faster method in this file
    + 65def check_corrected_abundance(
    + 66    closest_left,
    + 67    closest_right,
    + 68    apex_index,
    + 69    signal,
    + 70    max_signal,
    + 71    signal_threshold,
    + 72    abun_norm,
    + 73):
    + 74    """
    + 75    Check the corrected abundance of the peak
    + 76
    + 77    Parameters
    + 78    ----------
    + 79    closest_left : int
    + 80        index of the closest left node
    + 81    closest_right : int
    + 82        index of the closest right node
    + 83    apex_index : int
    + 84        index of the apex node
    + 85    signal : array
    + 86        array of data points to find the peaks
    + 87    max_signal : float
    + 88        maximum value of the data points
    + 89    signal_threshold : float
    + 90        threshold for the signal
    + 91    abun_norm : float
    + 92        abundance normalization factor
    + 93
    + 94    Returns
    + 95    -------
    + 96    float
    + 97        corrected abundance of the peak
      98
    - 99    corrected_peak_height = signal[apex_index] - pol(apex_index)
    -100
    -101    if (corrected_peak_height / max_signal) * abun_norm > signal_threshold:
    -102        return corrected_peak_height
    -103    else:
    -104        return False
    + 99
    +100    """
    +101    x = [closest_left, closest_right]
    +102    y = [signal[closest_left], signal[closest_right]]
    +103
    +104    pol = polyfit(x, y, 1)  # TODO replace with faster method in this file
     105
    -106def peak_picking_first_derivative(domain, signal, max_height, max_prominence, max_signal, 
    -107                                  min_peak_datapoints,
    -108                                  peak_derivative_threshold,
    -109                                  signal_threshold=0.1, correct_baseline=True, plot_res=False, 
    -110                                  abun_norm=100, check_abundance=False,
    -111                                  apex_indexes = []):
    -112    
    -113    """
    -114    Find peaks by detecting minima in the first derivative of the data
    -115    Used in LC/GC and MS data processing
    -116    Optional baseline correction, then peak apex detection via 1st derivative.
    -117    For each apex the peak datapoints surrounding the apex are determined. 
    -118    Some basic thresholding is applied (signal, number of datapoints, etc). 
    -119
    -120    Parameters
    -121    ----------
    -122    domain : array
    -123        array of data points to find the peaks
    -124    signal : array
    -125        array of data points to find the peaks
    -126    max_height : float
    -127        maximum height of the peak
    -128    max_prominence : float
    -129        maximum prominence of the peak
    -130    max_signal : float
    -131        maximum signal of the peak
    -132    min_peak_datapoints : int
    -133        minimum number of data points in the peak
    -134    peak_derivative_threshold : float
    -135        threshold for the peak derivative
    -136    signal_threshold : float
    -137        threshold for the signal
    -138    correct_baseline : bool
    -139        flag to correct the baseline
    -140    plot_res : bool
    -141        flag to plot the results
    -142    abun_norm : float
    -143        abundance normalization factor
    -144    check_abundance : bool
    -145        flag to check the abundance
    -146    
    -147    
    -148    Returns
    -149    -------
    -150    tuple
    -151        tuple of indexes of the start, apex and final points of the peak
    -152    
    -153    
    -154    """
    -155    if correct_baseline:
    -156        signal = signal - baseline_detector(signal, domain, max_height, max_prominence)
    -157
    -158
    -159    domain = np.array(domain)
    -160    signal = np.array(signal)
    -161
    -162    dy = derivate(signal)
    -163    if len(apex_indexes) == 0:
    -164        # Find apexes
    -165        apex_indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    -166    else:
    -167        apex_indexes = np.array(apex_indexes)
    -168    
    -169    if apex_indexes.size and apex_indexes is not None:
    -170            apex_indexes = apex_indexes[signal[apex_indexes]/max_signal >= signal_threshold]
    -171
    -172    signal = signal/max(signal)
    -173    start_peak = []
    -174    end_peak = []
    -175
    -176    pos_dy_threshold = peak_derivative_threshold  #max(dy) * peak_derivative_threshold
    -177    neg_dy_threshold = -peak_derivative_threshold #min(dy) * peak_derivative_threshold
    -178    len_dy = len(dy)
    -179    # take apex_index and move left to find start
    -180    for index in apex_indexes:
    -181        # catch for starting position
    -182        
    -183        if index == 0:
    -184            index_start = index
    -185        else:
    -186            index_start = index - 1
    -187
    -188        # catch for ending position
    -189        if (index  + 1) >= dy.shape[0] :
    -190            index_end = index -1
    -191        else:
    -192            index_end = index + 1
    -193
    -194        # while dy[index_start-1] > 0 and index_start != 0:
    -195        while dy[index_start - 1] > pos_dy_threshold and index_start > 0:
    -196            index_start = index_start - 1
    -197        start_peak.append(index_start)
    -198
    -199        # while dy[index_end] < 0 and index_end != (len(dy) - 1):
    -200        while dy[index_end] < neg_dy_threshold and index_end != (len_dy - 1):
    -201            index_end = index_end + 1
    -202        end_peak.append(index_end)
    -203
    -204    start_peak = array(start_peak)
    -205    end_peak = array(end_peak)
    -206    
    -207
    -208    for apex_index in apex_indexes:
    -209        #index_gt_apex = np.where(end_peak >= apex_index)[0]
    -210        #index_lt_apex = np.where(start_peak <= apex_index)[0]
    -211        index_gt_apex = np.arange(np.searchsorted(end_peak, apex_index),  len(end_peak))
    -212        index_lt_apex = np.arange(0, np.searchsorted(start_peak, apex_index,side='right'))
    -213
    -214        if not index_gt_apex.size == 0 and not index_lt_apex.size == 0:
    +106    corrected_peak_height = signal[apex_index] - pol(apex_index)
    +107
    +108    if (corrected_peak_height / max_signal) * abun_norm > signal_threshold:
    +109        return corrected_peak_height
    +110    else:
    +111        return False
    +112
    +113
    +114def peak_picking_first_derivative(
    +115    domain,
    +116    signal,
    +117    max_height,
    +118    max_prominence,
    +119    max_signal,
    +120    min_peak_datapoints,
    +121    peak_derivative_threshold,
    +122    signal_threshold=0.1,
    +123    correct_baseline=True,
    +124    plot_res=False,
    +125    abun_norm=100,
    +126    check_abundance=False,
    +127    apex_indexes=[],
    +128):
    +129    """
    +130    Find peaks by detecting minima in the first derivative of the data
    +131    Used in LC/GC and MS data processing
    +132    Optional baseline correction, then peak apex detection via 1st derivative.
    +133    For each apex the peak datapoints surrounding the apex are determined.
    +134    Some basic thresholding is applied (signal, number of datapoints, etc).
    +135
    +136    Parameters
    +137    ----------
    +138    domain : array
    +139        array of data points to find the peaks
    +140    signal : array
    +141        array of data points to find the peaks
    +142    max_height : float
    +143        maximum height of the peak
    +144    max_prominence : float
    +145        maximum prominence of the peak
    +146    max_signal : float
    +147        maximum signal of the peak
    +148    min_peak_datapoints : int
    +149        minimum number of data points in the peak
    +150    peak_derivative_threshold : float
    +151        threshold for the peak derivative
    +152    signal_threshold : float
    +153        threshold for the signal
    +154    correct_baseline : bool
    +155        flag to correct the baseline
    +156    plot_res : bool
    +157        flag to plot the results
    +158    abun_norm : float
    +159        abundance normalization factor
    +160    check_abundance : bool
    +161        flag to check the abundance
    +162
    +163
    +164    Returns
    +165    -------
    +166    tuple
    +167        tuple of indexes of the start, apex and final points of the peak
    +168
    +169
    +170    """
    +171    if correct_baseline:
    +172        signal = signal - baseline_detector(signal, domain, max_height, max_prominence)
    +173
    +174    domain = np.array(domain)
    +175    signal = np.array(signal)
    +176
    +177    dy = derivate(signal)
    +178    if len(apex_indexes) == 0:
    +179        # Find apexes
    +180        apex_indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    +181    else:
    +182        apex_indexes = np.array(apex_indexes)
    +183
    +184    if apex_indexes.size and apex_indexes is not None:
    +185        apex_indexes = apex_indexes[
    +186            signal[apex_indexes] / max_signal >= signal_threshold
    +187        ]
    +188
    +189    signal = signal / max(signal)
    +190    start_peak = []
    +191    end_peak = []
    +192
    +193    pos_dy_threshold = peak_derivative_threshold  # max(dy) * peak_derivative_threshold
    +194    neg_dy_threshold = -peak_derivative_threshold  # min(dy) * peak_derivative_threshold
    +195    len_dy = len(dy)
    +196    # take apex_index and move left to find start
    +197    for index in apex_indexes:
    +198        # catch for starting position
    +199
    +200        if index == 0:
    +201            index_start = index
    +202        else:
    +203            index_start = index - 1
    +204
    +205        # catch for ending position
    +206        if (index + 1) >= dy.shape[0]:
    +207            index_end = index - 1
    +208        else:
    +209            index_end = index + 1
    +210
    +211        # while dy[index_start-1] > 0 and index_start != 0:
    +212        while dy[index_start - 1] > pos_dy_threshold and index_start > 0:
    +213            index_start = index_start - 1
    +214        start_peak.append(index_start)
     215
    -216            closest_right = find_nearest_scan(apex_index, end_peak[index_gt_apex])
    -217            closest_left = find_nearest_scan(apex_index,  start_peak[index_lt_apex])
    -218            if check_abundance:
    -219                corrected_peak_height = check_corrected_abundance(closest_left, closest_right, apex_index, signal, max_signal, signal_threshold, abun_norm)
    -220            else:
    -221                corrected_peak_height = signal[apex_index]
    -222
    -223            if (closest_right - closest_left) >= min_peak_datapoints:
    -224
    -225                if plot_res:
    -226                    plt.plot(domain[closest_left: closest_right + 1], dy[closest_left:closest_right + 1], c='red')
    -227                    plt.plot(domain[closest_left: closest_right + 1], signal[closest_left:closest_right + 1], c='black')
    -228                    plt.title(str((corrected_peak_height / max_signal) * 100))
    -229                    plt.show()
    -230                
    -231                yield (closest_left, apex_index, closest_right)
    -232    
    -233                
    -234
    -235def find_minima(index, tic, right=True):
    -236    """
    -237    Find the index of the local minima in the given time-of-flight (TOF) intensity array.
    -238
    -239    Parameters:
    -240    -----------
    -241    index: int 
    -242        The starting index to search for the minima.
    -243    tic: list
    -244        TIC data points
    -245    right : bool, optional
    -246        Determines the direction of the search. If True, search to the right of the index. If False, search to the left of the index. Default is True.
    +216        # while dy[index_end] < 0 and index_end != (len(dy) - 1):
    +217        while dy[index_end] < neg_dy_threshold and index_end != (len_dy - 1):
    +218            index_end = index_end + 1
    +219        end_peak.append(index_end)
    +220
    +221    start_peak = array(start_peak)
    +222    end_peak = array(end_peak)
    +223
    +224    for apex_index in apex_indexes:
    +225        # index_gt_apex = np.where(end_peak >= apex_index)[0]
    +226        # index_lt_apex = np.where(start_peak <= apex_index)[0]
    +227        index_gt_apex = np.arange(np.searchsorted(end_peak, apex_index), len(end_peak))
    +228        index_lt_apex = np.arange(
    +229            0, np.searchsorted(start_peak, apex_index, side="right")
    +230        )
    +231
    +232        if not index_gt_apex.size == 0 and not index_lt_apex.size == 0:
    +233            closest_right = find_nearest_scan(apex_index, end_peak[index_gt_apex])
    +234            closest_left = find_nearest_scan(apex_index, start_peak[index_lt_apex])
    +235            if check_abundance:
    +236                corrected_peak_height = check_corrected_abundance(
    +237                    closest_left,
    +238                    closest_right,
    +239                    apex_index,
    +240                    signal,
    +241                    max_signal,
    +242                    signal_threshold,
    +243                    abun_norm,
    +244                )
    +245            else:
    +246                corrected_peak_height = signal[apex_index]
     247
    -248    Returns:
    -249    --------
    -250    int
    -251        The index of the local minima in the TIC  array.
    -252    """
    -253            
    -254    j = index
    -255    #apex_abundance = tic[index]
    -256    tic_len = len(tic)
    -257
    -258    if right: minima = tic[j] >= tic[j+1]
    -259    else: minima = tic[j] >= tic[j-1]
    -260
    -261    while minima:
    -262        
    -263        if j == 1 or j == tic_len -2:
    -264            break
    -265        
    -266        if right: 
    -267            j += 1
    -268
    -269            minima = tic[j] >= tic[j+1]
    -270
    -271        else: 
    -272            j -= 1
    -273            minima = tic[j] >= tic[j-1]
    -274
    -275    if right: return j
    -276    else: return j
    -277
    -278def derivate(data_array):
    -279    """
    -280    Calculate derivative of the data points. 
    -281    Replaces nan with infinity
    -282
    -283    Parameters
    -284    ----------
    -285    data_array : array
    -286        array of data points
    -287    
    -288    Returns
    -289    -------
    -290    array
    -291        array of the derivative of the data points
    -292    """
    -293    data_array = np.array(data_array)
    -294
    -295    dy = data_array[1:] - data_array[:-1]
    -296
    -297    #replaces nan for infinity
    -298    indices_nan = np.where(np.isnan(data_array))[0]
    -299
    -300    if indices_nan.size:
    -301
    -302        data_array[indices_nan] = np.inf
    -303        dy[np.where(np.isnan(dy))[0]] = np.inf
    -304
    -305    return dy
    +248            if (closest_right - closest_left) >= min_peak_datapoints:
    +249                if plot_res:
    +250                    plt.plot(
    +251                        domain[closest_left : closest_right + 1],
    +252                        dy[closest_left : closest_right + 1],
    +253                        c="red",
    +254                    )
    +255                    plt.plot(
    +256                        domain[closest_left : closest_right + 1],
    +257                        signal[closest_left : closest_right + 1],
    +258                        c="black",
    +259                    )
    +260                    plt.title(str((corrected_peak_height / max_signal) * 100))
    +261                    plt.show()
    +262
    +263                yield (closest_left, apex_index, closest_right)
    +264
    +265
    +266def find_minima(index, tic, right=True):
    +267    """
    +268    Find the index of the local minima in the given time-of-flight (TOF) intensity array.
    +269
    +270    Parameters:
    +271    -----------
    +272    index: int
    +273        The starting index to search for the minima.
    +274    tic: list
    +275        TIC data points
    +276    right : bool, optional
    +277        Determines the direction of the search. If True, search to the right of the index. If False, search to the left of the index. Default is True.
    +278
    +279    Returns:
    +280    --------
    +281    int
    +282        The index of the local minima in the TIC  array.
    +283    """
    +284
    +285    j = index
    +286    # apex_abundance = tic[index]
    +287    tic_len = len(tic)
    +288
    +289    if right:
    +290        minima = tic[j] >= tic[j + 1]
    +291    else:
    +292        minima = tic[j] >= tic[j - 1]
    +293
    +294    while minima:
    +295        if j == 1 or j == tic_len - 2:
    +296            break
    +297
    +298        if right:
    +299            j += 1
    +300
    +301            minima = tic[j] >= tic[j + 1]
    +302
    +303        else:
    +304            j -= 1
    +305            minima = tic[j] >= tic[j - 1]
     306
    -307def minima_detector(tic, max_tic, peak_height_max_percent, peak_max_prominence_percent):
    -308    """
    -309    Minima detector for the TIC data points.
    -310
    -311    Parameters
    -312    ----------
    -313    tic : array
    -314        array of data points to find the peaks  
    -315    max_tic : float
    -316        maximum value of the data points
    -317    peak_height_max_percent : float
    -318        maximum height of the peak
    -319    peak_max_prominence_percent : float
    -320        maximum prominence of the peak
    -321    
    -322    Returns
    -323    -------
    -324    generator
    -325        generator of the indexes of the minima in the TIC array
    -326    
    +307    if right:
    +308        return j
    +309    else:
    +310        return j
    +311
    +312
    +313def derivate(data_array):
    +314    """
    +315    Calculate derivative of the data points.
    +316    Replaces nan with infinity
    +317
    +318    Parameters
    +319    ----------
    +320    data_array : array
    +321        array of data points
    +322
    +323    Returns
    +324    -------
    +325    array
    +326        array of the derivative of the data points
     327    """
    -328    peak_height_diff = lambda hi, li : ((tic[hi] - tic[li]) / max_tic )*100
    +328    data_array = np.array(data_array)
     329
    -330    for start_index, index, final_index in peak_detector(tic, max_tic):
    +330    dy = data_array[1:] - data_array[:-1]
     331
    -332        # abundance max threshold    
    -333        if (tic[index] / max_tic) * 100 < peak_height_max_percent:
    +332    # replaces nan for infinity
    +333    indices_nan = np.where(np.isnan(data_array))[0]
     334
    -335            # calculates prominence and filter   
    -336            if peak_height_diff(index, start_index) and peak_height_diff(index, final_index) < peak_max_prominence_percent:
    -337                
    -338                    yield from (start_index, final_index)
    -339
    -340def baseline_detector(tic, rt, peak_height_max_percent, peak_max_prominence_percent, do_interpolation=True):
    -341    """
    -342    Baseline detector for the TIC data points.
    -343    For LC/GC data processing
    -344
    -345    Parameters
    -346    ----------
    -347    tic : array
    -348        array of data points to find the peaks
    -349    rt : array
    -350        array of retention time data points
    -351    peak_height_max_percent : float
    -352        maximum height of the peak
    -353    peak_max_prominence_percent : float
    -354        maximum prominence of the peak
    -355    do_interpolation : bool, optional
    -356        flag to interpolate the data points. Default is True
    -357    
    -358    Returns
    -359    ------- 
    -360    array
    -361        array of the baseline corrected data points
    -362           
    -363    """
    -364    rt = np.array(rt)
    -365
    -366    max_tic = max(tic)
    -367
    -368    indexes = sorted(list(set(i for i in minima_detector(tic, max_tic, peak_height_max_percent, peak_max_prominence_percent))))
    -369
    -370    y = -tic
    -371
    -372    x1 = rt[indexes]
    -373
    -374    y1 = y[indexes]
    +335    if indices_nan.size:
    +336        data_array[indices_nan] = np.inf
    +337        dy[np.where(np.isnan(dy))[0]] = np.inf
    +338
    +339    return dy
    +340
    +341
    +342def minima_detector(tic, max_tic, peak_height_max_percent, peak_max_prominence_percent):
    +343    """
    +344    Minima detector for the TIC data points.
    +345
    +346    Parameters
    +347    ----------
    +348    tic : array
    +349        array of data points to find the peaks
    +350    max_tic : float
    +351        maximum value of the data points
    +352    peak_height_max_percent : float
    +353        maximum height of the peak
    +354    peak_max_prominence_percent : float
    +355        maximum prominence of the peak
    +356
    +357    Returns
    +358    -------
    +359    generator
    +360        generator of the indexes of the minima in the TIC array
    +361
    +362    """
    +363    peak_height_diff = lambda hi, li: ((tic[hi] - tic[li]) / max_tic) * 100
    +364
    +365    for start_index, index, final_index in peak_detector(tic, max_tic):
    +366        # abundance max threshold
    +367        if (tic[index] / max_tic) * 100 < peak_height_max_percent:
    +368            # calculates prominence and filter
    +369            if (
    +370                peak_height_diff(index, start_index)
    +371                and peak_height_diff(index, final_index) < peak_max_prominence_percent
    +372            ):
    +373                yield from (start_index, final_index)
    +374
     375
    -376    if len(x1) <= 5:
    -377        return tic
    -378
    -379    if not do_interpolation:
    -380  
    -381        y0 = np.zeros(tic.shape)
    -382        y0[indexes] = y[indexes]
    -383
    -384        return y0
    -385
    -386    else:
    -387
    -388        f1 = interpolate.interp1d(x1, y1, kind='quadratic',fill_value="extrapolate")
    -389
    -390        ynew1 = f1(list(rt))
    -391
    -392        # from matplotlib import pyplot as plt   
    -393        # if self.deconv_rt_list and  self.deconv_mz == 51:
    -394
    -395        #   plt.plot(rt, tic-(-1* ynew1), color='green')
    -396
    -397        # plt.plot(rt, -1* ynew1, c='black')
    -398
    -399        # s = self.smooth(s, 10, 'blackman')
    +376def baseline_detector(
    +377    tic, rt, peak_height_max_percent, peak_max_prominence_percent, do_interpolation=True
    +378):
    +379    """
    +380    Baseline detector for the TIC data points.
    +381    For LC/GC data processing
    +382
    +383    Parameters
    +384    ----------
    +385    tic : array
    +386        array of data points to find the peaks
    +387    rt : array
    +388        array of retention time data points
    +389    peak_height_max_percent : float
    +390        maximum height of the peak
    +391    peak_max_prominence_percent : float
    +392        maximum prominence of the peak
    +393    do_interpolation : bool, optional
    +394        flag to interpolate the data points. Default is True
    +395
    +396    Returns
    +397    -------
    +398    array
    +399        array of the baseline corrected data points
     400
    -401        # plt.plot(self.retention_time, -s)
    -402
    -403        # plt.show()
    -404
    -405        return -1 * ynew1
    -406
    -407def peak_detector_generator(tic, stds, method, rt, max_height, min_height, max_prominence, min_datapoints):
    -408    """
    -409    Peak detector generator for the TIC data points.
    -410
    -411    Parameters
    -412    ----------
    -413    tic : array
    -414        array of data points to find the peaks
    -415    stds : float
    -416        standard deviation
    -417    method : str
    -418        method to detect the peaks
    -419        Available methods: 'manual_relative_abundance', 'auto_relative_abundance', 'second_derivative'
    -420    rt : array
    -421        array of retention time data points
    -422    max_height : float
    -423        maximum height of the peak
    -424    min_height : float
    -425        minimum height of the peak
    -426    max_prominence : float
    -427        maximum prominence of the peak
    -428    min_datapoints : int
    -429        minimum number of data points in the peak
    -430
    -431    Returns
    -432    -------
    -433    generator
    -434        generator of the indexes of the peaks in the TIC array
    -435
    -436    """
    -437    max_tic = max(tic)
    -438
    -439    if method == 'manual_relative_abundance':
    -440
    -441        tic = tic - baseline_detector(tic, rt, max_height, max_prominence)
    -442
    -443        norm_tic = (tic / max_tic) * 100
    -444
    -445        remove_indexes = np.where(norm_tic < min_height)[0]
    -446
    -447        # if self.deconv_rt_list and  self.deconv_mz == 51:
    -448        #    plt.plot(self.deconv_rt_list, tic, label=self.deconv_mz)
    +401    """
    +402    rt = np.array(rt)
    +403
    +404    max_tic = max(tic)
    +405
    +406    indexes = sorted(
    +407        list(
    +408            set(
    +409                i
    +410                for i in minima_detector(
    +411                    tic, max_tic, peak_height_max_percent, peak_max_prominence_percent
    +412                )
    +413            )
    +414        )
    +415    )
    +416
    +417    y = -tic
    +418
    +419    x1 = rt[indexes]
    +420
    +421    y1 = y[indexes]
    +422
    +423    if len(x1) <= 5:
    +424        return tic
    +425
    +426    if not do_interpolation:
    +427        y0 = np.zeros(tic.shape)
    +428        y0[indexes] = y[indexes]
    +429
    +430        return y0
    +431
    +432    else:
    +433        f1 = interpolate.interp1d(x1, y1, kind="quadratic", fill_value="extrapolate")
    +434
    +435        ynew1 = f1(list(rt))
    +436
    +437        # from matplotlib import pyplot as plt
    +438        # if self.deconv_rt_list and  self.deconv_mz == 51:
    +439
    +440        #   plt.plot(rt, tic-(-1* ynew1), color='green')
    +441
    +442        # plt.plot(rt, -1* ynew1, c='black')
    +443
    +444        # s = self.smooth(s, 10, 'blackman')
    +445
    +446        # plt.plot(self.retention_time, -s)
    +447
    +448        # plt.show()
     449
    -450    elif method == 'auto_relative_abundance':
    +450        return -1 * ynew1
     451
    -452        tic = tic - baseline_detector(tic, rt, max_height, max_prominence)
    -453
    -454        baseline = baseline_detector(tic, rt, max_height, max_prominence)
    -455
    -456        peak_detect_threshold = ((np.nanmean(baseline) + (stds * np.std(baseline))))
    -457
    -458        remove_indexes = np.where(tic < peak_detect_threshold)[0]
    -459
    -460    elif method == 'second_derivative':
    -461
    -462        remove_indexes = second_derivative_threshold(tic, stds, rt, max_height, max_prominence)
    -463
    -464    else:
    -465
    -466        NotImplemented(method)
    -467
    -468    peak_height_diff = lambda hi, li : ((tic[hi] - tic[li]) / max_tic )*100
    -469    
    -470    dy = derivate(tic)
    -471
    -472    include_indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    -473
    -474    final_indexes = sorted(set(include_indexes)-set(remove_indexes))
    -475
    -476    #from matplotlib import pyplot as plt   
    -477    
    -478    #plt.plot(self.retention_time, tic, color='black')
    -479    #plt.scatter(self.retention_time[remove_indexes], tic[remove_indexes], color='red')
    -480    #plt.scatter(self.retention_time[include_indexes], tic[include_indexes], color='blue')
    -481    #plt.scatter(self.retention_time[final_indexes], tic[final_indexes], color='blue')
    -482    
    -483    #plt.show()
    -484
    -485    for index in final_indexes:
    -486            
    -487        start_index = find_minima(index, tic, right=False)
    -488        final_index = find_minima(index, tic)
    -489        
    -490        if final_index-start_index > min_datapoints:
    +452
    +453def peak_detector_generator(
    +454    tic, stds, method, rt, max_height, min_height, max_prominence, min_datapoints
    +455):
    +456    """
    +457    Peak detector generator for the TIC data points.
    +458
    +459    Parameters
    +460    ----------
    +461    tic : array
    +462        array of data points to find the peaks
    +463    stds : float
    +464        standard deviation
    +465    method : str
    +466        method to detect the peaks
    +467        Available methods: 'manual_relative_abundance', 'auto_relative_abundance', 'second_derivative'
    +468    rt : array
    +469        array of retention time data points
    +470    max_height : float
    +471        maximum height of the peak
    +472    min_height : float
    +473        minimum height of the peak
    +474    max_prominence : float
    +475        maximum prominence of the peak
    +476    min_datapoints : int
    +477        minimum number of data points in the peak
    +478
    +479    Returns
    +480    -------
    +481    generator
    +482        generator of the indexes of the peaks in the TIC array
    +483
    +484    """
    +485    max_tic = max(tic)
    +486
    +487    if method == "manual_relative_abundance":
    +488        tic = tic - baseline_detector(tic, rt, max_height, max_prominence)
    +489
    +490        norm_tic = (tic / max_tic) * 100
     491
    -492            #if min( peak_height_diff(index,start_index), peak_height_diff(index,final_index) )> self.chromatogram_settings.peak_min_prominence_percent :   
    -493                
    -494                yield (start_index, index, final_index)
    -495
    -496def smooth_signal(x, window_len, window, pol_order, implemented_smooth_method):
    -497    """
    -498    Smooth the data using a window with requested size.
    +492        remove_indexes = np.where(norm_tic < min_height)[0]
    +493
    +494        # if self.deconv_rt_list and  self.deconv_mz == 51:
    +495        #    plt.plot(self.deconv_rt_list, tic, label=self.deconv_mz)
    +496
    +497    elif method == "auto_relative_abundance":
    +498        tic = tic - baseline_detector(tic, rt, max_height, max_prominence)
     499
    -500    This method is based on the convolution of a scaled window with the signal.
    -501    The signal is prepared by introducing reflected copies of the signal 
    -502    (with the window size) in both ends so that transient parts are minimized
    -503    in the begining and end part of the output signal.
    -504
    -505    Parameters
    -506    ----------
    -507    x: array
    -508        the input signal
    -509    window_len: int
    -510        the dimension of the smoothing window; should be an odd integer
    -511    window: str
    -512        the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
    -513    pol_order: int
    -514        the order of the polynomial to fit the data
    -515    implemented_smooth_method: list
    -516        list of implemented smoothing methods
    +500        baseline = baseline_detector(tic, rt, max_height, max_prominence)
    +501
    +502        peak_detect_threshold = np.nanmean(baseline) + (stds * np.std(baseline))
    +503
    +504        remove_indexes = np.where(tic < peak_detect_threshold)[0]
    +505
    +506    elif method == "second_derivative":
    +507        remove_indexes = second_derivative_threshold(
    +508            tic, stds, rt, max_height, max_prominence
    +509        )
    +510
    +511    else:
    +512        NotImplemented(method)
    +513
    +514    peak_height_diff = lambda hi, li: ((tic[hi] - tic[li]) / max_tic) * 100
    +515
    +516    dy = derivate(tic)
     517
    -518    Returns 
    -519    -------
    -520    y: array
    -521        the smoothed signal
    -522    
    -523    Notes:
    -524    -----
    -525    See also: numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    -526    scipy.signal.savgol_filter
    -527
    -528    """
    -529    x= np.array(x)
    +518    include_indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    +519
    +520    final_indexes = sorted(set(include_indexes) - set(remove_indexes))
    +521
    +522    # from matplotlib import pyplot as plt
    +523
    +524    # plt.plot(self.retention_time, tic, color='black')
    +525    # plt.scatter(self.retention_time[remove_indexes], tic[remove_indexes], color='red')
    +526    # plt.scatter(self.retention_time[include_indexes], tic[include_indexes], color='blue')
    +527    # plt.scatter(self.retention_time[final_indexes], tic[final_indexes], color='blue')
    +528
    +529    # plt.show()
     530
    -531    if x.ndim != 1:
    -532        raise ValueError("smooth only accepts 1 dimension arrays.")
    -533
    -534    if x.size < window_len:
    -535        raise ValueError("Input array needs to be bigger than window size")
    -536
    -537    #if window_len < 3:
    -538    #    return x
    +531    for index in final_indexes:
    +532        start_index = find_minima(index, tic, right=False)
    +533        final_index = find_minima(index, tic)
    +534
    +535        if final_index - start_index > min_datapoints:
    +536            # if min( peak_height_diff(index,start_index), peak_height_diff(index,final_index) )> self.chromatogram_settings.peak_min_prominence_percent :
    +537
    +538            yield (start_index, index, final_index)
     539
    -540    if not window in implemented_smooth_method:
    -541        raise ValueError("Window method should be 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
    -542
    -543    s = np.r_[x[window_len - 1:0:-1], x, x[-1:-window_len:-1]]
    +540
    +541def smooth_signal(x, window_len, window, pol_order, implemented_smooth_method):
    +542    """
    +543    Smooth the data using a window with requested size.
     544
    -545    if window == 'savgol':
    -546        
    -547        return savgol_filter(x, window_len, pol_order)
    -548
    +545    This method is based on the convolution of a scaled window with the signal.
    +546    The signal is prepared by introducing reflected copies of the signal
    +547    (with the window size) in both ends so that transient parts are minimized
    +548    in the begining and end part of the output signal.
     549
    -550    elif window == 'boxcar':  # moving average
    -551        
    -552        w = boxcar(window_len)
    -553        
    -554        y = np.convolve(w, s, mode='valid')
    -555
    -556        
    -557    elif window == 'flat':  # moving average
    -558        
    -559        w = np.ones(window_len, 'd')
    -560
    -561        y = np.convolve(w / w.sum(), s, mode='valid')
    -562        
    -563    else:
    -564        
    -565        w = eval(window + '(window_len)')
    -566
    -567        y = np.convolve(w / w.sum(), s, mode='valid')
    -568
    -569    return y[int(window_len / 2 - 1):int(-window_len / 2)]
    -570
    -571def second_derivative_threshold(tic, stds, rt, peak_height_max_percent, peak_max_prominence_percent):
    -572    """
    -573    Second derivative threshold for the TIC data points.
    -574    For LC/GC data processing
    +550    Parameters
    +551    ----------
    +552    x: array
    +553        the input signal
    +554    window_len: int
    +555        the dimension of the smoothing window; should be an odd integer
    +556    window: str
    +557        the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
    +558    pol_order: int
    +559        the order of the polynomial to fit the data
    +560    implemented_smooth_method: list
    +561        list of implemented smoothing methods
    +562
    +563    Returns
    +564    -------
    +565    y: array
    +566        the smoothed signal
    +567
    +568    Notes:
    +569    -----
    +570    See also: numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    +571    scipy.signal.savgol_filter
    +572
    +573    """
    +574    x = np.array(x)
     575
    -576    Parameters
    -577    ----------
    -578    tic : array
    -579        array of data points to find the peaks
    -580    stds : float
    -581        standard deviation
    -582    rt : array
    -583        array of retention time data points
    -584    peak_height_max_percent : float
    -585        maximum height of the peak
    -586    
    -587    Returns
    -588    -------
    -589    array
    -590        array of the indexes of the data points to remove
    -591    
    -592    """
    -593
    -594    dy = derivate(tic)
    -595    
    -596    dydy = derivate(dy)
    -597    dydy = np.hstack((dydy, 0))
    -598    dydy = np.hstack((0, dydy))
    +576    if x.ndim != 1:
    +577        raise ValueError("smooth only accepts 1 dimension arrays.")
    +578
    +579    if x.size < window_len:
    +580        raise ValueError("Input array needs to be bigger than window size")
    +581
    +582    # if window_len < 3:
    +583    #    return x
    +584
    +585    if not window in implemented_smooth_method:
    +586        raise ValueError(
    +587            "Window method should be 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
    +588        )
    +589
    +590    s = np.r_[x[window_len - 1 : 0 : -1], x, x[-1:-window_len:-1]]
    +591
    +592    if window == "savgol":
    +593        return savgol_filter(x, window_len, pol_order)
    +594
    +595    elif window == "boxcar":  # moving average
    +596        w = boxcar(window_len)
    +597
    +598        y = np.convolve(w, s, mode="valid")
     599
    -600    baseline = baseline_detector(dydy, rt, peak_height_max_percent, peak_max_prominence_percent, do_interpolation=False)
    -601    
    -602    threshold_median = np.median(baseline) - (stds * np.std(baseline))
    -603    
    -604    remove_indexes = np.where(dydy > threshold_median)[0]
    -605    
    -606    return remove_indexes
    -607        
    +600    elif window == "flat":  # moving average
    +601        w = np.ones(window_len, "d")
    +602
    +603        y = np.convolve(w / w.sum(), s, mode="valid")
    +604
    +605    else:
    +606        w = eval(window + "(window_len)")
    +607
    +608        y = np.convolve(w / w.sum(), s, mode="valid")
    +609
    +610    return y[int(window_len / 2 - 1) : int(-window_len / 2)]
    +611
    +612
    +613def second_derivative_threshold(
    +614    tic, stds, rt, peak_height_max_percent, peak_max_prominence_percent
    +615):
    +616    """
    +617    Second derivative threshold for the TIC data points.
    +618    For LC/GC data processing
    +619
    +620    Parameters
    +621    ----------
    +622    tic : array
    +623        array of data points to find the peaks
    +624    stds : float
    +625        standard deviation
    +626    rt : array
    +627        array of retention time data points
    +628    peak_height_max_percent : float
    +629        maximum height of the peak
    +630
    +631    Returns
    +632    -------
    +633    array
    +634        array of the indexes of the data points to remove
    +635
    +636    """
    +637
    +638    dy = derivate(tic)
    +639
    +640    dydy = derivate(dy)
    +641    dydy = np.hstack((dydy, 0))
    +642    dydy = np.hstack((0, dydy))
    +643
    +644    baseline = baseline_detector(
    +645        dydy,
    +646        rt,
    +647        peak_height_max_percent,
    +648        peak_max_prominence_percent,
    +649        do_interpolation=False,
    +650    )
    +651
    +652    threshold_median = np.median(baseline) - (stds * np.std(baseline))
    +653
    +654    remove_indexes = np.where(dydy > threshold_median)[0]
    +655
    +656    return remove_indexes
     
    @@ -706,34 +755,33 @@

    -
    15def peak_detector(tic, max_tic): #TODO remove max_tic argument?
    -16    """
    -17    Find peaks by detecting minima in the first derivative of the data
    -18    Used in LC/GC data processing
    -19
    -20    Parameters
    -21    ----------
    -22    tic : array
    -23        array of data points to find the peaks
    -24    max_tic : float
    -25        maximum value of the data points
    -26    
    -27    Returns
    -28    -------
    -29    tuple
    -30        tuple of indexes of the start, apex and final points of the peak
    -31    
    -32    """
    -33    dy = derivate(tic)
    -34
    -35    indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    -36
    -37    for index in indexes:
    -38
    -39        start_index = find_minima(index, tic, right=False)
    -40        final_index = find_minima(index, tic)
    -41
    -42        yield (start_index, index, final_index)
    +            
    12def peak_detector(tic, max_tic):  # TODO remove max_tic argument?
    +13    """
    +14    Find peaks by detecting minima in the first derivative of the data
    +15    Used in LC/GC data processing
    +16
    +17    Parameters
    +18    ----------
    +19    tic : array
    +20        array of data points to find the peaks
    +21    max_tic : float
    +22        maximum value of the data points
    +23
    +24    Returns
    +25    -------
    +26    tuple
    +27        tuple of indexes of the start, apex and final points of the peak
    +28
    +29    """
    +30    dy = derivate(tic)
    +31
    +32    indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    +33
    +34    for index in indexes:
    +35        start_index = find_minima(index, tic, right=False)
    +36        final_index = find_minima(index, tic)
    +37
    +38        yield (start_index, index, final_index)
     
    @@ -769,29 +817,29 @@
    Returns
    -
    44def find_nearest_scan(data, nodes):
    -45    """
    -46    Find nearest data point in a list of nodes (derivated data)
    -47    in LC/GC this is 'scan', in MS this is 'm/z' data point
    -48
    -49    Parameters
    -50    ----------
    -51    data : float
    -52        data point to find the nearest node
    -53    nodes : array
    -54        array of nodes to search for the nearest node
    -55    
    -56    Returns
    -57    -------
    -58    float
    -59        nearest node to the data point
    -60    """
    -61
    -62    array_data = asarray(nodes)
    -63
    -64    scan_index = (abs(array_data - data)).argmin()
    -65
    -66    return nodes[scan_index]
    +            
    41def find_nearest_scan(data, nodes):
    +42    """
    +43    Find nearest data point in a list of nodes (derivated data)
    +44    in LC/GC this is 'scan', in MS this is 'm/z' data point
    +45
    +46    Parameters
    +47    ----------
    +48    data : float
    +49        data point to find the nearest node
    +50    nodes : array
    +51        array of nodes to search for the nearest node
    +52
    +53    Returns
    +54    -------
    +55    float
    +56        nearest node to the data point
    +57    """
    +58
    +59    array_data = asarray(nodes)
    +60
    +61    scan_index = (abs(array_data - data)).argmin()
    +62
    +63    return nodes[scan_index]
     
    @@ -827,45 +875,53 @@
    Returns
    -
     69def check_corrected_abundance(closest_left, closest_right, apex_index, signal, max_signal, signal_threshold, abun_norm):
    - 70    """
    - 71    Check the corrected abundance of the peak
    - 72
    - 73    Parameters
    - 74    ----------
    - 75    closest_left : int
    - 76        index of the closest left node
    - 77    closest_right : int
    - 78        index of the closest right node
    - 79    apex_index : int
    - 80        index of the apex node
    - 81    signal : array
    - 82        array of data points to find the peaks
    - 83    max_signal : float
    - 84        maximum value of the data points
    - 85    signal_threshold : float
    - 86        threshold for the signal
    - 87    abun_norm : float
    - 88        abundance normalization factor
    - 89    
    - 90    Returns
    - 91    -------
    - 92    float
    - 93        corrected abundance of the peak
    - 94    
    - 95
    - 96    """
    - 97    x = [closest_left, closest_right]
    - 98    y = [signal[closest_left], signal[closest_right]]
    - 99    
    -100    pol = polyfit(x, y, 1) #TODO replace with faster method in this file
    -101
    -102    corrected_peak_height = signal[apex_index] - pol(apex_index)
    -103
    -104    if (corrected_peak_height / max_signal) * abun_norm > signal_threshold:
    -105        return corrected_peak_height
    -106    else:
    -107        return False
    +            
     66def check_corrected_abundance(
    + 67    closest_left,
    + 68    closest_right,
    + 69    apex_index,
    + 70    signal,
    + 71    max_signal,
    + 72    signal_threshold,
    + 73    abun_norm,
    + 74):
    + 75    """
    + 76    Check the corrected abundance of the peak
    + 77
    + 78    Parameters
    + 79    ----------
    + 80    closest_left : int
    + 81        index of the closest left node
    + 82    closest_right : int
    + 83        index of the closest right node
    + 84    apex_index : int
    + 85        index of the apex node
    + 86    signal : array
    + 87        array of data points to find the peaks
    + 88    max_signal : float
    + 89        maximum value of the data points
    + 90    signal_threshold : float
    + 91        threshold for the signal
    + 92    abun_norm : float
    + 93        abundance normalization factor
    + 94
    + 95    Returns
    + 96    -------
    + 97    float
    + 98        corrected abundance of the peak
    + 99
    +100
    +101    """
    +102    x = [closest_left, closest_right]
    +103    y = [signal[closest_left], signal[closest_right]]
    +104
    +105    pol = polyfit(x, y, 1)  # TODO replace with faster method in this file
    +106
    +107    corrected_peak_height = signal[apex_index] - pol(apex_index)
    +108
    +109    if (corrected_peak_height / max_signal) * abun_norm > signal_threshold:
    +110        return corrected_peak_height
    +111    else:
    +112        return False
     
    @@ -910,140 +966,164 @@
    Returns
    -
    109def peak_picking_first_derivative(domain, signal, max_height, max_prominence, max_signal, 
    -110                                  min_peak_datapoints,
    -111                                  peak_derivative_threshold,
    -112                                  signal_threshold=0.1, correct_baseline=True, plot_res=False, 
    -113                                  abun_norm=100, check_abundance=False,
    -114                                  apex_indexes = []):
    -115    
    -116    """
    -117    Find peaks by detecting minima in the first derivative of the data
    -118    Used in LC/GC and MS data processing
    -119    Optional baseline correction, then peak apex detection via 1st derivative.
    -120    For each apex the peak datapoints surrounding the apex are determined. 
    -121    Some basic thresholding is applied (signal, number of datapoints, etc). 
    -122
    -123    Parameters
    -124    ----------
    -125    domain : array
    -126        array of data points to find the peaks
    -127    signal : array
    -128        array of data points to find the peaks
    -129    max_height : float
    -130        maximum height of the peak
    -131    max_prominence : float
    -132        maximum prominence of the peak
    -133    max_signal : float
    -134        maximum signal of the peak
    -135    min_peak_datapoints : int
    -136        minimum number of data points in the peak
    -137    peak_derivative_threshold : float
    -138        threshold for the peak derivative
    -139    signal_threshold : float
    -140        threshold for the signal
    -141    correct_baseline : bool
    -142        flag to correct the baseline
    -143    plot_res : bool
    -144        flag to plot the results
    -145    abun_norm : float
    -146        abundance normalization factor
    -147    check_abundance : bool
    -148        flag to check the abundance
    -149    
    -150    
    -151    Returns
    -152    -------
    -153    tuple
    -154        tuple of indexes of the start, apex and final points of the peak
    -155    
    -156    
    -157    """
    -158    if correct_baseline:
    -159        signal = signal - baseline_detector(signal, domain, max_height, max_prominence)
    -160
    -161
    -162    domain = np.array(domain)
    -163    signal = np.array(signal)
    +            
    115def peak_picking_first_derivative(
    +116    domain,
    +117    signal,
    +118    max_height,
    +119    max_prominence,
    +120    max_signal,
    +121    min_peak_datapoints,
    +122    peak_derivative_threshold,
    +123    signal_threshold=0.1,
    +124    correct_baseline=True,
    +125    plot_res=False,
    +126    abun_norm=100,
    +127    check_abundance=False,
    +128    apex_indexes=[],
    +129):
    +130    """
    +131    Find peaks by detecting minima in the first derivative of the data
    +132    Used in LC/GC and MS data processing
    +133    Optional baseline correction, then peak apex detection via 1st derivative.
    +134    For each apex the peak datapoints surrounding the apex are determined.
    +135    Some basic thresholding is applied (signal, number of datapoints, etc).
    +136
    +137    Parameters
    +138    ----------
    +139    domain : array
    +140        array of data points to find the peaks
    +141    signal : array
    +142        array of data points to find the peaks
    +143    max_height : float
    +144        maximum height of the peak
    +145    max_prominence : float
    +146        maximum prominence of the peak
    +147    max_signal : float
    +148        maximum signal of the peak
    +149    min_peak_datapoints : int
    +150        minimum number of data points in the peak
    +151    peak_derivative_threshold : float
    +152        threshold for the peak derivative
    +153    signal_threshold : float
    +154        threshold for the signal
    +155    correct_baseline : bool
    +156        flag to correct the baseline
    +157    plot_res : bool
    +158        flag to plot the results
    +159    abun_norm : float
    +160        abundance normalization factor
    +161    check_abundance : bool
    +162        flag to check the abundance
    +163
     164
    -165    dy = derivate(signal)
    -166    if len(apex_indexes) == 0:
    -167        # Find apexes
    -168        apex_indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    -169    else:
    -170        apex_indexes = np.array(apex_indexes)
    -171    
    -172    if apex_indexes.size and apex_indexes is not None:
    -173            apex_indexes = apex_indexes[signal[apex_indexes]/max_signal >= signal_threshold]
    +165    Returns
    +166    -------
    +167    tuple
    +168        tuple of indexes of the start, apex and final points of the peak
    +169
    +170
    +171    """
    +172    if correct_baseline:
    +173        signal = signal - baseline_detector(signal, domain, max_height, max_prominence)
     174
    -175    signal = signal/max(signal)
    -176    start_peak = []
    -177    end_peak = []
    -178
    -179    pos_dy_threshold = peak_derivative_threshold  #max(dy) * peak_derivative_threshold
    -180    neg_dy_threshold = -peak_derivative_threshold #min(dy) * peak_derivative_threshold
    -181    len_dy = len(dy)
    -182    # take apex_index and move left to find start
    -183    for index in apex_indexes:
    -184        # catch for starting position
    -185        
    -186        if index == 0:
    -187            index_start = index
    -188        else:
    -189            index_start = index - 1
    -190
    -191        # catch for ending position
    -192        if (index  + 1) >= dy.shape[0] :
    -193            index_end = index -1
    -194        else:
    -195            index_end = index + 1
    -196
    -197        # while dy[index_start-1] > 0 and index_start != 0:
    -198        while dy[index_start - 1] > pos_dy_threshold and index_start > 0:
    -199            index_start = index_start - 1
    -200        start_peak.append(index_start)
    -201
    -202        # while dy[index_end] < 0 and index_end != (len(dy) - 1):
    -203        while dy[index_end] < neg_dy_threshold and index_end != (len_dy - 1):
    -204            index_end = index_end + 1
    -205        end_peak.append(index_end)
    -206
    -207    start_peak = array(start_peak)
    -208    end_peak = array(end_peak)
    -209    
    -210
    -211    for apex_index in apex_indexes:
    -212        #index_gt_apex = np.where(end_peak >= apex_index)[0]
    -213        #index_lt_apex = np.where(start_peak <= apex_index)[0]
    -214        index_gt_apex = np.arange(np.searchsorted(end_peak, apex_index),  len(end_peak))
    -215        index_lt_apex = np.arange(0, np.searchsorted(start_peak, apex_index,side='right'))
    +175    domain = np.array(domain)
    +176    signal = np.array(signal)
    +177
    +178    dy = derivate(signal)
    +179    if len(apex_indexes) == 0:
    +180        # Find apexes
    +181        apex_indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    +182    else:
    +183        apex_indexes = np.array(apex_indexes)
    +184
    +185    if apex_indexes.size and apex_indexes is not None:
    +186        apex_indexes = apex_indexes[
    +187            signal[apex_indexes] / max_signal >= signal_threshold
    +188        ]
    +189
    +190    signal = signal / max(signal)
    +191    start_peak = []
    +192    end_peak = []
    +193
    +194    pos_dy_threshold = peak_derivative_threshold  # max(dy) * peak_derivative_threshold
    +195    neg_dy_threshold = -peak_derivative_threshold  # min(dy) * peak_derivative_threshold
    +196    len_dy = len(dy)
    +197    # take apex_index and move left to find start
    +198    for index in apex_indexes:
    +199        # catch for starting position
    +200
    +201        if index == 0:
    +202            index_start = index
    +203        else:
    +204            index_start = index - 1
    +205
    +206        # catch for ending position
    +207        if (index + 1) >= dy.shape[0]:
    +208            index_end = index - 1
    +209        else:
    +210            index_end = index + 1
    +211
    +212        # while dy[index_start-1] > 0 and index_start != 0:
    +213        while dy[index_start - 1] > pos_dy_threshold and index_start > 0:
    +214            index_start = index_start - 1
    +215        start_peak.append(index_start)
     216
    -217        if not index_gt_apex.size == 0 and not index_lt_apex.size == 0:
    -218
    -219            closest_right = find_nearest_scan(apex_index, end_peak[index_gt_apex])
    -220            closest_left = find_nearest_scan(apex_index,  start_peak[index_lt_apex])
    -221            if check_abundance:
    -222                corrected_peak_height = check_corrected_abundance(closest_left, closest_right, apex_index, signal, max_signal, signal_threshold, abun_norm)
    -223            else:
    -224                corrected_peak_height = signal[apex_index]
    -225
    -226            if (closest_right - closest_left) >= min_peak_datapoints:
    -227
    -228                if plot_res:
    -229                    plt.plot(domain[closest_left: closest_right + 1], dy[closest_left:closest_right + 1], c='red')
    -230                    plt.plot(domain[closest_left: closest_right + 1], signal[closest_left:closest_right + 1], c='black')
    -231                    plt.title(str((corrected_peak_height / max_signal) * 100))
    -232                    plt.show()
    -233                
    -234                yield (closest_left, apex_index, closest_right)
    +217        # while dy[index_end] < 0 and index_end != (len(dy) - 1):
    +218        while dy[index_end] < neg_dy_threshold and index_end != (len_dy - 1):
    +219            index_end = index_end + 1
    +220        end_peak.append(index_end)
    +221
    +222    start_peak = array(start_peak)
    +223    end_peak = array(end_peak)
    +224
    +225    for apex_index in apex_indexes:
    +226        # index_gt_apex = np.where(end_peak >= apex_index)[0]
    +227        # index_lt_apex = np.where(start_peak <= apex_index)[0]
    +228        index_gt_apex = np.arange(np.searchsorted(end_peak, apex_index), len(end_peak))
    +229        index_lt_apex = np.arange(
    +230            0, np.searchsorted(start_peak, apex_index, side="right")
    +231        )
    +232
    +233        if not index_gt_apex.size == 0 and not index_lt_apex.size == 0:
    +234            closest_right = find_nearest_scan(apex_index, end_peak[index_gt_apex])
    +235            closest_left = find_nearest_scan(apex_index, start_peak[index_lt_apex])
    +236            if check_abundance:
    +237                corrected_peak_height = check_corrected_abundance(
    +238                    closest_left,
    +239                    closest_right,
    +240                    apex_index,
    +241                    signal,
    +242                    max_signal,
    +243                    signal_threshold,
    +244                    abun_norm,
    +245                )
    +246            else:
    +247                corrected_peak_height = signal[apex_index]
    +248
    +249            if (closest_right - closest_left) >= min_peak_datapoints:
    +250                if plot_res:
    +251                    plt.plot(
    +252                        domain[closest_left : closest_right + 1],
    +253                        dy[closest_left : closest_right + 1],
    +254                        c="red",
    +255                    )
    +256                    plt.plot(
    +257                        domain[closest_left : closest_right + 1],
    +258                        signal[closest_left : closest_right + 1],
    +259                        c="black",
    +260                    )
    +261                    plt.title(str((corrected_peak_height / max_signal) * 100))
    +262                    plt.show()
    +263
    +264                yield (closest_left, apex_index, closest_right)
     

    Find peaks by detecting minima in the first derivative of the data Used in LC/GC and MS data processing Optional baseline correction, then peak apex detection via 1st derivative. -For each apex the peak datapoints surrounding the apex are determined. -Some basic thresholding is applied (signal, number of datapoints, etc).

    +For each apex the peak datapoints surrounding the apex are determined. +Some basic thresholding is applied (signal, number of datapoints, etc).

    Parameters
    @@ -1094,48 +1174,51 @@
    Returns
    -
    238def find_minima(index, tic, right=True):
    -239    """
    -240    Find the index of the local minima in the given time-of-flight (TOF) intensity array.
    -241
    -242    Parameters:
    -243    -----------
    -244    index: int 
    -245        The starting index to search for the minima.
    -246    tic: list
    -247        TIC data points
    -248    right : bool, optional
    -249        Determines the direction of the search. If True, search to the right of the index. If False, search to the left of the index. Default is True.
    -250
    -251    Returns:
    -252    --------
    -253    int
    -254        The index of the local minima in the TIC  array.
    -255    """
    -256            
    -257    j = index
    -258    #apex_abundance = tic[index]
    -259    tic_len = len(tic)
    -260
    -261    if right: minima = tic[j] >= tic[j+1]
    -262    else: minima = tic[j] >= tic[j-1]
    -263
    -264    while minima:
    -265        
    -266        if j == 1 or j == tic_len -2:
    -267            break
    -268        
    -269        if right: 
    -270            j += 1
    -271
    -272            minima = tic[j] >= tic[j+1]
    -273
    -274        else: 
    -275            j -= 1
    -276            minima = tic[j] >= tic[j-1]
    -277
    -278    if right: return j
    -279    else: return j
    +            
    267def find_minima(index, tic, right=True):
    +268    """
    +269    Find the index of the local minima in the given time-of-flight (TOF) intensity array.
    +270
    +271    Parameters:
    +272    -----------
    +273    index: int
    +274        The starting index to search for the minima.
    +275    tic: list
    +276        TIC data points
    +277    right : bool, optional
    +278        Determines the direction of the search. If True, search to the right of the index. If False, search to the left of the index. Default is True.
    +279
    +280    Returns:
    +281    --------
    +282    int
    +283        The index of the local minima in the TIC  array.
    +284    """
    +285
    +286    j = index
    +287    # apex_abundance = tic[index]
    +288    tic_len = len(tic)
    +289
    +290    if right:
    +291        minima = tic[j] >= tic[j + 1]
    +292    else:
    +293        minima = tic[j] >= tic[j - 1]
    +294
    +295    while minima:
    +296        if j == 1 or j == tic_len - 2:
    +297            break
    +298
    +299        if right:
    +300            j += 1
    +301
    +302            minima = tic[j] >= tic[j + 1]
    +303
    +304        else:
    +305            j -= 1
    +306            minima = tic[j] >= tic[j - 1]
    +307
    +308    if right:
    +309        return j
    +310    else:
    +311        return j
     
    @@ -1143,7 +1226,7 @@
    Returns

    Parameters:

    -

    index: int +

    index: int The starting index to search for the minima. tic: list TIC data points @@ -1169,38 +1252,37 @@

    Returns:

    -
    281def derivate(data_array):
    -282    """
    -283    Calculate derivative of the data points. 
    -284    Replaces nan with infinity
    -285
    -286    Parameters
    -287    ----------
    -288    data_array : array
    -289        array of data points
    -290    
    -291    Returns
    -292    -------
    -293    array
    -294        array of the derivative of the data points
    -295    """
    -296    data_array = np.array(data_array)
    -297
    -298    dy = data_array[1:] - data_array[:-1]
    -299
    -300    #replaces nan for infinity
    -301    indices_nan = np.where(np.isnan(data_array))[0]
    -302
    -303    if indices_nan.size:
    -304
    -305        data_array[indices_nan] = np.inf
    -306        dy[np.where(np.isnan(dy))[0]] = np.inf
    -307
    -308    return dy
    +            
    314def derivate(data_array):
    +315    """
    +316    Calculate derivative of the data points.
    +317    Replaces nan with infinity
    +318
    +319    Parameters
    +320    ----------
    +321    data_array : array
    +322        array of data points
    +323
    +324    Returns
    +325    -------
    +326    array
    +327        array of the derivative of the data points
    +328    """
    +329    data_array = np.array(data_array)
    +330
    +331    dy = data_array[1:] - data_array[:-1]
    +332
    +333    # replaces nan for infinity
    +334    indices_nan = np.where(np.isnan(data_array))[0]
    +335
    +336    if indices_nan.size:
    +337        data_array[indices_nan] = np.inf
    +338        dy[np.where(np.isnan(dy))[0]] = np.inf
    +339
    +340    return dy
     
    -

    Calculate derivative of the data points. +

    Calculate derivative of the data points. Replaces nan with infinity

    Parameters
    @@ -1230,38 +1312,38 @@
    Returns
    -
    310def minima_detector(tic, max_tic, peak_height_max_percent, peak_max_prominence_percent):
    -311    """
    -312    Minima detector for the TIC data points.
    -313
    -314    Parameters
    -315    ----------
    -316    tic : array
    -317        array of data points to find the peaks  
    -318    max_tic : float
    -319        maximum value of the data points
    -320    peak_height_max_percent : float
    -321        maximum height of the peak
    -322    peak_max_prominence_percent : float
    -323        maximum prominence of the peak
    -324    
    -325    Returns
    -326    -------
    -327    generator
    -328        generator of the indexes of the minima in the TIC array
    -329    
    -330    """
    -331    peak_height_diff = lambda hi, li : ((tic[hi] - tic[li]) / max_tic )*100
    -332
    -333    for start_index, index, final_index in peak_detector(tic, max_tic):
    -334
    -335        # abundance max threshold    
    -336        if (tic[index] / max_tic) * 100 < peak_height_max_percent:
    -337
    -338            # calculates prominence and filter   
    -339            if peak_height_diff(index, start_index) and peak_height_diff(index, final_index) < peak_max_prominence_percent:
    -340                
    -341                    yield from (start_index, final_index)
    +            
    343def minima_detector(tic, max_tic, peak_height_max_percent, peak_max_prominence_percent):
    +344    """
    +345    Minima detector for the TIC data points.
    +346
    +347    Parameters
    +348    ----------
    +349    tic : array
    +350        array of data points to find the peaks
    +351    max_tic : float
    +352        maximum value of the data points
    +353    peak_height_max_percent : float
    +354        maximum height of the peak
    +355    peak_max_prominence_percent : float
    +356        maximum prominence of the peak
    +357
    +358    Returns
    +359    -------
    +360    generator
    +361        generator of the indexes of the minima in the TIC array
    +362
    +363    """
    +364    peak_height_diff = lambda hi, li: ((tic[hi] - tic[li]) / max_tic) * 100
    +365
    +366    for start_index, index, final_index in peak_detector(tic, max_tic):
    +367        # abundance max threshold
    +368        if (tic[index] / max_tic) * 100 < peak_height_max_percent:
    +369            # calculates prominence and filter
    +370            if (
    +371                peak_height_diff(index, start_index)
    +372                and peak_height_diff(index, final_index) < peak_max_prominence_percent
    +373            ):
    +374                yield from (start_index, final_index)
     
    @@ -1300,72 +1382,81 @@
    Returns
    -
    343def baseline_detector(tic, rt, peak_height_max_percent, peak_max_prominence_percent, do_interpolation=True):
    -344    """
    -345    Baseline detector for the TIC data points.
    -346    For LC/GC data processing
    -347
    -348    Parameters
    -349    ----------
    -350    tic : array
    -351        array of data points to find the peaks
    -352    rt : array
    -353        array of retention time data points
    -354    peak_height_max_percent : float
    -355        maximum height of the peak
    -356    peak_max_prominence_percent : float
    -357        maximum prominence of the peak
    -358    do_interpolation : bool, optional
    -359        flag to interpolate the data points. Default is True
    -360    
    -361    Returns
    -362    ------- 
    -363    array
    -364        array of the baseline corrected data points
    -365           
    -366    """
    -367    rt = np.array(rt)
    -368
    -369    max_tic = max(tic)
    -370
    -371    indexes = sorted(list(set(i for i in minima_detector(tic, max_tic, peak_height_max_percent, peak_max_prominence_percent))))
    -372
    -373    y = -tic
    -374
    -375    x1 = rt[indexes]
    -376
    -377    y1 = y[indexes]
    -378
    -379    if len(x1) <= 5:
    -380        return tic
    -381
    -382    if not do_interpolation:
    -383  
    -384        y0 = np.zeros(tic.shape)
    -385        y0[indexes] = y[indexes]
    -386
    -387        return y0
    -388
    -389    else:
    -390
    -391        f1 = interpolate.interp1d(x1, y1, kind='quadratic',fill_value="extrapolate")
    -392
    -393        ynew1 = f1(list(rt))
    -394
    -395        # from matplotlib import pyplot as plt   
    -396        # if self.deconv_rt_list and  self.deconv_mz == 51:
    -397
    -398        #   plt.plot(rt, tic-(-1* ynew1), color='green')
    -399
    -400        # plt.plot(rt, -1* ynew1, c='black')
    +            
    377def baseline_detector(
    +378    tic, rt, peak_height_max_percent, peak_max_prominence_percent, do_interpolation=True
    +379):
    +380    """
    +381    Baseline detector for the TIC data points.
    +382    For LC/GC data processing
    +383
    +384    Parameters
    +385    ----------
    +386    tic : array
    +387        array of data points to find the peaks
    +388    rt : array
    +389        array of retention time data points
    +390    peak_height_max_percent : float
    +391        maximum height of the peak
    +392    peak_max_prominence_percent : float
    +393        maximum prominence of the peak
    +394    do_interpolation : bool, optional
    +395        flag to interpolate the data points. Default is True
    +396
    +397    Returns
    +398    -------
    +399    array
    +400        array of the baseline corrected data points
     401
    -402        # s = self.smooth(s, 10, 'blackman')
    -403
    -404        # plt.plot(self.retention_time, -s)
    -405
    -406        # plt.show()
    -407
    -408        return -1 * ynew1
    +402    """
    +403    rt = np.array(rt)
    +404
    +405    max_tic = max(tic)
    +406
    +407    indexes = sorted(
    +408        list(
    +409            set(
    +410                i
    +411                for i in minima_detector(
    +412                    tic, max_tic, peak_height_max_percent, peak_max_prominence_percent
    +413                )
    +414            )
    +415        )
    +416    )
    +417
    +418    y = -tic
    +419
    +420    x1 = rt[indexes]
    +421
    +422    y1 = y[indexes]
    +423
    +424    if len(x1) <= 5:
    +425        return tic
    +426
    +427    if not do_interpolation:
    +428        y0 = np.zeros(tic.shape)
    +429        y0[indexes] = y[indexes]
    +430
    +431        return y0
    +432
    +433    else:
    +434        f1 = interpolate.interp1d(x1, y1, kind="quadratic", fill_value="extrapolate")
    +435
    +436        ynew1 = f1(list(rt))
    +437
    +438        # from matplotlib import pyplot as plt
    +439        # if self.deconv_rt_list and  self.deconv_mz == 51:
    +440
    +441        #   plt.plot(rt, tic-(-1* ynew1), color='green')
    +442
    +443        # plt.plot(rt, -1* ynew1, c='black')
    +444
    +445        # s = self.smooth(s, 10, 'blackman')
    +446
    +447        # plt.plot(self.retention_time, -s)
    +448
    +449        # plt.show()
    +450
    +451        return -1 * ynew1
     
    @@ -1385,8 +1476,11 @@
    Parameters
    maximum prominence of the peak
  • do_interpolation (bool, optional): flag to interpolate the data points. Default is True
  • -
  • Returns
  • -
  • -------
  • + + +
    Returns
    + +
    • array: array of the baseline corrected data points
    @@ -1404,94 +1498,92 @@
    Parameters
    -
    410def peak_detector_generator(tic, stds, method, rt, max_height, min_height, max_prominence, min_datapoints):
    -411    """
    -412    Peak detector generator for the TIC data points.
    -413
    -414    Parameters
    -415    ----------
    -416    tic : array
    -417        array of data points to find the peaks
    -418    stds : float
    -419        standard deviation
    -420    method : str
    -421        method to detect the peaks
    -422        Available methods: 'manual_relative_abundance', 'auto_relative_abundance', 'second_derivative'
    -423    rt : array
    -424        array of retention time data points
    -425    max_height : float
    -426        maximum height of the peak
    -427    min_height : float
    -428        minimum height of the peak
    -429    max_prominence : float
    -430        maximum prominence of the peak
    -431    min_datapoints : int
    -432        minimum number of data points in the peak
    -433
    -434    Returns
    -435    -------
    -436    generator
    -437        generator of the indexes of the peaks in the TIC array
    -438
    -439    """
    -440    max_tic = max(tic)
    -441
    -442    if method == 'manual_relative_abundance':
    -443
    -444        tic = tic - baseline_detector(tic, rt, max_height, max_prominence)
    -445
    -446        norm_tic = (tic / max_tic) * 100
    -447
    -448        remove_indexes = np.where(norm_tic < min_height)[0]
    -449
    -450        # if self.deconv_rt_list and  self.deconv_mz == 51:
    -451        #    plt.plot(self.deconv_rt_list, tic, label=self.deconv_mz)
    -452
    -453    elif method == 'auto_relative_abundance':
    -454
    -455        tic = tic - baseline_detector(tic, rt, max_height, max_prominence)
    -456
    -457        baseline = baseline_detector(tic, rt, max_height, max_prominence)
    -458
    -459        peak_detect_threshold = ((np.nanmean(baseline) + (stds * np.std(baseline))))
    -460
    -461        remove_indexes = np.where(tic < peak_detect_threshold)[0]
    -462
    -463    elif method == 'second_derivative':
    -464
    -465        remove_indexes = second_derivative_threshold(tic, stds, rt, max_height, max_prominence)
    -466
    -467    else:
    -468
    -469        NotImplemented(method)
    -470
    -471    peak_height_diff = lambda hi, li : ((tic[hi] - tic[li]) / max_tic )*100
    -472    
    -473    dy = derivate(tic)
    -474
    -475    include_indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    -476
    -477    final_indexes = sorted(set(include_indexes)-set(remove_indexes))
    -478
    -479    #from matplotlib import pyplot as plt   
    -480    
    -481    #plt.plot(self.retention_time, tic, color='black')
    -482    #plt.scatter(self.retention_time[remove_indexes], tic[remove_indexes], color='red')
    -483    #plt.scatter(self.retention_time[include_indexes], tic[include_indexes], color='blue')
    -484    #plt.scatter(self.retention_time[final_indexes], tic[final_indexes], color='blue')
    -485    
    -486    #plt.show()
    +            
    454def peak_detector_generator(
    +455    tic, stds, method, rt, max_height, min_height, max_prominence, min_datapoints
    +456):
    +457    """
    +458    Peak detector generator for the TIC data points.
    +459
    +460    Parameters
    +461    ----------
    +462    tic : array
    +463        array of data points to find the peaks
    +464    stds : float
    +465        standard deviation
    +466    method : str
    +467        method to detect the peaks
    +468        Available methods: 'manual_relative_abundance', 'auto_relative_abundance', 'second_derivative'
    +469    rt : array
    +470        array of retention time data points
    +471    max_height : float
    +472        maximum height of the peak
    +473    min_height : float
    +474        minimum height of the peak
    +475    max_prominence : float
    +476        maximum prominence of the peak
    +477    min_datapoints : int
    +478        minimum number of data points in the peak
    +479
    +480    Returns
    +481    -------
    +482    generator
    +483        generator of the indexes of the peaks in the TIC array
    +484
    +485    """
    +486    max_tic = max(tic)
     487
    -488    for index in final_indexes:
    -489            
    -490        start_index = find_minima(index, tic, right=False)
    -491        final_index = find_minima(index, tic)
    -492        
    -493        if final_index-start_index > min_datapoints:
    +488    if method == "manual_relative_abundance":
    +489        tic = tic - baseline_detector(tic, rt, max_height, max_prominence)
    +490
    +491        norm_tic = (tic / max_tic) * 100
    +492
    +493        remove_indexes = np.where(norm_tic < min_height)[0]
     494
    -495            #if min( peak_height_diff(index,start_index), peak_height_diff(index,final_index) )> self.chromatogram_settings.peak_min_prominence_percent :   
    -496                
    -497                yield (start_index, index, final_index)
    +495        # if self.deconv_rt_list and  self.deconv_mz == 51:
    +496        #    plt.plot(self.deconv_rt_list, tic, label=self.deconv_mz)
    +497
    +498    elif method == "auto_relative_abundance":
    +499        tic = tic - baseline_detector(tic, rt, max_height, max_prominence)
    +500
    +501        baseline = baseline_detector(tic, rt, max_height, max_prominence)
    +502
    +503        peak_detect_threshold = np.nanmean(baseline) + (stds * np.std(baseline))
    +504
    +505        remove_indexes = np.where(tic < peak_detect_threshold)[0]
    +506
    +507    elif method == "second_derivative":
    +508        remove_indexes = second_derivative_threshold(
    +509            tic, stds, rt, max_height, max_prominence
    +510        )
    +511
    +512    else:
    +513        NotImplemented(method)
    +514
    +515    peak_height_diff = lambda hi, li: ((tic[hi] - tic[li]) / max_tic) * 100
    +516
    +517    dy = derivate(tic)
    +518
    +519    include_indexes = np.where((np.hstack((dy, 0)) < 0) & (np.hstack((0, dy)) > 0))[0]
    +520
    +521    final_indexes = sorted(set(include_indexes) - set(remove_indexes))
    +522
    +523    # from matplotlib import pyplot as plt
    +524
    +525    # plt.plot(self.retention_time, tic, color='black')
    +526    # plt.scatter(self.retention_time[remove_indexes], tic[remove_indexes], color='red')
    +527    # plt.scatter(self.retention_time[include_indexes], tic[include_indexes], color='blue')
    +528    # plt.scatter(self.retention_time[final_indexes], tic[final_indexes], color='blue')
    +529
    +530    # plt.show()
    +531
    +532    for index in final_indexes:
    +533        start_index = find_minima(index, tic, right=False)
    +534        final_index = find_minima(index, tic)
    +535
    +536        if final_index - start_index > min_datapoints:
    +537            # if min( peak_height_diff(index,start_index), peak_height_diff(index,final_index) )> self.chromatogram_settings.peak_min_prominence_percent :
    +538
    +539            yield (start_index, index, final_index)
     
    @@ -1539,87 +1631,83 @@
    Returns
    -
    499def smooth_signal(x, window_len, window, pol_order, implemented_smooth_method):
    -500    """
    -501    Smooth the data using a window with requested size.
    -502
    -503    This method is based on the convolution of a scaled window with the signal.
    -504    The signal is prepared by introducing reflected copies of the signal 
    -505    (with the window size) in both ends so that transient parts are minimized
    -506    in the begining and end part of the output signal.
    -507
    -508    Parameters
    -509    ----------
    -510    x: array
    -511        the input signal
    -512    window_len: int
    -513        the dimension of the smoothing window; should be an odd integer
    -514    window: str
    -515        the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
    -516    pol_order: int
    -517        the order of the polynomial to fit the data
    -518    implemented_smooth_method: list
    -519        list of implemented smoothing methods
    -520
    -521    Returns 
    -522    -------
    -523    y: array
    -524        the smoothed signal
    -525    
    -526    Notes:
    -527    -----
    -528    See also: numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    -529    scipy.signal.savgol_filter
    -530
    -531    """
    -532    x= np.array(x)
    -533
    -534    if x.ndim != 1:
    -535        raise ValueError("smooth only accepts 1 dimension arrays.")
    -536
    -537    if x.size < window_len:
    -538        raise ValueError("Input array needs to be bigger than window size")
    -539
    -540    #if window_len < 3:
    -541    #    return x
    -542
    -543    if not window in implemented_smooth_method:
    -544        raise ValueError("Window method should be 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'")
    +            
    542def smooth_signal(x, window_len, window, pol_order, implemented_smooth_method):
    +543    """
    +544    Smooth the data using a window with requested size.
     545
    -546    s = np.r_[x[window_len - 1:0:-1], x, x[-1:-window_len:-1]]
    -547
    -548    if window == 'savgol':
    -549        
    -550        return savgol_filter(x, window_len, pol_order)
    -551
    -552
    -553    elif window == 'boxcar':  # moving average
    -554        
    -555        w = boxcar(window_len)
    -556        
    -557        y = np.convolve(w, s, mode='valid')
    -558
    -559        
    -560    elif window == 'flat':  # moving average
    -561        
    -562        w = np.ones(window_len, 'd')
    +546    This method is based on the convolution of a scaled window with the signal.
    +547    The signal is prepared by introducing reflected copies of the signal
    +548    (with the window size) in both ends so that transient parts are minimized
    +549    in the begining and end part of the output signal.
    +550
    +551    Parameters
    +552    ----------
    +553    x: array
    +554        the input signal
    +555    window_len: int
    +556        the dimension of the smoothing window; should be an odd integer
    +557    window: str
    +558        the type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
    +559    pol_order: int
    +560        the order of the polynomial to fit the data
    +561    implemented_smooth_method: list
    +562        list of implemented smoothing methods
     563
    -564        y = np.convolve(w / w.sum(), s, mode='valid')
    -565        
    -566    else:
    -567        
    -568        w = eval(window + '(window_len)')
    -569
    -570        y = np.convolve(w / w.sum(), s, mode='valid')
    -571
    -572    return y[int(window_len / 2 - 1):int(-window_len / 2)]
    +564    Returns
    +565    -------
    +566    y: array
    +567        the smoothed signal
    +568
    +569    Notes:
    +570    -----
    +571    See also: numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve
    +572    scipy.signal.savgol_filter
    +573
    +574    """
    +575    x = np.array(x)
    +576
    +577    if x.ndim != 1:
    +578        raise ValueError("smooth only accepts 1 dimension arrays.")
    +579
    +580    if x.size < window_len:
    +581        raise ValueError("Input array needs to be bigger than window size")
    +582
    +583    # if window_len < 3:
    +584    #    return x
    +585
    +586    if not window in implemented_smooth_method:
    +587        raise ValueError(
    +588            "Window method should be 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'"
    +589        )
    +590
    +591    s = np.r_[x[window_len - 1 : 0 : -1], x, x[-1:-window_len:-1]]
    +592
    +593    if window == "savgol":
    +594        return savgol_filter(x, window_len, pol_order)
    +595
    +596    elif window == "boxcar":  # moving average
    +597        w = boxcar(window_len)
    +598
    +599        y = np.convolve(w, s, mode="valid")
    +600
    +601    elif window == "flat":  # moving average
    +602        w = np.ones(window_len, "d")
    +603
    +604        y = np.convolve(w / w.sum(), s, mode="valid")
    +605
    +606    else:
    +607        w = eval(window + "(window_len)")
    +608
    +609        y = np.convolve(w / w.sum(), s, mode="valid")
    +610
    +611    return y[int(window_len / 2 - 1) : int(-window_len / 2)]
     

    Smooth the data using a window with requested size.

    This method is based on the convolution of a scaled window with the signal. -The signal is prepared by introducing reflected copies of the signal +The signal is prepared by introducing reflected copies of the signal (with the window size) in both ends so that transient parts are minimized in the begining and end part of the output signal.

    @@ -1640,13 +1728,14 @@
    Parameters
    Returns
    -

    y: array - the smoothed signal

    - -

    Notes:

    - -

    See also: numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve -scipy.signal.savgol_filter

    +
      +
    • y (array): +the smoothed signal
    • +
    • Notes:
    • +
    • -----
    • +
    • See also (numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve):

    • +
    • scipy.signal.savgol_filter

    • +
    @@ -1662,42 +1751,50 @@

    Notes:

    -
    574def second_derivative_threshold(tic, stds, rt, peak_height_max_percent, peak_max_prominence_percent):
    -575    """
    -576    Second derivative threshold for the TIC data points.
    -577    For LC/GC data processing
    -578
    -579    Parameters
    -580    ----------
    -581    tic : array
    -582        array of data points to find the peaks
    -583    stds : float
    -584        standard deviation
    -585    rt : array
    -586        array of retention time data points
    -587    peak_height_max_percent : float
    -588        maximum height of the peak
    -589    
    -590    Returns
    -591    -------
    -592    array
    -593        array of the indexes of the data points to remove
    -594    
    -595    """
    -596
    -597    dy = derivate(tic)
    -598    
    -599    dydy = derivate(dy)
    -600    dydy = np.hstack((dydy, 0))
    -601    dydy = np.hstack((0, dydy))
    -602
    -603    baseline = baseline_detector(dydy, rt, peak_height_max_percent, peak_max_prominence_percent, do_interpolation=False)
    -604    
    -605    threshold_median = np.median(baseline) - (stds * np.std(baseline))
    -606    
    -607    remove_indexes = np.where(dydy > threshold_median)[0]
    -608    
    -609    return remove_indexes
    +            
    614def second_derivative_threshold(
    +615    tic, stds, rt, peak_height_max_percent, peak_max_prominence_percent
    +616):
    +617    """
    +618    Second derivative threshold for the TIC data points.
    +619    For LC/GC data processing
    +620
    +621    Parameters
    +622    ----------
    +623    tic : array
    +624        array of data points to find the peaks
    +625    stds : float
    +626        standard deviation
    +627    rt : array
    +628        array of retention time data points
    +629    peak_height_max_percent : float
    +630        maximum height of the peak
    +631
    +632    Returns
    +633    -------
    +634    array
    +635        array of the indexes of the data points to remove
    +636
    +637    """
    +638
    +639    dy = derivate(tic)
    +640
    +641    dydy = derivate(dy)
    +642    dydy = np.hstack((dydy, 0))
    +643    dydy = np.hstack((0, dydy))
    +644
    +645    baseline = baseline_detector(
    +646        dydy,
    +647        rt,
    +648        peak_height_max_percent,
    +649        peak_max_prominence_percent,
    +650        do_interpolation=False,
    +651    )
    +652
    +653    threshold_median = np.median(baseline) - (stds * np.std(baseline))
    +654
    +655    remove_indexes = np.where(dydy > threshold_median)[0]
    +656
    +657    return remove_indexes
     
    diff --git a/docs/corems/mass_spectra/factory/GC_Class.html b/docs/corems/mass_spectra/factory/GC_Class.html index 167c79df..1e0f361d 100644 --- a/docs/corems/mass_spectra/factory/GC_Class.html +++ b/docs/corems/mass_spectra/factory/GC_Class.html @@ -252,156 +252,156 @@

    67 * plot_gc_peaks(ax=None, color='red'). Plot the GC peaks. 68 """ 69 - 70 def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown', sample_name=None): - 71 - 72 if isinstance(file_location, str): - 73 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed - 74 file_location = Path(file_location) - 75 - 76 if not file_location.exists(): - 77 - 78 raise FileExistsError("File does not exist: " + str(file_location)) - 79 - 80 self.file_location = file_location - 81 - 82 if sample_name: - 83 self.sample_name = sample_name - 84 else: - 85 self.sample_name = file_location.stem - 86 - 87 self.analyzer = analyzer - 88 self.instrument_label = instrument_label - 89 self._init_settings() + 70 def __init__( + 71 self, + 72 file_location, + 73 analyzer="Unknown", + 74 instrument_label="Unknown", + 75 sample_name=None, + 76 ): + 77 if isinstance(file_location, str): + 78 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed + 79 file_location = Path(file_location) + 80 + 81 if not file_location.exists(): + 82 raise FileExistsError("File does not exist: " + str(file_location)) + 83 + 84 self.file_location = file_location + 85 + 86 if sample_name: + 87 self.sample_name = sample_name + 88 else: + 89 self.sample_name = file_location.stem 90 - 91 self._retention_time_list = [] - 92 self._scans_number_list = [] - 93 self._tic_list = [] + 91 self.analyzer = analyzer + 92 self.instrument_label = instrument_label + 93 self._init_settings() 94 - 95 # all scans - 96 self._ms = {} - 97 - 98 # after peak detection - 99 self._processed_tic = [] -100 self.gcpeaks = [] + 95 self._retention_time_list = [] + 96 self._scans_number_list = [] + 97 self._tic_list = [] + 98 + 99 # all scans +100 self._ms = {} 101 -102 self.ri_pairs_ref = None -103 self.cal_file_path = None -104 -105 def _init_settings(self): -106 """Initialize the settings for GC_Class. 
-107 -108 This method initializes the settings for the GC_Class object using the GCMSParameters class. -109 """ -110 self._parameters = GCMSParameters() +102 # after peak detection +103 self._processed_tic = [] +104 self.gcpeaks = [] +105 +106 self.ri_pairs_ref = None +107 self.cal_file_path = None +108 +109 def _init_settings(self): +110 """Initialize the settings for GC_Class. 111 -112 def __len__(self): -113 """Return the number of GC peaks in the GC_Class object.""" -114 return len(self.gcpeaks) +112 This method initializes the settings for the GC_Class object using the GCMSParameters class. +113 """ +114 self._parameters = GCMSParameters() 115 -116 def __getitem__(self, scan_number) -> GCPeak: -117 """Return the GCPeak with the given scan number.""" -118 return self.gcpeaks[scan_number] +116 def __len__(self): +117 """Return the number of GC peaks in the GC_Class object.""" +118 return len(self.gcpeaks) 119 -120 # def __iter__(self): -121 -122 # return iter(self.gcpeaks.values()) +120 def __getitem__(self, scan_number) -> GCPeak: +121 """Return the GCPeak with the given scan number.""" +122 return self.gcpeaks[scan_number] 123 -124 def process_chromatogram(self, plot_res=False): -125 """Process the chromatogram. -126 -127 This method processes the chromatogram. -128 -129 Parameters -130 ---------- -131 plot_res : bool, optional -132 If True, plot the results. Defaults to False. -133 """ -134 -135 # tic = self.tic - self.baseline_detector(self.tic) -136 -137 self._processed_tic = self.smooth_tic(self.tic) +124 # def __iter__(self): +125 +126 # return iter(self.gcpeaks.values()) +127 +128 def process_chromatogram(self, plot_res=False): +129 """Process the chromatogram. +130 +131 This method processes the chromatogram. +132 +133 Parameters +134 ---------- +135 plot_res : bool, optional +136 If True, plot the results. Defaults to False. 
+137 """ 138 -139 for index, tic in enumerate(self._processed_tic): +139 # tic = self.tic - self.baseline_detector(self.tic) 140 -141 self._ms[index]._processed_tic = tic +141 self._processed_tic = self.smooth_tic(self.tic) 142 -143 # self.second_derivative_threshold(self._processed_tic) -144 -145 if self.chromatogram_settings.use_deconvolution: -146 -147 self.run_deconvolution(plot_res=False) -148 -149 else: +143 for index, tic in enumerate(self._processed_tic): +144 self._ms[index]._processed_tic = tic +145 +146 # self.second_derivative_threshold(self._processed_tic) +147 +148 if self.chromatogram_settings.use_deconvolution: +149 self.run_deconvolution(plot_res=False) 150 -151 peaks_index = self.centroid_detector(self._processed_tic, self.retention_time) -152 -153 for i in peaks_index: -154 -155 apex_index = i[1] -156 -157 gc_peak = GCPeak(self, self._ms[apex_index], i ) +151 else: +152 peaks_index = self.centroid_detector( +153 self._processed_tic, self.retention_time +154 ) +155 +156 for i in peaks_index: +157 apex_index = i[1] 158 -159 gc_peak.calc_area(self._processed_tic, 1) +159 gc_peak = GCPeak(self, self._ms[apex_index], i) 160 -161 self.gcpeaks.append(gc_peak) +161 gc_peak.calc_area(self._processed_tic, 1) 162 -163 # self.gcpeaks[self.scans_number[apex_index]] = gc_peak +163 self.gcpeaks.append(gc_peak) 164 -165 def add_mass_spectrum(self, mass_spec): -166 """Add a mass spectrum to the GC-MS object. -167 -168 This method adds a mass spectrum to the GC-MS object. +165 # self.gcpeaks[self.scans_number[apex_index]] = gc_peak +166 +167 def add_mass_spectrum(self, mass_spec): +168 """Add a mass spectrum to the GC-MS object. 169 -170 Parameters -171 ---------- -172 mass_spec : MassSpectrum -173 Mass spectrum to be added. -174 """ -175 -176 self._ms[mass_spec.scan_number] = mass_spec +170 This method adds a mass spectrum to the GC-MS object. +171 +172 Parameters +173 ---------- +174 mass_spec : MassSpectrum +175 Mass spectrum to be added. 
+176 """ 177 -178 def set_tic_list_from_data(self): -179 """Set the total ion chromatogram list from the mass spectra data within the GC-MS data object.""" -180 -181 self.tic = [self._ms.get(i).tic for i in self.scans_number] +178 self._ms[mass_spec.scan_number] = mass_spec +179 +180 def set_tic_list_from_data(self): +181 """Set the total ion chromatogram list from the mass spectra data within the GC-MS data object.""" 182 -183 # self.set_tic_list([self._ms.get(i).get_sumed_signal_to_noise() for i in self.get_scans_number()]) +183 self.tic = [self._ms.get(i).tic for i in self.scans_number] 184 -185 def set_retention_time_from_data(self): -186 """Set the retention time list from the mass spectra data within the GC-MS data object.""" -187 -188 retention_time_list = [] +185 # self.set_tic_list([self._ms.get(i).get_sumed_signal_to_noise() for i in self.get_scans_number()]) +186 +187 def set_retention_time_from_data(self): +188 """Set the retention time list from the mass spectra data within the GC-MS data object.""" 189 -190 for key_ms in sorted(self._ms.keys()): +190 retention_time_list = [] 191 -192 retention_time_list.append(self._ms.get(key_ms).retention_time) -193 -194 self.retention_time = retention_time_list -195 -196 # self.set_retention_time_list(sorted(self._ms.keys())) -197 -198 def set_scans_number_from_data(self): -199 """Set the scan number list from the mass spectra data within the GC-MS data object.""" -200 -201 self.scans_number = sorted(self._ms.keys()) -202 -203 @property -204 def parameter(self): -205 """GCMS Parameters""" -206 return self._parameters -207 -208 @parameter.setter -209 def parameter(self, gcms_parameters_instance): -210 self._parameters = gcms_parameters_instance -211 -212 @property -213 def molecular_search_settings(self): -214 """Molecular Search Settings""" -215 return self.parameter.molecular_search -216 -217 @molecular_search_settings.setter -218 def molecular_search_settings(self, settings_class_instance): -219 +192 for key_ms 
in sorted(self._ms.keys()): +193 retention_time_list.append(self._ms.get(key_ms).retention_time) +194 +195 self.retention_time = retention_time_list +196 +197 # self.set_retention_time_list(sorted(self._ms.keys())) +198 +199 def set_scans_number_from_data(self): +200 """Set the scan number list from the mass spectra data within the GC-MS data object.""" +201 +202 self.scans_number = sorted(self._ms.keys()) +203 +204 @property +205 def parameter(self): +206 """GCMS Parameters""" +207 return self._parameters +208 +209 @parameter.setter +210 def parameter(self, gcms_parameters_instance): +211 self._parameters = gcms_parameters_instance +212 +213 @property +214 def molecular_search_settings(self): +215 """Molecular Search Settings""" +216 return self.parameter.molecular_search +217 +218 @molecular_search_settings.setter +219 def molecular_search_settings(self, settings_class_instance): 220 self.parameter.molecular_search = settings_class_instance 221 222 @property @@ -476,431 +476,492 @@

    291 for gc_peak in self: 292 if gc_peak: 293 for compound_obj in gc_peak: -294 -295 if compound_obj.name in metabolites.keys(): -296 current_score = metabolites[compound_obj.name]["highest_similarity_score"] -297 compound_score = compound_obj.spectral_similarity_score -298 metabolites[compound_obj.name]["highest_similarity_score"] = compound_score if compound_score > current_score else current_score -299 -300 else: -301 if compound_obj.metadata: -302 metabolites[compound_obj.name] = { -303 "name": compound_obj.name, -304 "highest_similarity_score": compound_obj.spectral_similarity_score, -305 "casno": compound_obj.metadata.cas, -306 "kegg": compound_obj.metadata.kegg, -307 "inchi": compound_obj.metadata.inchi, -308 "inchi_key": compound_obj.metadata.inchikey, -309 "chebi": compound_obj.metadata.chebi, -310 "smiles": compound_obj.metadata.smiles -311 } -312 else: -313 metabolites[compound_obj.name] = { "name": compound_obj.name, -314 "highest_similarity_score": compound_obj.spectral_similarity_score, -315 "casno": "", -316 "kegg": "", -317 "inchi": "", -318 "inchikey": "", -319 "chebi": "", -320 "smiles": "" -321 } -322 -323 return list(metabolites.values()) -324 -325 @property -326 def no_matched_peaks(self): -327 """Peaks with no Matched Metabolites""" -328 return [peak for peak in self if not peak] -329 -330 @retention_time.setter -331 def retention_time(self, alist): -332 # self._retention_time_list = linspace(0, 80, num=len(self._scans_number_list)) -333 self._retention_time_list = alist -334 -335 @scans_number.setter -336 def scans_number(self, alist): -337 -338 self._scans_number_list = alist -339 -340 @tic.setter -341 def tic(self, alist): -342 -343 self._tic_list = array(alist) +294 if compound_obj.name in metabolites.keys(): +295 current_score = metabolites[compound_obj.name][ +296 "highest_similarity_score" +297 ] +298 compound_score = compound_obj.spectral_similarity_score +299 metabolites[compound_obj.name]["highest_similarity_score"] = ( +300 
compound_score +301 if compound_score > current_score +302 else current_score +303 ) +304 +305 else: +306 if compound_obj.metadata: +307 metabolites[compound_obj.name] = { +308 "name": compound_obj.name, +309 "highest_similarity_score": compound_obj.spectral_similarity_score, +310 "casno": compound_obj.metadata.cas, +311 "kegg": compound_obj.metadata.kegg, +312 "inchi": compound_obj.metadata.inchi, +313 "inchi_key": compound_obj.metadata.inchikey, +314 "chebi": compound_obj.metadata.chebi, +315 "smiles": compound_obj.metadata.smiles, +316 } +317 else: +318 metabolites[compound_obj.name] = { +319 "name": compound_obj.name, +320 "highest_similarity_score": compound_obj.spectral_similarity_score, +321 "casno": "", +322 "kegg": "", +323 "inchi": "", +324 "inchikey": "", +325 "chebi": "", +326 "smiles": "", +327 } +328 +329 return list(metabolites.values()) +330 +331 @property +332 def no_matched_peaks(self): +333 """Peaks with no Matched Metabolites""" +334 return [peak for peak in self if not peak] +335 +336 @retention_time.setter +337 def retention_time(self, alist): +338 # self._retention_time_list = linspace(0, 80, num=len(self._scans_number_list)) +339 self._retention_time_list = alist +340 +341 @scans_number.setter +342 def scans_number(self, alist): +343 self._scans_number_list = alist 344 -345 def plot_gc_peaks(self, ax=None, color="red"): # pragma: no cover -346 """Plot the GC peaks. -347 -348 This method plots the GC peaks. -349 -350 Parameters -351 ---------- -352 ax : matplotlib.axes.Axes, optional -353 Axes object to plot the GC peaks. Defaults to None. -354 color : str, optional -355 Color of the GC peaks. Defaults to 'red'. 
-356 """ -357 -358 import matplotlib.pyplot as plt -359 fig = plt.gcf() -360 if ax is None: -361 ax = plt.gca() -362 -363 max_rts = [gc_peak.mass_spectrum.retention_time for gc_peak in self] -364 max_tics = [gc_peak.mass_spectrum.tic for gc_peak in self] -365 -366 # min_rts = [self._ms[gc_peak.start_index].retention_time for gc_peak in self] + [self._ms[gc_peak.final_index].retention_time for gc_peak in self] -367 # min_tics = [self._ms[gc_peak.start_index].tic for gc_peak in self] + [self._ms[gc_peak.final_index].tic for gc_peak in self] -368 # sc = ax.scatter(min_rts, min_tics, color='yellow', linewidth=0, marker='v') -369 -370 sc = ax.scatter(max_rts, max_tics, color=color, marker='v') -371 -372 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') -373 -374 annot = ax.annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points", -375 bbox=dict(boxstyle="round", fc="w"), -376 arrowprops=dict(arrowstyle="->")) -377 annot.set_visible(False) -378 annot.get_bbox_patch().set_facecolor(('lightblue')) -379 annot.get_bbox_patch().set_alpha(0.8) -380 -381 def update_annot(ind): -382 -383 pos = sc.get_offsets()[ind["ind"][0]] -384 annot.xy = pos -385 -386 text = "RT: {}\nRT Ref: {}\nRI: {}\nRI Ref: {}\nSimilarity Score: {}\nName: {}".format(" ".join([str(round(self[n].retention_time, 2)) for n in ind["ind"]]), -387 " ".join([str(round(self[n].highest_score_compound.retention_time, 2) if self[n].highest_score_compound else None) for n in ind["ind"]]), -388 " ".join([str(round(self[n].ri, 2) if self[n].ri else None) for n in ind["ind"]]), -389 " ".join([str(round(self[n].highest_score_compound.ri, 2) if self[n].highest_score_compound else None) for n in ind["ind"]]), -390 " ".join([str(round(self[n].highest_score_compound.similarity_score, 4) if self[n].highest_score_compound else None) for n in ind["ind"]]), -391 " ".join([str(self[n].highest_score_compound.name if self[n].highest_score_compound else None) for n in ind["ind"]]) -392 ) -393 
annot.set_text(text) +345 @tic.setter +346 def tic(self, alist): +347 self._tic_list = array(alist) +348 +349 def plot_gc_peaks(self, ax=None, color="red"): # pragma: no cover +350 """Plot the GC peaks. +351 +352 This method plots the GC peaks. +353 +354 Parameters +355 ---------- +356 ax : matplotlib.axes.Axes, optional +357 Axes object to plot the GC peaks. Defaults to None. +358 color : str, optional +359 Color of the GC peaks. Defaults to 'red'. +360 """ +361 +362 import matplotlib.pyplot as plt +363 +364 fig = plt.gcf() +365 if ax is None: +366 ax = plt.gca() +367 +368 max_rts = [gc_peak.mass_spectrum.retention_time for gc_peak in self] +369 max_tics = [gc_peak.mass_spectrum.tic for gc_peak in self] +370 +371 # min_rts = [self._ms[gc_peak.start_index].retention_time for gc_peak in self] + [self._ms[gc_peak.final_index].retention_time for gc_peak in self] +372 # min_tics = [self._ms[gc_peak.start_index].tic for gc_peak in self] + [self._ms[gc_peak.final_index].tic for gc_peak in self] +373 # sc = ax.scatter(min_rts, min_tics, color='yellow', linewidth=0, marker='v') +374 +375 sc = ax.scatter(max_rts, max_tics, color=color, marker="v") +376 +377 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") +378 +379 annot = ax.annotate( +380 "", +381 xy=(0, 0), +382 xytext=(20, 20), +383 textcoords="offset points", +384 bbox=dict(boxstyle="round", fc="w"), +385 arrowprops=dict(arrowstyle="->"), +386 ) +387 annot.set_visible(False) +388 annot.get_bbox_patch().set_facecolor(("lightblue")) +389 annot.get_bbox_patch().set_alpha(0.8) +390 +391 def update_annot(ind): +392 pos = sc.get_offsets()[ind["ind"][0]] +393 annot.xy = pos 394 -395 def hover(event): -396 vis = annot.get_visible() -397 if event.inaxes == ax: -398 cont, ind = sc.contains(event) -399 if cont: -400 update_annot(ind) -401 annot.set_visible(True) -402 fig.canvas.draw_idle() -403 else: -404 if vis: -405 annot.set_visible(False) -406 fig.canvas.draw_idle() -407 -408 
fig.canvas.mpl_connect("motion_notify_event", hover) -409 -410 return ax -411 -412 def to_excel(self, out_file_path, write_mode='ab', write_metadata=True, id_label="corems:"): -413 """Export the GC-MS data to an Excel file. -414 -415 This method exports the GC-MS data to an Excel file. -416 -417 Parameters -418 ---------- -419 out_file_path : str, pathlib.Path, or s3path.S3Path -420 Path object containing the file location. -421 write_mode : str, optional -422 Write mode. Defaults to 'ab'. -423 write_metadata : bool, optional -424 If True, write the metadata. Defaults to True. -425 id_label : str, optional -426 Label of the ID. Defaults to 'corems:'. -427 -428 """ -429 -430 if isinstance(out_file_path, str): -431 out_file_path = Path(out_file_path) -432 -433 exportMS = LowResGCMSExport(out_file_path, self) -434 exportMS.to_excel(id_label=id_label, write_mode=write_mode, write_metadata=write_metadata) -435 -436 return out_file_path.with_suffix('.xlsx') -437 -438 def to_csv(self, out_file_path, separate_output=False, write_metadata=True, id_label="corems:"): -439 """Export the GC-MS data to a CSV file. -440 -441 Parameters -442 ---------- -443 out_file_path : str, pathlib.Path, or s3path.S3Path -444 Path object containing the file location. -445 separate_output : bool, optional -446 If True, separate the output. Defaults to False. -447 write_metadata : bool, optional -448 If True, write the metadata. Defaults to True. -449 -450 """ -451 -452 if isinstance(out_file_path, str): -453 out_file_path = Path(out_file_path) -454 -455 exportMS = LowResGCMSExport(out_file_path, self) -456 exportMS.to_csv(id_label=id_label, separate_output=separate_output, write_metadata=write_metadata) -457 -458 return out_file_path.with_suffix('.csv') -459 -460 def to_pandas(self, out_file_path, write_metadata=True, id_label="corems:"): -461 """Export the GC-MS data to a Pandas dataframe. 
+395 text = "RT: {}\nRT Ref: {}\nRI: {}\nRI Ref: {}\nSimilarity Score: {}\nName: {}".format( +396 " ".join([str(round(self[n].retention_time, 2)) for n in ind["ind"]]), +397 " ".join( +398 [ +399 str( +400 round(self[n].highest_score_compound.retention_time, 2) +401 if self[n].highest_score_compound +402 else None +403 ) +404 for n in ind["ind"] +405 ] +406 ), +407 " ".join( +408 [ +409 str(round(self[n].ri, 2) if self[n].ri else None) +410 for n in ind["ind"] +411 ] +412 ), +413 " ".join( +414 [ +415 str( +416 round(self[n].highest_score_compound.ri, 2) +417 if self[n].highest_score_compound +418 else None +419 ) +420 for n in ind["ind"] +421 ] +422 ), +423 " ".join( +424 [ +425 str( +426 round(self[n].highest_score_compound.similarity_score, 4) +427 if self[n].highest_score_compound +428 else None +429 ) +430 for n in ind["ind"] +431 ] +432 ), +433 " ".join( +434 [ +435 str( +436 self[n].highest_score_compound.name +437 if self[n].highest_score_compound +438 else None +439 ) +440 for n in ind["ind"] +441 ] +442 ), +443 ) +444 annot.set_text(text) +445 +446 def hover(event): +447 vis = annot.get_visible() +448 if event.inaxes == ax: +449 cont, ind = sc.contains(event) +450 if cont: +451 update_annot(ind) +452 annot.set_visible(True) +453 fig.canvas.draw_idle() +454 else: +455 if vis: +456 annot.set_visible(False) +457 fig.canvas.draw_idle() +458 +459 fig.canvas.mpl_connect("motion_notify_event", hover) +460 +461 return ax 462 -463 Parameters -464 ---------- -465 out_file_path : str, pathlib.Path, or s3path.S3Path -466 Path object containing the file location. -467 write_metadata : bool, optional -468 If True, write the metadata. Defaults to True. -469 id_label : str, optional -470 Label of the ID. Defaults to 'corems:'. 
-471 -472 """ -473 -474 if isinstance(out_file_path, str): -475 out_file_path = Path(out_file_path) -476 # pickle dataframe (pkl extension) -477 exportMS = LowResGCMSExport(out_file_path, self) -478 exportMS.to_pandas(id_label=id_label, write_metadata=write_metadata) -479 -480 return out_file_path.with_suffix('.pkl') -481 -482 def to_dataframe(self, id_label="corems:"): -483 """Export the GC-MS data to a Pandas dataframe. -484 -485 Parameters -486 ---------- -487 id_label : str, optional -488 Label of the ID. Defaults to 'corems:'. -489 -490 """ -491 -492 # returns pandas dataframe -493 exportMS = LowResGCMSExport(self.sample_name, self) -494 return exportMS.get_pandas_df(id_label=id_label) -495 -496 def processing_stats(self): -497 """Return the processing statistics. -498 -499 """ -500 -501 # returns json string -502 exportMS = LowResGCMSExport(self.sample_name, self) -503 return exportMS.get_data_stats(self) -504 -505 def parameters_json(self, id_label="corems:", output_path=" "): -506 """Return the parameters in JSON format. -507 -508 Parameters -509 ---------- -510 id_label : str, optional -511 Label of the ID. Defaults to 'corems:'. -512 output_path : str, optional -513 Path object containing the file location. Defaults to " ". -514 """ +463 def to_excel( +464 self, out_file_path, write_mode="ab", write_metadata=True, id_label="corems:" +465 ): +466 """Export the GC-MS data to an Excel file. +467 +468 This method exports the GC-MS data to an Excel file. +469 +470 Parameters +471 ---------- +472 out_file_path : str, pathlib.Path, or s3path.S3Path +473 Path object containing the file location. +474 write_mode : str, optional +475 Write mode. Defaults to 'ab'. +476 write_metadata : bool, optional +477 If True, write the metadata. Defaults to True. +478 id_label : str, optional +479 Label of the ID. Defaults to 'corems:'. 
+480 +481 """ +482 +483 if isinstance(out_file_path, str): +484 out_file_path = Path(out_file_path) +485 +486 exportMS = LowResGCMSExport(out_file_path, self) +487 exportMS.to_excel( +488 id_label=id_label, write_mode=write_mode, write_metadata=write_metadata +489 ) +490 +491 return out_file_path.with_suffix(".xlsx") +492 +493 def to_csv( +494 self, +495 out_file_path, +496 separate_output=False, +497 write_metadata=True, +498 id_label="corems:", +499 ): +500 """Export the GC-MS data to a CSV file. +501 +502 Parameters +503 ---------- +504 out_file_path : str, pathlib.Path, or s3path.S3Path +505 Path object containing the file location. +506 separate_output : bool, optional +507 If True, separate the output. Defaults to False. +508 write_metadata : bool, optional +509 If True, write the metadata. Defaults to True. +510 +511 """ +512 +513 if isinstance(out_file_path, str): +514 out_file_path = Path(out_file_path) 515 -516 # returns json string -517 exportMS = LowResGCMSExport(self.sample_name, self) -518 return exportMS.get_parameters_json(self, id_label, output_path) -519 -520 def to_json(self, id_label="corems:"): -521 """Export the GC-MS data to a JSON file. +516 exportMS = LowResGCMSExport(out_file_path, self) +517 exportMS.to_csv( +518 id_label=id_label, +519 separate_output=separate_output, +520 write_metadata=write_metadata, +521 ) 522 -523 Parameters -524 ---------- -525 id_label : str, optional -526 Label of the ID. Defaults to 'corems:'. -527 -528 """ -529 -530 # returns pandas dataframe -531 exportMS = LowResGCMSExport(self.sample_name, self) -532 return exportMS.get_json(id_label=id_label) -533 -534 def to_hdf(self, id_label="corems:"): -535 """Export the GC-MS data to a HDF file. +523 return out_file_path.with_suffix(".csv") +524 +525 def to_pandas(self, out_file_path, write_metadata=True, id_label="corems:"): +526 """Export the GC-MS data to a Pandas dataframe. 
+527 +528 Parameters +529 ---------- +530 out_file_path : str, pathlib.Path, or s3path.S3Path +531 Path object containing the file location. +532 write_metadata : bool, optional +533 If True, write the metadata. Defaults to True. +534 id_label : str, optional +535 Label of the ID. Defaults to 'corems:'. 536 -537 Parameters -538 ---------- -539 id_label : str, optional -540 Label of the ID. Defaults to 'corems:'. -541 -542 """ -543 -544 # returns pandas dataframe -545 exportMS = LowResGCMSExport(self.sample_name, self) -546 return exportMS.to_hdf(id_label=id_label) -547 -548 def plot_chromatogram(self, ax=None, color="blue"): #pragma: no cover -549 """Plot the chromatogram. -550 -551 Parameters -552 ---------- -553 ax : matplotlib.axes.Axes, optional -554 Axes object to plot the chromatogram. Defaults to None. -555 color : str, optional -556 Color of the chromatogram. Defaults to 'blue'. -557 -558 """ -559 -560 import matplotlib.pyplot as plt -561 -562 if ax is None: -563 ax = plt.gca() -564 -565 ax.plot(self.retention_time, self.tic, color=color) -566 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') +537 """ +538 +539 if isinstance(out_file_path, str): +540 out_file_path = Path(out_file_path) +541 # pickle dataframe (pkl extension) +542 exportMS = LowResGCMSExport(out_file_path, self) +543 exportMS.to_pandas(id_label=id_label, write_metadata=write_metadata) +544 +545 return out_file_path.with_suffix(".pkl") +546 +547 def to_dataframe(self, id_label="corems:"): +548 """Export the GC-MS data to a Pandas dataframe. +549 +550 Parameters +551 ---------- +552 id_label : str, optional +553 Label of the ID. Defaults to 'corems:'. 
+554 +555 """ +556 +557 # returns pandas dataframe +558 exportMS = LowResGCMSExport(self.sample_name, self) +559 return exportMS.get_pandas_df(id_label=id_label) +560 +561 def processing_stats(self): +562 """Return the processing statistics.""" +563 +564 # returns json string +565 exportMS = LowResGCMSExport(self.sample_name, self) +566 return exportMS.get_data_stats(self) 567 -568 return ax -569 -570 def plot_smoothed_chromatogram(self, ax=None, color="green"): #pragma: no cover -571 """Plot the smoothed chromatogram. -572 -573 Parameters -574 ---------- -575 ax : matplotlib.axes.Axes, optional -576 Axes object to plot the smoothed chromatogram. Defaults to None. -577 color : str, optional -578 Color of the smoothed chromatogram. Defaults to 'green'. -579 -580 """ -581 -582 import matplotlib.pyplot as plt -583 -584 if ax is None: +568 def parameters_json(self, id_label="corems:", output_path=" "): +569 """Return the parameters in JSON format. +570 +571 Parameters +572 ---------- +573 id_label : str, optional +574 Label of the ID. Defaults to 'corems:'. +575 output_path : str, optional +576 Path object containing the file location. Defaults to " ". +577 """ +578 +579 # returns json string +580 exportMS = LowResGCMSExport(self.sample_name, self) +581 return exportMS.get_parameters_json(self, id_label, output_path) +582 +583 def to_json(self, id_label="corems:"): +584 """Export the GC-MS data to a JSON file. 585 -586 ax = plt.gca() -587 -588 ax.plot(self.retention_time, self.smooth_tic(self.tic), color=color) -589 -590 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') -591 -592 return ax -593 -594 def plot_detected_baseline(self, ax=None, color="blue"): # pragma: no cover -595 """Plot the detected baseline. +586 Parameters +587 ---------- +588 id_label : str, optional +589 Label of the ID. Defaults to 'corems:'. 
+590 +591 """ +592 +593 # returns pandas dataframe +594 exportMS = LowResGCMSExport(self.sample_name, self) +595 return exportMS.get_json(id_label=id_label) 596 -597 Parameters -598 ---------- -599 ax : matplotlib.axes.Axes, optional -600 Axes object to plot the detected baseline. Defaults to None. -601 color : str, optional -602 Color of the detected baseline. Defaults to 'blue'. -603 -604 """ -605 -606 import matplotlib.pyplot as plt -607 -608 if ax is None: -609 -610 ax = plt.gca() -611 -612 max_height = self.chromatogram_settings.peak_height_max_percent -613 max_prominence = self.chromatogram_settings.peak_max_prominence_percent -614 -615 baseline = sp.baseline_detector(self.tic, self.retention_time, max_height, max_prominence) -616 ax.plot(self.retention_time, color=color) -617 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') -618 -619 return ax +597 def to_hdf(self, id_label="corems:"): +598 """Export the GC-MS data to a HDF file. +599 +600 Parameters +601 ---------- +602 id_label : str, optional +603 Label of the ID. Defaults to 'corems:'. +604 +605 """ +606 +607 # returns pandas dataframe +608 exportMS = LowResGCMSExport(self.sample_name, self) +609 return exportMS.to_hdf(id_label=id_label) +610 +611 def plot_chromatogram(self, ax=None, color="blue"): # pragma: no cover +612 """Plot the chromatogram. +613 +614 Parameters +615 ---------- +616 ax : matplotlib.axes.Axes, optional +617 Axes object to plot the chromatogram. Defaults to None. +618 color : str, optional +619 Color of the chromatogram. Defaults to 'blue'. 620 -621 def plot_baseline_subtraction(self, ax=None, color="black"): # pragma: no cover -622 """Plot the baseline subtraction. -623 -624 Parameters -625 ---------- -626 ax : matplotlib.axes.Axes, optional -627 Axes object to plot the baseline subtraction. Defaults to None. -628 color : str, optional -629 Color of the baseline subtraction. Defaults to 'black'. 
+621 """ +622 +623 import matplotlib.pyplot as plt +624 +625 if ax is None: +626 ax = plt.gca() +627 +628 ax.plot(self.retention_time, self.tic, color=color) +629 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") 630 -631 """ +631 return ax 632 -633 import matplotlib.pyplot as plt -634 -635 if ax is None: -636 -637 ax = plt.gca() -638 -639 max_height = self.chromatogram_settings.peak_height_max_percent -640 -641 max_prominence = self.chromatogram_settings.peak_max_prominence_percent +633 def plot_smoothed_chromatogram(self, ax=None, color="green"): # pragma: no cover +634 """Plot the smoothed chromatogram. +635 +636 Parameters +637 ---------- +638 ax : matplotlib.axes.Axes, optional +639 Axes object to plot the smoothed chromatogram. Defaults to None. +640 color : str, optional +641 Color of the smoothed chromatogram. Defaults to 'green'. 642 -643 x = self.tic + sp.baseline_detector(self.tic, self.retention_time, max_height, max_prominence) +643 """ 644 -645 ax.plot(self.retention_time, x, color=color) +645 import matplotlib.pyplot as plt 646 -647 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') -648 -649 return ax -650 -651 def peaks_rt_tic(self, json_string=False): -652 """Return the peaks, retention time, and total ion chromatogram. -653 -654 Parameters -655 ---------- -656 json_string : bool, optional -657 If True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False. 
-658 -659 """ -660 -661 peaks_list = dict() -662 -663 all_candidates_data = {} -664 -665 all_peaks_data = {} -666 -667 for gcms_peak in self.sorted_gcpeaks: -668 -669 dict_data = {'rt': gcms_peak.rt_list, -670 'tic': gcms_peak.tic_list, -671 'mz': gcms_peak.mass_spectrum.mz_exp.tolist(), -672 'abundance': gcms_peak.mass_spectrum.abundance.tolist(), -673 'candidate_names': gcms_peak.compound_names, -674 } +647 if ax is None: +648 ax = plt.gca() +649 +650 ax.plot(self.retention_time, self.smooth_tic(self.tic), color=color) +651 +652 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") +653 +654 return ax +655 +656 def plot_detected_baseline(self, ax=None, color="blue"): # pragma: no cover +657 """Plot the detected baseline. +658 +659 Parameters +660 ---------- +661 ax : matplotlib.axes.Axes, optional +662 Axes object to plot the detected baseline. Defaults to None. +663 color : str, optional +664 Color of the detected baseline. Defaults to 'blue'. +665 +666 """ +667 +668 import matplotlib.pyplot as plt +669 +670 if ax is None: +671 ax = plt.gca() +672 +673 max_height = self.chromatogram_settings.peak_height_max_percent +674 max_prominence = self.chromatogram_settings.peak_max_prominence_percent 675 -676 peaks_list[gcms_peak.retention_time] = dict_data -677 -678 for compound in gcms_peak: -679 -680 if compound.name not in all_candidates_data.keys(): -681 mz = array(compound.mz).tolist() -682 abundance = array(compound.abundance).tolist() -683 data = {'mz': mz, "abundance": abundance} -684 all_candidates_data[compound.name] = data -685 -686 all_peaks_data["peak_data"] = peaks_list -687 all_peaks_data["ref_data"] = all_candidates_data -688 -689 if json_string: -690 -691 return json.dumps(all_peaks_data) -692 -693 else: -694 return all_peaks_data +676 baseline = sp.baseline_detector( +677 self.tic, self.retention_time, max_height, max_prominence +678 ) +679 ax.plot(self.retention_time, color=color) +680 ax.set(xlabel="Retention Time (s)", ylabel="Total 
Ion Chromatogram") +681 +682 return ax +683 +684 def plot_baseline_subtraction(self, ax=None, color="black"): # pragma: no cover +685 """Plot the baseline subtraction. +686 +687 Parameters +688 ---------- +689 ax : matplotlib.axes.Axes, optional +690 Axes object to plot the baseline subtraction. Defaults to None. +691 color : str, optional +692 Color of the baseline subtraction. Defaults to 'black'. +693 +694 """ 695 -696 def plot_processed_chromatogram(self, ax=None, color="black"): -697 """Plot the processed chromatogram. -698 -699 Parameters -700 ---------- -701 ax : matplotlib.axes.Axes, optional -702 Axes object to plot the processed chromatogram. Defaults to None. -703 color : str, optional -704 Color of the processed chromatogram. Defaults to 'black'. -705 -706 """ -707 -708 import matplotlib.pyplot as plt -709 -710 if ax is None: -711 -712 ax = plt.gca() -713 -714 ax.plot(self.retention_time, self.processed_tic, color=color) -715 -716 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') +696 import matplotlib.pyplot as plt +697 +698 if ax is None: +699 ax = plt.gca() +700 +701 max_height = self.chromatogram_settings.peak_height_max_percent +702 +703 max_prominence = self.chromatogram_settings.peak_max_prominence_percent +704 +705 x = self.tic + sp.baseline_detector( +706 self.tic, self.retention_time, max_height, max_prominence +707 ) +708 +709 ax.plot(self.retention_time, x, color=color) +710 +711 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") +712 +713 return ax +714 +715 def peaks_rt_tic(self, json_string=False): +716 """Return the peaks, retention time, and total ion chromatogram. 717 -718 return ax +718 Parameters +719 ---------- +720 json_string : bool, optional +721 If True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False. 
+722 +723 """ +724 +725 peaks_list = dict() +726 +727 all_candidates_data = {} +728 +729 all_peaks_data = {} +730 +731 for gcms_peak in self.sorted_gcpeaks: +732 dict_data = { +733 "rt": gcms_peak.rt_list, +734 "tic": gcms_peak.tic_list, +735 "mz": gcms_peak.mass_spectrum.mz_exp.tolist(), +736 "abundance": gcms_peak.mass_spectrum.abundance.tolist(), +737 "candidate_names": gcms_peak.compound_names, +738 } +739 +740 peaks_list[gcms_peak.retention_time] = dict_data +741 +742 for compound in gcms_peak: +743 if compound.name not in all_candidates_data.keys(): +744 mz = array(compound.mz).tolist() +745 abundance = array(compound.abundance).tolist() +746 data = {"mz": mz, "abundance": abundance} +747 all_candidates_data[compound.name] = data +748 +749 all_peaks_data["peak_data"] = peaks_list +750 all_peaks_data["ref_data"] = all_candidates_data +751 +752 if json_string: +753 return json.dumps(all_peaks_data) +754 +755 else: +756 return all_peaks_data +757 +758 def plot_processed_chromatogram(self, ax=None, color="black"): +759 """Plot the processed chromatogram. +760 +761 Parameters +762 ---------- +763 ax : matplotlib.axes.Axes, optional +764 Axes object to plot the processed chromatogram. Defaults to None. +765 color : str, optional +766 Color of the processed chromatogram. Defaults to 'black'. +767 +768 """ +769 +770 import matplotlib.pyplot as plt +771 +772 if ax is None: +773 ax = plt.gca() +774 +775 ax.plot(self.retention_time, self.processed_tic, color=color) +776 +777 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") +778 +779 return ax

    @@ -965,156 +1026,156 @@

    68 * plot_gc_peaks(ax=None, color='red'). Plot the GC peaks. 69 """ 70 - 71 def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown', sample_name=None): - 72 - 73 if isinstance(file_location, str): - 74 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed - 75 file_location = Path(file_location) - 76 - 77 if not file_location.exists(): - 78 - 79 raise FileExistsError("File does not exist: " + str(file_location)) - 80 - 81 self.file_location = file_location - 82 - 83 if sample_name: - 84 self.sample_name = sample_name - 85 else: - 86 self.sample_name = file_location.stem - 87 - 88 self.analyzer = analyzer - 89 self.instrument_label = instrument_label - 90 self._init_settings() + 71 def __init__( + 72 self, + 73 file_location, + 74 analyzer="Unknown", + 75 instrument_label="Unknown", + 76 sample_name=None, + 77 ): + 78 if isinstance(file_location, str): + 79 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed + 80 file_location = Path(file_location) + 81 + 82 if not file_location.exists(): + 83 raise FileExistsError("File does not exist: " + str(file_location)) + 84 + 85 self.file_location = file_location + 86 + 87 if sample_name: + 88 self.sample_name = sample_name + 89 else: + 90 self.sample_name = file_location.stem 91 - 92 self._retention_time_list = [] - 93 self._scans_number_list = [] - 94 self._tic_list = [] + 92 self.analyzer = analyzer + 93 self.instrument_label = instrument_label + 94 self._init_settings() 95 - 96 # all scans - 97 self._ms = {} - 98 - 99 # after peak detection -100 self._processed_tic = [] -101 self.gcpeaks = [] + 96 self._retention_time_list = [] + 97 self._scans_number_list = [] + 98 self._tic_list = [] + 99 +100 # all scans +101 self._ms = {} 102 -103 self.ri_pairs_ref = None -104 self.cal_file_path = None -105 -106 def _init_settings(self): -107 """Initialize the settings for GC_Class. 
-108 -109 This method initializes the settings for the GC_Class object using the GCMSParameters class. -110 """ -111 self._parameters = GCMSParameters() +103 # after peak detection +104 self._processed_tic = [] +105 self.gcpeaks = [] +106 +107 self.ri_pairs_ref = None +108 self.cal_file_path = None +109 +110 def _init_settings(self): +111 """Initialize the settings for GC_Class. 112 -113 def __len__(self): -114 """Return the number of GC peaks in the GC_Class object.""" -115 return len(self.gcpeaks) +113 This method initializes the settings for the GC_Class object using the GCMSParameters class. +114 """ +115 self._parameters = GCMSParameters() 116 -117 def __getitem__(self, scan_number) -> GCPeak: -118 """Return the GCPeak with the given scan number.""" -119 return self.gcpeaks[scan_number] +117 def __len__(self): +118 """Return the number of GC peaks in the GC_Class object.""" +119 return len(self.gcpeaks) 120 -121 # def __iter__(self): -122 -123 # return iter(self.gcpeaks.values()) +121 def __getitem__(self, scan_number) -> GCPeak: +122 """Return the GCPeak with the given scan number.""" +123 return self.gcpeaks[scan_number] 124 -125 def process_chromatogram(self, plot_res=False): -126 """Process the chromatogram. -127 -128 This method processes the chromatogram. -129 -130 Parameters -131 ---------- -132 plot_res : bool, optional -133 If True, plot the results. Defaults to False. -134 """ -135 -136 # tic = self.tic - self.baseline_detector(self.tic) -137 -138 self._processed_tic = self.smooth_tic(self.tic) +125 # def __iter__(self): +126 +127 # return iter(self.gcpeaks.values()) +128 +129 def process_chromatogram(self, plot_res=False): +130 """Process the chromatogram. +131 +132 This method processes the chromatogram. +133 +134 Parameters +135 ---------- +136 plot_res : bool, optional +137 If True, plot the results. Defaults to False. 
+138 """ 139 -140 for index, tic in enumerate(self._processed_tic): +140 # tic = self.tic - self.baseline_detector(self.tic) 141 -142 self._ms[index]._processed_tic = tic +142 self._processed_tic = self.smooth_tic(self.tic) 143 -144 # self.second_derivative_threshold(self._processed_tic) -145 -146 if self.chromatogram_settings.use_deconvolution: -147 -148 self.run_deconvolution(plot_res=False) -149 -150 else: +144 for index, tic in enumerate(self._processed_tic): +145 self._ms[index]._processed_tic = tic +146 +147 # self.second_derivative_threshold(self._processed_tic) +148 +149 if self.chromatogram_settings.use_deconvolution: +150 self.run_deconvolution(plot_res=False) 151 -152 peaks_index = self.centroid_detector(self._processed_tic, self.retention_time) -153 -154 for i in peaks_index: -155 -156 apex_index = i[1] -157 -158 gc_peak = GCPeak(self, self._ms[apex_index], i ) +152 else: +153 peaks_index = self.centroid_detector( +154 self._processed_tic, self.retention_time +155 ) +156 +157 for i in peaks_index: +158 apex_index = i[1] 159 -160 gc_peak.calc_area(self._processed_tic, 1) +160 gc_peak = GCPeak(self, self._ms[apex_index], i) 161 -162 self.gcpeaks.append(gc_peak) +162 gc_peak.calc_area(self._processed_tic, 1) 163 -164 # self.gcpeaks[self.scans_number[apex_index]] = gc_peak +164 self.gcpeaks.append(gc_peak) 165 -166 def add_mass_spectrum(self, mass_spec): -167 """Add a mass spectrum to the GC-MS object. -168 -169 This method adds a mass spectrum to the GC-MS object. +166 # self.gcpeaks[self.scans_number[apex_index]] = gc_peak +167 +168 def add_mass_spectrum(self, mass_spec): +169 """Add a mass spectrum to the GC-MS object. 170 -171 Parameters -172 ---------- -173 mass_spec : MassSpectrum -174 Mass spectrum to be added. -175 """ -176 -177 self._ms[mass_spec.scan_number] = mass_spec +171 This method adds a mass spectrum to the GC-MS object. +172 +173 Parameters +174 ---------- +175 mass_spec : MassSpectrum +176 Mass spectrum to be added. 
+177 """ 178 -179 def set_tic_list_from_data(self): -180 """Set the total ion chromatogram list from the mass spectra data within the GC-MS data object.""" -181 -182 self.tic = [self._ms.get(i).tic for i in self.scans_number] +179 self._ms[mass_spec.scan_number] = mass_spec +180 +181 def set_tic_list_from_data(self): +182 """Set the total ion chromatogram list from the mass spectra data within the GC-MS data object.""" 183 -184 # self.set_tic_list([self._ms.get(i).get_sumed_signal_to_noise() for i in self.get_scans_number()]) +184 self.tic = [self._ms.get(i).tic for i in self.scans_number] 185 -186 def set_retention_time_from_data(self): -187 """Set the retention time list from the mass spectra data within the GC-MS data object.""" -188 -189 retention_time_list = [] +186 # self.set_tic_list([self._ms.get(i).get_sumed_signal_to_noise() for i in self.get_scans_number()]) +187 +188 def set_retention_time_from_data(self): +189 """Set the retention time list from the mass spectra data within the GC-MS data object.""" 190 -191 for key_ms in sorted(self._ms.keys()): +191 retention_time_list = [] 192 -193 retention_time_list.append(self._ms.get(key_ms).retention_time) -194 -195 self.retention_time = retention_time_list -196 -197 # self.set_retention_time_list(sorted(self._ms.keys())) -198 -199 def set_scans_number_from_data(self): -200 """Set the scan number list from the mass spectra data within the GC-MS data object.""" -201 -202 self.scans_number = sorted(self._ms.keys()) -203 -204 @property -205 def parameter(self): -206 """GCMS Parameters""" -207 return self._parameters -208 -209 @parameter.setter -210 def parameter(self, gcms_parameters_instance): -211 self._parameters = gcms_parameters_instance -212 -213 @property -214 def molecular_search_settings(self): -215 """Molecular Search Settings""" -216 return self.parameter.molecular_search -217 -218 @molecular_search_settings.setter -219 def molecular_search_settings(self, settings_class_instance): -220 +193 for key_ms 
in sorted(self._ms.keys()): +194 retention_time_list.append(self._ms.get(key_ms).retention_time) +195 +196 self.retention_time = retention_time_list +197 +198 # self.set_retention_time_list(sorted(self._ms.keys())) +199 +200 def set_scans_number_from_data(self): +201 """Set the scan number list from the mass spectra data within the GC-MS data object.""" +202 +203 self.scans_number = sorted(self._ms.keys()) +204 +205 @property +206 def parameter(self): +207 """GCMS Parameters""" +208 return self._parameters +209 +210 @parameter.setter +211 def parameter(self, gcms_parameters_instance): +212 self._parameters = gcms_parameters_instance +213 +214 @property +215 def molecular_search_settings(self): +216 """Molecular Search Settings""" +217 return self.parameter.molecular_search +218 +219 @molecular_search_settings.setter +220 def molecular_search_settings(self, settings_class_instance): 221 self.parameter.molecular_search = settings_class_instance 222 223 @property @@ -1189,431 +1250,492 @@

    292 for gc_peak in self: 293 if gc_peak: 294 for compound_obj in gc_peak: -295 -296 if compound_obj.name in metabolites.keys(): -297 current_score = metabolites[compound_obj.name]["highest_similarity_score"] -298 compound_score = compound_obj.spectral_similarity_score -299 metabolites[compound_obj.name]["highest_similarity_score"] = compound_score if compound_score > current_score else current_score -300 -301 else: -302 if compound_obj.metadata: -303 metabolites[compound_obj.name] = { -304 "name": compound_obj.name, -305 "highest_similarity_score": compound_obj.spectral_similarity_score, -306 "casno": compound_obj.metadata.cas, -307 "kegg": compound_obj.metadata.kegg, -308 "inchi": compound_obj.metadata.inchi, -309 "inchi_key": compound_obj.metadata.inchikey, -310 "chebi": compound_obj.metadata.chebi, -311 "smiles": compound_obj.metadata.smiles -312 } -313 else: -314 metabolites[compound_obj.name] = { "name": compound_obj.name, -315 "highest_similarity_score": compound_obj.spectral_similarity_score, -316 "casno": "", -317 "kegg": "", -318 "inchi": "", -319 "inchikey": "", -320 "chebi": "", -321 "smiles": "" -322 } -323 -324 return list(metabolites.values()) -325 -326 @property -327 def no_matched_peaks(self): -328 """Peaks with no Matched Metabolites""" -329 return [peak for peak in self if not peak] -330 -331 @retention_time.setter -332 def retention_time(self, alist): -333 # self._retention_time_list = linspace(0, 80, num=len(self._scans_number_list)) -334 self._retention_time_list = alist -335 -336 @scans_number.setter -337 def scans_number(self, alist): -338 -339 self._scans_number_list = alist -340 -341 @tic.setter -342 def tic(self, alist): -343 -344 self._tic_list = array(alist) +295 if compound_obj.name in metabolites.keys(): +296 current_score = metabolites[compound_obj.name][ +297 "highest_similarity_score" +298 ] +299 compound_score = compound_obj.spectral_similarity_score +300 metabolites[compound_obj.name]["highest_similarity_score"] = ( +301 
compound_score +302 if compound_score > current_score +303 else current_score +304 ) +305 +306 else: +307 if compound_obj.metadata: +308 metabolites[compound_obj.name] = { +309 "name": compound_obj.name, +310 "highest_similarity_score": compound_obj.spectral_similarity_score, +311 "casno": compound_obj.metadata.cas, +312 "kegg": compound_obj.metadata.kegg, +313 "inchi": compound_obj.metadata.inchi, +314 "inchi_key": compound_obj.metadata.inchikey, +315 "chebi": compound_obj.metadata.chebi, +316 "smiles": compound_obj.metadata.smiles, +317 } +318 else: +319 metabolites[compound_obj.name] = { +320 "name": compound_obj.name, +321 "highest_similarity_score": compound_obj.spectral_similarity_score, +322 "casno": "", +323 "kegg": "", +324 "inchi": "", +325 "inchikey": "", +326 "chebi": "", +327 "smiles": "", +328 } +329 +330 return list(metabolites.values()) +331 +332 @property +333 def no_matched_peaks(self): +334 """Peaks with no Matched Metabolites""" +335 return [peak for peak in self if not peak] +336 +337 @retention_time.setter +338 def retention_time(self, alist): +339 # self._retention_time_list = linspace(0, 80, num=len(self._scans_number_list)) +340 self._retention_time_list = alist +341 +342 @scans_number.setter +343 def scans_number(self, alist): +344 self._scans_number_list = alist 345 -346 def plot_gc_peaks(self, ax=None, color="red"): # pragma: no cover -347 """Plot the GC peaks. -348 -349 This method plots the GC peaks. -350 -351 Parameters -352 ---------- -353 ax : matplotlib.axes.Axes, optional -354 Axes object to plot the GC peaks. Defaults to None. -355 color : str, optional -356 Color of the GC peaks. Defaults to 'red'. 
-357 """ -358 -359 import matplotlib.pyplot as plt -360 fig = plt.gcf() -361 if ax is None: -362 ax = plt.gca() -363 -364 max_rts = [gc_peak.mass_spectrum.retention_time for gc_peak in self] -365 max_tics = [gc_peak.mass_spectrum.tic for gc_peak in self] -366 -367 # min_rts = [self._ms[gc_peak.start_index].retention_time for gc_peak in self] + [self._ms[gc_peak.final_index].retention_time for gc_peak in self] -368 # min_tics = [self._ms[gc_peak.start_index].tic for gc_peak in self] + [self._ms[gc_peak.final_index].tic for gc_peak in self] -369 # sc = ax.scatter(min_rts, min_tics, color='yellow', linewidth=0, marker='v') -370 -371 sc = ax.scatter(max_rts, max_tics, color=color, marker='v') -372 -373 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') -374 -375 annot = ax.annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points", -376 bbox=dict(boxstyle="round", fc="w"), -377 arrowprops=dict(arrowstyle="->")) -378 annot.set_visible(False) -379 annot.get_bbox_patch().set_facecolor(('lightblue')) -380 annot.get_bbox_patch().set_alpha(0.8) -381 -382 def update_annot(ind): -383 -384 pos = sc.get_offsets()[ind["ind"][0]] -385 annot.xy = pos -386 -387 text = "RT: {}\nRT Ref: {}\nRI: {}\nRI Ref: {}\nSimilarity Score: {}\nName: {}".format(" ".join([str(round(self[n].retention_time, 2)) for n in ind["ind"]]), -388 " ".join([str(round(self[n].highest_score_compound.retention_time, 2) if self[n].highest_score_compound else None) for n in ind["ind"]]), -389 " ".join([str(round(self[n].ri, 2) if self[n].ri else None) for n in ind["ind"]]), -390 " ".join([str(round(self[n].highest_score_compound.ri, 2) if self[n].highest_score_compound else None) for n in ind["ind"]]), -391 " ".join([str(round(self[n].highest_score_compound.similarity_score, 4) if self[n].highest_score_compound else None) for n in ind["ind"]]), -392 " ".join([str(self[n].highest_score_compound.name if self[n].highest_score_compound else None) for n in ind["ind"]]) -393 ) -394 
annot.set_text(text) +346 @tic.setter +347 def tic(self, alist): +348 self._tic_list = array(alist) +349 +350 def plot_gc_peaks(self, ax=None, color="red"): # pragma: no cover +351 """Plot the GC peaks. +352 +353 This method plots the GC peaks. +354 +355 Parameters +356 ---------- +357 ax : matplotlib.axes.Axes, optional +358 Axes object to plot the GC peaks. Defaults to None. +359 color : str, optional +360 Color of the GC peaks. Defaults to 'red'. +361 """ +362 +363 import matplotlib.pyplot as plt +364 +365 fig = plt.gcf() +366 if ax is None: +367 ax = plt.gca() +368 +369 max_rts = [gc_peak.mass_spectrum.retention_time for gc_peak in self] +370 max_tics = [gc_peak.mass_spectrum.tic for gc_peak in self] +371 +372 # min_rts = [self._ms[gc_peak.start_index].retention_time for gc_peak in self] + [self._ms[gc_peak.final_index].retention_time for gc_peak in self] +373 # min_tics = [self._ms[gc_peak.start_index].tic for gc_peak in self] + [self._ms[gc_peak.final_index].tic for gc_peak in self] +374 # sc = ax.scatter(min_rts, min_tics, color='yellow', linewidth=0, marker='v') +375 +376 sc = ax.scatter(max_rts, max_tics, color=color, marker="v") +377 +378 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") +379 +380 annot = ax.annotate( +381 "", +382 xy=(0, 0), +383 xytext=(20, 20), +384 textcoords="offset points", +385 bbox=dict(boxstyle="round", fc="w"), +386 arrowprops=dict(arrowstyle="->"), +387 ) +388 annot.set_visible(False) +389 annot.get_bbox_patch().set_facecolor(("lightblue")) +390 annot.get_bbox_patch().set_alpha(0.8) +391 +392 def update_annot(ind): +393 pos = sc.get_offsets()[ind["ind"][0]] +394 annot.xy = pos 395 -396 def hover(event): -397 vis = annot.get_visible() -398 if event.inaxes == ax: -399 cont, ind = sc.contains(event) -400 if cont: -401 update_annot(ind) -402 annot.set_visible(True) -403 fig.canvas.draw_idle() -404 else: -405 if vis: -406 annot.set_visible(False) -407 fig.canvas.draw_idle() -408 -409 
fig.canvas.mpl_connect("motion_notify_event", hover) -410 -411 return ax -412 -413 def to_excel(self, out_file_path, write_mode='ab', write_metadata=True, id_label="corems:"): -414 """Export the GC-MS data to an Excel file. -415 -416 This method exports the GC-MS data to an Excel file. -417 -418 Parameters -419 ---------- -420 out_file_path : str, pathlib.Path, or s3path.S3Path -421 Path object containing the file location. -422 write_mode : str, optional -423 Write mode. Defaults to 'ab'. -424 write_metadata : bool, optional -425 If True, write the metadata. Defaults to True. -426 id_label : str, optional -427 Label of the ID. Defaults to 'corems:'. -428 -429 """ -430 -431 if isinstance(out_file_path, str): -432 out_file_path = Path(out_file_path) -433 -434 exportMS = LowResGCMSExport(out_file_path, self) -435 exportMS.to_excel(id_label=id_label, write_mode=write_mode, write_metadata=write_metadata) -436 -437 return out_file_path.with_suffix('.xlsx') -438 -439 def to_csv(self, out_file_path, separate_output=False, write_metadata=True, id_label="corems:"): -440 """Export the GC-MS data to a CSV file. -441 -442 Parameters -443 ---------- -444 out_file_path : str, pathlib.Path, or s3path.S3Path -445 Path object containing the file location. -446 separate_output : bool, optional -447 If True, separate the output. Defaults to False. -448 write_metadata : bool, optional -449 If True, write the metadata. Defaults to True. -450 -451 """ -452 -453 if isinstance(out_file_path, str): -454 out_file_path = Path(out_file_path) -455 -456 exportMS = LowResGCMSExport(out_file_path, self) -457 exportMS.to_csv(id_label=id_label, separate_output=separate_output, write_metadata=write_metadata) -458 -459 return out_file_path.with_suffix('.csv') -460 -461 def to_pandas(self, out_file_path, write_metadata=True, id_label="corems:"): -462 """Export the GC-MS data to a Pandas dataframe. 
+396 text = "RT: {}\nRT Ref: {}\nRI: {}\nRI Ref: {}\nSimilarity Score: {}\nName: {}".format( +397 " ".join([str(round(self[n].retention_time, 2)) for n in ind["ind"]]), +398 " ".join( +399 [ +400 str( +401 round(self[n].highest_score_compound.retention_time, 2) +402 if self[n].highest_score_compound +403 else None +404 ) +405 for n in ind["ind"] +406 ] +407 ), +408 " ".join( +409 [ +410 str(round(self[n].ri, 2) if self[n].ri else None) +411 for n in ind["ind"] +412 ] +413 ), +414 " ".join( +415 [ +416 str( +417 round(self[n].highest_score_compound.ri, 2) +418 if self[n].highest_score_compound +419 else None +420 ) +421 for n in ind["ind"] +422 ] +423 ), +424 " ".join( +425 [ +426 str( +427 round(self[n].highest_score_compound.similarity_score, 4) +428 if self[n].highest_score_compound +429 else None +430 ) +431 for n in ind["ind"] +432 ] +433 ), +434 " ".join( +435 [ +436 str( +437 self[n].highest_score_compound.name +438 if self[n].highest_score_compound +439 else None +440 ) +441 for n in ind["ind"] +442 ] +443 ), +444 ) +445 annot.set_text(text) +446 +447 def hover(event): +448 vis = annot.get_visible() +449 if event.inaxes == ax: +450 cont, ind = sc.contains(event) +451 if cont: +452 update_annot(ind) +453 annot.set_visible(True) +454 fig.canvas.draw_idle() +455 else: +456 if vis: +457 annot.set_visible(False) +458 fig.canvas.draw_idle() +459 +460 fig.canvas.mpl_connect("motion_notify_event", hover) +461 +462 return ax 463 -464 Parameters -465 ---------- -466 out_file_path : str, pathlib.Path, or s3path.S3Path -467 Path object containing the file location. -468 write_metadata : bool, optional -469 If True, write the metadata. Defaults to True. -470 id_label : str, optional -471 Label of the ID. Defaults to 'corems:'. 
-472 -473 """ -474 -475 if isinstance(out_file_path, str): -476 out_file_path = Path(out_file_path) -477 # pickle dataframe (pkl extension) -478 exportMS = LowResGCMSExport(out_file_path, self) -479 exportMS.to_pandas(id_label=id_label, write_metadata=write_metadata) -480 -481 return out_file_path.with_suffix('.pkl') -482 -483 def to_dataframe(self, id_label="corems:"): -484 """Export the GC-MS data to a Pandas dataframe. -485 -486 Parameters -487 ---------- -488 id_label : str, optional -489 Label of the ID. Defaults to 'corems:'. -490 -491 """ -492 -493 # returns pandas dataframe -494 exportMS = LowResGCMSExport(self.sample_name, self) -495 return exportMS.get_pandas_df(id_label=id_label) -496 -497 def processing_stats(self): -498 """Return the processing statistics. -499 -500 """ -501 -502 # returns json string -503 exportMS = LowResGCMSExport(self.sample_name, self) -504 return exportMS.get_data_stats(self) -505 -506 def parameters_json(self, id_label="corems:", output_path=" "): -507 """Return the parameters in JSON format. -508 -509 Parameters -510 ---------- -511 id_label : str, optional -512 Label of the ID. Defaults to 'corems:'. -513 output_path : str, optional -514 Path object containing the file location. Defaults to " ". -515 """ +464 def to_excel( +465 self, out_file_path, write_mode="ab", write_metadata=True, id_label="corems:" +466 ): +467 """Export the GC-MS data to an Excel file. +468 +469 This method exports the GC-MS data to an Excel file. +470 +471 Parameters +472 ---------- +473 out_file_path : str, pathlib.Path, or s3path.S3Path +474 Path object containing the file location. +475 write_mode : str, optional +476 Write mode. Defaults to 'ab'. +477 write_metadata : bool, optional +478 If True, write the metadata. Defaults to True. +479 id_label : str, optional +480 Label of the ID. Defaults to 'corems:'. 
+481 +482 """ +483 +484 if isinstance(out_file_path, str): +485 out_file_path = Path(out_file_path) +486 +487 exportMS = LowResGCMSExport(out_file_path, self) +488 exportMS.to_excel( +489 id_label=id_label, write_mode=write_mode, write_metadata=write_metadata +490 ) +491 +492 return out_file_path.with_suffix(".xlsx") +493 +494 def to_csv( +495 self, +496 out_file_path, +497 separate_output=False, +498 write_metadata=True, +499 id_label="corems:", +500 ): +501 """Export the GC-MS data to a CSV file. +502 +503 Parameters +504 ---------- +505 out_file_path : str, pathlib.Path, or s3path.S3Path +506 Path object containing the file location. +507 separate_output : bool, optional +508 If True, separate the output. Defaults to False. +509 write_metadata : bool, optional +510 If True, write the metadata. Defaults to True. +511 +512 """ +513 +514 if isinstance(out_file_path, str): +515 out_file_path = Path(out_file_path) 516 -517 # returns json string -518 exportMS = LowResGCMSExport(self.sample_name, self) -519 return exportMS.get_parameters_json(self, id_label, output_path) -520 -521 def to_json(self, id_label="corems:"): -522 """Export the GC-MS data to a JSON file. +517 exportMS = LowResGCMSExport(out_file_path, self) +518 exportMS.to_csv( +519 id_label=id_label, +520 separate_output=separate_output, +521 write_metadata=write_metadata, +522 ) 523 -524 Parameters -525 ---------- -526 id_label : str, optional -527 Label of the ID. Defaults to 'corems:'. -528 -529 """ -530 -531 # returns pandas dataframe -532 exportMS = LowResGCMSExport(self.sample_name, self) -533 return exportMS.get_json(id_label=id_label) -534 -535 def to_hdf(self, id_label="corems:"): -536 """Export the GC-MS data to a HDF file. +524 return out_file_path.with_suffix(".csv") +525 +526 def to_pandas(self, out_file_path, write_metadata=True, id_label="corems:"): +527 """Export the GC-MS data to a Pandas dataframe. 
+528 +529 Parameters +530 ---------- +531 out_file_path : str, pathlib.Path, or s3path.S3Path +532 Path object containing the file location. +533 write_metadata : bool, optional +534 If True, write the metadata. Defaults to True. +535 id_label : str, optional +536 Label of the ID. Defaults to 'corems:'. 537 -538 Parameters -539 ---------- -540 id_label : str, optional -541 Label of the ID. Defaults to 'corems:'. -542 -543 """ -544 -545 # returns pandas dataframe -546 exportMS = LowResGCMSExport(self.sample_name, self) -547 return exportMS.to_hdf(id_label=id_label) -548 -549 def plot_chromatogram(self, ax=None, color="blue"): #pragma: no cover -550 """Plot the chromatogram. -551 -552 Parameters -553 ---------- -554 ax : matplotlib.axes.Axes, optional -555 Axes object to plot the chromatogram. Defaults to None. -556 color : str, optional -557 Color of the chromatogram. Defaults to 'blue'. -558 -559 """ -560 -561 import matplotlib.pyplot as plt -562 -563 if ax is None: -564 ax = plt.gca() -565 -566 ax.plot(self.retention_time, self.tic, color=color) -567 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') +538 """ +539 +540 if isinstance(out_file_path, str): +541 out_file_path = Path(out_file_path) +542 # pickle dataframe (pkl extension) +543 exportMS = LowResGCMSExport(out_file_path, self) +544 exportMS.to_pandas(id_label=id_label, write_metadata=write_metadata) +545 +546 return out_file_path.with_suffix(".pkl") +547 +548 def to_dataframe(self, id_label="corems:"): +549 """Export the GC-MS data to a Pandas dataframe. +550 +551 Parameters +552 ---------- +553 id_label : str, optional +554 Label of the ID. Defaults to 'corems:'. 
+555 +556 """ +557 +558 # returns pandas dataframe +559 exportMS = LowResGCMSExport(self.sample_name, self) +560 return exportMS.get_pandas_df(id_label=id_label) +561 +562 def processing_stats(self): +563 """Return the processing statistics.""" +564 +565 # returns json string +566 exportMS = LowResGCMSExport(self.sample_name, self) +567 return exportMS.get_data_stats(self) 568 -569 return ax -570 -571 def plot_smoothed_chromatogram(self, ax=None, color="green"): #pragma: no cover -572 """Plot the smoothed chromatogram. -573 -574 Parameters -575 ---------- -576 ax : matplotlib.axes.Axes, optional -577 Axes object to plot the smoothed chromatogram. Defaults to None. -578 color : str, optional -579 Color of the smoothed chromatogram. Defaults to 'green'. -580 -581 """ -582 -583 import matplotlib.pyplot as plt -584 -585 if ax is None: +569 def parameters_json(self, id_label="corems:", output_path=" "): +570 """Return the parameters in JSON format. +571 +572 Parameters +573 ---------- +574 id_label : str, optional +575 Label of the ID. Defaults to 'corems:'. +576 output_path : str, optional +577 Path object containing the file location. Defaults to " ". +578 """ +579 +580 # returns json string +581 exportMS = LowResGCMSExport(self.sample_name, self) +582 return exportMS.get_parameters_json(self, id_label, output_path) +583 +584 def to_json(self, id_label="corems:"): +585 """Export the GC-MS data to a JSON file. 586 -587 ax = plt.gca() -588 -589 ax.plot(self.retention_time, self.smooth_tic(self.tic), color=color) -590 -591 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') -592 -593 return ax -594 -595 def plot_detected_baseline(self, ax=None, color="blue"): # pragma: no cover -596 """Plot the detected baseline. +587 Parameters +588 ---------- +589 id_label : str, optional +590 Label of the ID. Defaults to 'corems:'. 
+591 +592 """ +593 +594 # returns pandas dataframe +595 exportMS = LowResGCMSExport(self.sample_name, self) +596 return exportMS.get_json(id_label=id_label) 597 -598 Parameters -599 ---------- -600 ax : matplotlib.axes.Axes, optional -601 Axes object to plot the detected baseline. Defaults to None. -602 color : str, optional -603 Color of the detected baseline. Defaults to 'blue'. -604 -605 """ -606 -607 import matplotlib.pyplot as plt -608 -609 if ax is None: -610 -611 ax = plt.gca() -612 -613 max_height = self.chromatogram_settings.peak_height_max_percent -614 max_prominence = self.chromatogram_settings.peak_max_prominence_percent -615 -616 baseline = sp.baseline_detector(self.tic, self.retention_time, max_height, max_prominence) -617 ax.plot(self.retention_time, color=color) -618 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') -619 -620 return ax +598 def to_hdf(self, id_label="corems:"): +599 """Export the GC-MS data to a HDF file. +600 +601 Parameters +602 ---------- +603 id_label : str, optional +604 Label of the ID. Defaults to 'corems:'. +605 +606 """ +607 +608 # returns pandas dataframe +609 exportMS = LowResGCMSExport(self.sample_name, self) +610 return exportMS.to_hdf(id_label=id_label) +611 +612 def plot_chromatogram(self, ax=None, color="blue"): # pragma: no cover +613 """Plot the chromatogram. +614 +615 Parameters +616 ---------- +617 ax : matplotlib.axes.Axes, optional +618 Axes object to plot the chromatogram. Defaults to None. +619 color : str, optional +620 Color of the chromatogram. Defaults to 'blue'. 621 -622 def plot_baseline_subtraction(self, ax=None, color="black"): # pragma: no cover -623 """Plot the baseline subtraction. -624 -625 Parameters -626 ---------- -627 ax : matplotlib.axes.Axes, optional -628 Axes object to plot the baseline subtraction. Defaults to None. -629 color : str, optional -630 Color of the baseline subtraction. Defaults to 'black'. 
+622 """ +623 +624 import matplotlib.pyplot as plt +625 +626 if ax is None: +627 ax = plt.gca() +628 +629 ax.plot(self.retention_time, self.tic, color=color) +630 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") 631 -632 """ +632 return ax 633 -634 import matplotlib.pyplot as plt -635 -636 if ax is None: -637 -638 ax = plt.gca() -639 -640 max_height = self.chromatogram_settings.peak_height_max_percent -641 -642 max_prominence = self.chromatogram_settings.peak_max_prominence_percent +634 def plot_smoothed_chromatogram(self, ax=None, color="green"): # pragma: no cover +635 """Plot the smoothed chromatogram. +636 +637 Parameters +638 ---------- +639 ax : matplotlib.axes.Axes, optional +640 Axes object to plot the smoothed chromatogram. Defaults to None. +641 color : str, optional +642 Color of the smoothed chromatogram. Defaults to 'green'. 643 -644 x = self.tic + sp.baseline_detector(self.tic, self.retention_time, max_height, max_prominence) +644 """ 645 -646 ax.plot(self.retention_time, x, color=color) +646 import matplotlib.pyplot as plt 647 -648 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') -649 -650 return ax -651 -652 def peaks_rt_tic(self, json_string=False): -653 """Return the peaks, retention time, and total ion chromatogram. -654 -655 Parameters -656 ---------- -657 json_string : bool, optional -658 If True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False. 
-659 -660 """ -661 -662 peaks_list = dict() -663 -664 all_candidates_data = {} -665 -666 all_peaks_data = {} -667 -668 for gcms_peak in self.sorted_gcpeaks: -669 -670 dict_data = {'rt': gcms_peak.rt_list, -671 'tic': gcms_peak.tic_list, -672 'mz': gcms_peak.mass_spectrum.mz_exp.tolist(), -673 'abundance': gcms_peak.mass_spectrum.abundance.tolist(), -674 'candidate_names': gcms_peak.compound_names, -675 } +648 if ax is None: +649 ax = plt.gca() +650 +651 ax.plot(self.retention_time, self.smooth_tic(self.tic), color=color) +652 +653 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") +654 +655 return ax +656 +657 def plot_detected_baseline(self, ax=None, color="blue"): # pragma: no cover +658 """Plot the detected baseline. +659 +660 Parameters +661 ---------- +662 ax : matplotlib.axes.Axes, optional +663 Axes object to plot the detected baseline. Defaults to None. +664 color : str, optional +665 Color of the detected baseline. Defaults to 'blue'. +666 +667 """ +668 +669 import matplotlib.pyplot as plt +670 +671 if ax is None: +672 ax = plt.gca() +673 +674 max_height = self.chromatogram_settings.peak_height_max_percent +675 max_prominence = self.chromatogram_settings.peak_max_prominence_percent 676 -677 peaks_list[gcms_peak.retention_time] = dict_data -678 -679 for compound in gcms_peak: -680 -681 if compound.name not in all_candidates_data.keys(): -682 mz = array(compound.mz).tolist() -683 abundance = array(compound.abundance).tolist() -684 data = {'mz': mz, "abundance": abundance} -685 all_candidates_data[compound.name] = data -686 -687 all_peaks_data["peak_data"] = peaks_list -688 all_peaks_data["ref_data"] = all_candidates_data -689 -690 if json_string: -691 -692 return json.dumps(all_peaks_data) -693 -694 else: -695 return all_peaks_data +677 baseline = sp.baseline_detector( +678 self.tic, self.retention_time, max_height, max_prominence +679 ) +680 ax.plot(self.retention_time, color=color) +681 ax.set(xlabel="Retention Time (s)", ylabel="Total 
Ion Chromatogram") +682 +683 return ax +684 +685 def plot_baseline_subtraction(self, ax=None, color="black"): # pragma: no cover +686 """Plot the baseline subtraction. +687 +688 Parameters +689 ---------- +690 ax : matplotlib.axes.Axes, optional +691 Axes object to plot the baseline subtraction. Defaults to None. +692 color : str, optional +693 Color of the baseline subtraction. Defaults to 'black'. +694 +695 """ 696 -697 def plot_processed_chromatogram(self, ax=None, color="black"): -698 """Plot the processed chromatogram. -699 -700 Parameters -701 ---------- -702 ax : matplotlib.axes.Axes, optional -703 Axes object to plot the processed chromatogram. Defaults to None. -704 color : str, optional -705 Color of the processed chromatogram. Defaults to 'black'. -706 -707 """ -708 -709 import matplotlib.pyplot as plt -710 -711 if ax is None: -712 -713 ax = plt.gca() -714 -715 ax.plot(self.retention_time, self.processed_tic, color=color) -716 -717 ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram') +697 import matplotlib.pyplot as plt +698 +699 if ax is None: +700 ax = plt.gca() +701 +702 max_height = self.chromatogram_settings.peak_height_max_percent +703 +704 max_prominence = self.chromatogram_settings.peak_max_prominence_percent +705 +706 x = self.tic + sp.baseline_detector( +707 self.tic, self.retention_time, max_height, max_prominence +708 ) +709 +710 ax.plot(self.retention_time, x, color=color) +711 +712 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") +713 +714 return ax +715 +716 def peaks_rt_tic(self, json_string=False): +717 """Return the peaks, retention time, and total ion chromatogram. 718 -719 return ax +719 Parameters +720 ---------- +721 json_string : bool, optional +722 If True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False. 
+723 +724 """ +725 +726 peaks_list = dict() +727 +728 all_candidates_data = {} +729 +730 all_peaks_data = {} +731 +732 for gcms_peak in self.sorted_gcpeaks: +733 dict_data = { +734 "rt": gcms_peak.rt_list, +735 "tic": gcms_peak.tic_list, +736 "mz": gcms_peak.mass_spectrum.mz_exp.tolist(), +737 "abundance": gcms_peak.mass_spectrum.abundance.tolist(), +738 "candidate_names": gcms_peak.compound_names, +739 } +740 +741 peaks_list[gcms_peak.retention_time] = dict_data +742 +743 for compound in gcms_peak: +744 if compound.name not in all_candidates_data.keys(): +745 mz = array(compound.mz).tolist() +746 abundance = array(compound.abundance).tolist() +747 data = {"mz": mz, "abundance": abundance} +748 all_candidates_data[compound.name] = data +749 +750 all_peaks_data["peak_data"] = peaks_list +751 all_peaks_data["ref_data"] = all_candidates_data +752 +753 if json_string: +754 return json.dumps(all_peaks_data) +755 +756 else: +757 return all_peaks_data +758 +759 def plot_processed_chromatogram(self, ax=None, color="black"): +760 """Plot the processed chromatogram. +761 +762 Parameters +763 ---------- +764 ax : matplotlib.axes.Axes, optional +765 Axes object to plot the processed chromatogram. Defaults to None. +766 color : str, optional +767 Color of the processed chromatogram. Defaults to 'black'. +768 +769 """ +770 +771 import matplotlib.pyplot as plt +772 +773 if ax is None: +774 ax = plt.gca() +775 +776 ax.plot(self.retention_time, self.processed_tic, color=color) +777 +778 ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram") +779 +780 return ax

    @@ -1682,40 +1804,44 @@
    Methods
    -
     71    def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown', sample_name=None):
    - 72
    - 73        if isinstance(file_location, str):
    - 74            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    - 75            file_location = Path(file_location)
    - 76
    - 77        if not file_location.exists():
    - 78
    - 79            raise FileExistsError("File does not exist: " + str(file_location))
    - 80
    - 81        self.file_location = file_location
    - 82
    - 83        if sample_name:
    - 84            self.sample_name = sample_name
    - 85        else:
    - 86            self.sample_name = file_location.stem
    - 87
    - 88        self.analyzer = analyzer
    - 89        self.instrument_label = instrument_label
    - 90        self._init_settings()
    +            
     71    def __init__(
    + 72        self,
    + 73        file_location,
    + 74        analyzer="Unknown",
    + 75        instrument_label="Unknown",
    + 76        sample_name=None,
    + 77    ):
    + 78        if isinstance(file_location, str):
    + 79            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    + 80            file_location = Path(file_location)
    + 81
    + 82        if not file_location.exists():
    + 83            raise FileExistsError("File does not exist: " + str(file_location))
    + 84
    + 85        self.file_location = file_location
    + 86
    + 87        if sample_name:
    + 88            self.sample_name = sample_name
    + 89        else:
    + 90            self.sample_name = file_location.stem
      91
    - 92        self._retention_time_list = []
    - 93        self._scans_number_list = []
    - 94        self._tic_list = []
    + 92        self.analyzer = analyzer
    + 93        self.instrument_label = instrument_label
    + 94        self._init_settings()
      95
    - 96        # all scans
    - 97        self._ms = {}
    - 98
    - 99        # after peak detection
    -100        self._processed_tic = []
    -101        self.gcpeaks = []
    + 96        self._retention_time_list = []
    + 97        self._scans_number_list = []
    + 98        self._tic_list = []
    + 99
    +100        # all scans
    +101        self._ms = {}
     102
    -103        self.ri_pairs_ref = None
    -104        self.cal_file_path = None
    +103        # after peak detection
    +104        self._processed_tic = []
    +105        self.gcpeaks = []
    +106
    +107        self.ri_pairs_ref = None
    +108        self.cal_file_path = None
     
    @@ -1799,46 +1925,44 @@
    Methods
    -
    125    def process_chromatogram(self, plot_res=False):
    -126        """Process the chromatogram.
    -127        
    -128        This method processes the chromatogram.
    -129        
    -130        Parameters
    -131        ----------
    -132        plot_res : bool, optional
    -133            If True, plot the results. Defaults to False.           
    -134        """
    -135
    -136        # tic = self.tic - self.baseline_detector(self.tic)
    -137
    -138        self._processed_tic = self.smooth_tic(self.tic)
    +            
    129    def process_chromatogram(self, plot_res=False):
    +130        """Process the chromatogram.
    +131
    +132        This method processes the chromatogram.
    +133
    +134        Parameters
    +135        ----------
    +136        plot_res : bool, optional
    +137            If True, plot the results. Defaults to False.
    +138        """
     139
    -140        for index, tic in enumerate(self._processed_tic):
    +140        # tic = self.tic - self.baseline_detector(self.tic)
     141
    -142            self._ms[index]._processed_tic = tic
    +142        self._processed_tic = self.smooth_tic(self.tic)
     143
    -144        # self.second_derivative_threshold(self._processed_tic)
    -145
    -146        if self.chromatogram_settings.use_deconvolution:
    -147
    -148            self.run_deconvolution(plot_res=False)
    -149
    -150        else:
    +144        for index, tic in enumerate(self._processed_tic):
    +145            self._ms[index]._processed_tic = tic
    +146
    +147        # self.second_derivative_threshold(self._processed_tic)
    +148
    +149        if self.chromatogram_settings.use_deconvolution:
    +150            self.run_deconvolution(plot_res=False)
     151
    -152            peaks_index = self.centroid_detector(self._processed_tic, self.retention_time)
    -153
    -154            for i in peaks_index:
    -155
    -156                apex_index = i[1]
    -157
    -158                gc_peak = GCPeak(self, self._ms[apex_index], i )
    +152        else:
    +153            peaks_index = self.centroid_detector(
    +154                self._processed_tic, self.retention_time
    +155            )
    +156
    +157            for i in peaks_index:
    +158                apex_index = i[1]
     159
    -160                gc_peak.calc_area(self._processed_tic, 1)
    +160                gc_peak = GCPeak(self, self._ms[apex_index], i)
     161
    -162                self.gcpeaks.append(gc_peak)
    +162                gc_peak.calc_area(self._processed_tic, 1)
     163
    -164                # self.gcpeaks[self.scans_number[apex_index]] = gc_peak
    +164                self.gcpeaks.append(gc_peak)
    +165
    +166                # self.gcpeaks[self.scans_number[apex_index]] = gc_peak
     
    @@ -1867,18 +1991,18 @@
    Parameters
    -
    166    def add_mass_spectrum(self, mass_spec):
    -167        """Add a mass spectrum to the GC-MS object.
    -168
    -169        This method adds a mass spectrum to the GC-MS object.
    +            
    168    def add_mass_spectrum(self, mass_spec):
    +169        """Add a mass spectrum to the GC-MS object.
     170
    -171        Parameters
    -172        ----------
    -173        mass_spec : MassSpectrum
    -174            Mass spectrum to be added.
    -175        """
    -176
    -177        self._ms[mass_spec.scan_number] = mass_spec
    +171        This method adds a mass spectrum to the GC-MS object.
    +172
    +173        Parameters
    +174        ----------
    +175        mass_spec : MassSpectrum
    +176            Mass spectrum to be added.
    +177        """
    +178
    +179        self._ms[mass_spec.scan_number] = mass_spec
     
    @@ -1907,12 +2031,12 @@
    Parameters
    -
    179    def set_tic_list_from_data(self):
    -180        """Set the total ion chromatogram list from the mass spectra data within the GC-MS data object."""
    -181
    -182        self.tic = [self._ms.get(i).tic for i in self.scans_number]
    +            
    181    def set_tic_list_from_data(self):
    +182        """Set the total ion chromatogram list from the mass spectra data within the GC-MS data object."""
     183
    -184        # self.set_tic_list([self._ms.get(i).get_sumed_signal_to_noise() for i in self.get_scans_number()])
    +184        self.tic = [self._ms.get(i).tic for i in self.scans_number]
    +185
    +186        # self.set_tic_list([self._ms.get(i).get_sumed_signal_to_noise() for i in self.get_scans_number()])
     
    @@ -1932,18 +2056,17 @@
    Parameters
    -
    186    def set_retention_time_from_data(self):
    -187        """Set the retention time list from the mass spectra data within the GC-MS data object."""
    -188
    -189        retention_time_list = []
    +            
    188    def set_retention_time_from_data(self):
    +189        """Set the retention time list from the mass spectra data within the GC-MS data object."""
     190
    -191        for key_ms in sorted(self._ms.keys()):
    +191        retention_time_list = []
     192
    -193            retention_time_list.append(self._ms.get(key_ms).retention_time)
    -194
    -195        self.retention_time = retention_time_list
    -196
    -197        # self.set_retention_time_list(sorted(self._ms.keys()))
    +193        for key_ms in sorted(self._ms.keys()):
    +194            retention_time_list.append(self._ms.get(key_ms).retention_time)
    +195
    +196        self.retention_time = retention_time_list
    +197
    +198        # self.set_retention_time_list(sorted(self._ms.keys()))
     
    @@ -1963,10 +2086,10 @@
    Parameters
    -
    199    def set_scans_number_from_data(self):
    -200        """Set the scan number list from the mass spectra data within the GC-MS data object."""
    -201
    -202        self.scans_number = sorted(self._ms.keys())
    +            
    200    def set_scans_number_from_data(self):
    +201        """Set the scan number list from the mass spectra data within the GC-MS data object."""
    +202
    +203        self.scans_number = sorted(self._ms.keys())
     
    @@ -2181,72 +2304,119 @@
    Parameters
    -
    346    def plot_gc_peaks(self, ax=None, color="red"):  # pragma: no cover
    -347        """Plot the GC peaks.
    -348        
    -349        This method plots the GC peaks.
    -350        
    -351        Parameters
    -352        ----------
    -353        ax : matplotlib.axes.Axes, optional
    -354            Axes object to plot the GC peaks. Defaults to None.
    -355        color : str, optional
    -356            Color of the GC peaks. Defaults to 'red'.
    -357        """
    -358
    -359        import matplotlib.pyplot as plt
    -360        fig = plt.gcf()
    -361        if ax is None:
    -362            ax = plt.gca()
    -363
    -364        max_rts = [gc_peak.mass_spectrum.retention_time for gc_peak in self]
    -365        max_tics = [gc_peak.mass_spectrum.tic for gc_peak in self]
    -366
    -367        # min_rts = [self._ms[gc_peak.start_index].retention_time for gc_peak in self] + [self._ms[gc_peak.final_index].retention_time for gc_peak in self]
    -368        # min_tics = [self._ms[gc_peak.start_index].tic for gc_peak in self] + [self._ms[gc_peak.final_index].tic for gc_peak in self]
    -369        # sc = ax.scatter(min_rts, min_tics, color='yellow', linewidth=0, marker='v')
    -370
    -371        sc = ax.scatter(max_rts, max_tics, color=color, marker='v')
    -372
    -373        ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram')
    -374
    -375        annot = ax.annotate("", xy=(0, 0), xytext=(20, 20), textcoords="offset points",
    -376                            bbox=dict(boxstyle="round", fc="w"),
    -377                            arrowprops=dict(arrowstyle="->"))
    -378        annot.set_visible(False)
    -379        annot.get_bbox_patch().set_facecolor(('lightblue'))
    -380        annot.get_bbox_patch().set_alpha(0.8)
    -381
    -382        def update_annot(ind):
    -383
    -384            pos = sc.get_offsets()[ind["ind"][0]]
    -385            annot.xy = pos
    -386
    -387            text = "RT: {}\nRT Ref: {}\nRI: {}\nRI Ref: {}\nSimilarity Score: {}\nName: {}".format(" ".join([str(round(self[n].retention_time, 2)) for n in ind["ind"]]),
    -388                           " ".join([str(round(self[n].highest_score_compound.retention_time, 2) if self[n].highest_score_compound else None) for n in ind["ind"]]),
    -389                           " ".join([str(round(self[n].ri, 2) if self[n].ri else None) for n in ind["ind"]]),
    -390                           " ".join([str(round(self[n].highest_score_compound.ri, 2) if self[n].highest_score_compound else None) for n in ind["ind"]]),                           
    -391                           " ".join([str(round(self[n].highest_score_compound.similarity_score, 4) if self[n].highest_score_compound else None) for n in ind["ind"]]),
    -392                           " ".join([str(self[n].highest_score_compound.name if self[n].highest_score_compound else None) for n in ind["ind"]])
    -393                           )
    -394            annot.set_text(text)
    +            
    350    def plot_gc_peaks(self, ax=None, color="red"):  # pragma: no cover
    +351        """Plot the GC peaks.
    +352
    +353        This method plots the GC peaks.
    +354
    +355        Parameters
    +356        ----------
    +357        ax : matplotlib.axes.Axes, optional
    +358            Axes object to plot the GC peaks. Defaults to None.
    +359        color : str, optional
    +360            Color of the GC peaks. Defaults to 'red'.
    +361        """
    +362
    +363        import matplotlib.pyplot as plt
    +364
    +365        fig = plt.gcf()
    +366        if ax is None:
    +367            ax = plt.gca()
    +368
    +369        max_rts = [gc_peak.mass_spectrum.retention_time for gc_peak in self]
    +370        max_tics = [gc_peak.mass_spectrum.tic for gc_peak in self]
    +371
    +372        # min_rts = [self._ms[gc_peak.start_index].retention_time for gc_peak in self] + [self._ms[gc_peak.final_index].retention_time for gc_peak in self]
    +373        # min_tics = [self._ms[gc_peak.start_index].tic for gc_peak in self] + [self._ms[gc_peak.final_index].tic for gc_peak in self]
    +374        # sc = ax.scatter(min_rts, min_tics, color='yellow', linewidth=0, marker='v')
    +375
    +376        sc = ax.scatter(max_rts, max_tics, color=color, marker="v")
    +377
    +378        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")
    +379
    +380        annot = ax.annotate(
    +381            "",
    +382            xy=(0, 0),
    +383            xytext=(20, 20),
    +384            textcoords="offset points",
    +385            bbox=dict(boxstyle="round", fc="w"),
    +386            arrowprops=dict(arrowstyle="->"),
    +387        )
    +388        annot.set_visible(False)
    +389        annot.get_bbox_patch().set_facecolor(("lightblue"))
    +390        annot.get_bbox_patch().set_alpha(0.8)
    +391
    +392        def update_annot(ind):
    +393            pos = sc.get_offsets()[ind["ind"][0]]
    +394            annot.xy = pos
     395
    -396        def hover(event):
    -397            vis = annot.get_visible()
    -398            if event.inaxes == ax:
    -399                cont, ind = sc.contains(event)
    -400                if cont:
    -401                    update_annot(ind)
    -402                    annot.set_visible(True)
    -403                    fig.canvas.draw_idle()
    -404                else:
    -405                    if vis:
    -406                        annot.set_visible(False)
    -407                        fig.canvas.draw_idle()
    -408
    -409        fig.canvas.mpl_connect("motion_notify_event", hover)
    -410
    -411        return ax
    +396            text = "RT: {}\nRT Ref: {}\nRI: {}\nRI Ref: {}\nSimilarity Score: {}\nName: {}".format(
    +397                " ".join([str(round(self[n].retention_time, 2)) for n in ind["ind"]]),
    +398                " ".join(
    +399                    [
    +400                        str(
    +401                            round(self[n].highest_score_compound.retention_time, 2)
    +402                            if self[n].highest_score_compound
    +403                            else None
    +404                        )
    +405                        for n in ind["ind"]
    +406                    ]
    +407                ),
    +408                " ".join(
    +409                    [
    +410                        str(round(self[n].ri, 2) if self[n].ri else None)
    +411                        for n in ind["ind"]
    +412                    ]
    +413                ),
    +414                " ".join(
    +415                    [
    +416                        str(
    +417                            round(self[n].highest_score_compound.ri, 2)
    +418                            if self[n].highest_score_compound
    +419                            else None
    +420                        )
    +421                        for n in ind["ind"]
    +422                    ]
    +423                ),
    +424                " ".join(
    +425                    [
    +426                        str(
    +427                            round(self[n].highest_score_compound.similarity_score, 4)
    +428                            if self[n].highest_score_compound
    +429                            else None
    +430                        )
    +431                        for n in ind["ind"]
    +432                    ]
    +433                ),
    +434                " ".join(
    +435                    [
    +436                        str(
    +437                            self[n].highest_score_compound.name
    +438                            if self[n].highest_score_compound
    +439                            else None
    +440                        )
    +441                        for n in ind["ind"]
    +442                    ]
    +443                ),
    +444            )
    +445            annot.set_text(text)
    +446
    +447        def hover(event):
    +448            vis = annot.get_visible()
    +449            if event.inaxes == ax:
    +450                cont, ind = sc.contains(event)
    +451                if cont:
    +452                    update_annot(ind)
    +453                    annot.set_visible(True)
    +454                    fig.canvas.draw_idle()
    +455                else:
    +456                    if vis:
    +457                        annot.set_visible(False)
    +458                        fig.canvas.draw_idle()
    +459
    +460        fig.canvas.mpl_connect("motion_notify_event", hover)
    +461
    +462        return ax
     
    @@ -2277,31 +2447,35 @@
    Parameters
    -
    413    def to_excel(self, out_file_path, write_mode='ab', write_metadata=True, id_label="corems:"):
    -414        """Export the GC-MS data to an Excel file.
    -415
    -416        This method exports the GC-MS data to an Excel file.
    -417
    -418        Parameters
    -419        ----------
    -420        out_file_path : str, pathlib.Path, or s3path.S3Path
    -421            Path object containing the file location.
    -422        write_mode : str, optional
    -423            Write mode. Defaults to 'ab'.
    -424        write_metadata : bool, optional
    -425            If True, write the metadata. Defaults to True.
    -426        id_label : str, optional
    -427            Label of the ID. Defaults to 'corems:'.
    -428
    -429        """
    -430
    -431        if isinstance(out_file_path, str):
    -432            out_file_path = Path(out_file_path)
    -433
    -434        exportMS = LowResGCMSExport(out_file_path, self)
    -435        exportMS.to_excel(id_label=id_label, write_mode=write_mode, write_metadata=write_metadata)
    -436
    -437        return out_file_path.with_suffix('.xlsx')
    +            
    464    def to_excel(
    +465        self, out_file_path, write_mode="ab", write_metadata=True, id_label="corems:"
    +466    ):
    +467        """Export the GC-MS data to an Excel file.
    +468
    +469        This method exports the GC-MS data to an Excel file.
    +470
    +471        Parameters
    +472        ----------
    +473        out_file_path : str, pathlib.Path, or s3path.S3Path
    +474            Path object containing the file location.
    +475        write_mode : str, optional
    +476            Write mode. Defaults to 'ab'.
    +477        write_metadata : bool, optional
    +478            If True, write the metadata. Defaults to True.
    +479        id_label : str, optional
    +480            Label of the ID. Defaults to 'corems:'.
    +481
    +482        """
    +483
    +484        if isinstance(out_file_path, str):
    +485            out_file_path = Path(out_file_path)
    +486
    +487        exportMS = LowResGCMSExport(out_file_path, self)
    +488        exportMS.to_excel(
    +489            id_label=id_label, write_mode=write_mode, write_metadata=write_metadata
    +490        )
    +491
    +492        return out_file_path.with_suffix(".xlsx")
     
    @@ -2336,27 +2510,37 @@
    Parameters
    -
    439    def to_csv(self, out_file_path, separate_output=False, write_metadata=True, id_label="corems:"):
    -440        """Export the GC-MS data to a CSV file.
    -441
    -442        Parameters
    -443        ----------
    -444        out_file_path : str, pathlib.Path, or s3path.S3Path
    -445            Path object containing the file location.
    -446        separate_output : bool, optional
    -447            If True, separate the output. Defaults to False.
    -448        write_metadata : bool, optional
    -449            If True, write the metadata. Defaults to True.
    -450
    -451        """
    -452
    -453        if isinstance(out_file_path, str):
    -454            out_file_path = Path(out_file_path)
    -455
    -456        exportMS = LowResGCMSExport(out_file_path, self)
    -457        exportMS.to_csv(id_label=id_label, separate_output=separate_output, write_metadata=write_metadata)
    -458
    -459        return out_file_path.with_suffix('.csv')
    +            
    494    def to_csv(
    +495        self,
    +496        out_file_path,
    +497        separate_output=False,
    +498        write_metadata=True,
    +499        id_label="corems:",
    +500    ):
    +501        """Export the GC-MS data to a CSV file.
    +502
    +503        Parameters
    +504        ----------
    +505        out_file_path : str, pathlib.Path, or s3path.S3Path
    +506            Path object containing the file location.
    +507        separate_output : bool, optional
    +508            If True, separate the output. Defaults to False.
    +509        write_metadata : bool, optional
    +510            If True, write the metadata. Defaults to True.
    +511
    +512        """
    +513
    +514        if isinstance(out_file_path, str):
    +515            out_file_path = Path(out_file_path)
    +516
    +517        exportMS = LowResGCMSExport(out_file_path, self)
    +518        exportMS.to_csv(
    +519            id_label=id_label,
    +520            separate_output=separate_output,
    +521            write_metadata=write_metadata,
    +522        )
    +523
    +524        return out_file_path.with_suffix(".csv")
     
    @@ -2387,27 +2571,27 @@
    Parameters
    -
    461    def to_pandas(self, out_file_path, write_metadata=True, id_label="corems:"):
    -462        """Export the GC-MS data to a Pandas dataframe.
    -463
    -464        Parameters
    -465        ----------
    -466        out_file_path : str, pathlib.Path, or s3path.S3Path
    -467            Path object containing the file location.
    -468        write_metadata : bool, optional
    -469            If True, write the metadata. Defaults to True.
    -470        id_label : str, optional
    -471            Label of the ID. Defaults to 'corems:'.
    -472
    -473        """
    -474        
    -475        if isinstance(out_file_path, str):
    -476            out_file_path = Path(out_file_path)
    -477        # pickle dataframe (pkl extension)
    -478        exportMS = LowResGCMSExport(out_file_path, self)
    -479        exportMS.to_pandas(id_label=id_label, write_metadata=write_metadata)
    -480
    -481        return out_file_path.with_suffix('.pkl')
    +            
    526    def to_pandas(self, out_file_path, write_metadata=True, id_label="corems:"):
    +527        """Export the GC-MS data to a Pandas dataframe.
    +528
    +529        Parameters
    +530        ----------
    +531        out_file_path : str, pathlib.Path, or s3path.S3Path
    +532            Path object containing the file location.
    +533        write_metadata : bool, optional
    +534            If True, write the metadata. Defaults to True.
    +535        id_label : str, optional
    +536            Label of the ID. Defaults to 'corems:'.
    +537
    +538        """
    +539
    +540        if isinstance(out_file_path, str):
    +541            out_file_path = Path(out_file_path)
    +542        # pickle dataframe (pkl extension)
    +543        exportMS = LowResGCMSExport(out_file_path, self)
    +544        exportMS.to_pandas(id_label=id_label, write_metadata=write_metadata)
    +545
    +546        return out_file_path.with_suffix(".pkl")
     
    @@ -2438,19 +2622,19 @@
    Parameters
    -
    483    def to_dataframe(self, id_label="corems:"):
    -484        """Export the GC-MS data to a Pandas dataframe.
    -485
    -486        Parameters
    -487        ----------
    -488        id_label : str, optional
    -489            Label of the ID. Defaults to 'corems:'.
    -490        
    -491        """
    -492
    -493        # returns pandas dataframe
    -494        exportMS = LowResGCMSExport(self.sample_name, self)
    -495        return exportMS.get_pandas_df(id_label=id_label)
    +            
    548    def to_dataframe(self, id_label="corems:"):
    +549        """Export the GC-MS data to a Pandas dataframe.
    +550
    +551        Parameters
    +552        ----------
    +553        id_label : str, optional
    +554            Label of the ID. Defaults to 'corems:'.
    +555
    +556        """
    +557
    +558        # returns pandas dataframe
    +559        exportMS = LowResGCMSExport(self.sample_name, self)
    +560        return exportMS.get_pandas_df(id_label=id_label)
     
    @@ -2477,14 +2661,12 @@
    Parameters
    -
    497    def processing_stats(self):
    -498        """Return the processing statistics.
    -499
    -500        """
    -501        
    -502        # returns json string
    -503        exportMS = LowResGCMSExport(self.sample_name, self)
    -504        return exportMS.get_data_stats(self)
    +            
    562    def processing_stats(self):
    +563        """Return the processing statistics."""
    +564
    +565        # returns json string
    +566        exportMS = LowResGCMSExport(self.sample_name, self)
    +567        return exportMS.get_data_stats(self)
     
    @@ -2504,20 +2686,20 @@
    Parameters
    -
    506    def parameters_json(self, id_label="corems:", output_path=" "):
    -507        """Return the parameters in JSON format.
    -508
    -509        Parameters
    -510        ----------
    -511        id_label : str, optional
    -512            Label of the ID. Defaults to 'corems:'.
    -513        output_path : str, optional
    -514            Path object containing the file location. Defaults to " ".
    -515        """
    -516
    -517        # returns json string
    -518        exportMS = LowResGCMSExport(self.sample_name, self)
    -519        return exportMS.get_parameters_json(self, id_label, output_path)
    +            
    569    def parameters_json(self, id_label="corems:", output_path=" "):
    +570        """Return the parameters in JSON format.
    +571
    +572        Parameters
    +573        ----------
    +574        id_label : str, optional
    +575            Label of the ID. Defaults to 'corems:'.
    +576        output_path : str, optional
    +577            Path object containing the file location. Defaults to " ".
    +578        """
    +579
    +580        # returns json string
    +581        exportMS = LowResGCMSExport(self.sample_name, self)
    +582        return exportMS.get_parameters_json(self, id_label, output_path)
     
    @@ -2546,19 +2728,19 @@
    Parameters
    -
    521    def to_json(self, id_label="corems:"):
    -522        """Export the GC-MS data to a JSON file.
    -523
    -524        Parameters
    -525        ----------
    -526        id_label : str, optional
    -527            Label of the ID. Defaults to 'corems:'.
    -528        
    -529        """      
    -530
    -531        # returns pandas dataframe
    -532        exportMS = LowResGCMSExport(self.sample_name, self)
    -533        return exportMS.get_json(id_label=id_label)
    +            
    584    def to_json(self, id_label="corems:"):
    +585        """Export the GC-MS data to a JSON file.
    +586
    +587        Parameters
    +588        ----------
    +589        id_label : str, optional
    +590            Label of the ID. Defaults to 'corems:'.
    +591
    +592        """
    +593
    +594        # returns pandas dataframe
    +595        exportMS = LowResGCMSExport(self.sample_name, self)
    +596        return exportMS.get_json(id_label=id_label)
     
    @@ -2585,19 +2767,19 @@
    Parameters
    -
    535    def to_hdf(self, id_label="corems:"):
    -536        """Export the GC-MS data to a HDF file.
    -537
    -538        Parameters
    -539        ----------
    -540        id_label : str, optional
    -541            Label of the ID. Defaults to 'corems:'.
    -542
    -543        """
    -544
    -545        # returns pandas dataframe
    -546        exportMS = LowResGCMSExport(self.sample_name, self)
    -547        return exportMS.to_hdf(id_label=id_label)
    +            
    598    def to_hdf(self, id_label="corems:"):
    +599        """Export the GC-MS data to a HDF file.
    +600
    +601        Parameters
    +602        ----------
    +603        id_label : str, optional
    +604            Label of the ID. Defaults to 'corems:'.
    +605
    +606        """
    +607
    +608        # returns pandas dataframe
    +609        exportMS = LowResGCMSExport(self.sample_name, self)
    +610        return exportMS.to_hdf(id_label=id_label)
     
    @@ -2624,27 +2806,27 @@
    Parameters
    -
    549    def plot_chromatogram(self, ax=None, color="blue"): #pragma: no cover
    -550        """Plot the chromatogram.
    -551        
    -552        Parameters
    -553        ----------
    -554        ax : matplotlib.axes.Axes, optional
    -555            Axes object to plot the chromatogram. Defaults to None.
    -556        color : str, optional
    -557            Color of the chromatogram. Defaults to 'blue'.
    -558            
    -559        """
    -560
    -561        import matplotlib.pyplot as plt
    -562
    -563        if ax is None:
    -564            ax = plt.gca()
    -565
    -566        ax.plot(self.retention_time, self.tic, color=color)
    -567        ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram')
    -568
    -569        return ax
    +            
    612    def plot_chromatogram(self, ax=None, color="blue"):  # pragma: no cover
    +613        """Plot the chromatogram.
    +614
    +615        Parameters
    +616        ----------
    +617        ax : matplotlib.axes.Axes, optional
    +618            Axes object to plot the chromatogram. Defaults to None.
    +619        color : str, optional
    +620            Color of the chromatogram. Defaults to 'blue'.
    +621
    +622        """
    +623
    +624        import matplotlib.pyplot as plt
    +625
    +626        if ax is None:
    +627            ax = plt.gca()
    +628
    +629        ax.plot(self.retention_time, self.tic, color=color)
    +630        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")
    +631
    +632        return ax
     
    @@ -2673,29 +2855,28 @@
    Parameters
    -
    571    def plot_smoothed_chromatogram(self, ax=None, color="green"):  #pragma: no cover
    -572        """Plot the smoothed chromatogram.
    -573        
    -574        Parameters
    -575        ----------
    -576        ax : matplotlib.axes.Axes, optional
    -577            Axes object to plot the smoothed chromatogram. Defaults to None.
    -578        color : str, optional
    -579            Color of the smoothed chromatogram. Defaults to 'green'.
    -580            
    -581        """
    -582
    -583        import matplotlib.pyplot as plt
    -584
    -585        if ax is None:
    -586
    -587            ax = plt.gca()
    -588
    -589        ax.plot(self.retention_time, self.smooth_tic(self.tic), color=color)
    -590
    -591        ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram')
    -592
    -593        return ax
    +            
    634    def plot_smoothed_chromatogram(self, ax=None, color="green"):  # pragma: no cover
    +635        """Plot the smoothed chromatogram.
    +636
    +637        Parameters
    +638        ----------
    +639        ax : matplotlib.axes.Axes, optional
    +640            Axes object to plot the smoothed chromatogram. Defaults to None.
    +641        color : str, optional
    +642            Color of the smoothed chromatogram. Defaults to 'green'.
    +643
    +644        """
    +645
    +646        import matplotlib.pyplot as plt
    +647
    +648        if ax is None:
    +649            ax = plt.gca()
    +650
    +651        ax.plot(self.retention_time, self.smooth_tic(self.tic), color=color)
    +652
    +653        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")
    +654
    +655        return ax
     
    @@ -2724,32 +2905,33 @@
    Parameters
    -
    595    def plot_detected_baseline(self, ax=None, color="blue"):  # pragma: no cover
    -596        """Plot the detected baseline.
    -597
    -598        Parameters
    -599        ----------
    -600        ax : matplotlib.axes.Axes, optional
    -601            Axes object to plot the detected baseline. Defaults to None.
    -602        color : str, optional
    -603            Color of the detected baseline. Defaults to 'blue'.
    -604
    -605        """
    -606
    -607        import matplotlib.pyplot as plt
    -608
    -609        if ax is None:
    -610
    -611            ax = plt.gca()
    -612
    -613        max_height = self.chromatogram_settings.peak_height_max_percent
    -614        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    -615
    -616        baseline = sp.baseline_detector(self.tic, self.retention_time, max_height, max_prominence)
    -617        ax.plot(self.retention_time, color=color)
    -618        ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram')
    -619
    -620        return ax
    +            
    657    def plot_detected_baseline(self, ax=None, color="blue"):  # pragma: no cover
    +658        """Plot the detected baseline.
    +659
    +660        Parameters
    +661        ----------
    +662        ax : matplotlib.axes.Axes, optional
    +663            Axes object to plot the detected baseline. Defaults to None.
    +664        color : str, optional
    +665            Color of the detected baseline. Defaults to 'blue'.
    +666
    +667        """
    +668
    +669        import matplotlib.pyplot as plt
    +670
    +671        if ax is None:
    +672            ax = plt.gca()
    +673
    +674        max_height = self.chromatogram_settings.peak_height_max_percent
    +675        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    +676
    +677        baseline = sp.baseline_detector(
    +678            self.tic, self.retention_time, max_height, max_prominence
    +679        )
    +680        ax.plot(self.retention_time, color=color)
    +681        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")
    +682
    +683        return ax
     
    @@ -2778,35 +2960,36 @@
    Parameters
    -
    622    def plot_baseline_subtraction(self, ax=None, color="black"):  # pragma: no cover
    -623        """Plot the baseline subtraction.
    -624
    -625        Parameters
    -626        ----------
    -627        ax : matplotlib.axes.Axes, optional
    -628            Axes object to plot the baseline subtraction. Defaults to None.
    -629        color : str, optional
    -630            Color of the baseline subtraction. Defaults to 'black'.
    -631
    -632        """
    -633
    -634        import matplotlib.pyplot as plt
    -635
    -636        if ax is None:
    -637
    -638            ax = plt.gca()
    -639
    -640        max_height = self.chromatogram_settings.peak_height_max_percent
    -641
    -642        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    -643
    -644        x = self.tic + sp.baseline_detector(self.tic, self.retention_time, max_height, max_prominence)
    -645
    -646        ax.plot(self.retention_time, x, color=color)
    -647
    -648        ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram')
    -649
    -650        return ax
    +            
    685    def plot_baseline_subtraction(self, ax=None, color="black"):  # pragma: no cover
    +686        """Plot the baseline subtraction.
    +687
    +688        Parameters
    +689        ----------
    +690        ax : matplotlib.axes.Axes, optional
    +691            Axes object to plot the baseline subtraction. Defaults to None.
    +692        color : str, optional
    +693            Color of the baseline subtraction. Defaults to 'black'.
    +694
    +695        """
    +696
    +697        import matplotlib.pyplot as plt
    +698
    +699        if ax is None:
    +700            ax = plt.gca()
    +701
    +702        max_height = self.chromatogram_settings.peak_height_max_percent
    +703
    +704        max_prominence = self.chromatogram_settings.peak_max_prominence_percent
    +705
    +706        x = self.tic + sp.baseline_detector(
    +707            self.tic, self.retention_time, max_height, max_prominence
    +708        )
    +709
    +710        ax.plot(self.retention_time, x, color=color)
    +711
    +712        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")
    +713
    +714        return ax
     
    @@ -2835,50 +3018,48 @@
    Parameters
    -
    652    def peaks_rt_tic(self, json_string=False):
    -653        """Return the peaks, retention time, and total ion chromatogram.
    -654        
    -655        Parameters
    -656        ----------
    -657        json_string : bool, optional
    -658            If True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False.
    -659        
    -660        """
    -661
    -662        peaks_list = dict()
    -663
    -664        all_candidates_data = {}
    -665
    -666        all_peaks_data = {}
    -667
    -668        for gcms_peak in self.sorted_gcpeaks:
    -669
    -670            dict_data = {'rt': gcms_peak.rt_list,
    -671                         'tic': gcms_peak.tic_list,
    -672                         'mz': gcms_peak.mass_spectrum.mz_exp.tolist(),
    -673                         'abundance': gcms_peak.mass_spectrum.abundance.tolist(),
    -674                         'candidate_names': gcms_peak.compound_names,
    -675                         }
    -676
    -677            peaks_list[gcms_peak.retention_time] = dict_data
    -678
    -679            for compound in gcms_peak:
    -680
    -681                if compound.name not in all_candidates_data.keys():
    -682                    mz = array(compound.mz).tolist()
    -683                    abundance = array(compound.abundance).tolist()
    -684                    data = {'mz': mz, "abundance": abundance}
    -685                    all_candidates_data[compound.name] = data
    -686
    -687        all_peaks_data["peak_data"] = peaks_list
    -688        all_peaks_data["ref_data"] = all_candidates_data
    -689
    -690        if json_string:
    -691
    -692            return json.dumps(all_peaks_data)
    -693
    -694        else:
    -695            return all_peaks_data
    +            
    716    def peaks_rt_tic(self, json_string=False):
    +717        """Return the peaks, retention time, and total ion chromatogram.
    +718
    +719        Parameters
    +720        ----------
    +721        json_string : bool, optional
    +722            If True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False.
    +723
    +724        """
    +725
    +726        peaks_list = dict()
    +727
    +728        all_candidates_data = {}
    +729
    +730        all_peaks_data = {}
    +731
    +732        for gcms_peak in self.sorted_gcpeaks:
    +733            dict_data = {
    +734                "rt": gcms_peak.rt_list,
    +735                "tic": gcms_peak.tic_list,
    +736                "mz": gcms_peak.mass_spectrum.mz_exp.tolist(),
    +737                "abundance": gcms_peak.mass_spectrum.abundance.tolist(),
    +738                "candidate_names": gcms_peak.compound_names,
    +739            }
    +740
    +741            peaks_list[gcms_peak.retention_time] = dict_data
    +742
    +743            for compound in gcms_peak:
    +744                if compound.name not in all_candidates_data.keys():
    +745                    mz = array(compound.mz).tolist()
    +746                    abundance = array(compound.abundance).tolist()
    +747                    data = {"mz": mz, "abundance": abundance}
    +748                    all_candidates_data[compound.name] = data
    +749
    +750        all_peaks_data["peak_data"] = peaks_list
    +751        all_peaks_data["ref_data"] = all_candidates_data
    +752
    +753        if json_string:
    +754            return json.dumps(all_peaks_data)
    +755
    +756        else:
    +757            return all_peaks_data
     
    @@ -2905,29 +3086,28 @@
    Parameters
    -
    697    def plot_processed_chromatogram(self, ax=None, color="black"):
    -698        """Plot the processed chromatogram.
    -699
    -700        Parameters
    -701        ----------
    -702        ax : matplotlib.axes.Axes, optional
    -703            Axes object to plot the processed chromatogram. Defaults to None.
    -704        color : str, optional
    -705            Color of the processed chromatogram. Defaults to 'black'.
    -706
    -707        """
    -708        
    -709        import matplotlib.pyplot as plt
    -710
    -711        if ax is None:
    -712
    -713            ax = plt.gca()
    -714
    -715        ax.plot(self.retention_time, self.processed_tic, color=color)
    -716
    -717        ax.set(xlabel='Retention Time (s)', ylabel='Total Ion Chromatogram')
    -718
    -719        return ax
    +            
    759    def plot_processed_chromatogram(self, ax=None, color="black"):
    +760        """Plot the processed chromatogram.
    +761
    +762        Parameters
    +763        ----------
    +764        ax : matplotlib.axes.Axes, optional
    +765            Axes object to plot the processed chromatogram. Defaults to None.
    +766        color : str, optional
    +767            Color of the processed chromatogram. Defaults to 'black'.
    +768
    +769        """
    +770
    +771        import matplotlib.pyplot as plt
    +772
    +773        if ax is None:
    +774            ax = plt.gca()
    +775
    +776        ax.plot(self.retention_time, self.processed_tic, color=color)
    +777
    +778        ax.set(xlabel="Retention Time (s)", ylabel="Total Ion Chromatogram")
    +779
    +780        return ax
     
    diff --git a/docs/corems/mass_spectra/factory/LC_Class.html b/docs/corems/mass_spectra/factory/LC_Class.html index f87efa7a..8b2e27cb 100644 --- a/docs/corems/mass_spectra/factory/LC_Class.html +++ b/docs/corems/mass_spectra/factory/LC_Class.html @@ -158,1104 +158,1113 @@

    9from corems.molecular_id.search.lcms_spectral_search import LCMSSpectralSearch 10from corems.mass_spectrum.input.numpyArray import ms_from_array_profile 11 - 12class MassSpectraBase: - 13 """Base class for mass spectra objects. - 14 - 15 Parameters - 16 ----------- - 17 file_location : str or Path - 18 The location of the file containing the mass spectra data. - 19 analyzer : str, optional - 20 The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'. - 21 instrument_label : str, optional - 22 The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'. - 23 sample_name : str, optional - 24 The name of the sample; defaults to the file name if not provided to the parser. Defaults to None. - 25 spectra_parser : object, optional - 26 The spectra parser object used to create the mass spectra object. Defaults to None. - 27 - 28 Attributes - 29 ----------- - 30 spectra_parser_class : class - 31 The class of the spectra parser used to create the mass spectra object. - 32 file_location : str or Path - 33 The location of the file containing the mass spectra data. - 34 sample_name : str - 35 The name of the sample; defaults to the file name if not provided to the parser. - 36 analyzer : str - 37 The type of analyzer used to generate the mass spectra data. Derived from the spectra parser. - 38 instrument_label : str - 39 The type of instrument used to generate the mass spectra data. Derived from the spectra parser. - 40 _scan_info : dict - 41 A dictionary containing the scan data with columns for scan number, scan time, ms level, precursor m/z, - 42 scan text, and scan window (lower and upper). - 43 Associated with the property scan_df, which returns a pandas DataFrame or can set this attribute from a pandas DataFrame. - 44 _ms : dict - 45 A dictionary containing mass spectra for the dataset, keys of dictionary are scan numbers. Initialized as an empty dictionary. 
- 46 _ms_unprocessed: dictionary of pandas.DataFrames or None - 47 A dictionary of unprocssed mass spectra data, as an (optional) intermediate data product for peak picking. - 48 Key is ms_level, and value is dataframe with columns for scan number, m/z, and intensity. Default is None. - 49 - 50 Methods - 51 -------- - 52 * add_mass_spectra(scan_list, spectrum_mode: str = 'profile', use_parser = True, auto_process=True). - 53 Add mass spectra (or singlel mass spectrum) to _ms slot, from a list of scans - 54 * get_time_of_scan_id(scan). - 55 Returns the scan time for the specified scan number. - 56 """ - 57 - 58 def __init__( - 59 self, - 60 file_location, - 61 analyzer="Unknown", - 62 instrument_label="Unknown", - 63 sample_name=None, - 64 spectra_parser=None, - 65 ): - 66 if isinstance(file_location, str): - 67 file_location = Path(file_location) - 68 else: - 69 file_location = file_location - 70 if not file_location.exists(): - 71 raise FileExistsError("File does not exist: " + str(file_location)) - 72 - 73 if sample_name: - 74 self.sample_name = sample_name - 75 else: - 76 self.sample_name = file_location.stem - 77 - 78 self.file_location = file_location - 79 self.analyzer = analyzer - 80 self.instrument_label = instrument_label - 81 - 82 # Add the spectra parser class to the object if it is not None - 83 if spectra_parser is not None: - 84 self.spectra_parser_class = spectra_parser.__class__ - 85 self.spectra_parser = spectra_parser - 86 # Check that spectra_pasrser.sample_name is same as sample_name etc, raise warning if not - 87 if ( - 88 self.sample_name is not None - 89 and self.sample_name != self.spectra_parser.sample_name - 90 ): - 91 warnings.warn( - 92 "sample_name provided to MassSpectraBase object does not match sample_name provided to spectra parser object", - 93 UserWarning - 94 ) - 95 if self.analyzer != self.spectra_parser.analyzer: - 96 warnings.warn( - 97 "analyzer provided to MassSpectraBase object does not match analyzer provided to spectra 
parser object", - 98 UserWarning - 99 ) - 100 if self.instrument_label != self.spectra_parser.instrument_label: - 101 warnings.warn( - 102 "instrument provided to MassSpectraBase object does not match instrument provided to spectra parser object", - 103 UserWarning - 104 ) - 105 if self.file_location != self.spectra_parser.file_location: - 106 warnings.warn( - 107 "file_location provided to MassSpectraBase object does not match file_location provided to spectra parser object", - 108 UserWarning - 109 ) - 110 - 111 # Instantiate empty dictionaries for scan information and mass spectra - 112 self._scan_info = {} - 113 self._ms = {} - 114 self._ms_unprocessed = {} - 115 - 116 def add_mass_spectrum(self, mass_spec): - 117 """Adds a mass spectrum to the dataset. - 118 - 119 Parameters - 120 ----------- - 121 mass_spec : MassSpectrum - 122 The corems MassSpectrum object to be added to the dataset. - 123 - 124 Notes - 125 ----- - 126 This is a helper function for the add_mass_spectra() method, and is not intended to be called directly. - 127 """ - 128 # check if mass_spec has a scan_number attribute - 129 if not hasattr(mass_spec, "scan_number"): - 130 raise ValueError( - 131 "Mass spectrum must have a scan_number attribute to be added to the dataset correctly" - 132 ) - 133 self._ms[mass_spec.scan_number] = mass_spec - 134 - 135 def add_mass_spectra( - 136 self, - 137 scan_list, - 138 spectrum_mode=None, - 139 ms_level=1, - 140 use_parser=True, - 141 auto_process=True, - 142 ms_params=None, - 143 ): - 144 """Add mass spectra to _ms dictionary, from a list of scans or single scan - 145 - 146 Notes - 147 ----- - 148 The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object. - 149 + 12 + 13class MassSpectraBase: + 14 """Base class for mass spectra objects. + 15 + 16 Parameters + 17 ----------- + 18 file_location : str or Path + 19 The location of the file containing the mass spectra data. 
+ 20 analyzer : str, optional + 21 The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'. + 22 instrument_label : str, optional + 23 The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'. + 24 sample_name : str, optional + 25 The name of the sample; defaults to the file name if not provided to the parser. Defaults to None. + 26 spectra_parser : object, optional + 27 The spectra parser object used to create the mass spectra object. Defaults to None. + 28 + 29 Attributes + 30 ----------- + 31 spectra_parser_class : class + 32 The class of the spectra parser used to create the mass spectra object. + 33 file_location : str or Path + 34 The location of the file containing the mass spectra data. + 35 sample_name : str + 36 The name of the sample; defaults to the file name if not provided to the parser. + 37 analyzer : str + 38 The type of analyzer used to generate the mass spectra data. Derived from the spectra parser. + 39 instrument_label : str + 40 The type of instrument used to generate the mass spectra data. Derived from the spectra parser. + 41 _scan_info : dict + 42 A dictionary containing the scan data with columns for scan number, scan time, ms level, precursor m/z, + 43 scan text, and scan window (lower and upper). + 44 Associated with the property scan_df, which returns a pandas DataFrame or can set this attribute from a pandas DataFrame. + 45 _ms : dict + 46 A dictionary containing mass spectra for the dataset, keys of dictionary are scan numbers. Initialized as an empty dictionary. + 47 _ms_unprocessed: dictionary of pandas.DataFrames or None + 48 A dictionary of unprocssed mass spectra data, as an (optional) intermediate data product for peak picking. + 49 Key is ms_level, and value is dataframe with columns for scan number, m/z, and intensity. Default is None. + 50 + 51 Methods + 52 -------- + 53 * add_mass_spectra(scan_list, spectrum_mode: str = 'profile', use_parser = True, auto_process=True). 
+ 54 Add mass spectra (or singlel mass spectrum) to _ms slot, from a list of scans + 55 * get_time_of_scan_id(scan). + 56 Returns the scan time for the specified scan number. + 57 """ + 58 + 59 def __init__( + 60 self, + 61 file_location, + 62 analyzer="Unknown", + 63 instrument_label="Unknown", + 64 sample_name=None, + 65 spectra_parser=None, + 66 ): + 67 if isinstance(file_location, str): + 68 file_location = Path(file_location) + 69 else: + 70 file_location = file_location + 71 if not file_location.exists(): + 72 raise FileExistsError("File does not exist: " + str(file_location)) + 73 + 74 if sample_name: + 75 self.sample_name = sample_name + 76 else: + 77 self.sample_name = file_location.stem + 78 + 79 self.file_location = file_location + 80 self.analyzer = analyzer + 81 self.instrument_label = instrument_label + 82 + 83 # Add the spectra parser class to the object if it is not None + 84 if spectra_parser is not None: + 85 self.spectra_parser_class = spectra_parser.__class__ + 86 self.spectra_parser = spectra_parser + 87 # Check that spectra_pasrser.sample_name is same as sample_name etc, raise warning if not + 88 if ( + 89 self.sample_name is not None + 90 and self.sample_name != self.spectra_parser.sample_name + 91 ): + 92 warnings.warn( + 93 "sample_name provided to MassSpectraBase object does not match sample_name provided to spectra parser object", + 94 UserWarning, + 95 ) + 96 if self.analyzer != self.spectra_parser.analyzer: + 97 warnings.warn( + 98 "analyzer provided to MassSpectraBase object does not match analyzer provided to spectra parser object", + 99 UserWarning, + 100 ) + 101 if self.instrument_label != self.spectra_parser.instrument_label: + 102 warnings.warn( + 103 "instrument provided to MassSpectraBase object does not match instrument provided to spectra parser object", + 104 UserWarning, + 105 ) + 106 if self.file_location != self.spectra_parser.file_location: + 107 warnings.warn( + 108 "file_location provided to MassSpectraBase object does 
not match file_location provided to spectra parser object", + 109 UserWarning, + 110 ) + 111 + 112 # Instantiate empty dictionaries for scan information and mass spectra + 113 self._scan_info = {} + 114 self._ms = {} + 115 self._ms_unprocessed = {} + 116 + 117 def add_mass_spectrum(self, mass_spec): + 118 """Adds a mass spectrum to the dataset. + 119 + 120 Parameters + 121 ----------- + 122 mass_spec : MassSpectrum + 123 The corems MassSpectrum object to be added to the dataset. + 124 + 125 Notes + 126 ----- + 127 This is a helper function for the add_mass_spectra() method, and is not intended to be called directly. + 128 """ + 129 # check if mass_spec has a scan_number attribute + 130 if not hasattr(mass_spec, "scan_number"): + 131 raise ValueError( + 132 "Mass spectrum must have a scan_number attribute to be added to the dataset correctly" + 133 ) + 134 self._ms[mass_spec.scan_number] = mass_spec + 135 + 136 def add_mass_spectra( + 137 self, + 138 scan_list, + 139 spectrum_mode=None, + 140 ms_level=1, + 141 use_parser=True, + 142 auto_process=True, + 143 ms_params=None, + 144 ): + 145 """Add mass spectra to _ms dictionary, from a list of scans or single scan + 146 + 147 Notes + 148 ----- + 149 The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object. 150 - 151 Parameters - 152 ----------- - 153 scan_list : list of ints - 154 List of scans to use to populate _ms slot - 155 spectrum_mode : str or None - 156 The spectrum mode to use for the mass spectra. - 157 If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types). - 158 Defaults to None. - 159 ms_level : int, optional - 160 The MS level to use for the mass spectra. - 161 This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects. - 162 Defaults to 1. 
- 163 using_parser : bool - 164 Whether to use the mass spectra parser to get the mass spectra. Defaults to True. - 165 auto_process : bool - 166 Whether to auto-process the mass spectra. Defaults to True. - 167 ms_params : MSParameters or None - 168 The mass spectrum parameters to use for the mass spectra. If None, uses the globally set MSParameters. - 169 - 170 Raises - 171 ------ - 172 TypeError - 173 If scan_list is not a list of ints - 174 ValueError - 175 If polarity is not 'positive' or 'negative' - 176 If ms_level is not 1 or 2 - 177 """ - 178 - 179 # check if scan_list is a list or a single int; if single int, convert to list - 180 if isinstance(scan_list, int): - 181 scan_list = [scan_list] - 182 if not isinstance(scan_list, list): - 183 raise TypeError("scan_list must be a list of integers") - 184 for scan in scan_list: - 185 if not isinstance(scan, int): - 186 raise TypeError("scan_list must be a list of integers") - 187 - 188 # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation) - 189 if self.polarity == "negative": - 190 polarity = -1 - 191 elif self.polarity == "positive": - 192 polarity = 1 - 193 else: - 194 raise ValueError( - 195 "Polarity not set for dataset, must be a either 'positive' or 'negative'" - 196 ) - 197 - 198 # is not using_parser, check that ms1 and ms2 are not None - 199 if not use_parser: - 200 if ms_level not in self._ms_unprocessed.keys(): - 201 raise ValueError( - 202 "ms_level {} not found in _ms_unprocessed dictionary".format( - 203 ms_level - 204 ) - 205 ) - 206 - 207 scan_list = list(set(scan_list)) - 208 scan_list.sort() - 209 if not use_parser: - 210 if self._ms_unprocessed[ms_level] is None: - 211 raise ValueError( - 212 "No unprocessed data found for ms_level {}".format(ms_level) - 213 ) - 214 if ( - 215 len( - 216 np.setdiff1d( - 217 scan_list, self._ms_unprocessed[ms_level].scan.tolist() - 218 ) - 219 ) - 220 > 0 - 221 ): - 222 raise ValueError( - 223 "Not all scans in scan_list are 
present in the unprocessed data" - 224 ) - 225 # Prepare the ms_df for parsing - 226 ms_df = self._ms_unprocessed[ms_level].copy().set_index("scan", drop=False) - 227 - 228 for scan in scan_list: - 229 ms = None - 230 if spectrum_mode is None: - 231 # get spectrum mode from _scan_info - 232 spectrum_mode_scan = self.scan_df.loc[scan, "ms_format"] - 233 else: - 234 spectrum_mode_scan = spectrum_mode - 235 # Instantiate the mass spectrum object using the parser or the unprocessed data - 236 if not use_parser: - 237 my_ms_df = ms_df.loc[scan] - 238 if spectrum_mode_scan == "profile": - 239 # Check this - it might be better to use the MassSpectrumProfile class to instantiate the mass spectrum - 240 ms = ms_from_array_profile( - 241 my_ms_df.mz, - 242 my_ms_df.intensity, - 243 self.file_location, - 244 polarity=polarity, - 245 auto_process=False, - 246 ) - 247 else: - 248 raise ValueError( - 249 "Only profile mode is supported for unprocessed data" - 250 ) - 251 if use_parser: - 252 ms = self.spectra_parser.get_mass_spectrum_from_scan( - 253 scan_number=scan, - 254 spectrum_mode=spectrum_mode_scan, - 255 auto_process=False, - 256 ) - 257 - 258 # Set the mass spectrum parameters, auto-process if auto_process is True, and add to the dataset - 259 if ms is not None: - 260 if ms_params is not None: - 261 ms.parameters = ms_params - 262 ms.scan_number = scan - 263 if auto_process: - 264 ms.process_mass_spec() - 265 self.add_mass_spectrum(ms) - 266 - 267 def get_time_of_scan_id(self, scan): - 268 """Returns the scan time for the specified scan number. - 269 - 270 Parameters - 271 ----------- - 272 scan : int - 273 The scan number of the desired scan time. - 274 - 275 Returns - 276 -------- - 277 float - 278 The scan time for the specified scan number (in minutes). - 279 - 280 Raises - 281 ------ - 282 ValueError - 283 If no scan time is found for the specified scan number. 
- 284 """ - 285 # Check if _retenion_time_list is empty and raise error if so - 286 if len(self._retention_time_list) == 0: - 287 raise ValueError("No retention times found in dataset") - 288 rt = self._retention_time_list[self._scans_number_list.index(scan)] - 289 return rt - 290 - 291 @property - 292 def scan_df(self): - 293 """ - 294 pandas.DataFrame : A pandas DataFrame containing the scan info data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper). - 295 """ - 296 scan_df = pd.DataFrame.from_dict(self._scan_info) - 297 return scan_df - 298 - 299 @scan_df.setter - 300 def scan_df(self, df): - 301 """ - 302 Sets the scan data for the dataset. - 303 - 304 Parameters - 305 ----------- - 306 df : pandas.DataFrame - 307 A pandas DataFrame containing the scan data with columns for scan number, scan time, ms level, - 308 precursor m/z, scan text, and scan window (lower and upper). - 309 """ - 310 self._scan_info = df.to_dict() - 311 - 312 def __getitem__(self, scan_number): - 313 return self._ms.get(scan_number) - 314 + 151 + 152 Parameters + 153 ----------- + 154 scan_list : list of ints + 155 List of scans to use to populate _ms slot + 156 spectrum_mode : str or None + 157 The spectrum mode to use for the mass spectra. + 158 If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types). + 159 Defaults to None. + 160 ms_level : int, optional + 161 The MS level to use for the mass spectra. + 162 This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects. + 163 Defaults to 1. + 164 using_parser : bool + 165 Whether to use the mass spectra parser to get the mass spectra. Defaults to True. + 166 auto_process : bool + 167 Whether to auto-process the mass spectra. Defaults to True. + 168 ms_params : MSParameters or None + 169 The mass spectrum parameters to use for the mass spectra. 
If None, uses the globally set MSParameters. + 170 + 171 Raises + 172 ------ + 173 TypeError + 174 If scan_list is not a list of ints + 175 ValueError + 176 If polarity is not 'positive' or 'negative' + 177 If ms_level is not 1 or 2 + 178 """ + 179 + 180 # check if scan_list is a list or a single int; if single int, convert to list + 181 if isinstance(scan_list, int): + 182 scan_list = [scan_list] + 183 if not isinstance(scan_list, list): + 184 raise TypeError("scan_list must be a list of integers") + 185 for scan in scan_list: + 186 if not isinstance(scan, int): + 187 raise TypeError("scan_list must be a list of integers") + 188 + 189 # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation) + 190 if self.polarity == "negative": + 191 polarity = -1 + 192 elif self.polarity == "positive": + 193 polarity = 1 + 194 else: + 195 raise ValueError( + 196 "Polarity not set for dataset, must be a either 'positive' or 'negative'" + 197 ) + 198 + 199 # is not using_parser, check that ms1 and ms2 are not None + 200 if not use_parser: + 201 if ms_level not in self._ms_unprocessed.keys(): + 202 raise ValueError( + 203 "ms_level {} not found in _ms_unprocessed dictionary".format( + 204 ms_level + 205 ) + 206 ) + 207 + 208 scan_list = list(set(scan_list)) + 209 scan_list.sort() + 210 if not use_parser: + 211 if self._ms_unprocessed[ms_level] is None: + 212 raise ValueError( + 213 "No unprocessed data found for ms_level {}".format(ms_level) + 214 ) + 215 if ( + 216 len( + 217 np.setdiff1d( + 218 scan_list, self._ms_unprocessed[ms_level].scan.tolist() + 219 ) + 220 ) + 221 > 0 + 222 ): + 223 raise ValueError( + 224 "Not all scans in scan_list are present in the unprocessed data" + 225 ) + 226 # Prepare the ms_df for parsing + 227 ms_df = self._ms_unprocessed[ms_level].copy().set_index("scan", drop=False) + 228 + 229 for scan in scan_list: + 230 ms = None + 231 if spectrum_mode is None: + 232 # get spectrum mode from _scan_info + 233 spectrum_mode_scan = 
self.scan_df.loc[scan, "ms_format"] + 234 else: + 235 spectrum_mode_scan = spectrum_mode + 236 # Instantiate the mass spectrum object using the parser or the unprocessed data + 237 if not use_parser: + 238 my_ms_df = ms_df.loc[scan] + 239 if spectrum_mode_scan == "profile": + 240 # Check this - it might be better to use the MassSpectrumProfile class to instantiate the mass spectrum + 241 ms = ms_from_array_profile( + 242 my_ms_df.mz, + 243 my_ms_df.intensity, + 244 self.file_location, + 245 polarity=polarity, + 246 auto_process=False, + 247 ) + 248 else: + 249 raise ValueError( + 250 "Only profile mode is supported for unprocessed data" + 251 ) + 252 if use_parser: + 253 ms = self.spectra_parser.get_mass_spectrum_from_scan( + 254 scan_number=scan, + 255 spectrum_mode=spectrum_mode_scan, + 256 auto_process=False, + 257 ) + 258 + 259 # Set the mass spectrum parameters, auto-process if auto_process is True, and add to the dataset + 260 if ms is not None: + 261 if ms_params is not None: + 262 ms.parameters = ms_params + 263 ms.scan_number = scan + 264 if auto_process: + 265 ms.process_mass_spec() + 266 self.add_mass_spectrum(ms) + 267 + 268 def get_time_of_scan_id(self, scan): + 269 """Returns the scan time for the specified scan number. + 270 + 271 Parameters + 272 ----------- + 273 scan : int + 274 The scan number of the desired scan time. + 275 + 276 Returns + 277 -------- + 278 float + 279 The scan time for the specified scan number (in minutes). + 280 + 281 Raises + 282 ------ + 283 ValueError + 284 If no scan time is found for the specified scan number. 
+ 285 """ + 286 # Check if _retenion_time_list is empty and raise error if so + 287 if len(self._retention_time_list) == 0: + 288 raise ValueError("No retention times found in dataset") + 289 rt = self._retention_time_list[self._scans_number_list.index(scan)] + 290 return rt + 291 + 292 @property + 293 def scan_df(self): + 294 """ + 295 pandas.DataFrame : A pandas DataFrame containing the scan info data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper). + 296 """ + 297 scan_df = pd.DataFrame.from_dict(self._scan_info) + 298 return scan_df + 299 + 300 @scan_df.setter + 301 def scan_df(self, df): + 302 """ + 303 Sets the scan data for the dataset. + 304 + 305 Parameters + 306 ----------- + 307 df : pandas.DataFrame + 308 A pandas DataFrame containing the scan data with columns for scan number, scan time, ms level, + 309 precursor m/z, scan text, and scan window (lower and upper). + 310 """ + 311 self._scan_info = df.to_dict() + 312 + 313 def __getitem__(self, scan_number): + 314 return self._ms.get(scan_number) 315 - 316class LCMSBase(MassSpectraBase, LCCalculations, PHCalculations, LCMSSpectralSearch): - 317 """A class representing a liquid chromatography-mass spectrometry (LC-MS) data object. - 318 - 319 This class is not intended to be instantiated directly, but rather to be instantiated by an appropriate mass spectra parser using the get_lcms_obj() method. - 320 - 321 Parameters - 322 ----------- - 323 file_location : str or Path - 324 The location of the file containing the mass spectra data. - 325 analyzer : str, optional - 326 The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'. - 327 instrument_label : str, optional - 328 The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'. - 329 sample_name : str, optional - 330 The name of the sample; defaults to the file name if not provided to the parser. Defaults to None. 
- 331 spectra_parser : object, optional - 332 The spectra parser object used to create the mass spectra object. Defaults to None. - 333 - 334 Attributes - 335 ----------- - 336 polarity : str - 337 The polarity of the ionization mode used for the dataset. - 338 _parameters : LCMSParameters - 339 The parameters used for all methods called on the LCMSBase object. Set upon instantiation from LCMSParameters. - 340 _retention_time_list : numpy.ndarray - 341 An array of retention times for the dataset. - 342 _scans_number_list : list - 343 A list of scan numbers for the dataset. - 344 _tic_list : numpy.ndarray - 345 An array of total ion current (TIC) values for the dataset. - 346 eics : dict - 347 A dictionary containing extracted ion chromatograms (EICs) for the dataset. - 348 Key is the mz of the EIC. Initialized as an empty dictionary. - 349 mass_features : dictionary of LCMSMassFeature objects - 350 A dictionary containing mass features for the dataset. - 351 Key is mass feature ID. Initialized as an empty dictionary. - 352 spectral_search_results : dictionary of MS2SearchResults objects - 353 A dictionary containing spectral search results for the dataset. - 354 Key is scan number : precursor mz. Initialized as an empty dictionary. - 355 - 356 Methods - 357 -------- - 358 * get_parameters_json(). - 359 Returns the parameters used for the LC-MS analysis in JSON format. - 360 * add_associated_ms2_dda(add_to_lcmsobj=True, auto_process=True, use_parser=True) - 361 Adds which MS2 scans are associated with each mass feature to the - 362 mass_features dictionary and optionally adds the MS2 spectra to the _ms dictionary. - 363 * add_associated_ms1(add_to_lcmsobj=True, auto_process=True, use_parser=True) - 364 Adds the MS1 spectra associated with each mass feature to the - 365 mass_features dictionary and adds the MS1 spectra to the _ms dictionary. - 366 * mass_features_to_df() - 367 Returns a pandas dataframe summarizing the mass features in the dataset. 
- 368 * set_tic_list_from_data(overwrite=False) - 369 Sets the TIC list from the mass spectrum objects within the _ms dictionary. - 370 * set_retention_time_from_data(overwrite=False) - 371 Sets the retention time list from the data in the _ms dictionary. - 372 * set_scans_number_from_data(overwrite=False) - 373 Sets the scan number list from the data in the _ms dictionary. - 374 """ - 375 - 376 def __init__( - 377 self, - 378 file_location, - 379 analyzer="Unknown", - 380 instrument_label="Unknown", - 381 sample_name=None, - 382 spectra_parser=None, - 383 ): - 384 super().__init__( - 385 file_location, analyzer, instrument_label, sample_name, spectra_parser - 386 ) - 387 self.polarity = "" - 388 self._parameters = LCMSParameters() - 389 self._retention_time_list = [] - 390 self._scans_number_list = [] - 391 self._tic_list = [] - 392 self.eics = {} - 393 self.mass_features = {} - 394 self.spectral_search_results = {} - 395 - 396 def get_parameters_json(self): - 397 """Returns the parameters stored for the LC-MS object in JSON format. - 398 - 399 Returns - 400 -------- - 401 str - 402 The parameters used for the LC-MS analysis in JSON format. - 403 """ - 404 return self.parameters.to_json() - 405 - 406 def remove_unprocessed_data(self, ms_level=None): - 407 """Removes the unprocessed data from the LCMSBase object. - 408 - 409 Parameters - 410 ----------- - 411 ms_level : int, optional - 412 The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels. - 413 - 414 Raises - 415 ------ - 416 ValueError - 417 If ms_level is not 1 or 2. - 418 - 419 Notes - 420 ----- - 421 This method is useful for freeing up memory after the data has been processed. 
- 422 """ - 423 if ms_level is None: - 424 for ms_level in self._ms_unprocessed.keys(): - 425 self._ms_unprocessed[ms_level] = None - 426 if ms_level not in [1, 2]: - 427 raise ValueError("ms_level must be 1 or 2") - 428 self._ms_unprocessed[ms_level] = None - 429 - 430 def add_associated_ms2_dda( - 431 self, auto_process=True, use_parser=True, spectrum_mode=None, ms_params_key="ms2", scan_filter=None - 432 ): - 433 """Add MS2 spectra associated with mass features to the dataset. - 434 - 435 Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject) - 436 - 437 Parameters - 438 ----------- - 439 auto_process : bool, optional - 440 If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True. - 441 use_parser : bool, optional - 442 If True, envoke the spectra parser to get the MS2 spectra. Default is True. - 443 spectrum_mode : str or None, optional - 444 The spectrum mode to use for the mass spectra. If None, method will use the spectrum mode - 445 from the spectra parser to ascertain the spectrum mode (this allows for mixed types). - 446 Defaults to None. (faster if defined, otherwise will check each scan) - 447 ms_params_key : string, optional - 448 The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute. - 449 Defaults to 'ms2'. - 450 scan_filter : str - 451 A string to filter the scans to add to the _ms dictionary. If None, all scans are added. Defaults to None. - 452 "hcd" will pull out only HCD scans. - 453 - 454 Raises - 455 ------ - 456 ValueError - 457 If mass_features is not set, must run find_mass_features() first. - 458 If no MS2 scans are found in the dataset. - 459 If no precursor m/z values are found in MS2 scans, not a DDA dataset. 
- 460 """ - 461 # Check if mass_features is set, raise error if not - 462 if self.mass_features is None: - 463 raise ValueError( - 464 "mass_features not set, must run find_mass_features() first" - 465 ) - 466 - 467 # reconfigure ms_params to get the correct mass spectrum parameters from the key - 468 ms_params = self.parameters.mass_spectrum[ms_params_key] - 469 - 470 mf_df = self.mass_features_to_df().copy() - 471 # Find ms2 scans that have a precursor m/z value - 472 ms2_scans = self.scan_df[self.scan_df.ms_level == 2] - 473 ms2_scans = ms2_scans[~ms2_scans.precursor_mz.isna()] - 474 # drop ms2 scans that have no tic - 475 ms2_scans = ms2_scans[ms2_scans.tic > 0] - 476 if ms2_scans is None: - 477 raise ValueError("No DDA scans found in dataset") - 478 - 479 if scan_filter is not None: - 480 ms2_scans = ms2_scans[ms2_scans.scan_text.str.contains(scan_filter)] - 481 # set tolerance in rt space (in minutes) and mz space (in daltons) - 482 time_tol = self.parameters.lc_ms.ms2_dda_rt_tolerance - 483 mz_tol = self.parameters.lc_ms.ms2_dda_mz_tolerance + 316 + 317class LCMSBase(MassSpectraBase, LCCalculations, PHCalculations, LCMSSpectralSearch): + 318 """A class representing a liquid chromatography-mass spectrometry (LC-MS) data object. + 319 + 320 This class is not intended to be instantiated directly, but rather to be instantiated by an appropriate mass spectra parser using the get_lcms_obj() method. + 321 + 322 Parameters + 323 ----------- + 324 file_location : str or Path + 325 The location of the file containing the mass spectra data. + 326 analyzer : str, optional + 327 The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'. + 328 instrument_label : str, optional + 329 The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'. + 330 sample_name : str, optional + 331 The name of the sample; defaults to the file name if not provided to the parser. Defaults to None. 
+ 332 spectra_parser : object, optional + 333 The spectra parser object used to create the mass spectra object. Defaults to None. + 334 + 335 Attributes + 336 ----------- + 337 polarity : str + 338 The polarity of the ionization mode used for the dataset. + 339 _parameters : LCMSParameters + 340 The parameters used for all methods called on the LCMSBase object. Set upon instantiation from LCMSParameters. + 341 _retention_time_list : numpy.ndarray + 342 An array of retention times for the dataset. + 343 _scans_number_list : list + 344 A list of scan numbers for the dataset. + 345 _tic_list : numpy.ndarray + 346 An array of total ion current (TIC) values for the dataset. + 347 eics : dict + 348 A dictionary containing extracted ion chromatograms (EICs) for the dataset. + 349 Key is the mz of the EIC. Initialized as an empty dictionary. + 350 mass_features : dictionary of LCMSMassFeature objects + 351 A dictionary containing mass features for the dataset. + 352 Key is mass feature ID. Initialized as an empty dictionary. + 353 spectral_search_results : dictionary of MS2SearchResults objects + 354 A dictionary containing spectral search results for the dataset. + 355 Key is scan number : precursor mz. Initialized as an empty dictionary. + 356 + 357 Methods + 358 -------- + 359 * get_parameters_json(). + 360 Returns the parameters used for the LC-MS analysis in JSON format. + 361 * add_associated_ms2_dda(add_to_lcmsobj=True, auto_process=True, use_parser=True) + 362 Adds which MS2 scans are associated with each mass feature to the + 363 mass_features dictionary and optionally adds the MS2 spectra to the _ms dictionary. + 364 * add_associated_ms1(add_to_lcmsobj=True, auto_process=True, use_parser=True) + 365 Adds the MS1 spectra associated with each mass feature to the + 366 mass_features dictionary and adds the MS1 spectra to the _ms dictionary. + 367 * mass_features_to_df() + 368 Returns a pandas dataframe summarizing the mass features in the dataset. 
+ 369 * set_tic_list_from_data(overwrite=False) + 370 Sets the TIC list from the mass spectrum objects within the _ms dictionary. + 371 * set_retention_time_from_data(overwrite=False) + 372 Sets the retention time list from the data in the _ms dictionary. + 373 * set_scans_number_from_data(overwrite=False) + 374 Sets the scan number list from the data in the _ms dictionary. + 375 """ + 376 + 377 def __init__( + 378 self, + 379 file_location, + 380 analyzer="Unknown", + 381 instrument_label="Unknown", + 382 sample_name=None, + 383 spectra_parser=None, + 384 ): + 385 super().__init__( + 386 file_location, analyzer, instrument_label, sample_name, spectra_parser + 387 ) + 388 self.polarity = "" + 389 self._parameters = LCMSParameters() + 390 self._retention_time_list = [] + 391 self._scans_number_list = [] + 392 self._tic_list = [] + 393 self.eics = {} + 394 self.mass_features = {} + 395 self.spectral_search_results = {} + 396 + 397 def get_parameters_json(self): + 398 """Returns the parameters stored for the LC-MS object in JSON format. + 399 + 400 Returns + 401 -------- + 402 str + 403 The parameters used for the LC-MS analysis in JSON format. + 404 """ + 405 return self.parameters.to_json() + 406 + 407 def remove_unprocessed_data(self, ms_level=None): + 408 """Removes the unprocessed data from the LCMSBase object. + 409 + 410 Parameters + 411 ----------- + 412 ms_level : int, optional + 413 The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels. + 414 + 415 Raises + 416 ------ + 417 ValueError + 418 If ms_level is not 1 or 2. + 419 + 420 Notes + 421 ----- + 422 This method is useful for freeing up memory after the data has been processed. 
+ 423 """ + 424 if ms_level is None: + 425 for ms_level in self._ms_unprocessed.keys(): + 426 self._ms_unprocessed[ms_level] = None + 427 if ms_level not in [1, 2]: + 428 raise ValueError("ms_level must be 1 or 2") + 429 self._ms_unprocessed[ms_level] = None + 430 + 431 def add_associated_ms2_dda( + 432 self, + 433 auto_process=True, + 434 use_parser=True, + 435 spectrum_mode=None, + 436 ms_params_key="ms2", + 437 scan_filter=None, + 438 ): + 439 """Add MS2 spectra associated with mass features to the dataset. + 440 + 441 Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject) + 442 + 443 Parameters + 444 ----------- + 445 auto_process : bool, optional + 446 If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True. + 447 use_parser : bool, optional + 448 If True, envoke the spectra parser to get the MS2 spectra. Default is True. + 449 spectrum_mode : str or None, optional + 450 The spectrum mode to use for the mass spectra. If None, method will use the spectrum mode + 451 from the spectra parser to ascertain the spectrum mode (this allows for mixed types). + 452 Defaults to None. (faster if defined, otherwise will check each scan) + 453 ms_params_key : string, optional + 454 The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute. + 455 Defaults to 'ms2'. + 456 scan_filter : str + 457 A string to filter the scans to add to the _ms dictionary. If None, all scans are added. Defaults to None. + 458 "hcd" will pull out only HCD scans. + 459 + 460 Raises + 461 ------ + 462 ValueError + 463 If mass_features is not set, must run find_mass_features() first. + 464 If no MS2 scans are found in the dataset. + 465 If no precursor m/z values are found in MS2 scans, not a DDA dataset. 
+ 466 """ + 467 # Check if mass_features is set, raise error if not + 468 if self.mass_features is None: + 469 raise ValueError( + 470 "mass_features not set, must run find_mass_features() first" + 471 ) + 472 + 473 # reconfigure ms_params to get the correct mass spectrum parameters from the key + 474 ms_params = self.parameters.mass_spectrum[ms_params_key] + 475 + 476 mf_df = self.mass_features_to_df().copy() + 477 # Find ms2 scans that have a precursor m/z value + 478 ms2_scans = self.scan_df[self.scan_df.ms_level == 2] + 479 ms2_scans = ms2_scans[~ms2_scans.precursor_mz.isna()] + 480 # drop ms2 scans that have no tic + 481 ms2_scans = ms2_scans[ms2_scans.tic > 0] + 482 if ms2_scans is None: + 483 raise ValueError("No DDA scans found in dataset") 484 - 485 # for each mass feature, find the ms2 scans that are within the roi scan time and mz range - 486 dda_scans = [] - 487 for i, row in mf_df.iterrows(): - 488 ms2_scans_filtered = ms2_scans[ - 489 ms2_scans.scan_time.between( - 490 row.scan_time - time_tol, row.scan_time + time_tol - 491 ) - 492 ] - 493 ms2_scans_filtered = ms2_scans_filtered[ - 494 ms2_scans_filtered.precursor_mz.between( - 495 row.mz - mz_tol, row.mz + mz_tol - 496 ) - 497 ] - 498 dda_scans = dda_scans + ms2_scans_filtered.scan.tolist() - 499 self.mass_features[i].ms2_scan_numbers = ms2_scans_filtered.scan.tolist() + self.mass_features[i].ms2_scan_numbers - 500 # add to _ms attribute - 501 self.add_mass_spectra( - 502 scan_list=list(set(dda_scans)), - 503 auto_process=auto_process, - 504 spectrum_mode=spectrum_mode, - 505 use_parser=use_parser, - 506 ms_params=ms_params, - 507 ) - 508 # associate appropriate _ms attribute to appropriate mass feature's ms2_mass_spectra attribute - 509 for mf_id in self.mass_features: - 510 if self.mass_features[mf_id].ms2_scan_numbers is not None: - 511 for dda_scan in self.mass_features[mf_id].ms2_scan_numbers: - 512 if dda_scan in self._ms.keys(): - 513 self.mass_features[mf_id].ms2_mass_spectra[dda_scan] = 
self._ms[ - 514 dda_scan - 515 ] - 516 - 517 def add_associated_ms1( - 518 self, auto_process=True, use_parser=True, spectrum_mode=None - 519 ): - 520 """Add MS1 spectra associated with mass features to the dataset. - 521 - 522 Parameters - 523 ----------- - 524 auto_process : bool, optional - 525 If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True. - 526 use_parser : bool, optional - 527 If True, envoke the spectra parser to get the MS1 spectra. Default is True. - 528 spectrum_mode : str or None, optional - 529 The spectrum mode to use for the mass spectra. If None, method will use the spectrum mode - 530 from the spectra parser to ascertain the spectrum mode (this allows for mixed types). - 531 Defaults to None. (faster if defined, otherwise will check each scan) - 532 - 533 Raises - 534 ------ - 535 ValueError - 536 If mass_features is not set, must run find_mass_features() first. - 537 If apex scans are not profile mode, all apex scans must be profile mode for averaging. - 538 If number of scans to average is not 1 or an integer with an integer median (i.e. 3, 5, 7, 9). - 539 If deconvolute is True and no EICs are found, did you run integrate_mass_features() first? 
- 540 """ - 541 # Check if mass_features is set, raise error if not - 542 if self.mass_features is None: - 543 raise ValueError( - 544 "mass_features not set, must run find_mass_features() first" - 545 ) - 546 scans_to_average = self.parameters.lc_ms.ms1_scans_to_average - 547 - 548 if scans_to_average == 1: - 549 # Add to LCMSobj - 550 self.add_mass_spectra( - 551 scan_list=[ - 552 int(x) for x in self.mass_features_to_df().apex_scan.tolist() - 553 ], - 554 auto_process=auto_process, - 555 use_parser=use_parser, - 556 spectrum_mode=spectrum_mode, - 557 ms_params=self.parameters.mass_spectrum["ms1"], - 558 ) - 559 - 560 elif ( - 561 (scans_to_average - 1) % 2 - 562 ) == 0: # scans_to_average = 3, 5, 7 etc, mirror l/r around apex - 563 apex_scans = list(set(self.mass_features_to_df().apex_scan.tolist())) - 564 # Check if all apex scans are profile mode, raise error if not - 565 if not all(self.scan_df.loc[apex_scans, "ms_format"] == "profile"): - 566 raise ValueError("All apex scans must be profile mode for averaging") - 567 - 568 # First get sets of scans to average - 569 def get_scans_from_apex(ms1_scans, apex_scan, scans_to_average): - 570 ms1_idx_start = ms1_scans.index(apex_scan) - int( - 571 (scans_to_average - 1) / 2 - 572 ) - 573 if ms1_idx_start < 0: - 574 ms1_idx_start = 0 - 575 ms1_idx_end = ( - 576 ms1_scans.index(apex_scan) + int((scans_to_average - 1) / 2) + 1 - 577 ) - 578 if ms1_idx_end > (len(ms1_scans) - 1): - 579 ms1_idx_end = len(ms1_scans) - 1 - 580 scan_list = ms1_scans[ms1_idx_start:ms1_idx_end] - 581 return scan_list - 582 - 583 ms1_scans = self.ms1_scans - 584 scans_lists = [ - 585 get_scans_from_apex(ms1_scans, apex_scan, scans_to_average) - 586 for apex_scan in apex_scans - 587 ] - 588 - 589 # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation) - 590 if self.polarity == "negative": - 591 polarity = -1 - 592 elif self.polarity == "positive": - 593 polarity = 1 - 594 - 595 if not use_parser: - 596 # Perform 
checks and prepare _ms_unprocessed dictionary if use_parser is False (saves time to do this once) - 597 ms1_unprocessed = self._ms_unprocessed[1].copy() - 598 # Set the index on _ms_unprocessed[1] to scan number - 599 ms1_unprocessed = ms1_unprocessed.set_index("scan", drop=False) - 600 self._ms_unprocessed[1] = ms1_unprocessed - 601 - 602 # Check that all the scans in scan_lists are indexs in self._ms_unprocessed[1] - 603 scans_lists_flat = list( - 604 set([scan for sublist in scans_lists for scan in sublist]) - 605 ) - 606 if ( - 607 len( - 608 np.setdiff1d( - 609 np.sort(scans_lists_flat), - 610 np.sort(ms1_unprocessed.index.values), - 611 ) - 612 ) - 613 > 0 - 614 ): - 615 raise ValueError( - 616 "Not all scans to average are present in the unprocessed data" - 617 ) - 618 - 619 for scan_list_average, apex_scan in zip(scans_lists, apex_scans): - 620 # Get unprocessed mass spectrum from scans - 621 ms = self.get_average_mass_spectrum( - 622 scan_list=scan_list_average, - 623 apex_scan=apex_scan, - 624 spectrum_mode="profile", - 625 ms_level=1, - 626 auto_process=auto_process, - 627 use_parser=use_parser, - 628 perform_checks=False, - 629 polarity=polarity, - 630 ms_params=self.parameters.mass_spectrum["ms1"], - 631 ) - 632 # Add mass spectrum to LCMS object and associated with mass feature - 633 self.add_mass_spectrum(ms) - 634 - 635 if not use_parser: - 636 # Reset the index on _ms_unprocessed[1] to not be scan number - 637 ms1_unprocessed = ms1_unprocessed.reset_index(drop=True) - 638 self._ms_unprocessed[1] = ms1_unprocessed - 639 else: - 640 raise ValueError( - 641 "Number of scans to average must be 1 or an integer with an integer median (i.e. 
3, 5, 7, 9)" - 642 ) + 485 if scan_filter is not None: + 486 ms2_scans = ms2_scans[ms2_scans.scan_text.str.contains(scan_filter)] + 487 # set tolerance in rt space (in minutes) and mz space (in daltons) + 488 time_tol = self.parameters.lc_ms.ms2_dda_rt_tolerance + 489 mz_tol = self.parameters.lc_ms.ms2_dda_mz_tolerance + 490 + 491 # for each mass feature, find the ms2 scans that are within the roi scan time and mz range + 492 dda_scans = [] + 493 for i, row in mf_df.iterrows(): + 494 ms2_scans_filtered = ms2_scans[ + 495 ms2_scans.scan_time.between( + 496 row.scan_time - time_tol, row.scan_time + time_tol + 497 ) + 498 ] + 499 ms2_scans_filtered = ms2_scans_filtered[ + 500 ms2_scans_filtered.precursor_mz.between( + 501 row.mz - mz_tol, row.mz + mz_tol + 502 ) + 503 ] + 504 dda_scans = dda_scans + ms2_scans_filtered.scan.tolist() + 505 self.mass_features[i].ms2_scan_numbers = ( + 506 ms2_scans_filtered.scan.tolist() + 507 + self.mass_features[i].ms2_scan_numbers + 508 ) + 509 # add to _ms attribute + 510 self.add_mass_spectra( + 511 scan_list=list(set(dda_scans)), + 512 auto_process=auto_process, + 513 spectrum_mode=spectrum_mode, + 514 use_parser=use_parser, + 515 ms_params=ms_params, + 516 ) + 517 # associate appropriate _ms attribute to appropriate mass feature's ms2_mass_spectra attribute + 518 for mf_id in self.mass_features: + 519 if self.mass_features[mf_id].ms2_scan_numbers is not None: + 520 for dda_scan in self.mass_features[mf_id].ms2_scan_numbers: + 521 if dda_scan in self._ms.keys(): + 522 self.mass_features[mf_id].ms2_mass_spectra[dda_scan] = self._ms[ + 523 dda_scan + 524 ] + 525 + 526 def add_associated_ms1( + 527 self, auto_process=True, use_parser=True, spectrum_mode=None + 528 ): + 529 """Add MS1 spectra associated with mass features to the dataset. + 530 + 531 Parameters + 532 ----------- + 533 auto_process : bool, optional + 534 If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True. 
+ 535 use_parser : bool, optional + 536 If True, envoke the spectra parser to get the MS1 spectra. Default is True. + 537 spectrum_mode : str or None, optional + 538 The spectrum mode to use for the mass spectra. If None, method will use the spectrum mode + 539 from the spectra parser to ascertain the spectrum mode (this allows for mixed types). + 540 Defaults to None. (faster if defined, otherwise will check each scan) + 541 + 542 Raises + 543 ------ + 544 ValueError + 545 If mass_features is not set, must run find_mass_features() first. + 546 If apex scans are not profile mode, all apex scans must be profile mode for averaging. + 547 If number of scans to average is not 1 or an integer with an integer median (i.e. 3, 5, 7, 9). + 548 If deconvolute is True and no EICs are found, did you run integrate_mass_features() first? + 549 """ + 550 # Check if mass_features is set, raise error if not + 551 if self.mass_features is None: + 552 raise ValueError( + 553 "mass_features not set, must run find_mass_features() first" + 554 ) + 555 scans_to_average = self.parameters.lc_ms.ms1_scans_to_average + 556 + 557 if scans_to_average == 1: + 558 # Add to LCMSobj + 559 self.add_mass_spectra( + 560 scan_list=[ + 561 int(x) for x in self.mass_features_to_df().apex_scan.tolist() + 562 ], + 563 auto_process=auto_process, + 564 use_parser=use_parser, + 565 spectrum_mode=spectrum_mode, + 566 ms_params=self.parameters.mass_spectrum["ms1"], + 567 ) + 568 + 569 elif ( + 570 (scans_to_average - 1) % 2 + 571 ) == 0: # scans_to_average = 3, 5, 7 etc, mirror l/r around apex + 572 apex_scans = list(set(self.mass_features_to_df().apex_scan.tolist())) + 573 # Check if all apex scans are profile mode, raise error if not + 574 if not all(self.scan_df.loc[apex_scans, "ms_format"] == "profile"): + 575 raise ValueError("All apex scans must be profile mode for averaging") + 576 + 577 # First get sets of scans to average + 578 def get_scans_from_apex(ms1_scans, apex_scan, scans_to_average): + 579 
ms1_idx_start = ms1_scans.index(apex_scan) - int( + 580 (scans_to_average - 1) / 2 + 581 ) + 582 if ms1_idx_start < 0: + 583 ms1_idx_start = 0 + 584 ms1_idx_end = ( + 585 ms1_scans.index(apex_scan) + int((scans_to_average - 1) / 2) + 1 + 586 ) + 587 if ms1_idx_end > (len(ms1_scans) - 1): + 588 ms1_idx_end = len(ms1_scans) - 1 + 589 scan_list = ms1_scans[ms1_idx_start:ms1_idx_end] + 590 return scan_list + 591 + 592 ms1_scans = self.ms1_scans + 593 scans_lists = [ + 594 get_scans_from_apex(ms1_scans, apex_scan, scans_to_average) + 595 for apex_scan in apex_scans + 596 ] + 597 + 598 # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation) + 599 if self.polarity == "negative": + 600 polarity = -1 + 601 elif self.polarity == "positive": + 602 polarity = 1 + 603 + 604 if not use_parser: + 605 # Perform checks and prepare _ms_unprocessed dictionary if use_parser is False (saves time to do this once) + 606 ms1_unprocessed = self._ms_unprocessed[1].copy() + 607 # Set the index on _ms_unprocessed[1] to scan number + 608 ms1_unprocessed = ms1_unprocessed.set_index("scan", drop=False) + 609 self._ms_unprocessed[1] = ms1_unprocessed + 610 + 611 # Check that all the scans in scan_lists are indexs in self._ms_unprocessed[1] + 612 scans_lists_flat = list( + 613 set([scan for sublist in scans_lists for scan in sublist]) + 614 ) + 615 if ( + 616 len( + 617 np.setdiff1d( + 618 np.sort(scans_lists_flat), + 619 np.sort(ms1_unprocessed.index.values), + 620 ) + 621 ) + 622 > 0 + 623 ): + 624 raise ValueError( + 625 "Not all scans to average are present in the unprocessed data" + 626 ) + 627 + 628 for scan_list_average, apex_scan in zip(scans_lists, apex_scans): + 629 # Get unprocessed mass spectrum from scans + 630 ms = self.get_average_mass_spectrum( + 631 scan_list=scan_list_average, + 632 apex_scan=apex_scan, + 633 spectrum_mode="profile", + 634 ms_level=1, + 635 auto_process=auto_process, + 636 use_parser=use_parser, + 637 perform_checks=False, + 638 
polarity=polarity, + 639 ms_params=self.parameters.mass_spectrum["ms1"], + 640 ) + 641 # Add mass spectrum to LCMS object and associated with mass feature + 642 self.add_mass_spectrum(ms) 643 - 644 # Associate the ms1 spectra with the mass features - 645 for mf_id in self.mass_features: - 646 self.mass_features[mf_id].mass_spectrum = self._ms[ - 647 self.mass_features[mf_id].apex_scan - 648 ] - 649 self.mass_features[mf_id].update_mz() - 650 - 651 # Re-process clustering if persistent homology is selected to remove duplicate mass features after adding and processing MS1 spectra - 652 if self.parameters.lc_ms.peak_picking_method == "persistent homology": - 653 self.cluster_mass_features(drop_children=True, sort_by="persistence") - 654 - 655 def mass_features_to_df(self): - 656 """Returns a pandas dataframe summarizing the mass features. - 657 - 658 The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity, - 659 persistence, area, monoisotopic_mf_id, and isotopologue_type. The index is set to mf_id (mass feature ID). - 660 - 661 - 662 Returns - 663 -------- - 664 pandas.DataFrame - 665 A pandas dataframe of mass features with the following columns: - 666 mf_id, mz, apex_scan, scan_time, intensity, persistence, area. - 667 """ - 668 - 669 def mass_spectrum_to_string( - 670 mass_spec, normalize=True, min_normalized_abun=0.01 - 671 ): - 672 """Converts a mass spectrum to a string of m/z:abundance pairs. - 673 - 674 Parameters - 675 ----------- - 676 mass_spec : MassSpectrum - 677 A MassSpectrum object to be converted to a string. - 678 normalize : bool, optional - 679 If True, normalizes the abundance values to a maximum of 1. Defaults to True. - 680 min_normalized_abun : float, optional - 681 The minimum normalized abundance value to include in the string, only used if normalize is True. Defaults to 0.01. 
+ 644 if not use_parser: + 645 # Reset the index on _ms_unprocessed[1] to not be scan number + 646 ms1_unprocessed = ms1_unprocessed.reset_index(drop=True) + 647 self._ms_unprocessed[1] = ms1_unprocessed + 648 else: + 649 raise ValueError( + 650 "Number of scans to average must be 1 or an integer with an integer median (i.e. 3, 5, 7, 9)" + 651 ) + 652 + 653 # Associate the ms1 spectra with the mass features + 654 for mf_id in self.mass_features: + 655 self.mass_features[mf_id].mass_spectrum = self._ms[ + 656 self.mass_features[mf_id].apex_scan + 657 ] + 658 self.mass_features[mf_id].update_mz() + 659 + 660 # Re-process clustering if persistent homology is selected to remove duplicate mass features after adding and processing MS1 spectra + 661 if self.parameters.lc_ms.peak_picking_method == "persistent homology": + 662 self.cluster_mass_features(drop_children=True, sort_by="persistence") + 663 + 664 def mass_features_to_df(self): + 665 """Returns a pandas dataframe summarizing the mass features. + 666 + 667 The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity, + 668 persistence, area, monoisotopic_mf_id, and isotopologue_type. The index is set to mf_id (mass feature ID). + 669 + 670 + 671 Returns + 672 -------- + 673 pandas.DataFrame + 674 A pandas dataframe of mass features with the following columns: + 675 mf_id, mz, apex_scan, scan_time, intensity, persistence, area. + 676 """ + 677 + 678 def mass_spectrum_to_string( + 679 mass_spec, normalize=True, min_normalized_abun=0.01 + 680 ): + 681 """Converts a mass spectrum to a string of m/z:abundance pairs. 682 - 683 Returns - 684 -------- - 685 str - 686 A string of m/z:abundance pairs from the mass spectrum, separated by a semicolon. 
- 687 """ - 688 mz_np = mass_spec.to_dataframe()["m/z"].values - 689 abun_np = mass_spec.to_dataframe()["Peak Height"].values - 690 if normalize: - 691 abun_np = abun_np / abun_np.max() - 692 mz_abun = np.column_stack((mz_np, abun_np)) - 693 if normalize: - 694 mz_abun = mz_abun[mz_abun[:, 1] > min_normalized_abun] - 695 mz_abun_str = [ - 696 str(round(mz, ndigits=4)) + ":" + str(round(abun, ndigits=2)) - 697 for mz, abun in mz_abun - 698 ] - 699 return "; ".join(mz_abun_str) - 700 - 701 cols_in_df = [ - 702 "id", - 703 "_apex_scan", - 704 "start_scan", - 705 "final_scan", - 706 "_retention_time", - 707 "_intensity", - 708 "_persistence", - 709 "_area", - 710 "_dispersity_index", - 711 "_tailing_factor", - 712 "monoisotopic_mf_id", - 713 "isotopologue_type", - 714 "mass_spectrum_deconvoluted_parent", - 715 ] - 716 df_mf_list = [] - 717 for mf_id in self.mass_features.keys(): - 718 # Find cols_in_df that are in single_mf - 719 df_keys = list( - 720 set(cols_in_df).intersection(self.mass_features[mf_id].__dir__()) - 721 ) - 722 dict_mf = {} - 723 for key in df_keys: - 724 dict_mf[key] = getattr(self.mass_features[mf_id], key) - 725 if len(self.mass_features[mf_id].ms2_scan_numbers) > 0: - 726 # Add MS2 spectra info - 727 best_ms2_spectrum = self.mass_features[mf_id].best_ms2 - 728 dict_mf["ms2_spectrum"] = mass_spectrum_to_string(best_ms2_spectrum) - 729 if len(self.mass_features[mf_id].associated_mass_features_deconvoluted) > 0: - 730 dict_mf["associated_mass_features"] = ", ".join( - 731 map( - 732 str, - 733 self.mass_features[mf_id].associated_mass_features_deconvoluted, - 734 ) - 735 ) - 736 if self.mass_features[mf_id]._half_height_width is not None: - 737 dict_mf["half_height_width"] = self.mass_features[ - 738 mf_id - 739 ].half_height_width - 740 # Check if EIC for mass feature is set - 741 df_mf_single = pd.DataFrame(dict_mf, index=[mf_id]) - 742 df_mf_single["mz"] = self.mass_features[mf_id].mz - 743 df_mf_list.append(df_mf_single) - 744 df_mf = 
pd.concat(df_mf_list) - 745 - 746 # rename _area to area and id to mf_id - 747 df_mf = df_mf.rename( - 748 columns={ - 749 "_area": "area", - 750 "id": "mf_id", - 751 "_apex_scan": "apex_scan", - 752 "_retention_time": "scan_time", - 753 "_intensity": "intensity", - 754 "_persistence": "persistence", - 755 "_dispersity_index": "dispersity_index", - 756 "_tailing_factor": "tailing_factor", - 757 } - 758 ) - 759 - 760 # reorder columns - 761 col_order = [ - 762 "mf_id", - 763 "scan_time", - 764 "mz", - 765 "apex_scan", - 766 "start_scan", - 767 "final_scan", - 768 "intensity", - 769 "persistence", - 770 "area", - 771 "half_height_width", - 772 "tailing_factor", - 773 "dispersity_index", - 774 "monoisotopic_mf_id", - 775 "isotopologue_type", - 776 "mass_spectrum_deconvoluted_parent", - 777 "associated_mass_features", - 778 "ms2_spectrum", - 779 ] - 780 # drop columns that are not in col_order - 781 cols_to_order = [col for col in col_order if col in df_mf.columns] - 782 df_mf = df_mf[cols_to_order] - 783 - 784 # reset index to mf_id - 785 df_mf = df_mf.set_index("mf_id") - 786 df_mf.index.name = "mf_id" - 787 - 788 return df_mf - 789 - 790 def mass_features_ms1_annot_to_df(self): - 791 """Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset. + 683 Parameters + 684 ----------- + 685 mass_spec : MassSpectrum + 686 A MassSpectrum object to be converted to a string. + 687 normalize : bool, optional + 688 If True, normalizes the abundance values to a maximum of 1. Defaults to True. + 689 min_normalized_abun : float, optional + 690 The minimum normalized abundance value to include in the string, only used if normalize is True. Defaults to 0.01. + 691 + 692 Returns + 693 -------- + 694 str + 695 A string of m/z:abundance pairs from the mass spectrum, separated by a semicolon. 
+ 696 """ + 697 mz_np = mass_spec.to_dataframe()["m/z"].values + 698 abun_np = mass_spec.to_dataframe()["Peak Height"].values + 699 if normalize: + 700 abun_np = abun_np / abun_np.max() + 701 mz_abun = np.column_stack((mz_np, abun_np)) + 702 if normalize: + 703 mz_abun = mz_abun[mz_abun[:, 1] > min_normalized_abun] + 704 mz_abun_str = [ + 705 str(round(mz, ndigits=4)) + ":" + str(round(abun, ndigits=2)) + 706 for mz, abun in mz_abun + 707 ] + 708 return "; ".join(mz_abun_str) + 709 + 710 cols_in_df = [ + 711 "id", + 712 "_apex_scan", + 713 "start_scan", + 714 "final_scan", + 715 "_retention_time", + 716 "_intensity", + 717 "_persistence", + 718 "_area", + 719 "_dispersity_index", + 720 "_tailing_factor", + 721 "monoisotopic_mf_id", + 722 "isotopologue_type", + 723 "mass_spectrum_deconvoluted_parent", + 724 ] + 725 df_mf_list = [] + 726 for mf_id in self.mass_features.keys(): + 727 # Find cols_in_df that are in single_mf + 728 df_keys = list( + 729 set(cols_in_df).intersection(self.mass_features[mf_id].__dir__()) + 730 ) + 731 dict_mf = {} + 732 for key in df_keys: + 733 dict_mf[key] = getattr(self.mass_features[mf_id], key) + 734 if len(self.mass_features[mf_id].ms2_scan_numbers) > 0: + 735 # Add MS2 spectra info + 736 best_ms2_spectrum = self.mass_features[mf_id].best_ms2 + 737 dict_mf["ms2_spectrum"] = mass_spectrum_to_string(best_ms2_spectrum) + 738 if len(self.mass_features[mf_id].associated_mass_features_deconvoluted) > 0: + 739 dict_mf["associated_mass_features"] = ", ".join( + 740 map( + 741 str, + 742 self.mass_features[mf_id].associated_mass_features_deconvoluted, + 743 ) + 744 ) + 745 if self.mass_features[mf_id]._half_height_width is not None: + 746 dict_mf["half_height_width"] = self.mass_features[ + 747 mf_id + 748 ].half_height_width + 749 # Check if EIC for mass feature is set + 750 df_mf_single = pd.DataFrame(dict_mf, index=[mf_id]) + 751 df_mf_single["mz"] = self.mass_features[mf_id].mz + 752 df_mf_list.append(df_mf_single) + 753 df_mf = 
pd.concat(df_mf_list) + 754 + 755 # rename _area to area and id to mf_id + 756 df_mf = df_mf.rename( + 757 columns={ + 758 "_area": "area", + 759 "id": "mf_id", + 760 "_apex_scan": "apex_scan", + 761 "_retention_time": "scan_time", + 762 "_intensity": "intensity", + 763 "_persistence": "persistence", + 764 "_dispersity_index": "dispersity_index", + 765 "_tailing_factor": "tailing_factor", + 766 } + 767 ) + 768 + 769 # reorder columns + 770 col_order = [ + 771 "mf_id", + 772 "scan_time", + 773 "mz", + 774 "apex_scan", + 775 "start_scan", + 776 "final_scan", + 777 "intensity", + 778 "persistence", + 779 "area", + 780 "half_height_width", + 781 "tailing_factor", + 782 "dispersity_index", + 783 "monoisotopic_mf_id", + 784 "isotopologue_type", + 785 "mass_spectrum_deconvoluted_parent", + 786 "associated_mass_features", + 787 "ms2_spectrum", + 788 ] + 789 # drop columns that are not in col_order + 790 cols_to_order = [col for col in col_order if col in df_mf.columns] + 791 df_mf = df_mf[cols_to_order] 792 - 793 Returns - 794 -------- - 795 pandas.DataFrame - 796 A pandas dataframe of MS1 annotations for the mass features in the dataset. - 797 The index is set to mf_id (mass feature ID) + 793 # reset index to mf_id + 794 df_mf = df_mf.set_index("mf_id") + 795 df_mf.index.name = "mf_id" + 796 + 797 return df_mf 798 - 799 Raises - 800 ------ - 801 Warning - 802 If no MS1 annotations were found for the mass features in the dataset. 
- 803 """ - 804 annot_df_list_ms1 = [] - 805 for mf_id in self.mass_features.keys(): - 806 if self.mass_features[mf_id].mass_spectrum is None: - 807 pass - 808 else: - 809 # Add ms1 annotations to ms1 annotation list - 810 if ( - 811 np.abs( - 812 ( - 813 self.mass_features[mf_id].ms1_peak.mz_exp - 814 - self.mass_features[mf_id].mz - 815 ) - 816 ) - 817 < 0.01 - 818 ): - 819 # Get the molecular formula from the mass spectrum - 820 annot_df = self.mass_features[mf_id].mass_spectrum.to_dataframe() - 821 # Subset to pull out only the peak associated with the mass feature - 822 annot_df = annot_df[ - 823 annot_df["Index"] == self.mass_features[mf_id].ms1_peak.index - 824 ].copy() - 825 - 826 # Remove the index column and add column for mf_id - 827 annot_df = annot_df.drop(columns=["Index"]) - 828 annot_df["mf_id"] = mf_id - 829 annot_df_list_ms1.append(annot_df) - 830 - 831 if len(annot_df_list_ms1) > 0: - 832 annot_ms1_df_full = pd.concat(annot_df_list_ms1) - 833 annot_ms1_df_full = annot_ms1_df_full.set_index("mf_id") - 834 annot_ms1_df_full.index.name = "mf_id" - 835 - 836 else: - 837 annot_ms1_df_full = None - 838 # Warn that no ms1 annotations were found - 839 warnings.warn( - 840 "No MS1 annotations found for mass features in dataset, were MS1 spectra added and processed within the dataset?", - 841 UserWarning - 842 ) - 843 - 844 return annot_ms1_df_full - 845 - 846 def mass_features_ms2_annot_to_df(self, molecular_metadata=None): - 847 """Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset. - 848 - 849 Parameters - 850 ----------- - 851 molecular_metadata : dict of MolecularMetadata objects - 852 A dictionary of MolecularMetadata objects, keyed by metabref_mol_id. Defaults to None. - 853 - 854 Returns - 855 -------- - 856 pandas.DataFrame - 857 A pandas dataframe of MS2 annotations for the mass features in the dataset, - 858 and optionally molecular metadata. 
The index is set to mf_id (mass feature ID) - 859 - 860 Raises - 861 ------ - 862 Warning - 863 If no MS2 annotations were found for the mass features in the dataset. - 864 """ - 865 annot_df_list_ms2 = [] - 866 for mf_id in self.mass_features.keys(): - 867 if len(self.mass_features[mf_id].ms2_similarity_results) > 0: - 868 # Add ms2 annotations to ms2 annotation list - 869 for result in self.mass_features[mf_id].ms2_similarity_results: - 870 annot_df_ms2 = result.to_dataframe() - 871 annot_df_ms2["mf_id"] = mf_id - 872 annot_df_list_ms2.append(annot_df_ms2) - 873 - 874 if len(annot_df_list_ms2) > 0: - 875 annot_ms2_df_full = pd.concat(annot_df_list_ms2) - 876 if molecular_metadata is not None: - 877 molecular_metadata_df = pd.concat( - 878 [ - 879 pd.DataFrame.from_dict(v.__dict__, orient="index").transpose() - 880 for k, v in molecular_metadata.items() - 881 ], - 882 ignore_index=True, - 883 ) - 884 molecular_metadata_df = molecular_metadata_df.rename( - 885 columns={"id": "ref_mol_id"} - 886 ) - 887 annot_ms2_df_full = annot_ms2_df_full.merge( - 888 molecular_metadata_df, on="ref_mol_id", how="left" - 889 ) - 890 annot_ms2_df_full = annot_ms2_df_full.drop_duplicates( - 891 subset=["mf_id", "query_spectrum_id", "ref_ms_id"] - 892 ).copy() - 893 annot_ms2_df_full = annot_ms2_df_full.set_index("mf_id") - 894 annot_ms2_df_full.index.name = "mf_id" - 895 else: - 896 annot_ms2_df_full = None - 897 # Warn that no ms2 annotations were found - 898 warnings.warn( - 899 "No MS2 annotations found for mass features in dataset, were MS2 spectra added and searched against a database?", - 900 UserWarning - 901 ) - 902 - 903 return annot_ms2_df_full - 904 - 905 def __len__(self): - 906 """ - 907 Returns the number of mass spectra in the dataset. - 908 - 909 Returns - 910 -------- - 911 int - 912 The number of mass spectra in the dataset. 
- 913 """ - 914 return len(self._ms) - 915 - 916 def __getitem__(self, scan_number): - 917 """ - 918 Returns the mass spectrum corresponding to the specified scan number. - 919 - 920 Parameters - 921 ----------- - 922 scan_number : int - 923 The scan number of the desired mass spectrum. + 799 def mass_features_ms1_annot_to_df(self): + 800 """Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset. + 801 + 802 Returns + 803 -------- + 804 pandas.DataFrame + 805 A pandas dataframe of MS1 annotations for the mass features in the dataset. + 806 The index is set to mf_id (mass feature ID) + 807 + 808 Raises + 809 ------ + 810 Warning + 811 If no MS1 annotations were found for the mass features in the dataset. + 812 """ + 813 annot_df_list_ms1 = [] + 814 for mf_id in self.mass_features.keys(): + 815 if self.mass_features[mf_id].mass_spectrum is None: + 816 pass + 817 else: + 818 # Add ms1 annotations to ms1 annotation list + 819 if ( + 820 np.abs( + 821 ( + 822 self.mass_features[mf_id].ms1_peak.mz_exp + 823 - self.mass_features[mf_id].mz + 824 ) + 825 ) + 826 < 0.01 + 827 ): + 828 # Get the molecular formula from the mass spectrum + 829 annot_df = self.mass_features[mf_id].mass_spectrum.to_dataframe() + 830 # Subset to pull out only the peak associated with the mass feature + 831 annot_df = annot_df[ + 832 annot_df["Index"] == self.mass_features[mf_id].ms1_peak.index + 833 ].copy() + 834 + 835 # Remove the index column and add column for mf_id + 836 annot_df = annot_df.drop(columns=["Index"]) + 837 annot_df["mf_id"] = mf_id + 838 annot_df_list_ms1.append(annot_df) + 839 + 840 if len(annot_df_list_ms1) > 0: + 841 annot_ms1_df_full = pd.concat(annot_df_list_ms1) + 842 annot_ms1_df_full = annot_ms1_df_full.set_index("mf_id") + 843 annot_ms1_df_full.index.name = "mf_id" + 844 + 845 else: + 846 annot_ms1_df_full = None + 847 # Warn that no ms1 annotations were found + 848 warnings.warn( + 849 "No MS1 annotations found for mass features 
in dataset, were MS1 spectra added and processed within the dataset?", + 850 UserWarning, + 851 ) + 852 + 853 return annot_ms1_df_full + 854 + 855 def mass_features_ms2_annot_to_df(self, molecular_metadata=None): + 856 """Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset. + 857 + 858 Parameters + 859 ----------- + 860 molecular_metadata : dict of MolecularMetadata objects + 861 A dictionary of MolecularMetadata objects, keyed by metabref_mol_id. Defaults to None. + 862 + 863 Returns + 864 -------- + 865 pandas.DataFrame + 866 A pandas dataframe of MS2 annotations for the mass features in the dataset, + 867 and optionally molecular metadata. The index is set to mf_id (mass feature ID) + 868 + 869 Raises + 870 ------ + 871 Warning + 872 If no MS2 annotations were found for the mass features in the dataset. + 873 """ + 874 annot_df_list_ms2 = [] + 875 for mf_id in self.mass_features.keys(): + 876 if len(self.mass_features[mf_id].ms2_similarity_results) > 0: + 877 # Add ms2 annotations to ms2 annotation list + 878 for result in self.mass_features[mf_id].ms2_similarity_results: + 879 annot_df_ms2 = result.to_dataframe() + 880 annot_df_ms2["mf_id"] = mf_id + 881 annot_df_list_ms2.append(annot_df_ms2) + 882 + 883 if len(annot_df_list_ms2) > 0: + 884 annot_ms2_df_full = pd.concat(annot_df_list_ms2) + 885 if molecular_metadata is not None: + 886 molecular_metadata_df = pd.concat( + 887 [ + 888 pd.DataFrame.from_dict(v.__dict__, orient="index").transpose() + 889 for k, v in molecular_metadata.items() + 890 ], + 891 ignore_index=True, + 892 ) + 893 molecular_metadata_df = molecular_metadata_df.rename( + 894 columns={"id": "ref_mol_id"} + 895 ) + 896 annot_ms2_df_full = annot_ms2_df_full.merge( + 897 molecular_metadata_df, on="ref_mol_id", how="left" + 898 ) + 899 annot_ms2_df_full = annot_ms2_df_full.drop_duplicates( + 900 subset=["mf_id", "query_spectrum_id", "ref_ms_id"] + 901 ).copy() + 902 annot_ms2_df_full = 
annot_ms2_df_full.set_index("mf_id") + 903 annot_ms2_df_full.index.name = "mf_id" + 904 else: + 905 annot_ms2_df_full = None + 906 # Warn that no ms2 annotations were found + 907 warnings.warn( + 908 "No MS2 annotations found for mass features in dataset, were MS2 spectra added and searched against a database?", + 909 UserWarning, + 910 ) + 911 + 912 return annot_ms2_df_full + 913 + 914 def __len__(self): + 915 """ + 916 Returns the number of mass spectra in the dataset. + 917 + 918 Returns + 919 -------- + 920 int + 921 The number of mass spectra in the dataset. + 922 """ + 923 return len(self._ms) 924 - 925 Returns - 926 -------- - 927 MassSpectrum - 928 The mass spectrum corresponding to the specified scan number. - 929 """ - 930 return self._ms.get(scan_number) - 931 - 932 def __iter__(self): - 933 """Returns an iterator over the mass spectra in the dataset. - 934 - 935 Returns - 936 -------- - 937 iterator - 938 An iterator over the mass spectra in the dataset. - 939 """ - 940 return iter(self._ms.values()) - 941 - 942 def set_tic_list_from_data(self, overwrite=False): - 943 """Sets the TIC list from the mass spectrum objects within the _ms dictionary. - 944 - 945 Parameters - 946 ----------- - 947 overwrite : bool, optional - 948 If True, overwrites the TIC list if it is already set. Defaults to False. - 949 - 950 Notes - 951 ----- - 952 If the _ms dictionary is incomplete, sets the TIC list to an empty list. + 925 def __getitem__(self, scan_number): + 926 """ + 927 Returns the mass spectrum corresponding to the specified scan number. + 928 + 929 Parameters + 930 ----------- + 931 scan_number : int + 932 The scan number of the desired mass spectrum. + 933 + 934 Returns + 935 -------- + 936 MassSpectrum + 937 The mass spectrum corresponding to the specified scan number. + 938 """ + 939 return self._ms.get(scan_number) + 940 + 941 def __iter__(self): + 942 """Returns an iterator over the mass spectra in the dataset. 
+ 943 + 944 Returns + 945 -------- + 946 iterator + 947 An iterator over the mass spectra in the dataset. + 948 """ + 949 return iter(self._ms.values()) + 950 + 951 def set_tic_list_from_data(self, overwrite=False): + 952 """Sets the TIC list from the mass spectrum objects within the _ms dictionary. 953 - 954 Raises - 955 ------ - 956 ValueError - 957 If no mass spectra are found in the dataset. - 958 If the TIC list is already set and overwrite is False. - 959 """ - 960 # Check if _ms is empty and raise error if so - 961 if len(self._ms) == 0: - 962 raise ValueError("No mass spectra found in dataset") - 963 - 964 # Check if tic_list is already set and raise error if so - 965 if len(self.tic) > 0 and not overwrite: - 966 raise ValueError("TIC list already set, use overwrite=True to overwrite") - 967 - 968 self.tic = [self._ms.get(i).tic for i in self.scans_number] - 969 - 970 def set_retention_time_from_data(self, overwrite=False): - 971 """Sets the retention time list from the data in the _ms dictionary. + 954 Parameters + 955 ----------- + 956 overwrite : bool, optional + 957 If True, overwrites the TIC list if it is already set. Defaults to False. + 958 + 959 Notes + 960 ----- + 961 If the _ms dictionary is incomplete, sets the TIC list to an empty list. + 962 + 963 Raises + 964 ------ + 965 ValueError + 966 If no mass spectra are found in the dataset. + 967 If the TIC list is already set and overwrite is False. + 968 """ + 969 # Check if _ms is empty and raise error if so + 970 if len(self._ms) == 0: + 971 raise ValueError("No mass spectra found in dataset") 972 - 973 Parameters - 974 ----------- - 975 overwrite : bool, optional - 976 If True, overwrites the retention time list if it is already set. Defaults to False. - 977 - 978 Notes - 979 ----- - 980 If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list. 
+ 973 # Check if tic_list is already set and raise error if so + 974 if len(self.tic) > 0 and not overwrite: + 975 raise ValueError("TIC list already set, use overwrite=True to overwrite") + 976 + 977 self.tic = [self._ms.get(i).tic for i in self.scans_number] + 978 + 979 def set_retention_time_from_data(self, overwrite=False): + 980 """Sets the retention time list from the data in the _ms dictionary. 981 - 982 Raises - 983 ------ - 984 ValueError - 985 If no mass spectra are found in the dataset. - 986 If the retention time list is already set and overwrite is False. - 987 """ - 988 # Check if _ms is empty and raise error if so - 989 if len(self._ms) == 0: - 990 raise ValueError("No mass spectra found in dataset") - 991 - 992 # Check if retention_time_list is already set and raise error if so - 993 if len(self.retention_time) > 0 and not overwrite: - 994 raise ValueError( - 995 "Retention time list already set, use overwrite=True to overwrite" - 996 ) - 997 - 998 retention_time_list = [] - 999 for key_ms in sorted(self._ms.keys()): -1000 retention_time_list.append(self._ms.get(key_ms).retention_time) -1001 self.retention_time = retention_time_list -1002 -1003 def set_scans_number_from_data(self, overwrite=False): -1004 """Sets the scan number list from the data in the _ms dictionary. -1005 -1006 Notes -1007 ----- -1008 If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list. -1009 -1010 Raises -1011 ------ -1012 ValueError -1013 If no mass spectra are found in the dataset. -1014 If the scan number list is already set and overwrite is False. 
-1015 """ -1016 # Check if _ms is empty and raise error if so -1017 if len(self._ms) == 0: -1018 raise ValueError("No mass spectra found in dataset") -1019 -1020 # Check if scans_number_list is already set and raise error if so -1021 if len(self.scans_number) > 0 and not overwrite: -1022 raise ValueError( -1023 "Scan number list already set, use overwrite=True to overwrite" -1024 ) -1025 -1026 self.scans_number = sorted(self._ms.keys()) -1027 -1028 @property -1029 def ms1_scans(self): -1030 """ -1031 list : A list of MS1 scan numbers for the dataset. -1032 """ -1033 return self.scan_df[self.scan_df.ms_level == 1].index.tolist() + 982 Parameters + 983 ----------- + 984 overwrite : bool, optional + 985 If True, overwrites the retention time list if it is already set. Defaults to False. + 986 + 987 Notes + 988 ----- + 989 If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list. + 990 + 991 Raises + 992 ------ + 993 ValueError + 994 If no mass spectra are found in the dataset. + 995 If the retention time list is already set and overwrite is False. + 996 """ + 997 # Check if _ms is empty and raise error if so + 998 if len(self._ms) == 0: + 999 raise ValueError("No mass spectra found in dataset") +1000 +1001 # Check if retention_time_list is already set and raise error if so +1002 if len(self.retention_time) > 0 and not overwrite: +1003 raise ValueError( +1004 "Retention time list already set, use overwrite=True to overwrite" +1005 ) +1006 +1007 retention_time_list = [] +1008 for key_ms in sorted(self._ms.keys()): +1009 retention_time_list.append(self._ms.get(key_ms).retention_time) +1010 self.retention_time = retention_time_list +1011 +1012 def set_scans_number_from_data(self, overwrite=False): +1013 """Sets the scan number list from the data in the _ms dictionary. +1014 +1015 Notes +1016 ----- +1017 If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list. 
+1018 +1019 Raises +1020 ------ +1021 ValueError +1022 If no mass spectra are found in the dataset. +1023 If the scan number list is already set and overwrite is False. +1024 """ +1025 # Check if _ms is empty and raise error if so +1026 if len(self._ms) == 0: +1027 raise ValueError("No mass spectra found in dataset") +1028 +1029 # Check if scans_number_list is already set and raise error if so +1030 if len(self.scans_number) > 0 and not overwrite: +1031 raise ValueError( +1032 "Scan number list already set, use overwrite=True to overwrite" +1033 ) 1034 -1035 @property -1036 def parameters(self): -1037 """ -1038 LCMSParameters : The parameters used for the LC-MS analysis. -1039 """ -1040 return self._parameters -1041 -1042 @parameters.setter -1043 def parameters(self, paramsinstance): -1044 """ -1045 Sets the parameters used for the LC-MS analysis. -1046 -1047 Parameters -1048 ----------- -1049 paramsinstance : LCMSParameters -1050 The parameters used for the LC-MS analysis. -1051 """ -1052 self._parameters = paramsinstance -1053 -1054 @property -1055 def scans_number(self): -1056 """ -1057 list : A list of scan numbers for the dataset. -1058 """ -1059 return self._scans_number_list -1060 -1061 @scans_number.setter -1062 def scans_number(self, scan_numbers_list): -1063 """ -1064 Sets the scan numbers for the dataset. -1065 -1066 Parameters -1067 ----------- -1068 scan_numbers_list : list -1069 A list of scan numbers for the dataset. -1070 """ -1071 self._scans_number_list = scan_numbers_list -1072 -1073 @property -1074 def retention_time(self): -1075 """ -1076 numpy.ndarray : An array of retention times for the dataset. -1077 """ -1078 return self._retention_time_list -1079 -1080 @retention_time.setter -1081 def retention_time(self, rt_list): -1082 """ -1083 Sets the retention times for the dataset. -1084 -1085 Parameters -1086 ----------- -1087 rt_list : list -1088 A list of retention times for the dataset. 
-1089 """ -1090 self._retention_time_list = np.array(rt_list) -1091 -1092 @property -1093 def tic(self): -1094 """ -1095 numpy.ndarray : An array of TIC values for the dataset. -1096 """ -1097 return self._tic_list -1098 -1099 @tic.setter -1100 def tic(self, tic_list): -1101 """ -1102 Sets the TIC values for the dataset. -1103 -1104 Parameters -1105 ----------- -1106 tic_list : list -1107 A list of TIC values for the dataset. -1108 """ -1109 self._tic_list = np.array(tic_list) +1035 self.scans_number = sorted(self._ms.keys()) +1036 +1037 @property +1038 def ms1_scans(self): +1039 """ +1040 list : A list of MS1 scan numbers for the dataset. +1041 """ +1042 return self.scan_df[self.scan_df.ms_level == 1].index.tolist() +1043 +1044 @property +1045 def parameters(self): +1046 """ +1047 LCMSParameters : The parameters used for the LC-MS analysis. +1048 """ +1049 return self._parameters +1050 +1051 @parameters.setter +1052 def parameters(self, paramsinstance): +1053 """ +1054 Sets the parameters used for the LC-MS analysis. +1055 +1056 Parameters +1057 ----------- +1058 paramsinstance : LCMSParameters +1059 The parameters used for the LC-MS analysis. +1060 """ +1061 self._parameters = paramsinstance +1062 +1063 @property +1064 def scans_number(self): +1065 """ +1066 list : A list of scan numbers for the dataset. +1067 """ +1068 return self._scans_number_list +1069 +1070 @scans_number.setter +1071 def scans_number(self, scan_numbers_list): +1072 """ +1073 Sets the scan numbers for the dataset. +1074 +1075 Parameters +1076 ----------- +1077 scan_numbers_list : list +1078 A list of scan numbers for the dataset. +1079 """ +1080 self._scans_number_list = scan_numbers_list +1081 +1082 @property +1083 def retention_time(self): +1084 """ +1085 numpy.ndarray : An array of retention times for the dataset. 
+1086 """ +1087 return self._retention_time_list +1088 +1089 @retention_time.setter +1090 def retention_time(self, rt_list): +1091 """ +1092 Sets the retention times for the dataset. +1093 +1094 Parameters +1095 ----------- +1096 rt_list : list +1097 A list of retention times for the dataset. +1098 """ +1099 self._retention_time_list = np.array(rt_list) +1100 +1101 @property +1102 def tic(self): +1103 """ +1104 numpy.ndarray : An array of TIC values for the dataset. +1105 """ +1106 return self._tic_list +1107 +1108 @tic.setter +1109 def tic(self, tic_list): +1110 """ +1111 Sets the TIC values for the dataset. +1112 +1113 Parameters +1114 ----------- +1115 tic_list : list +1116 A list of TIC values for the dataset. +1117 """ +1118 self._tic_list = np.array(tic_list)

    @@ -1271,308 +1280,308 @@

    -
     13class MassSpectraBase:
    - 14    """Base class for mass spectra objects.
    - 15
    - 16    Parameters
    - 17    -----------
    - 18    file_location : str or Path
    - 19        The location of the file containing the mass spectra data.
    - 20    analyzer : str, optional
    - 21        The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
    - 22    instrument_label : str, optional
    - 23        The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
    - 24    sample_name : str, optional
    - 25        The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
    - 26    spectra_parser : object, optional
    - 27        The spectra parser object used to create the mass spectra object. Defaults to None.
    - 28
    - 29    Attributes
    - 30    -----------
    - 31    spectra_parser_class : class
    - 32        The class of the spectra parser used to create the mass spectra object.
    - 33    file_location : str or Path
    - 34        The location of the file containing the mass spectra data.
    - 35    sample_name : str
    - 36        The name of the sample; defaults to the file name if not provided to the parser.
    - 37    analyzer : str
    - 38        The type of analyzer used to generate the mass spectra data. Derived from the spectra parser.
    - 39    instrument_label : str
    - 40        The type of instrument used to generate the mass spectra data. Derived from the spectra parser.
    - 41    _scan_info : dict
    - 42        A dictionary containing the scan data with columns for scan number, scan time, ms level, precursor m/z,
    - 43        scan text, and scan window (lower and upper).
    - 44        Associated with the property scan_df, which returns a pandas DataFrame or can set this attribute from a pandas DataFrame.
    - 45    _ms : dict
    - 46        A dictionary containing mass spectra for the dataset, keys of dictionary are scan numbers. Initialized as an empty dictionary.
    - 47    _ms_unprocessed: dictionary of pandas.DataFrames or None
    - 48        A dictionary of unprocssed mass spectra data, as an (optional) intermediate data product for peak picking.
    - 49        Key is ms_level, and value is dataframe with columns for scan number, m/z, and intensity. Default is None.
    - 50
    - 51    Methods
    - 52    --------
    - 53    * add_mass_spectra(scan_list, spectrum_mode: str = 'profile', use_parser = True, auto_process=True).
    - 54        Add mass spectra (or singlel mass spectrum) to _ms slot, from a list of scans
    - 55    * get_time_of_scan_id(scan).
    - 56        Returns the scan time for the specified scan number.
    - 57    """
    - 58
    - 59    def __init__(
    - 60        self,
    - 61        file_location,
    - 62        analyzer="Unknown",
    - 63        instrument_label="Unknown",
    - 64        sample_name=None,
    - 65        spectra_parser=None,
    - 66    ):
    - 67        if isinstance(file_location, str):
    - 68            file_location = Path(file_location)
    - 69        else:
    - 70            file_location = file_location
    - 71        if not file_location.exists():
    - 72            raise FileExistsError("File does not exist: " + str(file_location))
    - 73
    - 74        if sample_name:
    - 75            self.sample_name = sample_name
    - 76        else:
    - 77            self.sample_name = file_location.stem
    - 78
    - 79        self.file_location = file_location
    - 80        self.analyzer = analyzer
    - 81        self.instrument_label = instrument_label
    - 82
    - 83        # Add the spectra parser class to the object if it is not None
    - 84        if spectra_parser is not None:
    - 85            self.spectra_parser_class = spectra_parser.__class__
    - 86            self.spectra_parser = spectra_parser
    - 87            # Check that spectra_pasrser.sample_name is same as sample_name etc, raise warning if not
    - 88            if (
    - 89                self.sample_name is not None
    - 90                and self.sample_name != self.spectra_parser.sample_name
    - 91            ):
    - 92                warnings.warn(
    - 93                    "sample_name provided to MassSpectraBase object does not match sample_name provided to spectra parser object",
    - 94                    UserWarning
    - 95                )
    - 96            if self.analyzer != self.spectra_parser.analyzer:
    - 97                warnings.warn(
    - 98                    "analyzer provided to MassSpectraBase object does not match analyzer provided to spectra parser object",
    - 99                    UserWarning
    -100                )
    -101            if self.instrument_label != self.spectra_parser.instrument_label:
    -102                warnings.warn(
    -103                    "instrument provided to MassSpectraBase object does not match instrument provided to spectra parser object",
    -104                    UserWarning
    -105                )
    -106            if self.file_location != self.spectra_parser.file_location:
    -107                warnings.warn(
    -108                    "file_location provided to MassSpectraBase object does not match file_location provided to spectra parser object",
    -109                    UserWarning
    -110                )
    -111
    -112        # Instantiate empty dictionaries for scan information and mass spectra
    -113        self._scan_info = {}
    -114        self._ms = {}
    -115        self._ms_unprocessed = {}
    -116
    -117    def add_mass_spectrum(self, mass_spec):
    -118        """Adds a mass spectrum to the dataset.
    -119
    -120        Parameters
    -121        -----------
    -122        mass_spec : MassSpectrum
    -123            The corems MassSpectrum object to be added to the dataset.
    -124
    -125        Notes
    -126        -----
    -127        This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.
    -128        """
    -129        # check if mass_spec has a scan_number attribute
    -130        if not hasattr(mass_spec, "scan_number"):
    -131            raise ValueError(
    -132                "Mass spectrum must have a scan_number attribute to be added to the dataset correctly"
    -133            )
    -134        self._ms[mass_spec.scan_number] = mass_spec
    -135
    -136    def add_mass_spectra(
    -137        self,
    -138        scan_list,
    -139        spectrum_mode=None,
    -140        ms_level=1,
    -141        use_parser=True,
    -142        auto_process=True,
    -143        ms_params=None,
    -144    ):
    -145        """Add mass spectra to _ms dictionary, from a list of scans or single scan
    -146
    -147        Notes
    -148        -----
    -149        The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.
    -150
    +            
     14class MassSpectraBase:
    + 15    """Base class for mass spectra objects.
    + 16
    + 17    Parameters
    + 18    -----------
    + 19    file_location : str or Path
    + 20        The location of the file containing the mass spectra data.
    + 21    analyzer : str, optional
    + 22        The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
    + 23    instrument_label : str, optional
    + 24        The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
    + 25    sample_name : str, optional
    + 26        The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
    + 27    spectra_parser : object, optional
    + 28        The spectra parser object used to create the mass spectra object. Defaults to None.
    + 29
    + 30    Attributes
    + 31    -----------
    + 32    spectra_parser_class : class
    + 33        The class of the spectra parser used to create the mass spectra object.
    + 34    file_location : str or Path
    + 35        The location of the file containing the mass spectra data.
    + 36    sample_name : str
    + 37        The name of the sample; defaults to the file name if not provided to the parser.
    + 38    analyzer : str
    + 39        The type of analyzer used to generate the mass spectra data. Derived from the spectra parser.
    + 40    instrument_label : str
    + 41        The type of instrument used to generate the mass spectra data. Derived from the spectra parser.
    + 42    _scan_info : dict
    + 43        A dictionary containing the scan data with columns for scan number, scan time, ms level, precursor m/z,
    + 44        scan text, and scan window (lower and upper).
    + 45        Associated with the property scan_df, which returns a pandas DataFrame or can set this attribute from a pandas DataFrame.
    + 46    _ms : dict
    + 47        A dictionary containing mass spectra for the dataset, keys of dictionary are scan numbers. Initialized as an empty dictionary.
    + 48    _ms_unprocessed: dictionary of pandas.DataFrames or None
    + 49        A dictionary of unprocssed mass spectra data, as an (optional) intermediate data product for peak picking.
    + 50        Key is ms_level, and value is dataframe with columns for scan number, m/z, and intensity. Default is None.
    + 51
    + 52    Methods
    + 53    --------
    + 54    * add_mass_spectra(scan_list, spectrum_mode: str = 'profile', use_parser = True, auto_process=True).
    + 55        Add mass spectra (or singlel mass spectrum) to _ms slot, from a list of scans
    + 56    * get_time_of_scan_id(scan).
    + 57        Returns the scan time for the specified scan number.
    + 58    """
    + 59
    + 60    def __init__(
    + 61        self,
    + 62        file_location,
    + 63        analyzer="Unknown",
    + 64        instrument_label="Unknown",
    + 65        sample_name=None,
    + 66        spectra_parser=None,
    + 67    ):
    + 68        if isinstance(file_location, str):
    + 69            file_location = Path(file_location)
    + 70        else:
    + 71            file_location = file_location
    + 72        if not file_location.exists():
    + 73            raise FileExistsError("File does not exist: " + str(file_location))
    + 74
    + 75        if sample_name:
    + 76            self.sample_name = sample_name
    + 77        else:
    + 78            self.sample_name = file_location.stem
    + 79
    + 80        self.file_location = file_location
    + 81        self.analyzer = analyzer
    + 82        self.instrument_label = instrument_label
    + 83
    + 84        # Add the spectra parser class to the object if it is not None
    + 85        if spectra_parser is not None:
    + 86            self.spectra_parser_class = spectra_parser.__class__
    + 87            self.spectra_parser = spectra_parser
    + 88            # Check that spectra_pasrser.sample_name is same as sample_name etc, raise warning if not
    + 89            if (
    + 90                self.sample_name is not None
    + 91                and self.sample_name != self.spectra_parser.sample_name
    + 92            ):
    + 93                warnings.warn(
    + 94                    "sample_name provided to MassSpectraBase object does not match sample_name provided to spectra parser object",
    + 95                    UserWarning,
    + 96                )
    + 97            if self.analyzer != self.spectra_parser.analyzer:
    + 98                warnings.warn(
    + 99                    "analyzer provided to MassSpectraBase object does not match analyzer provided to spectra parser object",
    +100                    UserWarning,
    +101                )
    +102            if self.instrument_label != self.spectra_parser.instrument_label:
    +103                warnings.warn(
    +104                    "instrument provided to MassSpectraBase object does not match instrument provided to spectra parser object",
    +105                    UserWarning,
    +106                )
    +107            if self.file_location != self.spectra_parser.file_location:
    +108                warnings.warn(
    +109                    "file_location provided to MassSpectraBase object does not match file_location provided to spectra parser object",
    +110                    UserWarning,
    +111                )
    +112
    +113        # Instantiate empty dictionaries for scan information and mass spectra
    +114        self._scan_info = {}
    +115        self._ms = {}
    +116        self._ms_unprocessed = {}
    +117
    +118    def add_mass_spectrum(self, mass_spec):
    +119        """Adds a mass spectrum to the dataset.
    +120
    +121        Parameters
    +122        -----------
    +123        mass_spec : MassSpectrum
    +124            The corems MassSpectrum object to be added to the dataset.
    +125
    +126        Notes
    +127        -----
    +128        This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.
    +129        """
    +130        # check if mass_spec has a scan_number attribute
    +131        if not hasattr(mass_spec, "scan_number"):
    +132            raise ValueError(
    +133                "Mass spectrum must have a scan_number attribute to be added to the dataset correctly"
    +134            )
    +135        self._ms[mass_spec.scan_number] = mass_spec
    +136
    +137    def add_mass_spectra(
    +138        self,
    +139        scan_list,
    +140        spectrum_mode=None,
    +141        ms_level=1,
    +142        use_parser=True,
    +143        auto_process=True,
    +144        ms_params=None,
    +145    ):
    +146        """Add mass spectra to _ms dictionary, from a list of scans or single scan
    +147
    +148        Notes
    +149        -----
    +150        The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.
     151
    -152        Parameters
    -153        -----------
    -154        scan_list : list of ints
    -155            List of scans to use to populate _ms slot
    -156        spectrum_mode : str or None
    -157            The spectrum mode to use for the mass spectra.
    -158            If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    -159            Defaults to None.
    -160        ms_level : int, optional
    -161            The MS level to use for the mass spectra.
    -162            This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects.
    -163            Defaults to 1.
    -164        using_parser : bool
    -165            Whether to use the mass spectra parser to get the mass spectra.  Defaults to True.
    -166        auto_process : bool
    -167            Whether to auto-process the mass spectra.  Defaults to True.
    -168        ms_params : MSParameters or None
    -169            The mass spectrum parameters to use for the mass spectra.  If None, uses the globally set MSParameters.
    -170
    -171        Raises
    -172        ------
    -173        TypeError
    -174            If scan_list is not a list of ints
    -175        ValueError
    -176            If polarity is not 'positive' or 'negative'
    -177            If ms_level is not 1 or 2
    -178        """
    -179
    -180        # check if scan_list is a list or a single int; if single int, convert to list
    -181        if isinstance(scan_list, int):
    -182            scan_list = [scan_list]
    -183        if not isinstance(scan_list, list):
    -184            raise TypeError("scan_list must be a list of integers")
    -185        for scan in scan_list:
    -186            if not isinstance(scan, int):
    -187                raise TypeError("scan_list must be a list of integers")
    -188
    -189        # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
    -190        if self.polarity == "negative":
    -191            polarity = -1
    -192        elif self.polarity == "positive":
    -193            polarity = 1
    -194        else:
    -195            raise ValueError(
    -196                "Polarity not set for dataset, must be a either 'positive' or 'negative'"
    -197            )
    -198
    -199        # is not using_parser, check that ms1 and ms2 are not None
    -200        if not use_parser:
    -201            if ms_level not in self._ms_unprocessed.keys():
    -202                raise ValueError(
    -203                    "ms_level {} not found in _ms_unprocessed dictionary".format(
    -204                        ms_level
    -205                    )
    -206                )
    -207
    -208        scan_list = list(set(scan_list))
    -209        scan_list.sort()
    -210        if not use_parser:
    -211            if self._ms_unprocessed[ms_level] is None:
    -212                raise ValueError(
    -213                    "No unprocessed data found for ms_level {}".format(ms_level)
    -214                )
    -215            if (
    -216                len(
    -217                    np.setdiff1d(
    -218                        scan_list, self._ms_unprocessed[ms_level].scan.tolist()
    -219                    )
    -220                )
    -221                > 0
    -222            ):
    -223                raise ValueError(
    -224                    "Not all scans in scan_list are present in the unprocessed data"
    -225                )
    -226            # Prepare the ms_df for parsing
    -227            ms_df = self._ms_unprocessed[ms_level].copy().set_index("scan", drop=False)
    -228
    -229        for scan in scan_list:
    -230            ms = None
    -231            if spectrum_mode is None:
    -232                # get spectrum mode from _scan_info
    -233                spectrum_mode_scan = self.scan_df.loc[scan, "ms_format"]
    -234            else:
    -235                spectrum_mode_scan = spectrum_mode
    -236            # Instantiate the mass spectrum object using the parser or the unprocessed data
    -237            if not use_parser:
    -238                my_ms_df = ms_df.loc[scan]
    -239                if spectrum_mode_scan == "profile":
    -240                    # Check this - it might be better to use the MassSpectrumProfile class to instantiate the mass spectrum
    -241                    ms = ms_from_array_profile(
    -242                        my_ms_df.mz,
    -243                        my_ms_df.intensity,
    -244                        self.file_location,
    -245                        polarity=polarity,
    -246                        auto_process=False,
    -247                    )
    -248                else:
    -249                    raise ValueError(
    -250                        "Only profile mode is supported for unprocessed data"
    -251                    )
    -252            if use_parser:
    -253                ms = self.spectra_parser.get_mass_spectrum_from_scan(
    -254                    scan_number=scan,
    -255                    spectrum_mode=spectrum_mode_scan,
    -256                    auto_process=False,
    -257                )
    -258
    -259            # Set the mass spectrum parameters, auto-process if auto_process is True, and add to the dataset
    -260            if ms is not None:
    -261                if ms_params is not None:
    -262                    ms.parameters = ms_params
    -263                ms.scan_number = scan
    -264                if auto_process:
    -265                    ms.process_mass_spec()
    -266                self.add_mass_spectrum(ms)
    -267
    -268    def get_time_of_scan_id(self, scan):
    -269        """Returns the scan time for the specified scan number.
    -270
    -271        Parameters
    -272        -----------
    -273        scan : int
    -274            The scan number of the desired scan time.
    -275
    -276        Returns
    -277        --------
    -278        float
    -279            The scan time for the specified scan number (in minutes).
    -280
    -281        Raises
    -282        ------
    -283        ValueError
    -284            If no scan time is found for the specified scan number.
    -285        """
    -286        # Check if _retenion_time_list is empty and raise error if so
    -287        if len(self._retention_time_list) == 0:
    -288            raise ValueError("No retention times found in dataset")
    -289        rt = self._retention_time_list[self._scans_number_list.index(scan)]
    -290        return rt
    -291
    -292    @property
    -293    def scan_df(self):
    -294        """
    -295        pandas.DataFrame : A pandas DataFrame containing the scan info data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper).
    -296        """
    -297        scan_df = pd.DataFrame.from_dict(self._scan_info)
    -298        return scan_df
    -299
    -300    @scan_df.setter
    -301    def scan_df(self, df):
    -302        """
    -303        Sets the scan data for the dataset.
    -304
    -305        Parameters
    -306        -----------
    -307        df : pandas.DataFrame
    -308            A pandas DataFrame containing the scan data with columns for scan number, scan time, ms level,
    -309            precursor m/z, scan text, and scan window (lower and upper).
    -310        """
    -311        self._scan_info = df.to_dict()
    -312
    -313    def __getitem__(self, scan_number):
    -314        return self._ms.get(scan_number)
    +152
    +153        Parameters
    +154        -----------
    +155        scan_list : list of ints
    +156            List of scans to use to populate _ms slot
    +157        spectrum_mode : str or None
    +158            The spectrum mode to use for the mass spectra.
    +159            If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    +160            Defaults to None.
    +161        ms_level : int, optional
    +162            The MS level to use for the mass spectra.
    +163            This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects.
    +164            Defaults to 1.
    +165        using_parser : bool
    +166            Whether to use the mass spectra parser to get the mass spectra.  Defaults to True.
    +167        auto_process : bool
    +168            Whether to auto-process the mass spectra.  Defaults to True.
    +169        ms_params : MSParameters or None
    +170            The mass spectrum parameters to use for the mass spectra.  If None, uses the globally set MSParameters.
    +171
    +172        Raises
    +173        ------
    +174        TypeError
    +175            If scan_list is not a list of ints
    +176        ValueError
    +177            If polarity is not 'positive' or 'negative'
    +178            If ms_level is not 1 or 2
    +179        """
    +180
    +181        # check if scan_list is a list or a single int; if single int, convert to list
    +182        if isinstance(scan_list, int):
    +183            scan_list = [scan_list]
    +184        if not isinstance(scan_list, list):
    +185            raise TypeError("scan_list must be a list of integers")
    +186        for scan in scan_list:
    +187            if not isinstance(scan, int):
    +188                raise TypeError("scan_list must be a list of integers")
    +189
    +190        # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
    +191        if self.polarity == "negative":
    +192            polarity = -1
    +193        elif self.polarity == "positive":
    +194            polarity = 1
    +195        else:
    +196            raise ValueError(
    +197                "Polarity not set for dataset, must be a either 'positive' or 'negative'"
    +198            )
    +199
    +200        # is not using_parser, check that ms1 and ms2 are not None
    +201        if not use_parser:
    +202            if ms_level not in self._ms_unprocessed.keys():
    +203                raise ValueError(
    +204                    "ms_level {} not found in _ms_unprocessed dictionary".format(
    +205                        ms_level
    +206                    )
    +207                )
    +208
    +209        scan_list = list(set(scan_list))
    +210        scan_list.sort()
    +211        if not use_parser:
    +212            if self._ms_unprocessed[ms_level] is None:
    +213                raise ValueError(
    +214                    "No unprocessed data found for ms_level {}".format(ms_level)
    +215                )
    +216            if (
    +217                len(
    +218                    np.setdiff1d(
    +219                        scan_list, self._ms_unprocessed[ms_level].scan.tolist()
    +220                    )
    +221                )
    +222                > 0
    +223            ):
    +224                raise ValueError(
    +225                    "Not all scans in scan_list are present in the unprocessed data"
    +226                )
    +227            # Prepare the ms_df for parsing
    +228            ms_df = self._ms_unprocessed[ms_level].copy().set_index("scan", drop=False)
    +229
    +230        for scan in scan_list:
    +231            ms = None
    +232            if spectrum_mode is None:
    +233                # get spectrum mode from _scan_info
    +234                spectrum_mode_scan = self.scan_df.loc[scan, "ms_format"]
    +235            else:
    +236                spectrum_mode_scan = spectrum_mode
    +237            # Instantiate the mass spectrum object using the parser or the unprocessed data
    +238            if not use_parser:
    +239                my_ms_df = ms_df.loc[scan]
    +240                if spectrum_mode_scan == "profile":
    +241                    # Check this - it might be better to use the MassSpectrumProfile class to instantiate the mass spectrum
    +242                    ms = ms_from_array_profile(
    +243                        my_ms_df.mz,
    +244                        my_ms_df.intensity,
    +245                        self.file_location,
    +246                        polarity=polarity,
    +247                        auto_process=False,
    +248                    )
    +249                else:
    +250                    raise ValueError(
    +251                        "Only profile mode is supported for unprocessed data"
    +252                    )
    +253            if use_parser:
    +254                ms = self.spectra_parser.get_mass_spectrum_from_scan(
    +255                    scan_number=scan,
    +256                    spectrum_mode=spectrum_mode_scan,
    +257                    auto_process=False,
    +258                )
    +259
    +260            # Set the mass spectrum parameters, auto-process if auto_process is True, and add to the dataset
    +261            if ms is not None:
    +262                if ms_params is not None:
    +263                    ms.parameters = ms_params
    +264                ms.scan_number = scan
    +265                if auto_process:
    +266                    ms.process_mass_spec()
    +267                self.add_mass_spectrum(ms)
    +268
    +269    def get_time_of_scan_id(self, scan):
    +270        """Returns the scan time for the specified scan number.
    +271
    +272        Parameters
    +273        -----------
    +274        scan : int
    +275            The scan number of the desired scan time.
    +276
    +277        Returns
    +278        --------
    +279        float
    +280            The scan time for the specified scan number (in minutes).
    +281
    +282        Raises
    +283        ------
    +284        ValueError
    +285            If no scan time is found for the specified scan number.
    +286        """
    +287        # Check if _retenion_time_list is empty and raise error if so
    +288        if len(self._retention_time_list) == 0:
    +289            raise ValueError("No retention times found in dataset")
    +290        rt = self._retention_time_list[self._scans_number_list.index(scan)]
    +291        return rt
    +292
    +293    @property
    +294    def scan_df(self):
    +295        """
    +296        pandas.DataFrame : A pandas DataFrame containing the scan info data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper).
    +297        """
    +298        scan_df = pd.DataFrame.from_dict(self._scan_info)
    +299        return scan_df
    +300
    +301    @scan_df.setter
    +302    def scan_df(self, df):
    +303        """
    +304        Sets the scan data for the dataset.
    +305
    +306        Parameters
    +307        -----------
    +308        df : pandas.DataFrame
    +309            A pandas DataFrame containing the scan data with columns for scan number, scan time, ms level,
    +310            precursor m/z, scan text, and scan window (lower and upper).
    +311        """
    +312        self._scan_info = df.to_dict()
    +313
    +314    def __getitem__(self, scan_number):
    +315        return self._ms.get(scan_number)
     
    @@ -1638,63 +1647,63 @@
    Methods
    -
     59    def __init__(
    - 60        self,
    - 61        file_location,
    - 62        analyzer="Unknown",
    - 63        instrument_label="Unknown",
    - 64        sample_name=None,
    - 65        spectra_parser=None,
    - 66    ):
    - 67        if isinstance(file_location, str):
    - 68            file_location = Path(file_location)
    - 69        else:
    - 70            file_location = file_location
    - 71        if not file_location.exists():
    - 72            raise FileExistsError("File does not exist: " + str(file_location))
    - 73
    - 74        if sample_name:
    - 75            self.sample_name = sample_name
    - 76        else:
    - 77            self.sample_name = file_location.stem
    - 78
    - 79        self.file_location = file_location
    - 80        self.analyzer = analyzer
    - 81        self.instrument_label = instrument_label
    - 82
    - 83        # Add the spectra parser class to the object if it is not None
    - 84        if spectra_parser is not None:
    - 85            self.spectra_parser_class = spectra_parser.__class__
    - 86            self.spectra_parser = spectra_parser
    - 87            # Check that spectra_pasrser.sample_name is same as sample_name etc, raise warning if not
    - 88            if (
    - 89                self.sample_name is not None
    - 90                and self.sample_name != self.spectra_parser.sample_name
    - 91            ):
    - 92                warnings.warn(
    - 93                    "sample_name provided to MassSpectraBase object does not match sample_name provided to spectra parser object",
    - 94                    UserWarning
    - 95                )
    - 96            if self.analyzer != self.spectra_parser.analyzer:
    - 97                warnings.warn(
    - 98                    "analyzer provided to MassSpectraBase object does not match analyzer provided to spectra parser object",
    - 99                    UserWarning
    -100                )
    -101            if self.instrument_label != self.spectra_parser.instrument_label:
    -102                warnings.warn(
    -103                    "instrument provided to MassSpectraBase object does not match instrument provided to spectra parser object",
    -104                    UserWarning
    -105                )
    -106            if self.file_location != self.spectra_parser.file_location:
    -107                warnings.warn(
    -108                    "file_location provided to MassSpectraBase object does not match file_location provided to spectra parser object",
    -109                    UserWarning
    -110                )
    -111
    -112        # Instantiate empty dictionaries for scan information and mass spectra
    -113        self._scan_info = {}
    -114        self._ms = {}
    -115        self._ms_unprocessed = {}
    +            
     60    def __init__(
    + 61        self,
    + 62        file_location,
    + 63        analyzer="Unknown",
    + 64        instrument_label="Unknown",
    + 65        sample_name=None,
    + 66        spectra_parser=None,
    + 67    ):
    + 68        if isinstance(file_location, str):
    + 69            file_location = Path(file_location)
    + 70        else:
    + 71            file_location = file_location
    + 72        if not file_location.exists():
    + 73            raise FileExistsError("File does not exist: " + str(file_location))
    + 74
    + 75        if sample_name:
    + 76            self.sample_name = sample_name
    + 77        else:
    + 78            self.sample_name = file_location.stem
    + 79
    + 80        self.file_location = file_location
    + 81        self.analyzer = analyzer
    + 82        self.instrument_label = instrument_label
    + 83
    + 84        # Add the spectra parser class to the object if it is not None
    + 85        if spectra_parser is not None:
    + 86            self.spectra_parser_class = spectra_parser.__class__
    + 87            self.spectra_parser = spectra_parser
    + 88            # Check that spectra_pasrser.sample_name is same as sample_name etc, raise warning if not
    + 89            if (
    + 90                self.sample_name is not None
    + 91                and self.sample_name != self.spectra_parser.sample_name
    + 92            ):
    + 93                warnings.warn(
    + 94                    "sample_name provided to MassSpectraBase object does not match sample_name provided to spectra parser object",
    + 95                    UserWarning,
    + 96                )
    + 97            if self.analyzer != self.spectra_parser.analyzer:
    + 98                warnings.warn(
    + 99                    "analyzer provided to MassSpectraBase object does not match analyzer provided to spectra parser object",
    +100                    UserWarning,
    +101                )
    +102            if self.instrument_label != self.spectra_parser.instrument_label:
    +103                warnings.warn(
    +104                    "instrument provided to MassSpectraBase object does not match instrument provided to spectra parser object",
    +105                    UserWarning,
    +106                )
    +107            if self.file_location != self.spectra_parser.file_location:
    +108                warnings.warn(
    +109                    "file_location provided to MassSpectraBase object does not match file_location provided to spectra parser object",
    +110                    UserWarning,
    +111                )
    +112
    +113        # Instantiate empty dictionaries for scan information and mass spectra
    +114        self._scan_info = {}
    +115        self._ms = {}
    +116        self._ms_unprocessed = {}
     
    @@ -1745,24 +1754,24 @@
    Methods
    -
    117    def add_mass_spectrum(self, mass_spec):
    -118        """Adds a mass spectrum to the dataset.
    -119
    -120        Parameters
    -121        -----------
    -122        mass_spec : MassSpectrum
    -123            The corems MassSpectrum object to be added to the dataset.
    -124
    -125        Notes
    -126        -----
    -127        This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.
    -128        """
    -129        # check if mass_spec has a scan_number attribute
    -130        if not hasattr(mass_spec, "scan_number"):
    -131            raise ValueError(
    -132                "Mass spectrum must have a scan_number attribute to be added to the dataset correctly"
    -133            )
    -134        self._ms[mass_spec.scan_number] = mass_spec
    +            
    118    def add_mass_spectrum(self, mass_spec):
    +119        """Adds a mass spectrum to the dataset.
    +120
    +121        Parameters
    +122        -----------
    +123        mass_spec : MassSpectrum
    +124            The corems MassSpectrum object to be added to the dataset.
    +125
    +126        Notes
    +127        -----
    +128        This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.
    +129        """
    +130        # check if mass_spec has a scan_number attribute
    +131        if not hasattr(mass_spec, "scan_number"):
    +132            raise ValueError(
    +133                "Mass spectrum must have a scan_number attribute to be added to the dataset correctly"
    +134            )
    +135        self._ms[mass_spec.scan_number] = mass_spec
     
    @@ -1793,137 +1802,137 @@
    Notes
    -
    136    def add_mass_spectra(
    -137        self,
    -138        scan_list,
    -139        spectrum_mode=None,
    -140        ms_level=1,
    -141        use_parser=True,
    -142        auto_process=True,
    -143        ms_params=None,
    -144    ):
    -145        """Add mass spectra to _ms dictionary, from a list of scans or single scan
    -146
    -147        Notes
    -148        -----
    -149        The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.
    -150
    +            
    137    def add_mass_spectra(
    +138        self,
    +139        scan_list,
    +140        spectrum_mode=None,
    +141        ms_level=1,
    +142        use_parser=True,
    +143        auto_process=True,
    +144        ms_params=None,
    +145    ):
    +146        """Add mass spectra to _ms dictionary, from a list of scans or single scan
    +147
    +148        Notes
    +149        -----
    +150        The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.
     151
    -152        Parameters
    -153        -----------
    -154        scan_list : list of ints
    -155            List of scans to use to populate _ms slot
    -156        spectrum_mode : str or None
    -157            The spectrum mode to use for the mass spectra.
    -158            If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    -159            Defaults to None.
    -160        ms_level : int, optional
    -161            The MS level to use for the mass spectra.
    -162            This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects.
    -163            Defaults to 1.
    -164        using_parser : bool
    -165            Whether to use the mass spectra parser to get the mass spectra.  Defaults to True.
    -166        auto_process : bool
    -167            Whether to auto-process the mass spectra.  Defaults to True.
    -168        ms_params : MSParameters or None
    -169            The mass spectrum parameters to use for the mass spectra.  If None, uses the globally set MSParameters.
    -170
    -171        Raises
    -172        ------
    -173        TypeError
    -174            If scan_list is not a list of ints
    -175        ValueError
    -176            If polarity is not 'positive' or 'negative'
    -177            If ms_level is not 1 or 2
    -178        """
    -179
    -180        # check if scan_list is a list or a single int; if single int, convert to list
    -181        if isinstance(scan_list, int):
    -182            scan_list = [scan_list]
    -183        if not isinstance(scan_list, list):
    -184            raise TypeError("scan_list must be a list of integers")
    -185        for scan in scan_list:
    -186            if not isinstance(scan, int):
    -187                raise TypeError("scan_list must be a list of integers")
    -188
    -189        # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
    -190        if self.polarity == "negative":
    -191            polarity = -1
    -192        elif self.polarity == "positive":
    -193            polarity = 1
    -194        else:
    -195            raise ValueError(
    -196                "Polarity not set for dataset, must be a either 'positive' or 'negative'"
    -197            )
    -198
    -199        # is not using_parser, check that ms1 and ms2 are not None
    -200        if not use_parser:
    -201            if ms_level not in self._ms_unprocessed.keys():
    -202                raise ValueError(
    -203                    "ms_level {} not found in _ms_unprocessed dictionary".format(
    -204                        ms_level
    -205                    )
    -206                )
    -207
    -208        scan_list = list(set(scan_list))
    -209        scan_list.sort()
    -210        if not use_parser:
    -211            if self._ms_unprocessed[ms_level] is None:
    -212                raise ValueError(
    -213                    "No unprocessed data found for ms_level {}".format(ms_level)
    -214                )
    -215            if (
    -216                len(
    -217                    np.setdiff1d(
    -218                        scan_list, self._ms_unprocessed[ms_level].scan.tolist()
    -219                    )
    -220                )
    -221                > 0
    -222            ):
    -223                raise ValueError(
    -224                    "Not all scans in scan_list are present in the unprocessed data"
    -225                )
    -226            # Prepare the ms_df for parsing
    -227            ms_df = self._ms_unprocessed[ms_level].copy().set_index("scan", drop=False)
    -228
    -229        for scan in scan_list:
    -230            ms = None
    -231            if spectrum_mode is None:
    -232                # get spectrum mode from _scan_info
    -233                spectrum_mode_scan = self.scan_df.loc[scan, "ms_format"]
    -234            else:
    -235                spectrum_mode_scan = spectrum_mode
    -236            # Instantiate the mass spectrum object using the parser or the unprocessed data
    -237            if not use_parser:
    -238                my_ms_df = ms_df.loc[scan]
    -239                if spectrum_mode_scan == "profile":
    -240                    # Check this - it might be better to use the MassSpectrumProfile class to instantiate the mass spectrum
    -241                    ms = ms_from_array_profile(
    -242                        my_ms_df.mz,
    -243                        my_ms_df.intensity,
    -244                        self.file_location,
    -245                        polarity=polarity,
    -246                        auto_process=False,
    -247                    )
    -248                else:
    -249                    raise ValueError(
    -250                        "Only profile mode is supported for unprocessed data"
    -251                    )
    -252            if use_parser:
    -253                ms = self.spectra_parser.get_mass_spectrum_from_scan(
    -254                    scan_number=scan,
    -255                    spectrum_mode=spectrum_mode_scan,
    -256                    auto_process=False,
    -257                )
    -258
    -259            # Set the mass spectrum parameters, auto-process if auto_process is True, and add to the dataset
    -260            if ms is not None:
    -261                if ms_params is not None:
    -262                    ms.parameters = ms_params
    -263                ms.scan_number = scan
    -264                if auto_process:
    -265                    ms.process_mass_spec()
    -266                self.add_mass_spectrum(ms)
    +152
    +153        Parameters
    +154        -----------
    +155        scan_list : list of ints
    +156            List of scans to use to populate _ms slot
    +157        spectrum_mode : str or None
    +158            The spectrum mode to use for the mass spectra.
    +159            If None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    +160            Defaults to None.
    +161        ms_level : int, optional
    +162            The MS level to use for the mass spectra.
    +163            This is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects.
    +164            Defaults to 1.
    +165        using_parser : bool
    +166            Whether to use the mass spectra parser to get the mass spectra.  Defaults to True.
    +167        auto_process : bool
    +168            Whether to auto-process the mass spectra.  Defaults to True.
    +169        ms_params : MSParameters or None
    +170            The mass spectrum parameters to use for the mass spectra.  If None, uses the globally set MSParameters.
    +171
    +172        Raises
    +173        ------
    +174        TypeError
    +175            If scan_list is not a list of ints
    +176        ValueError
    +177            If polarity is not 'positive' or 'negative'
    +178            If ms_level is not 1 or 2
    +179        """
    +180
    +181        # check if scan_list is a list or a single int; if single int, convert to list
    +182        if isinstance(scan_list, int):
    +183            scan_list = [scan_list]
    +184        if not isinstance(scan_list, list):
    +185            raise TypeError("scan_list must be a list of integers")
    +186        for scan in scan_list:
    +187            if not isinstance(scan, int):
    +188                raise TypeError("scan_list must be a list of integers")
    +189
    +190        # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
    +191        if self.polarity == "negative":
    +192            polarity = -1
    +193        elif self.polarity == "positive":
    +194            polarity = 1
    +195        else:
    +196            raise ValueError(
    +197                "Polarity not set for dataset, must be a either 'positive' or 'negative'"
    +198            )
    +199
    +200        # is not using_parser, check that ms1 and ms2 are not None
    +201        if not use_parser:
    +202            if ms_level not in self._ms_unprocessed.keys():
    +203                raise ValueError(
    +204                    "ms_level {} not found in _ms_unprocessed dictionary".format(
    +205                        ms_level
    +206                    )
    +207                )
    +208
    +209        scan_list = list(set(scan_list))
    +210        scan_list.sort()
    +211        if not use_parser:
    +212            if self._ms_unprocessed[ms_level] is None:
    +213                raise ValueError(
    +214                    "No unprocessed data found for ms_level {}".format(ms_level)
    +215                )
    +216            if (
    +217                len(
    +218                    np.setdiff1d(
    +219                        scan_list, self._ms_unprocessed[ms_level].scan.tolist()
    +220                    )
    +221                )
    +222                > 0
    +223            ):
    +224                raise ValueError(
    +225                    "Not all scans in scan_list are present in the unprocessed data"
    +226                )
    +227            # Prepare the ms_df for parsing
    +228            ms_df = self._ms_unprocessed[ms_level].copy().set_index("scan", drop=False)
    +229
    +230        for scan in scan_list:
    +231            ms = None
    +232            if spectrum_mode is None:
    +233                # get spectrum mode from _scan_info
    +234                spectrum_mode_scan = self.scan_df.loc[scan, "ms_format"]
    +235            else:
    +236                spectrum_mode_scan = spectrum_mode
    +237            # Instantiate the mass spectrum object using the parser or the unprocessed data
    +238            if not use_parser:
    +239                my_ms_df = ms_df.loc[scan]
    +240                if spectrum_mode_scan == "profile":
    +241                    # Check this - it might be better to use the MassSpectrumProfile class to instantiate the mass spectrum
    +242                    ms = ms_from_array_profile(
    +243                        my_ms_df.mz,
    +244                        my_ms_df.intensity,
    +245                        self.file_location,
    +246                        polarity=polarity,
    +247                        auto_process=False,
    +248                    )
    +249                else:
    +250                    raise ValueError(
    +251                        "Only profile mode is supported for unprocessed data"
    +252                    )
    +253            if use_parser:
    +254                ms = self.spectra_parser.get_mass_spectrum_from_scan(
    +255                    scan_number=scan,
    +256                    spectrum_mode=spectrum_mode_scan,
    +257                    auto_process=False,
    +258                )
    +259
    +260            # Set the mass spectrum parameters, auto-process if auto_process is True, and add to the dataset
    +261            if ms is not None:
    +262                if ms_params is not None:
    +263                    ms.parameters = ms_params
    +264                ms.scan_number = scan
    +265                if auto_process:
    +266                    ms.process_mass_spec()
    +267                self.add_mass_spectrum(ms)
     
    @@ -1976,29 +1985,29 @@
    Raises
    -
    268    def get_time_of_scan_id(self, scan):
    -269        """Returns the scan time for the specified scan number.
    -270
    -271        Parameters
    -272        -----------
    -273        scan : int
    -274            The scan number of the desired scan time.
    -275
    -276        Returns
    -277        --------
    -278        float
    -279            The scan time for the specified scan number (in minutes).
    -280
    -281        Raises
    -282        ------
    -283        ValueError
    -284            If no scan time is found for the specified scan number.
    -285        """
    -286        # Check if _retenion_time_list is empty and raise error if so
    -287        if len(self._retention_time_list) == 0:
    -288            raise ValueError("No retention times found in dataset")
    -289        rt = self._retention_time_list[self._scans_number_list.index(scan)]
    -290        return rt
    +            
    269    def get_time_of_scan_id(self, scan):
    +270        """Returns the scan time for the specified scan number.
    +271
    +272        Parameters
    +273        -----------
    +274        scan : int
    +275            The scan number of the desired scan time.
    +276
    +277        Returns
    +278        --------
    +279        float
    +280            The scan time for the specified scan number (in minutes).
    +281
    +282        Raises
    +283        ------
    +284        ValueError
    +285            If no scan time is found for the specified scan number.
    +286        """
    +287        # Check if _retenion_time_list is empty and raise error if so
    +288        if len(self._retention_time_list) == 0:
    +289            raise ValueError("No retention times found in dataset")
    +290        rt = self._retention_time_list[self._scans_number_list.index(scan)]
    +291        return rt
     
    @@ -2051,800 +2060,808 @@
    Raises
    -
     317class LCMSBase(MassSpectraBase, LCCalculations, PHCalculations, LCMSSpectralSearch):
    - 318    """A class representing a liquid chromatography-mass spectrometry (LC-MS) data object.
    - 319
    - 320    This class is not intended to be instantiated directly, but rather to be instantiated by an appropriate mass spectra parser using the get_lcms_obj() method.
    - 321
    - 322    Parameters
    - 323    -----------
    - 324    file_location : str or Path
    - 325        The location of the file containing the mass spectra data.
    - 326    analyzer : str, optional
    - 327        The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
    - 328    instrument_label : str, optional
    - 329        The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
    - 330    sample_name : str, optional
    - 331        The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
    - 332    spectra_parser : object, optional
    - 333        The spectra parser object used to create the mass spectra object. Defaults to None.
    - 334
    - 335    Attributes
    - 336    -----------
    - 337    polarity : str
    - 338        The polarity of the ionization mode used for the dataset.
    - 339    _parameters : LCMSParameters
    - 340        The parameters used for all methods called on the LCMSBase object. Set upon instantiation from LCMSParameters.
    - 341    _retention_time_list : numpy.ndarray
    - 342        An array of retention times for the dataset.
    - 343    _scans_number_list : list
    - 344        A list of scan numbers for the dataset.
    - 345    _tic_list : numpy.ndarray
    - 346        An array of total ion current (TIC) values for the dataset.
    - 347    eics : dict
    - 348        A dictionary containing extracted ion chromatograms (EICs) for the dataset.
    - 349        Key is the mz of the EIC. Initialized as an empty dictionary.
    - 350    mass_features : dictionary of LCMSMassFeature objects
    - 351        A dictionary containing mass features for the dataset.
    - 352        Key is mass feature ID. Initialized as an empty dictionary.
    - 353    spectral_search_results : dictionary of MS2SearchResults objects
    - 354        A dictionary containing spectral search results for the dataset.
    - 355        Key is scan number : precursor mz. Initialized as an empty dictionary.
    - 356
    - 357    Methods
    - 358    --------
    - 359    * get_parameters_json().
    - 360        Returns the parameters used for the LC-MS analysis in JSON format.
    - 361    * add_associated_ms2_dda(add_to_lcmsobj=True, auto_process=True, use_parser=True)
    - 362        Adds which MS2 scans are associated with each mass feature to the
    - 363        mass_features dictionary and optionally adds the MS2 spectra to the _ms dictionary.
    - 364    * add_associated_ms1(add_to_lcmsobj=True, auto_process=True, use_parser=True)
    - 365        Adds the MS1 spectra associated with each mass feature to the
    - 366        mass_features dictionary and adds the MS1 spectra to the _ms dictionary.
    - 367    * mass_features_to_df()
    - 368        Returns a pandas dataframe summarizing the mass features in the dataset.
    - 369    * set_tic_list_from_data(overwrite=False)
    - 370        Sets the TIC list from the mass spectrum objects within the _ms dictionary.
    - 371    * set_retention_time_from_data(overwrite=False)
    - 372        Sets the retention time list from the data in the _ms dictionary.
    - 373    * set_scans_number_from_data(overwrite=False)
    - 374        Sets the scan number list from the data in the _ms dictionary.
    - 375    """
    - 376
    - 377    def __init__(
    - 378        self,
    - 379        file_location,
    - 380        analyzer="Unknown",
    - 381        instrument_label="Unknown",
    - 382        sample_name=None,
    - 383        spectra_parser=None,
    - 384    ):
    - 385        super().__init__(
    - 386            file_location, analyzer, instrument_label, sample_name, spectra_parser
    - 387        )
    - 388        self.polarity = ""
    - 389        self._parameters = LCMSParameters()
    - 390        self._retention_time_list = []
    - 391        self._scans_number_list = []
    - 392        self._tic_list = []
    - 393        self.eics = {}
    - 394        self.mass_features = {}
    - 395        self.spectral_search_results = {}
    - 396
    - 397    def get_parameters_json(self):
    - 398        """Returns the parameters stored for the LC-MS object in JSON format.
    - 399
    - 400        Returns
    - 401        --------
    - 402        str
    - 403            The parameters used for the LC-MS analysis in JSON format.
    - 404        """
    - 405        return self.parameters.to_json()
    - 406
    - 407    def remove_unprocessed_data(self, ms_level=None):
    - 408        """Removes the unprocessed data from the LCMSBase object.
    - 409
    - 410        Parameters
    - 411        -----------
    - 412        ms_level : int, optional
    - 413            The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
    - 414
    - 415        Raises
    - 416        ------
    - 417        ValueError
    - 418            If ms_level is not 1 or 2.
    - 419
    - 420        Notes
    - 421        -----
    - 422        This method is useful for freeing up memory after the data has been processed.
    - 423        """
    - 424        if ms_level is None:
    - 425            for ms_level in self._ms_unprocessed.keys():
    - 426                self._ms_unprocessed[ms_level] = None
    - 427        if ms_level not in [1, 2]:
    - 428            raise ValueError("ms_level must be 1 or 2")
    - 429        self._ms_unprocessed[ms_level] = None
    - 430
    - 431    def add_associated_ms2_dda(
    - 432        self, auto_process=True, use_parser=True, spectrum_mode=None, ms_params_key="ms2", scan_filter=None
    - 433    ):
    - 434        """Add MS2 spectra associated with mass features to the dataset.
    - 435
    - 436        Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)
    - 437
    - 438        Parameters
    - 439        -----------
    - 440        auto_process : bool, optional
    - 441            If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
    - 442        use_parser : bool, optional
    - 443            If True, envoke the spectra parser to get the MS2 spectra. Default is True.
    - 444        spectrum_mode : str or None, optional
    - 445            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
    - 446            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    - 447            Defaults to None. (faster if defined, otherwise will check each scan)
    - 448        ms_params_key : string, optional
    - 449            The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute.
    - 450            Defaults to 'ms2'.
    - 451        scan_filter : str
    - 452            A string to filter the scans to add to the _ms dictionary.  If None, all scans are added.  Defaults to None.
    - 453            "hcd" will pull out only HCD scans.
    - 454
    - 455        Raises
    - 456        ------
    - 457        ValueError
    - 458            If mass_features is not set, must run find_mass_features() first.
    - 459            If no MS2 scans are found in the dataset.
    - 460            If no precursor m/z values are found in MS2 scans, not a DDA dataset.
    - 461        """
    - 462        # Check if mass_features is set, raise error if not
    - 463        if self.mass_features is None:
    - 464            raise ValueError(
    - 465                "mass_features not set, must run find_mass_features() first"
    - 466            )
    - 467        
    - 468        # reconfigure ms_params to get the correct mass spectrum parameters from the key
    - 469        ms_params = self.parameters.mass_spectrum[ms_params_key]
    - 470
    - 471        mf_df = self.mass_features_to_df().copy()
    - 472        # Find ms2 scans that have a precursor m/z value
    - 473        ms2_scans = self.scan_df[self.scan_df.ms_level == 2]
    - 474        ms2_scans = ms2_scans[~ms2_scans.precursor_mz.isna()]
    - 475        # drop ms2 scans that have no tic
    - 476        ms2_scans = ms2_scans[ms2_scans.tic > 0]
    - 477        if ms2_scans is None:
    - 478            raise ValueError("No DDA scans found in dataset")
    - 479
    - 480        if scan_filter is not None:
    - 481            ms2_scans = ms2_scans[ms2_scans.scan_text.str.contains(scan_filter)]
    - 482        # set tolerance in rt space (in minutes) and mz space (in daltons)
    - 483        time_tol = self.parameters.lc_ms.ms2_dda_rt_tolerance
    - 484        mz_tol = self.parameters.lc_ms.ms2_dda_mz_tolerance
    +            
     318class LCMSBase(MassSpectraBase, LCCalculations, PHCalculations, LCMSSpectralSearch):
    + 319    """A class representing a liquid chromatography-mass spectrometry (LC-MS) data object.
    + 320
    + 321    This class is not intended to be instantiated directly, but rather to be instantiated by an appropriate mass spectra parser using the get_lcms_obj() method.
    + 322
    + 323    Parameters
    + 324    -----------
    + 325    file_location : str or Path
    + 326        The location of the file containing the mass spectra data.
    + 327    analyzer : str, optional
    + 328        The type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
    + 329    instrument_label : str, optional
    + 330        The type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
    + 331    sample_name : str, optional
    + 332        The name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
    + 333    spectra_parser : object, optional
    + 334        The spectra parser object used to create the mass spectra object. Defaults to None.
    + 335
    + 336    Attributes
    + 337    -----------
    + 338    polarity : str
    + 339        The polarity of the ionization mode used for the dataset.
    + 340    _parameters : LCMSParameters
    + 341        The parameters used for all methods called on the LCMSBase object. Set upon instantiation from LCMSParameters.
    + 342    _retention_time_list : numpy.ndarray
    + 343        An array of retention times for the dataset.
    + 344    _scans_number_list : list
    + 345        A list of scan numbers for the dataset.
    + 346    _tic_list : numpy.ndarray
    + 347        An array of total ion current (TIC) values for the dataset.
    + 348    eics : dict
    + 349        A dictionary containing extracted ion chromatograms (EICs) for the dataset.
    + 350        Key is the mz of the EIC. Initialized as an empty dictionary.
    + 351    mass_features : dictionary of LCMSMassFeature objects
    + 352        A dictionary containing mass features for the dataset.
    + 353        Key is mass feature ID. Initialized as an empty dictionary.
    + 354    spectral_search_results : dictionary of MS2SearchResults objects
    + 355        A dictionary containing spectral search results for the dataset.
    + 356        Key is scan number : precursor mz. Initialized as an empty dictionary.
    + 357
    + 358    Methods
    + 359    --------
    + 360    * get_parameters_json().
    + 361        Returns the parameters used for the LC-MS analysis in JSON format.
    + 362    * add_associated_ms2_dda(add_to_lcmsobj=True, auto_process=True, use_parser=True)
    + 363        Adds which MS2 scans are associated with each mass feature to the
    + 364        mass_features dictionary and optionally adds the MS2 spectra to the _ms dictionary.
    + 365    * add_associated_ms1(add_to_lcmsobj=True, auto_process=True, use_parser=True)
    + 366        Adds the MS1 spectra associated with each mass feature to the
    + 367        mass_features dictionary and adds the MS1 spectra to the _ms dictionary.
    + 368    * mass_features_to_df()
    + 369        Returns a pandas dataframe summarizing the mass features in the dataset.
    + 370    * set_tic_list_from_data(overwrite=False)
    + 371        Sets the TIC list from the mass spectrum objects within the _ms dictionary.
    + 372    * set_retention_time_from_data(overwrite=False)
    + 373        Sets the retention time list from the data in the _ms dictionary.
    + 374    * set_scans_number_from_data(overwrite=False)
    + 375        Sets the scan number list from the data in the _ms dictionary.
    + 376    """
    + 377
    + 378    def __init__(
    + 379        self,
    + 380        file_location,
    + 381        analyzer="Unknown",
    + 382        instrument_label="Unknown",
    + 383        sample_name=None,
    + 384        spectra_parser=None,
    + 385    ):
    + 386        super().__init__(
    + 387            file_location, analyzer, instrument_label, sample_name, spectra_parser
    + 388        )
    + 389        self.polarity = ""
    + 390        self._parameters = LCMSParameters()
    + 391        self._retention_time_list = []
    + 392        self._scans_number_list = []
    + 393        self._tic_list = []
    + 394        self.eics = {}
    + 395        self.mass_features = {}
    + 396        self.spectral_search_results = {}
    + 397
    + 398    def get_parameters_json(self):
    + 399        """Returns the parameters stored for the LC-MS object in JSON format.
    + 400
    + 401        Returns
    + 402        --------
    + 403        str
    + 404            The parameters used for the LC-MS analysis in JSON format.
    + 405        """
    + 406        return self.parameters.to_json()
    + 407
    + 408    def remove_unprocessed_data(self, ms_level=None):
    + 409        """Removes the unprocessed data from the LCMSBase object.
    + 410
    + 411        Parameters
    + 412        -----------
    + 413        ms_level : int, optional
    + 414            The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
    + 415
    + 416        Raises
    + 417        ------
    + 418        ValueError
    + 419            If ms_level is not 1 or 2.
    + 420
    + 421        Notes
    + 422        -----
    + 423        This method is useful for freeing up memory after the data has been processed.
    + 424        """
    + 425        if ms_level is None:
    + 426            for ms_level in self._ms_unprocessed.keys():
    + 427                self._ms_unprocessed[ms_level] = None
    + 428        if ms_level not in [1, 2]:
    + 429            raise ValueError("ms_level must be 1 or 2")
    + 430        self._ms_unprocessed[ms_level] = None
    + 431
    + 432    def add_associated_ms2_dda(
    + 433        self,
    + 434        auto_process=True,
    + 435        use_parser=True,
    + 436        spectrum_mode=None,
    + 437        ms_params_key="ms2",
    + 438        scan_filter=None,
    + 439    ):
    + 440        """Add MS2 spectra associated with mass features to the dataset.
    + 441
    + 442        Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)
    + 443
    + 444        Parameters
    + 445        -----------
    + 446        auto_process : bool, optional
    + 447            If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
    + 448        use_parser : bool, optional
    + 449            If True, envoke the spectra parser to get the MS2 spectra. Default is True.
    + 450        spectrum_mode : str or None, optional
    + 451            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
    + 452            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    + 453            Defaults to None. (faster if defined, otherwise will check each scan)
    + 454        ms_params_key : string, optional
    + 455            The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute.
    + 456            Defaults to 'ms2'.
    + 457        scan_filter : str
    + 458            A string to filter the scans to add to the _ms dictionary.  If None, all scans are added.  Defaults to None.
    + 459            "hcd" will pull out only HCD scans.
    + 460
    + 461        Raises
    + 462        ------
    + 463        ValueError
    + 464            If mass_features is not set, must run find_mass_features() first.
    + 465            If no MS2 scans are found in the dataset.
    + 466            If no precursor m/z values are found in MS2 scans, not a DDA dataset.
    + 467        """
    + 468        # Check if mass_features is set, raise error if not
    + 469        if self.mass_features is None:
    + 470            raise ValueError(
    + 471                "mass_features not set, must run find_mass_features() first"
    + 472            )
    + 473
    + 474        # reconfigure ms_params to get the correct mass spectrum parameters from the key
    + 475        ms_params = self.parameters.mass_spectrum[ms_params_key]
    + 476
    + 477        mf_df = self.mass_features_to_df().copy()
    + 478        # Find ms2 scans that have a precursor m/z value
    + 479        ms2_scans = self.scan_df[self.scan_df.ms_level == 2]
    + 480        ms2_scans = ms2_scans[~ms2_scans.precursor_mz.isna()]
    + 481        # drop ms2 scans that have no tic
    + 482        ms2_scans = ms2_scans[ms2_scans.tic > 0]
    + 483        if ms2_scans is None:
    + 484            raise ValueError("No DDA scans found in dataset")
      485
    - 486        # for each mass feature, find the ms2 scans that are within the roi scan time and mz range
    - 487        dda_scans = []
    - 488        for i, row in mf_df.iterrows():
    - 489            ms2_scans_filtered = ms2_scans[
    - 490                ms2_scans.scan_time.between(
    - 491                    row.scan_time - time_tol, row.scan_time + time_tol
    - 492                )
    - 493            ]
    - 494            ms2_scans_filtered = ms2_scans_filtered[
    - 495                ms2_scans_filtered.precursor_mz.between(
    - 496                    row.mz - mz_tol, row.mz + mz_tol
    - 497                )
    - 498            ]
    - 499            dda_scans = dda_scans + ms2_scans_filtered.scan.tolist()
    - 500            self.mass_features[i].ms2_scan_numbers = ms2_scans_filtered.scan.tolist() + self.mass_features[i].ms2_scan_numbers
    - 501        # add to _ms attribute
    - 502        self.add_mass_spectra(
    - 503            scan_list=list(set(dda_scans)),
    - 504            auto_process=auto_process,
    - 505            spectrum_mode=spectrum_mode,
    - 506            use_parser=use_parser,
    - 507            ms_params=ms_params,
    - 508        )
    - 509        # associate appropriate _ms attribute to appropriate mass feature's ms2_mass_spectra attribute
    - 510        for mf_id in self.mass_features:
    - 511            if self.mass_features[mf_id].ms2_scan_numbers is not None:
    - 512                for dda_scan in self.mass_features[mf_id].ms2_scan_numbers:
    - 513                    if dda_scan in self._ms.keys():
    - 514                        self.mass_features[mf_id].ms2_mass_spectra[dda_scan] = self._ms[
    - 515                            dda_scan
    - 516                        ]
    - 517
    - 518    def add_associated_ms1(
    - 519        self, auto_process=True, use_parser=True, spectrum_mode=None
    - 520    ):
    - 521        """Add MS1 spectra associated with mass features to the dataset.
    - 522
    - 523        Parameters
    - 524        -----------
    - 525        auto_process : bool, optional
    - 526            If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
    - 527        use_parser : bool, optional
    - 528            If True, envoke the spectra parser to get the MS1 spectra. Default is True.
    - 529        spectrum_mode : str or None, optional
    - 530            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
    - 531            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    - 532            Defaults to None. (faster if defined, otherwise will check each scan)
    - 533
    - 534        Raises
    - 535        ------
    - 536        ValueError
    - 537            If mass_features is not set, must run find_mass_features() first.
    - 538            If apex scans are not profile mode, all apex scans must be profile mode for averaging.
    - 539            If number of scans to average is not  1 or an integer with an integer median (i.e. 3, 5, 7, 9).
    - 540            If deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
    - 541        """
    - 542        # Check if mass_features is set, raise error if not
    - 543        if self.mass_features is None:
    - 544            raise ValueError(
    - 545                "mass_features not set, must run find_mass_features() first"
    - 546            )
    - 547        scans_to_average = self.parameters.lc_ms.ms1_scans_to_average
    - 548
    - 549        if scans_to_average == 1:
    - 550            # Add to LCMSobj
    - 551            self.add_mass_spectra(
    - 552                scan_list=[
    - 553                    int(x) for x in self.mass_features_to_df().apex_scan.tolist()
    - 554                ],
    - 555                auto_process=auto_process,
    - 556                use_parser=use_parser,
    - 557                spectrum_mode=spectrum_mode,
    - 558                ms_params=self.parameters.mass_spectrum["ms1"],
    - 559            )
    - 560
    - 561        elif (
    - 562            (scans_to_average - 1) % 2
    - 563        ) == 0:  # scans_to_average = 3, 5, 7 etc, mirror l/r around apex
    - 564            apex_scans = list(set(self.mass_features_to_df().apex_scan.tolist()))
    - 565            # Check if all apex scans are profile mode, raise error if not
    - 566            if not all(self.scan_df.loc[apex_scans, "ms_format"] == "profile"):
    - 567                raise ValueError("All apex scans must be profile mode for averaging")
    - 568
    - 569            # First get sets of scans to average
    - 570            def get_scans_from_apex(ms1_scans, apex_scan, scans_to_average):
    - 571                ms1_idx_start = ms1_scans.index(apex_scan) - int(
    - 572                    (scans_to_average - 1) / 2
    - 573                )
    - 574                if ms1_idx_start < 0:
    - 575                    ms1_idx_start = 0
    - 576                ms1_idx_end = (
    - 577                    ms1_scans.index(apex_scan) + int((scans_to_average - 1) / 2) + 1
    - 578                )
    - 579                if ms1_idx_end > (len(ms1_scans) - 1):
    - 580                    ms1_idx_end = len(ms1_scans) - 1
    - 581                scan_list = ms1_scans[ms1_idx_start:ms1_idx_end]
    - 582                return scan_list
    - 583
    - 584            ms1_scans = self.ms1_scans
    - 585            scans_lists = [
    - 586                get_scans_from_apex(ms1_scans, apex_scan, scans_to_average)
    - 587                for apex_scan in apex_scans
    - 588            ]
    - 589
    - 590            # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
    - 591            if self.polarity == "negative":
    - 592                polarity = -1
    - 593            elif self.polarity == "positive":
    - 594                polarity = 1
    - 595
    - 596            if not use_parser:
    - 597                # Perform checks and prepare _ms_unprocessed dictionary if use_parser is False (saves time to do this once)
    - 598                ms1_unprocessed = self._ms_unprocessed[1].copy()
    - 599                # Set the index on _ms_unprocessed[1] to scan number
    - 600                ms1_unprocessed = ms1_unprocessed.set_index("scan", drop=False)
    - 601                self._ms_unprocessed[1] = ms1_unprocessed
    - 602
    - 603                # Check that all the scans in scan_lists are indexs in self._ms_unprocessed[1]
    - 604                scans_lists_flat = list(
    - 605                    set([scan for sublist in scans_lists for scan in sublist])
    - 606                )
    - 607                if (
    - 608                    len(
    - 609                        np.setdiff1d(
    - 610                            np.sort(scans_lists_flat),
    - 611                            np.sort(ms1_unprocessed.index.values),
    - 612                        )
    - 613                    )
    - 614                    > 0
    - 615                ):
    - 616                    raise ValueError(
    - 617                        "Not all scans to average are present in the unprocessed data"
    - 618                    )
    - 619
    - 620            for scan_list_average, apex_scan in zip(scans_lists, apex_scans):
    - 621                # Get unprocessed mass spectrum from scans
    - 622                ms = self.get_average_mass_spectrum(
    - 623                    scan_list=scan_list_average,
    - 624                    apex_scan=apex_scan,
    - 625                    spectrum_mode="profile",
    - 626                    ms_level=1,
    - 627                    auto_process=auto_process,
    - 628                    use_parser=use_parser,
    - 629                    perform_checks=False,
    - 630                    polarity=polarity,
    - 631                    ms_params=self.parameters.mass_spectrum["ms1"],
    - 632                )
    - 633                # Add mass spectrum to LCMS object and associated with mass feature
    - 634                self.add_mass_spectrum(ms)
    - 635
    - 636            if not use_parser:
    - 637                # Reset the index on _ms_unprocessed[1] to not be scan number
    - 638                ms1_unprocessed = ms1_unprocessed.reset_index(drop=True)
    - 639                self._ms_unprocessed[1] = ms1_unprocessed
    - 640        else:
    - 641            raise ValueError(
    - 642                "Number of scans to average must be 1 or an integer with an integer median (i.e. 3, 5, 7, 9)"
    - 643            )
    + 486        if scan_filter is not None:
    + 487            ms2_scans = ms2_scans[ms2_scans.scan_text.str.contains(scan_filter)]
    + 488        # set tolerance in rt space (in minutes) and mz space (in daltons)
    + 489        time_tol = self.parameters.lc_ms.ms2_dda_rt_tolerance
    + 490        mz_tol = self.parameters.lc_ms.ms2_dda_mz_tolerance
    + 491
    + 492        # for each mass feature, find the ms2 scans that are within the roi scan time and mz range
    + 493        dda_scans = []
    + 494        for i, row in mf_df.iterrows():
    + 495            ms2_scans_filtered = ms2_scans[
    + 496                ms2_scans.scan_time.between(
    + 497                    row.scan_time - time_tol, row.scan_time + time_tol
    + 498                )
    + 499            ]
    + 500            ms2_scans_filtered = ms2_scans_filtered[
    + 501                ms2_scans_filtered.precursor_mz.between(
    + 502                    row.mz - mz_tol, row.mz + mz_tol
    + 503                )
    + 504            ]
    + 505            dda_scans = dda_scans + ms2_scans_filtered.scan.tolist()
    + 506            self.mass_features[i].ms2_scan_numbers = (
    + 507                ms2_scans_filtered.scan.tolist()
    + 508                + self.mass_features[i].ms2_scan_numbers
    + 509            )
    + 510        # add to _ms attribute
    + 511        self.add_mass_spectra(
    + 512            scan_list=list(set(dda_scans)),
    + 513            auto_process=auto_process,
    + 514            spectrum_mode=spectrum_mode,
    + 515            use_parser=use_parser,
    + 516            ms_params=ms_params,
    + 517        )
    + 518        # associate appropriate _ms attribute to appropriate mass feature's ms2_mass_spectra attribute
    + 519        for mf_id in self.mass_features:
    + 520            if self.mass_features[mf_id].ms2_scan_numbers is not None:
    + 521                for dda_scan in self.mass_features[mf_id].ms2_scan_numbers:
    + 522                    if dda_scan in self._ms.keys():
    + 523                        self.mass_features[mf_id].ms2_mass_spectra[dda_scan] = self._ms[
    + 524                            dda_scan
    + 525                        ]
    + 526
    + 527    def add_associated_ms1(
    + 528        self, auto_process=True, use_parser=True, spectrum_mode=None
    + 529    ):
    + 530        """Add MS1 spectra associated with mass features to the dataset.
    + 531
    + 532        Parameters
    + 533        -----------
    + 534        auto_process : bool, optional
    + 535            If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
    + 536        use_parser : bool, optional
    + 537            If True, envoke the spectra parser to get the MS1 spectra. Default is True.
    + 538        spectrum_mode : str or None, optional
    + 539            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
    + 540            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    + 541            Defaults to None. (faster if defined, otherwise will check each scan)
    + 542
    + 543        Raises
    + 544        ------
    + 545        ValueError
    + 546            If mass_features is not set, must run find_mass_features() first.
    + 547            If apex scans are not profile mode, all apex scans must be profile mode for averaging.
    + 548            If number of scans to average is not  1 or an integer with an integer median (i.e. 3, 5, 7, 9).
    + 549            If deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
    + 550        """
    + 551        # Check if mass_features is set, raise error if not
    + 552        if self.mass_features is None:
    + 553            raise ValueError(
    + 554                "mass_features not set, must run find_mass_features() first"
    + 555            )
    + 556        scans_to_average = self.parameters.lc_ms.ms1_scans_to_average
    + 557
    + 558        if scans_to_average == 1:
    + 559            # Add to LCMSobj
    + 560            self.add_mass_spectra(
    + 561                scan_list=[
    + 562                    int(x) for x in self.mass_features_to_df().apex_scan.tolist()
    + 563                ],
    + 564                auto_process=auto_process,
    + 565                use_parser=use_parser,
    + 566                spectrum_mode=spectrum_mode,
    + 567                ms_params=self.parameters.mass_spectrum["ms1"],
    + 568            )
    + 569
    + 570        elif (
    + 571            (scans_to_average - 1) % 2
    + 572        ) == 0:  # scans_to_average = 3, 5, 7 etc, mirror l/r around apex
    + 573            apex_scans = list(set(self.mass_features_to_df().apex_scan.tolist()))
    + 574            # Check if all apex scans are profile mode, raise error if not
    + 575            if not all(self.scan_df.loc[apex_scans, "ms_format"] == "profile"):
    + 576                raise ValueError("All apex scans must be profile mode for averaging")
    + 577
    + 578            # First get sets of scans to average
    + 579            def get_scans_from_apex(ms1_scans, apex_scan, scans_to_average):
    + 580                ms1_idx_start = ms1_scans.index(apex_scan) - int(
    + 581                    (scans_to_average - 1) / 2
    + 582                )
    + 583                if ms1_idx_start < 0:
    + 584                    ms1_idx_start = 0
    + 585                ms1_idx_end = (
    + 586                    ms1_scans.index(apex_scan) + int((scans_to_average - 1) / 2) + 1
    + 587                )
    + 588                if ms1_idx_end > (len(ms1_scans) - 1):
    + 589                    ms1_idx_end = len(ms1_scans) - 1
    + 590                scan_list = ms1_scans[ms1_idx_start:ms1_idx_end]
    + 591                return scan_list
    + 592
    + 593            ms1_scans = self.ms1_scans
    + 594            scans_lists = [
    + 595                get_scans_from_apex(ms1_scans, apex_scan, scans_to_average)
    + 596                for apex_scan in apex_scans
    + 597            ]
    + 598
    + 599            # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
    + 600            if self.polarity == "negative":
    + 601                polarity = -1
    + 602            elif self.polarity == "positive":
    + 603                polarity = 1
    + 604
    + 605            if not use_parser:
    + 606                # Perform checks and prepare _ms_unprocessed dictionary if use_parser is False (saves time to do this once)
    + 607                ms1_unprocessed = self._ms_unprocessed[1].copy()
    + 608                # Set the index on _ms_unprocessed[1] to scan number
    + 609                ms1_unprocessed = ms1_unprocessed.set_index("scan", drop=False)
    + 610                self._ms_unprocessed[1] = ms1_unprocessed
    + 611
    + 612                # Check that all the scans in scan_lists are indexs in self._ms_unprocessed[1]
    + 613                scans_lists_flat = list(
    + 614                    set([scan for sublist in scans_lists for scan in sublist])
    + 615                )
    + 616                if (
    + 617                    len(
    + 618                        np.setdiff1d(
    + 619                            np.sort(scans_lists_flat),
    + 620                            np.sort(ms1_unprocessed.index.values),
    + 621                        )
    + 622                    )
    + 623                    > 0
    + 624                ):
    + 625                    raise ValueError(
    + 626                        "Not all scans to average are present in the unprocessed data"
    + 627                    )
    + 628
    + 629            for scan_list_average, apex_scan in zip(scans_lists, apex_scans):
    + 630                # Get unprocessed mass spectrum from scans
    + 631                ms = self.get_average_mass_spectrum(
    + 632                    scan_list=scan_list_average,
    + 633                    apex_scan=apex_scan,
    + 634                    spectrum_mode="profile",
    + 635                    ms_level=1,
    + 636                    auto_process=auto_process,
    + 637                    use_parser=use_parser,
    + 638                    perform_checks=False,
    + 639                    polarity=polarity,
    + 640                    ms_params=self.parameters.mass_spectrum["ms1"],
    + 641                )
    + 642                # Add mass spectrum to LCMS object and associated with mass feature
    + 643                self.add_mass_spectrum(ms)
      644
    - 645        # Associate the ms1 spectra with the mass features
    - 646        for mf_id in self.mass_features:
    - 647            self.mass_features[mf_id].mass_spectrum = self._ms[
    - 648                self.mass_features[mf_id].apex_scan
    - 649            ]
    - 650            self.mass_features[mf_id].update_mz()
    - 651        
    - 652        # Re-process clustering if persistent homology is selected to remove duplicate mass features after adding and processing MS1 spectra
    - 653        if self.parameters.lc_ms.peak_picking_method == "persistent homology":
    - 654            self.cluster_mass_features(drop_children=True, sort_by="persistence")
    - 655
    - 656    def mass_features_to_df(self):
    - 657        """Returns a pandas dataframe summarizing the mass features.
    - 658
    - 659        The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity,
    - 660        persistence, area, monoisotopic_mf_id, and isotopologue_type.  The index is set to mf_id (mass feature ID).
    - 661
    - 662
    - 663        Returns
    - 664        --------
    - 665        pandas.DataFrame
    - 666            A pandas dataframe of mass features with the following columns:
    - 667            mf_id, mz, apex_scan, scan_time, intensity, persistence, area.
    - 668        """
    - 669
    - 670        def mass_spectrum_to_string(
    - 671            mass_spec, normalize=True, min_normalized_abun=0.01
    - 672        ):
    - 673            """Converts a mass spectrum to a string of m/z:abundance pairs.
    - 674
    - 675            Parameters
    - 676            -----------
    - 677            mass_spec : MassSpectrum
    - 678                A MassSpectrum object to be converted to a string.
    - 679            normalize : bool, optional
    - 680                If True, normalizes the abundance values to a maximum of 1. Defaults to True.
    - 681            min_normalized_abun : float, optional
    - 682                The minimum normalized abundance value to include in the string, only used if normalize is True. Defaults to 0.01.
    + 645            if not use_parser:
    + 646                # Reset the index on _ms_unprocessed[1] to not be scan number
    + 647                ms1_unprocessed = ms1_unprocessed.reset_index(drop=True)
    + 648                self._ms_unprocessed[1] = ms1_unprocessed
    + 649        else:
    + 650            raise ValueError(
    + 651                "Number of scans to average must be 1 or an integer with an integer median (i.e. 3, 5, 7, 9)"
    + 652            )
    + 653
    + 654        # Associate the ms1 spectra with the mass features
    + 655        for mf_id in self.mass_features:
    + 656            self.mass_features[mf_id].mass_spectrum = self._ms[
    + 657                self.mass_features[mf_id].apex_scan
    + 658            ]
    + 659            self.mass_features[mf_id].update_mz()
    + 660
    + 661        # Re-process clustering if persistent homology is selected to remove duplicate mass features after adding and processing MS1 spectra
    + 662        if self.parameters.lc_ms.peak_picking_method == "persistent homology":
    + 663            self.cluster_mass_features(drop_children=True, sort_by="persistence")
    + 664
    + 665    def mass_features_to_df(self):
    + 666        """Returns a pandas dataframe summarizing the mass features.
    + 667
    + 668        The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity,
    + 669        persistence, area, monoisotopic_mf_id, and isotopologue_type.  The index is set to mf_id (mass feature ID).
    + 670
    + 671
    + 672        Returns
    + 673        --------
    + 674        pandas.DataFrame
    + 675            A pandas dataframe of mass features with the following columns:
    + 676            mf_id, mz, apex_scan, scan_time, intensity, persistence, area.
    + 677        """
    + 678
    + 679        def mass_spectrum_to_string(
    + 680            mass_spec, normalize=True, min_normalized_abun=0.01
    + 681        ):
    + 682            """Converts a mass spectrum to a string of m/z:abundance pairs.
      683
    - 684            Returns
    - 685            --------
    - 686            str
    - 687                A string of m/z:abundance pairs from the mass spectrum, separated by a semicolon.
    - 688            """
    - 689            mz_np = mass_spec.to_dataframe()["m/z"].values
    - 690            abun_np = mass_spec.to_dataframe()["Peak Height"].values
    - 691            if normalize:
    - 692                abun_np = abun_np / abun_np.max()
    - 693            mz_abun = np.column_stack((mz_np, abun_np))
    - 694            if normalize:
    - 695                mz_abun = mz_abun[mz_abun[:, 1] > min_normalized_abun]
    - 696            mz_abun_str = [
    - 697                str(round(mz, ndigits=4)) + ":" + str(round(abun, ndigits=2))
    - 698                for mz, abun in mz_abun
    - 699            ]
    - 700            return "; ".join(mz_abun_str)
    - 701
    - 702        cols_in_df = [
    - 703            "id",
    - 704            "_apex_scan",
    - 705            "start_scan",
    - 706            "final_scan",
    - 707            "_retention_time",
    - 708            "_intensity",
    - 709            "_persistence",
    - 710            "_area",
    - 711            "_dispersity_index",
    - 712            "_tailing_factor",
    - 713            "monoisotopic_mf_id",
    - 714            "isotopologue_type",
    - 715            "mass_spectrum_deconvoluted_parent",
    - 716        ]
    - 717        df_mf_list = []
    - 718        for mf_id in self.mass_features.keys():
    - 719            # Find cols_in_df that are in single_mf
    - 720            df_keys = list(
    - 721                set(cols_in_df).intersection(self.mass_features[mf_id].__dir__())
    - 722            )
    - 723            dict_mf = {}
    - 724            for key in df_keys:
    - 725                dict_mf[key] = getattr(self.mass_features[mf_id], key)
    - 726            if len(self.mass_features[mf_id].ms2_scan_numbers) > 0:
    - 727                # Add MS2 spectra info
    - 728                best_ms2_spectrum = self.mass_features[mf_id].best_ms2
    - 729                dict_mf["ms2_spectrum"] = mass_spectrum_to_string(best_ms2_spectrum)
    - 730            if len(self.mass_features[mf_id].associated_mass_features_deconvoluted) > 0:
    - 731                dict_mf["associated_mass_features"] = ", ".join(
    - 732                    map(
    - 733                        str,
    - 734                        self.mass_features[mf_id].associated_mass_features_deconvoluted,
    - 735                    )
    - 736                )
    - 737            if self.mass_features[mf_id]._half_height_width is not None:
    - 738                dict_mf["half_height_width"] = self.mass_features[
    - 739                    mf_id
    - 740                ].half_height_width
    - 741            # Check if EIC for mass feature is set
    - 742            df_mf_single = pd.DataFrame(dict_mf, index=[mf_id])
    - 743            df_mf_single["mz"] = self.mass_features[mf_id].mz
    - 744            df_mf_list.append(df_mf_single)
    - 745        df_mf = pd.concat(df_mf_list)
    - 746
    - 747        # rename _area to area and id to mf_id
    - 748        df_mf = df_mf.rename(
    - 749            columns={
    - 750                "_area": "area",
    - 751                "id": "mf_id",
    - 752                "_apex_scan": "apex_scan",
    - 753                "_retention_time": "scan_time",
    - 754                "_intensity": "intensity",
    - 755                "_persistence": "persistence",
    - 756                "_dispersity_index": "dispersity_index",
    - 757                "_tailing_factor": "tailing_factor",
    - 758            }
    - 759        )
    - 760
    - 761        # reorder columns
    - 762        col_order = [
    - 763            "mf_id",
    - 764            "scan_time",
    - 765            "mz",
    - 766            "apex_scan",
    - 767            "start_scan",
    - 768            "final_scan",
    - 769            "intensity",
    - 770            "persistence",
    - 771            "area",
    - 772            "half_height_width",
    - 773            "tailing_factor",
    - 774            "dispersity_index",
    - 775            "monoisotopic_mf_id",
    - 776            "isotopologue_type",
    - 777            "mass_spectrum_deconvoluted_parent",
    - 778            "associated_mass_features",
    - 779            "ms2_spectrum",
    - 780        ]
    - 781        # drop columns that are not in col_order
    - 782        cols_to_order = [col for col in col_order if col in df_mf.columns]
    - 783        df_mf = df_mf[cols_to_order]
    - 784
    - 785        # reset index to mf_id
    - 786        df_mf = df_mf.set_index("mf_id")
    - 787        df_mf.index.name = "mf_id"
    - 788
    - 789        return df_mf
    - 790
    - 791    def mass_features_ms1_annot_to_df(self):
    - 792        """Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.
    + 684            Parameters
    + 685            -----------
    + 686            mass_spec : MassSpectrum
    + 687                A MassSpectrum object to be converted to a string.
    + 688            normalize : bool, optional
    + 689                If True, normalizes the abundance values to a maximum of 1. Defaults to True.
    + 690            min_normalized_abun : float, optional
    + 691                The minimum normalized abundance value to include in the string, only used if normalize is True. Defaults to 0.01.
    + 692
    + 693            Returns
    + 694            --------
    + 695            str
    + 696                A string of m/z:abundance pairs from the mass spectrum, separated by a semicolon.
    + 697            """
    + 698            mz_np = mass_spec.to_dataframe()["m/z"].values
    + 699            abun_np = mass_spec.to_dataframe()["Peak Height"].values
    + 700            if normalize:
    + 701                abun_np = abun_np / abun_np.max()
    + 702            mz_abun = np.column_stack((mz_np, abun_np))
    + 703            if normalize:
    + 704                mz_abun = mz_abun[mz_abun[:, 1] > min_normalized_abun]
    + 705            mz_abun_str = [
    + 706                str(round(mz, ndigits=4)) + ":" + str(round(abun, ndigits=2))
    + 707                for mz, abun in mz_abun
    + 708            ]
    + 709            return "; ".join(mz_abun_str)
    + 710
    + 711        cols_in_df = [
    + 712            "id",
    + 713            "_apex_scan",
    + 714            "start_scan",
    + 715            "final_scan",
    + 716            "_retention_time",
    + 717            "_intensity",
    + 718            "_persistence",
    + 719            "_area",
    + 720            "_dispersity_index",
    + 721            "_tailing_factor",
    + 722            "monoisotopic_mf_id",
    + 723            "isotopologue_type",
    + 724            "mass_spectrum_deconvoluted_parent",
    + 725        ]
    + 726        df_mf_list = []
    + 727        for mf_id in self.mass_features.keys():
    + 728            # Find cols_in_df that are in single_mf
    + 729            df_keys = list(
    + 730                set(cols_in_df).intersection(self.mass_features[mf_id].__dir__())
    + 731            )
    + 732            dict_mf = {}
    + 733            for key in df_keys:
    + 734                dict_mf[key] = getattr(self.mass_features[mf_id], key)
    + 735            if len(self.mass_features[mf_id].ms2_scan_numbers) > 0:
    + 736                # Add MS2 spectra info
    + 737                best_ms2_spectrum = self.mass_features[mf_id].best_ms2
    + 738                dict_mf["ms2_spectrum"] = mass_spectrum_to_string(best_ms2_spectrum)
    + 739            if len(self.mass_features[mf_id].associated_mass_features_deconvoluted) > 0:
    + 740                dict_mf["associated_mass_features"] = ", ".join(
    + 741                    map(
    + 742                        str,
    + 743                        self.mass_features[mf_id].associated_mass_features_deconvoluted,
    + 744                    )
    + 745                )
    + 746            if self.mass_features[mf_id]._half_height_width is not None:
    + 747                dict_mf["half_height_width"] = self.mass_features[
    + 748                    mf_id
    + 749                ].half_height_width
    + 750            # Check if EIC for mass feature is set
    + 751            df_mf_single = pd.DataFrame(dict_mf, index=[mf_id])
    + 752            df_mf_single["mz"] = self.mass_features[mf_id].mz
    + 753            df_mf_list.append(df_mf_single)
    + 754        df_mf = pd.concat(df_mf_list)
    + 755
    + 756        # rename _area to area and id to mf_id
    + 757        df_mf = df_mf.rename(
    + 758            columns={
    + 759                "_area": "area",
    + 760                "id": "mf_id",
    + 761                "_apex_scan": "apex_scan",
    + 762                "_retention_time": "scan_time",
    + 763                "_intensity": "intensity",
    + 764                "_persistence": "persistence",
    + 765                "_dispersity_index": "dispersity_index",
    + 766                "_tailing_factor": "tailing_factor",
    + 767            }
    + 768        )
    + 769
    + 770        # reorder columns
    + 771        col_order = [
    + 772            "mf_id",
    + 773            "scan_time",
    + 774            "mz",
    + 775            "apex_scan",
    + 776            "start_scan",
    + 777            "final_scan",
    + 778            "intensity",
    + 779            "persistence",
    + 780            "area",
    + 781            "half_height_width",
    + 782            "tailing_factor",
    + 783            "dispersity_index",
    + 784            "monoisotopic_mf_id",
    + 785            "isotopologue_type",
    + 786            "mass_spectrum_deconvoluted_parent",
    + 787            "associated_mass_features",
    + 788            "ms2_spectrum",
    + 789        ]
    + 790        # drop columns that are not in col_order
    + 791        cols_to_order = [col for col in col_order if col in df_mf.columns]
    + 792        df_mf = df_mf[cols_to_order]
      793
    - 794        Returns
    - 795        --------
    - 796        pandas.DataFrame
    - 797            A pandas dataframe of MS1 annotations for the mass features in the dataset.
    - 798            The index is set to mf_id (mass feature ID)
    + 794        # reset index to mf_id
    + 795        df_mf = df_mf.set_index("mf_id")
    + 796        df_mf.index.name = "mf_id"
    + 797
    + 798        return df_mf
      799
    - 800        Raises
    - 801        ------
    - 802        Warning
    - 803            If no MS1 annotations were found for the mass features in the dataset.
    - 804        """
    - 805        annot_df_list_ms1 = []
    - 806        for mf_id in self.mass_features.keys():
    - 807            if self.mass_features[mf_id].mass_spectrum is None:
    - 808                pass
    - 809            else:
    - 810                # Add ms1 annotations to ms1 annotation list
    - 811                if (
    - 812                    np.abs(
    - 813                        (
    - 814                            self.mass_features[mf_id].ms1_peak.mz_exp
    - 815                            - self.mass_features[mf_id].mz
    - 816                        )
    - 817                    )
    - 818                    < 0.01
    - 819                ):
    - 820                    # Get the molecular formula from the mass spectrum
    - 821                    annot_df = self.mass_features[mf_id].mass_spectrum.to_dataframe()
    - 822                    # Subset to pull out only the peak associated with the mass feature
    - 823                    annot_df = annot_df[
    - 824                        annot_df["Index"] == self.mass_features[mf_id].ms1_peak.index
    - 825                    ].copy()
    - 826
    - 827                    # Remove the index column and add column for mf_id
    - 828                    annot_df = annot_df.drop(columns=["Index"])
    - 829                    annot_df["mf_id"] = mf_id
    - 830                    annot_df_list_ms1.append(annot_df)
    - 831
    - 832        if len(annot_df_list_ms1) > 0:
    - 833            annot_ms1_df_full = pd.concat(annot_df_list_ms1)
    - 834            annot_ms1_df_full = annot_ms1_df_full.set_index("mf_id")
    - 835            annot_ms1_df_full.index.name = "mf_id"
    - 836
    - 837        else:
    - 838            annot_ms1_df_full = None
    - 839            # Warn that no ms1 annotations were found
    - 840            warnings.warn(
    - 841                "No MS1 annotations found for mass features in dataset, were MS1 spectra added and processed within the dataset?",
    - 842                UserWarning
    - 843            )
    - 844
    - 845        return annot_ms1_df_full
    - 846
    - 847    def mass_features_ms2_annot_to_df(self, molecular_metadata=None):
    - 848        """Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.
    - 849
    - 850        Parameters
    - 851        -----------
    - 852        molecular_metadata :  dict of MolecularMetadata objects
    - 853            A dictionary of MolecularMetadata objects, keyed by metabref_mol_id.  Defaults to None.
    - 854
    - 855        Returns
    - 856        --------
    - 857        pandas.DataFrame
    - 858            A pandas dataframe of MS2 annotations for the mass features in the dataset, 
    - 859            and optionally molecular metadata. The index is set to mf_id (mass feature ID)
    - 860
    - 861        Raises
    - 862        ------
    - 863        Warning
    - 864            If no MS2 annotations were found for the mass features in the dataset.
    - 865        """
    - 866        annot_df_list_ms2 = []
    - 867        for mf_id in self.mass_features.keys():
    - 868            if len(self.mass_features[mf_id].ms2_similarity_results) > 0:
    - 869                # Add ms2 annotations to ms2 annotation list
    - 870                for result in self.mass_features[mf_id].ms2_similarity_results:
    - 871                    annot_df_ms2 = result.to_dataframe()
    - 872                    annot_df_ms2["mf_id"] = mf_id
    - 873                    annot_df_list_ms2.append(annot_df_ms2)
    - 874
    - 875        if len(annot_df_list_ms2) > 0:
    - 876            annot_ms2_df_full = pd.concat(annot_df_list_ms2)
    - 877            if molecular_metadata is not None:
    - 878                molecular_metadata_df = pd.concat(
    - 879                    [
    - 880                        pd.DataFrame.from_dict(v.__dict__, orient="index").transpose()
    - 881                        for k, v in molecular_metadata.items()
    - 882                    ],
    - 883                    ignore_index=True,
    - 884                )
    - 885                molecular_metadata_df = molecular_metadata_df.rename(
    - 886                    columns={"id": "ref_mol_id"}
    - 887                )
    - 888                annot_ms2_df_full = annot_ms2_df_full.merge(
    - 889                    molecular_metadata_df, on="ref_mol_id", how="left"
    - 890                )
    - 891            annot_ms2_df_full = annot_ms2_df_full.drop_duplicates(
    - 892                subset=["mf_id", "query_spectrum_id", "ref_ms_id"]
    - 893            ).copy()
    - 894            annot_ms2_df_full = annot_ms2_df_full.set_index("mf_id")
    - 895            annot_ms2_df_full.index.name = "mf_id"
    - 896        else:
    - 897            annot_ms2_df_full = None
    - 898            # Warn that no ms2 annotations were found
    - 899            warnings.warn(
    - 900                "No MS2 annotations found for mass features in dataset, were MS2 spectra added and searched against a database?",
    - 901                UserWarning
    - 902            )
    - 903
    - 904        return annot_ms2_df_full
    - 905
    - 906    def __len__(self):
    - 907        """
    - 908        Returns the number of mass spectra in the dataset.
    - 909
    - 910        Returns
    - 911        --------
    - 912        int
    - 913            The number of mass spectra in the dataset.
    - 914        """
    - 915        return len(self._ms)
    - 916
    - 917    def __getitem__(self, scan_number):
    - 918        """
    - 919        Returns the mass spectrum corresponding to the specified scan number.
    - 920
    - 921        Parameters
    - 922        -----------
    - 923        scan_number : int
    - 924            The scan number of the desired mass spectrum.
    + 800    def mass_features_ms1_annot_to_df(self):
    + 801        """Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.
    + 802
    + 803        Returns
    + 804        --------
    + 805        pandas.DataFrame
    + 806            A pandas dataframe of MS1 annotations for the mass features in the dataset.
    + 807            The index is set to mf_id (mass feature ID)
    + 808
    + 809        Raises
    + 810        ------
    + 811        Warning
    + 812            If no MS1 annotations were found for the mass features in the dataset.
    + 813        """
    + 814        annot_df_list_ms1 = []
    + 815        for mf_id in self.mass_features.keys():
    + 816            if self.mass_features[mf_id].mass_spectrum is None:
    + 817                pass
    + 818            else:
    + 819                # Add ms1 annotations to ms1 annotation list
    + 820                if (
    + 821                    np.abs(
    + 822                        (
    + 823                            self.mass_features[mf_id].ms1_peak.mz_exp
    + 824                            - self.mass_features[mf_id].mz
    + 825                        )
    + 826                    )
    + 827                    < 0.01
    + 828                ):
    + 829                    # Get the molecular formula from the mass spectrum
    + 830                    annot_df = self.mass_features[mf_id].mass_spectrum.to_dataframe()
    + 831                    # Subset to pull out only the peak associated with the mass feature
    + 832                    annot_df = annot_df[
    + 833                        annot_df["Index"] == self.mass_features[mf_id].ms1_peak.index
    + 834                    ].copy()
    + 835
    + 836                    # Remove the index column and add column for mf_id
    + 837                    annot_df = annot_df.drop(columns=["Index"])
    + 838                    annot_df["mf_id"] = mf_id
    + 839                    annot_df_list_ms1.append(annot_df)
    + 840
    + 841        if len(annot_df_list_ms1) > 0:
    + 842            annot_ms1_df_full = pd.concat(annot_df_list_ms1)
    + 843            annot_ms1_df_full = annot_ms1_df_full.set_index("mf_id")
    + 844            annot_ms1_df_full.index.name = "mf_id"
    + 845
    + 846        else:
    + 847            annot_ms1_df_full = None
    + 848            # Warn that no ms1 annotations were found
    + 849            warnings.warn(
    + 850                "No MS1 annotations found for mass features in dataset, were MS1 spectra added and processed within the dataset?",
    + 851                UserWarning,
    + 852            )
    + 853
    + 854        return annot_ms1_df_full
    + 855
    + 856    def mass_features_ms2_annot_to_df(self, molecular_metadata=None):
    + 857        """Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.
    + 858
    + 859        Parameters
    + 860        -----------
    + 861        molecular_metadata :  dict of MolecularMetadata objects
    + 862            A dictionary of MolecularMetadata objects, keyed by metabref_mol_id.  Defaults to None.
    + 863
    + 864        Returns
    + 865        --------
    + 866        pandas.DataFrame
    + 867            A pandas dataframe of MS2 annotations for the mass features in the dataset,
    + 868            and optionally molecular metadata. The index is set to mf_id (mass feature ID)
    + 869
    + 870        Raises
    + 871        ------
    + 872        Warning
    + 873            If no MS2 annotations were found for the mass features in the dataset.
    + 874        """
    + 875        annot_df_list_ms2 = []
    + 876        for mf_id in self.mass_features.keys():
    + 877            if len(self.mass_features[mf_id].ms2_similarity_results) > 0:
    + 878                # Add ms2 annotations to ms2 annotation list
    + 879                for result in self.mass_features[mf_id].ms2_similarity_results:
    + 880                    annot_df_ms2 = result.to_dataframe()
    + 881                    annot_df_ms2["mf_id"] = mf_id
    + 882                    annot_df_list_ms2.append(annot_df_ms2)
    + 883
    + 884        if len(annot_df_list_ms2) > 0:
    + 885            annot_ms2_df_full = pd.concat(annot_df_list_ms2)
    + 886            if molecular_metadata is not None:
    + 887                molecular_metadata_df = pd.concat(
    + 888                    [
    + 889                        pd.DataFrame.from_dict(v.__dict__, orient="index").transpose()
    + 890                        for k, v in molecular_metadata.items()
    + 891                    ],
    + 892                    ignore_index=True,
    + 893                )
    + 894                molecular_metadata_df = molecular_metadata_df.rename(
    + 895                    columns={"id": "ref_mol_id"}
    + 896                )
    + 897                annot_ms2_df_full = annot_ms2_df_full.merge(
    + 898                    molecular_metadata_df, on="ref_mol_id", how="left"
    + 899                )
    + 900            annot_ms2_df_full = annot_ms2_df_full.drop_duplicates(
    + 901                subset=["mf_id", "query_spectrum_id", "ref_ms_id"]
    + 902            ).copy()
    + 903            annot_ms2_df_full = annot_ms2_df_full.set_index("mf_id")
    + 904            annot_ms2_df_full.index.name = "mf_id"
    + 905        else:
    + 906            annot_ms2_df_full = None
    + 907            # Warn that no ms2 annotations were found
    + 908            warnings.warn(
    + 909                "No MS2 annotations found for mass features in dataset, were MS2 spectra added and searched against a database?",
    + 910                UserWarning,
    + 911            )
    + 912
    + 913        return annot_ms2_df_full
    + 914
    + 915    def __len__(self):
    + 916        """
    + 917        Returns the number of mass spectra in the dataset.
    + 918
    + 919        Returns
    + 920        --------
    + 921        int
    + 922            The number of mass spectra in the dataset.
    + 923        """
    + 924        return len(self._ms)
      925
    - 926        Returns
    - 927        --------
    - 928        MassSpectrum
    - 929            The mass spectrum corresponding to the specified scan number.
    - 930        """
    - 931        return self._ms.get(scan_number)
    - 932
    - 933    def __iter__(self):
    - 934        """Returns an iterator over the mass spectra in the dataset.
    - 935
    - 936        Returns
    - 937        --------
    - 938        iterator
    - 939            An iterator over the mass spectra in the dataset.
    - 940        """
    - 941        return iter(self._ms.values())
    - 942
    - 943    def set_tic_list_from_data(self, overwrite=False):
    - 944        """Sets the TIC list from the mass spectrum objects within the _ms dictionary.
    - 945
    - 946        Parameters
    - 947        -----------
    - 948        overwrite : bool, optional
    - 949            If True, overwrites the TIC list if it is already set. Defaults to False.
    - 950
    - 951        Notes
    - 952        -----
    - 953        If the _ms dictionary is incomplete, sets the TIC list to an empty list.
    + 926    def __getitem__(self, scan_number):
    + 927        """
    + 928        Returns the mass spectrum corresponding to the specified scan number.
    + 929
    + 930        Parameters
    + 931        -----------
    + 932        scan_number : int
    + 933            The scan number of the desired mass spectrum.
    + 934
    + 935        Returns
    + 936        --------
    + 937        MassSpectrum
    + 938            The mass spectrum corresponding to the specified scan number.
    + 939        """
    + 940        return self._ms.get(scan_number)
    + 941
    + 942    def __iter__(self):
    + 943        """Returns an iterator over the mass spectra in the dataset.
    + 944
    + 945        Returns
    + 946        --------
    + 947        iterator
    + 948            An iterator over the mass spectra in the dataset.
    + 949        """
    + 950        return iter(self._ms.values())
    + 951
    + 952    def set_tic_list_from_data(self, overwrite=False):
    + 953        """Sets the TIC list from the mass spectrum objects within the _ms dictionary.
      954
    - 955        Raises
    - 956        ------
    - 957        ValueError
    - 958            If no mass spectra are found in the dataset.
    - 959            If the TIC list is already set and overwrite is False.
    - 960        """
    - 961        # Check if _ms is empty and raise error if so
    - 962        if len(self._ms) == 0:
    - 963            raise ValueError("No mass spectra found in dataset")
    - 964
    - 965        # Check if tic_list is already set and raise error if so
    - 966        if len(self.tic) > 0 and not overwrite:
    - 967            raise ValueError("TIC list already set, use overwrite=True to overwrite")
    - 968
    - 969        self.tic = [self._ms.get(i).tic for i in self.scans_number]
    - 970
    - 971    def set_retention_time_from_data(self, overwrite=False):
    - 972        """Sets the retention time list from the data in the _ms dictionary.
    + 955        Parameters
    + 956        -----------
    + 957        overwrite : bool, optional
    + 958            If True, overwrites the TIC list if it is already set. Defaults to False.
    + 959
    + 960        Notes
    + 961        -----
    + 962        If the _ms dictionary is incomplete, sets the TIC list to an empty list.
    + 963
    + 964        Raises
    + 965        ------
    + 966        ValueError
    + 967            If no mass spectra are found in the dataset.
    + 968            If the TIC list is already set and overwrite is False.
    + 969        """
    + 970        # Check if _ms is empty and raise error if so
    + 971        if len(self._ms) == 0:
    + 972            raise ValueError("No mass spectra found in dataset")
      973
    - 974        Parameters
    - 975        -----------
    - 976        overwrite : bool, optional
    - 977            If True, overwrites the retention time list if it is already set. Defaults to False.
    - 978
    - 979        Notes
    - 980        -----
    - 981        If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list.
    + 974        # Check if tic_list is already set and raise error if so
    + 975        if len(self.tic) > 0 and not overwrite:
    + 976            raise ValueError("TIC list already set, use overwrite=True to overwrite")
    + 977
    + 978        self.tic = [self._ms.get(i).tic for i in self.scans_number]
    + 979
    + 980    def set_retention_time_from_data(self, overwrite=False):
    + 981        """Sets the retention time list from the data in the _ms dictionary.
      982
    - 983        Raises
    - 984        ------
    - 985        ValueError
    - 986            If no mass spectra are found in the dataset.
    - 987            If the retention time list is already set and overwrite is False.
    - 988        """
    - 989        # Check if _ms is empty and raise error if so
    - 990        if len(self._ms) == 0:
    - 991            raise ValueError("No mass spectra found in dataset")
    - 992
    - 993        # Check if retention_time_list is already set and raise error if so
    - 994        if len(self.retention_time) > 0 and not overwrite:
    - 995            raise ValueError(
    - 996                "Retention time list already set, use overwrite=True to overwrite"
    - 997            )
    - 998
    - 999        retention_time_list = []
    -1000        for key_ms in sorted(self._ms.keys()):
    -1001            retention_time_list.append(self._ms.get(key_ms).retention_time)
    -1002        self.retention_time = retention_time_list
    -1003
    -1004    def set_scans_number_from_data(self, overwrite=False):
    -1005        """Sets the scan number list from the data in the _ms dictionary.
    -1006
    -1007        Notes
    -1008        -----
    -1009        If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list.
    -1010
    -1011        Raises
    -1012        ------
    -1013        ValueError
    -1014            If no mass spectra are found in the dataset.
    -1015            If the scan number list is already set and overwrite is False.
    -1016        """
    -1017        # Check if _ms is empty and raise error if so
    -1018        if len(self._ms) == 0:
    -1019            raise ValueError("No mass spectra found in dataset")
    -1020
    -1021        # Check if scans_number_list is already set and raise error if so
    -1022        if len(self.scans_number) > 0 and not overwrite:
    -1023            raise ValueError(
    -1024                "Scan number list already set, use overwrite=True to overwrite"
    -1025            )
    -1026
    -1027        self.scans_number = sorted(self._ms.keys())
    -1028
    -1029    @property
    -1030    def ms1_scans(self):
    -1031        """
    -1032        list : A list of MS1 scan numbers for the dataset.
    -1033        """
    -1034        return self.scan_df[self.scan_df.ms_level == 1].index.tolist()
    + 983        Parameters
    + 984        -----------
    + 985        overwrite : bool, optional
    + 986            If True, overwrites the retention time list if it is already set. Defaults to False.
    + 987
    + 988        Notes
    + 989        -----
    + 990        If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list.
    + 991
    + 992        Raises
    + 993        ------
    + 994        ValueError
    + 995            If no mass spectra are found in the dataset.
    + 996            If the retention time list is already set and overwrite is False.
    + 997        """
    + 998        # Check if _ms is empty and raise error if so
    + 999        if len(self._ms) == 0:
    +1000            raise ValueError("No mass spectra found in dataset")
    +1001
    +1002        # Check if retention_time_list is already set and raise error if so
    +1003        if len(self.retention_time) > 0 and not overwrite:
    +1004            raise ValueError(
    +1005                "Retention time list already set, use overwrite=True to overwrite"
    +1006            )
    +1007
    +1008        retention_time_list = []
    +1009        for key_ms in sorted(self._ms.keys()):
    +1010            retention_time_list.append(self._ms.get(key_ms).retention_time)
    +1011        self.retention_time = retention_time_list
    +1012
    +1013    def set_scans_number_from_data(self, overwrite=False):
    +1014        """Sets the scan number list from the data in the _ms dictionary.
    +1015
    +1016        Notes
    +1017        -----
    +1018        If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list.
    +1019
    +1020        Raises
    +1021        ------
    +1022        ValueError
    +1023            If no mass spectra are found in the dataset.
    +1024            If the scan number list is already set and overwrite is False.
    +1025        """
    +1026        # Check if _ms is empty and raise error if so
    +1027        if len(self._ms) == 0:
    +1028            raise ValueError("No mass spectra found in dataset")
    +1029
    +1030        # Check if scans_number_list is already set and raise error if so
    +1031        if len(self.scans_number) > 0 and not overwrite:
    +1032            raise ValueError(
    +1033                "Scan number list already set, use overwrite=True to overwrite"
    +1034            )
     1035
    -1036    @property
    -1037    def parameters(self):
    -1038        """
    -1039        LCMSParameters : The parameters used for the LC-MS analysis.
    -1040        """
    -1041        return self._parameters
    -1042
    -1043    @parameters.setter
    -1044    def parameters(self, paramsinstance):
    -1045        """
    -1046        Sets the parameters used for the LC-MS analysis.
    -1047
    -1048        Parameters
    -1049        -----------
    -1050        paramsinstance : LCMSParameters
    -1051            The parameters used for the LC-MS analysis.
    -1052        """
    -1053        self._parameters = paramsinstance
    -1054
    -1055    @property
    -1056    def scans_number(self):
    -1057        """
    -1058        list : A list of scan numbers for the dataset.
    -1059        """
    -1060        return self._scans_number_list
    -1061
    -1062    @scans_number.setter
    -1063    def scans_number(self, scan_numbers_list):
    -1064        """
    -1065        Sets the scan numbers for the dataset.
    -1066
    -1067        Parameters
    -1068        -----------
    -1069        scan_numbers_list : list
    -1070            A list of scan numbers for the dataset.
    -1071        """
    -1072        self._scans_number_list = scan_numbers_list
    -1073
    -1074    @property
    -1075    def retention_time(self):
    -1076        """
    -1077        numpy.ndarray : An array of retention times for the dataset.
    -1078        """
    -1079        return self._retention_time_list
    -1080
    -1081    @retention_time.setter
    -1082    def retention_time(self, rt_list):
    -1083        """
    -1084        Sets the retention times for the dataset.
    -1085
    -1086        Parameters
    -1087        -----------
    -1088        rt_list : list
    -1089            A list of retention times for the dataset.
    -1090        """
    -1091        self._retention_time_list = np.array(rt_list)
    -1092
    -1093    @property
    -1094    def tic(self):
    -1095        """
    -1096        numpy.ndarray : An array of TIC values for the dataset.
    -1097        """
    -1098        return self._tic_list
    -1099
    -1100    @tic.setter
    -1101    def tic(self, tic_list):
    -1102        """
    -1103        Sets the TIC values for the dataset.
    -1104
    -1105        Parameters
    -1106        -----------
    -1107        tic_list : list
    -1108            A list of TIC values for the dataset.
    -1109        """
    -1110        self._tic_list = np.array(tic_list)
    +1036        self.scans_number = sorted(self._ms.keys())
    +1037
    +1038    @property
    +1039    def ms1_scans(self):
    +1040        """
    +1041        list : A list of MS1 scan numbers for the dataset.
    +1042        """
    +1043        return self.scan_df[self.scan_df.ms_level == 1].index.tolist()
    +1044
    +1045    @property
    +1046    def parameters(self):
    +1047        """
    +1048        LCMSParameters : The parameters used for the LC-MS analysis.
    +1049        """
    +1050        return self._parameters
    +1051
    +1052    @parameters.setter
    +1053    def parameters(self, paramsinstance):
    +1054        """
    +1055        Sets the parameters used for the LC-MS analysis.
    +1056
    +1057        Parameters
    +1058        -----------
    +1059        paramsinstance : LCMSParameters
    +1060            The parameters used for the LC-MS analysis.
    +1061        """
    +1062        self._parameters = paramsinstance
    +1063
    +1064    @property
    +1065    def scans_number(self):
    +1066        """
    +1067        list : A list of scan numbers for the dataset.
    +1068        """
    +1069        return self._scans_number_list
    +1070
    +1071    @scans_number.setter
    +1072    def scans_number(self, scan_numbers_list):
    +1073        """
    +1074        Sets the scan numbers for the dataset.
    +1075
    +1076        Parameters
    +1077        -----------
    +1078        scan_numbers_list : list
    +1079            A list of scan numbers for the dataset.
    +1080        """
    +1081        self._scans_number_list = scan_numbers_list
    +1082
    +1083    @property
    +1084    def retention_time(self):
    +1085        """
    +1086        numpy.ndarray : An array of retention times for the dataset.
    +1087        """
    +1088        return self._retention_time_list
    +1089
    +1090    @retention_time.setter
    +1091    def retention_time(self, rt_list):
    +1092        """
    +1093        Sets the retention times for the dataset.
    +1094
    +1095        Parameters
    +1096        -----------
    +1097        rt_list : list
    +1098            A list of retention times for the dataset.
    +1099        """
    +1100        self._retention_time_list = np.array(rt_list)
    +1101
    +1102    @property
    +1103    def tic(self):
    +1104        """
    +1105        numpy.ndarray : An array of TIC values for the dataset.
    +1106        """
    +1107        return self._tic_list
    +1108
    +1109    @tic.setter
    +1110    def tic(self, tic_list):
    +1111        """
    +1112        Sets the TIC values for the dataset.
    +1113
    +1114        Parameters
    +1115        -----------
    +1116        tic_list : list
    +1117            A list of TIC values for the dataset.
    +1118        """
    +1119        self._tic_list = np.array(tic_list)
     
    @@ -2924,25 +2941,25 @@
    Methods
    -
    377    def __init__(
    -378        self,
    -379        file_location,
    -380        analyzer="Unknown",
    -381        instrument_label="Unknown",
    -382        sample_name=None,
    -383        spectra_parser=None,
    -384    ):
    -385        super().__init__(
    -386            file_location, analyzer, instrument_label, sample_name, spectra_parser
    -387        )
    -388        self.polarity = ""
    -389        self._parameters = LCMSParameters()
    -390        self._retention_time_list = []
    -391        self._scans_number_list = []
    -392        self._tic_list = []
    -393        self.eics = {}
    -394        self.mass_features = {}
    -395        self.spectral_search_results = {}
    +            
    378    def __init__(
    +379        self,
    +380        file_location,
    +381        analyzer="Unknown",
    +382        instrument_label="Unknown",
    +383        sample_name=None,
    +384        spectra_parser=None,
    +385    ):
    +386        super().__init__(
    +387            file_location, analyzer, instrument_label, sample_name, spectra_parser
    +388        )
    +389        self.polarity = ""
    +390        self._parameters = LCMSParameters()
    +391        self._retention_time_list = []
    +392        self._scans_number_list = []
    +393        self._tic_list = []
    +394        self.eics = {}
    +395        self.mass_features = {}
    +396        self.spectral_search_results = {}
     
    @@ -3004,15 +3021,15 @@
    Methods
    -
    397    def get_parameters_json(self):
    -398        """Returns the parameters stored for the LC-MS object in JSON format.
    -399
    -400        Returns
    -401        --------
    -402        str
    -403            The parameters used for the LC-MS analysis in JSON format.
    -404        """
    -405        return self.parameters.to_json()
    +            
    398    def get_parameters_json(self):
    +399        """Returns the parameters stored for the LC-MS object in JSON format.
    +400
    +401        Returns
    +402        --------
    +403        str
    +404            The parameters used for the LC-MS analysis in JSON format.
    +405        """
    +406        return self.parameters.to_json()
     
    @@ -3038,29 +3055,29 @@
    Returns
    -
    407    def remove_unprocessed_data(self, ms_level=None):
    -408        """Removes the unprocessed data from the LCMSBase object.
    -409
    -410        Parameters
    -411        -----------
    -412        ms_level : int, optional
    -413            The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
    -414
    -415        Raises
    -416        ------
    -417        ValueError
    -418            If ms_level is not 1 or 2.
    -419
    -420        Notes
    -421        -----
    -422        This method is useful for freeing up memory after the data has been processed.
    -423        """
    -424        if ms_level is None:
    -425            for ms_level in self._ms_unprocessed.keys():
    -426                self._ms_unprocessed[ms_level] = None
    -427        if ms_level not in [1, 2]:
    -428            raise ValueError("ms_level must be 1 or 2")
    -429        self._ms_unprocessed[ms_level] = None
    +            
    408    def remove_unprocessed_data(self, ms_level=None):
    +409        """Removes the unprocessed data from the LCMSBase object.
    +410
    +411        Parameters
    +412        -----------
    +413        ms_level : int, optional
    +414            The MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
    +415
    +416        Raises
    +417        ------
    +418        ValueError
    +419            If ms_level is not 1 or 2.
    +420
    +421        Notes
    +422        -----
    +423        This method is useful for freeing up memory after the data has been processed.
    +424        """
    +425        if ms_level is None:
    +426            for ms_level in self._ms_unprocessed.keys():
    +427                self._ms_unprocessed[ms_level] = None
    +428        if ms_level not in [1, 2]:
    +429            raise ValueError("ms_level must be 1 or 2")
    +430        self._ms_unprocessed[ms_level] = None
     
    @@ -3097,92 +3114,100 @@
    Notes
    -
    431    def add_associated_ms2_dda(
    -432        self, auto_process=True, use_parser=True, spectrum_mode=None, ms_params_key="ms2", scan_filter=None
    -433    ):
    -434        """Add MS2 spectra associated with mass features to the dataset.
    -435
    -436        Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)
    -437
    -438        Parameters
    -439        -----------
    -440        auto_process : bool, optional
    -441            If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
    -442        use_parser : bool, optional
    -443            If True, envoke the spectra parser to get the MS2 spectra. Default is True.
    -444        spectrum_mode : str or None, optional
    -445            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
    -446            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    -447            Defaults to None. (faster if defined, otherwise will check each scan)
    -448        ms_params_key : string, optional
    -449            The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute.
    -450            Defaults to 'ms2'.
    -451        scan_filter : str
    -452            A string to filter the scans to add to the _ms dictionary.  If None, all scans are added.  Defaults to None.
    -453            "hcd" will pull out only HCD scans.
    -454
    -455        Raises
    -456        ------
    -457        ValueError
    -458            If mass_features is not set, must run find_mass_features() first.
    -459            If no MS2 scans are found in the dataset.
    -460            If no precursor m/z values are found in MS2 scans, not a DDA dataset.
    -461        """
    -462        # Check if mass_features is set, raise error if not
    -463        if self.mass_features is None:
    -464            raise ValueError(
    -465                "mass_features not set, must run find_mass_features() first"
    -466            )
    -467        
    -468        # reconfigure ms_params to get the correct mass spectrum parameters from the key
    -469        ms_params = self.parameters.mass_spectrum[ms_params_key]
    -470
    -471        mf_df = self.mass_features_to_df().copy()
    -472        # Find ms2 scans that have a precursor m/z value
    -473        ms2_scans = self.scan_df[self.scan_df.ms_level == 2]
    -474        ms2_scans = ms2_scans[~ms2_scans.precursor_mz.isna()]
    -475        # drop ms2 scans that have no tic
    -476        ms2_scans = ms2_scans[ms2_scans.tic > 0]
    -477        if ms2_scans is None:
    -478            raise ValueError("No DDA scans found in dataset")
    -479
    -480        if scan_filter is not None:
    -481            ms2_scans = ms2_scans[ms2_scans.scan_text.str.contains(scan_filter)]
    -482        # set tolerance in rt space (in minutes) and mz space (in daltons)
    -483        time_tol = self.parameters.lc_ms.ms2_dda_rt_tolerance
    -484        mz_tol = self.parameters.lc_ms.ms2_dda_mz_tolerance
    +            
    432    def add_associated_ms2_dda(
    +433        self,
    +434        auto_process=True,
    +435        use_parser=True,
    +436        spectrum_mode=None,
    +437        ms_params_key="ms2",
    +438        scan_filter=None,
    +439    ):
    +440        """Add MS2 spectra associated with mass features to the dataset.
    +441
    +442        Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)
    +443
    +444        Parameters
    +445        -----------
    +446        auto_process : bool, optional
    +447            If True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
    +448        use_parser : bool, optional
    +449            If True, envoke the spectra parser to get the MS2 spectra. Default is True.
    +450        spectrum_mode : str or None, optional
    +451            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
    +452            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    +453            Defaults to None. (faster if defined, otherwise will check each scan)
    +454        ms_params_key : string, optional
    +455            The key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute.
    +456            Defaults to 'ms2'.
    +457        scan_filter : str
    +458            A string to filter the scans to add to the _ms dictionary.  If None, all scans are added.  Defaults to None.
    +459            "hcd" will pull out only HCD scans.
    +460
    +461        Raises
    +462        ------
    +463        ValueError
    +464            If mass_features is not set, must run find_mass_features() first.
    +465            If no MS2 scans are found in the dataset.
    +466            If no precursor m/z values are found in MS2 scans, not a DDA dataset.
    +467        """
    +468        # Check if mass_features is set, raise error if not
    +469        if self.mass_features is None:
    +470            raise ValueError(
    +471                "mass_features not set, must run find_mass_features() first"
    +472            )
    +473
    +474        # reconfigure ms_params to get the correct mass spectrum parameters from the key
    +475        ms_params = self.parameters.mass_spectrum[ms_params_key]
    +476
    +477        mf_df = self.mass_features_to_df().copy()
    +478        # Find ms2 scans that have a precursor m/z value
    +479        ms2_scans = self.scan_df[self.scan_df.ms_level == 2]
    +480        ms2_scans = ms2_scans[~ms2_scans.precursor_mz.isna()]
    +481        # drop ms2 scans that have no tic
    +482        ms2_scans = ms2_scans[ms2_scans.tic > 0]
    +483        if ms2_scans is None:
    +484            raise ValueError("No DDA scans found in dataset")
     485
    -486        # for each mass feature, find the ms2 scans that are within the roi scan time and mz range
    -487        dda_scans = []
    -488        for i, row in mf_df.iterrows():
    -489            ms2_scans_filtered = ms2_scans[
    -490                ms2_scans.scan_time.between(
    -491                    row.scan_time - time_tol, row.scan_time + time_tol
    -492                )
    -493            ]
    -494            ms2_scans_filtered = ms2_scans_filtered[
    -495                ms2_scans_filtered.precursor_mz.between(
    -496                    row.mz - mz_tol, row.mz + mz_tol
    -497                )
    -498            ]
    -499            dda_scans = dda_scans + ms2_scans_filtered.scan.tolist()
    -500            self.mass_features[i].ms2_scan_numbers = ms2_scans_filtered.scan.tolist() + self.mass_features[i].ms2_scan_numbers
    -501        # add to _ms attribute
    -502        self.add_mass_spectra(
    -503            scan_list=list(set(dda_scans)),
    -504            auto_process=auto_process,
    -505            spectrum_mode=spectrum_mode,
    -506            use_parser=use_parser,
    -507            ms_params=ms_params,
    -508        )
    -509        # associate appropriate _ms attribute to appropriate mass feature's ms2_mass_spectra attribute
    -510        for mf_id in self.mass_features:
    -511            if self.mass_features[mf_id].ms2_scan_numbers is not None:
    -512                for dda_scan in self.mass_features[mf_id].ms2_scan_numbers:
    -513                    if dda_scan in self._ms.keys():
    -514                        self.mass_features[mf_id].ms2_mass_spectra[dda_scan] = self._ms[
    -515                            dda_scan
    -516                        ]
    +486        if scan_filter is not None:
    +487            ms2_scans = ms2_scans[ms2_scans.scan_text.str.contains(scan_filter)]
    +488        # set tolerance in rt space (in minutes) and mz space (in daltons)
    +489        time_tol = self.parameters.lc_ms.ms2_dda_rt_tolerance
    +490        mz_tol = self.parameters.lc_ms.ms2_dda_mz_tolerance
    +491
    +492        # for each mass feature, find the ms2 scans that are within the roi scan time and mz range
    +493        dda_scans = []
    +494        for i, row in mf_df.iterrows():
    +495            ms2_scans_filtered = ms2_scans[
    +496                ms2_scans.scan_time.between(
    +497                    row.scan_time - time_tol, row.scan_time + time_tol
    +498                )
    +499            ]
    +500            ms2_scans_filtered = ms2_scans_filtered[
    +501                ms2_scans_filtered.precursor_mz.between(
    +502                    row.mz - mz_tol, row.mz + mz_tol
    +503                )
    +504            ]
    +505            dda_scans = dda_scans + ms2_scans_filtered.scan.tolist()
    +506            self.mass_features[i].ms2_scan_numbers = (
    +507                ms2_scans_filtered.scan.tolist()
    +508                + self.mass_features[i].ms2_scan_numbers
    +509            )
    +510        # add to _ms attribute
    +511        self.add_mass_spectra(
    +512            scan_list=list(set(dda_scans)),
    +513            auto_process=auto_process,
    +514            spectrum_mode=spectrum_mode,
    +515            use_parser=use_parser,
    +516            ms_params=ms_params,
    +517        )
    +518        # associate appropriate _ms attribute to appropriate mass feature's ms2_mass_spectra attribute
    +519        for mf_id in self.mass_features:
    +520            if self.mass_features[mf_id].ms2_scan_numbers is not None:
    +521                for dda_scan in self.mass_features[mf_id].ms2_scan_numbers:
    +522                    if dda_scan in self._ms.keys():
    +523                        self.mass_features[mf_id].ms2_mass_spectra[dda_scan] = self._ms[
    +524                            dda_scan
    +525                        ]
     
    @@ -3231,143 +3256,143 @@
    Raises
    -
    518    def add_associated_ms1(
    -519        self, auto_process=True, use_parser=True, spectrum_mode=None
    -520    ):
    -521        """Add MS1 spectra associated with mass features to the dataset.
    -522
    -523        Parameters
    -524        -----------
    -525        auto_process : bool, optional
    -526            If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
    -527        use_parser : bool, optional
    -528            If True, envoke the spectra parser to get the MS1 spectra. Default is True.
    -529        spectrum_mode : str or None, optional
    -530            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
    -531            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    -532            Defaults to None. (faster if defined, otherwise will check each scan)
    -533
    -534        Raises
    -535        ------
    -536        ValueError
    -537            If mass_features is not set, must run find_mass_features() first.
    -538            If apex scans are not profile mode, all apex scans must be profile mode for averaging.
    -539            If number of scans to average is not  1 or an integer with an integer median (i.e. 3, 5, 7, 9).
    -540            If deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
    -541        """
    -542        # Check if mass_features is set, raise error if not
    -543        if self.mass_features is None:
    -544            raise ValueError(
    -545                "mass_features not set, must run find_mass_features() first"
    -546            )
    -547        scans_to_average = self.parameters.lc_ms.ms1_scans_to_average
    -548
    -549        if scans_to_average == 1:
    -550            # Add to LCMSobj
    -551            self.add_mass_spectra(
    -552                scan_list=[
    -553                    int(x) for x in self.mass_features_to_df().apex_scan.tolist()
    -554                ],
    -555                auto_process=auto_process,
    -556                use_parser=use_parser,
    -557                spectrum_mode=spectrum_mode,
    -558                ms_params=self.parameters.mass_spectrum["ms1"],
    -559            )
    -560
    -561        elif (
    -562            (scans_to_average - 1) % 2
    -563        ) == 0:  # scans_to_average = 3, 5, 7 etc, mirror l/r around apex
    -564            apex_scans = list(set(self.mass_features_to_df().apex_scan.tolist()))
    -565            # Check if all apex scans are profile mode, raise error if not
    -566            if not all(self.scan_df.loc[apex_scans, "ms_format"] == "profile"):
    -567                raise ValueError("All apex scans must be profile mode for averaging")
    -568
    -569            # First get sets of scans to average
    -570            def get_scans_from_apex(ms1_scans, apex_scan, scans_to_average):
    -571                ms1_idx_start = ms1_scans.index(apex_scan) - int(
    -572                    (scans_to_average - 1) / 2
    -573                )
    -574                if ms1_idx_start < 0:
    -575                    ms1_idx_start = 0
    -576                ms1_idx_end = (
    -577                    ms1_scans.index(apex_scan) + int((scans_to_average - 1) / 2) + 1
    -578                )
    -579                if ms1_idx_end > (len(ms1_scans) - 1):
    -580                    ms1_idx_end = len(ms1_scans) - 1
    -581                scan_list = ms1_scans[ms1_idx_start:ms1_idx_end]
    -582                return scan_list
    -583
    -584            ms1_scans = self.ms1_scans
    -585            scans_lists = [
    -586                get_scans_from_apex(ms1_scans, apex_scan, scans_to_average)
    -587                for apex_scan in apex_scans
    -588            ]
    -589
    -590            # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
    -591            if self.polarity == "negative":
    -592                polarity = -1
    -593            elif self.polarity == "positive":
    -594                polarity = 1
    -595
    -596            if not use_parser:
    -597                # Perform checks and prepare _ms_unprocessed dictionary if use_parser is False (saves time to do this once)
    -598                ms1_unprocessed = self._ms_unprocessed[1].copy()
    -599                # Set the index on _ms_unprocessed[1] to scan number
    -600                ms1_unprocessed = ms1_unprocessed.set_index("scan", drop=False)
    -601                self._ms_unprocessed[1] = ms1_unprocessed
    -602
    -603                # Check that all the scans in scan_lists are indexs in self._ms_unprocessed[1]
    -604                scans_lists_flat = list(
    -605                    set([scan for sublist in scans_lists for scan in sublist])
    -606                )
    -607                if (
    -608                    len(
    -609                        np.setdiff1d(
    -610                            np.sort(scans_lists_flat),
    -611                            np.sort(ms1_unprocessed.index.values),
    -612                        )
    -613                    )
    -614                    > 0
    -615                ):
    -616                    raise ValueError(
    -617                        "Not all scans to average are present in the unprocessed data"
    -618                    )
    -619
    -620            for scan_list_average, apex_scan in zip(scans_lists, apex_scans):
    -621                # Get unprocessed mass spectrum from scans
    -622                ms = self.get_average_mass_spectrum(
    -623                    scan_list=scan_list_average,
    -624                    apex_scan=apex_scan,
    -625                    spectrum_mode="profile",
    -626                    ms_level=1,
    -627                    auto_process=auto_process,
    -628                    use_parser=use_parser,
    -629                    perform_checks=False,
    -630                    polarity=polarity,
    -631                    ms_params=self.parameters.mass_spectrum["ms1"],
    -632                )
    -633                # Add mass spectrum to LCMS object and associated with mass feature
    -634                self.add_mass_spectrum(ms)
    -635
    -636            if not use_parser:
    -637                # Reset the index on _ms_unprocessed[1] to not be scan number
    -638                ms1_unprocessed = ms1_unprocessed.reset_index(drop=True)
    -639                self._ms_unprocessed[1] = ms1_unprocessed
    -640        else:
    -641            raise ValueError(
    -642                "Number of scans to average must be 1 or an integer with an integer median (i.e. 3, 5, 7, 9)"
    -643            )
    +            
    527    def add_associated_ms1(
    +528        self, auto_process=True, use_parser=True, spectrum_mode=None
    +529    ):
    +530        """Add MS1 spectra associated with mass features to the dataset.
    +531
    +532        Parameters
    +533        -----------
    +534        auto_process : bool, optional
    +535            If True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
    +536        use_parser : bool, optional
    +537            If True, envoke the spectra parser to get the MS1 spectra. Default is True.
    +538        spectrum_mode : str or None, optional
    +539            The spectrum mode to use for the mass spectra.  If None, method will use the spectrum mode
    +540            from the spectra parser to ascertain the spectrum mode (this allows for mixed types).
    +541            Defaults to None. (faster if defined, otherwise will check each scan)
    +542
    +543        Raises
    +544        ------
    +545        ValueError
    +546            If mass_features is not set, must run find_mass_features() first.
    +547            If apex scans are not profile mode, all apex scans must be profile mode for averaging.
    +548            If number of scans to average is not  1 or an integer with an integer median (i.e. 3, 5, 7, 9).
    +549            If deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
    +550        """
    +551        # Check if mass_features is set, raise error if not
    +552        if self.mass_features is None:
    +553            raise ValueError(
    +554                "mass_features not set, must run find_mass_features() first"
    +555            )
    +556        scans_to_average = self.parameters.lc_ms.ms1_scans_to_average
    +557
    +558        if scans_to_average == 1:
    +559            # Add to LCMSobj
    +560            self.add_mass_spectra(
    +561                scan_list=[
    +562                    int(x) for x in self.mass_features_to_df().apex_scan.tolist()
    +563                ],
    +564                auto_process=auto_process,
    +565                use_parser=use_parser,
    +566                spectrum_mode=spectrum_mode,
    +567                ms_params=self.parameters.mass_spectrum["ms1"],
    +568            )
    +569
    +570        elif (
    +571            (scans_to_average - 1) % 2
    +572        ) == 0:  # scans_to_average = 3, 5, 7 etc, mirror l/r around apex
    +573            apex_scans = list(set(self.mass_features_to_df().apex_scan.tolist()))
    +574            # Check if all apex scans are profile mode, raise error if not
    +575            if not all(self.scan_df.loc[apex_scans, "ms_format"] == "profile"):
    +576                raise ValueError("All apex scans must be profile mode for averaging")
    +577
    +578            # First get sets of scans to average
    +579            def get_scans_from_apex(ms1_scans, apex_scan, scans_to_average):
    +580                ms1_idx_start = ms1_scans.index(apex_scan) - int(
    +581                    (scans_to_average - 1) / 2
    +582                )
    +583                if ms1_idx_start < 0:
    +584                    ms1_idx_start = 0
    +585                ms1_idx_end = (
    +586                    ms1_scans.index(apex_scan) + int((scans_to_average - 1) / 2) + 1
    +587                )
    +588                if ms1_idx_end > (len(ms1_scans) - 1):
    +589                    ms1_idx_end = len(ms1_scans) - 1
    +590                scan_list = ms1_scans[ms1_idx_start:ms1_idx_end]
    +591                return scan_list
    +592
    +593            ms1_scans = self.ms1_scans
    +594            scans_lists = [
    +595                get_scans_from_apex(ms1_scans, apex_scan, scans_to_average)
    +596                for apex_scan in apex_scans
    +597            ]
    +598
    +599            # set polarity to -1 if negative mode, 1 if positive mode (for mass spectrum creation)
    +600            if self.polarity == "negative":
    +601                polarity = -1
    +602            elif self.polarity == "positive":
    +603                polarity = 1
    +604
    +605            if not use_parser:
    +606                # Perform checks and prepare _ms_unprocessed dictionary if use_parser is False (saves time to do this once)
    +607                ms1_unprocessed = self._ms_unprocessed[1].copy()
    +608                # Set the index on _ms_unprocessed[1] to scan number
    +609                ms1_unprocessed = ms1_unprocessed.set_index("scan", drop=False)
    +610                self._ms_unprocessed[1] = ms1_unprocessed
    +611
    +612                # Check that all the scans in scan_lists are indexs in self._ms_unprocessed[1]
    +613                scans_lists_flat = list(
    +614                    set([scan for sublist in scans_lists for scan in sublist])
    +615                )
    +616                if (
    +617                    len(
    +618                        np.setdiff1d(
    +619                            np.sort(scans_lists_flat),
    +620                            np.sort(ms1_unprocessed.index.values),
    +621                        )
    +622                    )
    +623                    > 0
    +624                ):
    +625                    raise ValueError(
    +626                        "Not all scans to average are present in the unprocessed data"
    +627                    )
    +628
    +629            for scan_list_average, apex_scan in zip(scans_lists, apex_scans):
    +630                # Get unprocessed mass spectrum from scans
    +631                ms = self.get_average_mass_spectrum(
    +632                    scan_list=scan_list_average,
    +633                    apex_scan=apex_scan,
    +634                    spectrum_mode="profile",
    +635                    ms_level=1,
    +636                    auto_process=auto_process,
    +637                    use_parser=use_parser,
    +638                    perform_checks=False,
    +639                    polarity=polarity,
    +640                    ms_params=self.parameters.mass_spectrum["ms1"],
    +641                )
    +642                # Add mass spectrum to LCMS object and associated with mass feature
    +643                self.add_mass_spectrum(ms)
     644
    -645        # Associate the ms1 spectra with the mass features
    -646        for mf_id in self.mass_features:
    -647            self.mass_features[mf_id].mass_spectrum = self._ms[
    -648                self.mass_features[mf_id].apex_scan
    -649            ]
    -650            self.mass_features[mf_id].update_mz()
    -651        
    -652        # Re-process clustering if persistent homology is selected to remove duplicate mass features after adding and processing MS1 spectra
    -653        if self.parameters.lc_ms.peak_picking_method == "persistent homology":
    -654            self.cluster_mass_features(drop_children=True, sort_by="persistence")
    +645            if not use_parser:
    +646                # Reset the index on _ms_unprocessed[1] to not be scan number
    +647                ms1_unprocessed = ms1_unprocessed.reset_index(drop=True)
    +648                self._ms_unprocessed[1] = ms1_unprocessed
    +649        else:
    +650            raise ValueError(
    +651                "Number of scans to average must be 1 or an integer with an integer median (i.e. 3, 5, 7, 9)"
    +652            )
    +653
    +654        # Associate the ms1 spectra with the mass features
    +655        for mf_id in self.mass_features:
    +656            self.mass_features[mf_id].mass_spectrum = self._ms[
    +657                self.mass_features[mf_id].apex_scan
    +658            ]
    +659            self.mass_features[mf_id].update_mz()
    +660
    +661        # Re-process clustering if persistent homology is selected to remove duplicate mass features after adding and processing MS1 spectra
    +662        if self.parameters.lc_ms.peak_picking_method == "persistent homology":
    +663            self.cluster_mass_features(drop_children=True, sort_by="persistence")
     
    @@ -3409,140 +3434,140 @@
    Raises
    -
    656    def mass_features_to_df(self):
    -657        """Returns a pandas dataframe summarizing the mass features.
    -658
    -659        The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity,
    -660        persistence, area, monoisotopic_mf_id, and isotopologue_type.  The index is set to mf_id (mass feature ID).
    -661
    -662
    -663        Returns
    -664        --------
    -665        pandas.DataFrame
    -666            A pandas dataframe of mass features with the following columns:
    -667            mf_id, mz, apex_scan, scan_time, intensity, persistence, area.
    -668        """
    -669
    -670        def mass_spectrum_to_string(
    -671            mass_spec, normalize=True, min_normalized_abun=0.01
    -672        ):
    -673            """Converts a mass spectrum to a string of m/z:abundance pairs.
    -674
    -675            Parameters
    -676            -----------
    -677            mass_spec : MassSpectrum
    -678                A MassSpectrum object to be converted to a string.
    -679            normalize : bool, optional
    -680                If True, normalizes the abundance values to a maximum of 1. Defaults to True.
    -681            min_normalized_abun : float, optional
    -682                The minimum normalized abundance value to include in the string, only used if normalize is True. Defaults to 0.01.
    +            
    665    def mass_features_to_df(self):
    +666        """Returns a pandas dataframe summarizing the mass features.
    +667
    +668        The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity,
    +669        persistence, area, monoisotopic_mf_id, and isotopologue_type.  The index is set to mf_id (mass feature ID).
    +670
    +671
    +672        Returns
    +673        --------
    +674        pandas.DataFrame
    +675            A pandas dataframe of mass features with the following columns:
    +676            mf_id, mz, apex_scan, scan_time, intensity, persistence, area.
    +677        """
    +678
    +679        def mass_spectrum_to_string(
    +680            mass_spec, normalize=True, min_normalized_abun=0.01
    +681        ):
    +682            """Converts a mass spectrum to a string of m/z:abundance pairs.
     683
    -684            Returns
    -685            --------
    -686            str
    -687                A string of m/z:abundance pairs from the mass spectrum, separated by a semicolon.
    -688            """
    -689            mz_np = mass_spec.to_dataframe()["m/z"].values
    -690            abun_np = mass_spec.to_dataframe()["Peak Height"].values
    -691            if normalize:
    -692                abun_np = abun_np / abun_np.max()
    -693            mz_abun = np.column_stack((mz_np, abun_np))
    -694            if normalize:
    -695                mz_abun = mz_abun[mz_abun[:, 1] > min_normalized_abun]
    -696            mz_abun_str = [
    -697                str(round(mz, ndigits=4)) + ":" + str(round(abun, ndigits=2))
    -698                for mz, abun in mz_abun
    -699            ]
    -700            return "; ".join(mz_abun_str)
    -701
    -702        cols_in_df = [
    -703            "id",
    -704            "_apex_scan",
    -705            "start_scan",
    -706            "final_scan",
    -707            "_retention_time",
    -708            "_intensity",
    -709            "_persistence",
    -710            "_area",
    -711            "_dispersity_index",
    -712            "_tailing_factor",
    -713            "monoisotopic_mf_id",
    -714            "isotopologue_type",
    -715            "mass_spectrum_deconvoluted_parent",
    -716        ]
    -717        df_mf_list = []
    -718        for mf_id in self.mass_features.keys():
    -719            # Find cols_in_df that are in single_mf
    -720            df_keys = list(
    -721                set(cols_in_df).intersection(self.mass_features[mf_id].__dir__())
    -722            )
    -723            dict_mf = {}
    -724            for key in df_keys:
    -725                dict_mf[key] = getattr(self.mass_features[mf_id], key)
    -726            if len(self.mass_features[mf_id].ms2_scan_numbers) > 0:
    -727                # Add MS2 spectra info
    -728                best_ms2_spectrum = self.mass_features[mf_id].best_ms2
    -729                dict_mf["ms2_spectrum"] = mass_spectrum_to_string(best_ms2_spectrum)
    -730            if len(self.mass_features[mf_id].associated_mass_features_deconvoluted) > 0:
    -731                dict_mf["associated_mass_features"] = ", ".join(
    -732                    map(
    -733                        str,
    -734                        self.mass_features[mf_id].associated_mass_features_deconvoluted,
    -735                    )
    -736                )
    -737            if self.mass_features[mf_id]._half_height_width is not None:
    -738                dict_mf["half_height_width"] = self.mass_features[
    -739                    mf_id
    -740                ].half_height_width
    -741            # Check if EIC for mass feature is set
    -742            df_mf_single = pd.DataFrame(dict_mf, index=[mf_id])
    -743            df_mf_single["mz"] = self.mass_features[mf_id].mz
    -744            df_mf_list.append(df_mf_single)
    -745        df_mf = pd.concat(df_mf_list)
    -746
    -747        # rename _area to area and id to mf_id
    -748        df_mf = df_mf.rename(
    -749            columns={
    -750                "_area": "area",
    -751                "id": "mf_id",
    -752                "_apex_scan": "apex_scan",
    -753                "_retention_time": "scan_time",
    -754                "_intensity": "intensity",
    -755                "_persistence": "persistence",
    -756                "_dispersity_index": "dispersity_index",
    -757                "_tailing_factor": "tailing_factor",
    -758            }
    -759        )
    -760
    -761        # reorder columns
    -762        col_order = [
    -763            "mf_id",
    -764            "scan_time",
    -765            "mz",
    -766            "apex_scan",
    -767            "start_scan",
    -768            "final_scan",
    -769            "intensity",
    -770            "persistence",
    -771            "area",
    -772            "half_height_width",
    -773            "tailing_factor",
    -774            "dispersity_index",
    -775            "monoisotopic_mf_id",
    -776            "isotopologue_type",
    -777            "mass_spectrum_deconvoluted_parent",
    -778            "associated_mass_features",
    -779            "ms2_spectrum",
    -780        ]
    -781        # drop columns that are not in col_order
    -782        cols_to_order = [col for col in col_order if col in df_mf.columns]
    -783        df_mf = df_mf[cols_to_order]
    -784
    -785        # reset index to mf_id
    -786        df_mf = df_mf.set_index("mf_id")
    -787        df_mf.index.name = "mf_id"
    -788
    -789        return df_mf
    +684            Parameters
    +685            -----------
    +686            mass_spec : MassSpectrum
    +687                A MassSpectrum object to be converted to a string.
    +688            normalize : bool, optional
    +689                If True, normalizes the abundance values to a maximum of 1. Defaults to True.
    +690            min_normalized_abun : float, optional
    +691                The minimum normalized abundance value to include in the string, only used if normalize is True. Defaults to 0.01.
    +692
    +693            Returns
    +694            --------
    +695            str
    +696                A string of m/z:abundance pairs from the mass spectrum, separated by a semicolon.
    +697            """
    +698            mz_np = mass_spec.to_dataframe()["m/z"].values
    +699            abun_np = mass_spec.to_dataframe()["Peak Height"].values
    +700            if normalize:
    +701                abun_np = abun_np / abun_np.max()
    +702            mz_abun = np.column_stack((mz_np, abun_np))
    +703            if normalize:
    +704                mz_abun = mz_abun[mz_abun[:, 1] > min_normalized_abun]
    +705            mz_abun_str = [
    +706                str(round(mz, ndigits=4)) + ":" + str(round(abun, ndigits=2))
    +707                for mz, abun in mz_abun
    +708            ]
    +709            return "; ".join(mz_abun_str)
    +710
    +711        cols_in_df = [
    +712            "id",
    +713            "_apex_scan",
    +714            "start_scan",
    +715            "final_scan",
    +716            "_retention_time",
    +717            "_intensity",
    +718            "_persistence",
    +719            "_area",
    +720            "_dispersity_index",
    +721            "_tailing_factor",
    +722            "monoisotopic_mf_id",
    +723            "isotopologue_type",
    +724            "mass_spectrum_deconvoluted_parent",
    +725        ]
    +726        df_mf_list = []
    +727        for mf_id in self.mass_features.keys():
    +728            # Find cols_in_df that are in single_mf
    +729            df_keys = list(
    +730                set(cols_in_df).intersection(self.mass_features[mf_id].__dir__())
    +731            )
    +732            dict_mf = {}
    +733            for key in df_keys:
    +734                dict_mf[key] = getattr(self.mass_features[mf_id], key)
    +735            if len(self.mass_features[mf_id].ms2_scan_numbers) > 0:
    +736                # Add MS2 spectra info
    +737                best_ms2_spectrum = self.mass_features[mf_id].best_ms2
    +738                dict_mf["ms2_spectrum"] = mass_spectrum_to_string(best_ms2_spectrum)
    +739            if len(self.mass_features[mf_id].associated_mass_features_deconvoluted) > 0:
    +740                dict_mf["associated_mass_features"] = ", ".join(
    +741                    map(
    +742                        str,
    +743                        self.mass_features[mf_id].associated_mass_features_deconvoluted,
    +744                    )
    +745                )
    +746            if self.mass_features[mf_id]._half_height_width is not None:
    +747                dict_mf["half_height_width"] = self.mass_features[
    +748                    mf_id
    +749                ].half_height_width
    +750            # Check if EIC for mass feature is set
    +751            df_mf_single = pd.DataFrame(dict_mf, index=[mf_id])
    +752            df_mf_single["mz"] = self.mass_features[mf_id].mz
    +753            df_mf_list.append(df_mf_single)
    +754        df_mf = pd.concat(df_mf_list)
    +755
    +756        # rename _area to area and id to mf_id
    +757        df_mf = df_mf.rename(
    +758            columns={
    +759                "_area": "area",
    +760                "id": "mf_id",
    +761                "_apex_scan": "apex_scan",
    +762                "_retention_time": "scan_time",
    +763                "_intensity": "intensity",
    +764                "_persistence": "persistence",
    +765                "_dispersity_index": "dispersity_index",
    +766                "_tailing_factor": "tailing_factor",
    +767            }
    +768        )
    +769
    +770        # reorder columns
    +771        col_order = [
    +772            "mf_id",
    +773            "scan_time",
    +774            "mz",
    +775            "apex_scan",
    +776            "start_scan",
    +777            "final_scan",
    +778            "intensity",
    +779            "persistence",
    +780            "area",
    +781            "half_height_width",
    +782            "tailing_factor",
    +783            "dispersity_index",
    +784            "monoisotopic_mf_id",
    +785            "isotopologue_type",
    +786            "mass_spectrum_deconvoluted_parent",
    +787            "associated_mass_features",
    +788            "ms2_spectrum",
    +789        ]
    +790        # drop columns that are not in col_order
    +791        cols_to_order = [col for col in col_order if col in df_mf.columns]
    +792        df_mf = df_mf[cols_to_order]
    +793
    +794        # reset index to mf_id
    +795        df_mf = df_mf.set_index("mf_id")
    +796        df_mf.index.name = "mf_id"
    +797
    +798        return df_mf
     
    @@ -3572,61 +3597,61 @@
    Returns
    -
    791    def mass_features_ms1_annot_to_df(self):
    -792        """Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.
    -793
    -794        Returns
    -795        --------
    -796        pandas.DataFrame
    -797            A pandas dataframe of MS1 annotations for the mass features in the dataset.
    -798            The index is set to mf_id (mass feature ID)
    -799
    -800        Raises
    -801        ------
    -802        Warning
    -803            If no MS1 annotations were found for the mass features in the dataset.
    -804        """
    -805        annot_df_list_ms1 = []
    -806        for mf_id in self.mass_features.keys():
    -807            if self.mass_features[mf_id].mass_spectrum is None:
    -808                pass
    -809            else:
    -810                # Add ms1 annotations to ms1 annotation list
    -811                if (
    -812                    np.abs(
    -813                        (
    -814                            self.mass_features[mf_id].ms1_peak.mz_exp
    -815                            - self.mass_features[mf_id].mz
    -816                        )
    -817                    )
    -818                    < 0.01
    -819                ):
    -820                    # Get the molecular formula from the mass spectrum
    -821                    annot_df = self.mass_features[mf_id].mass_spectrum.to_dataframe()
    -822                    # Subset to pull out only the peak associated with the mass feature
    -823                    annot_df = annot_df[
    -824                        annot_df["Index"] == self.mass_features[mf_id].ms1_peak.index
    -825                    ].copy()
    -826
    -827                    # Remove the index column and add column for mf_id
    -828                    annot_df = annot_df.drop(columns=["Index"])
    -829                    annot_df["mf_id"] = mf_id
    -830                    annot_df_list_ms1.append(annot_df)
    -831
    -832        if len(annot_df_list_ms1) > 0:
    -833            annot_ms1_df_full = pd.concat(annot_df_list_ms1)
    -834            annot_ms1_df_full = annot_ms1_df_full.set_index("mf_id")
    -835            annot_ms1_df_full.index.name = "mf_id"
    -836
    -837        else:
    -838            annot_ms1_df_full = None
    -839            # Warn that no ms1 annotations were found
    -840            warnings.warn(
    -841                "No MS1 annotations found for mass features in dataset, were MS1 spectra added and processed within the dataset?",
    -842                UserWarning
    -843            )
    -844
    -845        return annot_ms1_df_full
    +            
    800    def mass_features_ms1_annot_to_df(self):
    +801        """Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.
    +802
    +803        Returns
    +804        --------
    +805        pandas.DataFrame
    +806            A pandas dataframe of MS1 annotations for the mass features in the dataset.
    +807            The index is set to mf_id (mass feature ID)
    +808
    +809        Raises
    +810        ------
    +811        Warning
    +812            If no MS1 annotations were found for the mass features in the dataset.
    +813        """
    +814        annot_df_list_ms1 = []
    +815        for mf_id in self.mass_features.keys():
    +816            if self.mass_features[mf_id].mass_spectrum is None:
    +817                pass
    +818            else:
    +819                # Add ms1 annotations to ms1 annotation list
    +820                if (
    +821                    np.abs(
    +822                        (
    +823                            self.mass_features[mf_id].ms1_peak.mz_exp
    +824                            - self.mass_features[mf_id].mz
    +825                        )
    +826                    )
    +827                    < 0.01
    +828                ):
    +829                    # Get the molecular formula from the mass spectrum
    +830                    annot_df = self.mass_features[mf_id].mass_spectrum.to_dataframe()
    +831                    # Subset to pull out only the peak associated with the mass feature
    +832                    annot_df = annot_df[
    +833                        annot_df["Index"] == self.mass_features[mf_id].ms1_peak.index
    +834                    ].copy()
    +835
    +836                    # Remove the index column and add column for mf_id
    +837                    annot_df = annot_df.drop(columns=["Index"])
    +838                    annot_df["mf_id"] = mf_id
    +839                    annot_df_list_ms1.append(annot_df)
    +840
    +841        if len(annot_df_list_ms1) > 0:
    +842            annot_ms1_df_full = pd.concat(annot_df_list_ms1)
    +843            annot_ms1_df_full = annot_ms1_df_full.set_index("mf_id")
    +844            annot_ms1_df_full.index.name = "mf_id"
    +845
    +846        else:
    +847            annot_ms1_df_full = None
    +848            # Warn that no ms1 annotations were found
    +849            warnings.warn(
    +850                "No MS1 annotations found for mass features in dataset, were MS1 spectra added and processed within the dataset?",
    +851                UserWarning,
    +852            )
    +853
    +854        return annot_ms1_df_full
     
    @@ -3659,64 +3684,64 @@
    Raises
    -
    847    def mass_features_ms2_annot_to_df(self, molecular_metadata=None):
    -848        """Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.
    -849
    -850        Parameters
    -851        -----------
    -852        molecular_metadata :  dict of MolecularMetadata objects
    -853            A dictionary of MolecularMetadata objects, keyed by metabref_mol_id.  Defaults to None.
    -854
    -855        Returns
    -856        --------
    -857        pandas.DataFrame
    -858            A pandas dataframe of MS2 annotations for the mass features in the dataset, 
    -859            and optionally molecular metadata. The index is set to mf_id (mass feature ID)
    -860
    -861        Raises
    -862        ------
    -863        Warning
    -864            If no MS2 annotations were found for the mass features in the dataset.
    -865        """
    -866        annot_df_list_ms2 = []
    -867        for mf_id in self.mass_features.keys():
    -868            if len(self.mass_features[mf_id].ms2_similarity_results) > 0:
    -869                # Add ms2 annotations to ms2 annotation list
    -870                for result in self.mass_features[mf_id].ms2_similarity_results:
    -871                    annot_df_ms2 = result.to_dataframe()
    -872                    annot_df_ms2["mf_id"] = mf_id
    -873                    annot_df_list_ms2.append(annot_df_ms2)
    -874
    -875        if len(annot_df_list_ms2) > 0:
    -876            annot_ms2_df_full = pd.concat(annot_df_list_ms2)
    -877            if molecular_metadata is not None:
    -878                molecular_metadata_df = pd.concat(
    -879                    [
    -880                        pd.DataFrame.from_dict(v.__dict__, orient="index").transpose()
    -881                        for k, v in molecular_metadata.items()
    -882                    ],
    -883                    ignore_index=True,
    -884                )
    -885                molecular_metadata_df = molecular_metadata_df.rename(
    -886                    columns={"id": "ref_mol_id"}
    -887                )
    -888                annot_ms2_df_full = annot_ms2_df_full.merge(
    -889                    molecular_metadata_df, on="ref_mol_id", how="left"
    -890                )
    -891            annot_ms2_df_full = annot_ms2_df_full.drop_duplicates(
    -892                subset=["mf_id", "query_spectrum_id", "ref_ms_id"]
    -893            ).copy()
    -894            annot_ms2_df_full = annot_ms2_df_full.set_index("mf_id")
    -895            annot_ms2_df_full.index.name = "mf_id"
    -896        else:
    -897            annot_ms2_df_full = None
    -898            # Warn that no ms2 annotations were found
    -899            warnings.warn(
    -900                "No MS2 annotations found for mass features in dataset, were MS2 spectra added and searched against a database?",
    -901                UserWarning
    -902            )
    -903
    -904        return annot_ms2_df_full
    +            
    856    def mass_features_ms2_annot_to_df(self, molecular_metadata=None):
    +857        """Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.
    +858
    +859        Parameters
    +860        -----------
    +861        molecular_metadata :  dict of MolecularMetadata objects
    +862            A dictionary of MolecularMetadata objects, keyed by metabref_mol_id.  Defaults to None.
    +863
    +864        Returns
    +865        --------
    +866        pandas.DataFrame
    +867            A pandas dataframe of MS2 annotations for the mass features in the dataset,
    +868            and optionally molecular metadata. The index is set to mf_id (mass feature ID)
    +869
    +870        Raises
    +871        ------
    +872        Warning
    +873            If no MS2 annotations were found for the mass features in the dataset.
    +874        """
    +875        annot_df_list_ms2 = []
    +876        for mf_id in self.mass_features.keys():
    +877            if len(self.mass_features[mf_id].ms2_similarity_results) > 0:
    +878                # Add ms2 annotations to ms2 annotation list
    +879                for result in self.mass_features[mf_id].ms2_similarity_results:
    +880                    annot_df_ms2 = result.to_dataframe()
    +881                    annot_df_ms2["mf_id"] = mf_id
    +882                    annot_df_list_ms2.append(annot_df_ms2)
    +883
    +884        if len(annot_df_list_ms2) > 0:
    +885            annot_ms2_df_full = pd.concat(annot_df_list_ms2)
    +886            if molecular_metadata is not None:
    +887                molecular_metadata_df = pd.concat(
    +888                    [
    +889                        pd.DataFrame.from_dict(v.__dict__, orient="index").transpose()
    +890                        for k, v in molecular_metadata.items()
    +891                    ],
    +892                    ignore_index=True,
    +893                )
    +894                molecular_metadata_df = molecular_metadata_df.rename(
    +895                    columns={"id": "ref_mol_id"}
    +896                )
    +897                annot_ms2_df_full = annot_ms2_df_full.merge(
    +898                    molecular_metadata_df, on="ref_mol_id", how="left"
    +899                )
    +900            annot_ms2_df_full = annot_ms2_df_full.drop_duplicates(
    +901                subset=["mf_id", "query_spectrum_id", "ref_ms_id"]
    +902            ).copy()
    +903            annot_ms2_df_full = annot_ms2_df_full.set_index("mf_id")
    +904            annot_ms2_df_full.index.name = "mf_id"
    +905        else:
    +906            annot_ms2_df_full = None
    +907            # Warn that no ms2 annotations were found
    +908            warnings.warn(
    +909                "No MS2 annotations found for mass features in dataset, were MS2 spectra added and searched against a database?",
    +910                UserWarning,
    +911            )
    +912
    +913        return annot_ms2_df_full
     
    @@ -3732,7 +3757,7 @@
    Parameters
    Returns
      -
    • pandas.DataFrame: A pandas dataframe of MS2 annotations for the mass features in the dataset, +
    • pandas.DataFrame: A pandas dataframe of MS2 annotations for the mass features in the dataset, and optionally molecular metadata. The index is set to mf_id (mass feature ID)
    @@ -3756,33 +3781,33 @@
    Raises
    -
    943    def set_tic_list_from_data(self, overwrite=False):
    -944        """Sets the TIC list from the mass spectrum objects within the _ms dictionary.
    -945
    -946        Parameters
    -947        -----------
    -948        overwrite : bool, optional
    -949            If True, overwrites the TIC list if it is already set. Defaults to False.
    -950
    -951        Notes
    -952        -----
    -953        If the _ms dictionary is incomplete, sets the TIC list to an empty list.
    +            
    952    def set_tic_list_from_data(self, overwrite=False):
    +953        """Sets the TIC list from the mass spectrum objects within the _ms dictionary.
     954
    -955        Raises
    -956        ------
    -957        ValueError
    -958            If no mass spectra are found in the dataset.
    -959            If the TIC list is already set and overwrite is False.
    -960        """
    -961        # Check if _ms is empty and raise error if so
    -962        if len(self._ms) == 0:
    -963            raise ValueError("No mass spectra found in dataset")
    -964
    -965        # Check if tic_list is already set and raise error if so
    -966        if len(self.tic) > 0 and not overwrite:
    -967            raise ValueError("TIC list already set, use overwrite=True to overwrite")
    -968
    -969        self.tic = [self._ms.get(i).tic for i in self.scans_number]
    +955        Parameters
    +956        -----------
    +957        overwrite : bool, optional
    +958            If True, overwrites the TIC list if it is already set. Defaults to False.
    +959
    +960        Notes
    +961        -----
    +962        If the _ms dictionary is incomplete, sets the TIC list to an empty list.
    +963
    +964        Raises
    +965        ------
    +966        ValueError
    +967            If no mass spectra are found in the dataset.
    +968            If the TIC list is already set and overwrite is False.
    +969        """
    +970        # Check if _ms is empty and raise error if so
    +971        if len(self._ms) == 0:
    +972            raise ValueError("No mass spectra found in dataset")
    +973
    +974        # Check if tic_list is already set and raise error if so
    +975        if len(self.tic) > 0 and not overwrite:
    +976            raise ValueError("TIC list already set, use overwrite=True to overwrite")
    +977
    +978        self.tic = [self._ms.get(i).tic for i in self.scans_number]
     
    @@ -3820,38 +3845,38 @@
    Raises
    -
     971    def set_retention_time_from_data(self, overwrite=False):
    - 972        """Sets the retention time list from the data in the _ms dictionary.
    - 973
    - 974        Parameters
    - 975        -----------
    - 976        overwrite : bool, optional
    - 977            If True, overwrites the retention time list if it is already set. Defaults to False.
    - 978
    - 979        Notes
    - 980        -----
    - 981        If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list.
    +            
     980    def set_retention_time_from_data(self, overwrite=False):
    + 981        """Sets the retention time list from the data in the _ms dictionary.
      982
    - 983        Raises
    - 984        ------
    - 985        ValueError
    - 986            If no mass spectra are found in the dataset.
    - 987            If the retention time list is already set and overwrite is False.
    - 988        """
    - 989        # Check if _ms is empty and raise error if so
    - 990        if len(self._ms) == 0:
    - 991            raise ValueError("No mass spectra found in dataset")
    - 992
    - 993        # Check if retention_time_list is already set and raise error if so
    - 994        if len(self.retention_time) > 0 and not overwrite:
    - 995            raise ValueError(
    - 996                "Retention time list already set, use overwrite=True to overwrite"
    - 997            )
    - 998
    - 999        retention_time_list = []
    -1000        for key_ms in sorted(self._ms.keys()):
    -1001            retention_time_list.append(self._ms.get(key_ms).retention_time)
    -1002        self.retention_time = retention_time_list
    + 983        Parameters
    + 984        -----------
    + 985        overwrite : bool, optional
    + 986            If True, overwrites the retention time list if it is already set. Defaults to False.
    + 987
    + 988        Notes
    + 989        -----
    + 990        If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list.
    + 991
    + 992        Raises
    + 993        ------
    + 994        ValueError
    + 995            If no mass spectra are found in the dataset.
    + 996            If the retention time list is already set and overwrite is False.
    + 997        """
    + 998        # Check if _ms is empty and raise error if so
    + 999        if len(self._ms) == 0:
    +1000            raise ValueError("No mass spectra found in dataset")
    +1001
    +1002        # Check if retention_time_list is already set and raise error if so
    +1003        if len(self.retention_time) > 0 and not overwrite:
    +1004            raise ValueError(
    +1005                "Retention time list already set, use overwrite=True to overwrite"
    +1006            )
    +1007
    +1008        retention_time_list = []
    +1009        for key_ms in sorted(self._ms.keys()):
    +1010            retention_time_list.append(self._ms.get(key_ms).retention_time)
    +1011        self.retention_time = retention_time_list
     
    @@ -3889,30 +3914,30 @@
    Raises
    -
    1004    def set_scans_number_from_data(self, overwrite=False):
    -1005        """Sets the scan number list from the data in the _ms dictionary.
    -1006
    -1007        Notes
    -1008        -----
    -1009        If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list.
    -1010
    -1011        Raises
    -1012        ------
    -1013        ValueError
    -1014            If no mass spectra are found in the dataset.
    -1015            If the scan number list is already set and overwrite is False.
    -1016        """
    -1017        # Check if _ms is empty and raise error if so
    -1018        if len(self._ms) == 0:
    -1019            raise ValueError("No mass spectra found in dataset")
    -1020
    -1021        # Check if scans_number_list is already set and raise error if so
    -1022        if len(self.scans_number) > 0 and not overwrite:
    -1023            raise ValueError(
    -1024                "Scan number list already set, use overwrite=True to overwrite"
    -1025            )
    -1026
    -1027        self.scans_number = sorted(self._ms.keys())
    +            
    1013    def set_scans_number_from_data(self, overwrite=False):
    +1014        """Sets the scan number list from the data in the _ms dictionary.
    +1015
    +1016        Notes
    +1017        -----
    +1018        If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list.
    +1019
    +1020        Raises
    +1021        ------
    +1022        ValueError
    +1023            If no mass spectra are found in the dataset.
    +1024            If the scan number list is already set and overwrite is False.
    +1025        """
    +1026        # Check if _ms is empty and raise error if so
    +1027        if len(self._ms) == 0:
    +1028            raise ValueError("No mass spectra found in dataset")
    +1029
    +1030        # Check if scans_number_list is already set and raise error if so
    +1031        if len(self.scans_number) > 0 and not overwrite:
    +1032            raise ValueError(
    +1033                "Scan number list already set, use overwrite=True to overwrite"
    +1034            )
    +1035
    +1036        self.scans_number = sorted(self._ms.keys())
     
    diff --git a/docs/corems/mass_spectra/factory/chromat_data.html b/docs/corems/mass_spectra/factory/chromat_data.html index 9f66f27e..159be678 100644 --- a/docs/corems/mass_spectra/factory/chromat_data.html +++ b/docs/corems/mass_spectra/factory/chromat_data.html @@ -118,39 +118,41 @@

    14 total ion current [chromatogram] 15 bpc: [floats] 16 base peak [chromatogram] -17 Apexes: [int] -18 original thermo apex scan number after peak picking -19 """ -20 scans : List[int] = field(default_factory=list) -21 time : List[float] = field(default_factory=list) -22 tic : List[float] = field(default_factory=list) -23 bpc : List[float] = field(default_factory=list) -24 apexes : List[int] = field(default_factory=list) -25 -26@dataclass -27class EIC_Data: -28 """A class to represent extracted ion chromatogram data. -29 -30 scans: [int] -31 original scan numbers -32 time: [floats] -33 list of retention times -34 eic: [floats] -35 extracted ion chromatogram -36 eic_smoothed: [floats] -37 extracted ion chromatogram smoothed -38 apexes: [int] -39 original apex scan number after peak picking -40 areas: [floats] -41 area under the curve for each apex -42 """ -43 -44 scans: List[int] = field(default_factory=list) -45 time: List[float] = field(default_factory=list) -46 eic: List[float] = field(default_factory=list) -47 eic_smoothed: List[float] = field(default_factory=list) -48 apexes: List[int] = field(default_factory=list) -49 areas: List[float] = field(default_factory=list) +17 Apexes: [int] +18 original thermo apex scan number after peak picking +19 """ +20 +21 scans: List[int] = field(default_factory=list) +22 time: List[float] = field(default_factory=list) +23 tic: List[float] = field(default_factory=list) +24 bpc: List[float] = field(default_factory=list) +25 apexes: List[int] = field(default_factory=list) +26 +27 +28@dataclass +29class EIC_Data: +30 """A class to represent extracted ion chromatogram data. 
+31 +32 scans: [int] +33 original scan numbers +34 time: [floats] +35 list of retention times +36 eic: [floats] +37 extracted ion chromatogram +38 eic_smoothed: [floats] +39 extracted ion chromatogram smoothed +40 apexes: [int] +41 original apex scan number after peak picking +42 areas: [floats] +43 area under the curve for each apex +44 """ +45 +46 scans: List[int] = field(default_factory=list) +47 time: List[float] = field(default_factory=list) +48 eic: List[float] = field(default_factory=list) +49 eic_smoothed: List[float] = field(default_factory=list) +50 apexes: List[int] = field(default_factory=list) +51 areas: List[float] = field(default_factory=list)

    @@ -179,14 +181,15 @@

    15 total ion current [chromatogram] 16 bpc: [floats] 17 base peak [chromatogram] -18 Apexes: [int] -19 original thermo apex scan number after peak picking -20 """ -21 scans : List[int] = field(default_factory=list) -22 time : List[float] = field(default_factory=list) -23 tic : List[float] = field(default_factory=list) -24 bpc : List[float] = field(default_factory=list) -25 apexes : List[int] = field(default_factory=list) +18 Apexes: [int] +19 original thermo apex scan number after peak picking +20 """ +21 +22 scans: List[int] = field(default_factory=list) +23 time: List[float] = field(default_factory=list) +24 tic: List[float] = field(default_factory=list) +25 bpc: List[float] = field(default_factory=list) +26 apexes: List[int] = field(default_factory=list) @@ -200,7 +203,7 @@

    total ion current [chromatogram] bpc: [floats] base peak [chromatogram] -Apexes: [int]
    +Apexes: [int] original thermo apex scan number after peak picking

    @@ -285,30 +288,30 @@

    -
    27@dataclass
    -28class EIC_Data:
    -29    """A class to represent extracted ion chromatogram data.
    -30
    -31    scans: [int]
    -32        original scan numbers
    -33    time: [floats]
    -34        list of retention times
    -35    eic: [floats]
    -36        extracted ion chromatogram
    -37    eic_smoothed: [floats]
    -38        extracted ion chromatogram smoothed
    -39    apexes: [int]
    -40        original apex scan number after peak picking
    -41    areas:  [floats]
    -42        area under the curve for each apex
    -43    """
    -44
    -45    scans: List[int] = field(default_factory=list)
    -46    time: List[float] = field(default_factory=list)
    -47    eic: List[float] = field(default_factory=list)
    -48    eic_smoothed: List[float] = field(default_factory=list)
    -49    apexes: List[int] = field(default_factory=list)
    -50    areas: List[float] = field(default_factory=list)
    +            
    29@dataclass
    +30class EIC_Data:
    +31    """A class to represent extracted ion chromatogram data.
    +32
    +33    scans: [int]
    +34        original scan numbers
    +35    time: [floats]
    +36        list of retention times
    +37    eic: [floats]
    +38        extracted ion chromatogram
    +39    eic_smoothed: [floats]
    +40        extracted ion chromatogram smoothed
    +41    apexes: [int]
    +42        original apex scan number after peak picking
    +43    areas:  [floats]
    +44        area under the curve for each apex
    +45    """
    +46
    +47    scans: List[int] = field(default_factory=list)
    +48    time: List[float] = field(default_factory=list)
    +49    eic: List[float] = field(default_factory=list)
    +50    eic_smoothed: List[float] = field(default_factory=list)
    +51    apexes: List[int] = field(default_factory=list)
    +52    areas: List[float] = field(default_factory=list)
     
    diff --git a/docs/corems/mass_spectra/input/andiNetCDF.html b/docs/corems/mass_spectra/input/andiNetCDF.html index f227395a..80eb56c2 100644 --- a/docs/corems/mass_spectra/input/andiNetCDF.html +++ b/docs/corems/mass_spectra/input/andiNetCDF.html @@ -155,15 +155,15 @@

    54 55 Methods 56 -------- - 57 * polarity(). + 57 * polarity(). 58 Get the polarity of the ionization. - 59 * get_mass_spectrum(mz, abun, rp, d_params). + 59 * get_mass_spectrum(mz, abun, rp, d_params). 60 Add a mass spectrum to the GCMSBase object. - 61 * run(). + 61 * run(). 62 Populate the GCMSBase object with mass spectra data. - 63 * import_mass_spectra(d_params). + 63 * import_mass_spectra(d_params). 64 Import mass spectra data from the AndiNetCDF file. - 65 * get_gcms_obj(). + 65 * get_gcms_obj(). 66 Get the GCMSBase object. 67 68 """ @@ -175,131 +175,130 @@

    74 instrument_label="GCMS-Agilent", 75 auto_process=True, 76 ): - 77 - 78 Thread.__init__(self) - 79 - 80 if isinstance(file_location, str): - 81 self.file_location = Path(file_location) - 82 else: - 83 self.file_location = file_location - 84 - 85 if not self.file_location.exists(): - 86 raise FileNotFoundError("File does not exist at %s", file_location) - 87 - 88 if isinstance(file_location, S3Path): - 89 bytes_io = self.file_location.open("rb").read() - 90 self.net_cdf_obj = Dataset( - 91 self.file_location.name, - 92 "r", - 93 diskless=True, - 94 memory=bytes_io, - 95 format="NETCDF3_CLASSIC", - 96 ) - 97 else: - 98 self.net_cdf_obj = Dataset( - 99 self.file_location, "r", format="NETCDF3_CLASSIC" -100 ) -101 -102 self.ionization_type = self.net_cdf_obj.test_ionization_mode -103 self.experiment_type = self.net_cdf_obj.experiment_type -104 self.list_scans = range( -105 len(self.net_cdf_obj.variables.get("actual_scan_number")[:]) -106 ) -107 self.initial_scan_number = self.list_scans[0] -108 self.final_scan_number = self.list_scans[-1] -109 self.analyzer = analyzer -110 self.instrument_label = instrument_label -111 self.gcms = GCMSBase(self.file_location, analyzer, instrument_label) -112 -113 @property -114 def polarity(self): -115 """ -116 Get the polarity of the ionization. -117 -118 """ -119 polarity = str(self.net_cdf_obj.test_ionization_polarity) -120 if polarity == "Positive Polarity": -121 return +1 -122 else: -123 return -1 -124 -125 def get_mass_spectrum(self, mz, abun, rp, d_params): -126 """ -127 Add a mass spectrum to the GCMSBase object. -128 -129 Parameters -130 ----------- -131 mz : array-like -132 The m/z values of the mass spectrum. -133 abun : array-like -134 The abundance values of the mass spectrum. -135 rp : array-like -136 The resolution values of the mass spectrum. -137 d_params : dict -138 Additional parameters for the mass spectrum. 
-139 -140 """ -141 data_dict = { -142 Labels.mz: mz, -143 Labels.abundance: abun, -144 Labels.rp: rp, -145 Labels.s2n: None, -146 } -147 mass_spec = MassSpecCentroidLowRes(data_dict, d_params) -148 self.gcms.add_mass_spectrum(mass_spec) -149 -150 def run(self): -151 """ -152 Populate the GCMSBase object with mass spectra data. -153 """ -154 d_parameters = default_parameters(self.file_location) -155 self.import_mass_spectra(d_parameters) -156 -157 def import_mass_spectra(self, d_params): -158 """ -159 Import mass spectra data from the AndiNetCDF file. -160 -161 Parameters -162 ----------- -163 d_params : dict -164 Additional parameters for the mass spectra. -165 -166 """ -167 ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:] -168 list_tic = self.net_cdf_obj.variables.get("total_intensity")[:] -169 list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60 -170 mass_values = self.net_cdf_obj.variables.get("mass_values")[:] -171 intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:] -172 resolution = self.net_cdf_obj.variables.get("resolution")[:] -173 individual_rp = len(mass_values) == len(resolution) -174 finish_location = -1 -175 for scan_index in self.list_scans: -176 datapoints = ms_datapoints_per_scans[scan_index] -177 finish_location += datapoints -178 start_location = finish_location - datapoints + 1 -179 d_params["rt"] = list_rt[scan_index] -180 d_params["scan_number"] = scan_index -181 d_params["label"] = Labels.gcms_centroid -182 d_params["polarity"] = self.polarity -183 d_params["analyzer"] = self.analyzer -184 d_params["instrument_label"] = self.instrument_label -185 mz = mass_values[start_location:finish_location] -186 abun = intensity_values[start_location:finish_location] -187 if individual_rp: -188 rp = resolution[start_location:finish_location] -189 else: -190 rp = [resolution[scan_index]] * datapoints -191 self.get_mass_spectrum(mz, abun, rp, d_params) -192 self.gcms.retention_time = list_rt 
-193 self.gcms.tic = list_tic -194 self.gcms.scans_number = self.list_scans -195 -196 def get_gcms_obj(self): -197 """ -198 Get the GCMSBase object. -199 -200 """ -201 return self.gcms + 77 Thread.__init__(self) + 78 + 79 if isinstance(file_location, str): + 80 self.file_location = Path(file_location) + 81 else: + 82 self.file_location = file_location + 83 + 84 if not self.file_location.exists(): + 85 raise FileNotFoundError("File does not exist at %s", file_location) + 86 + 87 if isinstance(file_location, S3Path): + 88 bytes_io = self.file_location.open("rb").read() + 89 self.net_cdf_obj = Dataset( + 90 self.file_location.name, + 91 "r", + 92 diskless=True, + 93 memory=bytes_io, + 94 format="NETCDF3_CLASSIC", + 95 ) + 96 else: + 97 self.net_cdf_obj = Dataset( + 98 self.file_location, "r", format="NETCDF3_CLASSIC" + 99 ) +100 +101 self.ionization_type = self.net_cdf_obj.test_ionization_mode +102 self.experiment_type = self.net_cdf_obj.experiment_type +103 self.list_scans = range( +104 len(self.net_cdf_obj.variables.get("actual_scan_number")[:]) +105 ) +106 self.initial_scan_number = self.list_scans[0] +107 self.final_scan_number = self.list_scans[-1] +108 self.analyzer = analyzer +109 self.instrument_label = instrument_label +110 self.gcms = GCMSBase(self.file_location, analyzer, instrument_label) +111 +112 @property +113 def polarity(self): +114 """ +115 Get the polarity of the ionization. +116 +117 """ +118 polarity = str(self.net_cdf_obj.test_ionization_polarity) +119 if polarity == "Positive Polarity": +120 return +1 +121 else: +122 return -1 +123 +124 def get_mass_spectrum(self, mz, abun, rp, d_params): +125 """ +126 Add a mass spectrum to the GCMSBase object. +127 +128 Parameters +129 ----------- +130 mz : array-like +131 The m/z values of the mass spectrum. +132 abun : array-like +133 The abundance values of the mass spectrum. +134 rp : array-like +135 The resolution values of the mass spectrum. 
+136 d_params : dict +137 Additional parameters for the mass spectrum. +138 +139 """ +140 data_dict = { +141 Labels.mz: mz, +142 Labels.abundance: abun, +143 Labels.rp: rp, +144 Labels.s2n: None, +145 } +146 mass_spec = MassSpecCentroidLowRes(data_dict, d_params) +147 self.gcms.add_mass_spectrum(mass_spec) +148 +149 def run(self): +150 """ +151 Populate the GCMSBase object with mass spectra data. +152 """ +153 d_parameters = default_parameters(self.file_location) +154 self.import_mass_spectra(d_parameters) +155 +156 def import_mass_spectra(self, d_params): +157 """ +158 Import mass spectra data from the AndiNetCDF file. +159 +160 Parameters +161 ----------- +162 d_params : dict +163 Additional parameters for the mass spectra. +164 +165 """ +166 ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:] +167 list_tic = self.net_cdf_obj.variables.get("total_intensity")[:] +168 list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60 +169 mass_values = self.net_cdf_obj.variables.get("mass_values")[:] +170 intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:] +171 resolution = self.net_cdf_obj.variables.get("resolution")[:] +172 individual_rp = len(mass_values) == len(resolution) +173 finish_location = -1 +174 for scan_index in self.list_scans: +175 datapoints = ms_datapoints_per_scans[scan_index] +176 finish_location += datapoints +177 start_location = finish_location - datapoints + 1 +178 d_params["rt"] = list_rt[scan_index] +179 d_params["scan_number"] = scan_index +180 d_params["label"] = Labels.gcms_centroid +181 d_params["polarity"] = self.polarity +182 d_params["analyzer"] = self.analyzer +183 d_params["instrument_label"] = self.instrument_label +184 mz = mass_values[start_location:finish_location] +185 abun = intensity_values[start_location:finish_location] +186 if individual_rp: +187 rp = resolution[start_location:finish_location] +188 else: +189 rp = [resolution[scan_index]] * datapoints +190 
self.get_mass_spectrum(mz, abun, rp, d_params) +191 self.gcms.retention_time = list_rt +192 self.gcms.tic = list_tic +193 self.gcms.scans_number = self.list_scans +194 +195 def get_gcms_obj(self): +196 """ +197 Get the GCMSBase object. +198 +199 """ +200 return self.gcms

    @@ -355,15 +354,15 @@

    55 56 Methods 57 -------- - 58 * polarity(). + 58 * polarity(). 59 Get the polarity of the ionization. - 60 * get_mass_spectrum(mz, abun, rp, d_params). + 60 * get_mass_spectrum(mz, abun, rp, d_params). 61 Add a mass spectrum to the GCMSBase object. - 62 * run(). + 62 * run(). 63 Populate the GCMSBase object with mass spectra data. - 64 * import_mass_spectra(d_params). + 64 * import_mass_spectra(d_params). 65 Import mass spectra data from the AndiNetCDF file. - 66 * get_gcms_obj(). + 66 * get_gcms_obj(). 67 Get the GCMSBase object. 68 69 """ @@ -375,131 +374,130 @@

    75 instrument_label="GCMS-Agilent", 76 auto_process=True, 77 ): - 78 - 79 Thread.__init__(self) - 80 - 81 if isinstance(file_location, str): - 82 self.file_location = Path(file_location) - 83 else: - 84 self.file_location = file_location - 85 - 86 if not self.file_location.exists(): - 87 raise FileNotFoundError("File does not exist at %s", file_location) - 88 - 89 if isinstance(file_location, S3Path): - 90 bytes_io = self.file_location.open("rb").read() - 91 self.net_cdf_obj = Dataset( - 92 self.file_location.name, - 93 "r", - 94 diskless=True, - 95 memory=bytes_io, - 96 format="NETCDF3_CLASSIC", - 97 ) - 98 else: - 99 self.net_cdf_obj = Dataset( -100 self.file_location, "r", format="NETCDF3_CLASSIC" -101 ) -102 -103 self.ionization_type = self.net_cdf_obj.test_ionization_mode -104 self.experiment_type = self.net_cdf_obj.experiment_type -105 self.list_scans = range( -106 len(self.net_cdf_obj.variables.get("actual_scan_number")[:]) -107 ) -108 self.initial_scan_number = self.list_scans[0] -109 self.final_scan_number = self.list_scans[-1] -110 self.analyzer = analyzer -111 self.instrument_label = instrument_label -112 self.gcms = GCMSBase(self.file_location, analyzer, instrument_label) -113 -114 @property -115 def polarity(self): -116 """ -117 Get the polarity of the ionization. -118 -119 """ -120 polarity = str(self.net_cdf_obj.test_ionization_polarity) -121 if polarity == "Positive Polarity": -122 return +1 -123 else: -124 return -1 -125 -126 def get_mass_spectrum(self, mz, abun, rp, d_params): -127 """ -128 Add a mass spectrum to the GCMSBase object. -129 -130 Parameters -131 ----------- -132 mz : array-like -133 The m/z values of the mass spectrum. -134 abun : array-like -135 The abundance values of the mass spectrum. -136 rp : array-like -137 The resolution values of the mass spectrum. -138 d_params : dict -139 Additional parameters for the mass spectrum. 
-140 -141 """ -142 data_dict = { -143 Labels.mz: mz, -144 Labels.abundance: abun, -145 Labels.rp: rp, -146 Labels.s2n: None, -147 } -148 mass_spec = MassSpecCentroidLowRes(data_dict, d_params) -149 self.gcms.add_mass_spectrum(mass_spec) -150 -151 def run(self): -152 """ -153 Populate the GCMSBase object with mass spectra data. -154 """ -155 d_parameters = default_parameters(self.file_location) -156 self.import_mass_spectra(d_parameters) -157 -158 def import_mass_spectra(self, d_params): -159 """ -160 Import mass spectra data from the AndiNetCDF file. -161 -162 Parameters -163 ----------- -164 d_params : dict -165 Additional parameters for the mass spectra. -166 -167 """ -168 ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:] -169 list_tic = self.net_cdf_obj.variables.get("total_intensity")[:] -170 list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60 -171 mass_values = self.net_cdf_obj.variables.get("mass_values")[:] -172 intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:] -173 resolution = self.net_cdf_obj.variables.get("resolution")[:] -174 individual_rp = len(mass_values) == len(resolution) -175 finish_location = -1 -176 for scan_index in self.list_scans: -177 datapoints = ms_datapoints_per_scans[scan_index] -178 finish_location += datapoints -179 start_location = finish_location - datapoints + 1 -180 d_params["rt"] = list_rt[scan_index] -181 d_params["scan_number"] = scan_index -182 d_params["label"] = Labels.gcms_centroid -183 d_params["polarity"] = self.polarity -184 d_params["analyzer"] = self.analyzer -185 d_params["instrument_label"] = self.instrument_label -186 mz = mass_values[start_location:finish_location] -187 abun = intensity_values[start_location:finish_location] -188 if individual_rp: -189 rp = resolution[start_location:finish_location] -190 else: -191 rp = [resolution[scan_index]] * datapoints -192 self.get_mass_spectrum(mz, abun, rp, d_params) -193 self.gcms.retention_time = list_rt 
-194 self.gcms.tic = list_tic -195 self.gcms.scans_number = self.list_scans -196 -197 def get_gcms_obj(self): -198 """ -199 Get the GCMSBase object. -200 -201 """ -202 return self.gcms + 78 Thread.__init__(self) + 79 + 80 if isinstance(file_location, str): + 81 self.file_location = Path(file_location) + 82 else: + 83 self.file_location = file_location + 84 + 85 if not self.file_location.exists(): + 86 raise FileNotFoundError("File does not exist at %s", file_location) + 87 + 88 if isinstance(file_location, S3Path): + 89 bytes_io = self.file_location.open("rb").read() + 90 self.net_cdf_obj = Dataset( + 91 self.file_location.name, + 92 "r", + 93 diskless=True, + 94 memory=bytes_io, + 95 format="NETCDF3_CLASSIC", + 96 ) + 97 else: + 98 self.net_cdf_obj = Dataset( + 99 self.file_location, "r", format="NETCDF3_CLASSIC" +100 ) +101 +102 self.ionization_type = self.net_cdf_obj.test_ionization_mode +103 self.experiment_type = self.net_cdf_obj.experiment_type +104 self.list_scans = range( +105 len(self.net_cdf_obj.variables.get("actual_scan_number")[:]) +106 ) +107 self.initial_scan_number = self.list_scans[0] +108 self.final_scan_number = self.list_scans[-1] +109 self.analyzer = analyzer +110 self.instrument_label = instrument_label +111 self.gcms = GCMSBase(self.file_location, analyzer, instrument_label) +112 +113 @property +114 def polarity(self): +115 """ +116 Get the polarity of the ionization. +117 +118 """ +119 polarity = str(self.net_cdf_obj.test_ionization_polarity) +120 if polarity == "Positive Polarity": +121 return +1 +122 else: +123 return -1 +124 +125 def get_mass_spectrum(self, mz, abun, rp, d_params): +126 """ +127 Add a mass spectrum to the GCMSBase object. +128 +129 Parameters +130 ----------- +131 mz : array-like +132 The m/z values of the mass spectrum. +133 abun : array-like +134 The abundance values of the mass spectrum. +135 rp : array-like +136 The resolution values of the mass spectrum. 
+137 d_params : dict +138 Additional parameters for the mass spectrum. +139 +140 """ +141 data_dict = { +142 Labels.mz: mz, +143 Labels.abundance: abun, +144 Labels.rp: rp, +145 Labels.s2n: None, +146 } +147 mass_spec = MassSpecCentroidLowRes(data_dict, d_params) +148 self.gcms.add_mass_spectrum(mass_spec) +149 +150 def run(self): +151 """ +152 Populate the GCMSBase object with mass spectra data. +153 """ +154 d_parameters = default_parameters(self.file_location) +155 self.import_mass_spectra(d_parameters) +156 +157 def import_mass_spectra(self, d_params): +158 """ +159 Import mass spectra data from the AndiNetCDF file. +160 +161 Parameters +162 ----------- +163 d_params : dict +164 Additional parameters for the mass spectra. +165 +166 """ +167 ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:] +168 list_tic = self.net_cdf_obj.variables.get("total_intensity")[:] +169 list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60 +170 mass_values = self.net_cdf_obj.variables.get("mass_values")[:] +171 intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:] +172 resolution = self.net_cdf_obj.variables.get("resolution")[:] +173 individual_rp = len(mass_values) == len(resolution) +174 finish_location = -1 +175 for scan_index in self.list_scans: +176 datapoints = ms_datapoints_per_scans[scan_index] +177 finish_location += datapoints +178 start_location = finish_location - datapoints + 1 +179 d_params["rt"] = list_rt[scan_index] +180 d_params["scan_number"] = scan_index +181 d_params["label"] = Labels.gcms_centroid +182 d_params["polarity"] = self.polarity +183 d_params["analyzer"] = self.analyzer +184 d_params["instrument_label"] = self.instrument_label +185 mz = mass_values[start_location:finish_location] +186 abun = intensity_values[start_location:finish_location] +187 if individual_rp: +188 rp = resolution[start_location:finish_location] +189 else: +190 rp = [resolution[scan_index]] * datapoints +191 
self.get_mass_spectrum(mz, abun, rp, d_params) +192 self.gcms.retention_time = list_rt +193 self.gcms.tic = list_tic +194 self.gcms.scans_number = self.list_scans +195 +196 def get_gcms_obj(self): +197 """ +198 Get the GCMSBase object. +199 +200 """ +201 return self.gcms @@ -546,15 +544,15 @@

    Attributes
    Methods
      -
    • polarity(). +
    • polarity(). Get the polarity of the ionization.
    • -
    • get_mass_spectrum(mz, abun, rp, d_params). +
    • get_mass_spectrum(mz, abun, rp, d_params). Add a mass spectrum to the GCMSBase object.
    • -
    • run(). +
    • run(). Populate the GCMSBase object with mass spectra data.
    • -
    • import_mass_spectra(d_params). +
    • import_mass_spectra(d_params). Import mass spectra data from the AndiNetCDF file.
    • -
    • get_gcms_obj(). +
    • get_gcms_obj(). Get the GCMSBase object.
    @@ -577,41 +575,40 @@
    Methods
    75 instrument_label="GCMS-Agilent", 76 auto_process=True, 77 ): - 78 - 79 Thread.__init__(self) - 80 - 81 if isinstance(file_location, str): - 82 self.file_location = Path(file_location) - 83 else: - 84 self.file_location = file_location - 85 - 86 if not self.file_location.exists(): - 87 raise FileNotFoundError("File does not exist at %s", file_location) - 88 - 89 if isinstance(file_location, S3Path): - 90 bytes_io = self.file_location.open("rb").read() - 91 self.net_cdf_obj = Dataset( - 92 self.file_location.name, - 93 "r", - 94 diskless=True, - 95 memory=bytes_io, - 96 format="NETCDF3_CLASSIC", - 97 ) - 98 else: - 99 self.net_cdf_obj = Dataset( -100 self.file_location, "r", format="NETCDF3_CLASSIC" -101 ) -102 -103 self.ionization_type = self.net_cdf_obj.test_ionization_mode -104 self.experiment_type = self.net_cdf_obj.experiment_type -105 self.list_scans = range( -106 len(self.net_cdf_obj.variables.get("actual_scan_number")[:]) -107 ) -108 self.initial_scan_number = self.list_scans[0] -109 self.final_scan_number = self.list_scans[-1] -110 self.analyzer = analyzer -111 self.instrument_label = instrument_label -112 self.gcms = GCMSBase(self.file_location, analyzer, instrument_label) + 78 Thread.__init__(self) + 79 + 80 if isinstance(file_location, str): + 81 self.file_location = Path(file_location) + 82 else: + 83 self.file_location = file_location + 84 + 85 if not self.file_location.exists(): + 86 raise FileNotFoundError("File does not exist at %s", file_location) + 87 + 88 if isinstance(file_location, S3Path): + 89 bytes_io = self.file_location.open("rb").read() + 90 self.net_cdf_obj = Dataset( + 91 self.file_location.name, + 92 "r", + 93 diskless=True, + 94 memory=bytes_io, + 95 format="NETCDF3_CLASSIC", + 96 ) + 97 else: + 98 self.net_cdf_obj = Dataset( + 99 self.file_location, "r", format="NETCDF3_CLASSIC" +100 ) +101 +102 self.ionization_type = self.net_cdf_obj.test_ionization_mode +103 self.experiment_type = self.net_cdf_obj.experiment_type +104 
self.list_scans = range( +105 len(self.net_cdf_obj.variables.get("actual_scan_number")[:]) +106 ) +107 self.initial_scan_number = self.list_scans[0] +108 self.final_scan_number = self.list_scans[-1] +109 self.analyzer = analyzer +110 self.instrument_label = instrument_label +111 self.gcms = GCMSBase(self.file_location, analyzer, instrument_label) @@ -750,30 +747,30 @@

    Methods
    -
    126    def get_mass_spectrum(self, mz, abun, rp, d_params):
    -127        """
    -128        Add a mass spectrum to the GCMSBase object.
    -129
    -130        Parameters
    -131        -----------
    -132        mz : array-like
    -133                The m/z values of the mass spectrum.
    -134        abun : array-like
    -135                The abundance values of the mass spectrum.
    -136        rp : array-like
    -137                The resolution values of the mass spectrum.
    -138        d_params : dict
    -139                Additional parameters for the mass spectrum.
    -140
    -141        """
    -142        data_dict = {
    -143            Labels.mz: mz,
    -144            Labels.abundance: abun,
    -145            Labels.rp: rp,
    -146            Labels.s2n: None,
    -147        }
    -148        mass_spec = MassSpecCentroidLowRes(data_dict, d_params)
    -149        self.gcms.add_mass_spectrum(mass_spec)
    +            
    125    def get_mass_spectrum(self, mz, abun, rp, d_params):
    +126        """
    +127        Add a mass spectrum to the GCMSBase object.
    +128
    +129        Parameters
    +130        -----------
    +131        mz : array-like
    +132                The m/z values of the mass spectrum.
    +133        abun : array-like
    +134                The abundance values of the mass spectrum.
    +135        rp : array-like
    +136                The resolution values of the mass spectrum.
    +137        d_params : dict
    +138                Additional parameters for the mass spectrum.
    +139
    +140        """
    +141        data_dict = {
    +142            Labels.mz: mz,
    +143            Labels.abundance: abun,
    +144            Labels.rp: rp,
    +145            Labels.s2n: None,
    +146        }
    +147        mass_spec = MassSpecCentroidLowRes(data_dict, d_params)
    +148        self.gcms.add_mass_spectrum(mass_spec)
     
    @@ -806,12 +803,12 @@
    Parameters
    -
    151    def run(self):
    -152        """
    -153        Populate the GCMSBase object with mass spectra data.
    -154        """
    -155        d_parameters = default_parameters(self.file_location)
    -156        self.import_mass_spectra(d_parameters)
    +            
    150    def run(self):
    +151        """
    +152        Populate the GCMSBase object with mass spectra data.
    +153        """
    +154        d_parameters = default_parameters(self.file_location)
    +155        self.import_mass_spectra(d_parameters)
     
    @@ -831,44 +828,44 @@
    Parameters
    -
    158    def import_mass_spectra(self, d_params):
    -159        """
    -160        Import mass spectra data from the AndiNetCDF file.
    -161
    -162        Parameters
    -163        -----------
    -164        d_params : dict
    -165                Additional parameters for the mass spectra.
    -166
    -167        """
    -168        ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:]
    -169        list_tic = self.net_cdf_obj.variables.get("total_intensity")[:]
    -170        list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60
    -171        mass_values = self.net_cdf_obj.variables.get("mass_values")[:]
    -172        intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:]
    -173        resolution = self.net_cdf_obj.variables.get("resolution")[:]
    -174        individual_rp = len(mass_values) == len(resolution)
    -175        finish_location = -1
    -176        for scan_index in self.list_scans:
    -177            datapoints = ms_datapoints_per_scans[scan_index]
    -178            finish_location += datapoints
    -179            start_location = finish_location - datapoints + 1
    -180            d_params["rt"] = list_rt[scan_index]
    -181            d_params["scan_number"] = scan_index
    -182            d_params["label"] = Labels.gcms_centroid
    -183            d_params["polarity"] = self.polarity
    -184            d_params["analyzer"] = self.analyzer
    -185            d_params["instrument_label"] = self.instrument_label
    -186            mz = mass_values[start_location:finish_location]
    -187            abun = intensity_values[start_location:finish_location]
    -188            if individual_rp:
    -189                rp = resolution[start_location:finish_location]
    -190            else:
    -191                rp = [resolution[scan_index]] * datapoints
    -192            self.get_mass_spectrum(mz, abun, rp, d_params)
    -193        self.gcms.retention_time = list_rt
    -194        self.gcms.tic = list_tic
    -195        self.gcms.scans_number = self.list_scans
    +            
    157    def import_mass_spectra(self, d_params):
    +158        """
    +159        Import mass spectra data from the AndiNetCDF file.
    +160
    +161        Parameters
    +162        -----------
    +163        d_params : dict
    +164                Additional parameters for the mass spectra.
    +165
    +166        """
    +167        ms_datapoints_per_scans = self.net_cdf_obj.variables.get("point_count")[:]
    +168        list_tic = self.net_cdf_obj.variables.get("total_intensity")[:]
    +169        list_rt = self.net_cdf_obj.variables.get("scan_acquisition_time")[:] / 60
    +170        mass_values = self.net_cdf_obj.variables.get("mass_values")[:]
    +171        intensity_values = self.net_cdf_obj.variables.get("intensity_values")[:]
    +172        resolution = self.net_cdf_obj.variables.get("resolution")[:]
    +173        individual_rp = len(mass_values) == len(resolution)
    +174        finish_location = -1
    +175        for scan_index in self.list_scans:
    +176            datapoints = ms_datapoints_per_scans[scan_index]
    +177            finish_location += datapoints
    +178            start_location = finish_location - datapoints + 1
    +179            d_params["rt"] = list_rt[scan_index]
    +180            d_params["scan_number"] = scan_index
    +181            d_params["label"] = Labels.gcms_centroid
    +182            d_params["polarity"] = self.polarity
    +183            d_params["analyzer"] = self.analyzer
    +184            d_params["instrument_label"] = self.instrument_label
    +185            mz = mass_values[start_location:finish_location]
    +186            abun = intensity_values[start_location:finish_location]
    +187            if individual_rp:
    +188                rp = resolution[start_location:finish_location]
    +189            else:
    +190                rp = [resolution[scan_index]] * datapoints
    +191            self.get_mass_spectrum(mz, abun, rp, d_params)
    +192        self.gcms.retention_time = list_rt
    +193        self.gcms.tic = list_tic
    +194        self.gcms.scans_number = self.list_scans
     
    @@ -895,12 +892,12 @@
    Parameters
    -
    197    def get_gcms_obj(self):
    -198        """
    -199        Get the GCMSBase object.
    -200
    -201        """
    -202        return self.gcms
    +            
    196    def get_gcms_obj(self):
    +197        """
    +198        Get the GCMSBase object.
    +199
    +200        """
    +201        return self.gcms
     
    diff --git a/docs/corems/mass_spectra/input/boosterHDF5.html b/docs/corems/mass_spectra/input/boosterHDF5.html index 783cd5d8..f85dd543 100644 --- a/docs/corems/mass_spectra/input/boosterHDF5.html +++ b/docs/corems/mass_spectra/input/boosterHDF5.html @@ -317,7 +317,7 @@

    210 Get the LCMS object. 211 212 """ -213 if len(self.lcms)>0: +213 if len(self.lcms) > 0: 214 return self.lcms 215 else: 216 raise Exception("Returning an empty LCMS class") @@ -532,7 +532,7 @@

    211 Get the LCMS object. 212 213 """ -214 if len(self.lcms)>0: +214 if len(self.lcms) > 0: 215 return self.lcms 216 else: 217 raise Exception("Returning an empty LCMS class") @@ -1000,7 +1000,7 @@
    Parameters
    211 Get the LCMS object. 212 213 """ -214 if len(self.lcms)>0: +214 if len(self.lcms) > 0: 215 return self.lcms 216 else: 217 raise Exception("Returning an empty LCMS class") diff --git a/docs/corems/mass_spectra/input/brukerSolarix.html b/docs/corems/mass_spectra/input/brukerSolarix.html index 7ef72640..fc4b0afd 100644 --- a/docs/corems/mass_spectra/input/brukerSolarix.html +++ b/docs/corems/mass_spectra/input/brukerSolarix.html @@ -129,119 +129,118 @@

    40 auto_process=True, 41 keep_profile=False, 42 ): - 43 - 44 Thread.__init__(self) - 45 - 46 if isinstance(d_directory_location, str): - 47 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed - 48 d_directory_location = Path(d_directory_location) - 49 - 50 if not d_directory_location.exists(): - 51 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) - 52 - 53 self.scan_attr = d_directory_location / "scan.xml" - 54 - 55 if not self.scan_attr.exists(): - 56 raise FileExistsError( - 57 "%s does not seem to be a valid Solarix Mass Spectra Experiment,\ - 58 maybe an Imaging experiment?\ - 59 please ReadBruker_SolarixTransientImage class for Imaging dataset " - 60 % d_directory_location - 61 ) - 62 - 63 self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label) - 64 - 65 self.auto_process = auto_process - 66 self.keep_profile = keep_profile - 67 - 68 def get_scan_attr(self) -> dict: - 69 """ - 70 Get the scan attributes from the scan.xml file. - 71 - 72 Returns - 73 ------- - 74 dict - 75 Dictionary containing the scan number as key and a tuple of retention time and TIC as value. - 76 """ - 77 from bs4 import BeautifulSoup - 78 - 79 soup = BeautifulSoup(self.scan_attr.open(), "xml") - 80 - 81 list_rt = [float(rt.text) for rt in soup.find_all("minutes")] - 82 list_tic = [float(tic.text) for tic in soup.find_all("tic")] - 83 list_scan = [int(scan.text) for scan in soup.find_all("count")] - 84 - 85 dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic))) - 86 - 87 return dict_scan_rt_tic - 88 - 89 def import_mass_spectra(self) -> None: - 90 """ - 91 Import the mass spectra from the scan.xml file. 
- 92 """ - 93 dict_scan_rt_tic = self.get_scan_attr() - 94 - 95 list_rt, list_tic = ( + 43 Thread.__init__(self) + 44 + 45 if isinstance(d_directory_location, str): + 46 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed + 47 d_directory_location = Path(d_directory_location) + 48 + 49 if not d_directory_location.exists(): + 50 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) + 51 + 52 self.scan_attr = d_directory_location / "scan.xml" + 53 + 54 if not self.scan_attr.exists(): + 55 raise FileExistsError( + 56 "%s does not seem to be a valid Solarix Mass Spectra Experiment,\ + 57 maybe an Imaging experiment?\ + 58 please ReadBruker_SolarixTransientImage class for Imaging dataset " + 59 % d_directory_location + 60 ) + 61 + 62 self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label) + 63 + 64 self.auto_process = auto_process + 65 self.keep_profile = keep_profile + 66 + 67 def get_scan_attr(self) -> dict: + 68 """ + 69 Get the scan attributes from the scan.xml file. + 70 + 71 Returns + 72 ------- + 73 dict + 74 Dictionary containing the scan number as key and a tuple of retention time and TIC as value. + 75 """ + 76 from bs4 import BeautifulSoup + 77 + 78 soup = BeautifulSoup(self.scan_attr.open(), "xml") + 79 + 80 list_rt = [float(rt.text) for rt in soup.find_all("minutes")] + 81 list_tic = [float(tic.text) for tic in soup.find_all("tic")] + 82 list_scan = [int(scan.text) for scan in soup.find_all("count")] + 83 + 84 dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic))) + 85 + 86 return dict_scan_rt_tic + 87 + 88 def import_mass_spectra(self) -> None: + 89 """ + 90 Import the mass spectra from the scan.xml file. 
+ 91 """ + 92 dict_scan_rt_tic = self.get_scan_attr() + 93 + 94 list_rt, list_tic = ( + 95 list(), 96 list(), - 97 list(), - 98 ) - 99 -100 list_scans = sorted(list(dict_scan_rt_tic.keys())) -101 -102 for scan_number in list_scans: -103 mass_spec = self.get_mass_spectrum(scan_number) -104 -105 self.lcms.add_mass_spectrum(mass_spec) -106 -107 list_rt.append(dict_scan_rt_tic.get(scan_number)[0]) -108 -109 list_tic.append(dict_scan_rt_tic.get(scan_number)[1]) -110 -111 self.lcms.retention_time = list_rt -112 self.lcms.tic = list_tic -113 self.lcms.scans_number = list_scans -114 -115 def get_mass_spectrum(self, scan_number: int): -116 """ -117 Get the mass spectrum for a given scan number. -118 -119 Parameters -120 ---------- -121 scan_number : int -122 Scan number. -123 -124 """ -125 bruker_reader = ReadBrukerSolarix(self.lcms.file_location) -126 -127 bruker_transient = bruker_reader.get_transient(scan_number) -128 -129 mass_spec = bruker_transient.get_mass_spectrum( -130 plot_result=False, -131 auto_process=self.auto_process, -132 keep_profile=self.keep_profile, -133 ) -134 -135 return mass_spec -136 -137 def run(self): -138 """ -139 Run the import_mass_spectra method. -140 """ -141 self.import_mass_spectra() -142 -143 def get_lcms_obj(self): -144 """ -145 Get the LCMSBase object. -146 -147 Raises -148 ------ -149 Exception -150 If the LCMSBase object is empty. 
-151 """ -152 if self.lcms: -153 return self.lcms -154 else: -155 raise Exception("Returning an empty LCMSBase class.") + 97 ) + 98 + 99 list_scans = sorted(list(dict_scan_rt_tic.keys())) +100 +101 for scan_number in list_scans: +102 mass_spec = self.get_mass_spectrum(scan_number) +103 +104 self.lcms.add_mass_spectrum(mass_spec) +105 +106 list_rt.append(dict_scan_rt_tic.get(scan_number)[0]) +107 +108 list_tic.append(dict_scan_rt_tic.get(scan_number)[1]) +109 +110 self.lcms.retention_time = list_rt +111 self.lcms.tic = list_tic +112 self.lcms.scans_number = list_scans +113 +114 def get_mass_spectrum(self, scan_number: int): +115 """ +116 Get the mass spectrum for a given scan number. +117 +118 Parameters +119 ---------- +120 scan_number : int +121 Scan number. +122 +123 """ +124 bruker_reader = ReadBrukerSolarix(self.lcms.file_location) +125 +126 bruker_transient = bruker_reader.get_transient(scan_number) +127 +128 mass_spec = bruker_transient.get_mass_spectrum( +129 plot_result=False, +130 auto_process=self.auto_process, +131 keep_profile=self.keep_profile, +132 ) +133 +134 return mass_spec +135 +136 def run(self): +137 """ +138 Run the import_mass_spectra method. +139 """ +140 self.import_mass_spectra() +141 +142 def get_lcms_obj(self): +143 """ +144 Get the LCMSBase object. +145 +146 Raises +147 ------ +148 Exception +149 If the LCMSBase object is empty. +150 """ +151 if self.lcms: +152 return self.lcms +153 else: +154 raise Exception("Returning an empty LCMSBase class.")

    @@ -283,119 +282,118 @@

    41 auto_process=True, 42 keep_profile=False, 43 ): - 44 - 45 Thread.__init__(self) - 46 - 47 if isinstance(d_directory_location, str): - 48 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed - 49 d_directory_location = Path(d_directory_location) - 50 - 51 if not d_directory_location.exists(): - 52 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) - 53 - 54 self.scan_attr = d_directory_location / "scan.xml" - 55 - 56 if not self.scan_attr.exists(): - 57 raise FileExistsError( - 58 "%s does not seem to be a valid Solarix Mass Spectra Experiment,\ - 59 maybe an Imaging experiment?\ - 60 please ReadBruker_SolarixTransientImage class for Imaging dataset " - 61 % d_directory_location - 62 ) - 63 - 64 self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label) - 65 - 66 self.auto_process = auto_process - 67 self.keep_profile = keep_profile - 68 - 69 def get_scan_attr(self) -> dict: - 70 """ - 71 Get the scan attributes from the scan.xml file. - 72 - 73 Returns - 74 ------- - 75 dict - 76 Dictionary containing the scan number as key and a tuple of retention time and TIC as value. - 77 """ - 78 from bs4 import BeautifulSoup - 79 - 80 soup = BeautifulSoup(self.scan_attr.open(), "xml") - 81 - 82 list_rt = [float(rt.text) for rt in soup.find_all("minutes")] - 83 list_tic = [float(tic.text) for tic in soup.find_all("tic")] - 84 list_scan = [int(scan.text) for scan in soup.find_all("count")] - 85 - 86 dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic))) - 87 - 88 return dict_scan_rt_tic - 89 - 90 def import_mass_spectra(self) -> None: - 91 """ - 92 Import the mass spectra from the scan.xml file. 
- 93 """ - 94 dict_scan_rt_tic = self.get_scan_attr() - 95 - 96 list_rt, list_tic = ( + 44 Thread.__init__(self) + 45 + 46 if isinstance(d_directory_location, str): + 47 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed + 48 d_directory_location = Path(d_directory_location) + 49 + 50 if not d_directory_location.exists(): + 51 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) + 52 + 53 self.scan_attr = d_directory_location / "scan.xml" + 54 + 55 if not self.scan_attr.exists(): + 56 raise FileExistsError( + 57 "%s does not seem to be a valid Solarix Mass Spectra Experiment,\ + 58 maybe an Imaging experiment?\ + 59 please ReadBruker_SolarixTransientImage class for Imaging dataset " + 60 % d_directory_location + 61 ) + 62 + 63 self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label) + 64 + 65 self.auto_process = auto_process + 66 self.keep_profile = keep_profile + 67 + 68 def get_scan_attr(self) -> dict: + 69 """ + 70 Get the scan attributes from the scan.xml file. + 71 + 72 Returns + 73 ------- + 74 dict + 75 Dictionary containing the scan number as key and a tuple of retention time and TIC as value. + 76 """ + 77 from bs4 import BeautifulSoup + 78 + 79 soup = BeautifulSoup(self.scan_attr.open(), "xml") + 80 + 81 list_rt = [float(rt.text) for rt in soup.find_all("minutes")] + 82 list_tic = [float(tic.text) for tic in soup.find_all("tic")] + 83 list_scan = [int(scan.text) for scan in soup.find_all("count")] + 84 + 85 dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic))) + 86 + 87 return dict_scan_rt_tic + 88 + 89 def import_mass_spectra(self) -> None: + 90 """ + 91 Import the mass spectra from the scan.xml file. 
+ 92 """ + 93 dict_scan_rt_tic = self.get_scan_attr() + 94 + 95 list_rt, list_tic = ( + 96 list(), 97 list(), - 98 list(), - 99 ) -100 -101 list_scans = sorted(list(dict_scan_rt_tic.keys())) -102 -103 for scan_number in list_scans: -104 mass_spec = self.get_mass_spectrum(scan_number) -105 -106 self.lcms.add_mass_spectrum(mass_spec) -107 -108 list_rt.append(dict_scan_rt_tic.get(scan_number)[0]) -109 -110 list_tic.append(dict_scan_rt_tic.get(scan_number)[1]) -111 -112 self.lcms.retention_time = list_rt -113 self.lcms.tic = list_tic -114 self.lcms.scans_number = list_scans -115 -116 def get_mass_spectrum(self, scan_number: int): -117 """ -118 Get the mass spectrum for a given scan number. -119 -120 Parameters -121 ---------- -122 scan_number : int -123 Scan number. -124 -125 """ -126 bruker_reader = ReadBrukerSolarix(self.lcms.file_location) -127 -128 bruker_transient = bruker_reader.get_transient(scan_number) -129 -130 mass_spec = bruker_transient.get_mass_spectrum( -131 plot_result=False, -132 auto_process=self.auto_process, -133 keep_profile=self.keep_profile, -134 ) -135 -136 return mass_spec -137 -138 def run(self): -139 """ -140 Run the import_mass_spectra method. -141 """ -142 self.import_mass_spectra() -143 -144 def get_lcms_obj(self): -145 """ -146 Get the LCMSBase object. -147 -148 Raises -149 ------ -150 Exception -151 If the LCMSBase object is empty. 
-152 """ -153 if self.lcms: -154 return self.lcms -155 else: -156 raise Exception("Returning an empty LCMSBase class.") + 98 ) + 99 +100 list_scans = sorted(list(dict_scan_rt_tic.keys())) +101 +102 for scan_number in list_scans: +103 mass_spec = self.get_mass_spectrum(scan_number) +104 +105 self.lcms.add_mass_spectrum(mass_spec) +106 +107 list_rt.append(dict_scan_rt_tic.get(scan_number)[0]) +108 +109 list_tic.append(dict_scan_rt_tic.get(scan_number)[1]) +110 +111 self.lcms.retention_time = list_rt +112 self.lcms.tic = list_tic +113 self.lcms.scans_number = list_scans +114 +115 def get_mass_spectrum(self, scan_number: int): +116 """ +117 Get the mass spectrum for a given scan number. +118 +119 Parameters +120 ---------- +121 scan_number : int +122 Scan number. +123 +124 """ +125 bruker_reader = ReadBrukerSolarix(self.lcms.file_location) +126 +127 bruker_transient = bruker_reader.get_transient(scan_number) +128 +129 mass_spec = bruker_transient.get_mass_spectrum( +130 plot_result=False, +131 auto_process=self.auto_process, +132 keep_profile=self.keep_profile, +133 ) +134 +135 return mass_spec +136 +137 def run(self): +138 """ +139 Run the import_mass_spectra method. +140 """ +141 self.import_mass_spectra() +142 +143 def get_lcms_obj(self): +144 """ +145 Get the LCMSBase object. +146 +147 Raises +148 ------ +149 Exception +150 If the LCMSBase object is empty. +151 """ +152 if self.lcms: +153 return self.lcms +154 else: +155 raise Exception("Returning an empty LCMSBase class.") @@ -436,30 +434,29 @@

    Parameters
    41 auto_process=True, 42 keep_profile=False, 43 ): -44 -45 Thread.__init__(self) -46 -47 if isinstance(d_directory_location, str): -48 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed -49 d_directory_location = Path(d_directory_location) -50 -51 if not d_directory_location.exists(): -52 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) -53 -54 self.scan_attr = d_directory_location / "scan.xml" -55 -56 if not self.scan_attr.exists(): -57 raise FileExistsError( -58 "%s does not seem to be a valid Solarix Mass Spectra Experiment,\ -59 maybe an Imaging experiment?\ -60 please ReadBruker_SolarixTransientImage class for Imaging dataset " -61 % d_directory_location -62 ) -63 -64 self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label) -65 -66 self.auto_process = auto_process -67 self.keep_profile = keep_profile +44 Thread.__init__(self) +45 +46 if isinstance(d_directory_location, str): +47 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed +48 d_directory_location = Path(d_directory_location) +49 +50 if not d_directory_location.exists(): +51 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) +52 +53 self.scan_attr = d_directory_location / "scan.xml" +54 +55 if not self.scan_attr.exists(): +56 raise FileExistsError( +57 "%s does not seem to be a valid Solarix Mass Spectra Experiment,\ +58 maybe an Imaging experiment?\ +59 please ReadBruker_SolarixTransientImage class for Imaging dataset " +60 % d_directory_location +61 ) +62 +63 self.lcms = LCMSBase(d_directory_location, analyzer, instrument_label) +64 +65 self.auto_process = auto_process +66 self.keep_profile = keep_profile @@ -541,26 +538,26 @@
    Parameters
    -
    69    def get_scan_attr(self) -> dict:
    -70        """
    -71        Get the scan attributes from the scan.xml file.
    -72
    -73        Returns
    -74        -------
    -75        dict
    -76            Dictionary containing the scan number as key and a tuple of retention time and TIC as value.
    -77        """
    -78        from bs4 import BeautifulSoup
    -79
    -80        soup = BeautifulSoup(self.scan_attr.open(), "xml")
    -81
    -82        list_rt = [float(rt.text) for rt in soup.find_all("minutes")]
    -83        list_tic = [float(tic.text) for tic in soup.find_all("tic")]
    -84        list_scan = [int(scan.text) for scan in soup.find_all("count")]
    -85
    -86        dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic)))
    -87
    -88        return dict_scan_rt_tic
    +            
    68    def get_scan_attr(self) -> dict:
    +69        """
    +70        Get the scan attributes from the scan.xml file.
    +71
    +72        Returns
    +73        -------
    +74        dict
    +75            Dictionary containing the scan number as key and a tuple of retention time and TIC as value.
    +76        """
    +77        from bs4 import BeautifulSoup
    +78
    +79        soup = BeautifulSoup(self.scan_attr.open(), "xml")
    +80
    +81        list_rt = [float(rt.text) for rt in soup.find_all("minutes")]
    +82        list_tic = [float(tic.text) for tic in soup.find_all("tic")]
    +83        list_scan = [int(scan.text) for scan in soup.find_all("count")]
    +84
    +85        dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic)))
    +86
    +87        return dict_scan_rt_tic
     
    @@ -586,31 +583,31 @@
    Returns
    -
     90    def import_mass_spectra(self) -> None:
    - 91        """
    - 92        Import the mass spectra from the scan.xml file.
    - 93        """
    - 94        dict_scan_rt_tic = self.get_scan_attr()
    - 95
    - 96        list_rt, list_tic = (
    +            
     89    def import_mass_spectra(self) -> None:
    + 90        """
    + 91        Import the mass spectra from the scan.xml file.
    + 92        """
    + 93        dict_scan_rt_tic = self.get_scan_attr()
    + 94
    + 95        list_rt, list_tic = (
    + 96            list(),
      97            list(),
    - 98            list(),
    - 99        )
    -100
    -101        list_scans = sorted(list(dict_scan_rt_tic.keys()))
    -102
    -103        for scan_number in list_scans:
    -104            mass_spec = self.get_mass_spectrum(scan_number)
    -105
    -106            self.lcms.add_mass_spectrum(mass_spec)
    -107
    -108            list_rt.append(dict_scan_rt_tic.get(scan_number)[0])
    -109
    -110            list_tic.append(dict_scan_rt_tic.get(scan_number)[1])
    -111
    -112        self.lcms.retention_time = list_rt
    -113        self.lcms.tic = list_tic
    -114        self.lcms.scans_number = list_scans
    + 98        )
    + 99
    +100        list_scans = sorted(list(dict_scan_rt_tic.keys()))
    +101
    +102        for scan_number in list_scans:
    +103            mass_spec = self.get_mass_spectrum(scan_number)
    +104
    +105            self.lcms.add_mass_spectrum(mass_spec)
    +106
    +107            list_rt.append(dict_scan_rt_tic.get(scan_number)[0])
    +108
    +109            list_tic.append(dict_scan_rt_tic.get(scan_number)[1])
    +110
    +111        self.lcms.retention_time = list_rt
    +112        self.lcms.tic = list_tic
    +113        self.lcms.scans_number = list_scans
     
    @@ -630,27 +627,27 @@
    Returns
    -
    116    def get_mass_spectrum(self, scan_number: int):
    -117        """
    -118        Get the mass spectrum for a given scan number.
    -119
    -120        Parameters
    -121        ----------
    -122        scan_number : int
    -123            Scan number.
    -124
    -125        """
    -126        bruker_reader = ReadBrukerSolarix(self.lcms.file_location)
    -127
    -128        bruker_transient = bruker_reader.get_transient(scan_number)
    -129
    -130        mass_spec = bruker_transient.get_mass_spectrum(
    -131            plot_result=False,
    -132            auto_process=self.auto_process,
    -133            keep_profile=self.keep_profile,
    -134        )
    -135
    -136        return mass_spec
    +            
    115    def get_mass_spectrum(self, scan_number: int):
    +116        """
    +117        Get the mass spectrum for a given scan number.
    +118
    +119        Parameters
    +120        ----------
    +121        scan_number : int
    +122            Scan number.
    +123
    +124        """
    +125        bruker_reader = ReadBrukerSolarix(self.lcms.file_location)
    +126
    +127        bruker_transient = bruker_reader.get_transient(scan_number)
    +128
    +129        mass_spec = bruker_transient.get_mass_spectrum(
    +130            plot_result=False,
    +131            auto_process=self.auto_process,
    +132            keep_profile=self.keep_profile,
    +133        )
    +134
    +135        return mass_spec
     
    @@ -677,11 +674,11 @@
    Parameters
    -
    138    def run(self):
    -139        """
    -140        Run the import_mass_spectra method.
    -141        """
    -142        self.import_mass_spectra()
    +            
    137    def run(self):
    +138        """
    +139        Run the import_mass_spectra method.
    +140        """
    +141        self.import_mass_spectra()
     
    @@ -701,19 +698,19 @@
    Parameters
    -
    144    def get_lcms_obj(self):
    -145        """
    -146        Get the LCMSBase object.
    -147
    -148        Raises
    -149        ------
    -150        Exception
    -151            If the LCMSBase object is empty.
    -152        """
    -153        if self.lcms:
    -154            return self.lcms
    -155        else:
    -156            raise Exception("Returning an empty LCMSBase class.")
    +            
    143    def get_lcms_obj(self):
    +144        """
    +145        Get the LCMSBase object.
    +146
    +147        Raises
    +148        ------
    +149        Exception
    +150            If the LCMSBase object is empty.
    +151        """
    +152        if self.lcms:
    +153            return self.lcms
    +154        else:
    +155            raise Exception("Returning an empty LCMSBase class.")
     
    diff --git a/docs/corems/mass_spectra/input/corems_hdf5.html b/docs/corems/mass_spectra/input/corems_hdf5.html index 4e1e82dd..6d441744 100644 --- a/docs/corems/mass_spectra/input/corems_hdf5.html +++ b/docs/corems/mass_spectra/input/corems_hdf5.html @@ -133,506 +133,514 @@

    20from corems.mass_spectra.input.rawFileReader import ImportMassSpectraThermoMSFileReader 21from corems.mass_spectra.input.mzml import MZMLSpectraParser 22 - 23class ReadCoreMSHDFMassSpectra( - 24 SpectraParserInterface, ReadCoreMSHDF_MassSpectrum, Thread - 25): - 26 """Class to read CoreMS HDF5 files and populate a LCMS or MassSpectraBase object. - 27 - 28 Parameters - 29 ---------- - 30 file_location : str - 31 The location of the HDF5 file to read, including the suffix. - 32 - 33 Attributes - 34 ---------- - 35 file_location : str - 36 The location of the HDF5 file to read. - 37 h5pydata : h5py.File - 38 The HDF5 file object. - 39 scans : list - 40 A list of the location of individual mass spectra within the HDF5 file. - 41 scan_number_list : list - 42 A list of the scan numbers of the mass spectra within the HDF5 file. - 43 parameters_location : str - 44 The location of the parameters file (json or toml). - 45 - 46 Methods - 47 ------- - 48 * import_mass_spectra(mass_spectra). - 49 Imports all mass spectra from the HDF5 file onto the LCMS or MassSpectraBase object. - 50 * get_mass_spectrum_from_scan(scan_number). - 51 Return mass spectrum data object from scan number. - 52 * load(). - 53 Placeholder method to meet the requirements of the SpectraParserInterface. - 54 * run(mass_spectra). - 55 Runs the importer functions to populate a LCMS or MassSpectraBase object. - 56 * import_scan_info(mass_spectra). - 57 Imports the scan info from the HDF5 file to populate the _scan_info attribute - 58 on the LCMS or MassSpectraBase object - 59 * import_ms_unprocessed(mass_spectra). - 60 Imports the unprocessed mass spectra from the HDF5 file to populate the - 61 _ms_unprocessed attribute on the LCMS or MassSpectraBase object - 62 * import_parameters(mass_spectra). - 63 Imports the parameters from the HDF5 file to populate the parameters - 64 attribute on the LCMS or MassSpectraBase object - 65 * import_mass_features(mass_spectra). 
- 66 Imports the mass features from the HDF5 file to populate the mass_features - 67 attribute on the LCMS or MassSpectraBase object - 68 * import_eics(mass_spectra). - 69 Imports the extracted ion chromatograms from the HDF5 file to populate the - 70 eics attribute on the LCMS or MassSpectraBase object - 71 * import_spectral_search_results(mass_spectra). - 72 Imports the spectral search results from the HDF5 file to populate the - 73 spectral_search_results attribute on the LCMS or MassSpectraBase object - 74 * get_mass_spectra_obj(). - 75 Return mass spectra data object, populating the _ms list on the LCMS or - 76 MassSpectraBase object from the HDF5 file - 77 * get_lcms_obj(). - 78 Return LCMSBase object, populating the majority of the attributes on the - 79 LCMS object from the HDF5 file - 80 - 81 """ - 82 - 83 def __init__(self, file_location: str): - 84 Thread.__init__(self) - 85 ReadCoreMSHDF_MassSpectrum.__init__(self, file_location) - 86 - 87 # override the scans attribute on ReadCoreMSHDF_MassSpectrum class to expect a nested location within the HDF5 file - 88 self.scans = [ - 89 "mass_spectra/" + x for x in list(self.h5pydata["mass_spectra"].keys()) - 90 ] - 91 self.scan_number_list = sorted( - 92 [int(float(i)) for i in list(self.h5pydata["mass_spectra"].keys())] - 93 ) - 94 - 95 # set the location of the parameters file (json or toml) - 96 add_files = [ - 97 x - 98 for x in self.file_location.parent.glob( - 99 self.file_location.name.replace(".hdf5", ".*") -100 ) -101 if x.suffix != ".hdf5" -102 ] -103 if len([x for x in add_files if x.suffix == ".json"]) > 0: -104 self.parameters_location = [x for x in add_files if x.suffix == ".json"][0] -105 elif len([x for x in add_files if x.suffix == ".toml"]) > 0: -106 self.parameters_location = [x for x in add_files if x.suffix == ".toml"][0] -107 else: -108 self.parameters_location = None -109 -110 def get_mass_spectrum_from_scan(self, scan_number): -111 """Return mass spectrum data object from scan number.""" 
-112 if scan_number in self.scan_number_list: -113 mass_spec = self.get_mass_spectrum(scan_number) -114 return mass_spec -115 else: -116 raise Exception("Scan number not found in HDF5 file.") -117 -118 def load(self) -> None: -119 """ """ -120 pass -121 -122 def get_ms_raw(self, spectra = None, scan_df = None) -> dict: -123 """ """ -124 # Warn if spectra or scan_df are not None that they are not used for CoreMS HDF5 files and should be rerun after instantiation -125 if spectra is not None or scan_df is not None: -126 SyntaxWarning("get_ms_raw method for CoreMS HDF5 files can only access saved data, consider rerunning after instantiation.") -127 ms_unprocessed = {} -128 dict_group_load = self.h5pydata["ms_unprocessed"] -129 dict_group_keys = dict_group_load.keys() -130 for k in dict_group_keys: -131 ms_up_int = dict_group_load[k][:] -132 ms_unprocessed[int(k)] = pd.DataFrame( -133 ms_up_int, columns=["scan", "mz", "intensity"] -134 ) -135 return ms_unprocessed -136 -137 def get_scan_df(self) -> pd.DataFrame: -138 scan_info = {} -139 dict_group_load = self.h5pydata["scan_info"] -140 dict_group_keys = dict_group_load.keys() -141 for k in dict_group_keys: -142 scan_info[k] = dict_group_load[k][:] -143 scan_df = pd.DataFrame(scan_info) -144 scan_df.set_index("scan", inplace=True, drop=False) -145 str_df = scan_df.select_dtypes([object]) -146 str_df = str_df.stack().str.decode("utf-8").unstack() -147 for col in str_df: -148 scan_df[col] = str_df[col] -149 return scan_df -150 -151 def run(self, mass_spectra, load_raw=True) -> None: -152 """Runs the importer functions to populate a LCMS or MassSpectraBase object. + 23 + 24class ReadCoreMSHDFMassSpectra( + 25 SpectraParserInterface, ReadCoreMSHDF_MassSpectrum, Thread + 26): + 27 """Class to read CoreMS HDF5 files and populate a LCMS or MassSpectraBase object. + 28 + 29 Parameters + 30 ---------- + 31 file_location : str + 32 The location of the HDF5 file to read, including the suffix. 
+ 33 + 34 Attributes + 35 ---------- + 36 file_location : str + 37 The location of the HDF5 file to read. + 38 h5pydata : h5py.File + 39 The HDF5 file object. + 40 scans : list + 41 A list of the location of individual mass spectra within the HDF5 file. + 42 scan_number_list : list + 43 A list of the scan numbers of the mass spectra within the HDF5 file. + 44 parameters_location : str + 45 The location of the parameters file (json or toml). + 46 + 47 Methods + 48 ------- + 49 * import_mass_spectra(mass_spectra). + 50 Imports all mass spectra from the HDF5 file onto the LCMS or MassSpectraBase object. + 51 * get_mass_spectrum_from_scan(scan_number). + 52 Return mass spectrum data object from scan number. + 53 * load(). + 54 Placeholder method to meet the requirements of the SpectraParserInterface. + 55 * run(mass_spectra). + 56 Runs the importer functions to populate a LCMS or MassSpectraBase object. + 57 * import_scan_info(mass_spectra). + 58 Imports the scan info from the HDF5 file to populate the _scan_info attribute + 59 on the LCMS or MassSpectraBase object + 60 * import_ms_unprocessed(mass_spectra). + 61 Imports the unprocessed mass spectra from the HDF5 file to populate the + 62 _ms_unprocessed attribute on the LCMS or MassSpectraBase object + 63 * import_parameters(mass_spectra). + 64 Imports the parameters from the HDF5 file to populate the parameters + 65 attribute on the LCMS or MassSpectraBase object + 66 * import_mass_features(mass_spectra). + 67 Imports the mass features from the HDF5 file to populate the mass_features + 68 attribute on the LCMS or MassSpectraBase object + 69 * import_eics(mass_spectra). + 70 Imports the extracted ion chromatograms from the HDF5 file to populate the + 71 eics attribute on the LCMS or MassSpectraBase object + 72 * import_spectral_search_results(mass_spectra). 
+ 73 Imports the spectral search results from the HDF5 file to populate the + 74 spectral_search_results attribute on the LCMS or MassSpectraBase object + 75 * get_mass_spectra_obj(). + 76 Return mass spectra data object, populating the _ms list on the LCMS or + 77 MassSpectraBase object from the HDF5 file + 78 * get_lcms_obj(). + 79 Return LCMSBase object, populating the majority of the attributes on the + 80 LCMS object from the HDF5 file + 81 + 82 """ + 83 + 84 def __init__(self, file_location: str): + 85 Thread.__init__(self) + 86 ReadCoreMSHDF_MassSpectrum.__init__(self, file_location) + 87 + 88 # override the scans attribute on ReadCoreMSHDF_MassSpectrum class to expect a nested location within the HDF5 file + 89 self.scans = [ + 90 "mass_spectra/" + x for x in list(self.h5pydata["mass_spectra"].keys()) + 91 ] + 92 self.scan_number_list = sorted( + 93 [int(float(i)) for i in list(self.h5pydata["mass_spectra"].keys())] + 94 ) + 95 + 96 # set the location of the parameters file (json or toml) + 97 add_files = [ + 98 x + 99 for x in self.file_location.parent.glob( +100 self.file_location.name.replace(".hdf5", ".*") +101 ) +102 if x.suffix != ".hdf5" +103 ] +104 if len([x for x in add_files if x.suffix == ".json"]) > 0: +105 self.parameters_location = [x for x in add_files if x.suffix == ".json"][0] +106 elif len([x for x in add_files if x.suffix == ".toml"]) > 0: +107 self.parameters_location = [x for x in add_files if x.suffix == ".toml"][0] +108 else: +109 self.parameters_location = None +110 +111 def get_mass_spectrum_from_scan(self, scan_number): +112 """Return mass spectrum data object from scan number.""" +113 if scan_number in self.scan_number_list: +114 mass_spec = self.get_mass_spectrum(scan_number) +115 return mass_spec +116 else: +117 raise Exception("Scan number not found in HDF5 file.") +118 +119 def load(self) -> None: +120 """ """ +121 pass +122 +123 def get_ms_raw(self, spectra=None, scan_df=None) -> dict: +124 """ """ +125 # Warn if spectra or 
scan_df are not None that they are not used for CoreMS HDF5 files and should be rerun after instantiation +126 if spectra is not None or scan_df is not None: +127 SyntaxWarning( +128 "get_ms_raw method for CoreMS HDF5 files can only access saved data, consider rerunning after instantiation." +129 ) +130 ms_unprocessed = {} +131 dict_group_load = self.h5pydata["ms_unprocessed"] +132 dict_group_keys = dict_group_load.keys() +133 for k in dict_group_keys: +134 ms_up_int = dict_group_load[k][:] +135 ms_unprocessed[int(k)] = pd.DataFrame( +136 ms_up_int, columns=["scan", "mz", "intensity"] +137 ) +138 return ms_unprocessed +139 +140 def get_scan_df(self) -> pd.DataFrame: +141 scan_info = {} +142 dict_group_load = self.h5pydata["scan_info"] +143 dict_group_keys = dict_group_load.keys() +144 for k in dict_group_keys: +145 scan_info[k] = dict_group_load[k][:] +146 scan_df = pd.DataFrame(scan_info) +147 scan_df.set_index("scan", inplace=True, drop=False) +148 str_df = scan_df.select_dtypes([object]) +149 str_df = str_df.stack().str.decode("utf-8").unstack() +150 for col in str_df: +151 scan_df[col] = str_df[col] +152 return scan_df 153 -154 Notes -155 ----- -156 The following functions are run in order, if the HDF5 file contains the necessary data: -157 1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object. -158 2. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object. -159 3. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object. -160 4. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object. -161 5. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object. -162 6. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object. -163 7. 
import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object. -164 -165 Parameters -166 ---------- -167 mass_spectra : LCMSBase or MassSpectraBase -168 The LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes. -169 load_raw : bool -170 If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True. -171 Returns -172 ------- -173 None, but populates several attributes on the LCMS or MassSpectraBase object. -174 -175 """ -176 if self.parameters_location is not None: -177 # Populate the parameters attribute on the LCMS object -178 self.import_parameters(mass_spectra) -179 -180 if "mass_spectra" in self.h5pydata: -181 # Populate the _ms list on the LCMS object -182 self.import_mass_spectra(mass_spectra, load_raw=load_raw) -183 -184 if "scan_info" in self.h5pydata: -185 # Populate the _scan_info attribute on the LCMS object -186 self.import_scan_info(mass_spectra) -187 -188 if "ms_unprocessed" in self.h5pydata and load_raw: -189 # Populate the _ms_unprocessed attribute on the LCMS object -190 self.import_ms_unprocessed(mass_spectra) -191 -192 if "mass_features" in self.h5pydata: -193 # Populate the mass_features attribute on the LCMS object -194 self.import_mass_features(mass_spectra) -195 -196 if "eics" in self.h5pydata: -197 # Populate the eics attribute on the LCMS object -198 self.import_eics(mass_spectra) -199 -200 if "spectral_search_results" in self.h5pydata: -201 # Populate the spectral_search_results attribute on the LCMS object -202 self.import_spectral_search_results(mass_spectra) -203 -204 def import_mass_spectra(self, mass_spectra, load_raw=True) -> None: -205 """Imports all mass spectra from the HDF5 file. 
+154 def run(self, mass_spectra, load_raw=True) -> None: +155 """Runs the importer functions to populate a LCMS or MassSpectraBase object. +156 +157 Notes +158 ----- +159 The following functions are run in order, if the HDF5 file contains the necessary data: +160 1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object. +161 2. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object. +162 3. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object. +163 4. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object. +164 5. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object. +165 6. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object. +166 7. import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object. +167 +168 Parameters +169 ---------- +170 mass_spectra : LCMSBase or MassSpectraBase +171 The LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes. +172 load_raw : bool +173 If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True. +174 Returns +175 ------- +176 None, but populates several attributes on the LCMS or MassSpectraBase object. 
+177 +178 """ +179 if self.parameters_location is not None: +180 # Populate the parameters attribute on the LCMS object +181 self.import_parameters(mass_spectra) +182 +183 if "mass_spectra" in self.h5pydata: +184 # Populate the _ms list on the LCMS object +185 self.import_mass_spectra(mass_spectra, load_raw=load_raw) +186 +187 if "scan_info" in self.h5pydata: +188 # Populate the _scan_info attribute on the LCMS object +189 self.import_scan_info(mass_spectra) +190 +191 if "ms_unprocessed" in self.h5pydata and load_raw: +192 # Populate the _ms_unprocessed attribute on the LCMS object +193 self.import_ms_unprocessed(mass_spectra) +194 +195 if "mass_features" in self.h5pydata: +196 # Populate the mass_features attribute on the LCMS object +197 self.import_mass_features(mass_spectra) +198 +199 if "eics" in self.h5pydata: +200 # Populate the eics attribute on the LCMS object +201 self.import_eics(mass_spectra) +202 +203 if "spectral_search_results" in self.h5pydata: +204 # Populate the spectral_search_results attribute on the LCMS object +205 self.import_spectral_search_results(mass_spectra) 206 -207 Parameters -208 ---------- -209 mass_spectra : LCMSBase | MassSpectraBase -210 The MassSpectraBase or LCMSBase object to populate with mass spectra. -211 load_raw : bool -212 If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default -213 -214 Returns -215 ------- -216 None, but populates the '_ms' list on the LCMSBase or MassSpectraBase -217 object with mass spectra from the HDF5 file. -218 """ -219 for scan_number in self.scan_number_list: -220 mass_spec = self.get_mass_spectrum(scan_number, load_raw=load_raw) -221 mass_spec.scan_number = scan_number -222 mass_spectra.add_mass_spectrum(mass_spec) -223 -224 def import_scan_info(self, mass_spectra) -> None: -225 """Imports the scan info from the HDF5 file. 
+207 def import_mass_spectra(self, mass_spectra, load_raw=True) -> None: +208 """Imports all mass spectra from the HDF5 file. +209 +210 Parameters +211 ---------- +212 mass_spectra : LCMSBase | MassSpectraBase +213 The MassSpectraBase or LCMSBase object to populate with mass spectra. +214 load_raw : bool +215 If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default +216 +217 Returns +218 ------- +219 None, but populates the '_ms' list on the LCMSBase or MassSpectraBase +220 object with mass spectra from the HDF5 file. +221 """ +222 for scan_number in self.scan_number_list: +223 mass_spec = self.get_mass_spectrum(scan_number, load_raw=load_raw) +224 mass_spec.scan_number = scan_number +225 mass_spectra.add_mass_spectrum(mass_spec) 226 -227 Parameters -228 ---------- -229 lcms : LCMSBase | MassSpectraBase -230 The MassSpectraBase or LCMSBase objects -231 -232 Returns -233 ------- -234 None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase -235 object with a pandas DataFrame of the 'scan_info' from the HDF5 file. -236 -237 """ -238 scan_df = self.get_scan_df() -239 mass_spectra.scan_df = scan_df -240 -241 def import_ms_unprocessed(self, mass_spectra) -> None: -242 """Imports the unprocessed mass spectra from the HDF5 file. +227 def import_scan_info(self, mass_spectra) -> None: +228 """Imports the scan info from the HDF5 file. +229 +230 Parameters +231 ---------- +232 lcms : LCMSBase | MassSpectraBase +233 The MassSpectraBase or LCMSBase objects +234 +235 Returns +236 ------- +237 None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase +238 object with a pandas DataFrame of the 'scan_info' from the HDF5 file. 
+239 +240 """ +241 scan_df = self.get_scan_df() +242 mass_spectra.scan_df = scan_df 243 -244 Parameters -245 ---------- -246 lcms : LCMSBase | MassSpectraBase -247 The MassSpectraBase or LCMSBase objects -248 -249 Returns -250 ------- -251 None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase -252 object with a dictionary of the 'ms_unprocessed' from the HDF5 file. -253 -254 """ -255 ms_unprocessed = self.get_ms_raw() -256 mass_spectra._ms_unprocessed = ms_unprocessed -257 -258 def import_parameters(self, mass_spectra) -> None: -259 """Imports the parameters from the HDF5 file. +244 def import_ms_unprocessed(self, mass_spectra) -> None: +245 """Imports the unprocessed mass spectra from the HDF5 file. +246 +247 Parameters +248 ---------- +249 lcms : LCMSBase | MassSpectraBase +250 The MassSpectraBase or LCMSBase objects +251 +252 Returns +253 ------- +254 None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase +255 object with a dictionary of the 'ms_unprocessed' from the HDF5 file. +256 +257 """ +258 ms_unprocessed = self.get_ms_raw() +259 mass_spectra._ms_unprocessed = ms_unprocessed 260 -261 Parameters -262 ---------- -263 mass_spectra : LCMSBase | MassSpectraBase -264 The MassSpectraBase or LCMSBase object to populate with parameters. -265 -266 Returns -267 ------- -268 None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase -269 object with a dictionary of the 'parameters' from the HDF5 file. -270 -271 """ -272 if ".json" == self.parameters_location.suffix: -273 load_and_set_json_parameters_lcms(mass_spectra, self.parameters_location) -274 if ".toml" == self.parameters_location.suffix: -275 load_and_set_toml_parameters_lcms(mass_spectra, self.parameters_location) -276 else: -277 raise Exception( -278 "Parameters file must be in JSON format, TOML format is not yet supported." 
-279 ) -280 -281 def import_mass_features(self, mass_spectra) -> None: -282 """Imports the mass features from the HDF5 file. +261 def import_parameters(self, mass_spectra) -> None: +262 """Imports the parameters from the HDF5 file. +263 +264 Parameters +265 ---------- +266 mass_spectra : LCMSBase | MassSpectraBase +267 The MassSpectraBase or LCMSBase object to populate with parameters. +268 +269 Returns +270 ------- +271 None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase +272 object with a dictionary of the 'parameters' from the HDF5 file. +273 +274 """ +275 if ".json" == self.parameters_location.suffix: +276 load_and_set_json_parameters_lcms(mass_spectra, self.parameters_location) +277 if ".toml" == self.parameters_location.suffix: +278 load_and_set_toml_parameters_lcms(mass_spectra, self.parameters_location) +279 else: +280 raise Exception( +281 "Parameters file must be in JSON format, TOML format is not yet supported." +282 ) 283 -284 Parameters -285 ---------- -286 mass_spectra : LCMSBase | MassSpectraBase -287 The MassSpectraBase or LCMSBase object to populate with mass features. -288 -289 Returns -290 ------- -291 None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase -292 object with a dictionary of the 'mass_features' from the HDF5 file. 
-293 -294 """ -295 dict_group_load = self.h5pydata["mass_features"] -296 dict_group_keys = dict_group_load.keys() -297 for k in dict_group_keys: -298 # Instantiate the MassFeature object -299 mass_feature = LCMSMassFeature( -300 mass_spectra, -301 mz=dict_group_load[k].attrs["_mz_exp"], -302 retention_time=dict_group_load[k].attrs["_retention_time"], -303 intensity=dict_group_load[k].attrs["_intensity"], -304 apex_scan=dict_group_load[k].attrs["_apex_scan"], -305 persistence=dict_group_load[k].attrs["_persistence"], -306 id=int(k), -307 ) -308 -309 # Populate additional attributes on the MassFeature object -310 for key in dict_group_load[k].attrs.keys() - { -311 "_mz_exp", -312 "_mz_cal", -313 "_retention_time", -314 "_intensity", -315 "_apex_scan", -316 "_persistence", -317 }: -318 setattr(mass_feature, key, dict_group_load[k].attrs[key]) -319 -320 # Populate attributes on MassFeature object that are lists -321 for key in dict_group_load[k].keys(): -322 setattr(mass_feature, key, dict_group_load[k][key][:]) -323 -324 mass_spectra.mass_features[int(k)] = mass_feature -325 -326 # Associate mass features with ms1 and ms2 spectra, if available -327 for mf_id in mass_spectra.mass_features.keys(): -328 if mass_spectra.mass_features[mf_id].apex_scan in mass_spectra._ms.keys(): -329 mass_spectra.mass_features[mf_id].mass_spectrum = mass_spectra._ms[ -330 mass_spectra.mass_features[mf_id].apex_scan -331 ] -332 if mass_spectra.mass_features[mf_id].ms2_scan_numbers is not None: -333 for ms2_scan in mass_spectra.mass_features[mf_id].ms2_scan_numbers: -334 if ms2_scan in mass_spectra._ms.keys(): -335 mass_spectra.mass_features[mf_id].ms2_mass_spectra[ms2_scan] = ( -336 mass_spectra._ms[ms2_scan] -337 ) -338 -339 def import_eics(self, mass_spectra): -340 """Imports the extracted ion chromatograms from the HDF5 file. +284 def import_mass_features(self, mass_spectra) -> None: +285 """Imports the mass features from the HDF5 file. 
+286 +287 Parameters +288 ---------- +289 mass_spectra : LCMSBase | MassSpectraBase +290 The MassSpectraBase or LCMSBase object to populate with mass features. +291 +292 Returns +293 ------- +294 None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase +295 object with a dictionary of the 'mass_features' from the HDF5 file. +296 +297 """ +298 dict_group_load = self.h5pydata["mass_features"] +299 dict_group_keys = dict_group_load.keys() +300 for k in dict_group_keys: +301 # Instantiate the MassFeature object +302 mass_feature = LCMSMassFeature( +303 mass_spectra, +304 mz=dict_group_load[k].attrs["_mz_exp"], +305 retention_time=dict_group_load[k].attrs["_retention_time"], +306 intensity=dict_group_load[k].attrs["_intensity"], +307 apex_scan=dict_group_load[k].attrs["_apex_scan"], +308 persistence=dict_group_load[k].attrs["_persistence"], +309 id=int(k), +310 ) +311 +312 # Populate additional attributes on the MassFeature object +313 for key in dict_group_load[k].attrs.keys() - { +314 "_mz_exp", +315 "_mz_cal", +316 "_retention_time", +317 "_intensity", +318 "_apex_scan", +319 "_persistence", +320 }: +321 setattr(mass_feature, key, dict_group_load[k].attrs[key]) +322 +323 # Populate attributes on MassFeature object that are lists +324 for key in dict_group_load[k].keys(): +325 setattr(mass_feature, key, dict_group_load[k][key][:]) +326 +327 mass_spectra.mass_features[int(k)] = mass_feature +328 +329 # Associate mass features with ms1 and ms2 spectra, if available +330 for mf_id in mass_spectra.mass_features.keys(): +331 if mass_spectra.mass_features[mf_id].apex_scan in mass_spectra._ms.keys(): +332 mass_spectra.mass_features[mf_id].mass_spectrum = mass_spectra._ms[ +333 mass_spectra.mass_features[mf_id].apex_scan +334 ] +335 if mass_spectra.mass_features[mf_id].ms2_scan_numbers is not None: +336 for ms2_scan in mass_spectra.mass_features[mf_id].ms2_scan_numbers: +337 if ms2_scan in mass_spectra._ms.keys(): +338 
mass_spectra.mass_features[mf_id].ms2_mass_spectra[ms2_scan] = ( +339 mass_spectra._ms[ms2_scan] +340 ) 341 -342 Parameters -343 ---------- -344 mass_spectra : LCMSBase | MassSpectraBase -345 The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms. -346 -347 Returns -348 ------- -349 None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase -350 object with a dictionary of the 'eics' from the HDF5 file. -351 -352 """ -353 dict_group_load = self.h5pydata["eics"] -354 dict_group_keys = dict_group_load.keys() -355 for k in dict_group_keys: -356 my_eic = EIC_Data( -357 scans=dict_group_load[k]["scans"][:], -358 time=dict_group_load[k]["time"][:], -359 eic=dict_group_load[k]["eic"][:], -360 ) -361 for key in dict_group_load[k].keys(): -362 if key not in ["scans", "time", "eic"]: -363 setattr(my_eic, key, dict_group_load[k][key][:]) -364 # if key is apexes, convert to a tuple of a list -365 if key == "apexes" and len(my_eic.apexes) > 0: -366 my_eic.apexes = [tuple(x) for x in my_eic.apexes] -367 # Add to mass_spectra object -368 mass_spectra.eics[dict_group_load[k].attrs["mz"]] = my_eic -369 -370 # Add to mass features -371 for idx in mass_spectra.mass_features.keys(): -372 mz = mass_spectra.mass_features[idx].mz -373 if mz in mass_spectra.eics.keys(): -374 mass_spectra.mass_features[idx]._eic_data = mass_spectra.eics[mz] -375 -376 def import_spectral_search_results(self, mass_spectra): -377 """Imports the spectral search results from the HDF5 file. +342 def import_eics(self, mass_spectra): +343 """Imports the extracted ion chromatograms from the HDF5 file. +344 +345 Parameters +346 ---------- +347 mass_spectra : LCMSBase | MassSpectraBase +348 The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms. +349 +350 Returns +351 ------- +352 None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase +353 object with a dictionary of the 'eics' from the HDF5 file. 
+354 +355 """ +356 dict_group_load = self.h5pydata["eics"] +357 dict_group_keys = dict_group_load.keys() +358 for k in dict_group_keys: +359 my_eic = EIC_Data( +360 scans=dict_group_load[k]["scans"][:], +361 time=dict_group_load[k]["time"][:], +362 eic=dict_group_load[k]["eic"][:], +363 ) +364 for key in dict_group_load[k].keys(): +365 if key not in ["scans", "time", "eic"]: +366 setattr(my_eic, key, dict_group_load[k][key][:]) +367 # if key is apexes, convert to a tuple of a list +368 if key == "apexes" and len(my_eic.apexes) > 0: +369 my_eic.apexes = [tuple(x) for x in my_eic.apexes] +370 # Add to mass_spectra object +371 mass_spectra.eics[dict_group_load[k].attrs["mz"]] = my_eic +372 +373 # Add to mass features +374 for idx in mass_spectra.mass_features.keys(): +375 mz = mass_spectra.mass_features[idx].mz +376 if mz in mass_spectra.eics.keys(): +377 mass_spectra.mass_features[idx]._eic_data = mass_spectra.eics[mz] 378 -379 Parameters -380 ---------- -381 mass_spectra : LCMSBase | MassSpectraBase -382 The MassSpectraBase or LCMSBase object to populate with spectral search results. -383 -384 Returns -385 ------- -386 None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase -387 object with a dictionary of the 'spectral_search_results' from the HDF5 file. 
-388 -389 """ -390 overall_results_dict = {} -391 ms2_results_load = self.h5pydata["spectral_search_results"] -392 for k in ms2_results_load.keys(): -393 overall_results_dict[int(k)] = {} -394 for k2 in ms2_results_load[k].keys(): -395 ms2_search_res = SpectrumSearchResults( -396 query_spectrum=mass_spectra._ms[int(k)], -397 precursor_mz=ms2_results_load[k][k2].attrs["precursor_mz"], -398 spectral_similarity_search_results={}, -399 ) -400 -401 for key in ms2_results_load[k][k2].keys() - {"precursor_mz"}: -402 setattr(ms2_search_res, key, list(ms2_results_load[k][k2][key][:])) -403 overall_results_dict[int(k)][ -404 ms2_results_load[k][k2].attrs["precursor_mz"] -405 ] = ms2_search_res -406 -407 # add to mass_spectra -408 mass_spectra.spectral_search_results.update(overall_results_dict) +379 def import_spectral_search_results(self, mass_spectra): +380 """Imports the spectral search results from the HDF5 file. +381 +382 Parameters +383 ---------- +384 mass_spectra : LCMSBase | MassSpectraBase +385 The MassSpectraBase or LCMSBase object to populate with spectral search results. +386 +387 Returns +388 ------- +389 None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase +390 object with a dictionary of the 'spectral_search_results' from the HDF5 file. 
+391 +392 """ +393 overall_results_dict = {} +394 ms2_results_load = self.h5pydata["spectral_search_results"] +395 for k in ms2_results_load.keys(): +396 overall_results_dict[int(k)] = {} +397 for k2 in ms2_results_load[k].keys(): +398 ms2_search_res = SpectrumSearchResults( +399 query_spectrum=mass_spectra._ms[int(k)], +400 precursor_mz=ms2_results_load[k][k2].attrs["precursor_mz"], +401 spectral_similarity_search_results={}, +402 ) +403 +404 for key in ms2_results_load[k][k2].keys() - {"precursor_mz"}: +405 setattr(ms2_search_res, key, list(ms2_results_load[k][k2][key][:])) +406 overall_results_dict[int(k)][ +407 ms2_results_load[k][k2].attrs["precursor_mz"] +408 ] = ms2_search_res 409 -410 # If there are mass features, associate the results with each mass feature -411 if len(mass_spectra.mass_features) > 0: -412 for mass_feature_id, mass_feature in mass_spectra.mass_features.items(): -413 scan_ids = mass_feature.ms2_scan_numbers -414 for ms2_scan_id in scan_ids: -415 precursor_mz = mass_feature.mz -416 try: -417 mass_spectra.spectral_search_results[ms2_scan_id][precursor_mz] -418 except KeyError: -419 pass -420 else: -421 mass_spectra.mass_features[ -422 mass_feature_id -423 ].ms2_similarity_results.append( -424 mass_spectra.spectral_search_results[ms2_scan_id][ -425 precursor_mz -426 ] -427 ) -428 -429 def get_mass_spectra_obj(self, load_raw=True) -> MassSpectraBase: -430 """ -431 Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file. -432 -433 Parameters -434 ---------- -435 load_raw : bool -436 If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True. 
-437 -438 """ -439 # Instantiate the LCMS object -440 spectra_obj = MassSpectraBase( -441 file_location=self.file_location, -442 analyzer=self.analyzer, -443 instrument_label=self.instrument_label, -444 sample_name=self.sample_name, -445 ) -446 -447 # This will populate the _ms list on the LCMS or MassSpectraBase object -448 self.run(spectra_obj, load_raw=load_raw) +410 # add to mass_spectra +411 mass_spectra.spectral_search_results.update(overall_results_dict) +412 +413 # If there are mass features, associate the results with each mass feature +414 if len(mass_spectra.mass_features) > 0: +415 for mass_feature_id, mass_feature in mass_spectra.mass_features.items(): +416 scan_ids = mass_feature.ms2_scan_numbers +417 for ms2_scan_id in scan_ids: +418 precursor_mz = mass_feature.mz +419 try: +420 mass_spectra.spectral_search_results[ms2_scan_id][precursor_mz] +421 except KeyError: +422 pass +423 else: +424 mass_spectra.mass_features[ +425 mass_feature_id +426 ].ms2_similarity_results.append( +427 mass_spectra.spectral_search_results[ms2_scan_id][ +428 precursor_mz +429 ] +430 ) +431 +432 def get_mass_spectra_obj(self, load_raw=True) -> MassSpectraBase: +433 """ +434 Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file. +435 +436 Parameters +437 ---------- +438 load_raw : bool +439 If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True. +440 +441 """ +442 # Instantiate the LCMS object +443 spectra_obj = MassSpectraBase( +444 file_location=self.file_location, +445 analyzer=self.analyzer, +446 instrument_label=self.instrument_label, +447 sample_name=self.sample_name, +448 ) 449 -450 return spectra_obj -451 -452 def get_lcms_obj(self, load_raw=True, use_original_parser=True, raw_file_path=None) -> LCMSBase: -453 """ -454 Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file. 
-455 -456 Parameters -457 ---------- -458 load_raw : bool -459 If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True. -460 use_original_parser : bool -461 If True, use the original parser to populate the LCMS object. Default is True. -462 raw_file_path : str -463 The location of the raw file to parse if attempting to use original parser. -464 Default is None, which attempts to get the raw file path from the HDF5 file. -465 If the original file path has moved, this parameter can be used to specify the new location. -466 """ -467 # Instantiate the LCMS object -468 lcms_obj = LCMSBase( -469 file_location=self.file_location, -470 analyzer=self.analyzer, -471 instrument_label=self.instrument_label, -472 sample_name=self.sample_name, -473 ) -474 -475 # This will populate the majority of the attributes on the LCMS object -476 self.run(lcms_obj, load_raw=load_raw) -477 -478 # Set final attributes of the LCMS object -479 lcms_obj.polarity = self.h5pydata.attrs["polarity"] -480 lcms_obj._scans_number_list = list(lcms_obj.scan_df.scan) -481 lcms_obj._retention_time_list = list(lcms_obj.scan_df.scan_time) -482 lcms_obj._tic_list = list(lcms_obj.scan_df.tic) -483 -484 # If use_original_parser is True, instantiate the original parser and populate the LCMS object -485 if use_original_parser: -486 lcms_obj = self.add_original_parser(lcms_obj, raw_file_path=raw_file_path) -487 -488 return lcms_obj -489 -490 def add_original_parser(self, mass_spectra, raw_file_path=None): -491 """ -492 Add the original parser to the mass spectra object. -493 -494 Parameters -495 ---------- -496 mass_spectra : MassSpectraBase | LCMSBase -497 The MassSpectraBase or LCMSBase object to add the original parser to. -498 raw_file_path : str -499 The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file. 
-500 """ -501 # Try to get the raw file path from the HDF5 file -502 if raw_file_path is None: -503 raw_file_path = self.h5pydata.attrs["original_file_location"] -504 #Check if og_file_location exists, if not raise an error -505 raw_file_path = self.h5pydata.attrs["original_file_location"] -506 -507 raw_file_path = Path(raw_file_path) -508 if not raw_file_path.exists(): -509 raise FileExistsError("File does not exist: " + str(raw_file_path), ". Cannot use original parser for instatiating the lcms_obj.") -510 -511 # Get the original parser type -512 og_parser_type = self.h5pydata.attrs["parser_type"] -513 -514 if og_parser_type == "ImportMassSpectraThermoMSFileReader": -515 parser = ImportMassSpectraThermoMSFileReader(raw_file_path) -516 elif og_parser_type == "MZMLSpectraParser": -517 parser = MZMLSpectraParser(raw_file_path) +450 # This will populate the _ms list on the LCMS or MassSpectraBase object +451 self.run(spectra_obj, load_raw=load_raw) +452 +453 return spectra_obj +454 +455 def get_lcms_obj( +456 self, load_raw=True, use_original_parser=True, raw_file_path=None +457 ) -> LCMSBase: +458 """ +459 Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file. +460 +461 Parameters +462 ---------- +463 load_raw : bool +464 If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True. +465 use_original_parser : bool +466 If True, use the original parser to populate the LCMS object. Default is True. +467 raw_file_path : str +468 The location of the raw file to parse if attempting to use original parser. +469 Default is None, which attempts to get the raw file path from the HDF5 file. +470 If the original file path has moved, this parameter can be used to specify the new location. 
+471 """ +472 # Instantiate the LCMS object +473 lcms_obj = LCMSBase( +474 file_location=self.file_location, +475 analyzer=self.analyzer, +476 instrument_label=self.instrument_label, +477 sample_name=self.sample_name, +478 ) +479 +480 # This will populate the majority of the attributes on the LCMS object +481 self.run(lcms_obj, load_raw=load_raw) +482 +483 # Set final attributes of the LCMS object +484 lcms_obj.polarity = self.h5pydata.attrs["polarity"] +485 lcms_obj._scans_number_list = list(lcms_obj.scan_df.scan) +486 lcms_obj._retention_time_list = list(lcms_obj.scan_df.scan_time) +487 lcms_obj._tic_list = list(lcms_obj.scan_df.tic) +488 +489 # If use_original_parser is True, instantiate the original parser and populate the LCMS object +490 if use_original_parser: +491 lcms_obj = self.add_original_parser(lcms_obj, raw_file_path=raw_file_path) +492 +493 return lcms_obj +494 +495 def add_original_parser(self, mass_spectra, raw_file_path=None): +496 """ +497 Add the original parser to the mass spectra object. +498 +499 Parameters +500 ---------- +501 mass_spectra : MassSpectraBase | LCMSBase +502 The MassSpectraBase or LCMSBase object to add the original parser to. +503 raw_file_path : str +504 The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file. +505 """ +506 # Try to get the raw file path from the HDF5 file +507 if raw_file_path is None: +508 raw_file_path = self.h5pydata.attrs["original_file_location"] +509 # Check if og_file_location exists, if not raise an error +510 raw_file_path = self.h5pydata.attrs["original_file_location"] +511 +512 raw_file_path = Path(raw_file_path) +513 if not raw_file_path.exists(): +514 raise FileExistsError( +515 "File does not exist: " + str(raw_file_path), +516 ". 
Cannot use original parser for instatiating the lcms_obj.", +517 ) 518 -519 mass_spectra.spectra_parser_class = parser.__class__ -520 mass_spectra.spectra_parser = parser +519 # Get the original parser type +520 og_parser_type = self.h5pydata.attrs["parser_type"] 521 -522 return mass_spectra +522 if og_parser_type == "ImportMassSpectraThermoMSFileReader": +523 parser = ImportMassSpectraThermoMSFileReader(raw_file_path) +524 elif og_parser_type == "MZMLSpectraParser": +525 parser = MZMLSpectraParser(raw_file_path) +526 +527 mass_spectra.spectra_parser_class = parser.__class__ +528 mass_spectra.spectra_parser = parser +529 +530 return mass_spectra

    @@ -648,506 +656,513 @@

    -
     24class ReadCoreMSHDFMassSpectra(
    - 25    SpectraParserInterface, ReadCoreMSHDF_MassSpectrum, Thread
    - 26):
    - 27    """Class to read CoreMS HDF5 files and populate a LCMS or MassSpectraBase object.
    - 28
    - 29    Parameters
    - 30    ----------
    - 31    file_location : str
    - 32        The location of the HDF5 file to read, including the suffix.
    - 33
    - 34    Attributes
    - 35    ----------
    - 36    file_location : str
    - 37        The location of the HDF5 file to read.
    - 38    h5pydata : h5py.File
    - 39        The HDF5 file object.
    - 40    scans : list
    - 41        A list of the location of individual mass spectra within the HDF5 file.
    - 42    scan_number_list : list
    - 43        A list of the scan numbers of the mass spectra within the HDF5 file.
    - 44    parameters_location : str
    - 45        The location of the parameters file (json or toml).
    - 46
    - 47    Methods
    - 48    -------
    - 49    * import_mass_spectra(mass_spectra).
    - 50        Imports all mass spectra from the HDF5 file onto the LCMS or MassSpectraBase object.
    - 51    * get_mass_spectrum_from_scan(scan_number).
    - 52        Return mass spectrum data object from scan number.
    - 53    * load().
    - 54        Placeholder method to meet the requirements of the SpectraParserInterface.
    - 55    * run(mass_spectra).
    - 56        Runs the importer functions to populate a LCMS or MassSpectraBase object.
    - 57    * import_scan_info(mass_spectra).
    - 58        Imports the scan info from the HDF5 file to populate the _scan_info attribute
    - 59        on the LCMS or MassSpectraBase object
    - 60    * import_ms_unprocessed(mass_spectra).
    - 61        Imports the unprocessed mass spectra from the HDF5 file to populate the
    - 62        _ms_unprocessed attribute on the LCMS or MassSpectraBase object
    - 63    * import_parameters(mass_spectra).
    - 64        Imports the parameters from the HDF5 file to populate the parameters
    - 65        attribute on the LCMS or MassSpectraBase object
    - 66    * import_mass_features(mass_spectra).
    - 67        Imports the mass features from the HDF5 file to populate the mass_features
    - 68        attribute on the LCMS or MassSpectraBase object
    - 69    * import_eics(mass_spectra).
    - 70        Imports the extracted ion chromatograms from the HDF5 file to populate the
    - 71        eics attribute on the LCMS or MassSpectraBase object
    - 72    * import_spectral_search_results(mass_spectra).
    - 73        Imports the spectral search results from the HDF5 file to populate the
    - 74        spectral_search_results attribute on the LCMS or MassSpectraBase object
    - 75    * get_mass_spectra_obj().
    - 76        Return mass spectra data object, populating the _ms list on the LCMS or
    - 77        MassSpectraBase object from the HDF5 file
    - 78    * get_lcms_obj().
    - 79        Return LCMSBase object, populating the majority of the attributes on the
    - 80        LCMS object from the HDF5 file
    - 81
    - 82    """
    - 83
    - 84    def __init__(self, file_location: str):
    - 85        Thread.__init__(self)
    - 86        ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)
    - 87
    - 88        # override the scans attribute on ReadCoreMSHDF_MassSpectrum class to expect a nested location within the HDF5 file
    - 89        self.scans = [
    - 90            "mass_spectra/" + x for x in list(self.h5pydata["mass_spectra"].keys())
    - 91        ]
    - 92        self.scan_number_list = sorted(
    - 93            [int(float(i)) for i in list(self.h5pydata["mass_spectra"].keys())]
    - 94        )
    - 95
    - 96        # set the location of the parameters file (json or toml)
    - 97        add_files = [
    - 98            x
    - 99            for x in self.file_location.parent.glob(
    -100                self.file_location.name.replace(".hdf5", ".*")
    -101            )
    -102            if x.suffix != ".hdf5"
    -103        ]
    -104        if len([x for x in add_files if x.suffix == ".json"]) > 0:
    -105            self.parameters_location = [x for x in add_files if x.suffix == ".json"][0]
    -106        elif len([x for x in add_files if x.suffix == ".toml"]) > 0:
    -107            self.parameters_location = [x for x in add_files if x.suffix == ".toml"][0]
    -108        else:
    -109            self.parameters_location = None
    -110
    -111    def get_mass_spectrum_from_scan(self, scan_number):
    -112        """Return mass spectrum data object from scan number."""
    -113        if scan_number in self.scan_number_list:
    -114            mass_spec = self.get_mass_spectrum(scan_number)
    -115            return mass_spec
    -116        else:
    -117            raise Exception("Scan number not found in HDF5 file.")
    -118
    -119    def load(self) -> None:
    -120        """ """
    -121        pass
    -122
    -123    def get_ms_raw(self, spectra = None, scan_df = None) -> dict:
    -124        """ """
    -125        # Warn if spectra or scan_df are not None that they are not used for CoreMS HDF5 files and should be rerun after instantiation
    -126        if spectra is not None or scan_df is not None:
    -127            SyntaxWarning("get_ms_raw method for CoreMS HDF5 files can only access saved data, consider rerunning after instantiation.")
    -128        ms_unprocessed = {}
    -129        dict_group_load = self.h5pydata["ms_unprocessed"]
    -130        dict_group_keys = dict_group_load.keys()
    -131        for k in dict_group_keys:
    -132            ms_up_int = dict_group_load[k][:]
    -133            ms_unprocessed[int(k)] = pd.DataFrame(
    -134                ms_up_int, columns=["scan", "mz", "intensity"]
    -135            )
    -136        return ms_unprocessed
    -137
    -138    def get_scan_df(self) -> pd.DataFrame:
    -139        scan_info = {}
    -140        dict_group_load = self.h5pydata["scan_info"]
    -141        dict_group_keys = dict_group_load.keys()
    -142        for k in dict_group_keys:
    -143            scan_info[k] = dict_group_load[k][:]
    -144        scan_df = pd.DataFrame(scan_info)
    -145        scan_df.set_index("scan", inplace=True, drop=False)
    -146        str_df = scan_df.select_dtypes([object])
    -147        str_df = str_df.stack().str.decode("utf-8").unstack()
    -148        for col in str_df:
    -149            scan_df[col] = str_df[col]
    -150        return scan_df
    -151    
    -152    def run(self, mass_spectra, load_raw=True) -> None:
    -153        """Runs the importer functions to populate a LCMS or MassSpectraBase object.
    +            
     25class ReadCoreMSHDFMassSpectra(
    + 26    SpectraParserInterface, ReadCoreMSHDF_MassSpectrum, Thread
    + 27):
    + 28    """Class to read CoreMS HDF5 files and populate a LCMS or MassSpectraBase object.
    + 29
    + 30    Parameters
    + 31    ----------
    + 32    file_location : str
    + 33        The location of the HDF5 file to read, including the suffix.
    + 34
    + 35    Attributes
    + 36    ----------
    + 37    file_location : str
    + 38        The location of the HDF5 file to read.
    + 39    h5pydata : h5py.File
    + 40        The HDF5 file object.
    + 41    scans : list
    + 42        A list of the location of individual mass spectra within the HDF5 file.
    + 43    scan_number_list : list
    + 44        A list of the scan numbers of the mass spectra within the HDF5 file.
    + 45    parameters_location : str
    + 46        The location of the parameters file (json or toml).
    + 47
    + 48    Methods
    + 49    -------
    + 50    * import_mass_spectra(mass_spectra).
    + 51        Imports all mass spectra from the HDF5 file onto the LCMS or MassSpectraBase object.
    + 52    * get_mass_spectrum_from_scan(scan_number).
    + 53        Return mass spectrum data object from scan number.
    + 54    * load().
    + 55        Placeholder method to meet the requirements of the SpectraParserInterface.
    + 56    * run(mass_spectra).
    + 57        Runs the importer functions to populate a LCMS or MassSpectraBase object.
    + 58    * import_scan_info(mass_spectra).
    + 59        Imports the scan info from the HDF5 file to populate the _scan_info attribute
    + 60        on the LCMS or MassSpectraBase object
    + 61    * import_ms_unprocessed(mass_spectra).
    + 62        Imports the unprocessed mass spectra from the HDF5 file to populate the
    + 63        _ms_unprocessed attribute on the LCMS or MassSpectraBase object
    + 64    * import_parameters(mass_spectra).
    + 65        Imports the parameters from the HDF5 file to populate the parameters
    + 66        attribute on the LCMS or MassSpectraBase object
    + 67    * import_mass_features(mass_spectra).
    + 68        Imports the mass features from the HDF5 file to populate the mass_features
    + 69        attribute on the LCMS or MassSpectraBase object
    + 70    * import_eics(mass_spectra).
    + 71        Imports the extracted ion chromatograms from the HDF5 file to populate the
    + 72        eics attribute on the LCMS or MassSpectraBase object
    + 73    * import_spectral_search_results(mass_spectra).
    + 74        Imports the spectral search results from the HDF5 file to populate the
    + 75        spectral_search_results attribute on the LCMS or MassSpectraBase object
    + 76    * get_mass_spectra_obj().
    + 77        Return mass spectra data object, populating the _ms list on the LCMS or
    + 78        MassSpectraBase object from the HDF5 file
    + 79    * get_lcms_obj().
    + 80        Return LCMSBase object, populating the majority of the attributes on the
    + 81        LCMS object from the HDF5 file
    + 82
    + 83    """
    + 84
    + 85    def __init__(self, file_location: str):
    + 86        Thread.__init__(self)
    + 87        ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)
    + 88
    + 89        # override the scans attribute on ReadCoreMSHDF_MassSpectrum class to expect a nested location within the HDF5 file
    + 90        self.scans = [
    + 91            "mass_spectra/" + x for x in list(self.h5pydata["mass_spectra"].keys())
    + 92        ]
    + 93        self.scan_number_list = sorted(
    + 94            [int(float(i)) for i in list(self.h5pydata["mass_spectra"].keys())]
    + 95        )
    + 96
    + 97        # set the location of the parameters file (json or toml)
    + 98        add_files = [
    + 99            x
    +100            for x in self.file_location.parent.glob(
    +101                self.file_location.name.replace(".hdf5", ".*")
    +102            )
    +103            if x.suffix != ".hdf5"
    +104        ]
    +105        if len([x for x in add_files if x.suffix == ".json"]) > 0:
    +106            self.parameters_location = [x for x in add_files if x.suffix == ".json"][0]
    +107        elif len([x for x in add_files if x.suffix == ".toml"]) > 0:
    +108            self.parameters_location = [x for x in add_files if x.suffix == ".toml"][0]
    +109        else:
    +110            self.parameters_location = None
    +111
    +112    def get_mass_spectrum_from_scan(self, scan_number):
    +113        """Return mass spectrum data object from scan number."""
    +114        if scan_number in self.scan_number_list:
    +115            mass_spec = self.get_mass_spectrum(scan_number)
    +116            return mass_spec
    +117        else:
    +118            raise Exception("Scan number not found in HDF5 file.")
    +119
    +120    def load(self) -> None:
    +121        """ """
    +122        pass
    +123
    +124    def get_ms_raw(self, spectra=None, scan_df=None) -> dict:
    +125        """ """
    +126        # Warn if spectra or scan_df are not None that they are not used for CoreMS HDF5 files and should be rerun after instantiation
    +127        if spectra is not None or scan_df is not None:
    +128            SyntaxWarning(
    +129                "get_ms_raw method for CoreMS HDF5 files can only access saved data, consider rerunning after instantiation."
    +130            )
    +131        ms_unprocessed = {}
    +132        dict_group_load = self.h5pydata["ms_unprocessed"]
    +133        dict_group_keys = dict_group_load.keys()
    +134        for k in dict_group_keys:
    +135            ms_up_int = dict_group_load[k][:]
    +136            ms_unprocessed[int(k)] = pd.DataFrame(
    +137                ms_up_int, columns=["scan", "mz", "intensity"]
    +138            )
    +139        return ms_unprocessed
    +140
    +141    def get_scan_df(self) -> pd.DataFrame:
    +142        scan_info = {}
    +143        dict_group_load = self.h5pydata["scan_info"]
    +144        dict_group_keys = dict_group_load.keys()
    +145        for k in dict_group_keys:
    +146            scan_info[k] = dict_group_load[k][:]
    +147        scan_df = pd.DataFrame(scan_info)
    +148        scan_df.set_index("scan", inplace=True, drop=False)
    +149        str_df = scan_df.select_dtypes([object])
    +150        str_df = str_df.stack().str.decode("utf-8").unstack()
    +151        for col in str_df:
    +152            scan_df[col] = str_df[col]
    +153        return scan_df
     154
    -155        Notes
    -156        -----
    -157        The following functions are run in order, if the HDF5 file contains the necessary data:
    -158        1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object.
    -159        2. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object.
    -160        3. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object.
    -161        4. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object.
    -162        5. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object.
    -163        6. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object.
    -164        7. import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object.
    -165
    -166        Parameters
    -167        ----------
    -168        mass_spectra : LCMSBase or MassSpectraBase
    -169            The LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes.
    -170        load_raw : bool
    -171            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    -172        Returns
    -173        -------
    -174        None, but populates several attributes on the LCMS or MassSpectraBase object.
    -175
    -176        """
    -177        if self.parameters_location is not None:
    -178            # Populate the parameters attribute on the LCMS object
    -179            self.import_parameters(mass_spectra)
    -180
    -181        if "mass_spectra" in self.h5pydata:
    -182            # Populate the _ms list on the LCMS object
    -183            self.import_mass_spectra(mass_spectra, load_raw=load_raw)
    -184
    -185        if "scan_info" in self.h5pydata:
    -186            # Populate the _scan_info attribute on the LCMS object
    -187            self.import_scan_info(mass_spectra)
    -188
    -189        if "ms_unprocessed" in self.h5pydata and load_raw:
    -190            # Populate the _ms_unprocessed attribute on the LCMS object
    -191            self.import_ms_unprocessed(mass_spectra)
    -192
    -193        if "mass_features" in self.h5pydata:
    -194            # Populate the mass_features attribute on the LCMS object
    -195            self.import_mass_features(mass_spectra)
    -196
    -197        if "eics" in self.h5pydata:
    -198            # Populate the eics attribute on the LCMS object
    -199            self.import_eics(mass_spectra)
    -200
    -201        if "spectral_search_results" in self.h5pydata:
    -202            # Populate the spectral_search_results attribute on the LCMS object
    -203            self.import_spectral_search_results(mass_spectra)
    -204
    -205    def import_mass_spectra(self, mass_spectra, load_raw=True) -> None:
    -206        """Imports all mass spectra from the HDF5 file.
    +155    def run(self, mass_spectra, load_raw=True) -> None:
    +156        """Runs the importer functions to populate a LCMS or MassSpectraBase object.
    +157
    +158        Notes
    +159        -----
    +160        The following functions are run in order, if the HDF5 file contains the necessary data:
    +161        1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object.
    +162        2. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object.
    +163        3. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object.
    +164        4. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object.
    +165        5. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object.
    +166        6. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object.
    +167        7. import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object.
    +168
    +169        Parameters
    +170        ----------
    +171        mass_spectra : LCMSBase or MassSpectraBase
    +172            The LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes.
    +173        load_raw : bool
    +174            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    +175        Returns
    +176        -------
    +177        None, but populates several attributes on the LCMS or MassSpectraBase object.
    +178
    +179        """
    +180        if self.parameters_location is not None:
    +181            # Populate the parameters attribute on the LCMS object
    +182            self.import_parameters(mass_spectra)
    +183
    +184        if "mass_spectra" in self.h5pydata:
    +185            # Populate the _ms list on the LCMS object
    +186            self.import_mass_spectra(mass_spectra, load_raw=load_raw)
    +187
    +188        if "scan_info" in self.h5pydata:
    +189            # Populate the _scan_info attribute on the LCMS object
    +190            self.import_scan_info(mass_spectra)
    +191
    +192        if "ms_unprocessed" in self.h5pydata and load_raw:
    +193            # Populate the _ms_unprocessed attribute on the LCMS object
    +194            self.import_ms_unprocessed(mass_spectra)
    +195
    +196        if "mass_features" in self.h5pydata:
    +197            # Populate the mass_features attribute on the LCMS object
    +198            self.import_mass_features(mass_spectra)
    +199
    +200        if "eics" in self.h5pydata:
    +201            # Populate the eics attribute on the LCMS object
    +202            self.import_eics(mass_spectra)
    +203
    +204        if "spectral_search_results" in self.h5pydata:
    +205            # Populate the spectral_search_results attribute on the LCMS object
    +206            self.import_spectral_search_results(mass_spectra)
     207
    -208        Parameters
    -209        ----------
    -210        mass_spectra : LCMSBase | MassSpectraBase
    -211            The MassSpectraBase or LCMSBase object to populate with mass spectra.
    -212        load_raw : bool
    -213            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default
    -214
    -215        Returns
    -216        -------
    -217        None, but populates the '_ms' list on the LCMSBase or MassSpectraBase
    -218        object with mass spectra from the HDF5 file.
    -219        """
    -220        for scan_number in self.scan_number_list:
    -221            mass_spec = self.get_mass_spectrum(scan_number, load_raw=load_raw)
    -222            mass_spec.scan_number = scan_number
    -223            mass_spectra.add_mass_spectrum(mass_spec)
    -224
    -225    def import_scan_info(self, mass_spectra) -> None:
    -226        """Imports the scan info from the HDF5 file.
    +208    def import_mass_spectra(self, mass_spectra, load_raw=True) -> None:
    +209        """Imports all mass spectra from the HDF5 file.
    +210
    +211        Parameters
    +212        ----------
    +213        mass_spectra : LCMSBase | MassSpectraBase
    +214            The MassSpectraBase or LCMSBase object to populate with mass spectra.
    +215        load_raw : bool
    +216            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default
    +217
    +218        Returns
    +219        -------
    +220        None, but populates the '_ms' list on the LCMSBase or MassSpectraBase
    +221        object with mass spectra from the HDF5 file.
    +222        """
    +223        for scan_number in self.scan_number_list:
    +224            mass_spec = self.get_mass_spectrum(scan_number, load_raw=load_raw)
    +225            mass_spec.scan_number = scan_number
    +226            mass_spectra.add_mass_spectrum(mass_spec)
     227
    -228        Parameters
    -229        ----------
    -230        lcms : LCMSBase | MassSpectraBase
    -231            The MassSpectraBase or LCMSBase objects
    -232
    -233        Returns
    -234        -------
    -235        None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase
    -236        object with a pandas DataFrame of the 'scan_info' from the HDF5 file.
    -237
    -238        """
    -239        scan_df = self.get_scan_df()
    -240        mass_spectra.scan_df = scan_df
    -241
    -242    def import_ms_unprocessed(self, mass_spectra) -> None:
    -243        """Imports the unprocessed mass spectra from the HDF5 file.
    +228    def import_scan_info(self, mass_spectra) -> None:
    +229        """Imports the scan info from the HDF5 file.
    +230
    +231        Parameters
    +232        ----------
    +233        lcms : LCMSBase | MassSpectraBase
    +234            The MassSpectraBase or LCMSBase objects
    +235
    +236        Returns
    +237        -------
    +238        None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase
    +239        object with a pandas DataFrame of the 'scan_info' from the HDF5 file.
    +240
    +241        """
    +242        scan_df = self.get_scan_df()
    +243        mass_spectra.scan_df = scan_df
     244
    -245        Parameters
    -246        ----------
    -247        lcms : LCMSBase | MassSpectraBase
    -248            The MassSpectraBase or LCMSBase objects
    -249
    -250        Returns
    -251        -------
    -252        None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase
    -253        object with a dictionary of the 'ms_unprocessed' from the HDF5 file.
    -254
    -255        """
    -256        ms_unprocessed = self.get_ms_raw()
    -257        mass_spectra._ms_unprocessed = ms_unprocessed
    -258
    -259    def import_parameters(self, mass_spectra) -> None:
    -260        """Imports the parameters from the HDF5 file.
    +245    def import_ms_unprocessed(self, mass_spectra) -> None:
    +246        """Imports the unprocessed mass spectra from the HDF5 file.
    +247
    +248        Parameters
    +249        ----------
    +250        lcms : LCMSBase | MassSpectraBase
    +251            The MassSpectraBase or LCMSBase objects
    +252
    +253        Returns
    +254        -------
    +255        None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase
    +256        object with a dictionary of the 'ms_unprocessed' from the HDF5 file.
    +257
    +258        """
    +259        ms_unprocessed = self.get_ms_raw()
    +260        mass_spectra._ms_unprocessed = ms_unprocessed
     261
    -262        Parameters
    -263        ----------
    -264        mass_spectra : LCMSBase | MassSpectraBase
    -265            The MassSpectraBase or LCMSBase object to populate with parameters.
    -266
    -267        Returns
    -268        -------
    -269        None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase
    -270        object with a dictionary of the 'parameters' from the HDF5 file.
    -271
    -272        """
    -273        if ".json" == self.parameters_location.suffix:
    -274            load_and_set_json_parameters_lcms(mass_spectra, self.parameters_location)
    -275        if ".toml" == self.parameters_location.suffix:
    -276            load_and_set_toml_parameters_lcms(mass_spectra, self.parameters_location)
    -277        else:
    -278            raise Exception(
    -279                "Parameters file must be in JSON format, TOML format is not yet supported."
    -280            )
    -281
    -282    def import_mass_features(self, mass_spectra) -> None:
    -283        """Imports the mass features from the HDF5 file.
    +262    def import_parameters(self, mass_spectra) -> None:
    +263        """Imports the parameters from the HDF5 file.
    +264
    +265        Parameters
    +266        ----------
    +267        mass_spectra : LCMSBase | MassSpectraBase
    +268            The MassSpectraBase or LCMSBase object to populate with parameters.
    +269
    +270        Returns
    +271        -------
    +272        None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase
    +273        object with a dictionary of the 'parameters' from the HDF5 file.
    +274
    +275        """
    +276        if ".json" == self.parameters_location.suffix:
    +277            load_and_set_json_parameters_lcms(mass_spectra, self.parameters_location)
    +278        if ".toml" == self.parameters_location.suffix:
    +279            load_and_set_toml_parameters_lcms(mass_spectra, self.parameters_location)
    +280        else:
    +281            raise Exception(
    +282                "Parameters file must be in JSON format, TOML format is not yet supported."
    +283            )
     284
    -285        Parameters
    -286        ----------
    -287        mass_spectra : LCMSBase | MassSpectraBase
    -288            The MassSpectraBase or LCMSBase object to populate with mass features.
    -289
    -290        Returns
    -291        -------
    -292        None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase
    -293        object with a dictionary of the 'mass_features' from the HDF5 file.
    -294
    -295        """
    -296        dict_group_load = self.h5pydata["mass_features"]
    -297        dict_group_keys = dict_group_load.keys()
    -298        for k in dict_group_keys:
    -299            # Instantiate the MassFeature object
    -300            mass_feature = LCMSMassFeature(
    -301                mass_spectra,
    -302                mz=dict_group_load[k].attrs["_mz_exp"],
    -303                retention_time=dict_group_load[k].attrs["_retention_time"],
    -304                intensity=dict_group_load[k].attrs["_intensity"],
    -305                apex_scan=dict_group_load[k].attrs["_apex_scan"],
    -306                persistence=dict_group_load[k].attrs["_persistence"],
    -307                id=int(k),
    -308            )
    -309
    -310            # Populate additional attributes on the MassFeature object
    -311            for key in dict_group_load[k].attrs.keys() - {
    -312                "_mz_exp",
    -313                "_mz_cal",
    -314                "_retention_time",
    -315                "_intensity",
    -316                "_apex_scan",
    -317                "_persistence",
    -318            }:
    -319                setattr(mass_feature, key, dict_group_load[k].attrs[key])
    -320
    -321            # Populate attributes on MassFeature object that are lists
    -322            for key in dict_group_load[k].keys():
    -323                setattr(mass_feature, key, dict_group_load[k][key][:])
    -324
    -325            mass_spectra.mass_features[int(k)] = mass_feature
    -326
    -327        # Associate mass features with ms1 and ms2 spectra, if available
    -328        for mf_id in mass_spectra.mass_features.keys():
    -329            if mass_spectra.mass_features[mf_id].apex_scan in mass_spectra._ms.keys():
    -330                mass_spectra.mass_features[mf_id].mass_spectrum = mass_spectra._ms[
    -331                    mass_spectra.mass_features[mf_id].apex_scan
    -332                ]
    -333            if mass_spectra.mass_features[mf_id].ms2_scan_numbers is not None:
    -334                for ms2_scan in mass_spectra.mass_features[mf_id].ms2_scan_numbers:
    -335                    if ms2_scan in mass_spectra._ms.keys():
    -336                        mass_spectra.mass_features[mf_id].ms2_mass_spectra[ms2_scan] = (
    -337                            mass_spectra._ms[ms2_scan]
    -338                        )
    -339
    -340    def import_eics(self, mass_spectra):
    -341        """Imports the extracted ion chromatograms from the HDF5 file.
    +285    def import_mass_features(self, mass_spectra) -> None:
    +286        """Imports the mass features from the HDF5 file.
    +287
    +288        Parameters
    +289        ----------
    +290        mass_spectra : LCMSBase | MassSpectraBase
    +291            The MassSpectraBase or LCMSBase object to populate with mass features.
    +292
    +293        Returns
    +294        -------
    +295        None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase
    +296        object with a dictionary of the 'mass_features' from the HDF5 file.
    +297
    +298        """
    +299        dict_group_load = self.h5pydata["mass_features"]
    +300        dict_group_keys = dict_group_load.keys()
    +301        for k in dict_group_keys:
    +302            # Instantiate the MassFeature object
    +303            mass_feature = LCMSMassFeature(
    +304                mass_spectra,
    +305                mz=dict_group_load[k].attrs["_mz_exp"],
    +306                retention_time=dict_group_load[k].attrs["_retention_time"],
    +307                intensity=dict_group_load[k].attrs["_intensity"],
    +308                apex_scan=dict_group_load[k].attrs["_apex_scan"],
    +309                persistence=dict_group_load[k].attrs["_persistence"],
    +310                id=int(k),
    +311            )
    +312
    +313            # Populate additional attributes on the MassFeature object
    +314            for key in dict_group_load[k].attrs.keys() - {
    +315                "_mz_exp",
    +316                "_mz_cal",
    +317                "_retention_time",
    +318                "_intensity",
    +319                "_apex_scan",
    +320                "_persistence",
    +321            }:
    +322                setattr(mass_feature, key, dict_group_load[k].attrs[key])
    +323
    +324            # Populate attributes on MassFeature object that are lists
    +325            for key in dict_group_load[k].keys():
    +326                setattr(mass_feature, key, dict_group_load[k][key][:])
    +327
    +328            mass_spectra.mass_features[int(k)] = mass_feature
    +329
    +330        # Associate mass features with ms1 and ms2 spectra, if available
    +331        for mf_id in mass_spectra.mass_features.keys():
    +332            if mass_spectra.mass_features[mf_id].apex_scan in mass_spectra._ms.keys():
    +333                mass_spectra.mass_features[mf_id].mass_spectrum = mass_spectra._ms[
    +334                    mass_spectra.mass_features[mf_id].apex_scan
    +335                ]
    +336            if mass_spectra.mass_features[mf_id].ms2_scan_numbers is not None:
    +337                for ms2_scan in mass_spectra.mass_features[mf_id].ms2_scan_numbers:
    +338                    if ms2_scan in mass_spectra._ms.keys():
    +339                        mass_spectra.mass_features[mf_id].ms2_mass_spectra[ms2_scan] = (
    +340                            mass_spectra._ms[ms2_scan]
    +341                        )
     342
    -343        Parameters
    -344        ----------
    -345        mass_spectra : LCMSBase | MassSpectraBase
    -346            The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms.
    -347
    -348        Returns
    -349        -------
    -350        None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase
    -351        object with a dictionary of the 'eics' from the HDF5 file.
    -352
    -353        """
    -354        dict_group_load = self.h5pydata["eics"]
    -355        dict_group_keys = dict_group_load.keys()
    -356        for k in dict_group_keys:
    -357            my_eic = EIC_Data(
    -358                scans=dict_group_load[k]["scans"][:],
    -359                time=dict_group_load[k]["time"][:],
    -360                eic=dict_group_load[k]["eic"][:],
    -361            )
    -362            for key in dict_group_load[k].keys():
    -363                if key not in ["scans", "time", "eic"]:
    -364                    setattr(my_eic, key, dict_group_load[k][key][:])
    -365                    # if key is apexes, convert to a tuple of a list
    -366                    if key == "apexes" and len(my_eic.apexes) > 0:
    -367                        my_eic.apexes = [tuple(x) for x in my_eic.apexes]
    -368            # Add to mass_spectra object
    -369            mass_spectra.eics[dict_group_load[k].attrs["mz"]] = my_eic
    -370
    -371        # Add to mass features
    -372        for idx in mass_spectra.mass_features.keys():
    -373            mz = mass_spectra.mass_features[idx].mz
    -374            if mz in mass_spectra.eics.keys():
    -375                mass_spectra.mass_features[idx]._eic_data = mass_spectra.eics[mz]
    -376
    -377    def import_spectral_search_results(self, mass_spectra):
    -378        """Imports the spectral search results from the HDF5 file.
    +343    def import_eics(self, mass_spectra):
    +344        """Imports the extracted ion chromatograms from the HDF5 file.
    +345
    +346        Parameters
    +347        ----------
    +348        mass_spectra : LCMSBase | MassSpectraBase
    +349            The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms.
    +350
    +351        Returns
    +352        -------
    +353        None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase
    +354        object with a dictionary of the 'eics' from the HDF5 file.
    +355
    +356        """
    +357        dict_group_load = self.h5pydata["eics"]
    +358        dict_group_keys = dict_group_load.keys()
    +359        for k in dict_group_keys:
    +360            my_eic = EIC_Data(
    +361                scans=dict_group_load[k]["scans"][:],
    +362                time=dict_group_load[k]["time"][:],
    +363                eic=dict_group_load[k]["eic"][:],
    +364            )
    +365            for key in dict_group_load[k].keys():
    +366                if key not in ["scans", "time", "eic"]:
    +367                    setattr(my_eic, key, dict_group_load[k][key][:])
    +368                    # if key is apexes, convert to a tuple of a list
    +369                    if key == "apexes" and len(my_eic.apexes) > 0:
    +370                        my_eic.apexes = [tuple(x) for x in my_eic.apexes]
    +371            # Add to mass_spectra object
    +372            mass_spectra.eics[dict_group_load[k].attrs["mz"]] = my_eic
    +373
    +374        # Add to mass features
    +375        for idx in mass_spectra.mass_features.keys():
    +376            mz = mass_spectra.mass_features[idx].mz
    +377            if mz in mass_spectra.eics.keys():
    +378                mass_spectra.mass_features[idx]._eic_data = mass_spectra.eics[mz]
     379
    -380        Parameters
    -381        ----------
    -382        mass_spectra : LCMSBase | MassSpectraBase
    -383            The MassSpectraBase or LCMSBase object to populate with spectral search results.
    -384
    -385        Returns
    -386        -------
    -387        None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase
    -388        object with a dictionary of the 'spectral_search_results' from the HDF5 file.
    -389
    -390        """
    -391        overall_results_dict = {}
    -392        ms2_results_load = self.h5pydata["spectral_search_results"]
    -393        for k in ms2_results_load.keys():
    -394            overall_results_dict[int(k)] = {}
    -395            for k2 in ms2_results_load[k].keys():
    -396                ms2_search_res = SpectrumSearchResults(
    -397                    query_spectrum=mass_spectra._ms[int(k)],
    -398                    precursor_mz=ms2_results_load[k][k2].attrs["precursor_mz"],
    -399                    spectral_similarity_search_results={},
    -400                )
    -401
    -402                for key in ms2_results_load[k][k2].keys() - {"precursor_mz"}:
    -403                    setattr(ms2_search_res, key, list(ms2_results_load[k][k2][key][:]))
    -404                overall_results_dict[int(k)][
    -405                    ms2_results_load[k][k2].attrs["precursor_mz"]
    -406                ] = ms2_search_res
    -407
    -408        # add to mass_spectra
    -409        mass_spectra.spectral_search_results.update(overall_results_dict)
    +380    def import_spectral_search_results(self, mass_spectra):
    +381        """Imports the spectral search results from the HDF5 file.
    +382
    +383        Parameters
    +384        ----------
    +385        mass_spectra : LCMSBase | MassSpectraBase
    +386            The MassSpectraBase or LCMSBase object to populate with spectral search results.
    +387
    +388        Returns
    +389        -------
    +390        None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase
    +391        object with a dictionary of the 'spectral_search_results' from the HDF5 file.
    +392
    +393        """
    +394        overall_results_dict = {}
    +395        ms2_results_load = self.h5pydata["spectral_search_results"]
    +396        for k in ms2_results_load.keys():
    +397            overall_results_dict[int(k)] = {}
    +398            for k2 in ms2_results_load[k].keys():
    +399                ms2_search_res = SpectrumSearchResults(
    +400                    query_spectrum=mass_spectra._ms[int(k)],
    +401                    precursor_mz=ms2_results_load[k][k2].attrs["precursor_mz"],
    +402                    spectral_similarity_search_results={},
    +403                )
    +404
    +405                for key in ms2_results_load[k][k2].keys() - {"precursor_mz"}:
    +406                    setattr(ms2_search_res, key, list(ms2_results_load[k][k2][key][:]))
    +407                overall_results_dict[int(k)][
    +408                    ms2_results_load[k][k2].attrs["precursor_mz"]
    +409                ] = ms2_search_res
     410
    -411        # If there are mass features, associate the results with each mass feature
    -412        if len(mass_spectra.mass_features) > 0:
    -413            for mass_feature_id, mass_feature in mass_spectra.mass_features.items():
    -414                scan_ids = mass_feature.ms2_scan_numbers
    -415                for ms2_scan_id in scan_ids:
    -416                    precursor_mz = mass_feature.mz
    -417                    try:
    -418                        mass_spectra.spectral_search_results[ms2_scan_id][precursor_mz]
    -419                    except KeyError:
    -420                        pass
    -421                    else:
    -422                        mass_spectra.mass_features[
    -423                            mass_feature_id
    -424                        ].ms2_similarity_results.append(
    -425                            mass_spectra.spectral_search_results[ms2_scan_id][
    -426                                precursor_mz
    -427                            ]
    -428                        )
    -429
    -430    def get_mass_spectra_obj(self, load_raw=True) -> MassSpectraBase:
    -431        """
    -432        Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file.
    -433        
    -434        Parameters
    -435        ----------
    -436        load_raw : bool
    -437            If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True.
    -438        
    -439        """
    -440        # Instantiate the LCMS object
    -441        spectra_obj = MassSpectraBase(
    -442            file_location=self.file_location,
    -443            analyzer=self.analyzer,
    -444            instrument_label=self.instrument_label,
    -445            sample_name=self.sample_name,
    -446        )
    -447
    -448        # This will populate the _ms list on the LCMS or MassSpectraBase object
    -449        self.run(spectra_obj, load_raw=load_raw)
    +411        # add to mass_spectra
    +412        mass_spectra.spectral_search_results.update(overall_results_dict)
    +413
    +414        # If there are mass features, associate the results with each mass feature
    +415        if len(mass_spectra.mass_features) > 0:
    +416            for mass_feature_id, mass_feature in mass_spectra.mass_features.items():
    +417                scan_ids = mass_feature.ms2_scan_numbers
    +418                for ms2_scan_id in scan_ids:
    +419                    precursor_mz = mass_feature.mz
    +420                    try:
    +421                        mass_spectra.spectral_search_results[ms2_scan_id][precursor_mz]
    +422                    except KeyError:
    +423                        pass
    +424                    else:
    +425                        mass_spectra.mass_features[
    +426                            mass_feature_id
    +427                        ].ms2_similarity_results.append(
    +428                            mass_spectra.spectral_search_results[ms2_scan_id][
    +429                                precursor_mz
    +430                            ]
    +431                        )
    +432
    +433    def get_mass_spectra_obj(self, load_raw=True) -> MassSpectraBase:
    +434        """
    +435        Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file.
    +436
    +437        Parameters
    +438        ----------
    +439        load_raw : bool
    +440            If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True.
    +441
    +442        """
    +443        # Instantiate the LCMS object
    +444        spectra_obj = MassSpectraBase(
    +445            file_location=self.file_location,
    +446            analyzer=self.analyzer,
    +447            instrument_label=self.instrument_label,
    +448            sample_name=self.sample_name,
    +449        )
     450
    -451        return spectra_obj
    -452
    -453    def get_lcms_obj(self, load_raw=True, use_original_parser=True, raw_file_path=None) -> LCMSBase:
    -454        """
    -455        Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file.
    -456
    -457        Parameters
    -458        ----------
    -459        load_raw : bool
    -460            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    -461        use_original_parser : bool
    -462            If True, use the original parser to populate the LCMS object. Default is True.        
    -463        raw_file_path : str
    -464            The location of the raw file to parse if attempting to use original parser.
    -465            Default is None, which attempts to get the raw file path from the HDF5 file.
    -466            If the original file path has moved, this parameter can be used to specify the new location.
    -467        """
    -468        # Instantiate the LCMS object
    -469        lcms_obj = LCMSBase(
    -470            file_location=self.file_location,
    -471            analyzer=self.analyzer,
    -472            instrument_label=self.instrument_label,
    -473            sample_name=self.sample_name,
    -474        )
    -475
    -476        # This will populate the majority of the attributes on the LCMS object
    -477        self.run(lcms_obj, load_raw=load_raw)
    -478
    -479        # Set final attributes of the LCMS object
    -480        lcms_obj.polarity = self.h5pydata.attrs["polarity"]
    -481        lcms_obj._scans_number_list = list(lcms_obj.scan_df.scan)
    -482        lcms_obj._retention_time_list = list(lcms_obj.scan_df.scan_time)
    -483        lcms_obj._tic_list = list(lcms_obj.scan_df.tic)
    -484
    -485        # If use_original_parser is True, instantiate the original parser and populate the LCMS object
    -486        if use_original_parser:
    -487            lcms_obj = self.add_original_parser(lcms_obj, raw_file_path=raw_file_path)
    -488
    -489        return lcms_obj
    -490    
    -491    def add_original_parser(self, mass_spectra, raw_file_path=None):
    -492        """
    -493        Add the original parser to the mass spectra object.
    -494
    -495        Parameters
    -496        ----------
    -497        mass_spectra : MassSpectraBase | LCMSBase
    -498            The MassSpectraBase or LCMSBase object to add the original parser to.
    -499        raw_file_path : str
    -500            The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file.
    -501        """
    -502        # Try to get the raw file path from the HDF5 file
    -503        if raw_file_path is None:
    -504            raw_file_path = self.h5pydata.attrs["original_file_location"]
    -505            #Check if og_file_location exists, if not raise an error
    -506            raw_file_path = self.h5pydata.attrs["original_file_location"]
    -507
    -508        raw_file_path = Path(raw_file_path)
    -509        if not raw_file_path.exists():
    -510            raise FileExistsError("File does not exist: " + str(raw_file_path), ". Cannot use original parser for instatiating the lcms_obj.")
    -511
    -512        # Get the original parser type
    -513        og_parser_type = self.h5pydata.attrs["parser_type"]
    -514
    -515        if og_parser_type == "ImportMassSpectraThermoMSFileReader":
    -516            parser = ImportMassSpectraThermoMSFileReader(raw_file_path)
    -517        elif og_parser_type == "MZMLSpectraParser":
    -518            parser = MZMLSpectraParser(raw_file_path)
    +451        # This will populate the _ms list on the LCMS or MassSpectraBase object
    +452        self.run(spectra_obj, load_raw=load_raw)
    +453
    +454        return spectra_obj
    +455
    +456    def get_lcms_obj(
    +457        self, load_raw=True, use_original_parser=True, raw_file_path=None
    +458    ) -> LCMSBase:
    +459        """
    +460        Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file.
    +461
    +462        Parameters
    +463        ----------
    +464        load_raw : bool
    +465            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    +466        use_original_parser : bool
    +467            If True, use the original parser to populate the LCMS object. Default is True.
    +468        raw_file_path : str
    +469            The location of the raw file to parse if attempting to use original parser.
    +470            Default is None, which attempts to get the raw file path from the HDF5 file.
    +471            If the original file path has moved, this parameter can be used to specify the new location.
    +472        """
    +473        # Instantiate the LCMS object
    +474        lcms_obj = LCMSBase(
    +475            file_location=self.file_location,
    +476            analyzer=self.analyzer,
    +477            instrument_label=self.instrument_label,
    +478            sample_name=self.sample_name,
    +479        )
    +480
    +481        # This will populate the majority of the attributes on the LCMS object
    +482        self.run(lcms_obj, load_raw=load_raw)
    +483
    +484        # Set final attributes of the LCMS object
    +485        lcms_obj.polarity = self.h5pydata.attrs["polarity"]
    +486        lcms_obj._scans_number_list = list(lcms_obj.scan_df.scan)
    +487        lcms_obj._retention_time_list = list(lcms_obj.scan_df.scan_time)
    +488        lcms_obj._tic_list = list(lcms_obj.scan_df.tic)
    +489
    +490        # If use_original_parser is True, instantiate the original parser and populate the LCMS object
    +491        if use_original_parser:
    +492            lcms_obj = self.add_original_parser(lcms_obj, raw_file_path=raw_file_path)
    +493
    +494        return lcms_obj
    +495
    +496    def add_original_parser(self, mass_spectra, raw_file_path=None):
    +497        """
    +498        Add the original parser to the mass spectra object.
    +499
    +500        Parameters
    +501        ----------
    +502        mass_spectra : MassSpectraBase | LCMSBase
    +503            The MassSpectraBase or LCMSBase object to add the original parser to.
    +504        raw_file_path : str
    +505            The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file.
    +506        """
    +507        # Try to get the raw file path from the HDF5 file
    +508        if raw_file_path is None:
    +509            raw_file_path = self.h5pydata.attrs["original_file_location"]
    +510            # Check if og_file_location exists, if not raise an error
    +511            raw_file_path = self.h5pydata.attrs["original_file_location"]
    +512
    +513        raw_file_path = Path(raw_file_path)
    +514        if not raw_file_path.exists():
    +515            raise FileExistsError(
    +516                "File does not exist: " + str(raw_file_path),
    +517                ". Cannot use original parser for instatiating the lcms_obj.",
    +518            )
     519
    -520        mass_spectra.spectra_parser_class = parser.__class__
    -521        mass_spectra.spectra_parser = parser
    +520        # Get the original parser type
    +521        og_parser_type = self.h5pydata.attrs["parser_type"]
     522
    -523        return mass_spectra
    +523        if og_parser_type == "ImportMassSpectraThermoMSFileReader":
    +524            parser = ImportMassSpectraThermoMSFileReader(raw_file_path)
    +525        elif og_parser_type == "MZMLSpectraParser":
    +526            parser = MZMLSpectraParser(raw_file_path)
    +527
    +528        mass_spectra.spectra_parser_class = parser.__class__
    +529        mass_spectra.spectra_parser = parser
    +530
    +531        return mass_spectra
     
    @@ -1224,32 +1239,32 @@
    Methods
    -
     84    def __init__(self, file_location: str):
    - 85        Thread.__init__(self)
    - 86        ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)
    - 87
    - 88        # override the scans attribute on ReadCoreMSHDF_MassSpectrum class to expect a nested location within the HDF5 file
    - 89        self.scans = [
    - 90            "mass_spectra/" + x for x in list(self.h5pydata["mass_spectra"].keys())
    - 91        ]
    - 92        self.scan_number_list = sorted(
    - 93            [int(float(i)) for i in list(self.h5pydata["mass_spectra"].keys())]
    - 94        )
    - 95
    - 96        # set the location of the parameters file (json or toml)
    - 97        add_files = [
    - 98            x
    - 99            for x in self.file_location.parent.glob(
    -100                self.file_location.name.replace(".hdf5", ".*")
    -101            )
    -102            if x.suffix != ".hdf5"
    -103        ]
    -104        if len([x for x in add_files if x.suffix == ".json"]) > 0:
    -105            self.parameters_location = [x for x in add_files if x.suffix == ".json"][0]
    -106        elif len([x for x in add_files if x.suffix == ".toml"]) > 0:
    -107            self.parameters_location = [x for x in add_files if x.suffix == ".toml"][0]
    -108        else:
    -109            self.parameters_location = None
    +            
     85    def __init__(self, file_location: str):
    + 86        Thread.__init__(self)
    + 87        ReadCoreMSHDF_MassSpectrum.__init__(self, file_location)
    + 88
    + 89        # override the scans attribute on ReadCoreMSHDF_MassSpectrum class to expect a nested location within the HDF5 file
    + 90        self.scans = [
    + 91            "mass_spectra/" + x for x in list(self.h5pydata["mass_spectra"].keys())
    + 92        ]
    + 93        self.scan_number_list = sorted(
    + 94            [int(float(i)) for i in list(self.h5pydata["mass_spectra"].keys())]
    + 95        )
    + 96
    + 97        # set the location of the parameters file (json or toml)
    + 98        add_files = [
    + 99            x
    +100            for x in self.file_location.parent.glob(
    +101                self.file_location.name.replace(".hdf5", ".*")
    +102            )
    +103            if x.suffix != ".hdf5"
    +104        ]
    +105        if len([x for x in add_files if x.suffix == ".json"]) > 0:
    +106            self.parameters_location = [x for x in add_files if x.suffix == ".json"][0]
    +107        elif len([x for x in add_files if x.suffix == ".toml"]) > 0:
    +108            self.parameters_location = [x for x in add_files if x.suffix == ".toml"][0]
    +109        else:
    +110            self.parameters_location = None
     
    @@ -1309,13 +1324,13 @@
    Methods
    -
    111    def get_mass_spectrum_from_scan(self, scan_number):
    -112        """Return mass spectrum data object from scan number."""
    -113        if scan_number in self.scan_number_list:
    -114            mass_spec = self.get_mass_spectrum(scan_number)
    -115            return mass_spec
    -116        else:
    -117            raise Exception("Scan number not found in HDF5 file.")
    +            
    112    def get_mass_spectrum_from_scan(self, scan_number):
    +113        """Return mass spectrum data object from scan number."""
    +114        if scan_number in self.scan_number_list:
    +115            mass_spec = self.get_mass_spectrum(scan_number)
    +116            return mass_spec
    +117        else:
    +118            raise Exception("Scan number not found in HDF5 file.")
     
    @@ -1335,9 +1350,9 @@
    Methods
    -
    119    def load(self) -> None:
    -120        """ """
    -121        pass
    +            
    120    def load(self) -> None:
    +121        """ """
    +122        pass
     
    @@ -1355,20 +1370,22 @@
    Methods
    -
    123    def get_ms_raw(self, spectra = None, scan_df = None) -> dict:
    -124        """ """
    -125        # Warn if spectra or scan_df are not None that they are not used for CoreMS HDF5 files and should be rerun after instantiation
    -126        if spectra is not None or scan_df is not None:
    -127            SyntaxWarning("get_ms_raw method for CoreMS HDF5 files can only access saved data, consider rerunning after instantiation.")
    -128        ms_unprocessed = {}
    -129        dict_group_load = self.h5pydata["ms_unprocessed"]
    -130        dict_group_keys = dict_group_load.keys()
    -131        for k in dict_group_keys:
    -132            ms_up_int = dict_group_load[k][:]
    -133            ms_unprocessed[int(k)] = pd.DataFrame(
    -134                ms_up_int, columns=["scan", "mz", "intensity"]
    -135            )
    -136        return ms_unprocessed
    +            
    124    def get_ms_raw(self, spectra=None, scan_df=None) -> dict:
    +125        """ """
    +126        # Warn if spectra or scan_df are not None that they are not used for CoreMS HDF5 files and should be rerun after instantiation
    +127        if spectra is not None or scan_df is not None:
    +128            SyntaxWarning(
    +129                "get_ms_raw method for CoreMS HDF5 files can only access saved data, consider rerunning after instantiation."
    +130            )
    +131        ms_unprocessed = {}
    +132        dict_group_load = self.h5pydata["ms_unprocessed"]
    +133        dict_group_keys = dict_group_load.keys()
    +134        for k in dict_group_keys:
    +135            ms_up_int = dict_group_load[k][:]
    +136            ms_unprocessed[int(k)] = pd.DataFrame(
    +137                ms_up_int, columns=["scan", "mz", "intensity"]
    +138            )
    +139        return ms_unprocessed
     
    @@ -1386,19 +1403,19 @@
    Methods
    -
    138    def get_scan_df(self) -> pd.DataFrame:
    -139        scan_info = {}
    -140        dict_group_load = self.h5pydata["scan_info"]
    -141        dict_group_keys = dict_group_load.keys()
    -142        for k in dict_group_keys:
    -143            scan_info[k] = dict_group_load[k][:]
    -144        scan_df = pd.DataFrame(scan_info)
    -145        scan_df.set_index("scan", inplace=True, drop=False)
    -146        str_df = scan_df.select_dtypes([object])
    -147        str_df = str_df.stack().str.decode("utf-8").unstack()
    -148        for col in str_df:
    -149            scan_df[col] = str_df[col]
    -150        return scan_df
    +            
    141    def get_scan_df(self) -> pd.DataFrame:
    +142        scan_info = {}
    +143        dict_group_load = self.h5pydata["scan_info"]
    +144        dict_group_keys = dict_group_load.keys()
    +145        for k in dict_group_keys:
    +146            scan_info[k] = dict_group_load[k][:]
    +147        scan_df = pd.DataFrame(scan_info)
    +148        scan_df.set_index("scan", inplace=True, drop=False)
    +149        str_df = scan_df.select_dtypes([object])
    +150        str_df = str_df.stack().str.decode("utf-8").unstack()
    +151        for col in str_df:
    +152            scan_df[col] = str_df[col]
    +153        return scan_df
     
    @@ -1418,58 +1435,58 @@
    Methods
    -
    152    def run(self, mass_spectra, load_raw=True) -> None:
    -153        """Runs the importer functions to populate a LCMS or MassSpectraBase object.
    -154
    -155        Notes
    -156        -----
    -157        The following functions are run in order, if the HDF5 file contains the necessary data:
    -158        1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object.
    -159        2. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object.
    -160        3. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object.
    -161        4. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object.
    -162        5. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object.
    -163        6. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object.
    -164        7. import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object.
    -165
    -166        Parameters
    -167        ----------
    -168        mass_spectra : LCMSBase or MassSpectraBase
    -169            The LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes.
    -170        load_raw : bool
    -171            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    -172        Returns
    -173        -------
    -174        None, but populates several attributes on the LCMS or MassSpectraBase object.
    -175
    -176        """
    -177        if self.parameters_location is not None:
    -178            # Populate the parameters attribute on the LCMS object
    -179            self.import_parameters(mass_spectra)
    -180
    -181        if "mass_spectra" in self.h5pydata:
    -182            # Populate the _ms list on the LCMS object
    -183            self.import_mass_spectra(mass_spectra, load_raw=load_raw)
    -184
    -185        if "scan_info" in self.h5pydata:
    -186            # Populate the _scan_info attribute on the LCMS object
    -187            self.import_scan_info(mass_spectra)
    -188
    -189        if "ms_unprocessed" in self.h5pydata and load_raw:
    -190            # Populate the _ms_unprocessed attribute on the LCMS object
    -191            self.import_ms_unprocessed(mass_spectra)
    -192
    -193        if "mass_features" in self.h5pydata:
    -194            # Populate the mass_features attribute on the LCMS object
    -195            self.import_mass_features(mass_spectra)
    -196
    -197        if "eics" in self.h5pydata:
    -198            # Populate the eics attribute on the LCMS object
    -199            self.import_eics(mass_spectra)
    -200
    -201        if "spectral_search_results" in self.h5pydata:
    -202            # Populate the spectral_search_results attribute on the LCMS object
    -203            self.import_spectral_search_results(mass_spectra)
    +            
    155    def run(self, mass_spectra, load_raw=True) -> None:
    +156        """Runs the importer functions to populate a LCMS or MassSpectraBase object.
    +157
    +158        Notes
    +159        -----
    +160        The following functions are run in order, if the HDF5 file contains the necessary data:
    +161        1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object.
    +162        2. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object.
    +163        3. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object.
    +164        4. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object.
    +165        5. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object.
    +166        6. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object.
    +167        7. import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object.
    +168
    +169        Parameters
    +170        ----------
    +171        mass_spectra : LCMSBase or MassSpectraBase
    +172            The LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes.
    +173        load_raw : bool
    +174            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    +175        Returns
    +176        -------
    +177        None, but populates several attributes on the LCMS or MassSpectraBase object.
    +178
    +179        """
    +180        if self.parameters_location is not None:
    +181            # Populate the parameters attribute on the LCMS object
    +182            self.import_parameters(mass_spectra)
    +183
    +184        if "mass_spectra" in self.h5pydata:
    +185            # Populate the _ms list on the LCMS object
    +186            self.import_mass_spectra(mass_spectra, load_raw=load_raw)
    +187
    +188        if "scan_info" in self.h5pydata:
    +189            # Populate the _scan_info attribute on the LCMS object
    +190            self.import_scan_info(mass_spectra)
    +191
    +192        if "ms_unprocessed" in self.h5pydata and load_raw:
    +193            # Populate the _ms_unprocessed attribute on the LCMS object
    +194            self.import_ms_unprocessed(mass_spectra)
    +195
    +196        if "mass_features" in self.h5pydata:
    +197            # Populate the mass_features attribute on the LCMS object
    +198            self.import_mass_features(mass_spectra)
    +199
    +200        if "eics" in self.h5pydata:
    +201            # Populate the eics attribute on the LCMS object
    +202            self.import_eics(mass_spectra)
    +203
    +204        if "spectral_search_results" in self.h5pydata:
    +205            # Populate the spectral_search_results attribute on the LCMS object
    +206            self.import_spectral_search_results(mass_spectra)
     
    @@ -1518,25 +1535,25 @@
    Returns
    -
    205    def import_mass_spectra(self, mass_spectra, load_raw=True) -> None:
    -206        """Imports all mass spectra from the HDF5 file.
    -207
    -208        Parameters
    -209        ----------
    -210        mass_spectra : LCMSBase | MassSpectraBase
    -211            The MassSpectraBase or LCMSBase object to populate with mass spectra.
    -212        load_raw : bool
    -213            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default
    -214
    -215        Returns
    -216        -------
    -217        None, but populates the '_ms' list on the LCMSBase or MassSpectraBase
    -218        object with mass spectra from the HDF5 file.
    -219        """
    -220        for scan_number in self.scan_number_list:
    -221            mass_spec = self.get_mass_spectrum(scan_number, load_raw=load_raw)
    -222            mass_spec.scan_number = scan_number
    -223            mass_spectra.add_mass_spectrum(mass_spec)
    +            
    208    def import_mass_spectra(self, mass_spectra, load_raw=True) -> None:
    +209        """Imports all mass spectra from the HDF5 file.
    +210
    +211        Parameters
    +212        ----------
    +213        mass_spectra : LCMSBase | MassSpectraBase
    +214            The MassSpectraBase or LCMSBase object to populate with mass spectra.
    +215        load_raw : bool
    +216            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default
    +217
    +218        Returns
    +219        -------
    +220        None, but populates the '_ms' list on the LCMSBase or MassSpectraBase
    +221        object with mass spectra from the HDF5 file.
    +222        """
    +223        for scan_number in self.scan_number_list:
    +224            mass_spec = self.get_mass_spectrum(scan_number, load_raw=load_raw)
    +225            mass_spec.scan_number = scan_number
    +226            mass_spectra.add_mass_spectrum(mass_spec)
     
    @@ -1572,22 +1589,22 @@
    Returns
    -
    225    def import_scan_info(self, mass_spectra) -> None:
    -226        """Imports the scan info from the HDF5 file.
    -227
    -228        Parameters
    -229        ----------
    -230        lcms : LCMSBase | MassSpectraBase
    -231            The MassSpectraBase or LCMSBase objects
    -232
    -233        Returns
    -234        -------
    -235        None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase
    -236        object with a pandas DataFrame of the 'scan_info' from the HDF5 file.
    -237
    -238        """
    -239        scan_df = self.get_scan_df()
    -240        mass_spectra.scan_df = scan_df
    +            
    228    def import_scan_info(self, mass_spectra) -> None:
    +229        """Imports the scan info from the HDF5 file.
    +230
    +231        Parameters
    +232        ----------
    +233        lcms : LCMSBase | MassSpectraBase
    +234            The MassSpectraBase or LCMSBase objects
    +235
    +236        Returns
    +237        -------
    +238        None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase
    +239        object with a pandas DataFrame of the 'scan_info' from the HDF5 file.
    +240
    +241        """
    +242        scan_df = self.get_scan_df()
    +243        mass_spectra.scan_df = scan_df
     
    @@ -1621,22 +1638,22 @@
    Returns
    -
    242    def import_ms_unprocessed(self, mass_spectra) -> None:
    -243        """Imports the unprocessed mass spectra from the HDF5 file.
    -244
    -245        Parameters
    -246        ----------
    -247        lcms : LCMSBase | MassSpectraBase
    -248            The MassSpectraBase or LCMSBase objects
    -249
    -250        Returns
    -251        -------
    -252        None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase
    -253        object with a dictionary of the 'ms_unprocessed' from the HDF5 file.
    -254
    -255        """
    -256        ms_unprocessed = self.get_ms_raw()
    -257        mass_spectra._ms_unprocessed = ms_unprocessed
    +            
    245    def import_ms_unprocessed(self, mass_spectra) -> None:
    +246        """Imports the unprocessed mass spectra from the HDF5 file.
    +247
    +248        Parameters
    +249        ----------
    +250        lcms : LCMSBase | MassSpectraBase
    +251            The MassSpectraBase or LCMSBase objects
    +252
    +253        Returns
    +254        -------
    +255        None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase
    +256        object with a dictionary of the 'ms_unprocessed' from the HDF5 file.
    +257
    +258        """
    +259        ms_unprocessed = self.get_ms_raw()
    +260        mass_spectra._ms_unprocessed = ms_unprocessed
     
    @@ -1670,28 +1687,28 @@
    Returns
    -
    259    def import_parameters(self, mass_spectra) -> None:
    -260        """Imports the parameters from the HDF5 file.
    -261
    -262        Parameters
    -263        ----------
    -264        mass_spectra : LCMSBase | MassSpectraBase
    -265            The MassSpectraBase or LCMSBase object to populate with parameters.
    -266
    -267        Returns
    -268        -------
    -269        None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase
    -270        object with a dictionary of the 'parameters' from the HDF5 file.
    -271
    -272        """
    -273        if ".json" == self.parameters_location.suffix:
    -274            load_and_set_json_parameters_lcms(mass_spectra, self.parameters_location)
    -275        if ".toml" == self.parameters_location.suffix:
    -276            load_and_set_toml_parameters_lcms(mass_spectra, self.parameters_location)
    -277        else:
    -278            raise Exception(
    -279                "Parameters file must be in JSON format, TOML format is not yet supported."
    -280            )
    +            
    262    def import_parameters(self, mass_spectra) -> None:
    +263        """Imports the parameters from the HDF5 file.
    +264
    +265        Parameters
    +266        ----------
    +267        mass_spectra : LCMSBase | MassSpectraBase
    +268            The MassSpectraBase or LCMSBase object to populate with parameters.
    +269
    +270        Returns
    +271        -------
    +272        None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase
    +273        object with a dictionary of the 'parameters' from the HDF5 file.
    +274
    +275        """
    +276        if ".json" == self.parameters_location.suffix:
    +277            load_and_set_json_parameters_lcms(mass_spectra, self.parameters_location)
    +278        if ".toml" == self.parameters_location.suffix:
    +279            load_and_set_toml_parameters_lcms(mass_spectra, self.parameters_location)
    +280        else:
    +281            raise Exception(
    +282                "Parameters file must be in JSON format, TOML format is not yet supported."
    +283            )
     
    @@ -1725,63 +1742,63 @@
    Returns
    -
    282    def import_mass_features(self, mass_spectra) -> None:
    -283        """Imports the mass features from the HDF5 file.
    -284
    -285        Parameters
    -286        ----------
    -287        mass_spectra : LCMSBase | MassSpectraBase
    -288            The MassSpectraBase or LCMSBase object to populate with mass features.
    -289
    -290        Returns
    -291        -------
    -292        None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase
    -293        object with a dictionary of the 'mass_features' from the HDF5 file.
    -294
    -295        """
    -296        dict_group_load = self.h5pydata["mass_features"]
    -297        dict_group_keys = dict_group_load.keys()
    -298        for k in dict_group_keys:
    -299            # Instantiate the MassFeature object
    -300            mass_feature = LCMSMassFeature(
    -301                mass_spectra,
    -302                mz=dict_group_load[k].attrs["_mz_exp"],
    -303                retention_time=dict_group_load[k].attrs["_retention_time"],
    -304                intensity=dict_group_load[k].attrs["_intensity"],
    -305                apex_scan=dict_group_load[k].attrs["_apex_scan"],
    -306                persistence=dict_group_load[k].attrs["_persistence"],
    -307                id=int(k),
    -308            )
    -309
    -310            # Populate additional attributes on the MassFeature object
    -311            for key in dict_group_load[k].attrs.keys() - {
    -312                "_mz_exp",
    -313                "_mz_cal",
    -314                "_retention_time",
    -315                "_intensity",
    -316                "_apex_scan",
    -317                "_persistence",
    -318            }:
    -319                setattr(mass_feature, key, dict_group_load[k].attrs[key])
    -320
    -321            # Populate attributes on MassFeature object that are lists
    -322            for key in dict_group_load[k].keys():
    -323                setattr(mass_feature, key, dict_group_load[k][key][:])
    -324
    -325            mass_spectra.mass_features[int(k)] = mass_feature
    -326
    -327        # Associate mass features with ms1 and ms2 spectra, if available
    -328        for mf_id in mass_spectra.mass_features.keys():
    -329            if mass_spectra.mass_features[mf_id].apex_scan in mass_spectra._ms.keys():
    -330                mass_spectra.mass_features[mf_id].mass_spectrum = mass_spectra._ms[
    -331                    mass_spectra.mass_features[mf_id].apex_scan
    -332                ]
    -333            if mass_spectra.mass_features[mf_id].ms2_scan_numbers is not None:
    -334                for ms2_scan in mass_spectra.mass_features[mf_id].ms2_scan_numbers:
    -335                    if ms2_scan in mass_spectra._ms.keys():
    -336                        mass_spectra.mass_features[mf_id].ms2_mass_spectra[ms2_scan] = (
    -337                            mass_spectra._ms[ms2_scan]
    -338                        )
    +            
    285    def import_mass_features(self, mass_spectra) -> None:
    +286        """Imports the mass features from the HDF5 file.
    +287
    +288        Parameters
    +289        ----------
    +290        mass_spectra : LCMSBase | MassSpectraBase
    +291            The MassSpectraBase or LCMSBase object to populate with mass features.
    +292
    +293        Returns
    +294        -------
    +295        None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase
    +296        object with a dictionary of the 'mass_features' from the HDF5 file.
    +297
    +298        """
    +299        dict_group_load = self.h5pydata["mass_features"]
    +300        dict_group_keys = dict_group_load.keys()
    +301        for k in dict_group_keys:
    +302            # Instantiate the MassFeature object
    +303            mass_feature = LCMSMassFeature(
    +304                mass_spectra,
    +305                mz=dict_group_load[k].attrs["_mz_exp"],
    +306                retention_time=dict_group_load[k].attrs["_retention_time"],
    +307                intensity=dict_group_load[k].attrs["_intensity"],
    +308                apex_scan=dict_group_load[k].attrs["_apex_scan"],
    +309                persistence=dict_group_load[k].attrs["_persistence"],
    +310                id=int(k),
    +311            )
    +312
    +313            # Populate additional attributes on the MassFeature object
    +314            for key in dict_group_load[k].attrs.keys() - {
    +315                "_mz_exp",
    +316                "_mz_cal",
    +317                "_retention_time",
    +318                "_intensity",
    +319                "_apex_scan",
    +320                "_persistence",
    +321            }:
    +322                setattr(mass_feature, key, dict_group_load[k].attrs[key])
    +323
    +324            # Populate attributes on MassFeature object that are lists
    +325            for key in dict_group_load[k].keys():
    +326                setattr(mass_feature, key, dict_group_load[k][key][:])
    +327
    +328            mass_spectra.mass_features[int(k)] = mass_feature
    +329
    +330        # Associate mass features with ms1 and ms2 spectra, if available
    +331        for mf_id in mass_spectra.mass_features.keys():
    +332            if mass_spectra.mass_features[mf_id].apex_scan in mass_spectra._ms.keys():
    +333                mass_spectra.mass_features[mf_id].mass_spectrum = mass_spectra._ms[
    +334                    mass_spectra.mass_features[mf_id].apex_scan
    +335                ]
    +336            if mass_spectra.mass_features[mf_id].ms2_scan_numbers is not None:
    +337                for ms2_scan in mass_spectra.mass_features[mf_id].ms2_scan_numbers:
    +338                    if ms2_scan in mass_spectra._ms.keys():
    +339                        mass_spectra.mass_features[mf_id].ms2_mass_spectra[ms2_scan] = (
    +340                            mass_spectra._ms[ms2_scan]
    +341                        )
     
    @@ -1815,42 +1832,42 @@
    Returns
    -
    340    def import_eics(self, mass_spectra):
    -341        """Imports the extracted ion chromatograms from the HDF5 file.
    -342
    -343        Parameters
    -344        ----------
    -345        mass_spectra : LCMSBase | MassSpectraBase
    -346            The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms.
    -347
    -348        Returns
    -349        -------
    -350        None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase
    -351        object with a dictionary of the 'eics' from the HDF5 file.
    -352
    -353        """
    -354        dict_group_load = self.h5pydata["eics"]
    -355        dict_group_keys = dict_group_load.keys()
    -356        for k in dict_group_keys:
    -357            my_eic = EIC_Data(
    -358                scans=dict_group_load[k]["scans"][:],
    -359                time=dict_group_load[k]["time"][:],
    -360                eic=dict_group_load[k]["eic"][:],
    -361            )
    -362            for key in dict_group_load[k].keys():
    -363                if key not in ["scans", "time", "eic"]:
    -364                    setattr(my_eic, key, dict_group_load[k][key][:])
    -365                    # if key is apexes, convert to a tuple of a list
    -366                    if key == "apexes" and len(my_eic.apexes) > 0:
    -367                        my_eic.apexes = [tuple(x) for x in my_eic.apexes]
    -368            # Add to mass_spectra object
    -369            mass_spectra.eics[dict_group_load[k].attrs["mz"]] = my_eic
    -370
    -371        # Add to mass features
    -372        for idx in mass_spectra.mass_features.keys():
    -373            mz = mass_spectra.mass_features[idx].mz
    -374            if mz in mass_spectra.eics.keys():
    -375                mass_spectra.mass_features[idx]._eic_data = mass_spectra.eics[mz]
    +            
    343    def import_eics(self, mass_spectra):
    +344        """Imports the extracted ion chromatograms from the HDF5 file.
    +345
    +346        Parameters
    +347        ----------
    +348        mass_spectra : LCMSBase | MassSpectraBase
    +349            The MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms.
    +350
    +351        Returns
    +352        -------
    +353        None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase
    +354        object with a dictionary of the 'eics' from the HDF5 file.
    +355
    +356        """
    +357        dict_group_load = self.h5pydata["eics"]
    +358        dict_group_keys = dict_group_load.keys()
    +359        for k in dict_group_keys:
    +360            my_eic = EIC_Data(
    +361                scans=dict_group_load[k]["scans"][:],
    +362                time=dict_group_load[k]["time"][:],
    +363                eic=dict_group_load[k]["eic"][:],
    +364            )
    +365            for key in dict_group_load[k].keys():
    +366                if key not in ["scans", "time", "eic"]:
    +367                    setattr(my_eic, key, dict_group_load[k][key][:])
    +368                    # if key is apexes, convert to a tuple of a list
    +369                    if key == "apexes" and len(my_eic.apexes) > 0:
    +370                        my_eic.apexes = [tuple(x) for x in my_eic.apexes]
    +371            # Add to mass_spectra object
    +372            mass_spectra.eics[dict_group_load[k].attrs["mz"]] = my_eic
    +373
    +374        # Add to mass features
    +375        for idx in mass_spectra.mass_features.keys():
    +376            mz = mass_spectra.mass_features[idx].mz
    +377            if mz in mass_spectra.eics.keys():
    +378                mass_spectra.mass_features[idx]._eic_data = mass_spectra.eics[mz]
     
    @@ -1884,58 +1901,58 @@
    Returns
    -
    377    def import_spectral_search_results(self, mass_spectra):
    -378        """Imports the spectral search results from the HDF5 file.
    -379
    -380        Parameters
    -381        ----------
    -382        mass_spectra : LCMSBase | MassSpectraBase
    -383            The MassSpectraBase or LCMSBase object to populate with spectral search results.
    -384
    -385        Returns
    -386        -------
    -387        None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase
    -388        object with a dictionary of the 'spectral_search_results' from the HDF5 file.
    -389
    -390        """
    -391        overall_results_dict = {}
    -392        ms2_results_load = self.h5pydata["spectral_search_results"]
    -393        for k in ms2_results_load.keys():
    -394            overall_results_dict[int(k)] = {}
    -395            for k2 in ms2_results_load[k].keys():
    -396                ms2_search_res = SpectrumSearchResults(
    -397                    query_spectrum=mass_spectra._ms[int(k)],
    -398                    precursor_mz=ms2_results_load[k][k2].attrs["precursor_mz"],
    -399                    spectral_similarity_search_results={},
    -400                )
    -401
    -402                for key in ms2_results_load[k][k2].keys() - {"precursor_mz"}:
    -403                    setattr(ms2_search_res, key, list(ms2_results_load[k][k2][key][:]))
    -404                overall_results_dict[int(k)][
    -405                    ms2_results_load[k][k2].attrs["precursor_mz"]
    -406                ] = ms2_search_res
    -407
    -408        # add to mass_spectra
    -409        mass_spectra.spectral_search_results.update(overall_results_dict)
    +            
    380    def import_spectral_search_results(self, mass_spectra):
    +381        """Imports the spectral search results from the HDF5 file.
    +382
    +383        Parameters
    +384        ----------
    +385        mass_spectra : LCMSBase | MassSpectraBase
    +386            The MassSpectraBase or LCMSBase object to populate with spectral search results.
    +387
    +388        Returns
    +389        -------
    +390        None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase
    +391        object with a dictionary of the 'spectral_search_results' from the HDF5 file.
    +392
    +393        """
    +394        overall_results_dict = {}
    +395        ms2_results_load = self.h5pydata["spectral_search_results"]
    +396        for k in ms2_results_load.keys():
    +397            overall_results_dict[int(k)] = {}
    +398            for k2 in ms2_results_load[k].keys():
    +399                ms2_search_res = SpectrumSearchResults(
    +400                    query_spectrum=mass_spectra._ms[int(k)],
    +401                    precursor_mz=ms2_results_load[k][k2].attrs["precursor_mz"],
    +402                    spectral_similarity_search_results={},
    +403                )
    +404
    +405                for key in ms2_results_load[k][k2].keys() - {"precursor_mz"}:
    +406                    setattr(ms2_search_res, key, list(ms2_results_load[k][k2][key][:]))
    +407                overall_results_dict[int(k)][
    +408                    ms2_results_load[k][k2].attrs["precursor_mz"]
    +409                ] = ms2_search_res
     410
    -411        # If there are mass features, associate the results with each mass feature
    -412        if len(mass_spectra.mass_features) > 0:
    -413            for mass_feature_id, mass_feature in mass_spectra.mass_features.items():
    -414                scan_ids = mass_feature.ms2_scan_numbers
    -415                for ms2_scan_id in scan_ids:
    -416                    precursor_mz = mass_feature.mz
    -417                    try:
    -418                        mass_spectra.spectral_search_results[ms2_scan_id][precursor_mz]
    -419                    except KeyError:
    -420                        pass
    -421                    else:
    -422                        mass_spectra.mass_features[
    -423                            mass_feature_id
    -424                        ].ms2_similarity_results.append(
    -425                            mass_spectra.spectral_search_results[ms2_scan_id][
    -426                                precursor_mz
    -427                            ]
    -428                        )
    +411        # add to mass_spectra
    +412        mass_spectra.spectral_search_results.update(overall_results_dict)
    +413
    +414        # If there are mass features, associate the results with each mass feature
    +415        if len(mass_spectra.mass_features) > 0:
    +416            for mass_feature_id, mass_feature in mass_spectra.mass_features.items():
    +417                scan_ids = mass_feature.ms2_scan_numbers
    +418                for ms2_scan_id in scan_ids:
    +419                    precursor_mz = mass_feature.mz
    +420                    try:
    +421                        mass_spectra.spectral_search_results[ms2_scan_id][precursor_mz]
    +422                    except KeyError:
    +423                        pass
    +424                    else:
    +425                        mass_spectra.mass_features[
    +426                            mass_feature_id
    +427                        ].ms2_similarity_results.append(
    +428                            mass_spectra.spectral_search_results[ms2_scan_id][
    +429                                precursor_mz
    +430                            ]
    +431                        )
     
    @@ -1969,28 +1986,28 @@
    Returns
    -
    430    def get_mass_spectra_obj(self, load_raw=True) -> MassSpectraBase:
    -431        """
    -432        Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file.
    -433        
    -434        Parameters
    -435        ----------
    -436        load_raw : bool
    -437            If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True.
    -438        
    -439        """
    -440        # Instantiate the LCMS object
    -441        spectra_obj = MassSpectraBase(
    -442            file_location=self.file_location,
    -443            analyzer=self.analyzer,
    -444            instrument_label=self.instrument_label,
    -445            sample_name=self.sample_name,
    -446        )
    -447
    -448        # This will populate the _ms list on the LCMS or MassSpectraBase object
    -449        self.run(spectra_obj, load_raw=load_raw)
    +            
    433    def get_mass_spectra_obj(self, load_raw=True) -> MassSpectraBase:
    +434        """
    +435        Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file.
    +436
    +437        Parameters
    +438        ----------
    +439        load_raw : bool
    +440            If True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True.
    +441
    +442        """
    +443        # Instantiate the LCMS object
    +444        spectra_obj = MassSpectraBase(
    +445            file_location=self.file_location,
    +446            analyzer=self.analyzer,
    +447            instrument_label=self.instrument_label,
    +448            sample_name=self.sample_name,
    +449        )
     450
    -451        return spectra_obj
    +451        # This will populate the _ms list on the LCMS or MassSpectraBase object
    +452        self.run(spectra_obj, load_raw=load_raw)
    +453
    +454        return spectra_obj
     
    @@ -2017,43 +2034,45 @@
    Parameters
    -
    453    def get_lcms_obj(self, load_raw=True, use_original_parser=True, raw_file_path=None) -> LCMSBase:
    -454        """
    -455        Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file.
    -456
    -457        Parameters
    -458        ----------
    -459        load_raw : bool
    -460            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    -461        use_original_parser : bool
    -462            If True, use the original parser to populate the LCMS object. Default is True.        
    -463        raw_file_path : str
    -464            The location of the raw file to parse if attempting to use original parser.
    -465            Default is None, which attempts to get the raw file path from the HDF5 file.
    -466            If the original file path has moved, this parameter can be used to specify the new location.
    -467        """
    -468        # Instantiate the LCMS object
    -469        lcms_obj = LCMSBase(
    -470            file_location=self.file_location,
    -471            analyzer=self.analyzer,
    -472            instrument_label=self.instrument_label,
    -473            sample_name=self.sample_name,
    -474        )
    -475
    -476        # This will populate the majority of the attributes on the LCMS object
    -477        self.run(lcms_obj, load_raw=load_raw)
    -478
    -479        # Set final attributes of the LCMS object
    -480        lcms_obj.polarity = self.h5pydata.attrs["polarity"]
    -481        lcms_obj._scans_number_list = list(lcms_obj.scan_df.scan)
    -482        lcms_obj._retention_time_list = list(lcms_obj.scan_df.scan_time)
    -483        lcms_obj._tic_list = list(lcms_obj.scan_df.tic)
    -484
    -485        # If use_original_parser is True, instantiate the original parser and populate the LCMS object
    -486        if use_original_parser:
    -487            lcms_obj = self.add_original_parser(lcms_obj, raw_file_path=raw_file_path)
    -488
    -489        return lcms_obj
    +            
    456    def get_lcms_obj(
    +457        self, load_raw=True, use_original_parser=True, raw_file_path=None
    +458    ) -> LCMSBase:
    +459        """
    +460        Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file.
    +461
    +462        Parameters
    +463        ----------
    +464        load_raw : bool
    +465            If True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    +466        use_original_parser : bool
    +467            If True, use the original parser to populate the LCMS object. Default is True.
    +468        raw_file_path : str
    +469            The location of the raw file to parse if attempting to use original parser.
    +470            Default is None, which attempts to get the raw file path from the HDF5 file.
    +471            If the original file path has moved, this parameter can be used to specify the new location.
    +472        """
    +473        # Instantiate the LCMS object
    +474        lcms_obj = LCMSBase(
    +475            file_location=self.file_location,
    +476            analyzer=self.analyzer,
    +477            instrument_label=self.instrument_label,
    +478            sample_name=self.sample_name,
    +479        )
    +480
    +481        # This will populate the majority of the attributes on the LCMS object
    +482        self.run(lcms_obj, load_raw=load_raw)
    +483
    +484        # Set final attributes of the LCMS object
    +485        lcms_obj.polarity = self.h5pydata.attrs["polarity"]
    +486        lcms_obj._scans_number_list = list(lcms_obj.scan_df.scan)
    +487        lcms_obj._retention_time_list = list(lcms_obj.scan_df.scan_time)
    +488        lcms_obj._tic_list = list(lcms_obj.scan_df.tic)
    +489
    +490        # If use_original_parser is True, instantiate the original parser and populate the LCMS object
    +491        if use_original_parser:
    +492            lcms_obj = self.add_original_parser(lcms_obj, raw_file_path=raw_file_path)
    +493
    +494        return lcms_obj
     
    @@ -2086,39 +2105,42 @@
    Parameters
    -
    491    def add_original_parser(self, mass_spectra, raw_file_path=None):
    -492        """
    -493        Add the original parser to the mass spectra object.
    -494
    -495        Parameters
    -496        ----------
    -497        mass_spectra : MassSpectraBase | LCMSBase
    -498            The MassSpectraBase or LCMSBase object to add the original parser to.
    -499        raw_file_path : str
    -500            The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file.
    -501        """
    -502        # Try to get the raw file path from the HDF5 file
    -503        if raw_file_path is None:
    -504            raw_file_path = self.h5pydata.attrs["original_file_location"]
    -505            #Check if og_file_location exists, if not raise an error
    -506            raw_file_path = self.h5pydata.attrs["original_file_location"]
    -507
    -508        raw_file_path = Path(raw_file_path)
    -509        if not raw_file_path.exists():
    -510            raise FileExistsError("File does not exist: " + str(raw_file_path), ". Cannot use original parser for instatiating the lcms_obj.")
    -511
    -512        # Get the original parser type
    -513        og_parser_type = self.h5pydata.attrs["parser_type"]
    -514
    -515        if og_parser_type == "ImportMassSpectraThermoMSFileReader":
    -516            parser = ImportMassSpectraThermoMSFileReader(raw_file_path)
    -517        elif og_parser_type == "MZMLSpectraParser":
    -518            parser = MZMLSpectraParser(raw_file_path)
    +            
    496    def add_original_parser(self, mass_spectra, raw_file_path=None):
    +497        """
    +498        Add the original parser to the mass spectra object.
    +499
    +500        Parameters
    +501        ----------
    +502        mass_spectra : MassSpectraBase | LCMSBase
    +503            The MassSpectraBase or LCMSBase object to add the original parser to.
    +504        raw_file_path : str
    +505            The location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file.
    +506        """
    +507        # Try to get the raw file path from the HDF5 file
    +508        if raw_file_path is None:
    +509            raw_file_path = self.h5pydata.attrs["original_file_location"]
    +510            # Check if og_file_location exists, if not raise an error
    +511            raw_file_path = self.h5pydata.attrs["original_file_location"]
    +512
    +513        raw_file_path = Path(raw_file_path)
    +514        if not raw_file_path.exists():
    +515            raise FileExistsError(
    +516                "File does not exist: " + str(raw_file_path),
    +517                ". Cannot use original parser for instatiating the lcms_obj.",
    +518            )
     519
    -520        mass_spectra.spectra_parser_class = parser.__class__
    -521        mass_spectra.spectra_parser = parser
    +520        # Get the original parser type
    +521        og_parser_type = self.h5pydata.attrs["parser_type"]
     522
    -523        return mass_spectra
    +523        if og_parser_type == "ImportMassSpectraThermoMSFileReader":
    +524            parser = ImportMassSpectraThermoMSFileReader(raw_file_path)
    +525        elif og_parser_type == "MZMLSpectraParser":
    +526            parser = MZMLSpectraParser(raw_file_path)
    +527
    +528        mass_spectra.spectra_parser_class = parser.__class__
    +529        mass_spectra.spectra_parser = parser
    +530
    +531        return mass_spectra
     
    diff --git a/docs/corems/mass_spectra/input/massList.html b/docs/corems/mass_spectra/input/massList.html index 488d4f1c..379920ee 100644 --- a/docs/corems/mass_spectra/input/massList.html +++ b/docs/corems/mass_spectra/input/massList.html @@ -118,101 +118,100 @@

    38 * get_lcms_obj(). Get the LCMSBase object. 39 """ 40 - 41 def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown'): - 42 - 43 if isinstance(file_location, str): - 44 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed - 45 file_location = Path(file_location) - 46 - 47 if not file_location.exists(): - 48 raise FileNotFoundError("%s not found" % file_location) - 49 - 50 if not file_location.suffix == '.corems': - 51 raise TypeError("%s is not a valid CoreMS file" % file_location) - 52 - 53 Thread.__init__(self) - 54 - 55 ReadCoremsMasslist.__init__(self, file_location) - 56 - 57 self.lcms = LCMSBase( - 58 self.file_location, analyzer=analyzer, instrument_label=instrument_label - 59 ) - 60 - 61 def get_scans_filename(self) -> list: - 62 all_other = self.file_location.glob("*_scan*[!.json]") - 63 - 64 scans_filepath = [ - 65 (file_path_obj.stem.split("scan")[1], file_path_obj) - 66 for file_path_obj in all_other - 67 ] - 68 - 69 scans_filepath.sort(key=lambda m: int(m[0])) - 70 - 71 return scans_filepath - 72 - 73 def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None: - 74 self.file_location = file_path_obj - 75 - 76 if file_path_obj.suffix == ".pkl": - 77 self.data_type == "dataframe" - 78 - 79 else: - 80 if file_path_obj.suffix == ".csv": - 81 self.data_type == "txt" - 82 self.delimiter = "," - 83 - 84 elif file_path_obj.suffix == ".xlsx": - 85 self.data_type == "excel" - 86 self.delimiter = "," - 87 - 88 elif file_path_obj.suffix == ".txt": - 89 self.data_type == "txt" - 90 self.delimiter = "\t" - 91 warnings.warn("using tab as delimiter") - 92 else: - 93 raise NotImplementedError( - 94 "%s data not yet supported " % file_path_obj.suffix - 95 ) - 96 - 97 def import_mass_spectra(self) -> None: - 98 list_rt, list_tic, list_scan = list(), list(), list() - 99 -100 for scan_number, file_path_obj in self.get_scans_filename(): -101 self.set_filepath_datatype_and_delimiter(file_path_obj) -102 -103 
mass_spec = self.get_mass_spectrum(int(scan_number)) -104 -105 list_scan.append(int(scan_number)) -106 -107 list_rt.append(mass_spec.retention_time) -108 -109 list_tic.append(mass_spec.tic) -110 -111 self.lcms.add_mass_spectrum(mass_spec) -112 -113 self.lcms.retention_time = list_rt -114 self.lcms.tic_list = list_tic # TODO: check if this is correct -115 self.lcms.scans_number = list_scan -116 -117 def run(self) -> None: -118 """Creates the LCMS object and imports mass spectra.""" -119 -120 self.import_mass_spectra() -121 -122 def get_lcms_obj(self) -> LCMSBase: -123 """ -124 Returns the LCMSBase object associated with the massList. -125 -126 If the LCMSBase object is already initialized, it is returned. -127 Otherwise, an exception is raised. -128 -129 Raises: -130 Exception: If the LCMSBase object is not initialized. -131 """ -132 if self.lcms: -133 return self.lcms -134 else: -135 raise Exception("returning an empty lcms class") + 41 def __init__(self, file_location, analyzer="Unknown", instrument_label="Unknown"): + 42 if isinstance(file_location, str): + 43 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed + 44 file_location = Path(file_location) + 45 + 46 if not file_location.exists(): + 47 raise FileNotFoundError("%s not found" % file_location) + 48 + 49 if not file_location.suffix == ".corems": + 50 raise TypeError("%s is not a valid CoreMS file" % file_location) + 51 + 52 Thread.__init__(self) + 53 + 54 ReadCoremsMasslist.__init__(self, file_location) + 55 + 56 self.lcms = LCMSBase( + 57 self.file_location, analyzer=analyzer, instrument_label=instrument_label + 58 ) + 59 + 60 def get_scans_filename(self) -> list: + 61 all_other = self.file_location.glob("*_scan*[!.json]") + 62 + 63 scans_filepath = [ + 64 (file_path_obj.stem.split("scan")[1], file_path_obj) + 65 for file_path_obj in all_other + 66 ] + 67 + 68 scans_filepath.sort(key=lambda m: int(m[0])) + 69 + 70 return scans_filepath + 71 + 72 def 
set_filepath_datatype_and_delimiter(self, file_path_obj) -> None: + 73 self.file_location = file_path_obj + 74 + 75 if file_path_obj.suffix == ".pkl": + 76 self.data_type == "dataframe" + 77 + 78 else: + 79 if file_path_obj.suffix == ".csv": + 80 self.data_type == "txt" + 81 self.delimiter = "," + 82 + 83 elif file_path_obj.suffix == ".xlsx": + 84 self.data_type == "excel" + 85 self.delimiter = "," + 86 + 87 elif file_path_obj.suffix == ".txt": + 88 self.data_type == "txt" + 89 self.delimiter = "\t" + 90 warnings.warn("using tab as delimiter") + 91 else: + 92 raise NotImplementedError( + 93 "%s data not yet supported " % file_path_obj.suffix + 94 ) + 95 + 96 def import_mass_spectra(self) -> None: + 97 list_rt, list_tic, list_scan = list(), list(), list() + 98 + 99 for scan_number, file_path_obj in self.get_scans_filename(): +100 self.set_filepath_datatype_and_delimiter(file_path_obj) +101 +102 mass_spec = self.get_mass_spectrum(int(scan_number)) +103 +104 list_scan.append(int(scan_number)) +105 +106 list_rt.append(mass_spec.retention_time) +107 +108 list_tic.append(mass_spec.tic) +109 +110 self.lcms.add_mass_spectrum(mass_spec) +111 +112 self.lcms.retention_time = list_rt +113 self.lcms.tic_list = list_tic # TODO: check if this is correct +114 self.lcms.scans_number = list_scan +115 +116 def run(self) -> None: +117 """Creates the LCMS object and imports mass spectra.""" +118 +119 self.import_mass_spectra() +120 +121 def get_lcms_obj(self) -> LCMSBase: +122 """ +123 Returns the LCMSBase object associated with the massList. +124 +125 If the LCMSBase object is already initialized, it is returned. +126 Otherwise, an exception is raised. +127 +128 Raises: +129 Exception: If the LCMSBase object is not initialized. +130 """ +131 if self.lcms: +132 return self.lcms +133 else: +134 raise Exception("returning an empty lcms class")

    @@ -255,101 +254,100 @@

    39 * get_lcms_obj(). Get the LCMSBase object. 40 """ 41 - 42 def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown'): - 43 - 44 if isinstance(file_location, str): - 45 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed - 46 file_location = Path(file_location) - 47 - 48 if not file_location.exists(): - 49 raise FileNotFoundError("%s not found" % file_location) - 50 - 51 if not file_location.suffix == '.corems': - 52 raise TypeError("%s is not a valid CoreMS file" % file_location) - 53 - 54 Thread.__init__(self) - 55 - 56 ReadCoremsMasslist.__init__(self, file_location) - 57 - 58 self.lcms = LCMSBase( - 59 self.file_location, analyzer=analyzer, instrument_label=instrument_label - 60 ) - 61 - 62 def get_scans_filename(self) -> list: - 63 all_other = self.file_location.glob("*_scan*[!.json]") - 64 - 65 scans_filepath = [ - 66 (file_path_obj.stem.split("scan")[1], file_path_obj) - 67 for file_path_obj in all_other - 68 ] - 69 - 70 scans_filepath.sort(key=lambda m: int(m[0])) - 71 - 72 return scans_filepath - 73 - 74 def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None: - 75 self.file_location = file_path_obj - 76 - 77 if file_path_obj.suffix == ".pkl": - 78 self.data_type == "dataframe" - 79 - 80 else: - 81 if file_path_obj.suffix == ".csv": - 82 self.data_type == "txt" - 83 self.delimiter = "," - 84 - 85 elif file_path_obj.suffix == ".xlsx": - 86 self.data_type == "excel" - 87 self.delimiter = "," - 88 - 89 elif file_path_obj.suffix == ".txt": - 90 self.data_type == "txt" - 91 self.delimiter = "\t" - 92 warnings.warn("using tab as delimiter") - 93 else: - 94 raise NotImplementedError( - 95 "%s data not yet supported " % file_path_obj.suffix - 96 ) - 97 - 98 def import_mass_spectra(self) -> None: - 99 list_rt, list_tic, list_scan = list(), list(), list() -100 -101 for scan_number, file_path_obj in self.get_scans_filename(): -102 self.set_filepath_datatype_and_delimiter(file_path_obj) -103 -104 
mass_spec = self.get_mass_spectrum(int(scan_number)) -105 -106 list_scan.append(int(scan_number)) -107 -108 list_rt.append(mass_spec.retention_time) -109 -110 list_tic.append(mass_spec.tic) -111 -112 self.lcms.add_mass_spectrum(mass_spec) -113 -114 self.lcms.retention_time = list_rt -115 self.lcms.tic_list = list_tic # TODO: check if this is correct -116 self.lcms.scans_number = list_scan -117 -118 def run(self) -> None: -119 """Creates the LCMS object and imports mass spectra.""" -120 -121 self.import_mass_spectra() -122 -123 def get_lcms_obj(self) -> LCMSBase: -124 """ -125 Returns the LCMSBase object associated with the massList. -126 -127 If the LCMSBase object is already initialized, it is returned. -128 Otherwise, an exception is raised. -129 -130 Raises: -131 Exception: If the LCMSBase object is not initialized. -132 """ -133 if self.lcms: -134 return self.lcms -135 else: -136 raise Exception("returning an empty lcms class") + 42 def __init__(self, file_location, analyzer="Unknown", instrument_label="Unknown"): + 43 if isinstance(file_location, str): + 44 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed + 45 file_location = Path(file_location) + 46 + 47 if not file_location.exists(): + 48 raise FileNotFoundError("%s not found" % file_location) + 49 + 50 if not file_location.suffix == ".corems": + 51 raise TypeError("%s is not a valid CoreMS file" % file_location) + 52 + 53 Thread.__init__(self) + 54 + 55 ReadCoremsMasslist.__init__(self, file_location) + 56 + 57 self.lcms = LCMSBase( + 58 self.file_location, analyzer=analyzer, instrument_label=instrument_label + 59 ) + 60 + 61 def get_scans_filename(self) -> list: + 62 all_other = self.file_location.glob("*_scan*[!.json]") + 63 + 64 scans_filepath = [ + 65 (file_path_obj.stem.split("scan")[1], file_path_obj) + 66 for file_path_obj in all_other + 67 ] + 68 + 69 scans_filepath.sort(key=lambda m: int(m[0])) + 70 + 71 return scans_filepath + 72 + 73 def 
set_filepath_datatype_and_delimiter(self, file_path_obj) -> None: + 74 self.file_location = file_path_obj + 75 + 76 if file_path_obj.suffix == ".pkl": + 77 self.data_type == "dataframe" + 78 + 79 else: + 80 if file_path_obj.suffix == ".csv": + 81 self.data_type == "txt" + 82 self.delimiter = "," + 83 + 84 elif file_path_obj.suffix == ".xlsx": + 85 self.data_type == "excel" + 86 self.delimiter = "," + 87 + 88 elif file_path_obj.suffix == ".txt": + 89 self.data_type == "txt" + 90 self.delimiter = "\t" + 91 warnings.warn("using tab as delimiter") + 92 else: + 93 raise NotImplementedError( + 94 "%s data not yet supported " % file_path_obj.suffix + 95 ) + 96 + 97 def import_mass_spectra(self) -> None: + 98 list_rt, list_tic, list_scan = list(), list(), list() + 99 +100 for scan_number, file_path_obj in self.get_scans_filename(): +101 self.set_filepath_datatype_and_delimiter(file_path_obj) +102 +103 mass_spec = self.get_mass_spectrum(int(scan_number)) +104 +105 list_scan.append(int(scan_number)) +106 +107 list_rt.append(mass_spec.retention_time) +108 +109 list_tic.append(mass_spec.tic) +110 +111 self.lcms.add_mass_spectrum(mass_spec) +112 +113 self.lcms.retention_time = list_rt +114 self.lcms.tic_list = list_tic # TODO: check if this is correct +115 self.lcms.scans_number = list_scan +116 +117 def run(self) -> None: +118 """Creates the LCMS object and imports mass spectra.""" +119 +120 self.import_mass_spectra() +121 +122 def get_lcms_obj(self) -> LCMSBase: +123 """ +124 Returns the LCMSBase object associated with the massList. +125 +126 If the LCMSBase object is already initialized, it is returned. +127 Otherwise, an exception is raised. +128 +129 Raises: +130 Exception: If the LCMSBase object is not initialized. +131 """ +132 if self.lcms: +133 return self.lcms +134 else: +135 raise Exception("returning an empty lcms class") @@ -395,25 +393,24 @@

    Methods
    -
    42    def __init__(self, file_location, analyzer='Unknown', instrument_label='Unknown'):
    -43        
    -44        if isinstance(file_location, str):
    -45            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    -46            file_location = Path(file_location)
    -47
    -48        if not file_location.exists():
    -49            raise FileNotFoundError("%s not found" % file_location)
    -50        
    -51        if not file_location.suffix == '.corems':
    -52            raise TypeError("%s is not a valid CoreMS file" % file_location)
    -53        
    -54        Thread.__init__(self)
    -55
    -56        ReadCoremsMasslist.__init__(self, file_location)
    -57
    -58        self.lcms = LCMSBase(
    -59            self.file_location, analyzer=analyzer, instrument_label=instrument_label
    -60        )
    +            
    42    def __init__(self, file_location, analyzer="Unknown", instrument_label="Unknown"):
    +43        if isinstance(file_location, str):
    +44            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    +45            file_location = Path(file_location)
    +46
    +47        if not file_location.exists():
    +48            raise FileNotFoundError("%s not found" % file_location)
    +49
    +50        if not file_location.suffix == ".corems":
    +51            raise TypeError("%s is not a valid CoreMS file" % file_location)
    +52
    +53        Thread.__init__(self)
    +54
    +55        ReadCoremsMasslist.__init__(self, file_location)
    +56
    +57        self.lcms = LCMSBase(
    +58            self.file_location, analyzer=analyzer, instrument_label=instrument_label
    +59        )
     
    @@ -462,17 +459,17 @@
    Methods
    -
    62    def get_scans_filename(self) -> list:
    -63        all_other = self.file_location.glob("*_scan*[!.json]")
    -64
    -65        scans_filepath = [
    -66            (file_path_obj.stem.split("scan")[1], file_path_obj)
    -67            for file_path_obj in all_other
    -68        ]
    -69
    -70        scans_filepath.sort(key=lambda m: int(m[0]))
    -71
    -72        return scans_filepath
    +            
    61    def get_scans_filename(self) -> list:
    +62        all_other = self.file_location.glob("*_scan*[!.json]")
    +63
    +64        scans_filepath = [
    +65            (file_path_obj.stem.split("scan")[1], file_path_obj)
    +66            for file_path_obj in all_other
    +67        ]
    +68
    +69        scans_filepath.sort(key=lambda m: int(m[0]))
    +70
    +71        return scans_filepath
     
    @@ -490,29 +487,29 @@
    Methods
    -
    74    def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None:
    -75        self.file_location = file_path_obj
    -76
    -77        if file_path_obj.suffix == ".pkl":
    -78            self.data_type == "dataframe"
    -79
    -80        else:
    -81            if file_path_obj.suffix == ".csv":
    -82                self.data_type == "txt"
    -83                self.delimiter = ","
    -84
    -85            elif file_path_obj.suffix == ".xlsx":
    -86                self.data_type == "excel"
    -87                self.delimiter = ","
    -88
    -89            elif file_path_obj.suffix == ".txt":
    -90                self.data_type == "txt"
    -91                self.delimiter = "\t"
    -92                warnings.warn("using tab as delimiter")
    -93            else:
    -94                raise NotImplementedError(
    -95                    "%s data not yet supported " % file_path_obj.suffix
    -96                )
    +            
    73    def set_filepath_datatype_and_delimiter(self, file_path_obj) -> None:
    +74        self.file_location = file_path_obj
    +75
    +76        if file_path_obj.suffix == ".pkl":
    +77            self.data_type == "dataframe"
    +78
    +79        else:
    +80            if file_path_obj.suffix == ".csv":
    +81                self.data_type == "txt"
    +82                self.delimiter = ","
    +83
    +84            elif file_path_obj.suffix == ".xlsx":
    +85                self.data_type == "excel"
    +86                self.delimiter = ","
    +87
    +88            elif file_path_obj.suffix == ".txt":
    +89                self.data_type == "txt"
    +90                self.delimiter = "\t"
    +91                warnings.warn("using tab as delimiter")
    +92            else:
    +93                raise NotImplementedError(
    +94                    "%s data not yet supported " % file_path_obj.suffix
    +95                )
     
    @@ -530,25 +527,25 @@
    Methods
    -
     98    def import_mass_spectra(self) -> None:
    - 99        list_rt, list_tic, list_scan = list(), list(), list()
    -100
    -101        for scan_number, file_path_obj in self.get_scans_filename():
    -102            self.set_filepath_datatype_and_delimiter(file_path_obj)
    -103
    -104            mass_spec = self.get_mass_spectrum(int(scan_number))
    -105
    -106            list_scan.append(int(scan_number))
    -107
    -108            list_rt.append(mass_spec.retention_time)
    -109
    -110            list_tic.append(mass_spec.tic)
    -111
    -112            self.lcms.add_mass_spectrum(mass_spec)
    -113
    -114        self.lcms.retention_time = list_rt
    -115        self.lcms.tic_list = list_tic  # TODO: check if this is correct
    -116        self.lcms.scans_number = list_scan
    +            
     97    def import_mass_spectra(self) -> None:
    + 98        list_rt, list_tic, list_scan = list(), list(), list()
    + 99
    +100        for scan_number, file_path_obj in self.get_scans_filename():
    +101            self.set_filepath_datatype_and_delimiter(file_path_obj)
    +102
    +103            mass_spec = self.get_mass_spectrum(int(scan_number))
    +104
    +105            list_scan.append(int(scan_number))
    +106
    +107            list_rt.append(mass_spec.retention_time)
    +108
    +109            list_tic.append(mass_spec.tic)
    +110
    +111            self.lcms.add_mass_spectrum(mass_spec)
    +112
    +113        self.lcms.retention_time = list_rt
    +114        self.lcms.tic_list = list_tic  # TODO: check if this is correct
    +115        self.lcms.scans_number = list_scan
     
    @@ -566,10 +563,10 @@
    Methods
    -
    118    def run(self) -> None:
    -119        """Creates the LCMS object and imports mass spectra."""
    -120
    -121        self.import_mass_spectra()
    +            
    117    def run(self) -> None:
    +118        """Creates the LCMS object and imports mass spectra."""
    +119
    +120        self.import_mass_spectra()
     
    @@ -589,20 +586,20 @@
    Methods
    -
    123    def get_lcms_obj(self) -> LCMSBase:
    -124        """
    -125        Returns the LCMSBase object associated with the massList.
    -126
    -127        If the LCMSBase object is already initialized, it is returned.
    -128        Otherwise, an exception is raised.
    -129
    -130        Raises:
    -131            Exception: If the LCMSBase object is not initialized.
    -132        """
    -133        if self.lcms:
    -134            return self.lcms
    -135        else:
    -136            raise Exception("returning an empty lcms class")
    +            
    122    def get_lcms_obj(self) -> LCMSBase:
    +123        """
    +124        Returns the LCMSBase object associated with the massList.
    +125
    +126        If the LCMSBase object is already initialized, it is returned.
    +127        Otherwise, an exception is raised.
    +128
    +129        Raises:
    +130            Exception: If the LCMSBase object is not initialized.
    +131        """
    +132        if self.lcms:
    +133            return self.lcms
    +134        else:
    +135            raise Exception("returning an empty lcms class")
     
    diff --git a/docs/corems/mass_spectra/input/mzml.html b/docs/corems/mass_spectra/input/mzml.html index 15c141bb..b9504200 100644 --- a/docs/corems/mass_spectra/input/mzml.html +++ b/docs/corems/mass_spectra/input/mzml.html @@ -96,475 +96,471 @@

    4import numpy as np 5import pandas as pd 6import pymzml - 7import warnings - 8 - 9from corems.encapsulation.constant import Labels - 10from corems.encapsulation.factory.parameters import default_parameters - 11from corems.mass_spectra.factory.lc_class import LCMSBase, MassSpectraBase - 12from corems.mass_spectra.input.parserbase import SpectraParserInterface - 13from corems.mass_spectrum.factory.MassSpectrumClasses import ( - 14 MassSpecCentroid, - 15 MassSpecProfile, - 16) + 7 + 8from corems.encapsulation.constant import Labels + 9from corems.encapsulation.factory.parameters import default_parameters + 10from corems.mass_spectra.factory.lc_class import LCMSBase, MassSpectraBase + 11from corems.mass_spectra.input.parserbase import SpectraParserInterface + 12from corems.mass_spectrum.factory.MassSpectrumClasses import ( + 13 MassSpecCentroid, + 14 MassSpecProfile, + 15) + 16 17 - 18 - 19class MZMLSpectraParser(SpectraParserInterface): - 20 """A class for parsing mzml spectrometry data files into MassSpectraBase or LCMSBase objects - 21 - 22 Parameters - 23 ---------- - 24 file_location : str or Path - 25 The path to the RAW file to be parsed. - 26 analyzer : str, optional - 27 The type of mass analyzer used in the instrument. Default is "Unknown". - 28 instrument_label : str, optional - 29 The name of the instrument used to acquire the data. Default is "Unknown". - 30 sample_name : str, optional - 31 The name of the sample being analyzed. If not provided, the stem of the file_location path will be used. - 32 - 33 Attributes - 34 ---------- - 35 file_location : Path - 36 The path to the RAW file being parsed. - 37 analyzer : str - 38 The type of mass analyzer used in the instrument. - 39 instrument_label : str - 40 The name of the instrument used to acquire the data. - 41 sample_name : str - 42 The name of the sample being analyzed. - 43 - 44 Methods - 45 ------- - 46 * load(). - 47 Load mzML file using pymzml.run.Reader and return the data as a numpy array. 
- 48 * run(spectra=True). - 49 Parses the mzml file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe. - 50 * get_mass_spectrum_from_scan(scan_number, polarity, auto_process=True) - 51 Parses the mzml file and returns a MassSpecBase object from a single scan. - 52 * get_mass_spectra_obj(). - 53 Parses the mzml file and instantiates a MassSpectraBase object. - 54 * get_lcms_obj(). - 55 Parses the mzml file and instantiates an LCMSBase object. - 56 - 57 Inherits from ThermoBaseClass and SpectraParserInterface - 58 """ - 59 - 60 def __init__( - 61 self, - 62 file_location, - 63 analyzer="Unknown", - 64 instrument_label="Unknown", - 65 sample_name=None, - 66 ): - 67 # implementation details - 68 if isinstance(file_location, str): - 69 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed - 70 file_location = Path(file_location) - 71 if not file_location.exists(): - 72 raise FileExistsError("File does not exist: " + str(file_location)) - 73 self.file_location = file_location - 74 self.analyzer = analyzer - 75 self.instrument_label = instrument_label - 76 - 77 if sample_name: - 78 self.sample_name = sample_name - 79 else: - 80 self.sample_name = file_location.stem - 81 - 82 def load(self): - 83 """ - 84 Load mzML file using pymzml.run.Reader and return the data as a numpy array. - 85 - 86 Returns - 87 ------- - 88 numpy.ndarray - 89 The mass spectra data as a numpy array. - 90 """ - 91 data = pymzml.run.Reader(self.file_location) - 92 return data - 93 - 94 def get_scan_df(self, data): - 95 """ - 96 Return scan data as a pandas DataFrame. - 97 - 98 Parameters - 99 ---------- -100 data : pymzml.run.Reader -101 The mass spectra data. -102 -103 Returns -104 ------- -105 pandas.DataFrame -106 A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time. 
-107 """ -108 # Scan dict -109 # instatinate scan dict, with empty lists of size of scans -110 n_scans = data.get_spectrum_count() -111 scan_dict = { -112 "scan": np.empty(n_scans, dtype=np.int32), -113 "scan_time": np.empty(n_scans, dtype=np.float32), -114 "ms_level": [None] * n_scans, -115 "polarity": [None] * n_scans, -116 "precursor_mz": [None] * n_scans, -117 "scan_text": [None] * n_scans, -118 "scan_window_lower": np.empty(n_scans, dtype=np.float32), -119 "scan_window_upper": np.empty(n_scans, dtype=np.float32), -120 "scan_precision": [None] * n_scans, -121 "tic": np.empty(n_scans, dtype=np.float32), -122 "ms_format": [None] * n_scans, -123 } -124 -125 # First pass: loop through scans to get scan info -126 for i, spec in enumerate(data): -127 scan_dict["scan"][i] = spec.ID -128 scan_dict["ms_level"][i] = spec.ms_level -129 scan_dict["scan_precision"][i] = spec._measured_precision -130 scan_dict["tic"][i] = spec.TIC -131 if spec.selected_precursors: -132 scan_dict["precursor_mz"][i] = spec.selected_precursors[0].get( -133 "mz", None -134 ) -135 if spec["negative scan"] is not None: -136 scan_dict["polarity"][i] = "negative" -137 if spec["positive scan"] is not None: -138 scan_dict["polarity"][i] = "positive" -139 if ( -140 spec["negative scan"] is not None -141 and spec["positive scan"] is not None -142 ): -143 raise ValueError( -144 "Error: scan {0} has both negative and positive polarity".format( -145 spec.ID -146 ) -147 ) -148 -149 scan_dict["scan_time"][i] = spec.get("MS:1000016") -150 scan_dict["scan_text"][i] = spec.get("MS:1000512") -151 scan_dict["scan_window_lower"][i] = spec.get("MS:1000501") -152 scan_dict["scan_window_upper"][i] = spec.get("MS:1000500") -153 if spec.get("MS:1000128"): -154 scan_dict["ms_format"][i] = "profile" -155 elif spec.get("MS:1000127"): -156 scan_dict["ms_format"][i] = "centroid" -157 else: -158 scan_dict["ms_format"][i] = None + 18class MZMLSpectraParser(SpectraParserInterface): + 19 """A class for parsing mzml spectrometry 
data files into MassSpectraBase or LCMSBase objects + 20 + 21 Parameters + 22 ---------- + 23 file_location : str or Path + 24 The path to the RAW file to be parsed. + 25 analyzer : str, optional + 26 The type of mass analyzer used in the instrument. Default is "Unknown". + 27 instrument_label : str, optional + 28 The name of the instrument used to acquire the data. Default is "Unknown". + 29 sample_name : str, optional + 30 The name of the sample being analyzed. If not provided, the stem of the file_location path will be used. + 31 + 32 Attributes + 33 ---------- + 34 file_location : Path + 35 The path to the RAW file being parsed. + 36 analyzer : str + 37 The type of mass analyzer used in the instrument. + 38 instrument_label : str + 39 The name of the instrument used to acquire the data. + 40 sample_name : str + 41 The name of the sample being analyzed. + 42 + 43 Methods + 44 ------- + 45 * load(). + 46 Load mzML file using pymzml.run.Reader and return the data as a numpy array. + 47 * run(spectra=True). + 48 Parses the mzml file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe. + 49 * get_mass_spectrum_from_scan(scan_number, polarity, auto_process=True) + 50 Parses the mzml file and returns a MassSpecBase object from a single scan. + 51 * get_mass_spectra_obj(). + 52 Parses the mzml file and instantiates a MassSpectraBase object. + 53 * get_lcms_obj(). + 54 Parses the mzml file and instantiates an LCMSBase object. 
+ 55 + 56 Inherits from ThermoBaseClass and SpectraParserInterface + 57 """ + 58 + 59 def __init__( + 60 self, + 61 file_location, + 62 analyzer="Unknown", + 63 instrument_label="Unknown", + 64 sample_name=None, + 65 ): + 66 # implementation details + 67 if isinstance(file_location, str): + 68 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed + 69 file_location = Path(file_location) + 70 if not file_location.exists(): + 71 raise FileExistsError("File does not exist: " + str(file_location)) + 72 self.file_location = file_location + 73 self.analyzer = analyzer + 74 self.instrument_label = instrument_label + 75 + 76 if sample_name: + 77 self.sample_name = sample_name + 78 else: + 79 self.sample_name = file_location.stem + 80 + 81 def load(self): + 82 """ + 83 Load mzML file using pymzml.run.Reader and return the data as a numpy array. + 84 + 85 Returns + 86 ------- + 87 numpy.ndarray + 88 The mass spectra data as a numpy array. + 89 """ + 90 data = pymzml.run.Reader(self.file_location) + 91 return data + 92 + 93 def get_scan_df(self, data): + 94 """ + 95 Return scan data as a pandas DataFrame. + 96 + 97 Parameters + 98 ---------- + 99 data : pymzml.run.Reader +100 The mass spectra data. +101 +102 Returns +103 ------- +104 pandas.DataFrame +105 A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time. 
+106 """ +107 # Scan dict +108 # instatinate scan dict, with empty lists of size of scans +109 n_scans = data.get_spectrum_count() +110 scan_dict = { +111 "scan": np.empty(n_scans, dtype=np.int32), +112 "scan_time": np.empty(n_scans, dtype=np.float32), +113 "ms_level": [None] * n_scans, +114 "polarity": [None] * n_scans, +115 "precursor_mz": [None] * n_scans, +116 "scan_text": [None] * n_scans, +117 "scan_window_lower": np.empty(n_scans, dtype=np.float32), +118 "scan_window_upper": np.empty(n_scans, dtype=np.float32), +119 "scan_precision": [None] * n_scans, +120 "tic": np.empty(n_scans, dtype=np.float32), +121 "ms_format": [None] * n_scans, +122 } +123 +124 # First pass: loop through scans to get scan info +125 for i, spec in enumerate(data): +126 scan_dict["scan"][i] = spec.ID +127 scan_dict["ms_level"][i] = spec.ms_level +128 scan_dict["scan_precision"][i] = spec._measured_precision +129 scan_dict["tic"][i] = spec.TIC +130 if spec.selected_precursors: +131 scan_dict["precursor_mz"][i] = spec.selected_precursors[0].get( +132 "mz", None +133 ) +134 if spec["negative scan"] is not None: +135 scan_dict["polarity"][i] = "negative" +136 if spec["positive scan"] is not None: +137 scan_dict["polarity"][i] = "positive" +138 if spec["negative scan"] is not None and spec["positive scan"] is not None: +139 raise ValueError( +140 "Error: scan {0} has both negative and positive polarity".format( +141 spec.ID +142 ) +143 ) +144 +145 scan_dict["scan_time"][i] = spec.get("MS:1000016") +146 scan_dict["scan_text"][i] = spec.get("MS:1000512") +147 scan_dict["scan_window_lower"][i] = spec.get("MS:1000501") +148 scan_dict["scan_window_upper"][i] = spec.get("MS:1000500") +149 if spec.get("MS:1000128"): +150 scan_dict["ms_format"][i] = "profile" +151 elif spec.get("MS:1000127"): +152 scan_dict["ms_format"][i] = "centroid" +153 else: +154 scan_dict["ms_format"][i] = None +155 +156 scan_df = pd.DataFrame(scan_dict) +157 +158 return scan_df 159 -160 scan_df = pd.DataFrame(scan_dict) -161 
-162 return scan_df -163 -164 def get_ms_raw(self, spectra, scan_df, data): -165 """Return a dictionary of mass spectra data as a pandas DataFrame. -166 -167 Parameters -168 ---------- -169 spectra : str -170 Which mass spectra data to include in the output. -171 Options: None, "ms1", "ms2", "all". -172 scan_df : pandas.DataFrame -173 Scan dataframe. Output from get_scan_df(). -174 data : pymzml.run.Reader -175 The mass spectra data. -176 -177 Returns -178 ------- -179 dict -180 A dictionary containing the mass spectra data as pandas DataFrames, with keys corresponding to the MS level. -181 -182 """ -183 if spectra == "all": -184 scan_df_forspec = scan_df -185 elif spectra == "ms1": -186 scan_df_forspec = scan_df[scan_df.ms_level == 1] -187 elif spectra == "ms2": -188 scan_df_forspec = scan_df[scan_df.ms_level == 2] -189 else: -190 raise ValueError("spectra must be 'all', 'ms1', or 'ms2'") -191 -192 # Result container -193 res = {} -194 -195 # Row count container -196 counter = {} -197 -198 # Column name container -199 cols = {} -200 -201 # set at float32 -202 dtype = np.float32 -203 -204 # First pass: get nrows -205 N = defaultdict(lambda: 0) -206 for i, spec in enumerate(data): -207 if i in scan_df_forspec.scan: -208 # Get ms level -209 level = "ms{}".format(spec.ms_level) -210 -211 # Number of rows -212 N[level] += spec.mz.shape[0] -213 -214 # Second pass: parse -215 for i, spec in enumerate(data): -216 if i in scan_df_forspec.scan: -217 # Number of rows -218 n = spec.mz.shape[0] +160 def get_ms_raw(self, spectra, scan_df, data): +161 """Return a dictionary of mass spectra data as a pandas DataFrame. +162 +163 Parameters +164 ---------- +165 spectra : str +166 Which mass spectra data to include in the output. +167 Options: None, "ms1", "ms2", "all". +168 scan_df : pandas.DataFrame +169 Scan dataframe. Output from get_scan_df(). +170 data : pymzml.run.Reader +171 The mass spectra data. 
+172 +173 Returns +174 ------- +175 dict +176 A dictionary containing the mass spectra data as pandas DataFrames, with keys corresponding to the MS level. +177 +178 """ +179 if spectra == "all": +180 scan_df_forspec = scan_df +181 elif spectra == "ms1": +182 scan_df_forspec = scan_df[scan_df.ms_level == 1] +183 elif spectra == "ms2": +184 scan_df_forspec = scan_df[scan_df.ms_level == 2] +185 else: +186 raise ValueError("spectra must be 'all', 'ms1', or 'ms2'") +187 +188 # Result container +189 res = {} +190 +191 # Row count container +192 counter = {} +193 +194 # Column name container +195 cols = {} +196 +197 # set at float32 +198 dtype = np.float32 +199 +200 # First pass: get nrows +201 N = defaultdict(lambda: 0) +202 for i, spec in enumerate(data): +203 if i in scan_df_forspec.scan: +204 # Get ms level +205 level = "ms{}".format(spec.ms_level) +206 +207 # Number of rows +208 N[level] += spec.mz.shape[0] +209 +210 # Second pass: parse +211 for i, spec in enumerate(data): +212 if i in scan_df_forspec.scan: +213 # Number of rows +214 n = spec.mz.shape[0] +215 +216 # No measurements +217 if n == 0: +218 continue 219 -220 # No measurements -221 if n == 0: -222 continue -223 -224 # Dimension check -225 if len(spec.mz) != len(spec.i): -226 # raise an error if the mz and intensity arrays are not the same length -227 raise ValueError("m/z and intensity array dimension mismatch") -228 -229 # Scan/frame info -230 id_dict = spec.id_dict -231 -232 # Get ms level -233 level = "ms{}".format(spec.ms_level) +220 # Dimension check +221 if len(spec.mz) != len(spec.i): +222 # raise an error if the mz and intensity arrays are not the same length +223 raise ValueError("m/z and intensity array dimension mismatch") +224 +225 # Scan/frame info +226 id_dict = spec.id_dict +227 +228 # Get ms level +229 level = "ms{}".format(spec.ms_level) +230 +231 # Columns +232 cols[level] = list(id_dict.keys()) + ["mz", "intensity"] +233 m = len(cols[level]) 234 -235 # Columns -236 cols[level] = 
list(id_dict.keys()) + ["mz", "intensity"] -237 m = len(cols[level]) +235 # Subarray init +236 arr = np.empty((n, m), dtype=dtype) +237 inx = 0 238 -239 # Subarray init -240 arr = np.empty((n, m), dtype=dtype) -241 inx = 0 -242 -243 # Populate scan/frame info -244 for k, v in id_dict.items(): -245 arr[:, inx] = v -246 inx += 1 +239 # Populate scan/frame info +240 for k, v in id_dict.items(): +241 arr[:, inx] = v +242 inx += 1 +243 +244 # Populate m/z +245 arr[:, inx] = spec.mz +246 inx += 1 247 -248 # Populate m/z -249 arr[:, inx] = spec.mz +248 # Populate intensity +249 arr[:, inx] = spec.i 250 inx += 1 251 -252 # Populate intensity -253 arr[:, inx] = spec.i -254 inx += 1 -255 -256 # Initialize output container -257 if level not in res: -258 res[level] = np.empty((N[level], m), dtype=dtype) -259 counter[level] = 0 +252 # Initialize output container +253 if level not in res: +254 res[level] = np.empty((N[level], m), dtype=dtype) +255 counter[level] = 0 +256 +257 # Insert subarray +258 res[level][counter[level] : counter[level] + n, :] = arr +259 counter[level] += n 260 -261 # Insert subarray -262 res[level][counter[level] : counter[level] + n, :] = arr -263 counter[level] += n -264 -265 # Construct ms1 and ms2 mz dataframes -266 for level in res.keys(): -267 res[level] = pd.DataFrame(res[level], columns=cols[level]).drop( -268 columns=["controllerType", "controllerNumber"], -269 axis=1, -270 inplace=False, -271 ) -272 -273 return res -274 -275 def run(self, spectra="all", scan_df=None): -276 """Parse the mzML file and return a dictionary of spectra dataframes and a scan metadata dataframe. -277 -278 Parameters -279 ---------- -280 spectra : str, optional -281 Which mass spectra data to include in the output. Default is "all". -282 Other options: None, "ms1", "ms2". -283 scan_df : pandas.DataFrame, optional -284 Scan dataframe. If not provided, the scan dataframe is created from the mzML file. 
-285 -286 Returns -287 ------- -288 tuple -289 A tuple containing two elements: -290 - A dictionary containing the mass spectra data as numpy arrays, with keys corresponding to the MS level. -291 - A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time. -292 """ -293 -294 # Open file -295 data = self.load() -296 -297 if scan_df is None: -298 scan_df = self.get_scan_df(data) -299 -300 if spectra != "none": -301 res = self.get_ms_raw(spectra, scan_df, data) -302 -303 else: -304 res = None -305 -306 return res, scan_df -307 -308 def get_mass_spectrum_from_scan( -309 self, scan_number, spectrum_mode, auto_process=True -310 ): -311 """Instatiate a mass spectrum object from the mzML file. -312 -313 Parameters -314 ---------- -315 scan_number : int -316 The scan number to be parsed. -317 spectrum_mode : str -318 The type of spectrum to instantiate. Must be'profile' or 'centroid'. -319 polarity : int -320 The polarity of the scan. Must be -1 or 1. -321 auto_process : bool, optional -322 If True, process the mass spectrum. Default is True. -323 -324 Returns -325 ------- -326 MassSpecProfile | MassSpecCentroid -327 The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum. -328 """ -329 -330 def set_metadata( -331 scan_number: int, -332 polarity: int, -333 file_location: str, -334 label=Labels.thermo_profile, -335 ): -336 """ -337 Set the output parameters for creating a MassSpecProfile or MassSpecCentroid object. -338 -339 Parameters -340 ---------- -341 scan_number : int -342 The scan number. -343 polarity : int -344 The polarity of the data. -345 file_location : str -346 The file location. -347 label : str, optional -348 The label for the mass spectrum. Default is Labels.thermo_profile. -349 -350 Returns -351 ------- -352 dict -353 The output parameters ready for creating a MassSpecProfile or MassSpecCentroid object. 
-354 """ -355 d_params = default_parameters(file_location) -356 d_params["label"] = label -357 d_params["polarity"] = polarity -358 d_params["filename_path"] = file_location -359 d_params["scan_number"] = scan_number -360 -361 return d_params -362 -363 # Open file -364 data = self.load() -365 -366 # Pluck out individual scan mz and intensity -367 spec = data[scan_number] -368 -369 # Get polarity -370 if spec["negative scan"] is not None: -371 polarity = -1 -372 elif spec["positive scan"] is not None: -373 polarity = 1 -374 -375 # Get mass spectrum -376 if spectrum_mode == "profile": -377 # Check if profile -378 if not spec.get("MS:1000128"): -379 raise ValueError("spectrum is not profile") -380 data_dict = { -381 Labels.mz: spec.mz, -382 Labels.abundance: spec.i, -383 } -384 d_params = set_metadata( -385 scan_number, -386 polarity, -387 self.file_location, -388 label=Labels.simulated_profile, -389 ) -390 mass_spectrum_obj = mass_spectrum_obj = MassSpecProfile( -391 data_dict, d_params, auto_process=auto_process -392 ) -393 elif spectrum_mode == "centroid": -394 # Check if centroided -395 if not spec.get("MS:1000127"): -396 raise ValueError("spectrum is not centroided") -397 data_dict = { -398 Labels.mz: spec.mz, -399 Labels.abundance: spec.i, -400 Labels.rp: [np.nan] * len(spec.mz), -401 Labels.s2n: [np.nan] * len(spec.i), -402 } -403 d_params = set_metadata( -404 scan_number, polarity, self.file_location, label=Labels.corems_centroid -405 ) -406 mass_spectrum_obj = MassSpecCentroid( -407 data_dict, d_params, auto_process=auto_process -408 ) -409 -410 return mass_spectrum_obj +261 # Construct ms1 and ms2 mz dataframes +262 for level in res.keys(): +263 res[level] = pd.DataFrame(res[level], columns=cols[level]).drop( +264 columns=["controllerType", "controllerNumber"], +265 axis=1, +266 inplace=False, +267 ) +268 +269 return res +270 +271 def run(self, spectra="all", scan_df=None): +272 """Parse the mzML file and return a dictionary of spectra dataframes and a scan 
metadata dataframe. +273 +274 Parameters +275 ---------- +276 spectra : str, optional +277 Which mass spectra data to include in the output. Default is "all". +278 Other options: None, "ms1", "ms2". +279 scan_df : pandas.DataFrame, optional +280 Scan dataframe. If not provided, the scan dataframe is created from the mzML file. +281 +282 Returns +283 ------- +284 tuple +285 A tuple containing two elements: +286 - A dictionary containing the mass spectra data as numpy arrays, with keys corresponding to the MS level. +287 - A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time. +288 """ +289 +290 # Open file +291 data = self.load() +292 +293 if scan_df is None: +294 scan_df = self.get_scan_df(data) +295 +296 if spectra != "none": +297 res = self.get_ms_raw(spectra, scan_df, data) +298 +299 else: +300 res = None +301 +302 return res, scan_df +303 +304 def get_mass_spectrum_from_scan( +305 self, scan_number, spectrum_mode, auto_process=True +306 ): +307 """Instatiate a mass spectrum object from the mzML file. +308 +309 Parameters +310 ---------- +311 scan_number : int +312 The scan number to be parsed. +313 spectrum_mode : str +314 The type of spectrum to instantiate. Must be'profile' or 'centroid'. +315 polarity : int +316 The polarity of the scan. Must be -1 or 1. +317 auto_process : bool, optional +318 If True, process the mass spectrum. Default is True. +319 +320 Returns +321 ------- +322 MassSpecProfile | MassSpecCentroid +323 The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum. +324 """ +325 +326 def set_metadata( +327 scan_number: int, +328 polarity: int, +329 file_location: str, +330 label=Labels.thermo_profile, +331 ): +332 """ +333 Set the output parameters for creating a MassSpecProfile or MassSpecCentroid object. +334 +335 Parameters +336 ---------- +337 scan_number : int +338 The scan number. +339 polarity : int +340 The polarity of the data. 
+341 file_location : str +342 The file location. +343 label : str, optional +344 The label for the mass spectrum. Default is Labels.thermo_profile. +345 +346 Returns +347 ------- +348 dict +349 The output parameters ready for creating a MassSpecProfile or MassSpecCentroid object. +350 """ +351 d_params = default_parameters(file_location) +352 d_params["label"] = label +353 d_params["polarity"] = polarity +354 d_params["filename_path"] = file_location +355 d_params["scan_number"] = scan_number +356 +357 return d_params +358 +359 # Open file +360 data = self.load() +361 +362 # Pluck out individual scan mz and intensity +363 spec = data[scan_number] +364 +365 # Get polarity +366 if spec["negative scan"] is not None: +367 polarity = -1 +368 elif spec["positive scan"] is not None: +369 polarity = 1 +370 +371 # Get mass spectrum +372 if spectrum_mode == "profile": +373 # Check if profile +374 if not spec.get("MS:1000128"): +375 raise ValueError("spectrum is not profile") +376 data_dict = { +377 Labels.mz: spec.mz, +378 Labels.abundance: spec.i, +379 } +380 d_params = set_metadata( +381 scan_number, +382 polarity, +383 self.file_location, +384 label=Labels.simulated_profile, +385 ) +386 mass_spectrum_obj = mass_spectrum_obj = MassSpecProfile( +387 data_dict, d_params, auto_process=auto_process +388 ) +389 elif spectrum_mode == "centroid": +390 # Check if centroided +391 if not spec.get("MS:1000127"): +392 raise ValueError("spectrum is not centroided") +393 data_dict = { +394 Labels.mz: spec.mz, +395 Labels.abundance: spec.i, +396 Labels.rp: [np.nan] * len(spec.mz), +397 Labels.s2n: [np.nan] * len(spec.i), +398 } +399 d_params = set_metadata( +400 scan_number, polarity, self.file_location, label=Labels.corems_centroid +401 ) +402 mass_spectrum_obj = MassSpecCentroid( +403 data_dict, d_params, auto_process=auto_process +404 ) +405 +406 return mass_spectrum_obj +407 +408 def get_mass_spectra_obj(self): +409 """Instatiate a MassSpectraBase object from the mzML file. 
+410 411 -412 def get_mass_spectra_obj(self): -413 """Instatiate a MassSpectraBase object from the mzML file. -414 -415 -416 Returns -417 ------- -418 MassSpectraBase -419 The MassSpectra object containing the parsed mass spectra. -420 The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe. -421 """ -422 _, scan_df = self.run(spectra=False) -423 mass_spectra_obj = MassSpectraBase( -424 self.file_location, -425 self.analyzer, -426 self.instrument_label, -427 self.sample_name, -428 self, -429 ) -430 scan_df = scan_df.set_index("scan", drop=False) -431 mass_spectra_obj.scan_df = scan_df -432 -433 return mass_spectra_obj -434 -435 def get_lcms_obj(self, spectra="all"): -436 """Instatiates a LCMSBase object from the mzML file. -437 -438 Parameters -439 ---------- -440 spectra : str, optional -441 Which mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2. -442 -443 Returns -444 ------- -445 LCMSBase -446 LCMS object containing mass spectra data. -447 The object is instatiated with the mzML file, analyzer, instrument, sample name, scan dataframe, -448 and mz dataframe(s), as well as lists of scan numbers, retention times, and TICs. 
-449 """ -450 _, scan_df = self.run(spectra="none") # first run it to just get scan info -451 res, scan_df = self.run( -452 scan_df=scan_df, spectra=spectra -453 ) # second run to parse data -454 lcms_obj = LCMSBase( -455 self.file_location, -456 self.analyzer, -457 self.instrument_label, -458 self.sample_name, -459 self, -460 ) -461 for key in res: -462 key_int = int(key.replace("ms", "")) -463 res[key] = res[key][res[key].intensity > 0] -464 res[key] = res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True) -465 lcms_obj._ms_unprocessed[key_int] = res[key] -466 lcms_obj.scan_df = scan_df.set_index("scan", drop=False) -467 # Check if polarity is mixed -468 if len(set(scan_df.polarity)) > 1: -469 raise ValueError("Mixed polarities detected in scan data") -470 lcms_obj.polarity = scan_df.polarity[0] -471 lcms_obj._scans_number_list = list(scan_df.scan) -472 lcms_obj._retention_time_list = list(scan_df.scan_time) -473 lcms_obj._tic_list = list(scan_df.tic) -474 -475 return lcms_obj +412 Returns +413 ------- +414 MassSpectraBase +415 The MassSpectra object containing the parsed mass spectra. +416 The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe. +417 """ +418 _, scan_df = self.run(spectra=False) +419 mass_spectra_obj = MassSpectraBase( +420 self.file_location, +421 self.analyzer, +422 self.instrument_label, +423 self.sample_name, +424 self, +425 ) +426 scan_df = scan_df.set_index("scan", drop=False) +427 mass_spectra_obj.scan_df = scan_df +428 +429 return mass_spectra_obj +430 +431 def get_lcms_obj(self, spectra="all"): +432 """Instatiates a LCMSBase object from the mzML file. +433 +434 Parameters +435 ---------- +436 spectra : str, optional +437 Which mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2. +438 +439 Returns +440 ------- +441 LCMSBase +442 LCMS object containing mass spectra data. 
+443 The object is instatiated with the mzML file, analyzer, instrument, sample name, scan dataframe, +444 and mz dataframe(s), as well as lists of scan numbers, retention times, and TICs. +445 """ +446 _, scan_df = self.run(spectra="none") # first run it to just get scan info +447 res, scan_df = self.run( +448 scan_df=scan_df, spectra=spectra +449 ) # second run to parse data +450 lcms_obj = LCMSBase( +451 self.file_location, +452 self.analyzer, +453 self.instrument_label, +454 self.sample_name, +455 self, +456 ) +457 for key in res: +458 key_int = int(key.replace("ms", "")) +459 res[key] = res[key][res[key].intensity > 0] +460 res[key] = res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True) +461 lcms_obj._ms_unprocessed[key_int] = res[key] +462 lcms_obj.scan_df = scan_df.set_index("scan", drop=False) +463 # Check if polarity is mixed +464 if len(set(scan_df.polarity)) > 1: +465 raise ValueError("Mixed polarities detected in scan data") +466 lcms_obj.polarity = scan_df.polarity[0] +467 lcms_obj._scans_number_list = list(scan_df.scan) +468 lcms_obj._retention_time_list = list(scan_df.scan_time) +469 lcms_obj._tic_list = list(scan_df.tic) +470 +471 return lcms_obj

    @@ -580,463 +576,460 @@

    -
     20class MZMLSpectraParser(SpectraParserInterface):
    - 21    """A class for parsing mzml spectrometry data files into MassSpectraBase or LCMSBase objects
    - 22
    - 23    Parameters
    - 24    ----------
    - 25    file_location : str or Path
    - 26        The path to the RAW file to be parsed.
    - 27    analyzer : str, optional
    - 28        The type of mass analyzer used in the instrument. Default is "Unknown".
    - 29    instrument_label : str, optional
    - 30        The name of the instrument used to acquire the data. Default is "Unknown".
    - 31    sample_name : str, optional
    - 32        The name of the sample being analyzed. If not provided, the stem of the file_location path will be used.
    - 33
    - 34    Attributes
    - 35    ----------
    - 36    file_location : Path
    - 37        The path to the RAW file being parsed.
    - 38    analyzer : str
    - 39        The type of mass analyzer used in the instrument.
    - 40    instrument_label : str
    - 41        The name of the instrument used to acquire the data.
    - 42    sample_name : str
    - 43        The name of the sample being analyzed.
    - 44
    - 45    Methods
    - 46    -------
    - 47    * load().
    - 48        Load mzML file using pymzml.run.Reader and return the data as a numpy array.
    - 49    * run(spectra=True).
    - 50        Parses the mzml file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe.
    - 51    * get_mass_spectrum_from_scan(scan_number, polarity, auto_process=True)
    - 52        Parses the mzml file and returns a MassSpecBase object from a single scan.
    - 53    * get_mass_spectra_obj().
    - 54        Parses the mzml file and instantiates a MassSpectraBase object.
    - 55    * get_lcms_obj().
    - 56        Parses the mzml file and instantiates an LCMSBase object.
    - 57
    - 58    Inherits from ThermoBaseClass and SpectraParserInterface
    - 59    """
    - 60
    - 61    def __init__(
    - 62        self,
    - 63        file_location,
    - 64        analyzer="Unknown",
    - 65        instrument_label="Unknown",
    - 66        sample_name=None,
    - 67    ):
    - 68        # implementation details
    - 69        if isinstance(file_location, str):
    - 70            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    - 71            file_location = Path(file_location)
    - 72        if not file_location.exists():
    - 73            raise FileExistsError("File does not exist: " + str(file_location))
    - 74        self.file_location = file_location
    - 75        self.analyzer = analyzer
    - 76        self.instrument_label = instrument_label
    - 77
    - 78        if sample_name:
    - 79            self.sample_name = sample_name
    - 80        else:
    - 81            self.sample_name = file_location.stem
    - 82
    - 83    def load(self):
    - 84        """
    - 85        Load mzML file using pymzml.run.Reader and return the data as a numpy array.
    - 86
    - 87        Returns
    - 88        -------
    - 89        numpy.ndarray
    - 90            The mass spectra data as a numpy array.
    - 91        """
    - 92        data = pymzml.run.Reader(self.file_location)
    - 93        return data
    - 94
    - 95    def get_scan_df(self, data):
    - 96        """
    - 97        Return scan data as a pandas DataFrame.
    - 98
    - 99        Parameters
    -100        ----------
    -101        data : pymzml.run.Reader
    -102            The mass spectra data.
    -103
    -104        Returns
    -105        -------
    -106        pandas.DataFrame
    -107            A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    -108        """
    -109        # Scan dict
    -110        # instatinate scan dict, with empty lists of size of scans
    -111        n_scans = data.get_spectrum_count()
    -112        scan_dict = {
    -113            "scan": np.empty(n_scans, dtype=np.int32),
    -114            "scan_time": np.empty(n_scans, dtype=np.float32),
    -115            "ms_level": [None] * n_scans,
    -116            "polarity": [None] * n_scans,
    -117            "precursor_mz": [None] * n_scans,
    -118            "scan_text": [None] * n_scans,
    -119            "scan_window_lower": np.empty(n_scans, dtype=np.float32),
    -120            "scan_window_upper": np.empty(n_scans, dtype=np.float32),
    -121            "scan_precision": [None] * n_scans,
    -122            "tic": np.empty(n_scans, dtype=np.float32),
    -123            "ms_format": [None] * n_scans,
    -124        }
    -125
    -126        # First pass: loop through scans to get scan info
    -127        for i, spec in enumerate(data):
    -128            scan_dict["scan"][i] = spec.ID
    -129            scan_dict["ms_level"][i] = spec.ms_level
    -130            scan_dict["scan_precision"][i] = spec._measured_precision
    -131            scan_dict["tic"][i] = spec.TIC
    -132            if spec.selected_precursors:
    -133                scan_dict["precursor_mz"][i] = spec.selected_precursors[0].get(
    -134                    "mz", None
    -135                )
    -136            if spec["negative scan"] is not None:
    -137                scan_dict["polarity"][i] = "negative"
    -138            if spec["positive scan"] is not None:
    -139                scan_dict["polarity"][i] = "positive"
    -140            if (
    -141                spec["negative scan"] is not None
    -142                and spec["positive scan"] is not None
    -143            ):
    -144                raise ValueError(
    -145                    "Error: scan {0} has both negative and positive polarity".format(
    -146                        spec.ID
    -147                    )
    -148                )
    -149
    -150            scan_dict["scan_time"][i] = spec.get("MS:1000016")
    -151            scan_dict["scan_text"][i] = spec.get("MS:1000512")
    -152            scan_dict["scan_window_lower"][i] = spec.get("MS:1000501")
    -153            scan_dict["scan_window_upper"][i] = spec.get("MS:1000500")
    -154            if spec.get("MS:1000128"):
    -155                scan_dict["ms_format"][i] = "profile"
    -156            elif spec.get("MS:1000127"):
    -157                scan_dict["ms_format"][i] = "centroid"
    -158            else:
    -159                scan_dict["ms_format"][i] = None
    +            
     19class MZMLSpectraParser(SpectraParserInterface):
    + 20    """A class for parsing mzml spectrometry data files into MassSpectraBase or LCMSBase objects
    + 21
    + 22    Parameters
    + 23    ----------
    + 24    file_location : str or Path
    + 25        The path to the RAW file to be parsed.
    + 26    analyzer : str, optional
    + 27        The type of mass analyzer used in the instrument. Default is "Unknown".
    + 28    instrument_label : str, optional
    + 29        The name of the instrument used to acquire the data. Default is "Unknown".
    + 30    sample_name : str, optional
    + 31        The name of the sample being analyzed. If not provided, the stem of the file_location path will be used.
    + 32
    + 33    Attributes
    + 34    ----------
    + 35    file_location : Path
    + 36        The path to the RAW file being parsed.
    + 37    analyzer : str
    + 38        The type of mass analyzer used in the instrument.
    + 39    instrument_label : str
    + 40        The name of the instrument used to acquire the data.
    + 41    sample_name : str
    + 42        The name of the sample being analyzed.
    + 43
    + 44    Methods
    + 45    -------
    + 46    * load().
    + 47        Load mzML file using pymzml.run.Reader and return the data as a numpy array.
    + 48    * run(spectra=True).
    + 49        Parses the mzml file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe.
    + 50    * get_mass_spectrum_from_scan(scan_number, polarity, auto_process=True)
    + 51        Parses the mzml file and returns a MassSpecBase object from a single scan.
    + 52    * get_mass_spectra_obj().
    + 53        Parses the mzml file and instantiates a MassSpectraBase object.
    + 54    * get_lcms_obj().
    + 55        Parses the mzml file and instantiates an LCMSBase object.
    + 56
    + 57    Inherits from ThermoBaseClass and SpectraParserInterface
    + 58    """
    + 59
    + 60    def __init__(
    + 61        self,
    + 62        file_location,
    + 63        analyzer="Unknown",
    + 64        instrument_label="Unknown",
    + 65        sample_name=None,
    + 66    ):
    + 67        # implementation details
    + 68        if isinstance(file_location, str):
    + 69            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    + 70            file_location = Path(file_location)
    + 71        if not file_location.exists():
    + 72            raise FileExistsError("File does not exist: " + str(file_location))
    + 73        self.file_location = file_location
    + 74        self.analyzer = analyzer
    + 75        self.instrument_label = instrument_label
    + 76
    + 77        if sample_name:
    + 78            self.sample_name = sample_name
    + 79        else:
    + 80            self.sample_name = file_location.stem
    + 81
    + 82    def load(self):
    + 83        """
    + 84        Load mzML file using pymzml.run.Reader and return the reader object.
    + 85
    + 86        Returns
    + 87        -------
    + 88        pymzml.run.Reader
    + 89            The reader object giving access to the spectra in the mzML file.
    + 90        """
    + 91        data = pymzml.run.Reader(self.file_location)
    + 92        return data
    + 93
    + 94    def get_scan_df(self, data):
    + 95        """
    + 96        Return scan data as a pandas DataFrame.
    + 97
    + 98        Parameters
    + 99        ----------
    +100        data : pymzml.run.Reader
    +101            The mass spectra data.
    +102
    +103        Returns
    +104        -------
    +105        pandas.DataFrame
    +106            A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    +107        """
    +108        # Scan dict
    +109        # instantiate scan dict, with empty lists sized to the number of scans
    +110        n_scans = data.get_spectrum_count()
    +111        scan_dict = {
    +112            "scan": np.empty(n_scans, dtype=np.int32),
    +113            "scan_time": np.empty(n_scans, dtype=np.float32),
    +114            "ms_level": [None] * n_scans,
    +115            "polarity": [None] * n_scans,
    +116            "precursor_mz": [None] * n_scans,
    +117            "scan_text": [None] * n_scans,
    +118            "scan_window_lower": np.empty(n_scans, dtype=np.float32),
    +119            "scan_window_upper": np.empty(n_scans, dtype=np.float32),
    +120            "scan_precision": [None] * n_scans,
    +121            "tic": np.empty(n_scans, dtype=np.float32),
    +122            "ms_format": [None] * n_scans,
    +123        }
    +124
    +125        # First pass: loop through scans to get scan info
    +126        for i, spec in enumerate(data):
    +127            scan_dict["scan"][i] = spec.ID
    +128            scan_dict["ms_level"][i] = spec.ms_level
    +129            scan_dict["scan_precision"][i] = spec._measured_precision
    +130            scan_dict["tic"][i] = spec.TIC
    +131            if spec.selected_precursors:
    +132                scan_dict["precursor_mz"][i] = spec.selected_precursors[0].get(
    +133                    "mz", None
    +134                )
    +135            if spec["negative scan"] is not None:
    +136                scan_dict["polarity"][i] = "negative"
    +137            if spec["positive scan"] is not None:
    +138                scan_dict["polarity"][i] = "positive"
    +139            if spec["negative scan"] is not None and spec["positive scan"] is not None:
    +140                raise ValueError(
    +141                    "Error: scan {0} has both negative and positive polarity".format(
    +142                        spec.ID
    +143                    )
    +144                )
    +145
    +146            scan_dict["scan_time"][i] = spec.get("MS:1000016")
    +147            scan_dict["scan_text"][i] = spec.get("MS:1000512")
    +148            scan_dict["scan_window_lower"][i] = spec.get("MS:1000501")
    +149            scan_dict["scan_window_upper"][i] = spec.get("MS:1000500")
    +150            if spec.get("MS:1000128"):
    +151                scan_dict["ms_format"][i] = "profile"
    +152            elif spec.get("MS:1000127"):
    +153                scan_dict["ms_format"][i] = "centroid"
    +154            else:
    +155                scan_dict["ms_format"][i] = None
    +156
    +157        scan_df = pd.DataFrame(scan_dict)
    +158
    +159        return scan_df
     160
    -161        scan_df = pd.DataFrame(scan_dict)
    -162
    -163        return scan_df
    -164
    -165    def get_ms_raw(self, spectra, scan_df, data):
    -166        """Return a dictionary of mass spectra data as a pandas DataFrame.
    -167
    -168        Parameters
    -169        ----------
    -170        spectra : str
    -171            Which mass spectra data to include in the output. 
    -172            Options: None, "ms1", "ms2", "all".
    -173        scan_df : pandas.DataFrame
    -174            Scan dataframe. Output from get_scan_df().
    -175        data : pymzml.run.Reader
    -176            The mass spectra data.
    -177
    -178        Returns
    -179        -------
    -180        dict
    -181            A dictionary containing the mass spectra data as pandas DataFrames, with keys corresponding to the MS level.
    -182        
    -183        """
    -184        if spectra == "all":
    -185            scan_df_forspec = scan_df
    -186        elif spectra == "ms1":
    -187            scan_df_forspec = scan_df[scan_df.ms_level == 1]
    -188        elif spectra == "ms2":
    -189            scan_df_forspec = scan_df[scan_df.ms_level == 2]
    -190        else:
    -191            raise ValueError("spectra must be 'all', 'ms1', or 'ms2'")
    -192
    -193        # Result container
    -194        res = {}
    -195
    -196        # Row count container
    -197        counter = {}
    -198
    -199        # Column name container
    -200        cols = {}
    -201
    -202        # set at float32
    -203        dtype = np.float32
    -204
    -205        # First pass: get nrows
    -206        N = defaultdict(lambda: 0)
    -207        for i, spec in enumerate(data):
    -208            if i in scan_df_forspec.scan:
    -209                # Get ms level
    -210                level = "ms{}".format(spec.ms_level)
    -211
    -212                # Number of rows
    -213                N[level] += spec.mz.shape[0]
    -214
    -215        # Second pass: parse
    -216        for i, spec in enumerate(data):
    -217            if i in scan_df_forspec.scan:
    -218                # Number of rows
    -219                n = spec.mz.shape[0]
    +161    def get_ms_raw(self, spectra, scan_df, data):
    +162        """Return a dictionary of mass spectra data as a pandas DataFrame.
    +163
    +164        Parameters
    +165        ----------
    +166        spectra : str
    +167            Which mass spectra data to include in the output.
    +168            Options: "ms1", "ms2", "all".
    +169        scan_df : pandas.DataFrame
    +170            Scan dataframe. Output from get_scan_df().
    +171        data : pymzml.run.Reader
    +172            The mass spectra data.
    +173
    +174        Returns
    +175        -------
    +176        dict
    +177            A dictionary containing the mass spectra data as pandas DataFrames, with keys corresponding to the MS level.
    +178
    +179        """
    +180        if spectra == "all":
    +181            scan_df_forspec = scan_df
    +182        elif spectra == "ms1":
    +183            scan_df_forspec = scan_df[scan_df.ms_level == 1]
    +184        elif spectra == "ms2":
    +185            scan_df_forspec = scan_df[scan_df.ms_level == 2]
    +186        else:
    +187            raise ValueError("spectra must be 'all', 'ms1', or 'ms2'")
    +188
    +189        # Result container
    +190        res = {}
    +191
    +192        # Row count container
    +193        counter = {}
    +194
    +195        # Column name container
    +196        cols = {}
    +197
    +198        # set at float32
    +199        dtype = np.float32
    +200
    +201        # First pass: get nrows
    +202        N = defaultdict(lambda: 0)
    +203        for i, spec in enumerate(data):
    +204            if i in scan_df_forspec.scan:
    +205                # Get ms level
    +206                level = "ms{}".format(spec.ms_level)
    +207
    +208                # Number of rows
    +209                N[level] += spec.mz.shape[0]
    +210
    +211        # Second pass: parse
    +212        for i, spec in enumerate(data):
    +213            if i in scan_df_forspec.scan:
    +214                # Number of rows
    +215                n = spec.mz.shape[0]
    +216
    +217                # No measurements
    +218                if n == 0:
    +219                    continue
     220
    -221                # No measurements
    -222                if n == 0:
    -223                    continue
    -224
    -225                # Dimension check
    -226                if len(spec.mz) != len(spec.i):
    -227                    # raise an error if the mz and intensity arrays are not the same length
    -228                    raise ValueError("m/z and intensity array dimension mismatch")
    -229
    -230                # Scan/frame info
    -231                id_dict = spec.id_dict
    -232
    -233                # Get ms level
    -234                level = "ms{}".format(spec.ms_level)
    +221                # Dimension check
    +222                if len(spec.mz) != len(spec.i):
    +223                    # raise an error if the mz and intensity arrays are not the same length
    +224                    raise ValueError("m/z and intensity array dimension mismatch")
    +225
    +226                # Scan/frame info
    +227                id_dict = spec.id_dict
    +228
    +229                # Get ms level
    +230                level = "ms{}".format(spec.ms_level)
    +231
    +232                # Columns
    +233                cols[level] = list(id_dict.keys()) + ["mz", "intensity"]
    +234                m = len(cols[level])
     235
    -236                # Columns
    -237                cols[level] = list(id_dict.keys()) + ["mz", "intensity"]
    -238                m = len(cols[level])
    +236                # Subarray init
    +237                arr = np.empty((n, m), dtype=dtype)
    +238                inx = 0
     239
    -240                # Subarray init
    -241                arr = np.empty((n, m), dtype=dtype)
    -242                inx = 0
    -243
    -244                # Populate scan/frame info
    -245                for k, v in id_dict.items():
    -246                    arr[:, inx] = v
    -247                    inx += 1
    +240                # Populate scan/frame info
    +241                for k, v in id_dict.items():
    +242                    arr[:, inx] = v
    +243                    inx += 1
    +244
    +245                # Populate m/z
    +246                arr[:, inx] = spec.mz
    +247                inx += 1
     248
    -249                # Populate m/z
    -250                arr[:, inx] = spec.mz
    +249                # Populate intensity
    +250                arr[:, inx] = spec.i
     251                inx += 1
     252
    -253                # Populate intensity
    -254                arr[:, inx] = spec.i
    -255                inx += 1
    -256
    -257                # Initialize output container
    -258                if level not in res:
    -259                    res[level] = np.empty((N[level], m), dtype=dtype)
    -260                    counter[level] = 0
    +253                # Initialize output container
    +254                if level not in res:
    +255                    res[level] = np.empty((N[level], m), dtype=dtype)
    +256                    counter[level] = 0
    +257
    +258                # Insert subarray
    +259                res[level][counter[level] : counter[level] + n, :] = arr
    +260                counter[level] += n
     261
    -262                # Insert subarray
    -263                res[level][counter[level] : counter[level] + n, :] = arr
    -264                counter[level] += n
    -265
    -266        # Construct ms1 and ms2 mz dataframes
    -267        for level in res.keys():
    -268            res[level] = pd.DataFrame(res[level], columns=cols[level]).drop(
    -269                columns=["controllerType", "controllerNumber"],
    -270                axis=1,
    -271                inplace=False,
    -272            )
    -273        
    -274        return res
    -275
    -276    def run(self, spectra="all", scan_df=None):
    -277        """Parse the mzML file and return a dictionary of spectra dataframes and a scan metadata dataframe.
    -278
    -279        Parameters
    -280        ----------
    -281        spectra : str, optional
    -282            Which mass spectra data to include in the output. Default is "all".
    -283            Other options: None, "ms1", "ms2".
    -284        scan_df : pandas.DataFrame, optional
    -285            Scan dataframe.  If not provided, the scan dataframe is created from the mzML file.
    -286
    -287        Returns
    -288        -------
    -289        tuple
    -290            A tuple containing two elements:
    -291            - A dictionary containing the mass spectra data as numpy arrays, with keys corresponding to the MS level.
    -292            - A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    -293        """
    -294
    -295        # Open file
    -296        data = self.load()
    -297
    -298        if scan_df is None:
    -299            scan_df = self.get_scan_df(data)
    -300
    -301        if spectra != "none":
    -302            res = self.get_ms_raw(spectra, scan_df, data)
    -303            
    -304        else:
    -305            res = None
    -306
    -307        return res, scan_df
    -308
    -309    def get_mass_spectrum_from_scan(
    -310        self, scan_number, spectrum_mode, auto_process=True
    -311    ):
    -312        """Instatiate a mass spectrum object from the mzML file.
    -313
    -314        Parameters
    -315        ----------
    -316        scan_number : int
    -317            The scan number to be parsed.
    -318        spectrum_mode : str
    -319            The type of spectrum to instantiate.  Must be'profile' or 'centroid'.
    -320        polarity : int
    -321            The polarity of the scan.  Must be -1 or 1.
    -322        auto_process : bool, optional
    -323            If True, process the mass spectrum. Default is True.
    -324
    -325        Returns
    -326        -------
    -327        MassSpecProfile | MassSpecCentroid
    -328            The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    -329        """
    -330
    -331        def set_metadata(
    -332            scan_number: int,
    -333            polarity: int,
    -334            file_location: str,
    -335            label=Labels.thermo_profile,
    -336        ):
    -337            """
    -338            Set the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
    -339
    -340            Parameters
    -341            ----------
    -342            scan_number : int
    -343                The scan number.
    -344            polarity : int
    -345                The polarity of the data.
    -346            file_location : str
    -347                The file location.
    -348            label : str, optional
    -349                The label for the mass spectrum. Default is Labels.thermo_profile.
    -350
    -351            Returns
    -352            -------
    -353            dict
    -354                The output parameters ready for creating a MassSpecProfile or MassSpecCentroid object.
    -355            """
    -356            d_params = default_parameters(file_location)
    -357            d_params["label"] = label
    -358            d_params["polarity"] = polarity
    -359            d_params["filename_path"] = file_location
    -360            d_params["scan_number"] = scan_number
    -361
    -362            return d_params
    -363
    -364        # Open file
    -365        data = self.load()
    -366
    -367        # Pluck out individual scan mz and intensity
    -368        spec = data[scan_number]
    -369
    -370        # Get polarity
    -371        if spec["negative scan"] is not None:
    -372            polarity = -1
    -373        elif spec["positive scan"] is not None:
    -374            polarity = 1
    -375
    -376        # Get mass spectrum
    -377        if spectrum_mode == "profile":
    -378            # Check if profile
    -379            if not spec.get("MS:1000128"):
    -380                raise ValueError("spectrum is not profile")
    -381            data_dict = {
    -382                Labels.mz: spec.mz,
    -383                Labels.abundance: spec.i,
    -384            }
    -385            d_params = set_metadata(
    -386                scan_number,
    -387                polarity,
    -388                self.file_location,
    -389                label=Labels.simulated_profile,
    -390            )
    -391            mass_spectrum_obj = mass_spectrum_obj = MassSpecProfile(
    -392                data_dict, d_params, auto_process=auto_process
    -393            )
    -394        elif spectrum_mode == "centroid":
    -395            # Check if centroided
    -396            if not spec.get("MS:1000127"):
    -397                raise ValueError("spectrum is not centroided")
    -398            data_dict = {
    -399                Labels.mz: spec.mz,
    -400                Labels.abundance: spec.i,
    -401                Labels.rp: [np.nan] * len(spec.mz),
    -402                Labels.s2n: [np.nan] * len(spec.i),
    -403            }
    -404            d_params = set_metadata(
    -405                scan_number, polarity, self.file_location, label=Labels.corems_centroid
    -406            )
    -407            mass_spectrum_obj = MassSpecCentroid(
    -408                data_dict, d_params, auto_process=auto_process
    -409            )
    -410
    -411        return mass_spectrum_obj
    +262        # Construct ms1 and ms2 mz dataframes
    +263        for level in res.keys():
    +264            res[level] = pd.DataFrame(res[level], columns=cols[level]).drop(
    +265                columns=["controllerType", "controllerNumber"],
    +266                axis=1,
    +267                inplace=False,
    +268            )
    +269
    +270        return res
    +271
    +272    def run(self, spectra="all", scan_df=None):
    +273        """Parse the mzML file and return a dictionary of spectra dataframes and a scan metadata dataframe.
    +274
    +275        Parameters
    +276        ----------
    +277        spectra : str, optional
    +278            Which mass spectra data to include in the output. Default is "all".
    +279            Other options: "none", "ms1", "ms2".
    +280        scan_df : pandas.DataFrame, optional
    +281            Scan dataframe.  If not provided, the scan dataframe is created from the mzML file.
    +282
    +283        Returns
    +284        -------
    +285        tuple
    +286            A tuple containing two elements:
    +287            - A dictionary containing the mass spectra data as pandas DataFrames, with keys corresponding to the MS level (None if spectra is "none").
    +288            - A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    +289        """
    +290
    +291        # Open file
    +292        data = self.load()
    +293
    +294        if scan_df is None:
    +295            scan_df = self.get_scan_df(data)
    +296
    +297        if spectra != "none":
    +298            res = self.get_ms_raw(spectra, scan_df, data)
    +299
    +300        else:
    +301            res = None
    +302
    +303        return res, scan_df
    +304
    +305    def get_mass_spectrum_from_scan(
    +306        self, scan_number, spectrum_mode, auto_process=True
    +307    ):
    +308        """Instantiate a mass spectrum object from the mzML file.
    +309
    +310        Parameters
    +311        ----------
    +312        scan_number : int
    +313            The scan number to be parsed.
    +314        spectrum_mode : str
    +315            The type of spectrum to instantiate.  Must be 'profile' or 'centroid'.
    +316        polarity : int
    +317            The polarity of the scan.  Must be -1 or 1.
    +318        auto_process : bool, optional
    +319            If True, process the mass spectrum. Default is True.
    +320
    +321        Returns
    +322        -------
    +323        MassSpecProfile | MassSpecCentroid
    +324            The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    +325        """
    +326
    +327        def set_metadata(
    +328            scan_number: int,
    +329            polarity: int,
    +330            file_location: str,
    +331            label=Labels.thermo_profile,
    +332        ):
    +333            """
    +334            Set the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
    +335
    +336            Parameters
    +337            ----------
    +338            scan_number : int
    +339                The scan number.
    +340            polarity : int
    +341                The polarity of the data.
    +342            file_location : str
    +343                The file location.
    +344            label : str, optional
    +345                The label for the mass spectrum. Default is Labels.thermo_profile.
    +346
    +347            Returns
    +348            -------
    +349            dict
    +350                The output parameters ready for creating a MassSpecProfile or MassSpecCentroid object.
    +351            """
    +352            d_params = default_parameters(file_location)
    +353            d_params["label"] = label
    +354            d_params["polarity"] = polarity
    +355            d_params["filename_path"] = file_location
    +356            d_params["scan_number"] = scan_number
    +357
    +358            return d_params
    +359
    +360        # Open file
    +361        data = self.load()
    +362
    +363        # Pluck out individual scan mz and intensity
    +364        spec = data[scan_number]
    +365
    +366        # Get polarity
    +367        if spec["negative scan"] is not None:
    +368            polarity = -1
    +369        elif spec["positive scan"] is not None:
    +370            polarity = 1
    +371
    +372        # Get mass spectrum
    +373        if spectrum_mode == "profile":
    +374            # Check if profile
    +375            if not spec.get("MS:1000128"):
    +376                raise ValueError("spectrum is not profile")
    +377            data_dict = {
    +378                Labels.mz: spec.mz,
    +379                Labels.abundance: spec.i,
    +380            }
    +381            d_params = set_metadata(
    +382                scan_number,
    +383                polarity,
    +384                self.file_location,
    +385                label=Labels.simulated_profile,
    +386            )
    +387            mass_spectrum_obj = mass_spectrum_obj = MassSpecProfile(
    +388                data_dict, d_params, auto_process=auto_process
    +389            )
    +390        elif spectrum_mode == "centroid":
    +391            # Check if centroided
    +392            if not spec.get("MS:1000127"):
    +393                raise ValueError("spectrum is not centroided")
    +394            data_dict = {
    +395                Labels.mz: spec.mz,
    +396                Labels.abundance: spec.i,
    +397                Labels.rp: [np.nan] * len(spec.mz),
    +398                Labels.s2n: [np.nan] * len(spec.i),
    +399            }
    +400            d_params = set_metadata(
    +401                scan_number, polarity, self.file_location, label=Labels.corems_centroid
    +402            )
    +403            mass_spectrum_obj = MassSpecCentroid(
    +404                data_dict, d_params, auto_process=auto_process
    +405            )
    +406
    +407        return mass_spectrum_obj
    +408
    +409    def get_mass_spectra_obj(self):
    +410        """Instantiate a MassSpectraBase object from the mzML file.
    +411
     412
    -413    def get_mass_spectra_obj(self):
    -414        """Instatiate a MassSpectraBase object from the mzML file.
    -415
    -416
    -417        Returns
    -418        -------
    -419        MassSpectraBase
    -420            The MassSpectra object containing the parsed mass spectra.  
    -421            The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe.
    -422        """
    -423        _, scan_df = self.run(spectra=False)
    -424        mass_spectra_obj = MassSpectraBase(
    -425            self.file_location,
    -426            self.analyzer,
    -427            self.instrument_label,
    -428            self.sample_name,
    -429            self,
    -430        )
    -431        scan_df = scan_df.set_index("scan", drop=False)
    -432        mass_spectra_obj.scan_df = scan_df
    -433
    -434        return mass_spectra_obj
    -435
    -436    def get_lcms_obj(self, spectra="all"):
    -437        """Instatiates a LCMSBase object from the mzML file.
    -438
    -439        Parameters
    -440        ----------
    -441        spectra : str, optional
    -442            Which mass spectra data to include in the output. Default is all.  Other options: none, ms1, ms2.
    -443
    -444        Returns
    -445        -------
    -446        LCMSBase
    -447            LCMS object containing mass spectra data. 
    -448            The object is instatiated with the mzML file, analyzer, instrument, sample name, scan dataframe, 
    -449            and mz dataframe(s), as well as lists of scan numbers, retention times, and TICs.
    -450        """
    -451        _, scan_df = self.run(spectra="none")  # first run it to just get scan info
    -452        res, scan_df = self.run(
    -453            scan_df=scan_df, spectra=spectra
    -454        )  # second run to parse data
    -455        lcms_obj = LCMSBase(
    -456            self.file_location,
    -457            self.analyzer,
    -458            self.instrument_label,
    -459            self.sample_name,
    -460            self,
    -461        )
    -462        for key in res:
    -463            key_int = int(key.replace("ms", ""))
    -464            res[key] = res[key][res[key].intensity > 0]
    -465            res[key] = res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True)
    -466            lcms_obj._ms_unprocessed[key_int] = res[key]
    -467        lcms_obj.scan_df = scan_df.set_index("scan", drop=False)
    -468        # Check if polarity is mixed
    -469        if len(set(scan_df.polarity)) > 1:
    -470            raise ValueError("Mixed polarities detected in scan data")
    -471        lcms_obj.polarity = scan_df.polarity[0]
    -472        lcms_obj._scans_number_list = list(scan_df.scan)
    -473        lcms_obj._retention_time_list = list(scan_df.scan_time)
    -474        lcms_obj._tic_list = list(scan_df.tic)
    -475
    -476        return lcms_obj
    +413        Returns
    +414        -------
    +415        MassSpectraBase
    +416            The MassSpectra object containing the parsed mass spectra.
    +417            The object is instantiated with the mzML file, analyzer, instrument, sample name, and scan dataframe.
    +418        """
    +419        _, scan_df = self.run(spectra=False)
    +420        mass_spectra_obj = MassSpectraBase(
    +421            self.file_location,
    +422            self.analyzer,
    +423            self.instrument_label,
    +424            self.sample_name,
    +425            self,
    +426        )
    +427        scan_df = scan_df.set_index("scan", drop=False)
    +428        mass_spectra_obj.scan_df = scan_df
    +429
    +430        return mass_spectra_obj
    +431
    +432    def get_lcms_obj(self, spectra="all"):
    +433        """Instantiates an LCMSBase object from the mzML file.
    +434
    +435        Parameters
    +436        ----------
    +437        spectra : str, optional
    +438            Which mass spectra data to include in the output. Default is all.  Other options: none, ms1, ms2.
    +439
    +440        Returns
    +441        -------
    +442        LCMSBase
    +443            LCMS object containing mass spectra data.
    +444            The object is instantiated with the mzML file, analyzer, instrument, sample name, scan dataframe,
    +445            and mz dataframe(s), as well as lists of scan numbers, retention times, and TICs.
    +446        """
    +447        _, scan_df = self.run(spectra="none")  # first run it to just get scan info
    +448        res, scan_df = self.run(
    +449            scan_df=scan_df, spectra=spectra
    +450        )  # second run to parse data
    +451        lcms_obj = LCMSBase(
    +452            self.file_location,
    +453            self.analyzer,
    +454            self.instrument_label,
    +455            self.sample_name,
    +456            self,
    +457        )
    +458        for key in res:
    +459            key_int = int(key.replace("ms", ""))
    +460            res[key] = res[key][res[key].intensity > 0]
    +461            res[key] = res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True)
    +462            lcms_obj._ms_unprocessed[key_int] = res[key]
    +463        lcms_obj.scan_df = scan_df.set_index("scan", drop=False)
    +464        # Check if polarity is mixed
    +465        if len(set(scan_df.polarity)) > 1:
    +466            raise ValueError("Mixed polarities detected in scan data")
    +467        lcms_obj.polarity = scan_df.polarity[0]
    +468        lcms_obj._scans_number_list = list(scan_df.scan)
    +469        lcms_obj._retention_time_list = list(scan_df.scan_time)
    +470        lcms_obj._tic_list = list(scan_df.tic)
    +471
    +472        return lcms_obj
     
    @@ -1097,27 +1090,27 @@
    Methods
    -
    61    def __init__(
    -62        self,
    -63        file_location,
    -64        analyzer="Unknown",
    -65        instrument_label="Unknown",
    -66        sample_name=None,
    -67    ):
    -68        # implementation details
    -69        if isinstance(file_location, str):
    -70            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    -71            file_location = Path(file_location)
    -72        if not file_location.exists():
    -73            raise FileExistsError("File does not exist: " + str(file_location))
    -74        self.file_location = file_location
    -75        self.analyzer = analyzer
    -76        self.instrument_label = instrument_label
    -77
    -78        if sample_name:
    -79            self.sample_name = sample_name
    -80        else:
    -81            self.sample_name = file_location.stem
    +            
    60    def __init__(
    +61        self,
    +62        file_location,
    +63        analyzer="Unknown",
    +64        instrument_label="Unknown",
    +65        sample_name=None,
    +66    ):
    +67        # implementation details
    +68        if isinstance(file_location, str):
    +69            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    +70            file_location = Path(file_location)
    +71        if not file_location.exists():
    +72            raise FileExistsError("File does not exist: " + str(file_location))
    +73        self.file_location = file_location
    +74        self.analyzer = analyzer
    +75        self.instrument_label = instrument_label
    +76
    +77        if sample_name:
    +78            self.sample_name = sample_name
    +79        else:
    +80            self.sample_name = file_location.stem
     
    @@ -1168,17 +1161,17 @@
    Methods
    -
    83    def load(self):
    -84        """
    -85        Load mzML file using pymzml.run.Reader and return the data as a numpy array.
    -86
    -87        Returns
    -88        -------
    -89        numpy.ndarray
    -90            The mass spectra data as a numpy array.
    -91        """
    -92        data = pymzml.run.Reader(self.file_location)
    -93        return data
    +            
    82    def load(self):
    +83        """
    +84        Load mzML file using pymzml.run.Reader and return the data as a numpy array.
    +85
    +86        Returns
    +87        -------
    +88        numpy.ndarray
    +89            The mass spectra data as a numpy array.
    +90        """
    +91        data = pymzml.run.Reader(self.file_location)
    +92        return data
     
    @@ -1204,75 +1197,72 @@
    Returns
    -
     95    def get_scan_df(self, data):
    - 96        """
    - 97        Return scan data as a pandas DataFrame.
    - 98
    - 99        Parameters
    -100        ----------
    -101        data : pymzml.run.Reader
    -102            The mass spectra data.
    -103
    -104        Returns
    -105        -------
    -106        pandas.DataFrame
    -107            A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    -108        """
    -109        # Scan dict
    -110        # instatinate scan dict, with empty lists of size of scans
    -111        n_scans = data.get_spectrum_count()
    -112        scan_dict = {
    -113            "scan": np.empty(n_scans, dtype=np.int32),
    -114            "scan_time": np.empty(n_scans, dtype=np.float32),
    -115            "ms_level": [None] * n_scans,
    -116            "polarity": [None] * n_scans,
    -117            "precursor_mz": [None] * n_scans,
    -118            "scan_text": [None] * n_scans,
    -119            "scan_window_lower": np.empty(n_scans, dtype=np.float32),
    -120            "scan_window_upper": np.empty(n_scans, dtype=np.float32),
    -121            "scan_precision": [None] * n_scans,
    -122            "tic": np.empty(n_scans, dtype=np.float32),
    -123            "ms_format": [None] * n_scans,
    -124        }
    -125
    -126        # First pass: loop through scans to get scan info
    -127        for i, spec in enumerate(data):
    -128            scan_dict["scan"][i] = spec.ID
    -129            scan_dict["ms_level"][i] = spec.ms_level
    -130            scan_dict["scan_precision"][i] = spec._measured_precision
    -131            scan_dict["tic"][i] = spec.TIC
    -132            if spec.selected_precursors:
    -133                scan_dict["precursor_mz"][i] = spec.selected_precursors[0].get(
    -134                    "mz", None
    -135                )
    -136            if spec["negative scan"] is not None:
    -137                scan_dict["polarity"][i] = "negative"
    -138            if spec["positive scan"] is not None:
    -139                scan_dict["polarity"][i] = "positive"
    -140            if (
    -141                spec["negative scan"] is not None
    -142                and spec["positive scan"] is not None
    -143            ):
    -144                raise ValueError(
    -145                    "Error: scan {0} has both negative and positive polarity".format(
    -146                        spec.ID
    -147                    )
    -148                )
    -149
    -150            scan_dict["scan_time"][i] = spec.get("MS:1000016")
    -151            scan_dict["scan_text"][i] = spec.get("MS:1000512")
    -152            scan_dict["scan_window_lower"][i] = spec.get("MS:1000501")
    -153            scan_dict["scan_window_upper"][i] = spec.get("MS:1000500")
    -154            if spec.get("MS:1000128"):
    -155                scan_dict["ms_format"][i] = "profile"
    -156            elif spec.get("MS:1000127"):
    -157                scan_dict["ms_format"][i] = "centroid"
    -158            else:
    -159                scan_dict["ms_format"][i] = None
    -160
    -161        scan_df = pd.DataFrame(scan_dict)
    -162
    -163        return scan_df
    +            
     94    def get_scan_df(self, data):
    + 95        """
    + 96        Return scan data as a pandas DataFrame.
    + 97
    + 98        Parameters
    + 99        ----------
    +100        data : pymzml.run.Reader
    +101            The mass spectra data.
    +102
    +103        Returns
    +104        -------
    +105        pandas.DataFrame
    +106            A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    +107        """
    +108        # Scan dict
    +109        # instatinate scan dict, with empty lists of size of scans
    +110        n_scans = data.get_spectrum_count()
    +111        scan_dict = {
    +112            "scan": np.empty(n_scans, dtype=np.int32),
    +113            "scan_time": np.empty(n_scans, dtype=np.float32),
    +114            "ms_level": [None] * n_scans,
    +115            "polarity": [None] * n_scans,
    +116            "precursor_mz": [None] * n_scans,
    +117            "scan_text": [None] * n_scans,
    +118            "scan_window_lower": np.empty(n_scans, dtype=np.float32),
    +119            "scan_window_upper": np.empty(n_scans, dtype=np.float32),
    +120            "scan_precision": [None] * n_scans,
    +121            "tic": np.empty(n_scans, dtype=np.float32),
    +122            "ms_format": [None] * n_scans,
    +123        }
    +124
    +125        # First pass: loop through scans to get scan info
    +126        for i, spec in enumerate(data):
    +127            scan_dict["scan"][i] = spec.ID
    +128            scan_dict["ms_level"][i] = spec.ms_level
    +129            scan_dict["scan_precision"][i] = spec._measured_precision
    +130            scan_dict["tic"][i] = spec.TIC
    +131            if spec.selected_precursors:
    +132                scan_dict["precursor_mz"][i] = spec.selected_precursors[0].get(
    +133                    "mz", None
    +134                )
    +135            if spec["negative scan"] is not None:
    +136                scan_dict["polarity"][i] = "negative"
    +137            if spec["positive scan"] is not None:
    +138                scan_dict["polarity"][i] = "positive"
    +139            if spec["negative scan"] is not None and spec["positive scan"] is not None:
    +140                raise ValueError(
    +141                    "Error: scan {0} has both negative and positive polarity".format(
    +142                        spec.ID
    +143                    )
    +144                )
    +145
    +146            scan_dict["scan_time"][i] = spec.get("MS:1000016")
    +147            scan_dict["scan_text"][i] = spec.get("MS:1000512")
    +148            scan_dict["scan_window_lower"][i] = spec.get("MS:1000501")
    +149            scan_dict["scan_window_upper"][i] = spec.get("MS:1000500")
    +150            if spec.get("MS:1000128"):
    +151                scan_dict["ms_format"][i] = "profile"
    +152            elif spec.get("MS:1000127"):
    +153                scan_dict["ms_format"][i] = "centroid"
    +154            else:
    +155                scan_dict["ms_format"][i] = None
    +156
    +157        scan_df = pd.DataFrame(scan_dict)
    +158
    +159        return scan_df
     
    @@ -1305,116 +1295,116 @@
    Returns
    -
    165    def get_ms_raw(self, spectra, scan_df, data):
    -166        """Return a dictionary of mass spectra data as a pandas DataFrame.
    -167
    -168        Parameters
    -169        ----------
    -170        spectra : str
    -171            Which mass spectra data to include in the output. 
    -172            Options: None, "ms1", "ms2", "all".
    -173        scan_df : pandas.DataFrame
    -174            Scan dataframe. Output from get_scan_df().
    -175        data : pymzml.run.Reader
    -176            The mass spectra data.
    -177
    -178        Returns
    -179        -------
    -180        dict
    -181            A dictionary containing the mass spectra data as pandas DataFrames, with keys corresponding to the MS level.
    -182        
    -183        """
    -184        if spectra == "all":
    -185            scan_df_forspec = scan_df
    -186        elif spectra == "ms1":
    -187            scan_df_forspec = scan_df[scan_df.ms_level == 1]
    -188        elif spectra == "ms2":
    -189            scan_df_forspec = scan_df[scan_df.ms_level == 2]
    -190        else:
    -191            raise ValueError("spectra must be 'all', 'ms1', or 'ms2'")
    -192
    -193        # Result container
    -194        res = {}
    -195
    -196        # Row count container
    -197        counter = {}
    -198
    -199        # Column name container
    -200        cols = {}
    -201
    -202        # set at float32
    -203        dtype = np.float32
    -204
    -205        # First pass: get nrows
    -206        N = defaultdict(lambda: 0)
    -207        for i, spec in enumerate(data):
    -208            if i in scan_df_forspec.scan:
    -209                # Get ms level
    -210                level = "ms{}".format(spec.ms_level)
    -211
    -212                # Number of rows
    -213                N[level] += spec.mz.shape[0]
    -214
    -215        # Second pass: parse
    -216        for i, spec in enumerate(data):
    -217            if i in scan_df_forspec.scan:
    -218                # Number of rows
    -219                n = spec.mz.shape[0]
    +            
    161    def get_ms_raw(self, spectra, scan_df, data):
    +162        """Return a dictionary of mass spectra data as a pandas DataFrame.
    +163
    +164        Parameters
    +165        ----------
    +166        spectra : str
    +167            Which mass spectra data to include in the output.
    +168            Options: None, "ms1", "ms2", "all".
    +169        scan_df : pandas.DataFrame
    +170            Scan dataframe. Output from get_scan_df().
    +171        data : pymzml.run.Reader
    +172            The mass spectra data.
    +173
    +174        Returns
    +175        -------
    +176        dict
    +177            A dictionary containing the mass spectra data as pandas DataFrames, with keys corresponding to the MS level.
    +178
    +179        """
    +180        if spectra == "all":
    +181            scan_df_forspec = scan_df
    +182        elif spectra == "ms1":
    +183            scan_df_forspec = scan_df[scan_df.ms_level == 1]
    +184        elif spectra == "ms2":
    +185            scan_df_forspec = scan_df[scan_df.ms_level == 2]
    +186        else:
    +187            raise ValueError("spectra must be 'all', 'ms1', or 'ms2'")
    +188
    +189        # Result container
    +190        res = {}
    +191
    +192        # Row count container
    +193        counter = {}
    +194
    +195        # Column name container
    +196        cols = {}
    +197
    +198        # set at float32
    +199        dtype = np.float32
    +200
    +201        # First pass: get nrows
    +202        N = defaultdict(lambda: 0)
    +203        for i, spec in enumerate(data):
    +204            if i in scan_df_forspec.scan:
    +205                # Get ms level
    +206                level = "ms{}".format(spec.ms_level)
    +207
    +208                # Number of rows
    +209                N[level] += spec.mz.shape[0]
    +210
    +211        # Second pass: parse
    +212        for i, spec in enumerate(data):
    +213            if i in scan_df_forspec.scan:
    +214                # Number of rows
    +215                n = spec.mz.shape[0]
    +216
    +217                # No measurements
    +218                if n == 0:
    +219                    continue
     220
    -221                # No measurements
    -222                if n == 0:
    -223                    continue
    -224
    -225                # Dimension check
    -226                if len(spec.mz) != len(spec.i):
    -227                    # raise an error if the mz and intensity arrays are not the same length
    -228                    raise ValueError("m/z and intensity array dimension mismatch")
    -229
    -230                # Scan/frame info
    -231                id_dict = spec.id_dict
    -232
    -233                # Get ms level
    -234                level = "ms{}".format(spec.ms_level)
    +221                # Dimension check
    +222                if len(spec.mz) != len(spec.i):
    +223                    # raise an error if the mz and intensity arrays are not the same length
    +224                    raise ValueError("m/z and intensity array dimension mismatch")
    +225
    +226                # Scan/frame info
    +227                id_dict = spec.id_dict
    +228
    +229                # Get ms level
    +230                level = "ms{}".format(spec.ms_level)
    +231
    +232                # Columns
    +233                cols[level] = list(id_dict.keys()) + ["mz", "intensity"]
    +234                m = len(cols[level])
     235
    -236                # Columns
    -237                cols[level] = list(id_dict.keys()) + ["mz", "intensity"]
    -238                m = len(cols[level])
    +236                # Subarray init
    +237                arr = np.empty((n, m), dtype=dtype)
    +238                inx = 0
     239
    -240                # Subarray init
    -241                arr = np.empty((n, m), dtype=dtype)
    -242                inx = 0
    -243
    -244                # Populate scan/frame info
    -245                for k, v in id_dict.items():
    -246                    arr[:, inx] = v
    -247                    inx += 1
    +240                # Populate scan/frame info
    +241                for k, v in id_dict.items():
    +242                    arr[:, inx] = v
    +243                    inx += 1
    +244
    +245                # Populate m/z
    +246                arr[:, inx] = spec.mz
    +247                inx += 1
     248
    -249                # Populate m/z
    -250                arr[:, inx] = spec.mz
    +249                # Populate intensity
    +250                arr[:, inx] = spec.i
     251                inx += 1
     252
    -253                # Populate intensity
    -254                arr[:, inx] = spec.i
    -255                inx += 1
    -256
    -257                # Initialize output container
    -258                if level not in res:
    -259                    res[level] = np.empty((N[level], m), dtype=dtype)
    -260                    counter[level] = 0
    +253                # Initialize output container
    +254                if level not in res:
    +255                    res[level] = np.empty((N[level], m), dtype=dtype)
    +256                    counter[level] = 0
    +257
    +258                # Insert subarray
    +259                res[level][counter[level] : counter[level] + n, :] = arr
    +260                counter[level] += n
     261
    -262                # Insert subarray
    -263                res[level][counter[level] : counter[level] + n, :] = arr
    -264                counter[level] += n
    -265
    -266        # Construct ms1 and ms2 mz dataframes
    -267        for level in res.keys():
    -268            res[level] = pd.DataFrame(res[level], columns=cols[level]).drop(
    -269                columns=["controllerType", "controllerNumber"],
    -270                axis=1,
    -271                inplace=False,
    -272            )
    -273        
    -274        return res
    +262        # Construct ms1 and ms2 mz dataframes
    +263        for level in res.keys():
    +264            res[level] = pd.DataFrame(res[level], columns=cols[level]).drop(
    +265                columns=["controllerType", "controllerNumber"],
    +266                axis=1,
    +267                inplace=False,
    +268            )
    +269
    +270        return res
     
    @@ -1424,7 +1414,7 @@
    Parameters
    • spectra (str): -Which mass spectra data to include in the output. +Which mass spectra data to include in the output. Options: None, "ms1", "ms2", "all".
    • scan_df (pandas.DataFrame): Scan dataframe. Output from get_scan_df().
    • @@ -1452,38 +1442,38 @@
      Returns
    -
    276    def run(self, spectra="all", scan_df=None):
    -277        """Parse the mzML file and return a dictionary of spectra dataframes and a scan metadata dataframe.
    -278
    -279        Parameters
    -280        ----------
    -281        spectra : str, optional
    -282            Which mass spectra data to include in the output. Default is "all".
    -283            Other options: None, "ms1", "ms2".
    -284        scan_df : pandas.DataFrame, optional
    -285            Scan dataframe.  If not provided, the scan dataframe is created from the mzML file.
    -286
    -287        Returns
    -288        -------
    -289        tuple
    -290            A tuple containing two elements:
    -291            - A dictionary containing the mass spectra data as numpy arrays, with keys corresponding to the MS level.
    -292            - A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    -293        """
    -294
    -295        # Open file
    -296        data = self.load()
    -297
    -298        if scan_df is None:
    -299            scan_df = self.get_scan_df(data)
    -300
    -301        if spectra != "none":
    -302            res = self.get_ms_raw(spectra, scan_df, data)
    -303            
    -304        else:
    -305            res = None
    -306
    -307        return res, scan_df
    +            
    272    def run(self, spectra="all", scan_df=None):
    +273        """Parse the mzML file and return a dictionary of spectra dataframes and a scan metadata dataframe.
    +274
    +275        Parameters
    +276        ----------
    +277        spectra : str, optional
    +278            Which mass spectra data to include in the output. Default is "all".
    +279            Other options: None, "ms1", "ms2".
    +280        scan_df : pandas.DataFrame, optional
    +281            Scan dataframe.  If not provided, the scan dataframe is created from the mzML file.
    +282
    +283        Returns
    +284        -------
    +285        tuple
    +286            A tuple containing two elements:
    +287            - A dictionary containing the mass spectra data as numpy arrays, with keys corresponding to the MS level.
    +288            - A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    +289        """
    +290
    +291        # Open file
    +292        data = self.load()
    +293
    +294        if scan_df is None:
    +295            scan_df = self.get_scan_df(data)
    +296
    +297        if spectra != "none":
    +298            res = self.get_ms_raw(spectra, scan_df, data)
    +299
    +300        else:
    +301            res = None
    +302
    +303        return res, scan_df
     
    @@ -1523,109 +1513,109 @@
    Returns
    -
    309    def get_mass_spectrum_from_scan(
    -310        self, scan_number, spectrum_mode, auto_process=True
    -311    ):
    -312        """Instatiate a mass spectrum object from the mzML file.
    -313
    -314        Parameters
    -315        ----------
    -316        scan_number : int
    -317            The scan number to be parsed.
    -318        spectrum_mode : str
    -319            The type of spectrum to instantiate.  Must be'profile' or 'centroid'.
    -320        polarity : int
    -321            The polarity of the scan.  Must be -1 or 1.
    -322        auto_process : bool, optional
    -323            If True, process the mass spectrum. Default is True.
    -324
    -325        Returns
    -326        -------
    -327        MassSpecProfile | MassSpecCentroid
    -328            The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    -329        """
    -330
    -331        def set_metadata(
    -332            scan_number: int,
    -333            polarity: int,
    -334            file_location: str,
    -335            label=Labels.thermo_profile,
    -336        ):
    -337            """
    -338            Set the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
    -339
    -340            Parameters
    -341            ----------
    -342            scan_number : int
    -343                The scan number.
    -344            polarity : int
    -345                The polarity of the data.
    -346            file_location : str
    -347                The file location.
    -348            label : str, optional
    -349                The label for the mass spectrum. Default is Labels.thermo_profile.
    -350
    -351            Returns
    -352            -------
    -353            dict
    -354                The output parameters ready for creating a MassSpecProfile or MassSpecCentroid object.
    -355            """
    -356            d_params = default_parameters(file_location)
    -357            d_params["label"] = label
    -358            d_params["polarity"] = polarity
    -359            d_params["filename_path"] = file_location
    -360            d_params["scan_number"] = scan_number
    -361
    -362            return d_params
    -363
    -364        # Open file
    -365        data = self.load()
    -366
    -367        # Pluck out individual scan mz and intensity
    -368        spec = data[scan_number]
    -369
    -370        # Get polarity
    -371        if spec["negative scan"] is not None:
    -372            polarity = -1
    -373        elif spec["positive scan"] is not None:
    -374            polarity = 1
    -375
    -376        # Get mass spectrum
    -377        if spectrum_mode == "profile":
    -378            # Check if profile
    -379            if not spec.get("MS:1000128"):
    -380                raise ValueError("spectrum is not profile")
    -381            data_dict = {
    -382                Labels.mz: spec.mz,
    -383                Labels.abundance: spec.i,
    -384            }
    -385            d_params = set_metadata(
    -386                scan_number,
    -387                polarity,
    -388                self.file_location,
    -389                label=Labels.simulated_profile,
    -390            )
    -391            mass_spectrum_obj = mass_spectrum_obj = MassSpecProfile(
    -392                data_dict, d_params, auto_process=auto_process
    -393            )
    -394        elif spectrum_mode == "centroid":
    -395            # Check if centroided
    -396            if not spec.get("MS:1000127"):
    -397                raise ValueError("spectrum is not centroided")
    -398            data_dict = {
    -399                Labels.mz: spec.mz,
    -400                Labels.abundance: spec.i,
    -401                Labels.rp: [np.nan] * len(spec.mz),
    -402                Labels.s2n: [np.nan] * len(spec.i),
    -403            }
    -404            d_params = set_metadata(
    -405                scan_number, polarity, self.file_location, label=Labels.corems_centroid
    -406            )
    -407            mass_spectrum_obj = MassSpecCentroid(
    -408                data_dict, d_params, auto_process=auto_process
    -409            )
    -410
    -411        return mass_spectrum_obj
    +            
    305    def get_mass_spectrum_from_scan(
    +306        self, scan_number, spectrum_mode, auto_process=True
    +307    ):
    +308        """Instatiate a mass spectrum object from the mzML file.
    +309
    +310        Parameters
    +311        ----------
    +312        scan_number : int
    +313            The scan number to be parsed.
    +314        spectrum_mode : str
    +315            The type of spectrum to instantiate.  Must be'profile' or 'centroid'.
    +316        polarity : int
    +317            The polarity of the scan.  Must be -1 or 1.
    +318        auto_process : bool, optional
    +319            If True, process the mass spectrum. Default is True.
    +320
    +321        Returns
    +322        -------
    +323        MassSpecProfile | MassSpecCentroid
    +324            The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    +325        """
    +326
    +327        def set_metadata(
    +328            scan_number: int,
    +329            polarity: int,
    +330            file_location: str,
    +331            label=Labels.thermo_profile,
    +332        ):
    +333            """
    +334            Set the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
    +335
    +336            Parameters
    +337            ----------
    +338            scan_number : int
    +339                The scan number.
    +340            polarity : int
    +341                The polarity of the data.
    +342            file_location : str
    +343                The file location.
    +344            label : str, optional
    +345                The label for the mass spectrum. Default is Labels.thermo_profile.
    +346
    +347            Returns
    +348            -------
    +349            dict
    +350                The output parameters ready for creating a MassSpecProfile or MassSpecCentroid object.
    +351            """
    +352            d_params = default_parameters(file_location)
    +353            d_params["label"] = label
    +354            d_params["polarity"] = polarity
    +355            d_params["filename_path"] = file_location
    +356            d_params["scan_number"] = scan_number
    +357
    +358            return d_params
    +359
    +360        # Open file
    +361        data = self.load()
    +362
    +363        # Pluck out individual scan mz and intensity
    +364        spec = data[scan_number]
    +365
    +366        # Get polarity
    +367        if spec["negative scan"] is not None:
    +368            polarity = -1
    +369        elif spec["positive scan"] is not None:
    +370            polarity = 1
    +371
    +372        # Get mass spectrum
    +373        if spectrum_mode == "profile":
    +374            # Check if profile
    +375            if not spec.get("MS:1000128"):
    +376                raise ValueError("spectrum is not profile")
    +377            data_dict = {
    +378                Labels.mz: spec.mz,
    +379                Labels.abundance: spec.i,
    +380            }
    +381            d_params = set_metadata(
    +382                scan_number,
    +383                polarity,
    +384                self.file_location,
    +385                label=Labels.simulated_profile,
    +386            )
    +387            mass_spectrum_obj = mass_spectrum_obj = MassSpecProfile(
    +388                data_dict, d_params, auto_process=auto_process
    +389            )
    +390        elif spectrum_mode == "centroid":
    +391            # Check if centroided
    +392            if not spec.get("MS:1000127"):
    +393                raise ValueError("spectrum is not centroided")
    +394            data_dict = {
    +395                Labels.mz: spec.mz,
    +396                Labels.abundance: spec.i,
    +397                Labels.rp: [np.nan] * len(spec.mz),
    +398                Labels.s2n: [np.nan] * len(spec.i),
    +399            }
    +400            d_params = set_metadata(
    +401                scan_number, polarity, self.file_location, label=Labels.corems_centroid
    +402            )
    +403            mass_spectrum_obj = MassSpecCentroid(
    +404                data_dict, d_params, auto_process=auto_process
    +405            )
    +406
    +407        return mass_spectrum_obj
     
    @@ -1664,28 +1654,28 @@
    Returns
    -
    413    def get_mass_spectra_obj(self):
    -414        """Instatiate a MassSpectraBase object from the mzML file.
    -415
    -416
    -417        Returns
    -418        -------
    -419        MassSpectraBase
    -420            The MassSpectra object containing the parsed mass spectra.  
    -421            The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe.
    -422        """
    -423        _, scan_df = self.run(spectra=False)
    -424        mass_spectra_obj = MassSpectraBase(
    -425            self.file_location,
    -426            self.analyzer,
    -427            self.instrument_label,
    -428            self.sample_name,
    -429            self,
    -430        )
    -431        scan_df = scan_df.set_index("scan", drop=False)
    -432        mass_spectra_obj.scan_df = scan_df
    -433
    -434        return mass_spectra_obj
    +            
    409    def get_mass_spectra_obj(self):
    +410        """Instatiate a MassSpectraBase object from the mzML file.
    +411
    +412
    +413        Returns
    +414        -------
    +415        MassSpectraBase
    +416            The MassSpectra object containing the parsed mass spectra.
    +417            The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe.
    +418        """
    +419        _, scan_df = self.run(spectra=False)
    +420        mass_spectra_obj = MassSpectraBase(
    +421            self.file_location,
    +422            self.analyzer,
    +423            self.instrument_label,
    +424            self.sample_name,
    +425            self,
    +426        )
    +427        scan_df = scan_df.set_index("scan", drop=False)
    +428        mass_spectra_obj.scan_df = scan_df
    +429
    +430        return mass_spectra_obj
     
    @@ -1694,7 +1684,7 @@
    Returns
    Returns
      -
    • MassSpectraBase: The MassSpectra object containing the parsed mass spectra.
      +
    • MassSpectraBase: The MassSpectra object containing the parsed mass spectra. The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe.
    @@ -1712,47 +1702,47 @@

    Returns
    -
    436    def get_lcms_obj(self, spectra="all"):
    -437        """Instatiates a LCMSBase object from the mzML file.
    -438
    -439        Parameters
    -440        ----------
    -441        spectra : str, optional
    -442            Which mass spectra data to include in the output. Default is all.  Other options: none, ms1, ms2.
    -443
    -444        Returns
    -445        -------
    -446        LCMSBase
    -447            LCMS object containing mass spectra data. 
    -448            The object is instatiated with the mzML file, analyzer, instrument, sample name, scan dataframe, 
    -449            and mz dataframe(s), as well as lists of scan numbers, retention times, and TICs.
    -450        """
    -451        _, scan_df = self.run(spectra="none")  # first run it to just get scan info
    -452        res, scan_df = self.run(
    -453            scan_df=scan_df, spectra=spectra
    -454        )  # second run to parse data
    -455        lcms_obj = LCMSBase(
    -456            self.file_location,
    -457            self.analyzer,
    -458            self.instrument_label,
    -459            self.sample_name,
    -460            self,
    -461        )
    -462        for key in res:
    -463            key_int = int(key.replace("ms", ""))
    -464            res[key] = res[key][res[key].intensity > 0]
    -465            res[key] = res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True)
    -466            lcms_obj._ms_unprocessed[key_int] = res[key]
    -467        lcms_obj.scan_df = scan_df.set_index("scan", drop=False)
    -468        # Check if polarity is mixed
    -469        if len(set(scan_df.polarity)) > 1:
    -470            raise ValueError("Mixed polarities detected in scan data")
    -471        lcms_obj.polarity = scan_df.polarity[0]
    -472        lcms_obj._scans_number_list = list(scan_df.scan)
    -473        lcms_obj._retention_time_list = list(scan_df.scan_time)
    -474        lcms_obj._tic_list = list(scan_df.tic)
    -475
    -476        return lcms_obj
    +            
    432    def get_lcms_obj(self, spectra="all"):
    +433        """Instatiates a LCMSBase object from the mzML file.
    +434
    +435        Parameters
    +436        ----------
    +437        spectra : str, optional
    +438            Which mass spectra data to include in the output. Default is all.  Other options: none, ms1, ms2.
    +439
    +440        Returns
    +441        -------
    +442        LCMSBase
    +443            LCMS object containing mass spectra data.
    +444            The object is instatiated with the mzML file, analyzer, instrument, sample name, scan dataframe,
    +445            and mz dataframe(s), as well as lists of scan numbers, retention times, and TICs.
    +446        """
    +447        _, scan_df = self.run(spectra="none")  # first run it to just get scan info
    +448        res, scan_df = self.run(
    +449            scan_df=scan_df, spectra=spectra
    +450        )  # second run to parse data
    +451        lcms_obj = LCMSBase(
    +452            self.file_location,
    +453            self.analyzer,
    +454            self.instrument_label,
    +455            self.sample_name,
    +456            self,
    +457        )
    +458        for key in res:
    +459            key_int = int(key.replace("ms", ""))
    +460            res[key] = res[key][res[key].intensity > 0]
    +461            res[key] = res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True)
    +462            lcms_obj._ms_unprocessed[key_int] = res[key]
    +463        lcms_obj.scan_df = scan_df.set_index("scan", drop=False)
    +464        # Check if polarity is mixed
    +465        if len(set(scan_df.polarity)) > 1:
    +466            raise ValueError("Mixed polarities detected in scan data")
    +467        lcms_obj.polarity = scan_df.polarity[0]
    +468        lcms_obj._scans_number_list = list(scan_df.scan)
    +469        lcms_obj._retention_time_list = list(scan_df.scan_time)
    +470        lcms_obj._tic_list = list(scan_df.tic)
    +471
    +472        return lcms_obj
     
    @@ -1768,8 +1758,8 @@
    Parameters
    Returns
      -
    • LCMSBase: LCMS object containing mass spectra data. -The object is instatiated with the mzML file, analyzer, instrument, sample name, scan dataframe, +
    • LCMSBase: LCMS object containing mass spectra data. +The object is instatiated with the mzML file, analyzer, instrument, sample name, scan dataframe, and mz dataframe(s), as well as lists of scan numbers, retention times, and TICs.
    diff --git a/docs/corems/mass_spectra/input/parserbase.html b/docs/corems/mass_spectra/input/parserbase.html index cffc9568..4dfbc797 100644 --- a/docs/corems/mass_spectra/input/parserbase.html +++ b/docs/corems/mass_spectra/input/parserbase.html @@ -75,75 +75,72 @@

    -
     1__author__ = "Katherine Heal"
    - 2__date__ = "November 8, 2023"
    +                        
     1from abc import ABC, abstractmethod
    + 2
      3
    - 4from abc import ABC, abstractmethod
    - 5
    - 6
    - 7class SpectraParserInterface(ABC):
    - 8    """
    - 9    Interface for parsing mass spectra data into MassSpectraBase objects.
    -10
    -11    Methods
    -12    -------
    -13    * load().
    -14        Load mass spectra data.
    -15    * run().
    -16        Parse mass spectra data.
    -17    * get_mass_spectra_obj().
    -18        Return MassSpectraBase object with several attributes populated
    -19    * get_mass_spectrum_from_scan(scan_number).
    -20        Return MassSpecBase data object from scan number.
    -21
    -22    Notes
    -23    -----
    -24    This is an abstract class and should not be instantiated directly.
    -25    """
    -26
    -27    @abstractmethod
    -28    def load(self):
    -29        """
    -30        Load mass spectra data.
    -31        """
    -32        pass
    -33
    -34    @abstractmethod
    -35    def run(self):
    -36        """
    -37        Parse mass spectra data.
    -38        """
    -39        pass
    -40
    -41    @abstractmethod
    -42    def get_scan_df(self):
    -43        """
    -44        Return scan data as a pandas DataFrame.
    -45        """
    -46        pass
    -47
    -48    @abstractmethod
    -49    def get_ms_raw(self, spectra, scan_df):
    -50        """
    -51        Return a dictionary of mass spectra data as a pandas DataFrame.
    -52        """
    -53        pass
    -54
    -55    @abstractmethod
    -56    def get_mass_spectra_obj(self):
    -57        """
    -58        Return mass spectra data object.
    -59        """
    -60        pass
    -61
    -62    @abstractmethod
    -63    def get_mass_spectrum_from_scan(
    -64        self, scan_number, spectrum_mode, auto_process=True
    -65    ):
    -66        """
    -67        Return mass spectrum data object from scan number.
    -68        """
    -69        pass
    + 4class SpectraParserInterface(ABC):
    + 5    """
    + 6    Interface for parsing mass spectra data into MassSpectraBase objects.
    + 7
    + 8    Methods
    + 9    -------
    +10    * load().
    +11        Load mass spectra data.
    +12    * run().
    +13        Parse mass spectra data.
    +14    * get_mass_spectra_obj().
    +15        Return MassSpectraBase object with several attributes populated
    +16    * get_mass_spectrum_from_scan(scan_number).
    +17        Return MassSpecBase data object from scan number.
    +18
    +19    Notes
    +20    -----
    +21    This is an abstract class and should not be instantiated directly.
    +22    """
    +23
    +24    @abstractmethod
    +25    def load(self):
    +26        """
    +27        Load mass spectra data.
    +28        """
    +29        pass
    +30
    +31    @abstractmethod
    +32    def run(self):
    +33        """
    +34        Parse mass spectra data.
    +35        """
    +36        pass
    +37
    +38    @abstractmethod
    +39    def get_scan_df(self):
    +40        """
    +41        Return scan data as a pandas DataFrame.
    +42        """
    +43        pass
    +44
    +45    @abstractmethod
    +46    def get_ms_raw(self, spectra, scan_df):
    +47        """
    +48        Return a dictionary of mass spectra data as a pandas DataFrame.
    +49        """
    +50        pass
    +51
    +52    @abstractmethod
    +53    def get_mass_spectra_obj(self):
    +54        """
    +55        Return mass spectra data object.
    +56        """
    +57        pass
    +58
    +59    @abstractmethod
    +60    def get_mass_spectrum_from_scan(
    +61        self, scan_number, spectrum_mode, auto_process=True
    +62    ):
    +63        """
    +64        Return mass spectrum data object from scan number.
    +65        """
    +66        pass
     
    @@ -159,69 +156,69 @@

    -
     8class SpectraParserInterface(ABC):
    - 9    """
    -10    Interface for parsing mass spectra data into MassSpectraBase objects.
    -11
    -12    Methods
    -13    -------
    -14    * load().
    -15        Load mass spectra data.
    -16    * run().
    -17        Parse mass spectra data.
    -18    * get_mass_spectra_obj().
    -19        Return MassSpectraBase object with several attributes populated
    -20    * get_mass_spectrum_from_scan(scan_number).
    -21        Return MassSpecBase data object from scan number.
    -22
    -23    Notes
    -24    -----
    -25    This is an abstract class and should not be instantiated directly.
    -26    """
    -27
    -28    @abstractmethod
    -29    def load(self):
    -30        """
    -31        Load mass spectra data.
    -32        """
    -33        pass
    -34
    -35    @abstractmethod
    -36    def run(self):
    -37        """
    -38        Parse mass spectra data.
    -39        """
    -40        pass
    -41
    -42    @abstractmethod
    -43    def get_scan_df(self):
    -44        """
    -45        Return scan data as a pandas DataFrame.
    -46        """
    -47        pass
    -48
    -49    @abstractmethod
    -50    def get_ms_raw(self, spectra, scan_df):
    -51        """
    -52        Return a dictionary of mass spectra data as a pandas DataFrame.
    -53        """
    -54        pass
    -55
    -56    @abstractmethod
    -57    def get_mass_spectra_obj(self):
    -58        """
    -59        Return mass spectra data object.
    -60        """
    -61        pass
    -62
    -63    @abstractmethod
    -64    def get_mass_spectrum_from_scan(
    -65        self, scan_number, spectrum_mode, auto_process=True
    -66    ):
    -67        """
    -68        Return mass spectrum data object from scan number.
    -69        """
    -70        pass
    +            
     5class SpectraParserInterface(ABC):
    + 6    """
    + 7    Interface for parsing mass spectra data into MassSpectraBase objects.
    + 8
    + 9    Methods
    +10    -------
    +11    * load().
    +12        Load mass spectra data.
    +13    * run().
    +14        Parse mass spectra data.
    +15    * get_mass_spectra_obj().
    +16        Return MassSpectraBase object with several attributes populated
    +17    * get_mass_spectrum_from_scan(scan_number).
    +18        Return MassSpecBase data object from scan number.
    +19
    +20    Notes
    +21    -----
    +22    This is an abstract class and should not be instantiated directly.
    +23    """
    +24
    +25    @abstractmethod
    +26    def load(self):
    +27        """
    +28        Load mass spectra data.
    +29        """
    +30        pass
    +31
    +32    @abstractmethod
    +33    def run(self):
    +34        """
    +35        Parse mass spectra data.
    +36        """
    +37        pass
    +38
    +39    @abstractmethod
    +40    def get_scan_df(self):
    +41        """
    +42        Return scan data as a pandas DataFrame.
    +43        """
    +44        pass
    +45
    +46    @abstractmethod
    +47    def get_ms_raw(self, spectra, scan_df):
    +48        """
    +49        Return a dictionary of mass spectra data as a pandas DataFrame.
    +50        """
    +51        pass
    +52
    +53    @abstractmethod
    +54    def get_mass_spectra_obj(self):
    +55        """
    +56        Return mass spectra data object.
    +57        """
    +58        pass
    +59
    +60    @abstractmethod
    +61    def get_mass_spectrum_from_scan(
    +62        self, scan_number, spectrum_mode, auto_process=True
    +63    ):
    +64        """
    +65        Return mass spectrum data object from scan number.
    +66        """
    +67        pass
     
    @@ -258,12 +255,12 @@
    Notes
    -
    28    @abstractmethod
    -29    def load(self):
    -30        """
    -31        Load mass spectra data.
    -32        """
    -33        pass
    +            
    25    @abstractmethod
    +26    def load(self):
    +27        """
    +28        Load mass spectra data.
    +29        """
    +30        pass
     
    @@ -284,12 +281,12 @@
    Notes
    -
    35    @abstractmethod
    -36    def run(self):
    -37        """
    -38        Parse mass spectra data.
    -39        """
    -40        pass
    +            
    32    @abstractmethod
    +33    def run(self):
    +34        """
    +35        Parse mass spectra data.
    +36        """
    +37        pass
     
    @@ -310,12 +307,12 @@
    Notes
    -
    42    @abstractmethod
    -43    def get_scan_df(self):
    -44        """
    -45        Return scan data as a pandas DataFrame.
    -46        """
    -47        pass
    +            
    39    @abstractmethod
    +40    def get_scan_df(self):
    +41        """
    +42        Return scan data as a pandas DataFrame.
    +43        """
    +44        pass
     
    @@ -336,12 +333,12 @@
    Notes
    -
    49    @abstractmethod
    -50    def get_ms_raw(self, spectra, scan_df):
    -51        """
    -52        Return a dictionary of mass spectra data as a pandas DataFrame.
    -53        """
    -54        pass
    +            
    46    @abstractmethod
    +47    def get_ms_raw(self, spectra, scan_df):
    +48        """
    +49        Return a dictionary of mass spectra data as a pandas DataFrame.
    +50        """
    +51        pass
     
    @@ -362,12 +359,12 @@
    Notes
    -
    56    @abstractmethod
    -57    def get_mass_spectra_obj(self):
    -58        """
    -59        Return mass spectra data object.
    -60        """
    -61        pass
    +            
    53    @abstractmethod
    +54    def get_mass_spectra_obj(self):
    +55        """
    +56        Return mass spectra data object.
    +57        """
    +58        pass
     
    @@ -388,14 +385,14 @@
    Notes
    -
    63    @abstractmethod
    -64    def get_mass_spectrum_from_scan(
    -65        self, scan_number, spectrum_mode, auto_process=True
    -66    ):
    -67        """
    -68        Return mass spectrum data object from scan number.
    -69        """
    -70        pass
    +            
    60    @abstractmethod
    +61    def get_mass_spectrum_from_scan(
    +62        self, scan_number, spectrum_mode, auto_process=True
    +63    ):
    +64        """
    +65        Return mass spectrum data object from scan number.
    +66        """
    +67        pass
     
    diff --git a/docs/corems/mass_spectra/input/rawFileReader.html b/docs/corems/mass_spectra/input/rawFileReader.html index 36da1f2e..04086425 100644 --- a/docs/corems/mass_spectra/input/rawFileReader.html +++ b/docs/corems/mass_spectra/input/rawFileReader.html @@ -676,498 +676,498 @@

    500 # plt.show() 501 502 def get_tic( - 503 self, ms_type="MS !d", peak_detection=True, smooth=True, plot=False, ax=None,trace_type='TIC', - 504 ) -> Tuple[TIC_Data, axes.Axes]: - 505 """ms_type: str ('MS !d', 'MS2', None) - 506 if you use None you get all scans. - 507 peak_detection: bool - 508 smooth: bool - 509 plot: bool - 510 ax: matplotlib axis object - 511 trace_type: str ('TIC','BPC') - 512 - 513 returns: - 514 chroma: dict - 515 { - 516 Scan: [int] - 517 original thermo scan numberMS - 518 Time: [floats] - 519 list of retention times - 520 TIC: [floats] - 521 total ion chromatogram - 522 Apexes: [int] - 523 original thermo apex scan number after peak picking - 524 } - 525 """ - 526 if trace_type == 'TIC': - 527 settings = ChromatogramTraceSettings(TraceType.TIC) - 528 elif trace_type == 'BPC': - 529 settings = ChromatogramTraceSettings(TraceType.BasePeak) - 530 else: - 531 raise ValueError(f'{trace_type} undefined') - 532 if ms_type == "all": - 533 settings.Filter = None - 534 else: - 535 settings.Filter = ms_type - 536 - 537 chroma_settings = IChromatogramSettings(settings) - 538 - 539 data = self.iRawDataPlus.GetChromatogramData( - 540 [chroma_settings], self.start_scan, self.end_scan - 541 ) + 503 self, + 504 ms_type="MS !d", + 505 peak_detection=True, + 506 smooth=True, + 507 plot=False, + 508 ax=None, + 509 trace_type="TIC", + 510 ) -> Tuple[TIC_Data, axes.Axes]: + 511 """ms_type: str ('MS !d', 'MS2', None) + 512 if you use None you get all scans. 
+ 513 peak_detection: bool + 514 smooth: bool + 515 plot: bool + 516 ax: matplotlib axis object + 517 trace_type: str ('TIC','BPC') + 518 + 519 returns: + 520 chroma: dict + 521 { + 522 Scan: [int] + 523 original thermo scan numberMS + 524 Time: [floats] + 525 list of retention times + 526 TIC: [floats] + 527 total ion chromatogram + 528 Apexes: [int] + 529 original thermo apex scan number after peak picking + 530 } + 531 """ + 532 if trace_type == "TIC": + 533 settings = ChromatogramTraceSettings(TraceType.TIC) + 534 elif trace_type == "BPC": + 535 settings = ChromatogramTraceSettings(TraceType.BasePeak) + 536 else: + 537 raise ValueError(f"{trace_type} undefined") + 538 if ms_type == "all": + 539 settings.Filter = None + 540 else: + 541 settings.Filter = ms_type 542 - 543 trace = ChromatogramSignal.FromChromatogramData(data) + 543 chroma_settings = IChromatogramSettings(settings) 544 - 545 data = TIC_Data(time=[], scans=[], tic=[], bpc=[], apexes=[]) - 546 - 547 if trace[0].Length > 0: - 548 for i in range(trace[0].Length): - 549 # print(trace[0].HasBasePeakData,trace[0].EndTime ) + 545 data = self.iRawDataPlus.GetChromatogramData( + 546 [chroma_settings], self.start_scan, self.end_scan + 547 ) + 548 + 549 trace = ChromatogramSignal.FromChromatogramData(data) 550 - 551 # print(" {} - {}, {}".format( i, trace[0].Times[i], trace[0].Intensities[i] )) - 552 data.time.append(trace[0].Times[i]) - 553 data.tic.append(trace[0].Intensities[i]) - 554 data.scans.append(trace[0].Scans[i]) - 555 - 556 # print(trace[0].Scans[i]) - 557 if smooth: - 558 data.tic = self.smooth_tic(data.tic) - 559 - 560 else: - 561 data.tic = np.array(data.tic) - 562 - 563 if peak_detection: - 564 centroid_peak_indexes = [ - 565 i for i in self.centroid_detector(data.time, data.tic) - 566 ] - 567 - 568 data.apexes = centroid_peak_indexes - 569 - 570 if plot: - 571 if not ax: - 572 import matplotlib.pyplot as plt + 551 data = TIC_Data(time=[], scans=[], tic=[], bpc=[], apexes=[]) + 552 + 553 if 
trace[0].Length > 0: + 554 for i in range(trace[0].Length): + 555 # print(trace[0].HasBasePeakData,trace[0].EndTime ) + 556 + 557 # print(" {} - {}, {}".format( i, trace[0].Times[i], trace[0].Intensities[i] )) + 558 data.time.append(trace[0].Times[i]) + 559 data.tic.append(trace[0].Intensities[i]) + 560 data.scans.append(trace[0].Scans[i]) + 561 + 562 # print(trace[0].Scans[i]) + 563 if smooth: + 564 data.tic = self.smooth_tic(data.tic) + 565 + 566 else: + 567 data.tic = np.array(data.tic) + 568 + 569 if peak_detection: + 570 centroid_peak_indexes = [ + 571 i for i in self.centroid_detector(data.time, data.tic) + 572 ] 573 - 574 ax = plt.gca() - 575 # fig, ax = plt.subplots(figsize=(6, 3)) - 576 - 577 ax.plot(data.time, data.tic, label=trace_type) - 578 ax.set_xlabel("Time (min)") - 579 ax.set_ylabel("a.u.") - 580 if peak_detection: - 581 for peak_indexes in data.apexes: - 582 apex_index = peak_indexes[1] - 583 ax.plot( - 584 data.time[apex_index], - 585 data.tic[apex_index], - 586 marker="x", - 587 linewidth=0, - 588 ) - 589 - 590 # plt.show() - 591 if trace_type == 'BPC': - 592 data.bpc = data.tic - 593 data.tic = [] - 594 return data, ax - 595 if trace_type == 'BPC': - 596 data.bpc = data.tic - 597 data.tic = [] - 598 return data, None - 599 - 600 else: - 601 return None, None - 602 - 603 def get_average_mass_spectrum( - 604 self, - 605 spectrum_mode: str = "profile", - 606 auto_process: bool = True, - 607 ppm_tolerance: float = 5.0, - 608 ms_type: str = "MS1", - 609 ) -> MassSpecProfile | MassSpecCentroid: - 610 """ - 611 Averages mass spectra over a scan range using Thermo's AverageScansInScanRange method - 612 or a scan list using Thermo's AverageScans method - 613 spectrum_mode: str - 614 centroid or profile mass spectrum - 615 auto_process: bool - 616 If true performs peak picking, and noise threshold calculation after creation of mass spectrum object - 617 ms_type: str - 618 String of form 'ms1' or 'ms2' or 'MS3' etc. Valid up to MS10. 
- 619 Internal function converts to Thermo MSOrderType class. - 620 - 621 """ - 622 - 623 def get_profile_mass_spec(averageScan, d_params: dict, auto_process: bool): - 624 mz_list = list(averageScan.SegmentedScan.Positions) - 625 abund_list = list(averageScan.SegmentedScan.Intensities) + 574 data.apexes = centroid_peak_indexes + 575 + 576 if plot: + 577 if not ax: + 578 import matplotlib.pyplot as plt + 579 + 580 ax = plt.gca() + 581 # fig, ax = plt.subplots(figsize=(6, 3)) + 582 + 583 ax.plot(data.time, data.tic, label=trace_type) + 584 ax.set_xlabel("Time (min)") + 585 ax.set_ylabel("a.u.") + 586 if peak_detection: + 587 for peak_indexes in data.apexes: + 588 apex_index = peak_indexes[1] + 589 ax.plot( + 590 data.time[apex_index], + 591 data.tic[apex_index], + 592 marker="x", + 593 linewidth=0, + 594 ) + 595 + 596 # plt.show() + 597 if trace_type == "BPC": + 598 data.bpc = data.tic + 599 data.tic = [] + 600 return data, ax + 601 if trace_type == "BPC": + 602 data.bpc = data.tic + 603 data.tic = [] + 604 return data, None + 605 + 606 else: + 607 return None, None + 608 + 609 def get_average_mass_spectrum( + 610 self, + 611 spectrum_mode: str = "profile", + 612 auto_process: bool = True, + 613 ppm_tolerance: float = 5.0, + 614 ms_type: str = "MS1", + 615 ) -> MassSpecProfile | MassSpecCentroid: + 616 """ + 617 Averages mass spectra over a scan range using Thermo's AverageScansInScanRange method + 618 or a scan list using Thermo's AverageScans method + 619 spectrum_mode: str + 620 centroid or profile mass spectrum + 621 auto_process: bool + 622 If true performs peak picking, and noise threshold calculation after creation of mass spectrum object + 623 ms_type: str + 624 String of form 'ms1' or 'ms2' or 'MS3' etc. Valid up to MS10. + 625 Internal function converts to Thermo MSOrderType class. 
626 - 627 data_dict = { - 628 Labels.mz: mz_list, - 629 Labels.abundance: abund_list, - 630 } - 631 - 632 return MassSpecProfile(data_dict, d_params, auto_process=auto_process) - 633 - 634 def get_centroid_mass_spec(averageScan, d_params: dict): - 635 noise = list(averageScan.centroidScan.Noises) - 636 - 637 baselines = list(averageScan.centroidScan.Baselines) - 638 - 639 rp = list(averageScan.centroidScan.Resolutions) - 640 - 641 magnitude = list(averageScan.centroidScan.Intensities) + 627 """ + 628 + 629 def get_profile_mass_spec(averageScan, d_params: dict, auto_process: bool): + 630 mz_list = list(averageScan.SegmentedScan.Positions) + 631 abund_list = list(averageScan.SegmentedScan.Intensities) + 632 + 633 data_dict = { + 634 Labels.mz: mz_list, + 635 Labels.abundance: abund_list, + 636 } + 637 + 638 return MassSpecProfile(data_dict, d_params, auto_process=auto_process) + 639 + 640 def get_centroid_mass_spec(averageScan, d_params: dict): + 641 noise = list(averageScan.centroidScan.Noises) 642 - 643 mz = list(averageScan.centroidScan.Masses) + 643 baselines = list(averageScan.centroidScan.Baselines) 644 - 645 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 - 646 l_signal_to_noise = np.array(magnitude) / array_noise_std - 647 - 648 d_params["baseline_noise"] = np.average(array_noise_std) - 649 - 650 d_params["baseline_noise_std"] = np.std(array_noise_std) - 651 - 652 data_dict = { - 653 Labels.mz: mz, - 654 Labels.abundance: magnitude, - 655 Labels.rp: rp, - 656 Labels.s2n: list(l_signal_to_noise), - 657 } - 658 - 659 mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False) - 660 - 661 return mass_spec - 662 - 663 d_params = self.set_metadata( - 664 firstScanNumber=self.start_scan, lastScanNumber=self.end_scan - 665 ) + 645 rp = list(averageScan.centroidScan.Resolutions) + 646 + 647 magnitude = list(averageScan.centroidScan.Intensities) + 648 + 649 mz = list(averageScan.centroidScan.Masses) + 650 + 651 array_noise_std = 
(np.array(noise) - np.array(baselines)) / 3 + 652 l_signal_to_noise = np.array(magnitude) / array_noise_std + 653 + 654 d_params["baseline_noise"] = np.average(array_noise_std) + 655 + 656 d_params["baseline_noise_std"] = np.std(array_noise_std) + 657 + 658 data_dict = { + 659 Labels.mz: mz, + 660 Labels.abundance: magnitude, + 661 Labels.rp: rp, + 662 Labels.s2n: list(l_signal_to_noise), + 663 } + 664 + 665 mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False) 666 - 667 # Create the mass options object that will be used when averaging the scans - 668 options = MassOptions() - 669 options.ToleranceUnits = ToleranceUnits.ppm - 670 options.Tolerance = ppm_tolerance - 671 - 672 # Get the scan filter for the first scan. This scan filter will be used to located - 673 # scans within the given scan range of the same type - 674 scanFilter = self.iRawDataPlus.GetFilterForScanNumber(self.start_scan) - 675 - 676 # force it to only look for the MSType - 677 scanFilter = self.set_msordertype(scanFilter, ms_type) - 678 - 679 if isinstance(self.scans, tuple): - 680 averageScan = Extensions.AverageScansInScanRange( - 681 self.iRawDataPlus, self.start_scan, self.end_scan, scanFilter, options - 682 ) - 683 - 684 if averageScan: - 685 if spectrum_mode == "profile": - 686 mass_spec = get_profile_mass_spec( - 687 averageScan, d_params, auto_process - 688 ) + 667 return mass_spec + 668 + 669 d_params = self.set_metadata( + 670 firstScanNumber=self.start_scan, lastScanNumber=self.end_scan + 671 ) + 672 + 673 # Create the mass options object that will be used when averaging the scans + 674 options = MassOptions() + 675 options.ToleranceUnits = ToleranceUnits.ppm + 676 options.Tolerance = ppm_tolerance + 677 + 678 # Get the scan filter for the first scan. 
This scan filter will be used to located + 679 # scans within the given scan range of the same type + 680 scanFilter = self.iRawDataPlus.GetFilterForScanNumber(self.start_scan) + 681 + 682 # force it to only look for the MSType + 683 scanFilter = self.set_msordertype(scanFilter, ms_type) + 684 + 685 if isinstance(self.scans, tuple): + 686 averageScan = Extensions.AverageScansInScanRange( + 687 self.iRawDataPlus, self.start_scan, self.end_scan, scanFilter, options + 688 ) 689 - 690 return mass_spec - 691 - 692 elif spectrum_mode == "centroid": - 693 if averageScan.HasCentroidStream: - 694 mass_spec = get_centroid_mass_spec(averageScan, d_params) + 690 if averageScan: + 691 if spectrum_mode == "profile": + 692 mass_spec = get_profile_mass_spec( + 693 averageScan, d_params, auto_process + 694 ) 695 - 696 return mass_spec + 696 return mass_spec 697 - 698 else: - 699 raise ValueError( - 700 "No Centroind data available for the selected scans" - 701 ) - 702 else: - 703 raise ValueError("spectrum_mode must be 'profile' or centroid") - 704 else: - 705 raise ValueError("No data found for the selected scans") - 706 - 707 elif isinstance(self.scans, list): - 708 d_params = self.set_metadata(scans_list=self.scans) - 709 - 710 scans = List[int]() - 711 for scan in self.scans: - 712 scans.Add(scan) - 713 - 714 averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options) + 698 elif spectrum_mode == "centroid": + 699 if averageScan.HasCentroidStream: + 700 mass_spec = get_centroid_mass_spec(averageScan, d_params) + 701 + 702 return mass_spec + 703 + 704 else: + 705 raise ValueError( + 706 "No Centroind data available for the selected scans" + 707 ) + 708 else: + 709 raise ValueError("spectrum_mode must be 'profile' or centroid") + 710 else: + 711 raise ValueError("No data found for the selected scans") + 712 + 713 elif isinstance(self.scans, list): + 714 d_params = self.set_metadata(scans_list=self.scans) 715 - 716 if averageScan: - 717 if spectrum_mode == "profile": - 
718 mass_spec = get_profile_mass_spec( - 719 averageScan, d_params, auto_process - 720 ) + 716 scans = List[int]() + 717 for scan in self.scans: + 718 scans.Add(scan) + 719 + 720 averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options) 721 - 722 return mass_spec - 723 - 724 elif spectrum_mode == "centroid": - 725 if averageScan.HasCentroidStream: - 726 mass_spec = get_centroid_mass_spec(averageScan, d_params) + 722 if averageScan: + 723 if spectrum_mode == "profile": + 724 mass_spec = get_profile_mass_spec( + 725 averageScan, d_params, auto_process + 726 ) 727 - 728 return mass_spec + 728 return mass_spec 729 - 730 else: - 731 raise ValueError( - 732 "No Centroind data available for the selected scans" - 733 ) - 734 - 735 else: - 736 raise ValueError("spectrum_mode must be 'profile' or centroid") - 737 - 738 else: - 739 raise ValueError("No data found for the selected scans") + 730 elif spectrum_mode == "centroid": + 731 if averageScan.HasCentroidStream: + 732 mass_spec = get_centroid_mass_spec(averageScan, d_params) + 733 + 734 return mass_spec + 735 + 736 else: + 737 raise ValueError( + 738 "No Centroind data available for the selected scans" + 739 ) 740 - 741 else: - 742 raise ValueError("scans must be a list intergers or a tuple if integers") + 741 else: + 742 raise ValueError("spectrum_mode must be 'profile' or centroid") 743 - 744 def set_metadata( - 745 self, - 746 firstScanNumber=0, - 747 lastScanNumber=0, - 748 scans_list=False, - 749 label=Labels.thermo_profile, - 750 ): - 751 """ - 752 Collect metadata to be ingested in the mass spectrum object - 753 - 754 scans_list: list[int] or false - 755 lastScanNumber: int - 756 firstScanNumber: int - 757 """ - 758 - 759 d_params = default_parameters(self.file_path) - 760 - 761 # assumes scans is full scan or reduced profile scan - 762 - 763 d_params["label"] = label + 744 else: + 745 raise ValueError("No data found for the selected scans") + 746 + 747 else: + 748 raise ValueError("scans must be a 
list intergers or a tuple if integers") + 749 + 750 def set_metadata( + 751 self, + 752 firstScanNumber=0, + 753 lastScanNumber=0, + 754 scans_list=False, + 755 label=Labels.thermo_profile, + 756 ): + 757 """ + 758 Collect metadata to be ingested in the mass spectrum object + 759 + 760 scans_list: list[int] or false + 761 lastScanNumber: int + 762 firstScanNumber: int + 763 """ 764 - 765 if scans_list: - 766 d_params["scan_number"] = scans_list - 767 - 768 d_params["polarity"] = self.get_polarity_mode(scans_list[0]) - 769 - 770 else: - 771 d_params["scan_number"] = "{}-{}".format(firstScanNumber, lastScanNumber) - 772 - 773 d_params["polarity"] = self.get_polarity_mode(firstScanNumber) - 774 - 775 d_params["analyzer"] = self.iRawDataPlus.GetInstrumentData().Model - 776 - 777 d_params["acquisition_time"] = self.get_creation_time() + 765 d_params = default_parameters(self.file_path) + 766 + 767 # assumes scans is full scan or reduced profile scan + 768 + 769 d_params["label"] = label + 770 + 771 if scans_list: + 772 d_params["scan_number"] = scans_list + 773 + 774 d_params["polarity"] = self.get_polarity_mode(scans_list[0]) + 775 + 776 else: + 777 d_params["scan_number"] = "{}-{}".format(firstScanNumber, lastScanNumber) 778 - 779 d_params["instrument_label"] = self.iRawDataPlus.GetInstrumentData().Name + 779 d_params["polarity"] = self.get_polarity_mode(firstScanNumber) 780 - 781 return d_params + 781 d_params["analyzer"] = self.iRawDataPlus.GetInstrumentData().Model 782 - 783 def get_centroid_msms_data(self, scan): - 784 """ - 785 .. deprecated:: 2.0 - 786 This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality. 
- 787 """ + 783 d_params["acquisition_time"] = self.get_creation_time() + 784 + 785 d_params["instrument_label"] = self.iRawDataPlus.GetInstrumentData().Name + 786 + 787 return d_params 788 - 789 warnings.warn( - 790 "The `get_centroid_msms_data()` is deprecated as of CoreMS 2.0 and will be removed in a future version. " - 791 "Please use `get_average_mass_spectrum()` instead.", - 792 DeprecationWarning, - 793 ) + 789 def get_centroid_msms_data(self, scan): + 790 """ + 791 .. deprecated:: 2.0 + 792 This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality. + 793 """ 794 - 795 d_params = self.set_metadata(scans_list=[scan], label=Labels.thermo_centroid) - 796 - 797 centroidStream = self.iRawDataPlus.GetCentroidStream(scan, False) - 798 - 799 noise = list(centroidStream.Noises) + 795 warnings.warn( + 796 "The `get_centroid_msms_data()` is deprecated as of CoreMS 2.0 and will be removed in a future version. " + 797 "Please use `get_average_mass_spectrum()` instead.", + 798 DeprecationWarning, + 799 ) 800 - 801 baselines = list(centroidStream.Baselines) + 801 d_params = self.set_metadata(scans_list=[scan], label=Labels.thermo_centroid) 802 - 803 rp = list(centroidStream.Resolutions) + 803 centroidStream = self.iRawDataPlus.GetCentroidStream(scan, False) 804 - 805 magnitude = list(centroidStream.Intensities) + 805 noise = list(centroidStream.Noises) 806 - 807 mz = list(centroidStream.Masses) + 807 baselines = list(centroidStream.Baselines) 808 - 809 # charge = scans_labels[5] - 810 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 - 811 l_signal_to_noise = np.array(magnitude) / array_noise_std + 809 rp = list(centroidStream.Resolutions) + 810 + 811 magnitude = list(centroidStream.Intensities) 812 - 813 d_params["baseline_noise"] = np.average(array_noise_std) + 813 mz = list(centroidStream.Masses) 814 - 815 d_params["baseline_noise_std"] = np.std(array_noise_std) - 816 - 817 data_dict = { - 818 
Labels.mz: mz, - 819 Labels.abundance: magnitude, - 820 Labels.rp: rp, - 821 Labels.s2n: list(l_signal_to_noise), - 822 } - 823 - 824 mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False) - 825 mass_spec.settings.noise_threshold_method = "relative_abundance" - 826 mass_spec.settings.noise_threshold_min_relative_abundance = 1 - 827 mass_spec.process_mass_spec() - 828 return mass_spec + 815 # charge = scans_labels[5] + 816 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 + 817 l_signal_to_noise = np.array(magnitude) / array_noise_std + 818 + 819 d_params["baseline_noise"] = np.average(array_noise_std) + 820 + 821 d_params["baseline_noise_std"] = np.std(array_noise_std) + 822 + 823 data_dict = { + 824 Labels.mz: mz, + 825 Labels.abundance: magnitude, + 826 Labels.rp: rp, + 827 Labels.s2n: list(l_signal_to_noise), + 828 } 829 - 830 def get_average_mass_spectrum_by_scanlist( - 831 self, - 832 scans_list: List[int], - 833 auto_process: bool = True, - 834 ppm_tolerance: float = 5.0, - 835 ) -> MassSpecProfile: - 836 """ - 837 Averages selected scans mass spectra using Thermo's AverageScans method - 838 scans_list: list[int] - 839 auto_process: bool - 840 If true performs peak picking, and noise threshold calculation after creation of mass spectrum object - 841 Returns: - 842 MassSpecProfile - 843 - 844 .. deprecated:: 2.0 - 845 This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality. - 846 """ - 847 - 848 warnings.warn( - 849 "The `get_average_mass_spectrum_by_scanlist()` is deprecated as of CoreMS 2.0 and will be removed in a future version. 
" - 850 "Please use `get_average_mass_spectrum()` instead.", - 851 DeprecationWarning, - 852 ) + 830 mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False) + 831 mass_spec.settings.noise_threshold_method = "relative_abundance" + 832 mass_spec.settings.noise_threshold_min_relative_abundance = 1 + 833 mass_spec.process_mass_spec() + 834 return mass_spec + 835 + 836 def get_average_mass_spectrum_by_scanlist( + 837 self, + 838 scans_list: List[int], + 839 auto_process: bool = True, + 840 ppm_tolerance: float = 5.0, + 841 ) -> MassSpecProfile: + 842 """ + 843 Averages selected scans mass spectra using Thermo's AverageScans method + 844 scans_list: list[int] + 845 auto_process: bool + 846 If true performs peak picking, and noise threshold calculation after creation of mass spectrum object + 847 Returns: + 848 MassSpecProfile + 849 + 850 .. deprecated:: 2.0 + 851 This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality. + 852 """ 853 - 854 d_params = self.set_metadata(scans_list=scans_list) - 855 - 856 # assumes scans is full scan or reduced profile scan - 857 - 858 scans = List[int]() - 859 for scan in scans_list: - 860 scans.Add(scan) + 854 warnings.warn( + 855 "The `get_average_mass_spectrum_by_scanlist()` is deprecated as of CoreMS 2.0 and will be removed in a future version. " + 856 "Please use `get_average_mass_spectrum()` instead.", + 857 DeprecationWarning, + 858 ) + 859 + 860 d_params = self.set_metadata(scans_list=scans_list) 861 - 862 # Create the mass options object that will be used when averaging the scans - 863 options = MassOptions() - 864 options.ToleranceUnits = ToleranceUnits.ppm - 865 options.Tolerance = ppm_tolerance - 866 - 867 # Get the scan filter for the first scan. 
This scan filter will be used to located - 868 # scans within the given scan range of the same type - 869 - 870 averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options) - 871 - 872 len_data = averageScan.SegmentedScan.Positions.Length - 873 - 874 mz_list = list(averageScan.SegmentedScan.Positions) - 875 abund_list = list(averageScan.SegmentedScan.Intensities) - 876 - 877 data_dict = { - 878 Labels.mz: mz_list, - 879 Labels.abundance: abund_list, - 880 } - 881 - 882 mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process) - 883 - 884 return mass_spec - 885 - 886 - 887class ImportMassSpectraThermoMSFileReader(ThermoBaseClass, SpectraParserInterface): - 888 """A class for parsing Thermo RAW mass spectrometry data files and instatiating MassSpectraBase or LCMSBase objects + 862 # assumes scans is full scan or reduced profile scan + 863 + 864 scans = List[int]() + 865 for scan in scans_list: + 866 scans.Add(scan) + 867 + 868 # Create the mass options object that will be used when averaging the scans + 869 options = MassOptions() + 870 options.ToleranceUnits = ToleranceUnits.ppm + 871 options.Tolerance = ppm_tolerance + 872 + 873 # Get the scan filter for the first scan. This scan filter will be used to located + 874 # scans within the given scan range of the same type + 875 + 876 averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options) + 877 + 878 len_data = averageScan.SegmentedScan.Positions.Length + 879 + 880 mz_list = list(averageScan.SegmentedScan.Positions) + 881 abund_list = list(averageScan.SegmentedScan.Intensities) + 882 + 883 data_dict = { + 884 Labels.mz: mz_list, + 885 Labels.abundance: abund_list, + 886 } + 887 + 888 mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process) 889 - 890 Parameters - 891 ---------- - 892 file_location : str or Path - 893 The path to the RAW file to be parsed. - 894 analyzer : str, optional - 895 The type of mass analyzer used in the instrument. 
Default is "Unknown". - 896 instrument_label : str, optional - 897 The name of the instrument used to acquire the data. Default is "Unknown". - 898 sample_name : str, optional - 899 The name of the sample being analyzed. If not provided, the stem of the file_location path will be used. - 900 - 901 Attributes - 902 ---------- - 903 file_location : Path - 904 The path to the RAW file being parsed. - 905 analyzer : str - 906 The type of mass analyzer used in the instrument. - 907 instrument_label : str - 908 The name of the instrument used to acquire the data. - 909 sample_name : str - 910 The name of the sample being analyzed. - 911 - 912 Methods - 913 ------- - 914 * run(spectra=True). - 915 Parses the RAW file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe. - 916 * get_mass_spectrum_from_scan(scan_number, polarity, auto_process=True) - 917 Parses the RAW file and returns a MassSpecBase object from a single scan. - 918 * get_mass_spectra_obj(). - 919 Parses the RAW file and instantiates a MassSpectraBase object. - 920 * get_lcms_obj(). - 921 Parses the RAW file and instantiates an LCMSBase object. - 922 * get_icr_transient_times(). 
- 923 Return a list for transient time targets for all scans, or selected scans range - 924 - 925 Inherits from ThermoBaseClass and SpectraParserInterface - 926 """ - 927 - 928 def __init__( - 929 self, - 930 file_location, - 931 analyzer="Unknown", - 932 instrument_label="Unknown", - 933 sample_name=None, - 934 ): - 935 super().__init__(file_location) - 936 if isinstance(file_location, str): - 937 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed - 938 file_location = Path(file_location) - 939 if not file_location.exists(): - 940 raise FileExistsError("File does not exist: " + str(file_location)) - 941 - 942 self.file_location = file_location - 943 self.analyzer = analyzer - 944 self.instrument_label = instrument_label - 945 - 946 if sample_name: - 947 self.sample_name = sample_name - 948 else: - 949 self.sample_name = file_location.stem - 950 - 951 def load(self): - 952 pass - 953 - 954 def get_scan_df(self): - 955 # This automatically brings in all the data - 956 self.chromatogram_settings.scans = (-1, -1) - 957 - 958 # Get scan df info; starting with bulk ms1 and ms2 scans - 959 ms1_tic_data, _ = self.get_tic( - 960 ms_type="MS", peak_detection=False, smooth=False - 961 ) - 962 ms1_scan_dict = { - 963 "scan": ms1_tic_data.scans, - 964 "scan_time": ms1_tic_data.time, - 965 "tic": ms1_tic_data.tic, - 966 } - 967 ms1_tic_df = pd.DataFrame.from_dict(ms1_scan_dict) - 968 ms1_tic_df["ms_level"] = "ms1" - 969 - 970 ms2_tic_data, _ = self.get_tic( - 971 ms_type="MS2", peak_detection=False, smooth=False - 972 ) - 973 ms2_scan_dict = { - 974 "scan": ms2_tic_data.scans, - 975 "scan_time": ms2_tic_data.time, - 976 "tic": ms2_tic_data.tic, - 977 } - 978 ms2_tic_df = pd.DataFrame.from_dict(ms2_scan_dict) - 979 ms2_tic_df["ms_level"] = "ms2" - 980 - 981 scan_df = ( - 982 pd.concat([ms1_tic_df, ms2_tic_df], axis=0) - 983 .sort_values(by="scan") - 984 .reindex() - 985 ) - 986 - 987 # get scan text - 988 scan_filter_df = pd.DataFrame.from_dict( - 
989 self.get_all_filters()[0], orient="index" - 990 ) - 991 scan_filter_df.reset_index(inplace=True) - 992 scan_filter_df.rename( - 993 columns={"index": "scan", 0: "scan_text"}, inplace=True - 994 ) + 890 return mass_spec + 891 + 892 + 893class ImportMassSpectraThermoMSFileReader(ThermoBaseClass, SpectraParserInterface): + 894 """A class for parsing Thermo RAW mass spectrometry data files and instatiating MassSpectraBase or LCMSBase objects + 895 + 896 Parameters + 897 ---------- + 898 file_location : str or Path + 899 The path to the RAW file to be parsed. + 900 analyzer : str, optional + 901 The type of mass analyzer used in the instrument. Default is "Unknown". + 902 instrument_label : str, optional + 903 The name of the instrument used to acquire the data. Default is "Unknown". + 904 sample_name : str, optional + 905 The name of the sample being analyzed. If not provided, the stem of the file_location path will be used. + 906 + 907 Attributes + 908 ---------- + 909 file_location : Path + 910 The path to the RAW file being parsed. + 911 analyzer : str + 912 The type of mass analyzer used in the instrument. + 913 instrument_label : str + 914 The name of the instrument used to acquire the data. + 915 sample_name : str + 916 The name of the sample being analyzed. + 917 + 918 Methods + 919 ------- + 920 * run(spectra=True). + 921 Parses the RAW file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe. + 922 * get_mass_spectrum_from_scan(scan_number, polarity, auto_process=True) + 923 Parses the RAW file and returns a MassSpecBase object from a single scan. + 924 * get_mass_spectra_obj(). + 925 Parses the RAW file and instantiates a MassSpectraBase object. + 926 * get_lcms_obj(). + 927 Parses the RAW file and instantiates an LCMSBase object. + 928 * get_icr_transient_times(). 
+ 929 Return a list for transient time targets for all scans, or selected scans range + 930 + 931 Inherits from ThermoBaseClass and SpectraParserInterface + 932 """ + 933 + 934 def __init__( + 935 self, + 936 file_location, + 937 analyzer="Unknown", + 938 instrument_label="Unknown", + 939 sample_name=None, + 940 ): + 941 super().__init__(file_location) + 942 if isinstance(file_location, str): + 943 # if obj is a string it defaults to create a Path obj, pass the S3Path if needed + 944 file_location = Path(file_location) + 945 if not file_location.exists(): + 946 raise FileExistsError("File does not exist: " + str(file_location)) + 947 + 948 self.file_location = file_location + 949 self.analyzer = analyzer + 950 self.instrument_label = instrument_label + 951 + 952 if sample_name: + 953 self.sample_name = sample_name + 954 else: + 955 self.sample_name = file_location.stem + 956 + 957 def load(self): + 958 pass + 959 + 960 def get_scan_df(self): + 961 # This automatically brings in all the data + 962 self.chromatogram_settings.scans = (-1, -1) + 963 + 964 # Get scan df info; starting with bulk ms1 and ms2 scans + 965 ms1_tic_data, _ = self.get_tic(ms_type="MS", peak_detection=False, smooth=False) + 966 ms1_scan_dict = { + 967 "scan": ms1_tic_data.scans, + 968 "scan_time": ms1_tic_data.time, + 969 "tic": ms1_tic_data.tic, + 970 } + 971 ms1_tic_df = pd.DataFrame.from_dict(ms1_scan_dict) + 972 ms1_tic_df["ms_level"] = "ms1" + 973 + 974 ms2_tic_data, _ = self.get_tic( + 975 ms_type="MS2", peak_detection=False, smooth=False + 976 ) + 977 ms2_scan_dict = { + 978 "scan": ms2_tic_data.scans, + 979 "scan_time": ms2_tic_data.time, + 980 "tic": ms2_tic_data.tic, + 981 } + 982 ms2_tic_df = pd.DataFrame.from_dict(ms2_scan_dict) + 983 ms2_tic_df["ms_level"] = "ms2" + 984 + 985 scan_df = ( + 986 pd.concat([ms1_tic_df, ms2_tic_df], axis=0).sort_values(by="scan").reindex() + 987 ) + 988 + 989 # get scan text + 990 scan_filter_df = pd.DataFrame.from_dict( + 991 
self.get_all_filters()[0], orient="index" + 992 ) + 993 scan_filter_df.reset_index(inplace=True) + 994 scan_filter_df.rename(columns={"index": "scan", 0: "scan_text"}, inplace=True) 995 996 scan_df = scan_df.merge(scan_filter_df, on="scan", how="left") 997 scan_df["scan_window_lower"] = scan_df.scan_text.str.extract( @@ -1190,12 +1190,12 @@

    1014 scan_df.loc[scan_df.scan == i, "ms_format"] = "centroid" 1015 else: 1016 scan_df.loc[scan_df.scan == i, "ms_format"] = "profile" -1017 +1017 1018 return scan_df 1019 1020 def get_ms_raw(self, spectra, scan_df): 1021 if spectra == "all": -1022 scan_df_forspec = scan_df +1022 scan_df_forspec = scan_df 1023 elif spectra == "ms1": 1024 scan_df_forspec = scan_df[scan_df.ms_level == 1] 1025 elif spectra == "ms2": @@ -1218,310 +1218,306 @@

    1042 # First pass: get nrows 1043 N = defaultdict(lambda: 0) 1044 for i in scan_df_forspec.scan.to_list(): -1045 level = scan_df_forspec.loc[ -1046 scan_df_forspec.scan == i, "ms_level" -1047 ].values[0] -1048 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) -1049 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1050 i, scanStatistics -1051 ) -1052 abun = list(profileStream.Intensities) -1053 abun = np.array(abun)[np.where(np.array(abun) > 0)[0]] +1045 level = scan_df_forspec.loc[scan_df_forspec.scan == i, "ms_level"].values[0] +1046 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) +1047 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1048 i, scanStatistics +1049 ) +1050 abun = list(profileStream.Intensities) +1051 abun = np.array(abun)[np.where(np.array(abun) > 0)[0]] +1052 +1053 N[level] += len(abun) 1054 -1055 N[level] += len(abun) -1056 -1057 # Second pass: parse -1058 for i in scan_df_forspec.scan.to_list(): -1059 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) -1060 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1061 i, scanStatistics -1062 ) -1063 abun = list(profileStream.Intensities) -1064 mz = list(profileStream.Positions) -1065 -1066 # Get index of abun that are > 0 -1067 inx = np.where(np.array(abun) > 0)[0] -1068 mz = np.array(mz)[inx] -1069 mz = np.float32(mz) -1070 abun = np.array(abun)[inx] -1071 abun = np.float32(abun) +1055 # Second pass: parse +1056 for i in scan_df_forspec.scan.to_list(): +1057 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) +1058 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1059 i, scanStatistics +1060 ) +1061 abun = list(profileStream.Intensities) +1062 mz = list(profileStream.Positions) +1063 +1064 # Get index of abun that are > 0 +1065 inx = np.where(np.array(abun) > 0)[0] +1066 mz = np.array(mz)[inx] +1067 mz = np.float32(mz) +1068 abun = np.array(abun)[inx] +1069 abun = 
np.float32(abun) +1070 +1071 level = scan_df_forspec.loc[scan_df_forspec.scan == i, "ms_level"].values[0] 1072 -1073 level = scan_df_forspec.loc[ -1074 scan_df_forspec.scan == i, "ms_level" -1075 ].values[0] -1076 -1077 # Number of rows -1078 n = len(mz) +1073 # Number of rows +1074 n = len(mz) +1075 +1076 # No measurements +1077 if n == 0: +1078 continue 1079 -1080 # No measurements -1081 if n == 0: -1082 continue -1083 -1084 # Dimension check -1085 if len(mz) != len(abun): -1086 warnings.warn("m/z and intensity array dimension mismatch") -1087 continue -1088 -1089 # Scan/frame info -1090 id_dict = i +1080 # Dimension check +1081 if len(mz) != len(abun): +1082 warnings.warn("m/z and intensity array dimension mismatch") +1083 continue +1084 +1085 # Scan/frame info +1086 id_dict = i +1087 +1088 # Columns +1089 cols[level] = ["scan", "mz", "intensity"] +1090 m = len(cols[level]) 1091 -1092 # Columns -1093 cols[level] = ["scan", "mz", "intensity"] -1094 m = len(cols[level]) +1092 # Subarray init +1093 arr = np.empty((n, m), dtype=dtype) +1094 inx = 0 1095 -1096 # Subarray init -1097 arr = np.empty((n, m), dtype=dtype) -1098 inx = 0 +1096 # Populate scan/frame info +1097 arr[:, inx] = i +1098 inx += 1 1099 -1100 # Populate scan/frame info -1101 arr[:, inx] = i +1100 # Populate m/z +1101 arr[:, inx] = mz 1102 inx += 1 1103 -1104 # Populate m/z -1105 arr[:, inx] = mz +1104 # Populate intensity +1105 arr[:, inx] = abun 1106 inx += 1 1107 -1108 # Populate intensity -1109 arr[:, inx] = abun -1110 inx += 1 -1111 -1112 # Initialize output container -1113 if level not in res: -1114 res[level] = np.empty((N[level], m), dtype=dtype) -1115 counter[level] = 0 +1108 # Initialize output container +1109 if level not in res: +1110 res[level] = np.empty((N[level], m), dtype=dtype) +1111 counter[level] = 0 +1112 +1113 # Insert subarray +1114 res[level][counter[level] : counter[level] + n, :] = arr +1115 counter[level] += n 1116 -1117 # Insert subarray -1118 res[level][counter[level] : 
counter[level] + n, :] = arr -1119 counter[level] += n -1120 -1121 # Construct ms1 and ms2 mz dataframes -1122 for level in res.keys(): -1123 res[level] = pd.DataFrame(res[level]) -1124 res[level].columns = cols[level] -1125 # rename keys in res to add 'ms' prefix -1126 res = {f"ms{key}": value for key, value in res.items()} -1127 -1128 return res -1129 -1130 def run(self, spectra="all", scan_df=None): -1131 """ -1132 Extracts mass spectra data from a raw file. -1133 -1134 Parameters -1135 ---------- -1136 spectra : str, optional -1137 Which mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2. -1138 scan_df : pandas.DataFrame, optional -1139 Scan dataframe. If not provided, the scan dataframe is created from the mzML file. -1140 -1141 Returns -1142 ------- -1143 tuple -1144 A tuple containing two elements: -1145 - A dictionary containing mass spectra data, separated by MS level. -1146 - A pandas DataFrame containing scan information, including scan number, scan time, TIC, MS level, -1147 scan text, scan window lower and upper bounds, polarity, and precursor m/z (if applicable). -1148 """ -1149 # Prepare scan_df -1150 if scan_df is None: -1151 scan_df = self.get_scan_df() -1152 -1153 # Prepare mass spectra data -1154 if spectra != "none": -1155 res = self.get_ms_raw(spectra=spectra, scan_df=scan_df) -1156 else: -1157 res = None -1158 -1159 return res, scan_df -1160 -1161 def get_mass_spectrum_from_scan( -1162 self, scan_number, spectrum_mode, auto_process=True -1163 ): -1164 """Instatiate a MassSpecBase object from a single scan number from the binary file, currently only supports profile mode. -1165 -1166 Parameters -1167 ---------- -1168 scan_number : int -1169 The scan number to extract the mass spectrum from. -1170 polarity : int -1171 The polarity of the scan. 1 for positive mode, -1 for negative mode. -1172 spectrum_mode : str -1173 The type of mass spectrum to extract. Must be 'profile' or 'centroid'. 
-1174 auto_process : bool, optional -1175 If True, perform peak picking and noise threshold calculation after creating the mass spectrum object. Default is True. -1176 -1177 Returns -1178 ------- -1179 MassSpecProfile | MassSpecCentroid -1180 The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum. -1181 """ -1182 -1183 if spectrum_mode == "profile": -1184 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(scan_number) -1185 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1186 scan_number, scanStatistics -1187 ) -1188 abun = list(profileStream.Intensities) -1189 mz = list(profileStream.Positions) -1190 data_dict = { -1191 Labels.mz: mz, -1192 Labels.abundance: abun, -1193 } -1194 d_params = self.set_metadata( -1195 firstScanNumber=scan_number, -1196 lastScanNumber=scan_number, -1197 scans_list=False, -1198 label=Labels.thermo_profile, -1199 ) -1200 mass_spectrum_obj = MassSpecProfile( -1201 data_dict, d_params, auto_process=auto_process -1202 ) -1203 -1204 elif spectrum_mode == "centroid": -1205 centroid_scan = self.iRawDataPlus.GetCentroidStream(scan_number, False) -1206 if centroid_scan.Masses is not None: -1207 mz = list(centroid_scan.Masses) -1208 abun = list(centroid_scan.Intensities) -1209 rp = list(centroid_scan.Resolutions) -1210 magnitude = list(centroid_scan.Intensities) -1211 noise = list(centroid_scan.Noises) -1212 baselines = list(centroid_scan.Baselines) -1213 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 -1214 l_signal_to_noise = np.array(magnitude) / array_noise_std -1215 data_dict = { -1216 Labels.mz: mz, -1217 Labels.abundance: abun, -1218 Labels.rp: rp, -1219 Labels.s2n: list(l_signal_to_noise), -1220 } -1221 else: # For CID MS2, the centroid data are stored in the profile data location, they do not have any associated rp or baseline data, but they should be treated as centroid data -1222 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber( -1223 scan_number -1224 ) 
-1225 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1226 scan_number, scanStatistics -1227 ) -1228 abun = list(profileStream.Intensities) -1229 mz = list(profileStream.Positions) -1230 data_dict = { -1231 Labels.mz: mz, -1232 Labels.abundance: abun, -1233 Labels.rp: [np.nan] * len(mz), -1234 Labels.s2n: [np.nan] * len(mz), -1235 } -1236 d_params = self.set_metadata( -1237 firstScanNumber=scan_number, -1238 lastScanNumber=scan_number, -1239 scans_list=False, -1240 label=Labels.thermo_centroid, -1241 ) -1242 mass_spectrum_obj = MassSpecCentroid( -1243 data_dict, d_params, auto_process=auto_process -1244 ) -1245 -1246 return mass_spectrum_obj -1247 -1248 def get_mass_spectra_obj(self): -1249 """Instatiate a MassSpectraBase object from the binary data file file. -1250 -1251 Returns -1252 ------- -1253 MassSpectraBase -1254 The MassSpectra object containing the parsed mass spectra. The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe. -1255 """ -1256 _, scan_df = self.run(spectra="none") -1257 mass_spectra_obj = MassSpectraBase( -1258 self.file_location, -1259 self.analyzer, -1260 self.instrument_label, -1261 self.sample_name, -1262 self, -1263 ) -1264 scan_df = scan_df.set_index("scan", drop=False) -1265 mass_spectra_obj.scan_df = scan_df -1266 -1267 return mass_spectra_obj -1268 -1269 def get_lcms_obj(self, spectra="all"): -1270 """Instatiates a LCMSBase object from the mzML file. -1271 -1272 Parameters -1273 ---------- -1274 verbose : bool, optional -1275 If True, print progress messages. Default is True. -1276 spectra : str, optional -1277 Which mass spectra data to include in the output. Default is "all". Other options: "none", "ms1", "ms2". -1278 -1279 Returns -1280 ------- -1281 LCMSBase -1282 LCMS object containing mass spectra data. 
The object is instatiated with the file location, analyzer, instrument, sample name, scan info, mz dataframe (as specifified), polarity, as well as the attributes holding the scans, retention times, and tics. -1283 """ -1284 _, scan_df = self.run(spectra="none") # first run it to just get scan info -1285 res, scan_df = self.run( -1286 scan_df=scan_df, spectra=spectra -1287 ) # second run to parse data -1288 lcms_obj = LCMSBase( -1289 self.file_location, -1290 self.analyzer, -1291 self.instrument_label, -1292 self.sample_name, -1293 self, -1294 ) -1295 if spectra != "none": -1296 for key in res: -1297 key_int = int(key.replace("ms", "")) -1298 res[key] = res[key][res[key].intensity > 0] -1299 res[key] = ( -1300 res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True) -1301 ) -1302 lcms_obj._ms_unprocessed[key_int] = res[key] -1303 lcms_obj.scan_df = scan_df.set_index("scan", drop=False) -1304 # Check if polarity is mixed -1305 if len(set(scan_df.polarity)) > 1: -1306 raise ValueError("Mixed polarities detected in scan data") -1307 lcms_obj.polarity = scan_df.polarity[0] -1308 lcms_obj._scans_number_list = list(scan_df.scan) -1309 lcms_obj._retention_time_list = list(scan_df.scan_time) -1310 lcms_obj._tic_list = list(scan_df.tic) -1311 -1312 return lcms_obj -1313 -1314 def get_icr_transient_times(self): -1315 """Return a list for transient time targets for all scans, or selected scans range -1316 -1317 Notes -1318 -------- -1319 Resolving Power and Transient time targets based on 7T FT-ICR MS system -1320 """ -1321 -1322 res_trans_time = { -1323 "50": 0.384, -1324 "100000": 0.768, -1325 "200000": 1.536, -1326 "400000": 3.072, -1327 "750000": 6.144, -1328 "1000000": 12.288, -1329 } +1117 # Construct ms1 and ms2 mz dataframes +1118 for level in res.keys(): +1119 res[level] = pd.DataFrame(res[level]) +1120 res[level].columns = cols[level] +1121 # rename keys in res to add 'ms' prefix +1122 res = {f"ms{key}": value for key, value in res.items()} +1123 +1124 return 
res +1125 +1126 def run(self, spectra="all", scan_df=None): +1127 """ +1128 Extracts mass spectra data from a raw file. +1129 +1130 Parameters +1131 ---------- +1132 spectra : str, optional +1133 Which mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2. +1134 scan_df : pandas.DataFrame, optional +1135 Scan dataframe. If not provided, the scan dataframe is created from the mzML file. +1136 +1137 Returns +1138 ------- +1139 tuple +1140 A tuple containing two elements: +1141 - A dictionary containing mass spectra data, separated by MS level. +1142 - A pandas DataFrame containing scan information, including scan number, scan time, TIC, MS level, +1143 scan text, scan window lower and upper bounds, polarity, and precursor m/z (if applicable). +1144 """ +1145 # Prepare scan_df +1146 if scan_df is None: +1147 scan_df = self.get_scan_df() +1148 +1149 # Prepare mass spectra data +1150 if spectra != "none": +1151 res = self.get_ms_raw(spectra=spectra, scan_df=scan_df) +1152 else: +1153 res = None +1154 +1155 return res, scan_df +1156 +1157 def get_mass_spectrum_from_scan( +1158 self, scan_number, spectrum_mode, auto_process=True +1159 ): +1160 """Instatiate a MassSpecBase object from a single scan number from the binary file, currently only supports profile mode. +1161 +1162 Parameters +1163 ---------- +1164 scan_number : int +1165 The scan number to extract the mass spectrum from. +1166 polarity : int +1167 The polarity of the scan. 1 for positive mode, -1 for negative mode. +1168 spectrum_mode : str +1169 The type of mass spectrum to extract. Must be 'profile' or 'centroid'. +1170 auto_process : bool, optional +1171 If True, perform peak picking and noise threshold calculation after creating the mass spectrum object. Default is True. +1172 +1173 Returns +1174 ------- +1175 MassSpecProfile | MassSpecCentroid +1176 The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum. 
+1177 """ +1178 +1179 if spectrum_mode == "profile": +1180 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(scan_number) +1181 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1182 scan_number, scanStatistics +1183 ) +1184 abun = list(profileStream.Intensities) +1185 mz = list(profileStream.Positions) +1186 data_dict = { +1187 Labels.mz: mz, +1188 Labels.abundance: abun, +1189 } +1190 d_params = self.set_metadata( +1191 firstScanNumber=scan_number, +1192 lastScanNumber=scan_number, +1193 scans_list=False, +1194 label=Labels.thermo_profile, +1195 ) +1196 mass_spectrum_obj = MassSpecProfile( +1197 data_dict, d_params, auto_process=auto_process +1198 ) +1199 +1200 elif spectrum_mode == "centroid": +1201 centroid_scan = self.iRawDataPlus.GetCentroidStream(scan_number, False) +1202 if centroid_scan.Masses is not None: +1203 mz = list(centroid_scan.Masses) +1204 abun = list(centroid_scan.Intensities) +1205 rp = list(centroid_scan.Resolutions) +1206 magnitude = list(centroid_scan.Intensities) +1207 noise = list(centroid_scan.Noises) +1208 baselines = list(centroid_scan.Baselines) +1209 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 +1210 l_signal_to_noise = np.array(magnitude) / array_noise_std +1211 data_dict = { +1212 Labels.mz: mz, +1213 Labels.abundance: abun, +1214 Labels.rp: rp, +1215 Labels.s2n: list(l_signal_to_noise), +1216 } +1217 else: # For CID MS2, the centroid data are stored in the profile data location, they do not have any associated rp or baseline data, but they should be treated as centroid data +1218 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber( +1219 scan_number +1220 ) +1221 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1222 scan_number, scanStatistics +1223 ) +1224 abun = list(profileStream.Intensities) +1225 mz = list(profileStream.Positions) +1226 data_dict = { +1227 Labels.mz: mz, +1228 Labels.abundance: abun, +1229 Labels.rp: [np.nan] * len(mz), +1230 Labels.s2n: 
[np.nan] * len(mz), +1231 } +1232 d_params = self.set_metadata( +1233 firstScanNumber=scan_number, +1234 lastScanNumber=scan_number, +1235 scans_list=False, +1236 label=Labels.thermo_centroid, +1237 ) +1238 mass_spectrum_obj = MassSpecCentroid( +1239 data_dict, d_params, auto_process=auto_process +1240 ) +1241 +1242 return mass_spectrum_obj +1243 +1244 def get_mass_spectra_obj(self): +1245 """Instatiate a MassSpectraBase object from the binary data file file. +1246 +1247 Returns +1248 ------- +1249 MassSpectraBase +1250 The MassSpectra object containing the parsed mass spectra. The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe. +1251 """ +1252 _, scan_df = self.run(spectra="none") +1253 mass_spectra_obj = MassSpectraBase( +1254 self.file_location, +1255 self.analyzer, +1256 self.instrument_label, +1257 self.sample_name, +1258 self, +1259 ) +1260 scan_df = scan_df.set_index("scan", drop=False) +1261 mass_spectra_obj.scan_df = scan_df +1262 +1263 return mass_spectra_obj +1264 +1265 def get_lcms_obj(self, spectra="all"): +1266 """Instatiates a LCMSBase object from the mzML file. +1267 +1268 Parameters +1269 ---------- +1270 verbose : bool, optional +1271 If True, print progress messages. Default is True. +1272 spectra : str, optional +1273 Which mass spectra data to include in the output. Default is "all". Other options: "none", "ms1", "ms2". +1274 +1275 Returns +1276 ------- +1277 LCMSBase +1278 LCMS object containing mass spectra data. The object is instatiated with the file location, analyzer, instrument, sample name, scan info, mz dataframe (as specifified), polarity, as well as the attributes holding the scans, retention times, and tics. 
+1279 """ +1280 _, scan_df = self.run(spectra="none") # first run it to just get scan info +1281 res, scan_df = self.run( +1282 scan_df=scan_df, spectra=spectra +1283 ) # second run to parse data +1284 lcms_obj = LCMSBase( +1285 self.file_location, +1286 self.analyzer, +1287 self.instrument_label, +1288 self.sample_name, +1289 self, +1290 ) +1291 if spectra != "none": +1292 for key in res: +1293 key_int = int(key.replace("ms", "")) +1294 res[key] = res[key][res[key].intensity > 0] +1295 res[key] = ( +1296 res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True) +1297 ) +1298 lcms_obj._ms_unprocessed[key_int] = res[key] +1299 lcms_obj.scan_df = scan_df.set_index("scan", drop=False) +1300 # Check if polarity is mixed +1301 if len(set(scan_df.polarity)) > 1: +1302 raise ValueError("Mixed polarities detected in scan data") +1303 lcms_obj.polarity = scan_df.polarity[0] +1304 lcms_obj._scans_number_list = list(scan_df.scan) +1305 lcms_obj._retention_time_list = list(scan_df.scan_time) +1306 lcms_obj._tic_list = list(scan_df.tic) +1307 +1308 return lcms_obj +1309 +1310 def get_icr_transient_times(self): +1311 """Return a list for transient time targets for all scans, or selected scans range +1312 +1313 Notes +1314 -------- +1315 Resolving Power and Transient time targets based on 7T FT-ICR MS system +1316 """ +1317 +1318 res_trans_time = { +1319 "50": 0.384, +1320 "100000": 0.768, +1321 "200000": 1.536, +1322 "400000": 3.072, +1323 "750000": 6.144, +1324 "1000000": 12.288, +1325 } +1326 +1327 firstScanNumber = self.start_scan +1328 +1329 lastScanNumber = self.end_scan 1330 -1331 firstScanNumber = self.start_scan +1331 transient_time_list = [] 1332 -1333 lastScanNumber = self.end_scan -1334 -1335 transient_time_list = [] -1336 -1337 for scan in range(firstScanNumber, lastScanNumber): -1338 scan_header = self.get_scan_header(scan) +1333 for scan in range(firstScanNumber, lastScanNumber): +1334 scan_header = self.get_scan_header(scan) +1335 +1336 rp_target = 
scan_header["FT Resolution:"] +1337 +1338 transient_time = res_trans_time.get(rp_target) 1339 -1340 rp_target = scan_header["FT Resolution:"] +1340 transient_time_list.append(transient_time) 1341 -1342 transient_time = res_trans_time.get(rp_target) +1342 # print(transient_time, rp_target) 1343 -1344 transient_time_list.append(transient_time) -1345 -1346 # print(transient_time, rp_target) -1347 -1348 return transient_time_list +1344 return transient_time_list

    @@ -1983,388 +1979,394 @@

    501 # plt.show() 502 503 def get_tic( -504 self, ms_type="MS !d", peak_detection=True, smooth=True, plot=False, ax=None,trace_type='TIC', -505 ) -> Tuple[TIC_Data, axes.Axes]: -506 """ms_type: str ('MS !d', 'MS2', None) -507 if you use None you get all scans. -508 peak_detection: bool -509 smooth: bool -510 plot: bool -511 ax: matplotlib axis object -512 trace_type: str ('TIC','BPC') -513 -514 returns: -515 chroma: dict -516 { -517 Scan: [int] -518 original thermo scan numberMS -519 Time: [floats] -520 list of retention times -521 TIC: [floats] -522 total ion chromatogram -523 Apexes: [int] -524 original thermo apex scan number after peak picking -525 } -526 """ -527 if trace_type == 'TIC': -528 settings = ChromatogramTraceSettings(TraceType.TIC) -529 elif trace_type == 'BPC': -530 settings = ChromatogramTraceSettings(TraceType.BasePeak) -531 else: -532 raise ValueError(f'{trace_type} undefined') -533 if ms_type == "all": -534 settings.Filter = None -535 else: -536 settings.Filter = ms_type -537 -538 chroma_settings = IChromatogramSettings(settings) -539 -540 data = self.iRawDataPlus.GetChromatogramData( -541 [chroma_settings], self.start_scan, self.end_scan -542 ) +504 self, +505 ms_type="MS !d", +506 peak_detection=True, +507 smooth=True, +508 plot=False, +509 ax=None, +510 trace_type="TIC", +511 ) -> Tuple[TIC_Data, axes.Axes]: +512 """ms_type: str ('MS !d', 'MS2', None) +513 if you use None you get all scans. 
+514 peak_detection: bool +515 smooth: bool +516 plot: bool +517 ax: matplotlib axis object +518 trace_type: str ('TIC','BPC') +519 +520 returns: +521 chroma: dict +522 { +523 Scan: [int] +524 original thermo scan numberMS +525 Time: [floats] +526 list of retention times +527 TIC: [floats] +528 total ion chromatogram +529 Apexes: [int] +530 original thermo apex scan number after peak picking +531 } +532 """ +533 if trace_type == "TIC": +534 settings = ChromatogramTraceSettings(TraceType.TIC) +535 elif trace_type == "BPC": +536 settings = ChromatogramTraceSettings(TraceType.BasePeak) +537 else: +538 raise ValueError(f"{trace_type} undefined") +539 if ms_type == "all": +540 settings.Filter = None +541 else: +542 settings.Filter = ms_type 543 -544 trace = ChromatogramSignal.FromChromatogramData(data) +544 chroma_settings = IChromatogramSettings(settings) 545 -546 data = TIC_Data(time=[], scans=[], tic=[], bpc=[], apexes=[]) -547 -548 if trace[0].Length > 0: -549 for i in range(trace[0].Length): -550 # print(trace[0].HasBasePeakData,trace[0].EndTime ) +546 data = self.iRawDataPlus.GetChromatogramData( +547 [chroma_settings], self.start_scan, self.end_scan +548 ) +549 +550 trace = ChromatogramSignal.FromChromatogramData(data) 551 -552 # print(" {} - {}, {}".format( i, trace[0].Times[i], trace[0].Intensities[i] )) -553 data.time.append(trace[0].Times[i]) -554 data.tic.append(trace[0].Intensities[i]) -555 data.scans.append(trace[0].Scans[i]) -556 -557 # print(trace[0].Scans[i]) -558 if smooth: -559 data.tic = self.smooth_tic(data.tic) -560 -561 else: -562 data.tic = np.array(data.tic) -563 -564 if peak_detection: -565 centroid_peak_indexes = [ -566 i for i in self.centroid_detector(data.time, data.tic) -567 ] -568 -569 data.apexes = centroid_peak_indexes -570 -571 if plot: -572 if not ax: -573 import matplotlib.pyplot as plt +552 data = TIC_Data(time=[], scans=[], tic=[], bpc=[], apexes=[]) +553 +554 if trace[0].Length > 0: +555 for i in range(trace[0].Length): +556 # 
print(trace[0].HasBasePeakData,trace[0].EndTime ) +557 +558 # print(" {} - {}, {}".format( i, trace[0].Times[i], trace[0].Intensities[i] )) +559 data.time.append(trace[0].Times[i]) +560 data.tic.append(trace[0].Intensities[i]) +561 data.scans.append(trace[0].Scans[i]) +562 +563 # print(trace[0].Scans[i]) +564 if smooth: +565 data.tic = self.smooth_tic(data.tic) +566 +567 else: +568 data.tic = np.array(data.tic) +569 +570 if peak_detection: +571 centroid_peak_indexes = [ +572 i for i in self.centroid_detector(data.time, data.tic) +573 ] 574 -575 ax = plt.gca() -576 # fig, ax = plt.subplots(figsize=(6, 3)) -577 -578 ax.plot(data.time, data.tic, label=trace_type) -579 ax.set_xlabel("Time (min)") -580 ax.set_ylabel("a.u.") -581 if peak_detection: -582 for peak_indexes in data.apexes: -583 apex_index = peak_indexes[1] -584 ax.plot( -585 data.time[apex_index], -586 data.tic[apex_index], -587 marker="x", -588 linewidth=0, -589 ) -590 -591 # plt.show() -592 if trace_type == 'BPC': -593 data.bpc = data.tic -594 data.tic = [] -595 return data, ax -596 if trace_type == 'BPC': -597 data.bpc = data.tic -598 data.tic = [] -599 return data, None -600 -601 else: -602 return None, None -603 -604 def get_average_mass_spectrum( -605 self, -606 spectrum_mode: str = "profile", -607 auto_process: bool = True, -608 ppm_tolerance: float = 5.0, -609 ms_type: str = "MS1", -610 ) -> MassSpecProfile | MassSpecCentroid: -611 """ -612 Averages mass spectra over a scan range using Thermo's AverageScansInScanRange method -613 or a scan list using Thermo's AverageScans method -614 spectrum_mode: str -615 centroid or profile mass spectrum -616 auto_process: bool -617 If true performs peak picking, and noise threshold calculation after creation of mass spectrum object -618 ms_type: str -619 String of form 'ms1' or 'ms2' or 'MS3' etc. Valid up to MS10. -620 Internal function converts to Thermo MSOrderType class. 
-621 -622 """ -623 -624 def get_profile_mass_spec(averageScan, d_params: dict, auto_process: bool): -625 mz_list = list(averageScan.SegmentedScan.Positions) -626 abund_list = list(averageScan.SegmentedScan.Intensities) +575 data.apexes = centroid_peak_indexes +576 +577 if plot: +578 if not ax: +579 import matplotlib.pyplot as plt +580 +581 ax = plt.gca() +582 # fig, ax = plt.subplots(figsize=(6, 3)) +583 +584 ax.plot(data.time, data.tic, label=trace_type) +585 ax.set_xlabel("Time (min)") +586 ax.set_ylabel("a.u.") +587 if peak_detection: +588 for peak_indexes in data.apexes: +589 apex_index = peak_indexes[1] +590 ax.plot( +591 data.time[apex_index], +592 data.tic[apex_index], +593 marker="x", +594 linewidth=0, +595 ) +596 +597 # plt.show() +598 if trace_type == "BPC": +599 data.bpc = data.tic +600 data.tic = [] +601 return data, ax +602 if trace_type == "BPC": +603 data.bpc = data.tic +604 data.tic = [] +605 return data, None +606 +607 else: +608 return None, None +609 +610 def get_average_mass_spectrum( +611 self, +612 spectrum_mode: str = "profile", +613 auto_process: bool = True, +614 ppm_tolerance: float = 5.0, +615 ms_type: str = "MS1", +616 ) -> MassSpecProfile | MassSpecCentroid: +617 """ +618 Averages mass spectra over a scan range using Thermo's AverageScansInScanRange method +619 or a scan list using Thermo's AverageScans method +620 spectrum_mode: str +621 centroid or profile mass spectrum +622 auto_process: bool +623 If true performs peak picking, and noise threshold calculation after creation of mass spectrum object +624 ms_type: str +625 String of form 'ms1' or 'ms2' or 'MS3' etc. Valid up to MS10. +626 Internal function converts to Thermo MSOrderType class. 
627 -628 data_dict = { -629 Labels.mz: mz_list, -630 Labels.abundance: abund_list, -631 } -632 -633 return MassSpecProfile(data_dict, d_params, auto_process=auto_process) -634 -635 def get_centroid_mass_spec(averageScan, d_params: dict): -636 noise = list(averageScan.centroidScan.Noises) -637 -638 baselines = list(averageScan.centroidScan.Baselines) -639 -640 rp = list(averageScan.centroidScan.Resolutions) -641 -642 magnitude = list(averageScan.centroidScan.Intensities) +628 """ +629 +630 def get_profile_mass_spec(averageScan, d_params: dict, auto_process: bool): +631 mz_list = list(averageScan.SegmentedScan.Positions) +632 abund_list = list(averageScan.SegmentedScan.Intensities) +633 +634 data_dict = { +635 Labels.mz: mz_list, +636 Labels.abundance: abund_list, +637 } +638 +639 return MassSpecProfile(data_dict, d_params, auto_process=auto_process) +640 +641 def get_centroid_mass_spec(averageScan, d_params: dict): +642 noise = list(averageScan.centroidScan.Noises) 643 -644 mz = list(averageScan.centroidScan.Masses) +644 baselines = list(averageScan.centroidScan.Baselines) 645 -646 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 -647 l_signal_to_noise = np.array(magnitude) / array_noise_std -648 -649 d_params["baseline_noise"] = np.average(array_noise_std) -650 -651 d_params["baseline_noise_std"] = np.std(array_noise_std) -652 -653 data_dict = { -654 Labels.mz: mz, -655 Labels.abundance: magnitude, -656 Labels.rp: rp, -657 Labels.s2n: list(l_signal_to_noise), -658 } -659 -660 mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False) -661 -662 return mass_spec -663 -664 d_params = self.set_metadata( -665 firstScanNumber=self.start_scan, lastScanNumber=self.end_scan -666 ) +646 rp = list(averageScan.centroidScan.Resolutions) +647 +648 magnitude = list(averageScan.centroidScan.Intensities) +649 +650 mz = list(averageScan.centroidScan.Masses) +651 +652 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 +653 l_signal_to_noise = 
np.array(magnitude) / array_noise_std +654 +655 d_params["baseline_noise"] = np.average(array_noise_std) +656 +657 d_params["baseline_noise_std"] = np.std(array_noise_std) +658 +659 data_dict = { +660 Labels.mz: mz, +661 Labels.abundance: magnitude, +662 Labels.rp: rp, +663 Labels.s2n: list(l_signal_to_noise), +664 } +665 +666 mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False) 667 -668 # Create the mass options object that will be used when averaging the scans -669 options = MassOptions() -670 options.ToleranceUnits = ToleranceUnits.ppm -671 options.Tolerance = ppm_tolerance -672 -673 # Get the scan filter for the first scan. This scan filter will be used to located -674 # scans within the given scan range of the same type -675 scanFilter = self.iRawDataPlus.GetFilterForScanNumber(self.start_scan) -676 -677 # force it to only look for the MSType -678 scanFilter = self.set_msordertype(scanFilter, ms_type) -679 -680 if isinstance(self.scans, tuple): -681 averageScan = Extensions.AverageScansInScanRange( -682 self.iRawDataPlus, self.start_scan, self.end_scan, scanFilter, options -683 ) -684 -685 if averageScan: -686 if spectrum_mode == "profile": -687 mass_spec = get_profile_mass_spec( -688 averageScan, d_params, auto_process -689 ) +668 return mass_spec +669 +670 d_params = self.set_metadata( +671 firstScanNumber=self.start_scan, lastScanNumber=self.end_scan +672 ) +673 +674 # Create the mass options object that will be used when averaging the scans +675 options = MassOptions() +676 options.ToleranceUnits = ToleranceUnits.ppm +677 options.Tolerance = ppm_tolerance +678 +679 # Get the scan filter for the first scan. 
This scan filter will be used to located +680 # scans within the given scan range of the same type +681 scanFilter = self.iRawDataPlus.GetFilterForScanNumber(self.start_scan) +682 +683 # force it to only look for the MSType +684 scanFilter = self.set_msordertype(scanFilter, ms_type) +685 +686 if isinstance(self.scans, tuple): +687 averageScan = Extensions.AverageScansInScanRange( +688 self.iRawDataPlus, self.start_scan, self.end_scan, scanFilter, options +689 ) 690 -691 return mass_spec -692 -693 elif spectrum_mode == "centroid": -694 if averageScan.HasCentroidStream: -695 mass_spec = get_centroid_mass_spec(averageScan, d_params) +691 if averageScan: +692 if spectrum_mode == "profile": +693 mass_spec = get_profile_mass_spec( +694 averageScan, d_params, auto_process +695 ) 696 -697 return mass_spec +697 return mass_spec 698 -699 else: -700 raise ValueError( -701 "No Centroind data available for the selected scans" -702 ) -703 else: -704 raise ValueError("spectrum_mode must be 'profile' or centroid") -705 else: -706 raise ValueError("No data found for the selected scans") -707 -708 elif isinstance(self.scans, list): -709 d_params = self.set_metadata(scans_list=self.scans) -710 -711 scans = List[int]() -712 for scan in self.scans: -713 scans.Add(scan) -714 -715 averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options) +699 elif spectrum_mode == "centroid": +700 if averageScan.HasCentroidStream: +701 mass_spec = get_centroid_mass_spec(averageScan, d_params) +702 +703 return mass_spec +704 +705 else: +706 raise ValueError( +707 "No Centroind data available for the selected scans" +708 ) +709 else: +710 raise ValueError("spectrum_mode must be 'profile' or centroid") +711 else: +712 raise ValueError("No data found for the selected scans") +713 +714 elif isinstance(self.scans, list): +715 d_params = self.set_metadata(scans_list=self.scans) 716 -717 if averageScan: -718 if spectrum_mode == "profile": -719 mass_spec = get_profile_mass_spec( -720 averageScan, 
d_params, auto_process -721 ) +717 scans = List[int]() +718 for scan in self.scans: +719 scans.Add(scan) +720 +721 averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options) 722 -723 return mass_spec -724 -725 elif spectrum_mode == "centroid": -726 if averageScan.HasCentroidStream: -727 mass_spec = get_centroid_mass_spec(averageScan, d_params) +723 if averageScan: +724 if spectrum_mode == "profile": +725 mass_spec = get_profile_mass_spec( +726 averageScan, d_params, auto_process +727 ) 728 -729 return mass_spec +729 return mass_spec 730 -731 else: -732 raise ValueError( -733 "No Centroind data available for the selected scans" -734 ) -735 -736 else: -737 raise ValueError("spectrum_mode must be 'profile' or centroid") -738 -739 else: -740 raise ValueError("No data found for the selected scans") +731 elif spectrum_mode == "centroid": +732 if averageScan.HasCentroidStream: +733 mass_spec = get_centroid_mass_spec(averageScan, d_params) +734 +735 return mass_spec +736 +737 else: +738 raise ValueError( +739 "No Centroind data available for the selected scans" +740 ) 741 -742 else: -743 raise ValueError("scans must be a list intergers or a tuple if integers") +742 else: +743 raise ValueError("spectrum_mode must be 'profile' or centroid") 744 -745 def set_metadata( -746 self, -747 firstScanNumber=0, -748 lastScanNumber=0, -749 scans_list=False, -750 label=Labels.thermo_profile, -751 ): -752 """ -753 Collect metadata to be ingested in the mass spectrum object -754 -755 scans_list: list[int] or false -756 lastScanNumber: int -757 firstScanNumber: int -758 """ -759 -760 d_params = default_parameters(self.file_path) -761 -762 # assumes scans is full scan or reduced profile scan -763 -764 d_params["label"] = label +745 else: +746 raise ValueError("No data found for the selected scans") +747 +748 else: +749 raise ValueError("scans must be a list intergers or a tuple if integers") +750 +751 def set_metadata( +752 self, +753 firstScanNumber=0, +754 lastScanNumber=0, 
+755 scans_list=False, +756 label=Labels.thermo_profile, +757 ): +758 """ +759 Collect metadata to be ingested in the mass spectrum object +760 +761 scans_list: list[int] or false +762 lastScanNumber: int +763 firstScanNumber: int +764 """ 765 -766 if scans_list: -767 d_params["scan_number"] = scans_list -768 -769 d_params["polarity"] = self.get_polarity_mode(scans_list[0]) -770 -771 else: -772 d_params["scan_number"] = "{}-{}".format(firstScanNumber, lastScanNumber) -773 -774 d_params["polarity"] = self.get_polarity_mode(firstScanNumber) -775 -776 d_params["analyzer"] = self.iRawDataPlus.GetInstrumentData().Model -777 -778 d_params["acquisition_time"] = self.get_creation_time() +766 d_params = default_parameters(self.file_path) +767 +768 # assumes scans is full scan or reduced profile scan +769 +770 d_params["label"] = label +771 +772 if scans_list: +773 d_params["scan_number"] = scans_list +774 +775 d_params["polarity"] = self.get_polarity_mode(scans_list[0]) +776 +777 else: +778 d_params["scan_number"] = "{}-{}".format(firstScanNumber, lastScanNumber) 779 -780 d_params["instrument_label"] = self.iRawDataPlus.GetInstrumentData().Name +780 d_params["polarity"] = self.get_polarity_mode(firstScanNumber) 781 -782 return d_params +782 d_params["analyzer"] = self.iRawDataPlus.GetInstrumentData().Model 783 -784 def get_centroid_msms_data(self, scan): -785 """ -786 .. deprecated:: 2.0 -787 This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality. -788 """ +784 d_params["acquisition_time"] = self.get_creation_time() +785 +786 d_params["instrument_label"] = self.iRawDataPlus.GetInstrumentData().Name +787 +788 return d_params 789 -790 warnings.warn( -791 "The `get_centroid_msms_data()` is deprecated as of CoreMS 2.0 and will be removed in a future version. " -792 "Please use `get_average_mass_spectrum()` instead.", -793 DeprecationWarning, -794 ) +790 def get_centroid_msms_data(self, scan): +791 """ +792 .. 
deprecated:: 2.0 +793 This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality. +794 """ 795 -796 d_params = self.set_metadata(scans_list=[scan], label=Labels.thermo_centroid) -797 -798 centroidStream = self.iRawDataPlus.GetCentroidStream(scan, False) -799 -800 noise = list(centroidStream.Noises) +796 warnings.warn( +797 "The `get_centroid_msms_data()` is deprecated as of CoreMS 2.0 and will be removed in a future version. " +798 "Please use `get_average_mass_spectrum()` instead.", +799 DeprecationWarning, +800 ) 801 -802 baselines = list(centroidStream.Baselines) +802 d_params = self.set_metadata(scans_list=[scan], label=Labels.thermo_centroid) 803 -804 rp = list(centroidStream.Resolutions) +804 centroidStream = self.iRawDataPlus.GetCentroidStream(scan, False) 805 -806 magnitude = list(centroidStream.Intensities) +806 noise = list(centroidStream.Noises) 807 -808 mz = list(centroidStream.Masses) +808 baselines = list(centroidStream.Baselines) 809 -810 # charge = scans_labels[5] -811 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 -812 l_signal_to_noise = np.array(magnitude) / array_noise_std +810 rp = list(centroidStream.Resolutions) +811 +812 magnitude = list(centroidStream.Intensities) 813 -814 d_params["baseline_noise"] = np.average(array_noise_std) +814 mz = list(centroidStream.Masses) 815 -816 d_params["baseline_noise_std"] = np.std(array_noise_std) -817 -818 data_dict = { -819 Labels.mz: mz, -820 Labels.abundance: magnitude, -821 Labels.rp: rp, -822 Labels.s2n: list(l_signal_to_noise), -823 } -824 -825 mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False) -826 mass_spec.settings.noise_threshold_method = "relative_abundance" -827 mass_spec.settings.noise_threshold_min_relative_abundance = 1 -828 mass_spec.process_mass_spec() -829 return mass_spec +816 # charge = scans_labels[5] +817 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 +818 l_signal_to_noise = 
np.array(magnitude) / array_noise_std +819 +820 d_params["baseline_noise"] = np.average(array_noise_std) +821 +822 d_params["baseline_noise_std"] = np.std(array_noise_std) +823 +824 data_dict = { +825 Labels.mz: mz, +826 Labels.abundance: magnitude, +827 Labels.rp: rp, +828 Labels.s2n: list(l_signal_to_noise), +829 } 830 -831 def get_average_mass_spectrum_by_scanlist( -832 self, -833 scans_list: List[int], -834 auto_process: bool = True, -835 ppm_tolerance: float = 5.0, -836 ) -> MassSpecProfile: -837 """ -838 Averages selected scans mass spectra using Thermo's AverageScans method -839 scans_list: list[int] -840 auto_process: bool -841 If true performs peak picking, and noise threshold calculation after creation of mass spectrum object -842 Returns: -843 MassSpecProfile -844 -845 .. deprecated:: 2.0 -846 This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality. -847 """ -848 -849 warnings.warn( -850 "The `get_average_mass_spectrum_by_scanlist()` is deprecated as of CoreMS 2.0 and will be removed in a future version. " -851 "Please use `get_average_mass_spectrum()` instead.", -852 DeprecationWarning, -853 ) +831 mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False) +832 mass_spec.settings.noise_threshold_method = "relative_abundance" +833 mass_spec.settings.noise_threshold_min_relative_abundance = 1 +834 mass_spec.process_mass_spec() +835 return mass_spec +836 +837 def get_average_mass_spectrum_by_scanlist( +838 self, +839 scans_list: List[int], +840 auto_process: bool = True, +841 ppm_tolerance: float = 5.0, +842 ) -> MassSpecProfile: +843 """ +844 Averages selected scans mass spectra using Thermo's AverageScans method +845 scans_list: list[int] +846 auto_process: bool +847 If true performs peak picking, and noise threshold calculation after creation of mass spectrum object +848 Returns: +849 MassSpecProfile +850 +851 .. deprecated:: 2.0 +852 This function will be removed in CoreMS 2.0. 
Please use `get_average_mass_spectrum()` instead for similar functionality. +853 """ 854 -855 d_params = self.set_metadata(scans_list=scans_list) -856 -857 # assumes scans is full scan or reduced profile scan -858 -859 scans = List[int]() -860 for scan in scans_list: -861 scans.Add(scan) +855 warnings.warn( +856 "The `get_average_mass_spectrum_by_scanlist()` is deprecated as of CoreMS 2.0 and will be removed in a future version. " +857 "Please use `get_average_mass_spectrum()` instead.", +858 DeprecationWarning, +859 ) +860 +861 d_params = self.set_metadata(scans_list=scans_list) 862 -863 # Create the mass options object that will be used when averaging the scans -864 options = MassOptions() -865 options.ToleranceUnits = ToleranceUnits.ppm -866 options.Tolerance = ppm_tolerance -867 -868 # Get the scan filter for the first scan. This scan filter will be used to located -869 # scans within the given scan range of the same type -870 -871 averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options) -872 -873 len_data = averageScan.SegmentedScan.Positions.Length -874 -875 mz_list = list(averageScan.SegmentedScan.Positions) -876 abund_list = list(averageScan.SegmentedScan.Intensities) -877 -878 data_dict = { -879 Labels.mz: mz_list, -880 Labels.abundance: abund_list, -881 } -882 -883 mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process) -884 -885 return mass_spec +863 # assumes scans is full scan or reduced profile scan +864 +865 scans = List[int]() +866 for scan in scans_list: +867 scans.Add(scan) +868 +869 # Create the mass options object that will be used when averaging the scans +870 options = MassOptions() +871 options.ToleranceUnits = ToleranceUnits.ppm +872 options.Tolerance = ppm_tolerance +873 +874 # Get the scan filter for the first scan. 
This scan filter will be used to located +875 # scans within the given scan range of the same type +876 +877 averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options) +878 +879 len_data = averageScan.SegmentedScan.Positions.Length +880 +881 mz_list = list(averageScan.SegmentedScan.Positions) +882 abund_list = list(averageScan.SegmentedScan.Intensities) +883 +884 data_dict = { +885 Labels.mz: mz_list, +886 Labels.abundance: abund_list, +887 } +888 +889 mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process) +890 +891 return mass_spec @@ -3128,105 +3130,111 @@

    Parameters:

    503    def get_tic(
    -504        self, ms_type="MS !d", peak_detection=True, smooth=True, plot=False, ax=None,trace_type='TIC',
    -505    ) -> Tuple[TIC_Data, axes.Axes]:
    -506        """ms_type: str ('MS !d', 'MS2', None)
    -507            if you use None you get all scans.
    -508        peak_detection: bool
    -509        smooth: bool
    -510        plot: bool
    -511        ax: matplotlib axis object
    -512        trace_type: str ('TIC','BPC')
    -513
    -514        returns:
    -515            chroma: dict
    -516            {
    -517            Scan: [int]
    -518                original thermo scan numberMS
    -519            Time: [floats]
    -520                list of retention times
    -521            TIC: [floats]
    -522                total ion chromatogram
    -523            Apexes: [int]
    -524                original thermo apex scan number after peak picking
    -525            }
    -526        """
    -527        if trace_type == 'TIC':
    -528            settings = ChromatogramTraceSettings(TraceType.TIC)
    -529        elif trace_type == 'BPC':
    -530            settings = ChromatogramTraceSettings(TraceType.BasePeak)
    -531        else:
    -532            raise ValueError(f'{trace_type} undefined')
    -533        if ms_type == "all":
    -534            settings.Filter = None
    -535        else:
    -536            settings.Filter = ms_type
    -537
    -538        chroma_settings = IChromatogramSettings(settings)
    -539
    -540        data = self.iRawDataPlus.GetChromatogramData(
    -541            [chroma_settings], self.start_scan, self.end_scan
    -542        )
    +504        self,
    +505        ms_type="MS !d",
    +506        peak_detection=True,
    +507        smooth=True,
    +508        plot=False,
    +509        ax=None,
    +510        trace_type="TIC",
    +511    ) -> Tuple[TIC_Data, axes.Axes]:
    +512        """ms_type: str ('MS !d', 'MS2', None)
    +513            if you use None you get all scans.
    +514        peak_detection: bool
    +515        smooth: bool
    +516        plot: bool
    +517        ax: matplotlib axis object
    +518        trace_type: str ('TIC','BPC')
    +519
    +520        returns:
    +521            chroma: dict
    +522            {
    +523            Scan: [int]
    +524                original thermo scan numberMS
    +525            Time: [floats]
    +526                list of retention times
    +527            TIC: [floats]
    +528                total ion chromatogram
    +529            Apexes: [int]
    +530                original thermo apex scan number after peak picking
    +531            }
    +532        """
    +533        if trace_type == "TIC":
    +534            settings = ChromatogramTraceSettings(TraceType.TIC)
    +535        elif trace_type == "BPC":
    +536            settings = ChromatogramTraceSettings(TraceType.BasePeak)
    +537        else:
    +538            raise ValueError(f"{trace_type} undefined")
    +539        if ms_type == "all":
    +540            settings.Filter = None
    +541        else:
    +542            settings.Filter = ms_type
     543
    -544        trace = ChromatogramSignal.FromChromatogramData(data)
    +544        chroma_settings = IChromatogramSettings(settings)
     545
    -546        data = TIC_Data(time=[], scans=[], tic=[], bpc=[], apexes=[])
    -547
    -548        if trace[0].Length > 0:
    -549            for i in range(trace[0].Length):
    -550                # print(trace[0].HasBasePeakData,trace[0].EndTime )
    +546        data = self.iRawDataPlus.GetChromatogramData(
    +547            [chroma_settings], self.start_scan, self.end_scan
    +548        )
    +549
    +550        trace = ChromatogramSignal.FromChromatogramData(data)
     551
    -552                # print("  {} - {}, {}".format( i, trace[0].Times[i], trace[0].Intensities[i] ))
    -553                data.time.append(trace[0].Times[i])
    -554                data.tic.append(trace[0].Intensities[i])
    -555                data.scans.append(trace[0].Scans[i])
    -556
    -557                # print(trace[0].Scans[i])
    -558            if smooth:
    -559                data.tic = self.smooth_tic(data.tic)
    -560
    -561            else:
    -562                data.tic = np.array(data.tic)
    -563
    -564            if peak_detection:
    -565                centroid_peak_indexes = [
    -566                    i for i in self.centroid_detector(data.time, data.tic)
    -567                ]
    -568
    -569                data.apexes = centroid_peak_indexes
    -570
    -571            if plot:
    -572                if not ax:
    -573                    import matplotlib.pyplot as plt
    +552        data = TIC_Data(time=[], scans=[], tic=[], bpc=[], apexes=[])
    +553
    +554        if trace[0].Length > 0:
    +555            for i in range(trace[0].Length):
    +556                # print(trace[0].HasBasePeakData,trace[0].EndTime )
    +557
    +558                # print("  {} - {}, {}".format( i, trace[0].Times[i], trace[0].Intensities[i] ))
    +559                data.time.append(trace[0].Times[i])
    +560                data.tic.append(trace[0].Intensities[i])
    +561                data.scans.append(trace[0].Scans[i])
    +562
    +563                # print(trace[0].Scans[i])
    +564            if smooth:
    +565                data.tic = self.smooth_tic(data.tic)
    +566
    +567            else:
    +568                data.tic = np.array(data.tic)
    +569
    +570            if peak_detection:
    +571                centroid_peak_indexes = [
    +572                    i for i in self.centroid_detector(data.time, data.tic)
    +573                ]
     574
    -575                    ax = plt.gca()
    -576                    # fig, ax = plt.subplots(figsize=(6, 3))
    -577
    -578                ax.plot(data.time, data.tic, label=trace_type)
    -579                ax.set_xlabel("Time (min)")
    -580                ax.set_ylabel("a.u.")
    -581                if peak_detection:
    -582                    for peak_indexes in data.apexes:
    -583                        apex_index = peak_indexes[1]
    -584                        ax.plot(
    -585                            data.time[apex_index],
    -586                            data.tic[apex_index],
    -587                            marker="x",
    -588                            linewidth=0,
    -589                        )
    -590
    -591                # plt.show()
    -592                if trace_type == 'BPC':
    -593                    data.bpc = data.tic
    -594                    data.tic = []
    -595                return data, ax
    -596            if trace_type == 'BPC':
    -597                data.bpc = data.tic
    -598                data.tic = []
    -599            return data, None
    -600
    -601        else:
    -602            return None, None
    +575                data.apexes = centroid_peak_indexes
    +576
    +577            if plot:
    +578                if not ax:
    +579                    import matplotlib.pyplot as plt
    +580
    +581                    ax = plt.gca()
    +582                    # fig, ax = plt.subplots(figsize=(6, 3))
    +583
    +584                ax.plot(data.time, data.tic, label=trace_type)
    +585                ax.set_xlabel("Time (min)")
    +586                ax.set_ylabel("a.u.")
    +587                if peak_detection:
    +588                    for peak_indexes in data.apexes:
    +589                        apex_index = peak_indexes[1]
    +590                        ax.plot(
    +591                            data.time[apex_index],
    +592                            data.tic[apex_index],
    +593                            marker="x",
    +594                            linewidth=0,
    +595                        )
    +596
    +597                # plt.show()
    +598                if trace_type == "BPC":
    +599                    data.bpc = data.tic
    +600                    data.tic = []
    +601                return data, ax
    +602            if trace_type == "BPC":
    +603                data.bpc = data.tic
    +604                data.tic = []
    +605            return data, None
    +606
    +607        else:
    +608            return None, None
     
    @@ -3265,146 +3273,146 @@

    Parameters:

    -
    604    def get_average_mass_spectrum(
    -605        self,
    -606        spectrum_mode: str = "profile",
    -607        auto_process: bool = True,
    -608        ppm_tolerance: float = 5.0,
    -609        ms_type: str = "MS1",
    -610    ) -> MassSpecProfile | MassSpecCentroid:
    -611        """
    -612        Averages mass spectra over a scan range using Thermo's AverageScansInScanRange method
    -613        or a scan list using Thermo's AverageScans method
    -614        spectrum_mode: str
    -615            centroid or profile mass spectrum
    -616        auto_process: bool
    -617            If true performs peak picking, and noise threshold calculation after creation of mass spectrum object
    -618        ms_type: str
    -619            String of form 'ms1' or 'ms2' or 'MS3' etc. Valid up to MS10.
    -620            Internal function converts to Thermo MSOrderType class.
    -621
    -622        """
    -623
    -624        def get_profile_mass_spec(averageScan, d_params: dict, auto_process: bool):
    -625            mz_list = list(averageScan.SegmentedScan.Positions)
    -626            abund_list = list(averageScan.SegmentedScan.Intensities)
    +            
    610    def get_average_mass_spectrum(
    +611        self,
    +612        spectrum_mode: str = "profile",
    +613        auto_process: bool = True,
    +614        ppm_tolerance: float = 5.0,
    +615        ms_type: str = "MS1",
    +616    ) -> MassSpecProfile | MassSpecCentroid:
    +617        """
    +618        Averages mass spectra over a scan range using Thermo's AverageScansInScanRange method
    +619        or a scan list using Thermo's AverageScans method
    +620        spectrum_mode: str
    +621            centroid or profile mass spectrum
    +622        auto_process: bool
    +623            If true performs peak picking, and noise threshold calculation after creation of mass spectrum object
    +624        ms_type: str
    +625            String of form 'ms1' or 'ms2' or 'MS3' etc. Valid up to MS10.
    +626            Internal function converts to Thermo MSOrderType class.
     627
    -628            data_dict = {
    -629                Labels.mz: mz_list,
    -630                Labels.abundance: abund_list,
    -631            }
    -632
    -633            return MassSpecProfile(data_dict, d_params, auto_process=auto_process)
    -634
    -635        def get_centroid_mass_spec(averageScan, d_params: dict):
    -636            noise = list(averageScan.centroidScan.Noises)
    -637
    -638            baselines = list(averageScan.centroidScan.Baselines)
    -639
    -640            rp = list(averageScan.centroidScan.Resolutions)
    -641
    -642            magnitude = list(averageScan.centroidScan.Intensities)
    +628        """
    +629
    +630        def get_profile_mass_spec(averageScan, d_params: dict, auto_process: bool):
    +631            mz_list = list(averageScan.SegmentedScan.Positions)
    +632            abund_list = list(averageScan.SegmentedScan.Intensities)
    +633
    +634            data_dict = {
    +635                Labels.mz: mz_list,
    +636                Labels.abundance: abund_list,
    +637            }
    +638
    +639            return MassSpecProfile(data_dict, d_params, auto_process=auto_process)
    +640
    +641        def get_centroid_mass_spec(averageScan, d_params: dict):
    +642            noise = list(averageScan.centroidScan.Noises)
     643
    -644            mz = list(averageScan.centroidScan.Masses)
    +644            baselines = list(averageScan.centroidScan.Baselines)
     645
    -646            array_noise_std = (np.array(noise) - np.array(baselines)) / 3
    -647            l_signal_to_noise = np.array(magnitude) / array_noise_std
    -648
    -649            d_params["baseline_noise"] = np.average(array_noise_std)
    -650
    -651            d_params["baseline_noise_std"] = np.std(array_noise_std)
    -652
    -653            data_dict = {
    -654                Labels.mz: mz,
    -655                Labels.abundance: magnitude,
    -656                Labels.rp: rp,
    -657                Labels.s2n: list(l_signal_to_noise),
    -658            }
    -659
    -660            mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False)
    -661
    -662            return mass_spec
    -663
    -664        d_params = self.set_metadata(
    -665            firstScanNumber=self.start_scan, lastScanNumber=self.end_scan
    -666        )
    +646            rp = list(averageScan.centroidScan.Resolutions)
    +647
    +648            magnitude = list(averageScan.centroidScan.Intensities)
    +649
    +650            mz = list(averageScan.centroidScan.Masses)
    +651
    +652            array_noise_std = (np.array(noise) - np.array(baselines)) / 3
    +653            l_signal_to_noise = np.array(magnitude) / array_noise_std
    +654
    +655            d_params["baseline_noise"] = np.average(array_noise_std)
    +656
    +657            d_params["baseline_noise_std"] = np.std(array_noise_std)
    +658
    +659            data_dict = {
    +660                Labels.mz: mz,
    +661                Labels.abundance: magnitude,
    +662                Labels.rp: rp,
    +663                Labels.s2n: list(l_signal_to_noise),
    +664            }
    +665
    +666            mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False)
     667
    -668        # Create the mass options object that will be used when averaging the scans
    -669        options = MassOptions()
    -670        options.ToleranceUnits = ToleranceUnits.ppm
    -671        options.Tolerance = ppm_tolerance
    -672
    -673        # Get the scan filter for the first scan.  This scan filter will be used to located
    -674        # scans within the given scan range of the same type
    -675        scanFilter = self.iRawDataPlus.GetFilterForScanNumber(self.start_scan)
    -676
    -677        # force it to only look for the MSType
    -678        scanFilter = self.set_msordertype(scanFilter, ms_type)
    -679
    -680        if isinstance(self.scans, tuple):
    -681            averageScan = Extensions.AverageScansInScanRange(
    -682                self.iRawDataPlus, self.start_scan, self.end_scan, scanFilter, options
    -683            )
    -684
    -685            if averageScan:
    -686                if spectrum_mode == "profile":
    -687                    mass_spec = get_profile_mass_spec(
    -688                        averageScan, d_params, auto_process
    -689                    )
    +668            return mass_spec
    +669
    +670        d_params = self.set_metadata(
    +671            firstScanNumber=self.start_scan, lastScanNumber=self.end_scan
    +672        )
    +673
    +674        # Create the mass options object that will be used when averaging the scans
    +675        options = MassOptions()
    +676        options.ToleranceUnits = ToleranceUnits.ppm
    +677        options.Tolerance = ppm_tolerance
    +678
    +679        # Get the scan filter for the first scan.  This scan filter will be used to located
    +680        # scans within the given scan range of the same type
    +681        scanFilter = self.iRawDataPlus.GetFilterForScanNumber(self.start_scan)
    +682
    +683        # force it to only look for the MSType
    +684        scanFilter = self.set_msordertype(scanFilter, ms_type)
    +685
    +686        if isinstance(self.scans, tuple):
    +687            averageScan = Extensions.AverageScansInScanRange(
    +688                self.iRawDataPlus, self.start_scan, self.end_scan, scanFilter, options
    +689            )
     690
    -691                    return mass_spec
    -692
    -693                elif spectrum_mode == "centroid":
    -694                    if averageScan.HasCentroidStream:
    -695                        mass_spec = get_centroid_mass_spec(averageScan, d_params)
    +691            if averageScan:
    +692                if spectrum_mode == "profile":
    +693                    mass_spec = get_profile_mass_spec(
    +694                        averageScan, d_params, auto_process
    +695                    )
     696
    -697                        return mass_spec
    +697                    return mass_spec
     698
    -699                    else:
    -700                        raise ValueError(
    -701                            "No Centroind data available for the selected scans"
    -702                        )
    -703                else:
    -704                    raise ValueError("spectrum_mode must be 'profile' or centroid")
    -705            else:
    -706                raise ValueError("No data found for the selected scans")
    -707
    -708        elif isinstance(self.scans, list):
    -709            d_params = self.set_metadata(scans_list=self.scans)
    -710
    -711            scans = List[int]()
    -712            for scan in self.scans:
    -713                scans.Add(scan)
    -714
    -715            averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options)
    +699                elif spectrum_mode == "centroid":
    +700                    if averageScan.HasCentroidStream:
    +701                        mass_spec = get_centroid_mass_spec(averageScan, d_params)
    +702
    +703                        return mass_spec
    +704
    +705                    else:
    +706                        raise ValueError(
    +707                            "No Centroind data available for the selected scans"
    +708                        )
    +709                else:
    +710                    raise ValueError("spectrum_mode must be 'profile' or centroid")
    +711            else:
    +712                raise ValueError("No data found for the selected scans")
    +713
    +714        elif isinstance(self.scans, list):
    +715            d_params = self.set_metadata(scans_list=self.scans)
     716
    -717            if averageScan:
    -718                if spectrum_mode == "profile":
    -719                    mass_spec = get_profile_mass_spec(
    -720                        averageScan, d_params, auto_process
    -721                    )
    +717            scans = List[int]()
    +718            for scan in self.scans:
    +719                scans.Add(scan)
    +720
    +721            averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options)
     722
    -723                    return mass_spec
    -724
    -725                elif spectrum_mode == "centroid":
    -726                    if averageScan.HasCentroidStream:
    -727                        mass_spec = get_centroid_mass_spec(averageScan, d_params)
    +723            if averageScan:
    +724                if spectrum_mode == "profile":
    +725                    mass_spec = get_profile_mass_spec(
    +726                        averageScan, d_params, auto_process
    +727                    )
     728
    -729                        return mass_spec
    +729                    return mass_spec
     730
    -731                    else:
    -732                        raise ValueError(
    -733                            "No Centroind data available for the selected scans"
    -734                        )
    -735
    -736                else:
    -737                    raise ValueError("spectrum_mode must be 'profile' or centroid")
    -738
    -739            else:
    -740                raise ValueError("No data found for the selected scans")
    +731                elif spectrum_mode == "centroid":
    +732                    if averageScan.HasCentroidStream:
    +733                        mass_spec = get_centroid_mass_spec(averageScan, d_params)
    +734
    +735                        return mass_spec
    +736
    +737                    else:
    +738                        raise ValueError(
    +739                            "No Centroind data available for the selected scans"
    +740                        )
     741
    -742        else:
    -743            raise ValueError("scans must be a list intergers or a tuple if integers")
    +742                else:
    +743                    raise ValueError("spectrum_mode must be 'profile' or centroid")
    +744
    +745            else:
    +746                raise ValueError("No data found for the selected scans")
    +747
    +748        else:
    +749            raise ValueError("scans must be a list intergers or a tuple if integers")
     
    @@ -3432,44 +3440,44 @@

    Parameters:

    -
    745    def set_metadata(
    -746        self,
    -747        firstScanNumber=0,
    -748        lastScanNumber=0,
    -749        scans_list=False,
    -750        label=Labels.thermo_profile,
    -751    ):
    -752        """
    -753        Collect metadata to be ingested in the mass spectrum object
    -754
    -755        scans_list: list[int] or false
    -756        lastScanNumber: int
    -757        firstScanNumber: int
    -758        """
    -759
    -760        d_params = default_parameters(self.file_path)
    -761
    -762        # assumes scans is full scan or reduced profile scan
    -763
    -764        d_params["label"] = label
    +            
    751    def set_metadata(
    +752        self,
    +753        firstScanNumber=0,
    +754        lastScanNumber=0,
    +755        scans_list=False,
    +756        label=Labels.thermo_profile,
    +757    ):
    +758        """
    +759        Collect metadata to be ingested in the mass spectrum object
    +760
    +761        scans_list: list[int] or false
    +762        lastScanNumber: int
    +763        firstScanNumber: int
    +764        """
     765
    -766        if scans_list:
    -767            d_params["scan_number"] = scans_list
    -768
    -769            d_params["polarity"] = self.get_polarity_mode(scans_list[0])
    -770
    -771        else:
    -772            d_params["scan_number"] = "{}-{}".format(firstScanNumber, lastScanNumber)
    -773
    -774            d_params["polarity"] = self.get_polarity_mode(firstScanNumber)
    -775
    -776        d_params["analyzer"] = self.iRawDataPlus.GetInstrumentData().Model
    -777
    -778        d_params["acquisition_time"] = self.get_creation_time()
    +766        d_params = default_parameters(self.file_path)
    +767
    +768        # assumes scans is full scan or reduced profile scan
    +769
    +770        d_params["label"] = label
    +771
    +772        if scans_list:
    +773            d_params["scan_number"] = scans_list
    +774
    +775            d_params["polarity"] = self.get_polarity_mode(scans_list[0])
    +776
    +777        else:
    +778            d_params["scan_number"] = "{}-{}".format(firstScanNumber, lastScanNumber)
     779
    -780        d_params["instrument_label"] = self.iRawDataPlus.GetInstrumentData().Name
    +780            d_params["polarity"] = self.get_polarity_mode(firstScanNumber)
     781
    -782        return d_params
    +782        d_params["analyzer"] = self.iRawDataPlus.GetInstrumentData().Model
    +783
    +784        d_params["acquisition_time"] = self.get_creation_time()
    +785
    +786        d_params["instrument_label"] = self.iRawDataPlus.GetInstrumentData().Name
    +787
    +788        return d_params
     
    @@ -3493,52 +3501,52 @@

    Parameters:

    -
    784    def get_centroid_msms_data(self, scan):
    -785        """
    -786        .. deprecated:: 2.0
    -787            This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality.
    -788        """
    -789
    -790        warnings.warn(
    -791            "The `get_centroid_msms_data()` is deprecated as of CoreMS 2.0 and will be removed in a future version. "
    -792            "Please use `get_average_mass_spectrum()` instead.",
    -793            DeprecationWarning,
    -794        )
    +            
    790    def get_centroid_msms_data(self, scan):
    +791        """
    +792        .. deprecated:: 2.0
    +793            This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality.
    +794        """
     795
    -796        d_params = self.set_metadata(scans_list=[scan], label=Labels.thermo_centroid)
    -797
    -798        centroidStream = self.iRawDataPlus.GetCentroidStream(scan, False)
    -799
    -800        noise = list(centroidStream.Noises)
    +796        warnings.warn(
    +797            "The `get_centroid_msms_data()` is deprecated as of CoreMS 2.0 and will be removed in a future version. "
    +798            "Please use `get_average_mass_spectrum()` instead.",
    +799            DeprecationWarning,
    +800        )
     801
    -802        baselines = list(centroidStream.Baselines)
    +802        d_params = self.set_metadata(scans_list=[scan], label=Labels.thermo_centroid)
     803
    -804        rp = list(centroidStream.Resolutions)
    +804        centroidStream = self.iRawDataPlus.GetCentroidStream(scan, False)
     805
    -806        magnitude = list(centroidStream.Intensities)
    +806        noise = list(centroidStream.Noises)
     807
    -808        mz = list(centroidStream.Masses)
    +808        baselines = list(centroidStream.Baselines)
     809
    -810        # charge = scans_labels[5]
    -811        array_noise_std = (np.array(noise) - np.array(baselines)) / 3
    -812        l_signal_to_noise = np.array(magnitude) / array_noise_std
    +810        rp = list(centroidStream.Resolutions)
    +811
    +812        magnitude = list(centroidStream.Intensities)
     813
    -814        d_params["baseline_noise"] = np.average(array_noise_std)
    +814        mz = list(centroidStream.Masses)
     815
    -816        d_params["baseline_noise_std"] = np.std(array_noise_std)
    -817
    -818        data_dict = {
    -819            Labels.mz: mz,
    -820            Labels.abundance: magnitude,
    -821            Labels.rp: rp,
    -822            Labels.s2n: list(l_signal_to_noise),
    -823        }
    -824
    -825        mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False)
    -826        mass_spec.settings.noise_threshold_method = "relative_abundance"
    -827        mass_spec.settings.noise_threshold_min_relative_abundance = 1
    -828        mass_spec.process_mass_spec()
    -829        return mass_spec
    +816        # charge = scans_labels[5]
    +817        array_noise_std = (np.array(noise) - np.array(baselines)) / 3
    +818        l_signal_to_noise = np.array(magnitude) / array_noise_std
    +819
    +820        d_params["baseline_noise"] = np.average(array_noise_std)
    +821
    +822        d_params["baseline_noise_std"] = np.std(array_noise_std)
    +823
    +824        data_dict = {
    +825            Labels.mz: mz,
    +826            Labels.abundance: magnitude,
    +827            Labels.rp: rp,
    +828            Labels.s2n: list(l_signal_to_noise),
    +829        }
    +830
    +831        mass_spec = MassSpecCentroid(data_dict, d_params, auto_process=False)
    +832        mass_spec.settings.noise_threshold_method = "relative_abundance"
    +833        mass_spec.settings.noise_threshold_min_relative_abundance = 1
    +834        mass_spec.process_mass_spec()
    +835        return mass_spec
     
    @@ -3559,61 +3567,61 @@

    Parameters:

    -
    831    def get_average_mass_spectrum_by_scanlist(
    -832        self,
    -833        scans_list: List[int],
    -834        auto_process: bool = True,
    -835        ppm_tolerance: float = 5.0,
    -836    ) -> MassSpecProfile:
    -837        """
    -838        Averages selected scans mass spectra using Thermo's AverageScans method
    -839        scans_list: list[int]
    -840        auto_process: bool
    -841            If true performs peak picking, and noise threshold calculation after creation of mass spectrum object
    -842        Returns:
    -843            MassSpecProfile
    -844
    -845         .. deprecated:: 2.0
    -846        This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality.
    -847        """
    -848
    -849        warnings.warn(
    -850            "The `get_average_mass_spectrum_by_scanlist()` is deprecated as of CoreMS 2.0 and will be removed in a future version. "
    -851            "Please use `get_average_mass_spectrum()` instead.",
    -852            DeprecationWarning,
    -853        )
    +            
    837    def get_average_mass_spectrum_by_scanlist(
    +838        self,
    +839        scans_list: List[int],
    +840        auto_process: bool = True,
    +841        ppm_tolerance: float = 5.0,
    +842    ) -> MassSpecProfile:
    +843        """
    +844        Averages selected scans mass spectra using Thermo's AverageScans method
    +845        scans_list: list[int]
    +846        auto_process: bool
    +847            If true performs peak picking, and noise threshold calculation after creation of mass spectrum object
    +848        Returns:
    +849            MassSpecProfile
    +850
    +851         .. deprecated:: 2.0
    +852        This function will be removed in CoreMS 2.0. Please use `get_average_mass_spectrum()` instead for similar functionality.
    +853        """
     854
    -855        d_params = self.set_metadata(scans_list=scans_list)
    -856
    -857        # assumes scans is full scan or reduced profile scan
    -858
    -859        scans = List[int]()
    -860        for scan in scans_list:
    -861            scans.Add(scan)
    +855        warnings.warn(
    +856            "The `get_average_mass_spectrum_by_scanlist()` is deprecated as of CoreMS 2.0 and will be removed in a future version. "
    +857            "Please use `get_average_mass_spectrum()` instead.",
    +858            DeprecationWarning,
    +859        )
    +860
    +861        d_params = self.set_metadata(scans_list=scans_list)
     862
    -863        # Create the mass options object that will be used when averaging the scans
    -864        options = MassOptions()
    -865        options.ToleranceUnits = ToleranceUnits.ppm
    -866        options.Tolerance = ppm_tolerance
    -867
    -868        # Get the scan filter for the first scan.  This scan filter will be used to located
    -869        # scans within the given scan range of the same type
    -870
    -871        averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options)
    -872
    -873        len_data = averageScan.SegmentedScan.Positions.Length
    -874
    -875        mz_list = list(averageScan.SegmentedScan.Positions)
    -876        abund_list = list(averageScan.SegmentedScan.Intensities)
    -877
    -878        data_dict = {
    -879            Labels.mz: mz_list,
    -880            Labels.abundance: abund_list,
    -881        }
    -882
    -883        mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process)
    -884
    -885        return mass_spec
    +863        # assumes scans is full scan or reduced profile scan
    +864
    +865        scans = List[int]()
    +866        for scan in scans_list:
    +867            scans.Add(scan)
    +868
    +869        # Create the mass options object that will be used when averaging the scans
    +870        options = MassOptions()
    +871        options.ToleranceUnits = ToleranceUnits.ppm
    +872        options.Tolerance = ppm_tolerance
    +873
    +874        # Get the scan filter for the first scan.  This scan filter will be used to located
    +875        # scans within the given scan range of the same type
    +876
    +877        averageScan = Extensions.AverageScans(self.iRawDataPlus, scans, options)
    +878
    +879        len_data = averageScan.SegmentedScan.Positions.Length
    +880
    +881        mz_list = list(averageScan.SegmentedScan.Positions)
    +882        abund_list = list(averageScan.SegmentedScan.Intensities)
    +883
    +884        data_dict = {
    +885            Labels.mz: mz_list,
    +886            Labels.abundance: abund_list,
    +887        }
    +888
    +889        mass_spec = MassSpecProfile(data_dict, d_params, auto_process=auto_process)
    +890
    +891        return mass_spec
     
    @@ -3643,114 +3651,108 @@

    Parameters:

    -
     888class ImportMassSpectraThermoMSFileReader(ThermoBaseClass, SpectraParserInterface):
    - 889    """A class for parsing Thermo RAW mass spectrometry data files and instatiating MassSpectraBase or LCMSBase objects
    - 890
    - 891    Parameters
    - 892    ----------
    - 893    file_location : str or Path
    - 894        The path to the RAW file to be parsed.
    - 895    analyzer : str, optional
    - 896        The type of mass analyzer used in the instrument. Default is "Unknown".
    - 897    instrument_label : str, optional
    - 898        The name of the instrument used to acquire the data. Default is "Unknown".
    - 899    sample_name : str, optional
    - 900        The name of the sample being analyzed. If not provided, the stem of the file_location path will be used.
    - 901
    - 902    Attributes
    - 903    ----------
    - 904    file_location : Path
    - 905        The path to the RAW file being parsed.
    - 906    analyzer : str
    - 907        The type of mass analyzer used in the instrument.
    - 908    instrument_label : str
    - 909        The name of the instrument used to acquire the data.
    - 910    sample_name : str
    - 911        The name of the sample being analyzed.
    - 912
    - 913    Methods
    - 914    -------
    - 915    * run(spectra=True).
    - 916        Parses the RAW file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe.
    - 917    * get_mass_spectrum_from_scan(scan_number, polarity, auto_process=True)
    - 918        Parses the RAW file and returns a MassSpecBase object from a single scan.
    - 919    * get_mass_spectra_obj().
    - 920        Parses the RAW file and instantiates a MassSpectraBase object.
    - 921    * get_lcms_obj().
    - 922        Parses the RAW file and instantiates an LCMSBase object.
    - 923    * get_icr_transient_times().
    - 924        Return a list for transient time targets for all scans, or selected scans range
    - 925
    - 926    Inherits from ThermoBaseClass and SpectraParserInterface
    - 927    """
    - 928
    - 929    def __init__(
    - 930        self,
    - 931        file_location,
    - 932        analyzer="Unknown",
    - 933        instrument_label="Unknown",
    - 934        sample_name=None,
    - 935    ):
    - 936        super().__init__(file_location)
    - 937        if isinstance(file_location, str):
    - 938            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    - 939            file_location = Path(file_location)
    - 940        if not file_location.exists():
    - 941            raise FileExistsError("File does not exist: " + str(file_location))
    - 942
    - 943        self.file_location = file_location
    - 944        self.analyzer = analyzer
    - 945        self.instrument_label = instrument_label
    - 946
    - 947        if sample_name:
    - 948            self.sample_name = sample_name
    - 949        else:
    - 950            self.sample_name = file_location.stem
    - 951
    - 952    def load(self):
    - 953        pass
    - 954
    - 955    def get_scan_df(self):
    - 956        # This automatically brings in all the data
    - 957        self.chromatogram_settings.scans = (-1, -1)
    - 958
    - 959        # Get scan df info; starting with bulk ms1 and ms2 scans
    - 960        ms1_tic_data, _ = self.get_tic(
    - 961            ms_type="MS", peak_detection=False, smooth=False
    - 962        )
    - 963        ms1_scan_dict = {
    - 964            "scan": ms1_tic_data.scans,
    - 965            "scan_time": ms1_tic_data.time,
    - 966            "tic": ms1_tic_data.tic,
    - 967        }
    - 968        ms1_tic_df = pd.DataFrame.from_dict(ms1_scan_dict)
    - 969        ms1_tic_df["ms_level"] = "ms1"
    - 970
    - 971        ms2_tic_data, _ = self.get_tic(
    - 972            ms_type="MS2", peak_detection=False, smooth=False
    - 973        )
    - 974        ms2_scan_dict = {
    - 975            "scan": ms2_tic_data.scans,
    - 976            "scan_time": ms2_tic_data.time,
    - 977            "tic": ms2_tic_data.tic,
    - 978        }
    - 979        ms2_tic_df = pd.DataFrame.from_dict(ms2_scan_dict)
    - 980        ms2_tic_df["ms_level"] = "ms2"
    - 981
    - 982        scan_df = (
    - 983            pd.concat([ms1_tic_df, ms2_tic_df], axis=0)
    - 984            .sort_values(by="scan")
    - 985            .reindex()
    - 986        )
    - 987
    - 988        # get scan text
    - 989        scan_filter_df = pd.DataFrame.from_dict(
    - 990            self.get_all_filters()[0], orient="index"
    - 991        )
    - 992        scan_filter_df.reset_index(inplace=True)
    - 993        scan_filter_df.rename(
    - 994            columns={"index": "scan", 0: "scan_text"}, inplace=True
    - 995        )
    +            
     894class ImportMassSpectraThermoMSFileReader(ThermoBaseClass, SpectraParserInterface):
    + 895    """A class for parsing Thermo RAW mass spectrometry data files and instatiating MassSpectraBase or LCMSBase objects
    + 896
    + 897    Parameters
    + 898    ----------
    + 899    file_location : str or Path
    + 900        The path to the RAW file to be parsed.
    + 901    analyzer : str, optional
    + 902        The type of mass analyzer used in the instrument. Default is "Unknown".
    + 903    instrument_label : str, optional
    + 904        The name of the instrument used to acquire the data. Default is "Unknown".
    + 905    sample_name : str, optional
    + 906        The name of the sample being analyzed. If not provided, the stem of the file_location path will be used.
    + 907
    + 908    Attributes
    + 909    ----------
    + 910    file_location : Path
    + 911        The path to the RAW file being parsed.
    + 912    analyzer : str
    + 913        The type of mass analyzer used in the instrument.
    + 914    instrument_label : str
    + 915        The name of the instrument used to acquire the data.
    + 916    sample_name : str
    + 917        The name of the sample being analyzed.
    + 918
    + 919    Methods
    + 920    -------
    + 921    * run(spectra=True).
    + 922        Parses the RAW file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe.
    + 923    * get_mass_spectrum_from_scan(scan_number, polarity, auto_process=True)
    + 924        Parses the RAW file and returns a MassSpecBase object from a single scan.
    + 925    * get_mass_spectra_obj().
    + 926        Parses the RAW file and instantiates a MassSpectraBase object.
    + 927    * get_lcms_obj().
    + 928        Parses the RAW file and instantiates an LCMSBase object.
    + 929    * get_icr_transient_times().
    + 930        Return a list for transient time targets for all scans, or selected scans range
    + 931
    + 932    Inherits from ThermoBaseClass and SpectraParserInterface
    + 933    """
    + 934
    + 935    def __init__(
    + 936        self,
    + 937        file_location,
    + 938        analyzer="Unknown",
    + 939        instrument_label="Unknown",
    + 940        sample_name=None,
    + 941    ):
    + 942        super().__init__(file_location)
    + 943        if isinstance(file_location, str):
    + 944            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    + 945            file_location = Path(file_location)
    + 946        if not file_location.exists():
    + 947            raise FileExistsError("File does not exist: " + str(file_location))
    + 948
    + 949        self.file_location = file_location
    + 950        self.analyzer = analyzer
    + 951        self.instrument_label = instrument_label
    + 952
    + 953        if sample_name:
    + 954            self.sample_name = sample_name
    + 955        else:
    + 956            self.sample_name = file_location.stem
    + 957
    + 958    def load(self):
    + 959        pass
    + 960
    + 961    def get_scan_df(self):
    + 962        # This automatically brings in all the data
    + 963        self.chromatogram_settings.scans = (-1, -1)
    + 964
    + 965        # Get scan df info; starting with bulk ms1 and ms2 scans
    + 966        ms1_tic_data, _ = self.get_tic(ms_type="MS", peak_detection=False, smooth=False)
    + 967        ms1_scan_dict = {
    + 968            "scan": ms1_tic_data.scans,
    + 969            "scan_time": ms1_tic_data.time,
    + 970            "tic": ms1_tic_data.tic,
    + 971        }
    + 972        ms1_tic_df = pd.DataFrame.from_dict(ms1_scan_dict)
    + 973        ms1_tic_df["ms_level"] = "ms1"
    + 974
    + 975        ms2_tic_data, _ = self.get_tic(
    + 976            ms_type="MS2", peak_detection=False, smooth=False
    + 977        )
    + 978        ms2_scan_dict = {
    + 979            "scan": ms2_tic_data.scans,
    + 980            "scan_time": ms2_tic_data.time,
    + 981            "tic": ms2_tic_data.tic,
    + 982        }
    + 983        ms2_tic_df = pd.DataFrame.from_dict(ms2_scan_dict)
    + 984        ms2_tic_df["ms_level"] = "ms2"
    + 985
    + 986        scan_df = (
    + 987            pd.concat([ms1_tic_df, ms2_tic_df], axis=0).sort_values(by="scan").reindex()
    + 988        )
    + 989
    + 990        # get scan text
    + 991        scan_filter_df = pd.DataFrame.from_dict(
    + 992            self.get_all_filters()[0], orient="index"
    + 993        )
    + 994        scan_filter_df.reset_index(inplace=True)
    + 995        scan_filter_df.rename(columns={"index": "scan", 0: "scan_text"}, inplace=True)
      996
      997        scan_df = scan_df.merge(scan_filter_df, on="scan", how="left")
      998        scan_df["scan_window_lower"] = scan_df.scan_text.str.extract(
    @@ -3773,12 +3775,12 @@ 

    Parameters:

    1015 scan_df.loc[scan_df.scan == i, "ms_format"] = "centroid" 1016 else: 1017 scan_df.loc[scan_df.scan == i, "ms_format"] = "profile" -1018 +1018 1019 return scan_df 1020 1021 def get_ms_raw(self, spectra, scan_df): 1022 if spectra == "all": -1023 scan_df_forspec = scan_df +1023 scan_df_forspec = scan_df 1024 elif spectra == "ms1": 1025 scan_df_forspec = scan_df[scan_df.ms_level == 1] 1026 elif spectra == "ms2": @@ -3801,310 +3803,306 @@

    Parameters:

    1043 # First pass: get nrows 1044 N = defaultdict(lambda: 0) 1045 for i in scan_df_forspec.scan.to_list(): -1046 level = scan_df_forspec.loc[ -1047 scan_df_forspec.scan == i, "ms_level" -1048 ].values[0] -1049 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) -1050 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1051 i, scanStatistics -1052 ) -1053 abun = list(profileStream.Intensities) -1054 abun = np.array(abun)[np.where(np.array(abun) > 0)[0]] +1046 level = scan_df_forspec.loc[scan_df_forspec.scan == i, "ms_level"].values[0] +1047 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) +1048 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1049 i, scanStatistics +1050 ) +1051 abun = list(profileStream.Intensities) +1052 abun = np.array(abun)[np.where(np.array(abun) > 0)[0]] +1053 +1054 N[level] += len(abun) 1055 -1056 N[level] += len(abun) -1057 -1058 # Second pass: parse -1059 for i in scan_df_forspec.scan.to_list(): -1060 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) -1061 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1062 i, scanStatistics -1063 ) -1064 abun = list(profileStream.Intensities) -1065 mz = list(profileStream.Positions) -1066 -1067 # Get index of abun that are > 0 -1068 inx = np.where(np.array(abun) > 0)[0] -1069 mz = np.array(mz)[inx] -1070 mz = np.float32(mz) -1071 abun = np.array(abun)[inx] -1072 abun = np.float32(abun) +1056 # Second pass: parse +1057 for i in scan_df_forspec.scan.to_list(): +1058 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) +1059 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1060 i, scanStatistics +1061 ) +1062 abun = list(profileStream.Intensities) +1063 mz = list(profileStream.Positions) +1064 +1065 # Get index of abun that are > 0 +1066 inx = np.where(np.array(abun) > 0)[0] +1067 mz = np.array(mz)[inx] +1068 mz = np.float32(mz) +1069 abun = np.array(abun)[inx] +1070 abun = 
np.float32(abun) +1071 +1072 level = scan_df_forspec.loc[scan_df_forspec.scan == i, "ms_level"].values[0] 1073 -1074 level = scan_df_forspec.loc[ -1075 scan_df_forspec.scan == i, "ms_level" -1076 ].values[0] -1077 -1078 # Number of rows -1079 n = len(mz) +1074 # Number of rows +1075 n = len(mz) +1076 +1077 # No measurements +1078 if n == 0: +1079 continue 1080 -1081 # No measurements -1082 if n == 0: -1083 continue -1084 -1085 # Dimension check -1086 if len(mz) != len(abun): -1087 warnings.warn("m/z and intensity array dimension mismatch") -1088 continue -1089 -1090 # Scan/frame info -1091 id_dict = i +1081 # Dimension check +1082 if len(mz) != len(abun): +1083 warnings.warn("m/z and intensity array dimension mismatch") +1084 continue +1085 +1086 # Scan/frame info +1087 id_dict = i +1088 +1089 # Columns +1090 cols[level] = ["scan", "mz", "intensity"] +1091 m = len(cols[level]) 1092 -1093 # Columns -1094 cols[level] = ["scan", "mz", "intensity"] -1095 m = len(cols[level]) +1093 # Subarray init +1094 arr = np.empty((n, m), dtype=dtype) +1095 inx = 0 1096 -1097 # Subarray init -1098 arr = np.empty((n, m), dtype=dtype) -1099 inx = 0 +1097 # Populate scan/frame info +1098 arr[:, inx] = i +1099 inx += 1 1100 -1101 # Populate scan/frame info -1102 arr[:, inx] = i +1101 # Populate m/z +1102 arr[:, inx] = mz 1103 inx += 1 1104 -1105 # Populate m/z -1106 arr[:, inx] = mz +1105 # Populate intensity +1106 arr[:, inx] = abun 1107 inx += 1 1108 -1109 # Populate intensity -1110 arr[:, inx] = abun -1111 inx += 1 -1112 -1113 # Initialize output container -1114 if level not in res: -1115 res[level] = np.empty((N[level], m), dtype=dtype) -1116 counter[level] = 0 +1109 # Initialize output container +1110 if level not in res: +1111 res[level] = np.empty((N[level], m), dtype=dtype) +1112 counter[level] = 0 +1113 +1114 # Insert subarray +1115 res[level][counter[level] : counter[level] + n, :] = arr +1116 counter[level] += n 1117 -1118 # Insert subarray -1119 res[level][counter[level] : 
counter[level] + n, :] = arr -1120 counter[level] += n -1121 -1122 # Construct ms1 and ms2 mz dataframes -1123 for level in res.keys(): -1124 res[level] = pd.DataFrame(res[level]) -1125 res[level].columns = cols[level] -1126 # rename keys in res to add 'ms' prefix -1127 res = {f"ms{key}": value for key, value in res.items()} -1128 -1129 return res -1130 -1131 def run(self, spectra="all", scan_df=None): -1132 """ -1133 Extracts mass spectra data from a raw file. -1134 -1135 Parameters -1136 ---------- -1137 spectra : str, optional -1138 Which mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2. -1139 scan_df : pandas.DataFrame, optional -1140 Scan dataframe. If not provided, the scan dataframe is created from the mzML file. -1141 -1142 Returns -1143 ------- -1144 tuple -1145 A tuple containing two elements: -1146 - A dictionary containing mass spectra data, separated by MS level. -1147 - A pandas DataFrame containing scan information, including scan number, scan time, TIC, MS level, -1148 scan text, scan window lower and upper bounds, polarity, and precursor m/z (if applicable). -1149 """ -1150 # Prepare scan_df -1151 if scan_df is None: -1152 scan_df = self.get_scan_df() -1153 -1154 # Prepare mass spectra data -1155 if spectra != "none": -1156 res = self.get_ms_raw(spectra=spectra, scan_df=scan_df) -1157 else: -1158 res = None -1159 -1160 return res, scan_df -1161 -1162 def get_mass_spectrum_from_scan( -1163 self, scan_number, spectrum_mode, auto_process=True -1164 ): -1165 """Instatiate a MassSpecBase object from a single scan number from the binary file, currently only supports profile mode. -1166 -1167 Parameters -1168 ---------- -1169 scan_number : int -1170 The scan number to extract the mass spectrum from. -1171 polarity : int -1172 The polarity of the scan. 1 for positive mode, -1 for negative mode. -1173 spectrum_mode : str -1174 The type of mass spectrum to extract. Must be 'profile' or 'centroid'. 
-1175 auto_process : bool, optional -1176 If True, perform peak picking and noise threshold calculation after creating the mass spectrum object. Default is True. -1177 -1178 Returns -1179 ------- -1180 MassSpecProfile | MassSpecCentroid -1181 The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum. -1182 """ -1183 -1184 if spectrum_mode == "profile": -1185 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(scan_number) -1186 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1187 scan_number, scanStatistics -1188 ) -1189 abun = list(profileStream.Intensities) -1190 mz = list(profileStream.Positions) -1191 data_dict = { -1192 Labels.mz: mz, -1193 Labels.abundance: abun, -1194 } -1195 d_params = self.set_metadata( -1196 firstScanNumber=scan_number, -1197 lastScanNumber=scan_number, -1198 scans_list=False, -1199 label=Labels.thermo_profile, -1200 ) -1201 mass_spectrum_obj = MassSpecProfile( -1202 data_dict, d_params, auto_process=auto_process -1203 ) -1204 -1205 elif spectrum_mode == "centroid": -1206 centroid_scan = self.iRawDataPlus.GetCentroidStream(scan_number, False) -1207 if centroid_scan.Masses is not None: -1208 mz = list(centroid_scan.Masses) -1209 abun = list(centroid_scan.Intensities) -1210 rp = list(centroid_scan.Resolutions) -1211 magnitude = list(centroid_scan.Intensities) -1212 noise = list(centroid_scan.Noises) -1213 baselines = list(centroid_scan.Baselines) -1214 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 -1215 l_signal_to_noise = np.array(magnitude) / array_noise_std -1216 data_dict = { -1217 Labels.mz: mz, -1218 Labels.abundance: abun, -1219 Labels.rp: rp, -1220 Labels.s2n: list(l_signal_to_noise), -1221 } -1222 else: # For CID MS2, the centroid data are stored in the profile data location, they do not have any associated rp or baseline data, but they should be treated as centroid data -1223 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber( -1224 scan_number -1225 ) 
-1226 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1227 scan_number, scanStatistics -1228 ) -1229 abun = list(profileStream.Intensities) -1230 mz = list(profileStream.Positions) -1231 data_dict = { -1232 Labels.mz: mz, -1233 Labels.abundance: abun, -1234 Labels.rp: [np.nan] * len(mz), -1235 Labels.s2n: [np.nan] * len(mz), -1236 } -1237 d_params = self.set_metadata( -1238 firstScanNumber=scan_number, -1239 lastScanNumber=scan_number, -1240 scans_list=False, -1241 label=Labels.thermo_centroid, -1242 ) -1243 mass_spectrum_obj = MassSpecCentroid( -1244 data_dict, d_params, auto_process=auto_process -1245 ) -1246 -1247 return mass_spectrum_obj -1248 -1249 def get_mass_spectra_obj(self): -1250 """Instatiate a MassSpectraBase object from the binary data file file. -1251 -1252 Returns -1253 ------- -1254 MassSpectraBase -1255 The MassSpectra object containing the parsed mass spectra. The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe. -1256 """ -1257 _, scan_df = self.run(spectra="none") -1258 mass_spectra_obj = MassSpectraBase( -1259 self.file_location, -1260 self.analyzer, -1261 self.instrument_label, -1262 self.sample_name, -1263 self, -1264 ) -1265 scan_df = scan_df.set_index("scan", drop=False) -1266 mass_spectra_obj.scan_df = scan_df -1267 -1268 return mass_spectra_obj -1269 -1270 def get_lcms_obj(self, spectra="all"): -1271 """Instatiates a LCMSBase object from the mzML file. -1272 -1273 Parameters -1274 ---------- -1275 verbose : bool, optional -1276 If True, print progress messages. Default is True. -1277 spectra : str, optional -1278 Which mass spectra data to include in the output. Default is "all". Other options: "none", "ms1", "ms2". -1279 -1280 Returns -1281 ------- -1282 LCMSBase -1283 LCMS object containing mass spectra data. 
The object is instatiated with the file location, analyzer, instrument, sample name, scan info, mz dataframe (as specifified), polarity, as well as the attributes holding the scans, retention times, and tics. -1284 """ -1285 _, scan_df = self.run(spectra="none") # first run it to just get scan info -1286 res, scan_df = self.run( -1287 scan_df=scan_df, spectra=spectra -1288 ) # second run to parse data -1289 lcms_obj = LCMSBase( -1290 self.file_location, -1291 self.analyzer, -1292 self.instrument_label, -1293 self.sample_name, -1294 self, -1295 ) -1296 if spectra != "none": -1297 for key in res: -1298 key_int = int(key.replace("ms", "")) -1299 res[key] = res[key][res[key].intensity > 0] -1300 res[key] = ( -1301 res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True) -1302 ) -1303 lcms_obj._ms_unprocessed[key_int] = res[key] -1304 lcms_obj.scan_df = scan_df.set_index("scan", drop=False) -1305 # Check if polarity is mixed -1306 if len(set(scan_df.polarity)) > 1: -1307 raise ValueError("Mixed polarities detected in scan data") -1308 lcms_obj.polarity = scan_df.polarity[0] -1309 lcms_obj._scans_number_list = list(scan_df.scan) -1310 lcms_obj._retention_time_list = list(scan_df.scan_time) -1311 lcms_obj._tic_list = list(scan_df.tic) -1312 -1313 return lcms_obj -1314 -1315 def get_icr_transient_times(self): -1316 """Return a list for transient time targets for all scans, or selected scans range -1317 -1318 Notes -1319 -------- -1320 Resolving Power and Transient time targets based on 7T FT-ICR MS system -1321 """ -1322 -1323 res_trans_time = { -1324 "50": 0.384, -1325 "100000": 0.768, -1326 "200000": 1.536, -1327 "400000": 3.072, -1328 "750000": 6.144, -1329 "1000000": 12.288, -1330 } +1118 # Construct ms1 and ms2 mz dataframes +1119 for level in res.keys(): +1120 res[level] = pd.DataFrame(res[level]) +1121 res[level].columns = cols[level] +1122 # rename keys in res to add 'ms' prefix +1123 res = {f"ms{key}": value for key, value in res.items()} +1124 +1125 return 
res +1126 +1127 def run(self, spectra="all", scan_df=None): +1128 """ +1129 Extracts mass spectra data from a raw file. +1130 +1131 Parameters +1132 ---------- +1133 spectra : str, optional +1134 Which mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2. +1135 scan_df : pandas.DataFrame, optional +1136 Scan dataframe. If not provided, the scan dataframe is created from the mzML file. +1137 +1138 Returns +1139 ------- +1140 tuple +1141 A tuple containing two elements: +1142 - A dictionary containing mass spectra data, separated by MS level. +1143 - A pandas DataFrame containing scan information, including scan number, scan time, TIC, MS level, +1144 scan text, scan window lower and upper bounds, polarity, and precursor m/z (if applicable). +1145 """ +1146 # Prepare scan_df +1147 if scan_df is None: +1148 scan_df = self.get_scan_df() +1149 +1150 # Prepare mass spectra data +1151 if spectra != "none": +1152 res = self.get_ms_raw(spectra=spectra, scan_df=scan_df) +1153 else: +1154 res = None +1155 +1156 return res, scan_df +1157 +1158 def get_mass_spectrum_from_scan( +1159 self, scan_number, spectrum_mode, auto_process=True +1160 ): +1161 """Instatiate a MassSpecBase object from a single scan number from the binary file, currently only supports profile mode. +1162 +1163 Parameters +1164 ---------- +1165 scan_number : int +1166 The scan number to extract the mass spectrum from. +1167 polarity : int +1168 The polarity of the scan. 1 for positive mode, -1 for negative mode. +1169 spectrum_mode : str +1170 The type of mass spectrum to extract. Must be 'profile' or 'centroid'. +1171 auto_process : bool, optional +1172 If True, perform peak picking and noise threshold calculation after creating the mass spectrum object. Default is True. +1173 +1174 Returns +1175 ------- +1176 MassSpecProfile | MassSpecCentroid +1177 The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum. 
+1178 """ +1179 +1180 if spectrum_mode == "profile": +1181 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(scan_number) +1182 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1183 scan_number, scanStatistics +1184 ) +1185 abun = list(profileStream.Intensities) +1186 mz = list(profileStream.Positions) +1187 data_dict = { +1188 Labels.mz: mz, +1189 Labels.abundance: abun, +1190 } +1191 d_params = self.set_metadata( +1192 firstScanNumber=scan_number, +1193 lastScanNumber=scan_number, +1194 scans_list=False, +1195 label=Labels.thermo_profile, +1196 ) +1197 mass_spectrum_obj = MassSpecProfile( +1198 data_dict, d_params, auto_process=auto_process +1199 ) +1200 +1201 elif spectrum_mode == "centroid": +1202 centroid_scan = self.iRawDataPlus.GetCentroidStream(scan_number, False) +1203 if centroid_scan.Masses is not None: +1204 mz = list(centroid_scan.Masses) +1205 abun = list(centroid_scan.Intensities) +1206 rp = list(centroid_scan.Resolutions) +1207 magnitude = list(centroid_scan.Intensities) +1208 noise = list(centroid_scan.Noises) +1209 baselines = list(centroid_scan.Baselines) +1210 array_noise_std = (np.array(noise) - np.array(baselines)) / 3 +1211 l_signal_to_noise = np.array(magnitude) / array_noise_std +1212 data_dict = { +1213 Labels.mz: mz, +1214 Labels.abundance: abun, +1215 Labels.rp: rp, +1216 Labels.s2n: list(l_signal_to_noise), +1217 } +1218 else: # For CID MS2, the centroid data are stored in the profile data location, they do not have any associated rp or baseline data, but they should be treated as centroid data +1219 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber( +1220 scan_number +1221 ) +1222 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1223 scan_number, scanStatistics +1224 ) +1225 abun = list(profileStream.Intensities) +1226 mz = list(profileStream.Positions) +1227 data_dict = { +1228 Labels.mz: mz, +1229 Labels.abundance: abun, +1230 Labels.rp: [np.nan] * len(mz), +1231 Labels.s2n: 
[np.nan] * len(mz), +1232 } +1233 d_params = self.set_metadata( +1234 firstScanNumber=scan_number, +1235 lastScanNumber=scan_number, +1236 scans_list=False, +1237 label=Labels.thermo_centroid, +1238 ) +1239 mass_spectrum_obj = MassSpecCentroid( +1240 data_dict, d_params, auto_process=auto_process +1241 ) +1242 +1243 return mass_spectrum_obj +1244 +1245 def get_mass_spectra_obj(self): +1246 """Instatiate a MassSpectraBase object from the binary data file file. +1247 +1248 Returns +1249 ------- +1250 MassSpectraBase +1251 The MassSpectra object containing the parsed mass spectra. The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe. +1252 """ +1253 _, scan_df = self.run(spectra="none") +1254 mass_spectra_obj = MassSpectraBase( +1255 self.file_location, +1256 self.analyzer, +1257 self.instrument_label, +1258 self.sample_name, +1259 self, +1260 ) +1261 scan_df = scan_df.set_index("scan", drop=False) +1262 mass_spectra_obj.scan_df = scan_df +1263 +1264 return mass_spectra_obj +1265 +1266 def get_lcms_obj(self, spectra="all"): +1267 """Instatiates a LCMSBase object from the mzML file. +1268 +1269 Parameters +1270 ---------- +1271 verbose : bool, optional +1272 If True, print progress messages. Default is True. +1273 spectra : str, optional +1274 Which mass spectra data to include in the output. Default is "all". Other options: "none", "ms1", "ms2". +1275 +1276 Returns +1277 ------- +1278 LCMSBase +1279 LCMS object containing mass spectra data. The object is instatiated with the file location, analyzer, instrument, sample name, scan info, mz dataframe (as specifified), polarity, as well as the attributes holding the scans, retention times, and tics. 
+1280 """ +1281 _, scan_df = self.run(spectra="none") # first run it to just get scan info +1282 res, scan_df = self.run( +1283 scan_df=scan_df, spectra=spectra +1284 ) # second run to parse data +1285 lcms_obj = LCMSBase( +1286 self.file_location, +1287 self.analyzer, +1288 self.instrument_label, +1289 self.sample_name, +1290 self, +1291 ) +1292 if spectra != "none": +1293 for key in res: +1294 key_int = int(key.replace("ms", "")) +1295 res[key] = res[key][res[key].intensity > 0] +1296 res[key] = ( +1297 res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True) +1298 ) +1299 lcms_obj._ms_unprocessed[key_int] = res[key] +1300 lcms_obj.scan_df = scan_df.set_index("scan", drop=False) +1301 # Check if polarity is mixed +1302 if len(set(scan_df.polarity)) > 1: +1303 raise ValueError("Mixed polarities detected in scan data") +1304 lcms_obj.polarity = scan_df.polarity[0] +1305 lcms_obj._scans_number_list = list(scan_df.scan) +1306 lcms_obj._retention_time_list = list(scan_df.scan_time) +1307 lcms_obj._tic_list = list(scan_df.tic) +1308 +1309 return lcms_obj +1310 +1311 def get_icr_transient_times(self): +1312 """Return a list for transient time targets for all scans, or selected scans range +1313 +1314 Notes +1315 -------- +1316 Resolving Power and Transient time targets based on 7T FT-ICR MS system +1317 """ +1318 +1319 res_trans_time = { +1320 "50": 0.384, +1321 "100000": 0.768, +1322 "200000": 1.536, +1323 "400000": 3.072, +1324 "750000": 6.144, +1325 "1000000": 12.288, +1326 } +1327 +1328 firstScanNumber = self.start_scan +1329 +1330 lastScanNumber = self.end_scan 1331 -1332 firstScanNumber = self.start_scan +1332 transient_time_list = [] 1333 -1334 lastScanNumber = self.end_scan -1335 -1336 transient_time_list = [] -1337 -1338 for scan in range(firstScanNumber, lastScanNumber): -1339 scan_header = self.get_scan_header(scan) +1334 for scan in range(firstScanNumber, lastScanNumber): +1335 scan_header = self.get_scan_header(scan) +1336 +1337 rp_target = 
scan_header["FT Resolution:"] +1338 +1339 transient_time = res_trans_time.get(rp_target) 1340 -1341 rp_target = scan_header["FT Resolution:"] +1341 transient_time_list.append(transient_time) 1342 -1343 transient_time = res_trans_time.get(rp_target) +1343 # print(transient_time, rp_target) 1344 -1345 transient_time_list.append(transient_time) -1346 -1347 # print(transient_time, rp_target) -1348 -1349 return transient_time_list +1345 return transient_time_list
    @@ -4165,28 +4163,28 @@
    Methods
    -
    929    def __init__(
    -930        self,
    -931        file_location,
    -932        analyzer="Unknown",
    -933        instrument_label="Unknown",
    -934        sample_name=None,
    -935    ):
    -936        super().__init__(file_location)
    -937        if isinstance(file_location, str):
    -938            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    -939            file_location = Path(file_location)
    -940        if not file_location.exists():
    -941            raise FileExistsError("File does not exist: " + str(file_location))
    -942
    -943        self.file_location = file_location
    -944        self.analyzer = analyzer
    -945        self.instrument_label = instrument_label
    -946
    -947        if sample_name:
    -948            self.sample_name = sample_name
    -949        else:
    -950            self.sample_name = file_location.stem
    +            
    935    def __init__(
    +936        self,
    +937        file_location,
    +938        analyzer="Unknown",
    +939        instrument_label="Unknown",
    +940        sample_name=None,
    +941    ):
    +942        super().__init__(file_location)
    +943        if isinstance(file_location, str):
    +944            # if obj is a string it defaults to create a Path obj, pass the S3Path if needed
    +945            file_location = Path(file_location)
    +946        if not file_location.exists():
    +947            raise FileExistsError("File does not exist: " + str(file_location))
    +948
    +949        self.file_location = file_location
    +950        self.analyzer = analyzer
    +951        self.instrument_label = instrument_label
    +952
    +953        if sample_name:
    +954            self.sample_name = sample_name
    +955        else:
    +956            self.sample_name = file_location.stem
     
    @@ -4240,8 +4238,8 @@
    Methods
    -
    952    def load(self):
    -953        pass
    +            
    958    def load(self):
    +959        pass
     
    @@ -4261,47 +4259,41 @@
    Methods
    -
     955    def get_scan_df(self):
    - 956        # This automatically brings in all the data
    - 957        self.chromatogram_settings.scans = (-1, -1)
    - 958
    - 959        # Get scan df info; starting with bulk ms1 and ms2 scans
    - 960        ms1_tic_data, _ = self.get_tic(
    - 961            ms_type="MS", peak_detection=False, smooth=False
    - 962        )
    - 963        ms1_scan_dict = {
    - 964            "scan": ms1_tic_data.scans,
    - 965            "scan_time": ms1_tic_data.time,
    - 966            "tic": ms1_tic_data.tic,
    - 967        }
    - 968        ms1_tic_df = pd.DataFrame.from_dict(ms1_scan_dict)
    - 969        ms1_tic_df["ms_level"] = "ms1"
    - 970
    - 971        ms2_tic_data, _ = self.get_tic(
    - 972            ms_type="MS2", peak_detection=False, smooth=False
    - 973        )
    - 974        ms2_scan_dict = {
    - 975            "scan": ms2_tic_data.scans,
    - 976            "scan_time": ms2_tic_data.time,
    - 977            "tic": ms2_tic_data.tic,
    - 978        }
    - 979        ms2_tic_df = pd.DataFrame.from_dict(ms2_scan_dict)
    - 980        ms2_tic_df["ms_level"] = "ms2"
    - 981
    - 982        scan_df = (
    - 983            pd.concat([ms1_tic_df, ms2_tic_df], axis=0)
    - 984            .sort_values(by="scan")
    - 985            .reindex()
    - 986        )
    - 987
    - 988        # get scan text
    - 989        scan_filter_df = pd.DataFrame.from_dict(
    - 990            self.get_all_filters()[0], orient="index"
    - 991        )
    - 992        scan_filter_df.reset_index(inplace=True)
    - 993        scan_filter_df.rename(
    - 994            columns={"index": "scan", 0: "scan_text"}, inplace=True
    - 995        )
    +            
     961    def get_scan_df(self):
    + 962        # This automatically brings in all the data
    + 963        self.chromatogram_settings.scans = (-1, -1)
    + 964
    + 965        # Get scan df info; starting with bulk ms1 and ms2 scans
    + 966        ms1_tic_data, _ = self.get_tic(ms_type="MS", peak_detection=False, smooth=False)
    + 967        ms1_scan_dict = {
    + 968            "scan": ms1_tic_data.scans,
    + 969            "scan_time": ms1_tic_data.time,
    + 970            "tic": ms1_tic_data.tic,
    + 971        }
    + 972        ms1_tic_df = pd.DataFrame.from_dict(ms1_scan_dict)
    + 973        ms1_tic_df["ms_level"] = "ms1"
    + 974
    + 975        ms2_tic_data, _ = self.get_tic(
    + 976            ms_type="MS2", peak_detection=False, smooth=False
    + 977        )
    + 978        ms2_scan_dict = {
    + 979            "scan": ms2_tic_data.scans,
    + 980            "scan_time": ms2_tic_data.time,
    + 981            "tic": ms2_tic_data.tic,
    + 982        }
    + 983        ms2_tic_df = pd.DataFrame.from_dict(ms2_scan_dict)
    + 984        ms2_tic_df["ms_level"] = "ms2"
    + 985
    + 986        scan_df = (
    + 987            pd.concat([ms1_tic_df, ms2_tic_df], axis=0).sort_values(by="scan").reindex()
    + 988        )
    + 989
    + 990        # get scan text
    + 991        scan_filter_df = pd.DataFrame.from_dict(
    + 992            self.get_all_filters()[0], orient="index"
    + 993        )
    + 994        scan_filter_df.reset_index(inplace=True)
    + 995        scan_filter_df.rename(columns={"index": "scan", 0: "scan_text"}, inplace=True)
      996
      997        scan_df = scan_df.merge(scan_filter_df, on="scan", how="left")
      998        scan_df["scan_window_lower"] = scan_df.scan_text.str.extract(
    @@ -4324,7 +4316,7 @@ 
    Methods
    1015 scan_df.loc[scan_df.scan == i, "ms_format"] = "centroid" 1016 else: 1017 scan_df.loc[scan_df.scan == i, "ms_format"] = "profile" -1018 +1018 1019 return scan_df
    @@ -4347,7 +4339,7 @@
    Methods
    1021    def get_ms_raw(self, spectra, scan_df):
     1022        if spectra == "all":
    -1023                scan_df_forspec = scan_df
    +1023            scan_df_forspec = scan_df
     1024        elif spectra == "ms1":
     1025            scan_df_forspec = scan_df[scan_df.ms_level == 1]
     1026        elif spectra == "ms2":
    @@ -4370,90 +4362,86 @@ 
    Methods
    1043 # First pass: get nrows 1044 N = defaultdict(lambda: 0) 1045 for i in scan_df_forspec.scan.to_list(): -1046 level = scan_df_forspec.loc[ -1047 scan_df_forspec.scan == i, "ms_level" -1048 ].values[0] -1049 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) -1050 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1051 i, scanStatistics -1052 ) -1053 abun = list(profileStream.Intensities) -1054 abun = np.array(abun)[np.where(np.array(abun) > 0)[0]] +1046 level = scan_df_forspec.loc[scan_df_forspec.scan == i, "ms_level"].values[0] +1047 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) +1048 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1049 i, scanStatistics +1050 ) +1051 abun = list(profileStream.Intensities) +1052 abun = np.array(abun)[np.where(np.array(abun) > 0)[0]] +1053 +1054 N[level] += len(abun) 1055 -1056 N[level] += len(abun) -1057 -1058 # Second pass: parse -1059 for i in scan_df_forspec.scan.to_list(): -1060 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) -1061 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( -1062 i, scanStatistics -1063 ) -1064 abun = list(profileStream.Intensities) -1065 mz = list(profileStream.Positions) -1066 -1067 # Get index of abun that are > 0 -1068 inx = np.where(np.array(abun) > 0)[0] -1069 mz = np.array(mz)[inx] -1070 mz = np.float32(mz) -1071 abun = np.array(abun)[inx] -1072 abun = np.float32(abun) +1056 # Second pass: parse +1057 for i in scan_df_forspec.scan.to_list(): +1058 scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(i) +1059 profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber( +1060 i, scanStatistics +1061 ) +1062 abun = list(profileStream.Intensities) +1063 mz = list(profileStream.Positions) +1064 +1065 # Get index of abun that are > 0 +1066 inx = np.where(np.array(abun) > 0)[0] +1067 mz = np.array(mz)[inx] +1068 mz = np.float32(mz) +1069 abun = np.array(abun)[inx] +1070 abun = 
np.float32(abun) +1071 +1072 level = scan_df_forspec.loc[scan_df_forspec.scan == i, "ms_level"].values[0] 1073 -1074 level = scan_df_forspec.loc[ -1075 scan_df_forspec.scan == i, "ms_level" -1076 ].values[0] -1077 -1078 # Number of rows -1079 n = len(mz) +1074 # Number of rows +1075 n = len(mz) +1076 +1077 # No measurements +1078 if n == 0: +1079 continue 1080 -1081 # No measurements -1082 if n == 0: -1083 continue -1084 -1085 # Dimension check -1086 if len(mz) != len(abun): -1087 warnings.warn("m/z and intensity array dimension mismatch") -1088 continue -1089 -1090 # Scan/frame info -1091 id_dict = i +1081 # Dimension check +1082 if len(mz) != len(abun): +1083 warnings.warn("m/z and intensity array dimension mismatch") +1084 continue +1085 +1086 # Scan/frame info +1087 id_dict = i +1088 +1089 # Columns +1090 cols[level] = ["scan", "mz", "intensity"] +1091 m = len(cols[level]) 1092 -1093 # Columns -1094 cols[level] = ["scan", "mz", "intensity"] -1095 m = len(cols[level]) +1093 # Subarray init +1094 arr = np.empty((n, m), dtype=dtype) +1095 inx = 0 1096 -1097 # Subarray init -1098 arr = np.empty((n, m), dtype=dtype) -1099 inx = 0 +1097 # Populate scan/frame info +1098 arr[:, inx] = i +1099 inx += 1 1100 -1101 # Populate scan/frame info -1102 arr[:, inx] = i +1101 # Populate m/z +1102 arr[:, inx] = mz 1103 inx += 1 1104 -1105 # Populate m/z -1106 arr[:, inx] = mz +1105 # Populate intensity +1106 arr[:, inx] = abun 1107 inx += 1 1108 -1109 # Populate intensity -1110 arr[:, inx] = abun -1111 inx += 1 -1112 -1113 # Initialize output container -1114 if level not in res: -1115 res[level] = np.empty((N[level], m), dtype=dtype) -1116 counter[level] = 0 +1109 # Initialize output container +1110 if level not in res: +1111 res[level] = np.empty((N[level], m), dtype=dtype) +1112 counter[level] = 0 +1113 +1114 # Insert subarray +1115 res[level][counter[level] : counter[level] + n, :] = arr +1116 counter[level] += n 1117 -1118 # Insert subarray -1119 res[level][counter[level] : 
counter[level] + n, :] = arr -1120 counter[level] += n -1121 -1122 # Construct ms1 and ms2 mz dataframes -1123 for level in res.keys(): -1124 res[level] = pd.DataFrame(res[level]) -1125 res[level].columns = cols[level] -1126 # rename keys in res to add 'ms' prefix -1127 res = {f"ms{key}": value for key, value in res.items()} -1128 -1129 return res +1118 # Construct ms1 and ms2 mz dataframes +1119 for level in res.keys(): +1120 res[level] = pd.DataFrame(res[level]) +1121 res[level].columns = cols[level] +1122 # rename keys in res to add 'ms' prefix +1123 res = {f"ms{key}": value for key, value in res.items()} +1124 +1125 return res
    @@ -4473,36 +4461,36 @@
    Methods
    -
    1131    def run(self, spectra="all", scan_df=None):
    -1132        """
    -1133        Extracts mass spectra data from a raw file.
    -1134
    -1135        Parameters
    -1136        ----------
    -1137        spectra : str, optional
    -1138            Which mass spectra data to include in the output. Default is all.  Other options: none, ms1, ms2.
    -1139        scan_df : pandas.DataFrame, optional
    -1140            Scan dataframe.  If not provided, the scan dataframe is created from the mzML file.
    -1141
    -1142        Returns
    -1143        -------
    -1144        tuple
    -1145            A tuple containing two elements:
    -1146            - A dictionary containing mass spectra data, separated by MS level.
    -1147            - A pandas DataFrame containing scan information, including scan number, scan time, TIC, MS level,
    -1148                scan text, scan window lower and upper bounds, polarity, and precursor m/z (if applicable).
    -1149        """
    -1150        # Prepare scan_df
    -1151        if scan_df is None:
    -1152            scan_df = self.get_scan_df()
    -1153
    -1154        # Prepare mass spectra data
    -1155        if spectra != "none":
    -1156            res = self.get_ms_raw(spectra=spectra, scan_df=scan_df)
    -1157        else:
    -1158            res = None
    -1159
    -1160        return res, scan_df
    +            
    1127    def run(self, spectra="all", scan_df=None):
    +1128        """
    +1129        Extracts mass spectra data from a raw file.
    +1130
    +1131        Parameters
    +1132        ----------
    +1133        spectra : str, optional
    +1134            Which mass spectra data to include in the output. Default is all.  Other options: none, ms1, ms2.
    +1135        scan_df : pandas.DataFrame, optional
    +1136            Scan dataframe.  If not provided, the scan dataframe is created from the mzML file.
    +1137
    +1138        Returns
    +1139        -------
    +1140        tuple
    +1141            A tuple containing two elements:
    +1142            - A dictionary containing mass spectra data, separated by MS level.
    +1143            - A pandas DataFrame containing scan information, including scan number, scan time, TIC, MS level,
    +1144                scan text, scan window lower and upper bounds, polarity, and precursor m/z (if applicable).
    +1145        """
    +1146        # Prepare scan_df
    +1147        if scan_df is None:
    +1148            scan_df = self.get_scan_df()
    +1149
    +1150        # Prepare mass spectra data
    +1151        if spectra != "none":
    +1152            res = self.get_ms_raw(spectra=spectra, scan_df=scan_df)
    +1153        else:
    +1154            res = None
    +1155
    +1156        return res, scan_df
     
    @@ -4542,92 +4530,92 @@
    Returns
    -
    1162    def get_mass_spectrum_from_scan(
    -1163        self, scan_number, spectrum_mode, auto_process=True
    -1164    ):
    -1165        """Instatiate a MassSpecBase object from a single scan number from the binary file, currently only supports profile mode.
    -1166
    -1167        Parameters
    -1168        ----------
    -1169        scan_number : int
    -1170            The scan number to extract the mass spectrum from.
    -1171        polarity : int
    -1172            The polarity of the scan.  1 for positive mode, -1 for negative mode.
    -1173        spectrum_mode : str
    -1174            The type of mass spectrum to extract.  Must be 'profile' or 'centroid'.
    -1175        auto_process : bool, optional
    -1176            If True, perform peak picking and noise threshold calculation after creating the mass spectrum object. Default is True.
    -1177
    -1178        Returns
    -1179        -------
    -1180        MassSpecProfile | MassSpecCentroid
    -1181            The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    -1182        """
    -1183
    -1184        if spectrum_mode == "profile":
    -1185            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(scan_number)
    -1186            profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
    -1187                scan_number, scanStatistics
    -1188            )
    -1189            abun = list(profileStream.Intensities)
    -1190            mz = list(profileStream.Positions)
    -1191            data_dict = {
    -1192                Labels.mz: mz,
    -1193                Labels.abundance: abun,
    -1194            }
    -1195            d_params = self.set_metadata(
    -1196                firstScanNumber=scan_number,
    -1197                lastScanNumber=scan_number,
    -1198                scans_list=False,
    -1199                label=Labels.thermo_profile,
    -1200            )
    -1201            mass_spectrum_obj = MassSpecProfile(
    -1202                data_dict, d_params, auto_process=auto_process
    -1203            )
    -1204
    -1205        elif spectrum_mode == "centroid":
    -1206            centroid_scan = self.iRawDataPlus.GetCentroidStream(scan_number, False)
    -1207            if centroid_scan.Masses is not None:
    -1208                mz = list(centroid_scan.Masses)
    -1209                abun = list(centroid_scan.Intensities)
    -1210                rp = list(centroid_scan.Resolutions)
    -1211                magnitude = list(centroid_scan.Intensities)
    -1212                noise = list(centroid_scan.Noises)
    -1213                baselines = list(centroid_scan.Baselines)
    -1214                array_noise_std = (np.array(noise) - np.array(baselines)) / 3
    -1215                l_signal_to_noise = np.array(magnitude) / array_noise_std
    -1216                data_dict = {
    -1217                    Labels.mz: mz,
    -1218                    Labels.abundance: abun,
    -1219                    Labels.rp: rp,
    -1220                    Labels.s2n: list(l_signal_to_noise),
    -1221                }
    -1222            else:  # For CID MS2, the centroid data are stored in the profile data location, they do not have any associated rp or baseline data, but they should be treated as centroid data
    -1223                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
    -1224                    scan_number
    -1225                )
    -1226                profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
    -1227                    scan_number, scanStatistics
    -1228                )
    -1229                abun = list(profileStream.Intensities)
    -1230                mz = list(profileStream.Positions)
    -1231                data_dict = {
    -1232                    Labels.mz: mz,
    -1233                    Labels.abundance: abun,
    -1234                    Labels.rp: [np.nan] * len(mz),
    -1235                    Labels.s2n: [np.nan] * len(mz),
    -1236                }
    -1237            d_params = self.set_metadata(
    -1238                firstScanNumber=scan_number,
    -1239                lastScanNumber=scan_number,
    -1240                scans_list=False,
    -1241                label=Labels.thermo_centroid,
    -1242            )
    -1243            mass_spectrum_obj = MassSpecCentroid(
    -1244                data_dict, d_params, auto_process=auto_process
    -1245            )
    -1246
    -1247        return mass_spectrum_obj
    +            
    1158    def get_mass_spectrum_from_scan(
    +1159        self, scan_number, spectrum_mode, auto_process=True
    +1160    ):
    +1161        """Instatiate a MassSpecBase object from a single scan number from the binary file, currently only supports profile mode.
    +1162
    +1163        Parameters
    +1164        ----------
    +1165        scan_number : int
    +1166            The scan number to extract the mass spectrum from.
    +1167        polarity : int
    +1168            The polarity of the scan.  1 for positive mode, -1 for negative mode.
    +1169        spectrum_mode : str
    +1170            The type of mass spectrum to extract.  Must be 'profile' or 'centroid'.
    +1171        auto_process : bool, optional
    +1172            If True, perform peak picking and noise threshold calculation after creating the mass spectrum object. Default is True.
    +1173
    +1174        Returns
    +1175        -------
    +1176        MassSpecProfile | MassSpecCentroid
    +1177            The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    +1178        """
    +1179
    +1180        if spectrum_mode == "profile":
    +1181            scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(scan_number)
    +1182            profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
    +1183                scan_number, scanStatistics
    +1184            )
    +1185            abun = list(profileStream.Intensities)
    +1186            mz = list(profileStream.Positions)
    +1187            data_dict = {
    +1188                Labels.mz: mz,
    +1189                Labels.abundance: abun,
    +1190            }
    +1191            d_params = self.set_metadata(
    +1192                firstScanNumber=scan_number,
    +1193                lastScanNumber=scan_number,
    +1194                scans_list=False,
    +1195                label=Labels.thermo_profile,
    +1196            )
    +1197            mass_spectrum_obj = MassSpecProfile(
    +1198                data_dict, d_params, auto_process=auto_process
    +1199            )
    +1200
    +1201        elif spectrum_mode == "centroid":
    +1202            centroid_scan = self.iRawDataPlus.GetCentroidStream(scan_number, False)
    +1203            if centroid_scan.Masses is not None:
    +1204                mz = list(centroid_scan.Masses)
    +1205                abun = list(centroid_scan.Intensities)
    +1206                rp = list(centroid_scan.Resolutions)
    +1207                magnitude = list(centroid_scan.Intensities)
    +1208                noise = list(centroid_scan.Noises)
    +1209                baselines = list(centroid_scan.Baselines)
    +1210                array_noise_std = (np.array(noise) - np.array(baselines)) / 3
    +1211                l_signal_to_noise = np.array(magnitude) / array_noise_std
    +1212                data_dict = {
    +1213                    Labels.mz: mz,
    +1214                    Labels.abundance: abun,
    +1215                    Labels.rp: rp,
    +1216                    Labels.s2n: list(l_signal_to_noise),
    +1217                }
    +1218            else:  # For CID MS2, the centroid data are stored in the profile data location, they do not have any associated rp or baseline data, but they should be treated as centroid data
    +1219                scanStatistics = self.iRawDataPlus.GetScanStatsForScanNumber(
    +1220                    scan_number
    +1221                )
    +1222                profileStream = self.iRawDataPlus.GetSegmentedScanFromScanNumber(
    +1223                    scan_number, scanStatistics
    +1224                )
    +1225                abun = list(profileStream.Intensities)
    +1226                mz = list(profileStream.Positions)
    +1227                data_dict = {
    +1228                    Labels.mz: mz,
    +1229                    Labels.abundance: abun,
    +1230                    Labels.rp: [np.nan] * len(mz),
    +1231                    Labels.s2n: [np.nan] * len(mz),
    +1232                }
    +1233            d_params = self.set_metadata(
    +1234                firstScanNumber=scan_number,
    +1235                lastScanNumber=scan_number,
    +1236                scans_list=False,
    +1237                label=Labels.thermo_centroid,
    +1238            )
    +1239            mass_spectrum_obj = MassSpecCentroid(
    +1240                data_dict, d_params, auto_process=auto_process
    +1241            )
    +1242
    +1243        return mass_spectrum_obj
     
    @@ -4666,26 +4654,26 @@
    Returns
    -
    1249    def get_mass_spectra_obj(self):
    -1250        """Instatiate a MassSpectraBase object from the binary data file file.
    -1251
    -1252        Returns
    -1253        -------
    -1254        MassSpectraBase
    -1255            The MassSpectra object containing the parsed mass spectra.  The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe.
    -1256        """
    -1257        _, scan_df = self.run(spectra="none")
    -1258        mass_spectra_obj = MassSpectraBase(
    -1259            self.file_location,
    -1260            self.analyzer,
    -1261            self.instrument_label,
    -1262            self.sample_name,
    -1263            self,
    -1264        )
    -1265        scan_df = scan_df.set_index("scan", drop=False)
    -1266        mass_spectra_obj.scan_df = scan_df
    -1267
    -1268        return mass_spectra_obj
    +            
    1245    def get_mass_spectra_obj(self):
    +1246        """Instatiate a MassSpectraBase object from the binary data file file.
    +1247
    +1248        Returns
    +1249        -------
    +1250        MassSpectraBase
    +1251            The MassSpectra object containing the parsed mass spectra.  The object is instatiated with the mzML file, analyzer, instrument, sample name, and scan dataframe.
    +1252        """
    +1253        _, scan_df = self.run(spectra="none")
    +1254        mass_spectra_obj = MassSpectraBase(
    +1255            self.file_location,
    +1256            self.analyzer,
    +1257            self.instrument_label,
    +1258            self.sample_name,
    +1259            self,
    +1260        )
    +1261        scan_df = scan_df.set_index("scan", drop=False)
    +1262        mass_spectra_obj.scan_df = scan_df
    +1263
    +1264        return mass_spectra_obj
     
    @@ -4711,50 +4699,50 @@
    Returns
    -
    1270    def get_lcms_obj(self, spectra="all"):
    -1271        """Instatiates a LCMSBase object from the mzML file.
    -1272
    -1273        Parameters
    -1274        ----------
    -1275        verbose : bool, optional
    -1276            If True, print progress messages. Default is True.
    -1277        spectra : str, optional
    -1278            Which mass spectra data to include in the output. Default is "all".  Other options: "none", "ms1", "ms2".
    -1279
    -1280        Returns
    -1281        -------
    -1282        LCMSBase
    -1283            LCMS object containing mass spectra data. The object is instatiated with the file location, analyzer, instrument, sample name, scan info, mz dataframe (as specifified), polarity, as well as the attributes holding the scans, retention times, and tics.
    -1284        """
    -1285        _, scan_df = self.run(spectra="none")  # first run it to just get scan info
    -1286        res, scan_df = self.run(
    -1287            scan_df=scan_df, spectra=spectra
    -1288        )  # second run to parse data
    -1289        lcms_obj = LCMSBase(
    -1290            self.file_location,
    -1291            self.analyzer,
    -1292            self.instrument_label,
    -1293            self.sample_name,
    -1294            self,
    -1295        )
    -1296        if spectra != "none":
    -1297            for key in res:
    -1298                key_int = int(key.replace("ms", ""))
    -1299                res[key] = res[key][res[key].intensity > 0]
    -1300                res[key] = (
    -1301                    res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True)
    -1302                )
    -1303                lcms_obj._ms_unprocessed[key_int] = res[key]
    -1304        lcms_obj.scan_df = scan_df.set_index("scan", drop=False)
    -1305        # Check if polarity is mixed
    -1306        if len(set(scan_df.polarity)) > 1:
    -1307            raise ValueError("Mixed polarities detected in scan data")
    -1308        lcms_obj.polarity = scan_df.polarity[0]
    -1309        lcms_obj._scans_number_list = list(scan_df.scan)
    -1310        lcms_obj._retention_time_list = list(scan_df.scan_time)
    -1311        lcms_obj._tic_list = list(scan_df.tic)
    -1312
    -1313        return lcms_obj
    +            
    1266    def get_lcms_obj(self, spectra="all"):
    +1267        """Instatiates a LCMSBase object from the mzML file.
    +1268
    +1269        Parameters
    +1270        ----------
    +1271        verbose : bool, optional
    +1272            If True, print progress messages. Default is True.
    +1273        spectra : str, optional
    +1274            Which mass spectra data to include in the output. Default is "all".  Other options: "none", "ms1", "ms2".
    +1275
    +1276        Returns
    +1277        -------
    +1278        LCMSBase
    +1279            LCMS object containing mass spectra data. The object is instatiated with the file location, analyzer, instrument, sample name, scan info, mz dataframe (as specifified), polarity, as well as the attributes holding the scans, retention times, and tics.
    +1280        """
    +1281        _, scan_df = self.run(spectra="none")  # first run it to just get scan info
    +1282        res, scan_df = self.run(
    +1283            scan_df=scan_df, spectra=spectra
    +1284        )  # second run to parse data
    +1285        lcms_obj = LCMSBase(
    +1286            self.file_location,
    +1287            self.analyzer,
    +1288            self.instrument_label,
    +1289            self.sample_name,
    +1290            self,
    +1291        )
    +1292        if spectra != "none":
    +1293            for key in res:
    +1294                key_int = int(key.replace("ms", ""))
    +1295                res[key] = res[key][res[key].intensity > 0]
    +1296                res[key] = (
    +1297                    res[key].sort_values(by=["scan", "mz"]).reset_index(drop=True)
    +1298                )
    +1299                lcms_obj._ms_unprocessed[key_int] = res[key]
    +1300        lcms_obj.scan_df = scan_df.set_index("scan", drop=False)
    +1301        # Check if polarity is mixed
    +1302        if len(set(scan_df.polarity)) > 1:
    +1303            raise ValueError("Mixed polarities detected in scan data")
    +1304        lcms_obj.polarity = scan_df.polarity[0]
    +1305        lcms_obj._scans_number_list = list(scan_df.scan)
    +1306        lcms_obj._retention_time_list = list(scan_df.scan_time)
    +1307        lcms_obj._tic_list = list(scan_df.tic)
    +1308
    +1309        return lcms_obj
     
    @@ -4789,41 +4777,41 @@
    Returns
    -
    1315    def get_icr_transient_times(self):
    -1316        """Return a list for transient time targets for all scans, or selected scans range
    -1317
    -1318        Notes
    -1319        --------
    -1320        Resolving Power and Transient time targets based on 7T FT-ICR MS system
    -1321        """
    -1322
    -1323        res_trans_time = {
    -1324            "50": 0.384,
    -1325            "100000": 0.768,
    -1326            "200000": 1.536,
    -1327            "400000": 3.072,
    -1328            "750000": 6.144,
    -1329            "1000000": 12.288,
    -1330        }
    +            
    1311    def get_icr_transient_times(self):
    +1312        """Return a list for transient time targets for all scans, or selected scans range
    +1313
    +1314        Notes
    +1315        --------
    +1316        Resolving Power and Transient time targets based on 7T FT-ICR MS system
    +1317        """
    +1318
    +1319        res_trans_time = {
    +1320            "50": 0.384,
    +1321            "100000": 0.768,
    +1322            "200000": 1.536,
    +1323            "400000": 3.072,
    +1324            "750000": 6.144,
    +1325            "1000000": 12.288,
    +1326        }
    +1327
    +1328        firstScanNumber = self.start_scan
    +1329
    +1330        lastScanNumber = self.end_scan
     1331
    -1332        firstScanNumber = self.start_scan
    +1332        transient_time_list = []
     1333
    -1334        lastScanNumber = self.end_scan
    -1335
    -1336        transient_time_list = []
    -1337
    -1338        for scan in range(firstScanNumber, lastScanNumber):
    -1339            scan_header = self.get_scan_header(scan)
    +1334        for scan in range(firstScanNumber, lastScanNumber):
    +1335            scan_header = self.get_scan_header(scan)
    +1336
    +1337            rp_target = scan_header["FT Resolution:"]
    +1338
    +1339            transient_time = res_trans_time.get(rp_target)
     1340
    -1341            rp_target = scan_header["FT Resolution:"]
    +1341            transient_time_list.append(transient_time)
     1342
    -1343            transient_time = res_trans_time.get(rp_target)
    +1343            # print(transient_time, rp_target)
     1344
    -1345            transient_time_list.append(transient_time)
    -1346
    -1347            # print(transient_time, rp_target)
    -1348
    -1349        return transient_time_list
    +1345        return transient_time_list
     
    diff --git a/docs/corems/mass_spectra/output/export.html b/docs/corems/mass_spectra/output/export.html index e0f7d3df..b368abc0 100644 --- a/docs/corems/mass_spectra/output/export.html +++ b/docs/corems/mass_spectra/output/export.html @@ -219,1758 +219,1767 @@

    22 dump_lcms_settings_json, 23 dump_lcms_settings_toml, 24) - 25from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecfromFreq - 26from corems.mass_spectrum.output.export import HighResMassSpecExport - 27from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula - 28from corems.molecular_id.calc.SpectralSimilarity import methods_name - 29 - 30ion_type_dict = { - 31 # adduct : [atoms to add, atoms to subtract when calculating formula of ion - 32 "M+": [{}, {}], - 33 "protonated": [{"H": 1}, {}], - 34 "[M+H]+": [{"H": 1}, {}], - 35 "[M+NH4]+": [{"N": 1, "H": 4}, {}], # ammonium - 36 "[M+Na]+": [{"Na": 1}, {}], - 37 "[M+K]+": [{"K": 1}, {}], - 38 "[M+2Na+Cl]+": [{"Na": 2, "Cl": 1}, {}], - 39 "[M+2Na-H]+": [{"Na": 2}, {"H": 1}], - 40 "[M+C2H3Na2O2]+": [{"C": 2, "H": 3, "Na": 2, "O": 2}, {}], - 41 "[M+C4H10N3]+": [{"C": 4, "H": 10, "N": 3}, {}], - 42 "[M+NH4+ACN]+": [{"C": 2, "H": 7, "N": 2}, {}], - 43 "[M+H-H2O]+": [{}, {"H": 1, "O": 1}], - 44 "de-protonated": [{}, {"H": 1}], - 45 "[M-H]-": [{}, {"H": 1}], - 46 "[M+Cl]-": [{"Cl": 1}, {}], - 47 "[M+HCOO]-": [{"C": 1, "H": 1, "O": 2}, {}], # formate - 48 "[M+CH3COO]-": [{"C": 2, "H": 3, "O": 2}, {}], # acetate - 49 "[M+2NaAc+Cl]-": [{"Na": 2, "C": 2, "H": 3, "O": 2, "Cl": 1}, {}], - 50 "[M+K-2H]-": [{"K": 1}, {"H": 2}], - 51 "[M+Na-2H]-": [{"Na": 1}, {"H": 2}], - 52} + 25from corems.mass_spectrum.output.export import HighResMassSpecExport + 26from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula + 27from corems.molecular_id.calc.SpectralSimilarity import methods_name + 28 + 29ion_type_dict = { + 30 # adduct : [atoms to add, atoms to subtract when calculating formula of ion + 31 "M+": [{}, {}], + 32 "protonated": [{"H": 1}, {}], + 33 "[M+H]+": [{"H": 1}, {}], + 34 "[M+NH4]+": [{"N": 1, "H": 4}, {}], # ammonium + 35 "[M+Na]+": [{"Na": 1}, {}], + 36 "[M+K]+": [{"K": 1}, {}], + 37 "[M+2Na+Cl]+": [{"Na": 2, "Cl": 1}, {}], + 38 "[M+2Na-H]+": [{"Na": 
2}, {"H": 1}], + 39 "[M+C2H3Na2O2]+": [{"C": 2, "H": 3, "Na": 2, "O": 2}, {}], + 40 "[M+C4H10N3]+": [{"C": 4, "H": 10, "N": 3}, {}], + 41 "[M+NH4+ACN]+": [{"C": 2, "H": 7, "N": 2}, {}], + 42 "[M+H-H2O]+": [{}, {"H": 1, "O": 1}], + 43 "de-protonated": [{}, {"H": 1}], + 44 "[M-H]-": [{}, {"H": 1}], + 45 "[M+Cl]-": [{"Cl": 1}, {}], + 46 "[M+HCOO]-": [{"C": 1, "H": 1, "O": 2}, {}], # formate + 47 "[M+CH3COO]-": [{"C": 2, "H": 3, "O": 2}, {}], # acetate + 48 "[M+2NaAc+Cl]-": [{"Na": 2, "C": 2, "H": 3, "O": 2, "Cl": 1}, {}], + 49 "[M+K-2H]-": [{"K": 1}, {"H": 2}], + 50 "[M+Na-2H]-": [{"Na": 1}, {"H": 2}], + 51} + 52 53 - 54 - 55class LowResGCMSExport: - 56 """A class to export low resolution GC-MS data. - 57 - 58 This class provides methods to export low resolution GC-MS data to various formats such as Excel, CSV, HDF5, and Pandas DataFrame. - 59 - 60 Parameters: - 61 ---------- - 62 out_file_path : str - 63 The output file path. - 64 gcms : object - 65 The low resolution GCMS object. - 66 - 67 Attributes: - 68 ---------- - 69 output_file : Path - 70 The output file path as a Path object. - 71 gcms : object - 72 The low resolution GCMS object. - 73 - 74 Methods: - 75 ------- - 76 * get_pandas_df(id_label="corems:"). Get the exported data as a Pandas DataFrame. - 77 * get_json(nan=False, id_label="corems:"). Get the exported data as a JSON string. - 78 * to_pandas(write_metadata=True, id_label="corems:"). Export the data to a Pandas DataFrame and save it as a pickle file. - 79 * to_excel(write_mode='a', write_metadata=True, id_label="corems:"), - 80 Export the data to an Excel file. - 81 * to_csv(separate_output=False, write_mode="w", write_metadata=True, id_label="corems:"). - 82 Export the data to a CSV file. - 83 * to_hdf(id_label="corems:"). - 84 Export the data to an HDF5 file. - 85 * get_data_stats(gcms). - 86 Get statistics about the GCMS data. 
- 87 - 88 """ - 89 - 90 def __init__(self, out_file_path, gcms): - 91 self.output_file = Path(out_file_path) - 92 - 93 self.gcms = gcms - 94 - 95 self._init_columns() - 96 - 97 def _init_columns(self): - 98 """Initialize the column names for the exported data. - 99 - 100 Returns: - 101 ------- - 102 list - 103 The list of column names. - 104 """ - 105 - 106 columns = [ - 107 "Sample name", - 108 "Peak Index", - 109 "Retention Time", - 110 "Retention Time Ref", - 111 "Peak Height", - 112 "Peak Area", - 113 "Retention index", - 114 "Retention index Ref", - 115 "Retention Index Score", - 116 "Similarity Score", - 117 "Spectral Similarity Score", - 118 "Compound Name", - 119 "Chebi ID", - 120 "Kegg Compound ID", - 121 "Inchi", - 122 "Inchi Key", - 123 "Smiles", - 124 "Molecular Formula", - 125 "IUPAC Name", - 126 "Traditional Name", - 127 "Common Name", - 128 "Derivatization", - 129 ] - 130 - 131 if self.gcms.molecular_search_settings.exploratory_mode: - 132 columns.extend( - 133 [ - 134 "Weighted Cosine Correlation", - 135 "Cosine Correlation", - 136 "Stein Scott Similarity", - 137 "Pearson Correlation", - 138 "Spearman Correlation", - 139 "Kendall Tau Correlation", - 140 "Euclidean Distance", - 141 "Manhattan Distance", - 142 "Jaccard Distance", - 143 "DWT Correlation", - 144 "DFT Correlation", - 145 ] - 146 ) - 147 - 148 columns.extend(list(methods_name.values())) - 149 - 150 return columns - 151 - 152 def get_pandas_df(self, id_label="corems:"): - 153 """Get the exported data as a Pandas DataFrame. - 154 - 155 Parameters: - 156 ---------- - 157 id_label : str, optional - 158 The ID label for the data. Default is "corems:". - 159 - 160 Returns: - 161 ------- - 162 DataFrame - 163 The exported data as a Pandas DataFrame. 
- 164 """ - 165 - 166 columns = self._init_columns() - 167 - 168 dict_data_list = self.get_list_dict_data(self.gcms) - 169 - 170 df = DataFrame(dict_data_list, columns=columns) - 171 - 172 df.name = self.gcms.sample_name - 173 - 174 return df - 175 - 176 def get_json(self, nan=False, id_label="corems:"): - 177 """Get the exported data as a JSON string. - 178 - 179 Parameters: - 180 ---------- - 181 nan : bool, optional - 182 Whether to include NaN values in the JSON string. Default is False. - 183 id_label : str, optional - 184 The ID label for the data. Default is "corems:". - 185 - 186 """ - 187 - 188 import json - 189 - 190 dict_data_list = self.get_list_dict_data(self.gcms) - 191 - 192 return json.dumps( - 193 dict_data_list, sort_keys=False, indent=4, separators=(",", ": ") - 194 ) - 195 - 196 def to_pandas(self, write_metadata=True, id_label="corems:"): - 197 """Export the data to a Pandas DataFrame and save it as a pickle file. - 198 - 199 Parameters: - 200 ---------- - 201 write_metadata : bool, optional - 202 Whether to write metadata to the output file. - 203 id_label : str, optional - 204 The ID label for the data. - 205 """ - 206 - 207 columns = self._init_columns() - 208 - 209 dict_data_list = self.get_list_dict_data(self.gcms) - 210 - 211 df = DataFrame(dict_data_list, columns=columns) - 212 - 213 df.to_pickle(self.output_file.with_suffix(".pkl")) - 214 - 215 if write_metadata: - 216 self.write_settings( - 217 self.output_file.with_suffix(".pkl"), self.gcms, id_label="corems:" - 218 ) - 219 - 220 def to_excel(self, write_mode="a", write_metadata=True, id_label="corems:"): - 221 """Export the data to an Excel file. - 222 - 223 Parameters: - 224 ---------- - 225 write_mode : str, optional - 226 The write mode for the Excel file. Default is 'a' (append). - 227 write_metadata : bool, optional - 228 Whether to write metadata to the output file. Default is True. - 229 id_label : str, optional - 230 The ID label for the data. Default is "corems:". 
- 231 """ - 232 - 233 out_put_path = self.output_file.with_suffix(".xlsx") - 234 - 235 columns = self._init_columns() - 236 - 237 dict_data_list = self.get_list_dict_data(self.gcms) - 238 - 239 df = DataFrame(dict_data_list, columns=columns) - 240 - 241 if write_mode == "a" and out_put_path.exists(): - 242 writer = ExcelWriter(out_put_path, engine="openpyxl") - 243 # try to open an existing workbook - 244 writer.book = load_workbook(out_put_path) - 245 # copy existing sheets - 246 writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets) - 247 # read existing file - 248 reader = read_excel(out_put_path) - 249 # write out the new sheet - 250 df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1) - 251 - 252 writer.close() - 253 else: - 254 df.to_excel( - 255 self.output_file.with_suffix(".xlsx"), index=False, engine="openpyxl" - 256 ) - 257 - 258 if write_metadata: - 259 self.write_settings(out_put_path, self.gcms, id_label=id_label) - 260 - 261 def to_csv( - 262 self, - 263 separate_output=False, - 264 write_mode="w", - 265 write_metadata=True, - 266 id_label="corems:", - 267 ): - 268 """Export the data to a CSV file. - 269 - 270 Parameters: - 271 ---------- - 272 separate_output : bool, optional - 273 Whether to separate the output into multiple files. Default is False. - 274 write_mode : str, optional - 275 The write mode for the CSV file. Default is 'w' (write). - 276 write_metadata : bool, optional - 277 Whether to write metadata to the output file. Default is True. - 278 id_label : str, optional - 279 The ID label for the data. Default is "corems:". 
- 280 """ - 281 - 282 if separate_output: - 283 # set write mode to write - 284 # this mode will overwrite the file without warning - 285 write_mode = "w" - 286 else: - 287 # set write mode to append - 288 write_mode = "a" - 289 - 290 columns = self._init_columns() - 291 - 292 dict_data_list = self.get_list_dict_data(self.gcms) - 293 - 294 out_put_path = self.output_file.with_suffix(".csv") - 295 - 296 write_header = not out_put_path.exists() - 297 - 298 try: - 299 with open(out_put_path, write_mode, newline="") as csvfile: - 300 writer = csv.DictWriter(csvfile, fieldnames=columns) - 301 if write_header: - 302 writer.writeheader() - 303 for data in dict_data_list: - 304 writer.writerow(data) - 305 - 306 if write_metadata: - 307 self.write_settings(out_put_path, self.gcms, id_label=id_label) - 308 - 309 except IOError as ioerror: - 310 print(ioerror) - 311 - 312 def to_hdf(self, id_label="corems:"): - 313 """Export the data to an HDF5 file. - 314 - 315 Parameters: - 316 ---------- - 317 id_label : str, optional - 318 The ID label for the data. Default is "corems:". 
- 319 """ - 320 - 321 # save sample at a time - 322 def add_compound(gc_peak, compound_obj): - 323 modifier = compound_obj.classify if compound_obj.classify else "" - 324 compound_group = compound_obj.name.replace("/", "") + " " + modifier - 325 - 326 if compound_group not in peak_group: - 327 compound_group = peak_group.create_group(compound_group) - 328 - 329 # compound_group.attrs["retention_time"] = compound_obj.retention_time - 330 compound_group.attrs["retention_index"] = compound_obj.ri - 331 compound_group.attrs["retention_index_score"] = compound_obj.ri_score - 332 compound_group.attrs["spectral_similarity_score"] = ( - 333 compound_obj.spectral_similarity_score - 334 ) - 335 compound_group.attrs["similarity_score"] = compound_obj.similarity_score - 336 - 337 compond_mz = compound_group.create_dataset( - 338 "mz", data=np.array(compound_obj.mz), dtype="f8" - 339 ) - 340 compond_abundance = compound_group.create_dataset( - 341 "abundance", data=np.array(compound_obj.abundance), dtype="f8" - 342 ) - 343 - 344 if self.gcms.molecular_search_settings.exploratory_mode: - 345 compound_group.attrs["Spectral Similarities"] = json.dumps( - 346 compound_obj.spectral_similarity_scores, - 347 sort_keys=False, - 348 indent=4, - 349 separators=(",", ":"), - 350 ) - 351 else: - 352 warnings.warn("Skipping duplicate reference compound.") - 353 - 354 import json - 355 from datetime import datetime, timezone - 356 - 357 import h5py - 358 import numpy as np - 359 - 360 output_path = self.output_file.with_suffix(".hdf5") - 361 - 362 with h5py.File(output_path, "w") as hdf_handle: - 363 timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")) - 364 hdf_handle.attrs["time_stamp"] = timenow - 365 hdf_handle.attrs["data_structure"] = "gcms" - 366 hdf_handle.attrs["analyzer"] = self.gcms.analyzer - 367 hdf_handle.attrs["instrument_label"] = self.gcms.instrument_label - 368 - 369 hdf_handle.attrs["sample_id"] = "self.gcms.id" - 370 hdf_handle.attrs["sample_name"] = 
self.gcms.sample_name - 371 hdf_handle.attrs["input_data"] = str(self.gcms.file_location) - 372 hdf_handle.attrs["output_data"] = str(output_path) - 373 hdf_handle.attrs["output_data_id"] = id_label + uuid.uuid4().hex - 374 hdf_handle.attrs["corems_version"] = __version__ - 375 - 376 hdf_handle.attrs["Stats"] = json.dumps( - 377 self.get_data_stats(self.gcms), - 378 sort_keys=False, - 379 indent=4, - 380 separators=(",", ": "), - 381 ) - 382 hdf_handle.attrs["Calibration"] = json.dumps( - 383 self.get_calibration_stats(self.gcms, id_label), - 384 sort_keys=False, - 385 indent=4, - 386 separators=(",", ": "), - 387 ) - 388 hdf_handle.attrs["Blank"] = json.dumps( - 389 self.get_blank_stats(self.gcms), - 390 sort_keys=False, - 391 indent=4, - 392 separators=(",", ": "), - 393 ) - 394 - 395 corems_dict_setting = parameter_to_dict.get_dict_data_gcms(self.gcms) - 396 hdf_handle.attrs["CoreMSParameters"] = json.dumps( - 397 corems_dict_setting, sort_keys=False, indent=4, separators=(",", ": ") - 398 ) - 399 - 400 scans_dataset = hdf_handle.create_dataset( - 401 "scans", data=np.array(self.gcms.scans_number), dtype="f8" - 402 ) - 403 rt_dataset = hdf_handle.create_dataset( - 404 "rt", data=np.array(self.gcms.retention_time), dtype="f8" - 405 ) - 406 tic_dataset = hdf_handle.create_dataset( - 407 "tic", data=np.array(self.gcms.tic), dtype="f8" - 408 ) - 409 processed_tic_dataset = hdf_handle.create_dataset( - 410 "processed_tic", data=np.array(self.gcms.processed_tic), dtype="f8" - 411 ) - 412 - 413 output_score_method = ( - 414 self.gcms.molecular_search_settings.output_score_method - 415 ) - 416 - 417 for gc_peak in self.gcms: - 418 # print(gc_peak.retention_time) - 419 # print(gc_peak.tic) - 420 - 421 # check if there is a compound candidate - 422 peak_group = hdf_handle.create_group(str(gc_peak.retention_time)) - 423 peak_group.attrs["deconvolution"] = int( - 424 self.gcms.chromatogram_settings.use_deconvolution - 425 ) - 426 - 427 peak_group.attrs["start_scan"] = 
gc_peak.start_scan - 428 peak_group.attrs["apex_scan"] = gc_peak.apex_scan - 429 peak_group.attrs["final_scan"] = gc_peak.final_scan - 430 - 431 peak_group.attrs["retention_index"] = gc_peak.ri - 432 peak_group.attrs["retention_time"] = gc_peak.retention_time - 433 peak_group.attrs["area"] = gc_peak.area - 434 - 435 mz = peak_group.create_dataset( - 436 "mz", data=np.array(gc_peak.mass_spectrum.mz_exp), dtype="f8" - 437 ) - 438 abundance = peak_group.create_dataset( - 439 "abundance", - 440 data=np.array(gc_peak.mass_spectrum.abundance), - 441 dtype="f8", - 442 ) - 443 - 444 if gc_peak: - 445 if output_score_method == "highest_sim_score": - 446 compound_obj = gc_peak.highest_score_compound - 447 add_compound(gc_peak, compound_obj) - 448 - 449 elif output_score_method == "highest_ss": - 450 compound_obj = gc_peak.highest_ss_compound - 451 add_compound(gc_peak, compound_obj) - 452 - 453 else: - 454 for compound_obj in gc_peak: - 455 add_compound(gc_peak, compound_obj) - 456 - 457 def get_data_stats(self, gcms): - 458 """Get statistics about the GCMS data. - 459 - 460 Parameters: - 461 ---------- - 462 gcms : object - 463 The low resolution GCMS object. - 464 - 465 Returns: - 466 ------- - 467 dict - 468 A dictionary containing the data statistics. 
- 469 """ - 470 - 471 matched_peaks = gcms.matched_peaks - 472 no_matched_peaks = gcms.no_matched_peaks - 473 unique_metabolites = gcms.unique_metabolites - 474 - 475 peak_matchs_above_0p85 = 0 - 476 unique_peak_match_above_0p85 = 0 - 477 for match_peak in matched_peaks: - 478 gc_peak_above_85 = 0 - 479 matches_above_85 = list( - 480 filter(lambda m: m.similarity_score >= 0.85, match_peak) - 481 ) - 482 if matches_above_85: - 483 peak_matchs_above_0p85 += 1 - 484 if len(matches_above_85) == 1: - 485 unique_peak_match_above_0p85 += 1 - 486 - 487 data_stats = {} - 488 data_stats["average_signal_noise"] = "ni" - 489 data_stats["chromatogram_dynamic_range"] = gcms.dynamic_range - 490 data_stats["total_number_peaks"] = len(gcms) - 491 data_stats["total_peaks_matched"] = len(matched_peaks) - 492 data_stats["total_peaks_without_matches"] = len(no_matched_peaks) - 493 data_stats["total_matches_above_similarity_score_0.85"] = peak_matchs_above_0p85 - 494 data_stats["single_matches_above_similarity_score_0.85"] = ( - 495 unique_peak_match_above_0p85 - 496 ) - 497 data_stats["unique_metabolites"] = len(unique_metabolites) - 498 - 499 return data_stats - 500 - 501 def get_calibration_stats(self, gcms, id_label): - 502 """Get statistics about the GC-MS calibration. 
- 503 - 504 Parameters: - 505 ---------- - 506 """ - 507 calibration_parameters = {} - 508 - 509 calibration_parameters["calibration_rt_ri_pairs_ref"] = gcms.ri_pairs_ref - 510 calibration_parameters["data_url"] = str(gcms.cal_file_path) - 511 calibration_parameters["has_input"] = id_label + corems_md5(gcms.cal_file_path) - 512 calibration_parameters["data_name"] = str(gcms.cal_file_path.stem) - 513 calibration_parameters["calibration_method"] = "" - 514 - 515 return calibration_parameters - 516 - 517 def get_blank_stats(self, gcms): - 518 """Get statistics about the GC-MS blank.""" - 519 blank_parameters = {} - 520 - 521 blank_parameters["data_name"] = "ni" - 522 blank_parameters["blank_id"] = "ni" - 523 blank_parameters["data_url"] = "ni" - 524 blank_parameters["has_input"] = "ni" - 525 blank_parameters["common_features_to_blank"] = "ni" - 526 - 527 return blank_parameters - 528 - 529 def get_instrument_metadata(self, gcms): - 530 """Get metadata about the GC-MS instrument.""" - 531 instrument_metadata = {} - 532 - 533 instrument_metadata["analyzer"] = gcms.analyzer - 534 instrument_metadata["instrument_label"] = gcms.instrument_label - 535 instrument_metadata["instrument_id"] = uuid.uuid4().hex - 536 - 537 return instrument_metadata - 538 - 539 def get_data_metadata(self, gcms, id_label, output_path): - 540 """Get metadata about the GC-MS data. - 541 - 542 Parameters: - 543 ---------- - 544 gcms : object - 545 The low resolution GCMS object. - 546 id_label : str - 547 The ID label for the data. - 548 output_path : str - 549 The output file path. - 550 - 551 Returns: - 552 ------- - 553 dict - 554 A dictionary containing the data metadata. 
- 555 """ - 556 if isinstance(output_path, str): - 557 output_path = Path(output_path) - 558 - 559 paramaters_path = output_path.with_suffix(".json") - 560 - 561 if paramaters_path.exists(): - 562 with paramaters_path.open() as current_param: - 563 metadata = json.load(current_param) - 564 data_metadata = metadata.get("Data") - 565 else: - 566 data_metadata = {} - 567 data_metadata["data_name"] = [] - 568 data_metadata["input_data_url"] = [] - 569 data_metadata["has_input"] = [] - 570 - 571 data_metadata["data_name"].append(gcms.sample_name) - 572 data_metadata["input_data_url"].append(str(gcms.file_location)) - 573 data_metadata["has_input"].append(id_label + corems_md5(gcms.file_location)) - 574 - 575 data_metadata["output_data_name"] = str(output_path.stem) - 576 data_metadata["output_data_url"] = str(output_path) - 577 data_metadata["has_output"] = id_label + corems_md5(output_path) - 578 - 579 return data_metadata - 580 - 581 def get_parameters_json(self, gcms, id_label, output_path): - 582 """Get the parameters as a JSON string. - 583 - 584 Parameters: - 585 ---------- - 586 gcms : GCMS object - 587 The low resolution GCMS object. - 588 id_label : str - 589 The ID label for the data. - 590 output_path : str - 591 The output file path. - 592 - 593 Returns: - 594 ------- - 595 str - 596 The parameters as a JSON string. 
- 597 """ - 598 - 599 output_parameters_dict = {} - 600 output_parameters_dict["Data"] = self.get_data_metadata( - 601 gcms, id_label, output_path - 602 ) - 603 output_parameters_dict["Stats"] = self.get_data_stats(gcms) - 604 output_parameters_dict["Calibration"] = self.get_calibration_stats( - 605 gcms, id_label - 606 ) - 607 output_parameters_dict["Blank"] = self.get_blank_stats(gcms) - 608 output_parameters_dict["Instrument"] = self.get_instrument_metadata(gcms) - 609 corems_dict_setting = parameter_to_dict.get_dict_data_gcms(gcms) - 610 corems_dict_setting["corems_version"] = __version__ - 611 output_parameters_dict["CoreMSParameters"] = corems_dict_setting - 612 output_parameters_dict["has_metabolite"] = gcms.metabolites_data - 613 output = json.dumps( - 614 output_parameters_dict, sort_keys=False, indent=4, separators=(",", ": ") - 615 ) - 616 - 617 return output - 618 - 619 def write_settings(self, output_path, gcms, id_label="emsl:"): - 620 """Write the settings to a JSON file. - 621 - 622 Parameters: - 623 ---------- - 624 output_path : str - 625 The output file path. - 626 gcms : GCMS object - 627 The low resolution GCMS object. - 628 id_label : str - 629 The ID label for the data. Default is "emsl:". - 630 - 631 """ - 632 - 633 output = self.get_parameters_json(gcms, id_label, output_path) - 634 - 635 with open( - 636 output_path.with_suffix(".json"), - 637 "w", - 638 encoding="utf8", - 639 ) as outfile: - 640 outfile.write(output) - 641 - 642 def get_list_dict_data(self, gcms, include_no_match=True, no_match_inline=False): - 643 """Get the exported data as a list of dictionaries. - 644 - 645 Parameters: - 646 ---------- - 647 gcms : object - 648 The low resolution GCMS object. - 649 include_no_match : bool, optional - 650 Whether to include no match data. Default is True. - 651 no_match_inline : bool, optional - 652 Whether to include no match data inline. Default is False. 
- 653 - 654 Returns: - 655 ------- - 656 list - 657 The exported data as a list of dictionaries. - 658 """ - 659 - 660 output_score_method = gcms.molecular_search_settings.output_score_method - 661 - 662 dict_data_list = [] - 663 - 664 def add_match_dict_data(): - 665 derivatization = "{}:{}:{}".format( - 666 compound_obj.classify, - 667 compound_obj.derivativenum, - 668 compound_obj.derivatization, - 669 ) - 670 out_dict = { - 671 "Sample name": gcms.sample_name, - 672 "Peak Index": gcpeak_index, - 673 "Retention Time": gc_peak.retention_time, - 674 "Retention Time Ref": compound_obj.retention_time, - 675 "Peak Height": gc_peak.tic, - 676 "Peak Area": gc_peak.area, - 677 "Retention index": gc_peak.ri, - 678 "Retention index Ref": compound_obj.ri, - 679 "Retention Index Score": compound_obj.ri_score, - 680 "Spectral Similarity Score": compound_obj.spectral_similarity_score, - 681 "Similarity Score": compound_obj.similarity_score, - 682 "Compound Name": compound_obj.name, - 683 "Chebi ID": compound_obj.metadata.chebi, - 684 "Kegg Compound ID": compound_obj.metadata.kegg, - 685 "Inchi": compound_obj.metadata.inchi, - 686 "Inchi Key": compound_obj.metadata.inchikey, - 687 "Smiles": compound_obj.metadata.smiles, - 688 "Molecular Formula": compound_obj.formula, - 689 "IUPAC Name": compound_obj.metadata.iupac_name, - 690 "Traditional Name": compound_obj.metadata.traditional_name, - 691 "Common Name": compound_obj.metadata.common_name, - 692 "Derivatization": derivatization, - 693 } - 694 - 695 if self.gcms.molecular_search_settings.exploratory_mode: - 696 out_dict.update( - 697 { - 698 "Weighted Cosine Correlation": compound_obj.spectral_similarity_scores.get( - 699 "weighted_cosine_correlation" - 700 ), - 701 "Cosine Correlation": compound_obj.spectral_similarity_scores.get( - 702 "cosine_correlation" - 703 ), - 704 "Stein Scott Similarity": compound_obj.spectral_similarity_scores.get( - 705 "stein_scott_similarity" - 706 ), - 707 "Pearson Correlation": 
compound_obj.spectral_similarity_scores.get( - 708 "pearson_correlation" - 709 ), - 710 "Spearman Correlation": compound_obj.spectral_similarity_scores.get( - 711 "spearman_correlation" - 712 ), - 713 "Kendall Tau Correlation": compound_obj.spectral_similarity_scores.get( - 714 "kendall_tau_correlation" - 715 ), - 716 "DFT Correlation": compound_obj.spectral_similarity_scores.get( - 717 "dft_correlation" - 718 ), - 719 "DWT Correlation": compound_obj.spectral_similarity_scores.get( - 720 "dwt_correlation" - 721 ), - 722 "Euclidean Distance": compound_obj.spectral_similarity_scores.get( - 723 "euclidean_distance" - 724 ), - 725 "Manhattan Distance": compound_obj.spectral_similarity_scores.get( - 726 "manhattan_distance" - 727 ), - 728 "Jaccard Distance": compound_obj.spectral_similarity_scores.get( - 729 "jaccard_distance" - 730 ), - 731 } - 732 ) - 733 for method in methods_name: - 734 out_dict[methods_name.get(method)] = ( - 735 compound_obj.spectral_similarity_scores.get(method) - 736 ) - 737 - 738 dict_data_list.append(out_dict) - 739 - 740 def add_no_match_dict_data(): - 741 dict_data_list.append( - 742 { - 743 "Sample name": gcms.sample_name, - 744 "Peak Index": gcpeak_index, - 745 "Retention Time": gc_peak.retention_time, - 746 "Peak Height": gc_peak.tic, - 747 "Peak Area": gc_peak.area, - 748 "Retention index": gc_peak.ri, - 749 } - 750 ) - 751 - 752 for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks): - 753 # check if there is a compound candidate - 754 if gc_peak: - 755 if output_score_method == "highest_sim_score": - 756 compound_obj = gc_peak.highest_score_compound - 757 add_match_dict_data() - 758 - 759 elif output_score_method == "highest_ss": - 760 compound_obj = gc_peak.highest_ss_compound - 761 add_match_dict_data() - 762 - 763 else: - 764 for compound_obj in gc_peak: - 765 add_match_dict_data() # add monoisotopic peak - 766 - 767 else: - 768 # include not_match - 769 if include_no_match and no_match_inline: - 770 add_no_match_dict_data() - 
771 - 772 if include_no_match and not no_match_inline: - 773 for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks): - 774 if not gc_peak: - 775 add_no_match_dict_data() - 776 - 777 return dict_data_list + 54class LowResGCMSExport: + 55 """A class to export low resolution GC-MS data. + 56 + 57 This class provides methods to export low resolution GC-MS data to various formats such as Excel, CSV, HDF5, and Pandas DataFrame. + 58 + 59 Parameters: + 60 ---------- + 61 out_file_path : str + 62 The output file path. + 63 gcms : object + 64 The low resolution GCMS object. + 65 + 66 Attributes: + 67 ---------- + 68 output_file : Path + 69 The output file path as a Path object. + 70 gcms : object + 71 The low resolution GCMS object. + 72 + 73 Methods: + 74 ------- + 75 * get_pandas_df(id_label="corems:"). Get the exported data as a Pandas DataFrame. + 76 * get_json(nan=False, id_label="corems:"). Get the exported data as a JSON string. + 77 * to_pandas(write_metadata=True, id_label="corems:"). Export the data to a Pandas DataFrame and save it as a pickle file. + 78 * to_excel(write_mode='a', write_metadata=True, id_label="corems:"), + 79 Export the data to an Excel file. + 80 * to_csv(separate_output=False, write_mode="w", write_metadata=True, id_label="corems:"). + 81 Export the data to a CSV file. + 82 * to_hdf(id_label="corems:"). + 83 Export the data to an HDF5 file. + 84 * get_data_stats(gcms). + 85 Get statistics about the GCMS data. + 86 + 87 """ + 88 + 89 def __init__(self, out_file_path, gcms): + 90 self.output_file = Path(out_file_path) + 91 + 92 self.gcms = gcms + 93 + 94 self._init_columns() + 95 + 96 def _init_columns(self): + 97 """Initialize the column names for the exported data. + 98 + 99 Returns: + 100 ------- + 101 list + 102 The list of column names. 
+ 103 """ + 104 + 105 columns = [ + 106 "Sample name", + 107 "Peak Index", + 108 "Retention Time", + 109 "Retention Time Ref", + 110 "Peak Height", + 111 "Peak Area", + 112 "Retention index", + 113 "Retention index Ref", + 114 "Retention Index Score", + 115 "Similarity Score", + 116 "Spectral Similarity Score", + 117 "Compound Name", + 118 "Chebi ID", + 119 "Kegg Compound ID", + 120 "Inchi", + 121 "Inchi Key", + 122 "Smiles", + 123 "Molecular Formula", + 124 "IUPAC Name", + 125 "Traditional Name", + 126 "Common Name", + 127 "Derivatization", + 128 ] + 129 + 130 if self.gcms.molecular_search_settings.exploratory_mode: + 131 columns.extend( + 132 [ + 133 "Weighted Cosine Correlation", + 134 "Cosine Correlation", + 135 "Stein Scott Similarity", + 136 "Pearson Correlation", + 137 "Spearman Correlation", + 138 "Kendall Tau Correlation", + 139 "Euclidean Distance", + 140 "Manhattan Distance", + 141 "Jaccard Distance", + 142 "DWT Correlation", + 143 "DFT Correlation", + 144 ] + 145 ) + 146 + 147 columns.extend(list(methods_name.values())) + 148 + 149 return columns + 150 + 151 def get_pandas_df(self, id_label="corems:"): + 152 """Get the exported data as a Pandas DataFrame. + 153 + 154 Parameters: + 155 ---------- + 156 id_label : str, optional + 157 The ID label for the data. Default is "corems:". + 158 + 159 Returns: + 160 ------- + 161 DataFrame + 162 The exported data as a Pandas DataFrame. + 163 """ + 164 + 165 columns = self._init_columns() + 166 + 167 dict_data_list = self.get_list_dict_data(self.gcms) + 168 + 169 df = DataFrame(dict_data_list, columns=columns) + 170 + 171 df.name = self.gcms.sample_name + 172 + 173 return df + 174 + 175 def get_json(self, nan=False, id_label="corems:"): + 176 """Get the exported data as a JSON string. + 177 + 178 Parameters: + 179 ---------- + 180 nan : bool, optional + 181 Whether to include NaN values in the JSON string. Default is False. + 182 id_label : str, optional + 183 The ID label for the data. Default is "corems:". 
+ 184 + 185 """ + 186 + 187 import json + 188 + 189 dict_data_list = self.get_list_dict_data(self.gcms) + 190 + 191 return json.dumps( + 192 dict_data_list, sort_keys=False, indent=4, separators=(",", ": ") + 193 ) + 194 + 195 def to_pandas(self, write_metadata=True, id_label="corems:"): + 196 """Export the data to a Pandas DataFrame and save it as a pickle file. + 197 + 198 Parameters: + 199 ---------- + 200 write_metadata : bool, optional + 201 Whether to write metadata to the output file. + 202 id_label : str, optional + 203 The ID label for the data. + 204 """ + 205 + 206 columns = self._init_columns() + 207 + 208 dict_data_list = self.get_list_dict_data(self.gcms) + 209 + 210 df = DataFrame(dict_data_list, columns=columns) + 211 + 212 df.to_pickle(self.output_file.with_suffix(".pkl")) + 213 + 214 if write_metadata: + 215 self.write_settings( + 216 self.output_file.with_suffix(".pkl"), self.gcms, id_label="corems:" + 217 ) + 218 + 219 def to_excel(self, write_mode="a", write_metadata=True, id_label="corems:"): + 220 """Export the data to an Excel file. + 221 + 222 Parameters: + 223 ---------- + 224 write_mode : str, optional + 225 The write mode for the Excel file. Default is 'a' (append). + 226 write_metadata : bool, optional + 227 Whether to write metadata to the output file. Default is True. + 228 id_label : str, optional + 229 The ID label for the data. Default is "corems:". 
+ 230 """ + 231 + 232 out_put_path = self.output_file.with_suffix(".xlsx") + 233 + 234 columns = self._init_columns() + 235 + 236 dict_data_list = self.get_list_dict_data(self.gcms) + 237 + 238 df = DataFrame(dict_data_list, columns=columns) + 239 + 240 if write_mode == "a" and out_put_path.exists(): + 241 writer = ExcelWriter(out_put_path, engine="openpyxl") + 242 # try to open an existing workbook + 243 writer.book = load_workbook(out_put_path) + 244 # copy existing sheets + 245 writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets) + 246 # read existing file + 247 reader = read_excel(out_put_path) + 248 # write out the new sheet + 249 df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1) + 250 + 251 writer.close() + 252 else: + 253 df.to_excel( + 254 self.output_file.with_suffix(".xlsx"), index=False, engine="openpyxl" + 255 ) + 256 + 257 if write_metadata: + 258 self.write_settings(out_put_path, self.gcms, id_label=id_label) + 259 + 260 def to_csv( + 261 self, + 262 separate_output=False, + 263 write_mode="w", + 264 write_metadata=True, + 265 id_label="corems:", + 266 ): + 267 """Export the data to a CSV file. + 268 + 269 Parameters: + 270 ---------- + 271 separate_output : bool, optional + 272 Whether to separate the output into multiple files. Default is False. + 273 write_mode : str, optional + 274 The write mode for the CSV file. Default is 'w' (write). + 275 write_metadata : bool, optional + 276 Whether to write metadata to the output file. Default is True. + 277 id_label : str, optional + 278 The ID label for the data. Default is "corems:". 
+ 279 """ + 280 + 281 if separate_output: + 282 # set write mode to write + 283 # this mode will overwrite the file without warning + 284 write_mode = "w" + 285 else: + 286 # set write mode to append + 287 write_mode = "a" + 288 + 289 columns = self._init_columns() + 290 + 291 dict_data_list = self.get_list_dict_data(self.gcms) + 292 + 293 out_put_path = self.output_file.with_suffix(".csv") + 294 + 295 write_header = not out_put_path.exists() + 296 + 297 try: + 298 with open(out_put_path, write_mode, newline="") as csvfile: + 299 writer = csv.DictWriter(csvfile, fieldnames=columns) + 300 if write_header: + 301 writer.writeheader() + 302 for data in dict_data_list: + 303 writer.writerow(data) + 304 + 305 if write_metadata: + 306 self.write_settings(out_put_path, self.gcms, id_label=id_label) + 307 + 308 except IOError as ioerror: + 309 print(ioerror) + 310 + 311 def to_hdf(self, id_label="corems:"): + 312 """Export the data to an HDF5 file. + 313 + 314 Parameters: + 315 ---------- + 316 id_label : str, optional + 317 The ID label for the data. Default is "corems:". 
+ 318 """ + 319 + 320 # save sample at a time + 321 def add_compound(gc_peak, compound_obj): + 322 modifier = compound_obj.classify if compound_obj.classify else "" + 323 compound_group = compound_obj.name.replace("/", "") + " " + modifier + 324 + 325 if compound_group not in peak_group: + 326 compound_group = peak_group.create_group(compound_group) + 327 + 328 # compound_group.attrs["retention_time"] = compound_obj.retention_time + 329 compound_group.attrs["retention_index"] = compound_obj.ri + 330 compound_group.attrs["retention_index_score"] = compound_obj.ri_score + 331 compound_group.attrs["spectral_similarity_score"] = ( + 332 compound_obj.spectral_similarity_score + 333 ) + 334 compound_group.attrs["similarity_score"] = compound_obj.similarity_score + 335 + 336 compond_mz = compound_group.create_dataset( + 337 "mz", data=np.array(compound_obj.mz), dtype="f8" + 338 ) + 339 compond_abundance = compound_group.create_dataset( + 340 "abundance", data=np.array(compound_obj.abundance), dtype="f8" + 341 ) + 342 + 343 if self.gcms.molecular_search_settings.exploratory_mode: + 344 compound_group.attrs["Spectral Similarities"] = json.dumps( + 345 compound_obj.spectral_similarity_scores, + 346 sort_keys=False, + 347 indent=4, + 348 separators=(",", ":"), + 349 ) + 350 else: + 351 warnings.warn("Skipping duplicate reference compound.") + 352 + 353 import json + 354 from datetime import datetime, timezone + 355 + 356 import h5py + 357 import numpy as np + 358 + 359 output_path = self.output_file.with_suffix(".hdf5") + 360 + 361 with h5py.File(output_path, "w") as hdf_handle: + 362 timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")) + 363 hdf_handle.attrs["time_stamp"] = timenow + 364 hdf_handle.attrs["data_structure"] = "gcms" + 365 hdf_handle.attrs["analyzer"] = self.gcms.analyzer + 366 hdf_handle.attrs["instrument_label"] = self.gcms.instrument_label + 367 + 368 hdf_handle.attrs["sample_id"] = "self.gcms.id" + 369 hdf_handle.attrs["sample_name"] = 
self.gcms.sample_name + 370 hdf_handle.attrs["input_data"] = str(self.gcms.file_location) + 371 hdf_handle.attrs["output_data"] = str(output_path) + 372 hdf_handle.attrs["output_data_id"] = id_label + uuid.uuid4().hex + 373 hdf_handle.attrs["corems_version"] = __version__ + 374 + 375 hdf_handle.attrs["Stats"] = json.dumps( + 376 self.get_data_stats(self.gcms), + 377 sort_keys=False, + 378 indent=4, + 379 separators=(",", ": "), + 380 ) + 381 hdf_handle.attrs["Calibration"] = json.dumps( + 382 self.get_calibration_stats(self.gcms, id_label), + 383 sort_keys=False, + 384 indent=4, + 385 separators=(",", ": "), + 386 ) + 387 hdf_handle.attrs["Blank"] = json.dumps( + 388 self.get_blank_stats(self.gcms), + 389 sort_keys=False, + 390 indent=4, + 391 separators=(",", ": "), + 392 ) + 393 + 394 corems_dict_setting = parameter_to_dict.get_dict_data_gcms(self.gcms) + 395 hdf_handle.attrs["CoreMSParameters"] = json.dumps( + 396 corems_dict_setting, sort_keys=False, indent=4, separators=(",", ": ") + 397 ) + 398 + 399 scans_dataset = hdf_handle.create_dataset( + 400 "scans", data=np.array(self.gcms.scans_number), dtype="f8" + 401 ) + 402 rt_dataset = hdf_handle.create_dataset( + 403 "rt", data=np.array(self.gcms.retention_time), dtype="f8" + 404 ) + 405 tic_dataset = hdf_handle.create_dataset( + 406 "tic", data=np.array(self.gcms.tic), dtype="f8" + 407 ) + 408 processed_tic_dataset = hdf_handle.create_dataset( + 409 "processed_tic", data=np.array(self.gcms.processed_tic), dtype="f8" + 410 ) + 411 + 412 output_score_method = ( + 413 self.gcms.molecular_search_settings.output_score_method + 414 ) + 415 + 416 for gc_peak in self.gcms: + 417 # print(gc_peak.retention_time) + 418 # print(gc_peak.tic) + 419 + 420 # check if there is a compound candidate + 421 peak_group = hdf_handle.create_group(str(gc_peak.retention_time)) + 422 peak_group.attrs["deconvolution"] = int( + 423 self.gcms.chromatogram_settings.use_deconvolution + 424 ) + 425 + 426 peak_group.attrs["start_scan"] = 
gc_peak.start_scan + 427 peak_group.attrs["apex_scan"] = gc_peak.apex_scan + 428 peak_group.attrs["final_scan"] = gc_peak.final_scan + 429 + 430 peak_group.attrs["retention_index"] = gc_peak.ri + 431 peak_group.attrs["retention_time"] = gc_peak.retention_time + 432 peak_group.attrs["area"] = gc_peak.area + 433 + 434 mz = peak_group.create_dataset( + 435 "mz", data=np.array(gc_peak.mass_spectrum.mz_exp), dtype="f8" + 436 ) + 437 abundance = peak_group.create_dataset( + 438 "abundance", + 439 data=np.array(gc_peak.mass_spectrum.abundance), + 440 dtype="f8", + 441 ) + 442 + 443 if gc_peak: + 444 if output_score_method == "highest_sim_score": + 445 compound_obj = gc_peak.highest_score_compound + 446 add_compound(gc_peak, compound_obj) + 447 + 448 elif output_score_method == "highest_ss": + 449 compound_obj = gc_peak.highest_ss_compound + 450 add_compound(gc_peak, compound_obj) + 451 + 452 else: + 453 for compound_obj in gc_peak: + 454 add_compound(gc_peak, compound_obj) + 455 + 456 def get_data_stats(self, gcms): + 457 """Get statistics about the GCMS data. + 458 + 459 Parameters: + 460 ---------- + 461 gcms : object + 462 The low resolution GCMS object. + 463 + 464 Returns: + 465 ------- + 466 dict + 467 A dictionary containing the data statistics. 
+ 468 """ + 469 + 470 matched_peaks = gcms.matched_peaks + 471 no_matched_peaks = gcms.no_matched_peaks + 472 unique_metabolites = gcms.unique_metabolites + 473 + 474 peak_matchs_above_0p85 = 0 + 475 unique_peak_match_above_0p85 = 0 + 476 for match_peak in matched_peaks: + 477 gc_peak_above_85 = 0 + 478 matches_above_85 = list( + 479 filter(lambda m: m.similarity_score >= 0.85, match_peak) + 480 ) + 481 if matches_above_85: + 482 peak_matchs_above_0p85 += 1 + 483 if len(matches_above_85) == 1: + 484 unique_peak_match_above_0p85 += 1 + 485 + 486 data_stats = {} + 487 data_stats["average_signal_noise"] = "ni" + 488 data_stats["chromatogram_dynamic_range"] = gcms.dynamic_range + 489 data_stats["total_number_peaks"] = len(gcms) + 490 data_stats["total_peaks_matched"] = len(matched_peaks) + 491 data_stats["total_peaks_without_matches"] = len(no_matched_peaks) + 492 data_stats["total_matches_above_similarity_score_0.85"] = peak_matchs_above_0p85 + 493 data_stats["single_matches_above_similarity_score_0.85"] = ( + 494 unique_peak_match_above_0p85 + 495 ) + 496 data_stats["unique_metabolites"] = len(unique_metabolites) + 497 + 498 return data_stats + 499 + 500 def get_calibration_stats(self, gcms, id_label): + 501 """Get statistics about the GC-MS calibration. 
+ 502 + 503 Parameters: + 504 ---------- + 505 """ + 506 calibration_parameters = {} + 507 + 508 calibration_parameters["calibration_rt_ri_pairs_ref"] = gcms.ri_pairs_ref + 509 calibration_parameters["data_url"] = str(gcms.cal_file_path) + 510 calibration_parameters["has_input"] = id_label + corems_md5(gcms.cal_file_path) + 511 calibration_parameters["data_name"] = str(gcms.cal_file_path.stem) + 512 calibration_parameters["calibration_method"] = "" + 513 + 514 return calibration_parameters + 515 + 516 def get_blank_stats(self, gcms): + 517 """Get statistics about the GC-MS blank.""" + 518 blank_parameters = {} + 519 + 520 blank_parameters["data_name"] = "ni" + 521 blank_parameters["blank_id"] = "ni" + 522 blank_parameters["data_url"] = "ni" + 523 blank_parameters["has_input"] = "ni" + 524 blank_parameters["common_features_to_blank"] = "ni" + 525 + 526 return blank_parameters + 527 + 528 def get_instrument_metadata(self, gcms): + 529 """Get metadata about the GC-MS instrument.""" + 530 instrument_metadata = {} + 531 + 532 instrument_metadata["analyzer"] = gcms.analyzer + 533 instrument_metadata["instrument_label"] = gcms.instrument_label + 534 instrument_metadata["instrument_id"] = uuid.uuid4().hex + 535 + 536 return instrument_metadata + 537 + 538 def get_data_metadata(self, gcms, id_label, output_path): + 539 """Get metadata about the GC-MS data. + 540 + 541 Parameters: + 542 ---------- + 543 gcms : object + 544 The low resolution GCMS object. + 545 id_label : str + 546 The ID label for the data. + 547 output_path : str + 548 The output file path. + 549 + 550 Returns: + 551 ------- + 552 dict + 553 A dictionary containing the data metadata. 
+ 554 """ + 555 if isinstance(output_path, str): + 556 output_path = Path(output_path) + 557 + 558 paramaters_path = output_path.with_suffix(".json") + 559 + 560 if paramaters_path.exists(): + 561 with paramaters_path.open() as current_param: + 562 metadata = json.load(current_param) + 563 data_metadata = metadata.get("Data") + 564 else: + 565 data_metadata = {} + 566 data_metadata["data_name"] = [] + 567 data_metadata["input_data_url"] = [] + 568 data_metadata["has_input"] = [] + 569 + 570 data_metadata["data_name"].append(gcms.sample_name) + 571 data_metadata["input_data_url"].append(str(gcms.file_location)) + 572 data_metadata["has_input"].append(id_label + corems_md5(gcms.file_location)) + 573 + 574 data_metadata["output_data_name"] = str(output_path.stem) + 575 data_metadata["output_data_url"] = str(output_path) + 576 data_metadata["has_output"] = id_label + corems_md5(output_path) + 577 + 578 return data_metadata + 579 + 580 def get_parameters_json(self, gcms, id_label, output_path): + 581 """Get the parameters as a JSON string. + 582 + 583 Parameters: + 584 ---------- + 585 gcms : GCMS object + 586 The low resolution GCMS object. + 587 id_label : str + 588 The ID label for the data. + 589 output_path : str + 590 The output file path. + 591 + 592 Returns: + 593 ------- + 594 str + 595 The parameters as a JSON string. 
+ 596 """ + 597 + 598 output_parameters_dict = {} + 599 output_parameters_dict["Data"] = self.get_data_metadata( + 600 gcms, id_label, output_path + 601 ) + 602 output_parameters_dict["Stats"] = self.get_data_stats(gcms) + 603 output_parameters_dict["Calibration"] = self.get_calibration_stats( + 604 gcms, id_label + 605 ) + 606 output_parameters_dict["Blank"] = self.get_blank_stats(gcms) + 607 output_parameters_dict["Instrument"] = self.get_instrument_metadata(gcms) + 608 corems_dict_setting = parameter_to_dict.get_dict_data_gcms(gcms) + 609 corems_dict_setting["corems_version"] = __version__ + 610 output_parameters_dict["CoreMSParameters"] = corems_dict_setting + 611 output_parameters_dict["has_metabolite"] = gcms.metabolites_data + 612 output = json.dumps( + 613 output_parameters_dict, sort_keys=False, indent=4, separators=(",", ": ") + 614 ) + 615 + 616 return output + 617 + 618 def write_settings(self, output_path, gcms, id_label="emsl:"): + 619 """Write the settings to a JSON file. + 620 + 621 Parameters: + 622 ---------- + 623 output_path : str + 624 The output file path. + 625 gcms : GCMS object + 626 The low resolution GCMS object. + 627 id_label : str + 628 The ID label for the data. Default is "emsl:". + 629 + 630 """ + 631 + 632 output = self.get_parameters_json(gcms, id_label, output_path) + 633 + 634 with open( + 635 output_path.with_suffix(".json"), + 636 "w", + 637 encoding="utf8", + 638 ) as outfile: + 639 outfile.write(output) + 640 + 641 def get_list_dict_data(self, gcms, include_no_match=True, no_match_inline=False): + 642 """Get the exported data as a list of dictionaries. + 643 + 644 Parameters: + 645 ---------- + 646 gcms : object + 647 The low resolution GCMS object. + 648 include_no_match : bool, optional + 649 Whether to include no match data. Default is True. + 650 no_match_inline : bool, optional + 651 Whether to include no match data inline. Default is False. 
+ 652 + 653 Returns: + 654 ------- + 655 list + 656 The exported data as a list of dictionaries. + 657 """ + 658 + 659 output_score_method = gcms.molecular_search_settings.output_score_method + 660 + 661 dict_data_list = [] + 662 + 663 def add_match_dict_data(): + 664 derivatization = "{}:{}:{}".format( + 665 compound_obj.classify, + 666 compound_obj.derivativenum, + 667 compound_obj.derivatization, + 668 ) + 669 out_dict = { + 670 "Sample name": gcms.sample_name, + 671 "Peak Index": gcpeak_index, + 672 "Retention Time": gc_peak.retention_time, + 673 "Retention Time Ref": compound_obj.retention_time, + 674 "Peak Height": gc_peak.tic, + 675 "Peak Area": gc_peak.area, + 676 "Retention index": gc_peak.ri, + 677 "Retention index Ref": compound_obj.ri, + 678 "Retention Index Score": compound_obj.ri_score, + 679 "Spectral Similarity Score": compound_obj.spectral_similarity_score, + 680 "Similarity Score": compound_obj.similarity_score, + 681 "Compound Name": compound_obj.name, + 682 "Chebi ID": compound_obj.metadata.chebi, + 683 "Kegg Compound ID": compound_obj.metadata.kegg, + 684 "Inchi": compound_obj.metadata.inchi, + 685 "Inchi Key": compound_obj.metadata.inchikey, + 686 "Smiles": compound_obj.metadata.smiles, + 687 "Molecular Formula": compound_obj.formula, + 688 "IUPAC Name": compound_obj.metadata.iupac_name, + 689 "Traditional Name": compound_obj.metadata.traditional_name, + 690 "Common Name": compound_obj.metadata.common_name, + 691 "Derivatization": derivatization, + 692 } + 693 + 694 if self.gcms.molecular_search_settings.exploratory_mode: + 695 out_dict.update( + 696 { + 697 "Weighted Cosine Correlation": compound_obj.spectral_similarity_scores.get( + 698 "weighted_cosine_correlation" + 699 ), + 700 "Cosine Correlation": compound_obj.spectral_similarity_scores.get( + 701 "cosine_correlation" + 702 ), + 703 "Stein Scott Similarity": compound_obj.spectral_similarity_scores.get( + 704 "stein_scott_similarity" + 705 ), + 706 "Pearson Correlation": 
compound_obj.spectral_similarity_scores.get( + 707 "pearson_correlation" + 708 ), + 709 "Spearman Correlation": compound_obj.spectral_similarity_scores.get( + 710 "spearman_correlation" + 711 ), + 712 "Kendall Tau Correlation": compound_obj.spectral_similarity_scores.get( + 713 "kendall_tau_correlation" + 714 ), + 715 "DFT Correlation": compound_obj.spectral_similarity_scores.get( + 716 "dft_correlation" + 717 ), + 718 "DWT Correlation": compound_obj.spectral_similarity_scores.get( + 719 "dwt_correlation" + 720 ), + 721 "Euclidean Distance": compound_obj.spectral_similarity_scores.get( + 722 "euclidean_distance" + 723 ), + 724 "Manhattan Distance": compound_obj.spectral_similarity_scores.get( + 725 "manhattan_distance" + 726 ), + 727 "Jaccard Distance": compound_obj.spectral_similarity_scores.get( + 728 "jaccard_distance" + 729 ), + 730 } + 731 ) + 732 for method in methods_name: + 733 out_dict[methods_name.get(method)] = ( + 734 compound_obj.spectral_similarity_scores.get(method) + 735 ) + 736 + 737 dict_data_list.append(out_dict) + 738 + 739 def add_no_match_dict_data(): + 740 dict_data_list.append( + 741 { + 742 "Sample name": gcms.sample_name, + 743 "Peak Index": gcpeak_index, + 744 "Retention Time": gc_peak.retention_time, + 745 "Peak Height": gc_peak.tic, + 746 "Peak Area": gc_peak.area, + 747 "Retention index": gc_peak.ri, + 748 } + 749 ) + 750 + 751 for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks): + 752 # check if there is a compound candidate + 753 if gc_peak: + 754 if output_score_method == "highest_sim_score": + 755 compound_obj = gc_peak.highest_score_compound + 756 add_match_dict_data() + 757 + 758 elif output_score_method == "highest_ss": + 759 compound_obj = gc_peak.highest_ss_compound + 760 add_match_dict_data() + 761 + 762 else: + 763 for compound_obj in gc_peak: + 764 add_match_dict_data() # add monoisotopic peak + 765 + 766 else: + 767 # include not_match + 768 if include_no_match and no_match_inline: + 769 add_no_match_dict_data() + 
770 + 771 if include_no_match and not no_match_inline: + 772 for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks): + 773 if not gc_peak: + 774 add_no_match_dict_data() + 775 + 776 return dict_data_list + 777 778 - 779 - 780class HighResMassSpectraExport(HighResMassSpecExport): - 781 """A class to export high resolution mass spectra data. - 782 - 783 This class provides methods to export high resolution mass spectra data to various formats - 784 such as Excel, CSV, HDF5, and Pandas DataFrame. - 785 - 786 Parameters - 787 ---------- - 788 out_file_path : str | Path - 789 The output file path. - 790 mass_spectra : object - 791 The high resolution mass spectra object. - 792 output_type : str, optional - 793 The output type. Default is 'excel'. - 794 - 795 Attributes - 796 ---------- - 797 output_file : Path - 798 The output file path without suffix - 799 dir_loc : Path - 800 The directory location for the output file, - 801 by default this will be the output_file + ".corems" and all output files will be - 802 written into this location - 803 mass_spectra : MassSpectraBase - 804 The high resolution mass spectra object. 
- 805 """ - 806 - 807 def __init__(self, out_file_path, mass_spectra, output_type="excel"): - 808 super().__init__( - 809 out_file_path=out_file_path, mass_spectrum=None, output_type=output_type - 810 ) - 811 - 812 self.dir_loc = Path(out_file_path + ".corems") - 813 self.dir_loc.mkdir(exist_ok=True) - 814 # Place the output file in the directory - 815 self.output_file = self.dir_loc / Path(out_file_path).name - 816 self._output_type = output_type # 'excel', 'csv', 'pandas' or 'hdf5' - 817 self.mass_spectra = mass_spectra - 818 self.atoms_order_list = None - 819 self._init_columns() - 820 - 821 def get_pandas_df(self): - 822 """Get the mass spectra as a list of Pandas DataFrames.""" - 823 - 824 list_df = [] - 825 - 826 for mass_spectrum in self.mass_spectra: - 827 columns = self.columns_label + self.get_all_used_atoms_in_order( - 828 mass_spectrum - 829 ) - 830 - 831 dict_data_list = self.get_list_dict_data(mass_spectrum) - 832 - 833 df = DataFrame(dict_data_list, columns=columns) - 834 - 835 scan_number = mass_spectrum.scan_number - 836 - 837 df.name = str(self.output_file) + "_" + str(scan_number) - 838 - 839 list_df.append(df) - 840 - 841 return list_df - 842 - 843 def to_pandas(self, write_metadata=True): - 844 """Export the data to a Pandas DataFrame and save it as a pickle file. - 845 - 846 Parameters: - 847 ---------- - 848 write_metadata : bool, optional - 849 Whether to write metadata to the output file. Default is True. 
- 850 """ - 851 - 852 for mass_spectrum in self.mass_spectra: - 853 columns = self.columns_label + self.get_all_used_atoms_in_order( - 854 mass_spectrum - 855 ) - 856 - 857 dict_data_list = self.get_list_dict_data(mass_spectrum) - 858 - 859 df = DataFrame(dict_data_list, columns=columns) - 860 - 861 scan_number = mass_spectrum.scan_number - 862 - 863 out_filename = Path( - 864 "%s_scan%s%s" % (self.output_file, str(scan_number), ".pkl") - 865 ) - 866 - 867 df.to_pickle(self.dir_loc / out_filename) - 868 - 869 if write_metadata: - 870 self.write_settings( - 871 self.dir_loc / out_filename.with_suffix(""), mass_spectrum - 872 ) - 873 - 874 def to_excel(self, write_metadata=True): - 875 """Export the data to an Excel file. - 876 - 877 Parameters: - 878 ---------- - 879 write_metadata : bool, optional - 880 Whether to write metadata to the output file. Default is True. - 881 """ - 882 for mass_spectrum in self.mass_spectra: - 883 columns = self.columns_label + self.get_all_used_atoms_in_order( - 884 mass_spectrum - 885 ) - 886 - 887 dict_data_list = self.get_list_dict_data(mass_spectrum) - 888 - 889 df = DataFrame(dict_data_list, columns=columns) - 890 - 891 scan_number = mass_spectrum.scan_number - 892 - 893 out_filename = Path( - 894 "%s_scan%s%s" % (self.output_file, str(scan_number), ".xlsx") - 895 ) - 896 - 897 df.to_excel(self.dir_loc / out_filename) - 898 - 899 if write_metadata: - 900 self.write_settings( - 901 self.dir_loc / out_filename.with_suffix(""), mass_spectrum - 902 ) - 903 - 904 def to_csv(self, write_metadata=True): - 905 """Export the data to a CSV file. - 906 - 907 Parameters: - 908 ---------- - 909 write_metadata : bool, optional - 910 Whether to write metadata to the output file. Default is True. 
- 911 """ - 912 import csv - 913 - 914 for mass_spectrum in self.mass_spectra: - 915 columns = self.columns_label + self.get_all_used_atoms_in_order( - 916 mass_spectrum - 917 ) - 918 - 919 scan_number = mass_spectrum.scan_number - 920 - 921 dict_data_list = self.get_list_dict_data(mass_spectrum) - 922 - 923 out_filename = Path( - 924 "%s_scan%s%s" % (self.output_file, str(scan_number), ".csv") - 925 ) - 926 - 927 with open(self.dir_loc / out_filename, "w", newline="") as csvfile: - 928 writer = csv.DictWriter(csvfile, fieldnames=columns) - 929 writer.writeheader() - 930 for data in dict_data_list: - 931 writer.writerow(data) - 932 - 933 if write_metadata: - 934 self.write_settings( - 935 self.dir_loc / out_filename.with_suffix(""), mass_spectrum - 936 ) - 937 - 938 def get_mass_spectra_attrs(self): - 939 """Get the mass spectra attributes as a JSON string. - 940 - 941 Parameters: - 942 ---------- - 943 mass_spectra : object - 944 The high resolution mass spectra object. - 945 - 946 Returns: - 947 ------- - 948 str - 949 The mass spectra attributes as a JSON string. - 950 """ - 951 dict_ms_attrs = {} - 952 dict_ms_attrs["analyzer"] = self.mass_spectra.analyzer - 953 dict_ms_attrs["instrument_label"] = self.mass_spectra.instrument_label - 954 dict_ms_attrs["sample_name"] = self.mass_spectra.sample_name - 955 - 956 return json.dumps( - 957 dict_ms_attrs, sort_keys=False, indent=4, separators=(",", ": ") - 958 ) - 959 - 960 def to_hdf(self, overwrite=False, export_raw=True): - 961 """Export the data to an HDF5 file. - 962 - 963 Parameters - 964 ---------- - 965 overwrite : bool, optional - 966 Whether to overwrite the output file. Default is False. - 967 export_raw : bool, optional - 968 Whether to export the raw mass spectra data. Default is True. 
- 969 """ - 970 if overwrite: - 971 if self.output_file.with_suffix(".hdf5").exists(): - 972 self.output_file.with_suffix(".hdf5").unlink() - 973 - 974 with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle: - 975 if not hdf_handle.attrs.get("date_utc"): - 976 # Set metadata for all mass spectra - 977 timenow = str( - 978 datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z") - 979 ) - 980 hdf_handle.attrs["date_utc"] = timenow - 981 hdf_handle.attrs["filename"] = self.mass_spectra.file_location.name - 982 hdf_handle.attrs["data_structure"] = "mass_spectra" - 983 hdf_handle.attrs["analyzer"] = self.mass_spectra.analyzer - 984 hdf_handle.attrs["instrument_label"] = ( - 985 self.mass_spectra.instrument_label - 986 ) - 987 hdf_handle.attrs["sample_name"] = self.mass_spectra.sample_name - 988 hdf_handle.attrs["polarity"] = self.mass_spectra.polarity - 989 hdf_handle.attrs["parser_type"] = self.mass_spectra.spectra_parser_class.__name__ - 990 hdf_handle.attrs["original_file_location"] = self.mass_spectra.file_location._str - 991 - 992 if "mass_spectra" not in hdf_handle: - 993 mass_spectra_group = hdf_handle.create_group("mass_spectra") - 994 else: - 995 mass_spectra_group = hdf_handle.get("mass_spectra") - 996 - 997 for mass_spectrum in self.mass_spectra: - 998 group_key = str(int(mass_spectrum.scan_number)) + 779class HighResMassSpectraExport(HighResMassSpecExport): + 780 """A class to export high resolution mass spectra data. + 781 + 782 This class provides methods to export high resolution mass spectra data to various formats + 783 such as Excel, CSV, HDF5, and Pandas DataFrame. + 784 + 785 Parameters + 786 ---------- + 787 out_file_path : str | Path + 788 The output file path. + 789 mass_spectra : object + 790 The high resolution mass spectra object. + 791 output_type : str, optional + 792 The output type. Default is 'excel'. 
+ 793 + 794 Attributes + 795 ---------- + 796 output_file : Path + 797 The output file path without suffix + 798 dir_loc : Path + 799 The directory location for the output file, + 800 by default this will be the output_file + ".corems" and all output files will be + 801 written into this location + 802 mass_spectra : MassSpectraBase + 803 The high resolution mass spectra object. + 804 """ + 805 + 806 def __init__(self, out_file_path, mass_spectra, output_type="excel"): + 807 super().__init__( + 808 out_file_path=out_file_path, mass_spectrum=None, output_type=output_type + 809 ) + 810 + 811 self.dir_loc = Path(out_file_path + ".corems") + 812 self.dir_loc.mkdir(exist_ok=True) + 813 # Place the output file in the directory + 814 self.output_file = self.dir_loc / Path(out_file_path).name + 815 self._output_type = output_type # 'excel', 'csv', 'pandas' or 'hdf5' + 816 self.mass_spectra = mass_spectra + 817 self.atoms_order_list = None + 818 self._init_columns() + 819 + 820 def get_pandas_df(self): + 821 """Get the mass spectra as a list of Pandas DataFrames.""" + 822 + 823 list_df = [] + 824 + 825 for mass_spectrum in self.mass_spectra: + 826 columns = self.columns_label + self.get_all_used_atoms_in_order( + 827 mass_spectrum + 828 ) + 829 + 830 dict_data_list = self.get_list_dict_data(mass_spectrum) + 831 + 832 df = DataFrame(dict_data_list, columns=columns) + 833 + 834 scan_number = mass_spectrum.scan_number + 835 + 836 df.name = str(self.output_file) + "_" + str(scan_number) + 837 + 838 list_df.append(df) + 839 + 840 return list_df + 841 + 842 def to_pandas(self, write_metadata=True): + 843 """Export the data to a Pandas DataFrame and save it as a pickle file. + 844 + 845 Parameters: + 846 ---------- + 847 write_metadata : bool, optional + 848 Whether to write metadata to the output file. Default is True. 
+ 849 """ + 850 + 851 for mass_spectrum in self.mass_spectra: + 852 columns = self.columns_label + self.get_all_used_atoms_in_order( + 853 mass_spectrum + 854 ) + 855 + 856 dict_data_list = self.get_list_dict_data(mass_spectrum) + 857 + 858 df = DataFrame(dict_data_list, columns=columns) + 859 + 860 scan_number = mass_spectrum.scan_number + 861 + 862 out_filename = Path( + 863 "%s_scan%s%s" % (self.output_file, str(scan_number), ".pkl") + 864 ) + 865 + 866 df.to_pickle(self.dir_loc / out_filename) + 867 + 868 if write_metadata: + 869 self.write_settings( + 870 self.dir_loc / out_filename.with_suffix(""), mass_spectrum + 871 ) + 872 + 873 def to_excel(self, write_metadata=True): + 874 """Export the data to an Excel file. + 875 + 876 Parameters: + 877 ---------- + 878 write_metadata : bool, optional + 879 Whether to write metadata to the output file. Default is True. + 880 """ + 881 for mass_spectrum in self.mass_spectra: + 882 columns = self.columns_label + self.get_all_used_atoms_in_order( + 883 mass_spectrum + 884 ) + 885 + 886 dict_data_list = self.get_list_dict_data(mass_spectrum) + 887 + 888 df = DataFrame(dict_data_list, columns=columns) + 889 + 890 scan_number = mass_spectrum.scan_number + 891 + 892 out_filename = Path( + 893 "%s_scan%s%s" % (self.output_file, str(scan_number), ".xlsx") + 894 ) + 895 + 896 df.to_excel(self.dir_loc / out_filename) + 897 + 898 if write_metadata: + 899 self.write_settings( + 900 self.dir_loc / out_filename.with_suffix(""), mass_spectrum + 901 ) + 902 + 903 def to_csv(self, write_metadata=True): + 904 """Export the data to a CSV file. + 905 + 906 Parameters: + 907 ---------- + 908 write_metadata : bool, optional + 909 Whether to write metadata to the output file. Default is True. 
+ 910 """ + 911 import csv + 912 + 913 for mass_spectrum in self.mass_spectra: + 914 columns = self.columns_label + self.get_all_used_atoms_in_order( + 915 mass_spectrum + 916 ) + 917 + 918 scan_number = mass_spectrum.scan_number + 919 + 920 dict_data_list = self.get_list_dict_data(mass_spectrum) + 921 + 922 out_filename = Path( + 923 "%s_scan%s%s" % (self.output_file, str(scan_number), ".csv") + 924 ) + 925 + 926 with open(self.dir_loc / out_filename, "w", newline="") as csvfile: + 927 writer = csv.DictWriter(csvfile, fieldnames=columns) + 928 writer.writeheader() + 929 for data in dict_data_list: + 930 writer.writerow(data) + 931 + 932 if write_metadata: + 933 self.write_settings( + 934 self.dir_loc / out_filename.with_suffix(""), mass_spectrum + 935 ) + 936 + 937 def get_mass_spectra_attrs(self): + 938 """Get the mass spectra attributes as a JSON string. + 939 + 940 Parameters: + 941 ---------- + 942 mass_spectra : object + 943 The high resolution mass spectra object. + 944 + 945 Returns: + 946 ------- + 947 str + 948 The mass spectra attributes as a JSON string. + 949 """ + 950 dict_ms_attrs = {} + 951 dict_ms_attrs["analyzer"] = self.mass_spectra.analyzer + 952 dict_ms_attrs["instrument_label"] = self.mass_spectra.instrument_label + 953 dict_ms_attrs["sample_name"] = self.mass_spectra.sample_name + 954 + 955 return json.dumps( + 956 dict_ms_attrs, sort_keys=False, indent=4, separators=(",", ": ") + 957 ) + 958 + 959 def to_hdf(self, overwrite=False, export_raw=True): + 960 """Export the data to an HDF5 file. + 961 + 962 Parameters + 963 ---------- + 964 overwrite : bool, optional + 965 Whether to overwrite the output file. Default is False. + 966 export_raw : bool, optional + 967 Whether to export the raw mass spectra data. Default is True. 
+ 968 """ + 969 if overwrite: + 970 if self.output_file.with_suffix(".hdf5").exists(): + 971 self.output_file.with_suffix(".hdf5").unlink() + 972 + 973 with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle: + 974 if not hdf_handle.attrs.get("date_utc"): + 975 # Set metadata for all mass spectra + 976 timenow = str( + 977 datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z") + 978 ) + 979 hdf_handle.attrs["date_utc"] = timenow + 980 hdf_handle.attrs["filename"] = self.mass_spectra.file_location.name + 981 hdf_handle.attrs["data_structure"] = "mass_spectra" + 982 hdf_handle.attrs["analyzer"] = self.mass_spectra.analyzer + 983 hdf_handle.attrs["instrument_label"] = ( + 984 self.mass_spectra.instrument_label + 985 ) + 986 hdf_handle.attrs["sample_name"] = self.mass_spectra.sample_name + 987 hdf_handle.attrs["polarity"] = self.mass_spectra.polarity + 988 hdf_handle.attrs["parser_type"] = ( + 989 self.mass_spectra.spectra_parser_class.__name__ + 990 ) + 991 hdf_handle.attrs["original_file_location"] = ( + 992 self.mass_spectra.file_location._str + 993 ) + 994 + 995 if "mass_spectra" not in hdf_handle: + 996 mass_spectra_group = hdf_handle.create_group("mass_spectra") + 997 else: + 998 mass_spectra_group = hdf_handle.get("mass_spectra") 999 -1000 self.add_mass_spectrum_to_hdf5(hdf_handle, mass_spectrum, group_key, mass_spectra_group, export_raw) -1001 +1000 for mass_spectrum in self.mass_spectra: +1001 group_key = str(int(mass_spectrum.scan_number)) 1002 -1003class LCMSExport(HighResMassSpectraExport): -1004 """A class to export high resolution LC-MS data. -1005 -1006 This class provides methods to export high resolution LC-MS data to HDF5. +1003 self.add_mass_spectrum_to_hdf5( +1004 hdf_handle, mass_spectrum, group_key, mass_spectra_group, export_raw +1005 ) +1006 1007 -1008 Parameters -1009 ---------- -1010 out_file_path : str | Path -1011 The output file path, do not include the file extension. 
-1012 lcms_object : LCMSBase -1013 The high resolution lc-ms object. -1014 """ -1015 -1016 def __init__(self, out_file_path, mass_spectra): -1017 super().__init__(out_file_path, mass_spectra, output_type="hdf5") -1018 -1019 def to_hdf(self, overwrite=False, save_parameters=True, parameter_format="toml"): -1020 """Export the data to an HDF5. -1021 -1022 Parameters -1023 ---------- -1024 overwrite : bool, optional -1025 Whether to overwrite the output file. Default is False. -1026 save_parameters : bool, optional -1027 Whether to save the parameters as a separate json or toml file. Default is True. -1028 parameter_format : str, optional -1029 The format to save the parameters in. Default is 'toml'. -1030 -1031 Raises -1032 ------ -1033 ValueError -1034 If parameter_format is not 'json' or 'toml'. -1035 """ -1036 export_profile_spectra = self.mass_spectra.parameters.lc_ms.export_profile_spectra -1037 -1038 # Write the mass spectra data to the hdf5 file -1039 super().to_hdf(overwrite=overwrite, export_raw=export_profile_spectra) -1040 -1041 # Write scan info, ms_unprocessed, mass features, eics, and ms2_search results to the hdf5 file -1042 with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle: -1043 # Add scan_info to hdf5 file -1044 if "scan_info" not in hdf_handle: -1045 scan_info_group = hdf_handle.create_group("scan_info") -1046 for k, v in self.mass_spectra._scan_info.items(): -1047 array = np.array(list(v.values())) -1048 if array.dtype.str[0:2] == "<U": -1049 array = array.astype("S") -1050 scan_info_group.create_dataset(k, data=array) -1051 -1052 # Add ms_unprocessed to hdf5 file -1053 export_unprocessed_ms1 = self.mass_spectra.parameters.lc_ms.export_unprocessed_ms1 -1054 if self.mass_spectra._ms_unprocessed and export_unprocessed_ms1: -1055 if "ms_unprocessed" not in hdf_handle: -1056 ms_unprocessed_group = hdf_handle.create_group("ms_unprocessed") -1057 else: -1058 ms_unprocessed_group = hdf_handle.get("ms_unprocessed") -1059 for k, v in 
self.mass_spectra._ms_unprocessed.items(): -1060 array = np.array(v) -1061 ms_unprocessed_group.create_dataset(str(k), data=array) -1062 -1063 # Add LCMS mass features to hdf5 file -1064 if len(self.mass_spectra.mass_features) > 0: -1065 if "mass_features" not in hdf_handle: -1066 mass_features_group = hdf_handle.create_group("mass_features") -1067 else: -1068 mass_features_group = hdf_handle.get("mass_features") -1069 -1070 # Create group for each mass feature, with key as the mass feature id -1071 for k, v in self.mass_spectra.mass_features.items(): -1072 mass_features_group.create_group(str(k)) -1073 # Loop through each of the mass feature attributes and add them as attributes (if single value) or datasets (if array) -1074 for k2, v2 in v.__dict__.items(): -1075 if v2 is not None: -1076 # Check if the attribute is an integer or float and set as an attribute in the mass feature group -1077 if k2 not in [ -1078 "chromatogram_parent", -1079 "ms2_mass_spectra", -1080 "mass_spectrum", -1081 "_eic_data", -1082 "ms2_similarity_results", -1083 ]: -1084 if k2 == "ms2_scan_numbers": -1085 array = np.array(v2) -1086 mass_features_group[str(k)].create_dataset( -1087 str(k2), data=array -1088 ) -1089 elif k2 == "_half_height_width": -1090 array = np.array(v2) -1091 mass_features_group[str(k)].create_dataset( -1092 str(k2), data=array -1093 ) -1094 elif k2 == "_ms_deconvoluted_idx": -1095 array = np.array(v2) -1096 mass_features_group[str(k)].create_dataset( -1097 str(k2), data=array -1098 ) -1099 elif k2 == "associated_mass_features_deconvoluted": -1100 array = np.array(v2) -1101 mass_features_group[str(k)].create_dataset( -1102 str(k2), data=array -1103 ) -1104 elif ( -1105 isinstance(v2, int) -1106 or isinstance(v2, float) -1107 or isinstance(v2, str) -1108 or isinstance(v2, np.integer) -1109 or isinstance(v2, np.bool_) -1110 ): -1111 mass_features_group[str(k)].attrs[str(k2)] = v2 -1112 else: -1113 raise TypeError( -1114 f"Attribute {k2} is not an integer, float, or 
string and cannot be added to the hdf5 file" -1115 ) -1116 -1117 # Add EIC data to hdf5 file -1118 export_eics = self.mass_spectra.parameters.lc_ms.export_eics -1119 if len(self.mass_spectra.eics) > 0 and export_eics: -1120 if "eics" not in hdf_handle: -1121 eic_group = hdf_handle.create_group("eics") -1122 else: -1123 eic_group = hdf_handle.get("eics") -1124 -1125 # Create group for each eic -1126 for k, v in self.mass_spectra.eics.items(): -1127 eic_group.create_group(str(k)) -1128 eic_group[str(k)].attrs["mz"] = k -1129 # Loop through each of the attributes and add them as datasets (if array) -1130 for k2, v2 in v.__dict__.items(): -1131 if v2 is not None: -1132 array = np.array(v2) -1133 eic_group[str(k)].create_dataset(str(k2), data=array) -1134 -1135 # Add ms2_search results to hdf5 file -1136 if len(self.mass_spectra.spectral_search_results) > 0: -1137 if "spectral_search_results" not in hdf_handle: -1138 spectral_search_results = hdf_handle.create_group( -1139 "spectral_search_results" -1140 ) -1141 else: -1142 spectral_search_results = hdf_handle.get("spectral_search_results") -1143 # Create group for each search result by ms2_scan / precursor_mz -1144 for k, v in self.mass_spectra.spectral_search_results.items(): -1145 spectral_search_results.create_group(str(k)) -1146 for k2, v2 in v.items(): -1147 spectral_search_results[str(k)].create_group(str(k2)) -1148 spectral_search_results[str(k)][str(k2)].attrs[ -1149 "precursor_mz" -1150 ] = v2.precursor_mz -1151 spectral_search_results[str(k)][str(k2)].attrs[ -1152 "query_spectrum_id" -1153 ] = v2.query_spectrum_id -1154 # Loop through each of the attributes and add them as datasets (if array) -1155 for k3, v3 in v2.__dict__.items(): -1156 if v3 is not None and k3 not in [ -1157 "query_spectrum", -1158 "precursor_mz", -1159 "query_spectrum_id", -1160 ]: -1161 if k3 == "query_frag_types" or k3 == "ref_frag_types": -1162 v3 = [", ".join(x) for x in v3] -1163 array = np.array(v3) -1164 if array.dtype.str[0:2] == 
"<U": -1165 array = array.astype("S") -1166 spectral_search_results[str(k)][str(k2)].create_dataset( -1167 str(k3), data=array -1168 ) -1169 -1170 # Save parameters as separate json -1171 if save_parameters: -1172 # Check if parameter_format is valid -1173 if parameter_format not in ["json", "toml"]: -1174 raise ValueError("parameter_format must be 'json' or 'toml'") -1175 -1176 if parameter_format == "json": -1177 dump_lcms_settings_json( -1178 filename=self.output_file.with_suffix(".json"), -1179 lcms_obj=self.mass_spectra, -1180 ) -1181 elif parameter_format == "toml": -1182 dump_lcms_settings_toml( -1183 filename=self.output_file.with_suffix(".toml"), -1184 lcms_obj=self.mass_spectra, -1185 ) -1186 -1187 -1188 -1189class LipidomicsExport(LCMSExport): -1190 """A class to export lipidomics data. -1191 -1192 This class provides methods to export lipidomics data to various formats and summarize the lipid report. -1193 -1194 Parameters -1195 ---------- -1196 out_file_path : str | Path -1197 The output file path, do not include the file extension. -1198 mass_spectra : object -1199 The high resolution mass spectra object. -1200 """ -1201 def __init__(self, out_file_path, mass_spectra): -1202 super().__init__(out_file_path, mass_spectra) -1203 self.ion_type_dict = ion_type_dict -1204 -1205 @staticmethod -1206 def get_ion_formula(neutral_formula, ion_type): -1207 """From a neutral formula and an ion type, return the formula of the ion. -1208 -1209 Notes -1210 ----- -1211 This is a static method. -1212 If the neutral_formula is not a string, this method will return None. +1008class LCMSExport(HighResMassSpectraExport): +1009 """A class to export high resolution LC-MS data. +1010 +1011 This class provides methods to export high resolution LC-MS data to HDF5. +1012 +1013 Parameters +1014 ---------- +1015 out_file_path : str | Path +1016 The output file path, do not include the file extension. +1017 lcms_object : LCMSBase +1018 The high resolution lc-ms object. 
+1019 """ +1020 +1021 def __init__(self, out_file_path, mass_spectra): +1022 super().__init__(out_file_path, mass_spectra, output_type="hdf5") +1023 +1024 def to_hdf(self, overwrite=False, save_parameters=True, parameter_format="toml"): +1025 """Export the data to an HDF5. +1026 +1027 Parameters +1028 ---------- +1029 overwrite : bool, optional +1030 Whether to overwrite the output file. Default is False. +1031 save_parameters : bool, optional +1032 Whether to save the parameters as a separate json or toml file. Default is True. +1033 parameter_format : str, optional +1034 The format to save the parameters in. Default is 'toml'. +1035 +1036 Raises +1037 ------ +1038 ValueError +1039 If parameter_format is not 'json' or 'toml'. +1040 """ +1041 export_profile_spectra = ( +1042 self.mass_spectra.parameters.lc_ms.export_profile_spectra +1043 ) +1044 +1045 # Write the mass spectra data to the hdf5 file +1046 super().to_hdf(overwrite=overwrite, export_raw=export_profile_spectra) +1047 +1048 # Write scan info, ms_unprocessed, mass features, eics, and ms2_search results to the hdf5 file +1049 with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle: +1050 # Add scan_info to hdf5 file +1051 if "scan_info" not in hdf_handle: +1052 scan_info_group = hdf_handle.create_group("scan_info") +1053 for k, v in self.mass_spectra._scan_info.items(): +1054 array = np.array(list(v.values())) +1055 if array.dtype.str[0:2] == "<U": +1056 array = array.astype("S") +1057 scan_info_group.create_dataset(k, data=array) +1058 +1059 # Add ms_unprocessed to hdf5 file +1060 export_unprocessed_ms1 = ( +1061 self.mass_spectra.parameters.lc_ms.export_unprocessed_ms1 +1062 ) +1063 if self.mass_spectra._ms_unprocessed and export_unprocessed_ms1: +1064 if "ms_unprocessed" not in hdf_handle: +1065 ms_unprocessed_group = hdf_handle.create_group("ms_unprocessed") +1066 else: +1067 ms_unprocessed_group = hdf_handle.get("ms_unprocessed") +1068 for k, v in 
self.mass_spectra._ms_unprocessed.items(): +1069 array = np.array(v) +1070 ms_unprocessed_group.create_dataset(str(k), data=array) +1071 +1072 # Add LCMS mass features to hdf5 file +1073 if len(self.mass_spectra.mass_features) > 0: +1074 if "mass_features" not in hdf_handle: +1075 mass_features_group = hdf_handle.create_group("mass_features") +1076 else: +1077 mass_features_group = hdf_handle.get("mass_features") +1078 +1079 # Create group for each mass feature, with key as the mass feature id +1080 for k, v in self.mass_spectra.mass_features.items(): +1081 mass_features_group.create_group(str(k)) +1082 # Loop through each of the mass feature attributes and add them as attributes (if single value) or datasets (if array) +1083 for k2, v2 in v.__dict__.items(): +1084 if v2 is not None: +1085 # Check if the attribute is an integer or float and set as an attribute in the mass feature group +1086 if k2 not in [ +1087 "chromatogram_parent", +1088 "ms2_mass_spectra", +1089 "mass_spectrum", +1090 "_eic_data", +1091 "ms2_similarity_results", +1092 ]: +1093 if k2 == "ms2_scan_numbers": +1094 array = np.array(v2) +1095 mass_features_group[str(k)].create_dataset( +1096 str(k2), data=array +1097 ) +1098 elif k2 == "_half_height_width": +1099 array = np.array(v2) +1100 mass_features_group[str(k)].create_dataset( +1101 str(k2), data=array +1102 ) +1103 elif k2 == "_ms_deconvoluted_idx": +1104 array = np.array(v2) +1105 mass_features_group[str(k)].create_dataset( +1106 str(k2), data=array +1107 ) +1108 elif k2 == "associated_mass_features_deconvoluted": +1109 array = np.array(v2) +1110 mass_features_group[str(k)].create_dataset( +1111 str(k2), data=array +1112 ) +1113 elif ( +1114 isinstance(v2, int) +1115 or isinstance(v2, float) +1116 or isinstance(v2, str) +1117 or isinstance(v2, np.integer) +1118 or isinstance(v2, np.bool_) +1119 ): +1120 mass_features_group[str(k)].attrs[str(k2)] = v2 +1121 else: +1122 raise TypeError( +1123 f"Attribute {k2} is not an integer, float, or 
string and cannot be added to the hdf5 file" +1124 ) +1125 +1126 # Add EIC data to hdf5 file +1127 export_eics = self.mass_spectra.parameters.lc_ms.export_eics +1128 if len(self.mass_spectra.eics) > 0 and export_eics: +1129 if "eics" not in hdf_handle: +1130 eic_group = hdf_handle.create_group("eics") +1131 else: +1132 eic_group = hdf_handle.get("eics") +1133 +1134 # Create group for each eic +1135 for k, v in self.mass_spectra.eics.items(): +1136 eic_group.create_group(str(k)) +1137 eic_group[str(k)].attrs["mz"] = k +1138 # Loop through each of the attributes and add them as datasets (if array) +1139 for k2, v2 in v.__dict__.items(): +1140 if v2 is not None: +1141 array = np.array(v2) +1142 eic_group[str(k)].create_dataset(str(k2), data=array) +1143 +1144 # Add ms2_search results to hdf5 file +1145 if len(self.mass_spectra.spectral_search_results) > 0: +1146 if "spectral_search_results" not in hdf_handle: +1147 spectral_search_results = hdf_handle.create_group( +1148 "spectral_search_results" +1149 ) +1150 else: +1151 spectral_search_results = hdf_handle.get("spectral_search_results") +1152 # Create group for each search result by ms2_scan / precursor_mz +1153 for k, v in self.mass_spectra.spectral_search_results.items(): +1154 spectral_search_results.create_group(str(k)) +1155 for k2, v2 in v.items(): +1156 spectral_search_results[str(k)].create_group(str(k2)) +1157 spectral_search_results[str(k)][str(k2)].attrs[ +1158 "precursor_mz" +1159 ] = v2.precursor_mz +1160 spectral_search_results[str(k)][str(k2)].attrs[ +1161 "query_spectrum_id" +1162 ] = v2.query_spectrum_id +1163 # Loop through each of the attributes and add them as datasets (if array) +1164 for k3, v3 in v2.__dict__.items(): +1165 if v3 is not None and k3 not in [ +1166 "query_spectrum", +1167 "precursor_mz", +1168 "query_spectrum_id", +1169 ]: +1170 if k3 == "query_frag_types" or k3 == "ref_frag_types": +1171 v3 = [", ".join(x) for x in v3] +1172 array = np.array(v3) +1173 if array.dtype.str[0:2] == 
"<U": +1174 array = array.astype("S") +1175 spectral_search_results[str(k)][str(k2)].create_dataset( +1176 str(k3), data=array +1177 ) +1178 +1179 # Save parameters as separate json +1180 if save_parameters: +1181 # Check if parameter_format is valid +1182 if parameter_format not in ["json", "toml"]: +1183 raise ValueError("parameter_format must be 'json' or 'toml'") +1184 +1185 if parameter_format == "json": +1186 dump_lcms_settings_json( +1187 filename=self.output_file.with_suffix(".json"), +1188 lcms_obj=self.mass_spectra, +1189 ) +1190 elif parameter_format == "toml": +1191 dump_lcms_settings_toml( +1192 filename=self.output_file.with_suffix(".toml"), +1193 lcms_obj=self.mass_spectra, +1194 ) +1195 +1196 +1197class LipidomicsExport(LCMSExport): +1198 """A class to export lipidomics data. +1199 +1200 This class provides methods to export lipidomics data to various formats and summarize the lipid report. +1201 +1202 Parameters +1203 ---------- +1204 out_file_path : str | Path +1205 The output file path, do not include the file extension. +1206 mass_spectra : object +1207 The high resolution mass spectra object. +1208 """ +1209 +1210 def __init__(self, out_file_path, mass_spectra): +1211 super().__init__(out_file_path, mass_spectra) +1212 self.ion_type_dict = ion_type_dict 1213 -1214 Parameters -1215 ---------- -1216 neutral_formula : str -1217 The neutral formula, this should be a string form from the MolecularFormula class -1218 (e.g. 'C2 H4 O2', isotopes OK), or simple string (e.g. 'C2H4O2', no isotope handling in this case). -1219 In the case of a simple string, the atoms are parsed based on the presence of capital letters, -1220 e.g. MgCl2 is parsed as 'Mg Cl2. -1221 ion_type : str -1222 The ion type, e.g. 'protonated', '[M+H]+', '[M+Na]+', etc. -1223 See the self.ion_type_dict for the available ion types. -1224 -1225 Returns -1226 ------- -1227 str -1228 The formula of the ion as a string (like 'C2 H4 O2'); or None if the neutral_formula is not a string. 
-1229 """ -1230 # If neutral_formula is not a string, return None -1231 if not isinstance(neutral_formula, str): -1232 return None +1214 @staticmethod +1215 def get_ion_formula(neutral_formula, ion_type): +1216 """From a neutral formula and an ion type, return the formula of the ion. +1217 +1218 Notes +1219 ----- +1220 This is a static method. +1221 If the neutral_formula is not a string, this method will return None. +1222 +1223 Parameters +1224 ---------- +1225 neutral_formula : str +1226 The neutral formula, this should be a string form from the MolecularFormula class +1227 (e.g. 'C2 H4 O2', isotopes OK), or simple string (e.g. 'C2H4O2', no isotope handling in this case). +1228 In the case of a simple string, the atoms are parsed based on the presence of capital letters, +1229 e.g. MgCl2 is parsed as 'Mg Cl2. +1230 ion_type : str +1231 The ion type, e.g. 'protonated', '[M+H]+', '[M+Na]+', etc. +1232 See the self.ion_type_dict for the available ion types. 1233 -1234 # Check if there are spaces in the formula (these are outputs of the MolecularFormula class and do not need to be processed before being passed to the class) -1235 if re.search(r"\s", neutral_formula): -1236 neutral_formula = MolecularFormula(neutral_formula, ion_charge=0) -1237 else: -1238 form_pre = re.sub(r"([A-Z])", r" \1", neutral_formula)[1:] -1239 elements = [re.findall(r"[A-Z][a-z]*", x) for x in form_pre.split()] -1240 counts = [re.findall(r"\d+", x) for x in form_pre.split()] -1241 neutral_formula = MolecularFormula( -1242 dict( -1243 zip( -1244 [x[0] for x in elements], -1245 [int(x[0]) if x else 1 for x in counts], -1246 ) -1247 ), -1248 ion_charge=0, -1249 ) -1250 neutral_formula_dict = neutral_formula.to_dict().copy() -1251 -1252 adduct_add_dict = ion_type_dict[ion_type][0] -1253 for key in adduct_add_dict: -1254 if key in neutral_formula_dict.keys(): -1255 neutral_formula_dict[key] += adduct_add_dict[key] -1256 else: -1257 neutral_formula_dict[key] = adduct_add_dict[key] -1258 -1259 
adduct_subtract = ion_type_dict[ion_type][1] -1260 for key in adduct_subtract: -1261 neutral_formula_dict[key] -= adduct_subtract[key] -1262 -1263 return MolecularFormula(neutral_formula_dict, ion_charge=0).string -1264 -1265 @staticmethod -1266 def get_isotope_type(ion_formula): -1267 """From an ion formula, return the 13C isotope type of the ion. -1268 -1269 Notes -1270 ----- -1271 This is a static method. -1272 If the ion_formula is not a string, this method will return None. -1273 This is currently only functional for 13C isotopes. -1274 -1275 Parameters -1276 ---------- -1277 ion_formula : str -1278 The formula of the ion, expected to be a string like 'C2 H4 O2'. -1279 -1280 Returns -1281 ------- -1282 str -1283 The isotope type of the ion, e.g. '13C1', '13C2', etc; or None if the ion_formula does not contain a 13C isotope. -1284 -1285 Raises -1286 ------ -1287 ValueError -1288 If the ion_formula is not a string. -1289 """ -1290 if not isinstance(ion_formula, str): -1291 return None -1292 -1293 if re.search(r"\s", ion_formula): -1294 ion_formula = MolecularFormula(ion_formula, ion_charge=0) -1295 else: -1296 raise ValueError('ion_formula should be a string like "C2 H4 O2"') -1297 ion_formula_dict = ion_formula.to_dict().copy() -1298 -1299 try: -1300 iso_class = "13C" + str(ion_formula_dict.pop("13C")) -1301 except KeyError: -1302 iso_class = None -1303 -1304 return iso_class -1305 -1306 def clean_ms1_report(self, ms1_summary_full): -1307 """Clean the MS1 report. -1308 -1309 Parameters -1310 ---------- -1311 ms1_summary_full : DataFrame -1312 The full MS1 summary DataFrame. -1313 -1314 Returns -1315 ------- -1316 DataFrame -1317 The cleaned MS1 summary DataFrame. 
-1318 """ -1319 ms1_summary_full = ms1_summary_full.reset_index() -1320 cols_to_keep = [ -1321 "mf_id", -1322 "Molecular Formula", -1323 "Ion Type", -1324 "Calculated m/z", -1325 "m/z Error (ppm)", -1326 "m/z Error Score", -1327 "Is Isotopologue", -1328 "Isotopologue Similarity", -1329 "Confidence Score", -1330 ] -1331 ms1_summary = ms1_summary_full[cols_to_keep].copy() -1332 ms1_summary["ion_formula"] = [ -1333 self.get_ion_formula(f, a) -1334 for f, a in zip(ms1_summary["Molecular Formula"], ms1_summary["Ion Type"]) -1335 ] -1336 ms1_summary["isotopologue_type"] = [ -1337 self.get_isotope_type(f) for f in ms1_summary["ion_formula"].tolist() -1338 ] -1339 -1340 # Reorder columns -1341 ms1_summary = ms1_summary[ -1342 [ -1343 "mf_id", -1344 "ion_formula", -1345 "isotopologue_type", -1346 "Calculated m/z", -1347 "m/z Error (ppm)", -1348 "m/z Error Score", -1349 "Isotopologue Similarity", -1350 "Confidence Score", -1351 ] -1352 ] -1353 -1354 # Set the index to mf_id -1355 ms1_summary = ms1_summary.set_index("mf_id") -1356 -1357 return ms1_summary -1358 -1359 def summarize_lipid_report(self, ms2_annot): -1360 """Summarize the lipid report. -1361 -1362 Parameters -1363 ---------- -1364 ms2_annot : DataFrame -1365 The MS2 annotation DataFrame with all annotations. -1366 -1367 Returns -1368 ------- -1369 DataFrame -1370 The summarized lipid report. 
-1371 """ -1372 # Drop unnecessary columns for easier viewing -1373 columns_to_drop = [ -1374 "precursor_mz", -1375 "precursor_mz_error_ppm", -1376 "metabref_mol_id", -1377 "metabref_precursor_mz", -1378 "cas", -1379 "inchikey", -1380 "inchi", -1381 "chebi", -1382 "smiles", -1383 "kegg", -1384 "data_id", -1385 "iupac_name", -1386 "traditional_name", -1387 "common_name", -1388 "casno", -1389 ] -1390 ms2_annot = ms2_annot.drop( -1391 columns=[col for col in columns_to_drop if col in ms2_annot.columns] -1392 ) -1393 -1394 # If ion_types_excluded is not empty, remove those ion types -1395 ion_types_excluded = ( -1396 self.mass_spectra.parameters.mass_spectrum['ms2'].molecular_search.ion_types_excluded -1397 ) -1398 if len(ion_types_excluded) > 0: -1399 ms2_annot = ms2_annot[~ms2_annot["ref_ion_type"].isin(ion_types_excluded)] -1400 -1401 # If mf_id is not present, check that the index name is mf_id and reset the index -1402 if "mf_id" not in ms2_annot.columns: -1403 if ms2_annot.index.name == "mf_id": -1404 ms2_annot = ms2_annot.reset_index() -1405 else: -1406 raise ValueError("mf_id is not present in the dataframe") -1407 -1408 # Attempt to get consensus annotations to the MLF level -1409 mlf_results_all = [] -1410 for mf_id in ms2_annot["mf_id"].unique(): -1411 mlf_results_perid = [] -1412 ms2_annot_mf = ms2_annot[ms2_annot["mf_id"] == mf_id].copy() -1413 ms2_annot_mf["n_spectra_contributing"] = len(ms2_annot_mf) -1414 -1415 for query_scan in ms2_annot["query_spectrum_id"].unique(): -1416 ms2_annot_sub = ms2_annot_mf[ -1417 ms2_annot_mf["query_spectrum_id"] == query_scan -1418 ].copy() -1419 -1420 if ms2_annot_sub["lipid_summed_name"].nunique() == 1: -1421 # If there is only one lipid_summed_name, let's try to get consensus molecular species annotation -1422 if ms2_annot_sub["lipid_summed_name"].nunique() == 1: -1423 ms2_annot_sub["entropy_max"] = ( -1424 ms2_annot_sub["entropy_similarity"] -1425 == ms2_annot_sub["entropy_similarity"].max() -1426 ) -1427 
ms2_annot_sub["ref_match_fract_max"] = ( -1428 ms2_annot_sub["ref_mz_in_query_fract"] -1429 == ms2_annot_sub["ref_mz_in_query_fract"].max() -1430 ) -1431 ms2_annot_sub["frag_max"] = ms2_annot_sub["query_frag_types"].apply( -1432 lambda x: True if "MLF" in x else False -1433 ) -1434 -1435 # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks) -1436 ms2_annot_sub["consensus"] = ms2_annot_sub[ -1437 ["entropy_max", "ref_match_fract_max", "frag_max"] -1438 ].all(axis=1) -1439 -1440 # If there is a consensus, take the row with the highest entropy_similarity -1441 if ms2_annot_sub["consensus"].any(): -1442 ms2_annot_sub = ms2_annot_sub[ -1443 ms2_annot_sub["entropy_similarity"] -1444 == ms2_annot_sub["entropy_similarity"].max() -1445 ].head(1) -1446 mlf_results_perid.append(ms2_annot_sub) -1447 if len(mlf_results_perid) == 0: -1448 mlf_results_perid = pd.DataFrame() -1449 else: -1450 mlf_results_perid = pd.concat(mlf_results_perid) -1451 if mlf_results_perid["name"].nunique() == 1: -1452 mlf_results_perid = mlf_results_perid[ -1453 mlf_results_perid["entropy_similarity"] -1454 == mlf_results_perid["entropy_similarity"].max() -1455 ].head(1) -1456 else: -1457 mlf_results_perid = pd.DataFrame() -1458 mlf_results_all.append(mlf_results_perid) -1459 -1460 # These are the consensus annotations to the MLF level -1461 if len(mlf_results_all) > 0: -1462 mlf_results_all = pd.concat(mlf_results_all) -1463 mlf_results_all["annot_level"] = mlf_results_all["structure_level"] -1464 else: -1465 # Make an empty dataframe -1466 mlf_results_all = ms2_annot.head(0) -1467 -1468 # For remaining mf_ids, try to get a consensus annotation to the species level -1469 species_results_all = [] -1470 # Remove mf_ids that have consensus annotations to the MLF level -1471 ms2_annot_spec = ms2_annot[ -1472 ~ms2_annot["mf_id"].isin(mlf_results_all["mf_id"].unique()) -1473 ] -1474 for mf_id in ms2_annot_spec["mf_id"].unique(): -1475 # Do all the hits have 
the same lipid_summed_name? -1476 ms2_annot_sub = ms2_annot_spec[ms2_annot_spec["mf_id"] == mf_id].copy() -1477 ms2_annot_sub["n_spectra_contributing"] = len(ms2_annot_sub) -1478 -1479 if ms2_annot_sub["lipid_summed_name"].nunique() == 1: -1480 # Grab the highest entropy_similarity result -1481 ms2_annot_sub = ms2_annot_sub[ -1482 ms2_annot_sub["entropy_similarity"] -1483 == ms2_annot_sub["entropy_similarity"].max() -1484 ].head(1) -1485 species_results_all.append(ms2_annot_sub) -1486 -1487 # These are the consensus annotations to the species level -1488 if len(species_results_all) > 0: -1489 species_results_all = pd.concat(species_results_all) -1490 species_results_all["annot_level"] = "species" -1491 else: -1492 # Make an empty dataframe -1493 species_results_all = ms2_annot.head(0) -1494 -1495 # Deal with the remaining mf_ids that do not have consensus annotations to the species level or MLF level -1496 # Remove mf_ids that have consensus annotations to the species level -1497 ms2_annot_remaining = ms2_annot_spec[ -1498 ~ms2_annot_spec["mf_id"].isin(species_results_all["mf_id"].unique()) -1499 ] -1500 no_consensus = [] -1501 for mf_id in ms2_annot_remaining["mf_id"].unique(): -1502 id_sub = [] -1503 id_no_con = [] -1504 ms2_annot_sub_mf = ms2_annot_remaining[ -1505 ms2_annot_remaining["mf_id"] == mf_id -1506 ].copy() -1507 for query_scan in ms2_annot_sub_mf["query_spectrum_id"].unique(): -1508 ms2_annot_sub = ms2_annot_sub_mf[ -1509 ms2_annot_sub_mf["query_spectrum_id"] == query_scan -1510 ].copy() -1511 -1512 # New columns for ranking [HIGHER RANK = BETTER] -1513 ms2_annot_sub["entropy_max"] = ( -1514 ms2_annot_sub["entropy_similarity"] -1515 == ms2_annot_sub["entropy_similarity"].max() -1516 ) -1517 ms2_annot_sub["ref_match_fract_max"] = ( -1518 ms2_annot_sub["ref_mz_in_query_fract"] -1519 == ms2_annot_sub["ref_mz_in_query_fract"].max() -1520 ) -1521 ms2_annot_sub["frag_max"] = ms2_annot_sub["query_frag_types"].apply( -1522 lambda x: True if "MLF" in x else 
False -1523 ) -1524 -1525 # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks) -1526 ms2_annot_sub["consensus"] = ms2_annot_sub[ -1527 ["entropy_max", "ref_match_fract_max", "frag_max"] -1528 ].all(axis=1) -1529 ms2_annot_sub_con = ms2_annot_sub[ms2_annot_sub["consensus"]] -1530 id_sub.append(ms2_annot_sub_con) -1531 id_no_con.append(ms2_annot_sub) -1532 id_sub = pd.concat(id_sub) -1533 id_no_con = pd.concat(id_no_con) -1534 -1535 # Scenario 1: Multiple scans are being resolved to different MLFs [could be coelutions and should both be kept and annotated to MS level] -1536 if ( -1537 id_sub["query_frag_types"] -1538 .apply(lambda x: True if "MLF" in x else False) -1539 .all() -1540 and len(id_sub) > 0 -1541 ): -1542 idx = id_sub.groupby("name")["entropy_similarity"].idxmax() -1543 id_sub = id_sub.loc[idx] -1544 # Reorder so highest entropy_similarity is first -1545 id_sub = id_sub.sort_values("entropy_similarity", ascending=False) -1546 id_sub["annot_level"] = id_sub["structure_level"] -1547 no_consensus.append(id_sub) -1548 -1549 # Scenario 2: Multiple scans are being resolved to different species, keep both and annotate to appropriate level -1550 elif len(id_sub) == 0: -1551 for lipid_summed_name in id_no_con["lipid_summed_name"].unique(): -1552 summed_sub = id_no_con[ -1553 id_no_con["lipid_summed_name"] == lipid_summed_name -1554 ] -1555 # Any consensus to MLF? 
-1556 if summed_sub["consensus"].any(): -1557 summed_sub = summed_sub[summed_sub["consensus"]] -1558 summed_sub["annot_level"] = summed_sub["structure_level"] -1559 no_consensus.append(summed_sub) -1560 else: -1561 # Grab the highest entropy_similarity, if there are multiple, grab the first one -1562 summed_sub = summed_sub[ -1563 summed_sub["entropy_similarity"] -1564 == summed_sub["entropy_similarity"].max() -1565 ].head(1) -1566 # get first row -1567 summed_sub["annot_level"] = "species" -1568 summed_sub["name"] = "" -1569 no_consensus.append(summed_sub) -1570 else: -1571 raise ValueError("Unexpected scenario for summarizing mf_id: ", mf_id) -1572 -1573 if len(no_consensus) > 0: -1574 no_consensus = pd.concat(no_consensus) -1575 else: -1576 no_consensus = ms2_annot.head(0) -1577 -1578 # Combine all the consensus annotations and reformat the dataframe for output -1579 species_results_all = species_results_all.drop(columns=["name"]) -1580 species_results_all["lipid_molecular_species_id"] = "" -1581 mlf_results_all["lipid_molecular_species_id"] = mlf_results_all["name"] -1582 no_consensus["lipid_molecular_species_id"] = no_consensus["name"] -1583 consensus_annotations = pd.concat( -1584 [mlf_results_all, species_results_all, no_consensus] -1585 ) -1586 consensus_annotations = consensus_annotations.sort_values( -1587 "mf_id", ascending=True -1588 ) -1589 cols_to_keep = [ -1590 "mf_id", -1591 "ref_ion_type", -1592 "entropy_similarity", -1593 "ref_mz_in_query_fract", -1594 "lipid_molecular_species_id", -1595 "lipid_summed_name", -1596 "lipid_subclass", -1597 "lipid_class", -1598 "lipid_category", -1599 "formula", -1600 "annot_level", -1601 "n_spectra_contributing", -1602 ] -1603 consensus_annotations = consensus_annotations[cols_to_keep] -1604 consensus_annotations = consensus_annotations.set_index("mf_id") -1605 -1606 return consensus_annotations -1607 -1608 def clean_ms2_report(self, lipid_summary): -1609 """Clean the MS2 report. 
-1610 -1611 Parameters -1612 ---------- -1613 lipid_summary : DataFrame -1614 The full lipid summary DataFrame. -1615 -1616 Returns -1617 ------- -1618 DataFrame -1619 The cleaned lipid summary DataFrame. -1620 """ -1621 lipid_summary = lipid_summary.reset_index() -1622 lipid_summary["ion_formula"] = [ -1623 self.get_ion_formula(f, a) -1624 for f, a in zip(lipid_summary["formula"], lipid_summary["ref_ion_type"]) -1625 ] -1626 -1627 # Reorder columns -1628 lipid_summary = lipid_summary[ -1629 [ -1630 "mf_id", -1631 "ion_formula", -1632 "ref_ion_type", -1633 "formula", -1634 "annot_level", -1635 "lipid_molecular_species_id", -1636 "lipid_summed_name", -1637 "lipid_subclass", -1638 "lipid_class", -1639 "lipid_category", -1640 "entropy_similarity", -1641 "ref_mz_in_query_fract", -1642 "n_spectra_contributing", -1643 ] -1644 ] -1645 -1646 # Set the index to mf_id -1647 lipid_summary = lipid_summary.set_index("mf_id") -1648 -1649 return lipid_summary -1650 -1651 def to_report(self, molecular_metadata=None): -1652 """Create a report of the mass features and their annotations. -1653 -1654 Parameters -1655 ---------- -1656 molecular_metadata : dict, optional -1657 The molecular metadata. Default is None. -1658 -1659 Returns -1660 ------- -1661 DataFrame -1662 The report of the mass features and their annotations. -1663 -1664 Notes -1665 ----- -1666 The report will contain the mass features and their annotations from MS1 and MS2 (if available). 
-1667 """ -1668 # Get mass feature dataframe -1669 mf_report = self.mass_spectra.mass_features_to_df() -1670 mf_report = mf_report.reset_index(drop=False) -1671 -1672 # Get and clean ms1 annotation dataframe -1673 ms1_annot_report = self.mass_spectra.mass_features_ms1_annot_to_df().copy() -1674 ms1_annot_report = self.clean_ms1_report(ms1_annot_report) -1675 ms1_annot_report = ms1_annot_report.reset_index(drop=False) -1676 -1677 # Get, summarize, and clean ms2 annotation dataframe -1678 ms2_annot_report = self.mass_spectra.mass_features_ms2_annot_to_df( -1679 molecular_metadata=molecular_metadata -1680 ) -1681 if ms2_annot_report is not None: -1682 ms2_annot_report = self.summarize_lipid_report(ms2_annot_report) -1683 ms2_annot_report = self.clean_ms2_report(ms2_annot_report) -1684 ms2_annot_report = ms2_annot_report.dropna(axis=1, how="all") -1685 ms2_annot_report = ms2_annot_report.reset_index(drop=False) -1686 -1687 # Combine the reports -1688 if not ms1_annot_report.empty: -1689 # MS1 has been run and has molecular formula information -1690 mf_report = pd.merge( -1691 mf_report, -1692 ms1_annot_report, -1693 how="left", -1694 on=["mf_id", "isotopologue_type"], -1695 ) -1696 if ms2_annot_report is not None: -1697 # pull out the records with ion_formula and drop the ion_formula column (these should be empty if MS1 molecular formula assignment is working correctly) -1698 mf_no_ion_formula = mf_report[mf_report["ion_formula"].isna()] -1699 mf_no_ion_formula = mf_no_ion_formula.drop(columns=["ion_formula"]) -1700 mf_no_ion_formula = pd.merge( -1701 mf_no_ion_formula, ms2_annot_report, how="left", on=["mf_id"] -1702 ) -1703 -1704 # pull out the records with ion_formula -1705 mf_with_ion_formula = mf_report[~mf_report["ion_formula"].isna()] -1706 mf_with_ion_formula = pd.merge( -1707 mf_with_ion_formula, -1708 ms2_annot_report, -1709 how="left", -1710 on=["mf_id", "ion_formula"], +1234 Returns +1235 ------- +1236 str +1237 The formula of the ion as a string (like 'C2 
H4 O2'); or None if the neutral_formula is not a string. +1238 """ +1239 # If neutral_formula is not a string, return None +1240 if not isinstance(neutral_formula, str): +1241 return None +1242 +1243 # Check if there are spaces in the formula (these are outputs of the MolecularFormula class and do not need to be processed before being passed to the class) +1244 if re.search(r"\s", neutral_formula): +1245 neutral_formula = MolecularFormula(neutral_formula, ion_charge=0) +1246 else: +1247 form_pre = re.sub(r"([A-Z])", r" \1", neutral_formula)[1:] +1248 elements = [re.findall(r"[A-Z][a-z]*", x) for x in form_pre.split()] +1249 counts = [re.findall(r"\d+", x) for x in form_pre.split()] +1250 neutral_formula = MolecularFormula( +1251 dict( +1252 zip( +1253 [x[0] for x in elements], +1254 [int(x[0]) if x else 1 for x in counts], +1255 ) +1256 ), +1257 ion_charge=0, +1258 ) +1259 neutral_formula_dict = neutral_formula.to_dict().copy() +1260 +1261 adduct_add_dict = ion_type_dict[ion_type][0] +1262 for key in adduct_add_dict: +1263 if key in neutral_formula_dict.keys(): +1264 neutral_formula_dict[key] += adduct_add_dict[key] +1265 else: +1266 neutral_formula_dict[key] = adduct_add_dict[key] +1267 +1268 adduct_subtract = ion_type_dict[ion_type][1] +1269 for key in adduct_subtract: +1270 neutral_formula_dict[key] -= adduct_subtract[key] +1271 +1272 return MolecularFormula(neutral_formula_dict, ion_charge=0).string +1273 +1274 @staticmethod +1275 def get_isotope_type(ion_formula): +1276 """From an ion formula, return the 13C isotope type of the ion. +1277 +1278 Notes +1279 ----- +1280 This is a static method. +1281 If the ion_formula is not a string, this method will return None. +1282 This is currently only functional for 13C isotopes. +1283 +1284 Parameters +1285 ---------- +1286 ion_formula : str +1287 The formula of the ion, expected to be a string like 'C2 H4 O2'. +1288 +1289 Returns +1290 ------- +1291 str +1292 The isotope type of the ion, e.g. 
'13C1', '13C2', etc; or None if the ion_formula does not contain a 13C isotope. +1293 +1294 Raises +1295 ------ +1296 ValueError +1297 If the ion_formula is not a string. +1298 """ +1299 if not isinstance(ion_formula, str): +1300 return None +1301 +1302 if re.search(r"\s", ion_formula): +1303 ion_formula = MolecularFormula(ion_formula, ion_charge=0) +1304 else: +1305 raise ValueError('ion_formula should be a string like "C2 H4 O2"') +1306 ion_formula_dict = ion_formula.to_dict().copy() +1307 +1308 try: +1309 iso_class = "13C" + str(ion_formula_dict.pop("13C")) +1310 except KeyError: +1311 iso_class = None +1312 +1313 return iso_class +1314 +1315 def clean_ms1_report(self, ms1_summary_full): +1316 """Clean the MS1 report. +1317 +1318 Parameters +1319 ---------- +1320 ms1_summary_full : DataFrame +1321 The full MS1 summary DataFrame. +1322 +1323 Returns +1324 ------- +1325 DataFrame +1326 The cleaned MS1 summary DataFrame. +1327 """ +1328 ms1_summary_full = ms1_summary_full.reset_index() +1329 cols_to_keep = [ +1330 "mf_id", +1331 "Molecular Formula", +1332 "Ion Type", +1333 "Calculated m/z", +1334 "m/z Error (ppm)", +1335 "m/z Error Score", +1336 "Is Isotopologue", +1337 "Isotopologue Similarity", +1338 "Confidence Score", +1339 ] +1340 ms1_summary = ms1_summary_full[cols_to_keep].copy() +1341 ms1_summary["ion_formula"] = [ +1342 self.get_ion_formula(f, a) +1343 for f, a in zip(ms1_summary["Molecular Formula"], ms1_summary["Ion Type"]) +1344 ] +1345 ms1_summary["isotopologue_type"] = [ +1346 self.get_isotope_type(f) for f in ms1_summary["ion_formula"].tolist() +1347 ] +1348 +1349 # Reorder columns +1350 ms1_summary = ms1_summary[ +1351 [ +1352 "mf_id", +1353 "ion_formula", +1354 "isotopologue_type", +1355 "Calculated m/z", +1356 "m/z Error (ppm)", +1357 "m/z Error Score", +1358 "Isotopologue Similarity", +1359 "Confidence Score", +1360 ] +1361 ] +1362 +1363 # Set the index to mf_id +1364 ms1_summary = ms1_summary.set_index("mf_id") +1365 +1366 return ms1_summary 
+1367 +1368 def summarize_lipid_report(self, ms2_annot): +1369 """Summarize the lipid report. +1370 +1371 Parameters +1372 ---------- +1373 ms2_annot : DataFrame +1374 The MS2 annotation DataFrame with all annotations. +1375 +1376 Returns +1377 ------- +1378 DataFrame +1379 The summarized lipid report. +1380 """ +1381 # Drop unnecessary columns for easier viewing +1382 columns_to_drop = [ +1383 "precursor_mz", +1384 "precursor_mz_error_ppm", +1385 "metabref_mol_id", +1386 "metabref_precursor_mz", +1387 "cas", +1388 "inchikey", +1389 "inchi", +1390 "chebi", +1391 "smiles", +1392 "kegg", +1393 "data_id", +1394 "iupac_name", +1395 "traditional_name", +1396 "common_name", +1397 "casno", +1398 ] +1399 ms2_annot = ms2_annot.drop( +1400 columns=[col for col in columns_to_drop if col in ms2_annot.columns] +1401 ) +1402 +1403 # If ion_types_excluded is not empty, remove those ion types +1404 ion_types_excluded = self.mass_spectra.parameters.mass_spectrum[ +1405 "ms2" +1406 ].molecular_search.ion_types_excluded +1407 if len(ion_types_excluded) > 0: +1408 ms2_annot = ms2_annot[~ms2_annot["ref_ion_type"].isin(ion_types_excluded)] +1409 +1410 # If mf_id is not present, check that the index name is mf_id and reset the index +1411 if "mf_id" not in ms2_annot.columns: +1412 if ms2_annot.index.name == "mf_id": +1413 ms2_annot = ms2_annot.reset_index() +1414 else: +1415 raise ValueError("mf_id is not present in the dataframe") +1416 +1417 # Attempt to get consensus annotations to the MLF level +1418 mlf_results_all = [] +1419 for mf_id in ms2_annot["mf_id"].unique(): +1420 mlf_results_perid = [] +1421 ms2_annot_mf = ms2_annot[ms2_annot["mf_id"] == mf_id].copy() +1422 ms2_annot_mf["n_spectra_contributing"] = len(ms2_annot_mf) +1423 +1424 for query_scan in ms2_annot["query_spectrum_id"].unique(): +1425 ms2_annot_sub = ms2_annot_mf[ +1426 ms2_annot_mf["query_spectrum_id"] == query_scan +1427 ].copy() +1428 +1429 if ms2_annot_sub["lipid_summed_name"].nunique() == 1: +1430 # If there is 
only one lipid_summed_name, let's try to get consensus molecular species annotation +1431 if ms2_annot_sub["lipid_summed_name"].nunique() == 1: +1432 ms2_annot_sub["entropy_max"] = ( +1433 ms2_annot_sub["entropy_similarity"] +1434 == ms2_annot_sub["entropy_similarity"].max() +1435 ) +1436 ms2_annot_sub["ref_match_fract_max"] = ( +1437 ms2_annot_sub["ref_mz_in_query_fract"] +1438 == ms2_annot_sub["ref_mz_in_query_fract"].max() +1439 ) +1440 ms2_annot_sub["frag_max"] = ms2_annot_sub[ +1441 "query_frag_types" +1442 ].apply(lambda x: True if "MLF" in x else False) +1443 +1444 # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks) +1445 ms2_annot_sub["consensus"] = ms2_annot_sub[ +1446 ["entropy_max", "ref_match_fract_max", "frag_max"] +1447 ].all(axis=1) +1448 +1449 # If there is a consensus, take the row with the highest entropy_similarity +1450 if ms2_annot_sub["consensus"].any(): +1451 ms2_annot_sub = ms2_annot_sub[ +1452 ms2_annot_sub["entropy_similarity"] +1453 == ms2_annot_sub["entropy_similarity"].max() +1454 ].head(1) +1455 mlf_results_perid.append(ms2_annot_sub) +1456 if len(mlf_results_perid) == 0: +1457 mlf_results_perid = pd.DataFrame() +1458 else: +1459 mlf_results_perid = pd.concat(mlf_results_perid) +1460 if mlf_results_perid["name"].nunique() == 1: +1461 mlf_results_perid = mlf_results_perid[ +1462 mlf_results_perid["entropy_similarity"] +1463 == mlf_results_perid["entropy_similarity"].max() +1464 ].head(1) +1465 else: +1466 mlf_results_perid = pd.DataFrame() +1467 mlf_results_all.append(mlf_results_perid) +1468 +1469 # These are the consensus annotations to the MLF level +1470 if len(mlf_results_all) > 0: +1471 mlf_results_all = pd.concat(mlf_results_all) +1472 mlf_results_all["annot_level"] = mlf_results_all["structure_level"] +1473 else: +1474 # Make an empty dataframe +1475 mlf_results_all = ms2_annot.head(0) +1476 +1477 # For remaining mf_ids, try to get a consensus annotation to the species level 
+1478 species_results_all = [] +1479 # Remove mf_ids that have consensus annotations to the MLF level +1480 ms2_annot_spec = ms2_annot[ +1481 ~ms2_annot["mf_id"].isin(mlf_results_all["mf_id"].unique()) +1482 ] +1483 for mf_id in ms2_annot_spec["mf_id"].unique(): +1484 # Do all the hits have the same lipid_summed_name? +1485 ms2_annot_sub = ms2_annot_spec[ms2_annot_spec["mf_id"] == mf_id].copy() +1486 ms2_annot_sub["n_spectra_contributing"] = len(ms2_annot_sub) +1487 +1488 if ms2_annot_sub["lipid_summed_name"].nunique() == 1: +1489 # Grab the highest entropy_similarity result +1490 ms2_annot_sub = ms2_annot_sub[ +1491 ms2_annot_sub["entropy_similarity"] +1492 == ms2_annot_sub["entropy_similarity"].max() +1493 ].head(1) +1494 species_results_all.append(ms2_annot_sub) +1495 +1496 # These are the consensus annotations to the species level +1497 if len(species_results_all) > 0: +1498 species_results_all = pd.concat(species_results_all) +1499 species_results_all["annot_level"] = "species" +1500 else: +1501 # Make an empty dataframe +1502 species_results_all = ms2_annot.head(0) +1503 +1504 # Deal with the remaining mf_ids that do not have consensus annotations to the species level or MLF level +1505 # Remove mf_ids that have consensus annotations to the species level +1506 ms2_annot_remaining = ms2_annot_spec[ +1507 ~ms2_annot_spec["mf_id"].isin(species_results_all["mf_id"].unique()) +1508 ] +1509 no_consensus = [] +1510 for mf_id in ms2_annot_remaining["mf_id"].unique(): +1511 id_sub = [] +1512 id_no_con = [] +1513 ms2_annot_sub_mf = ms2_annot_remaining[ +1514 ms2_annot_remaining["mf_id"] == mf_id +1515 ].copy() +1516 for query_scan in ms2_annot_sub_mf["query_spectrum_id"].unique(): +1517 ms2_annot_sub = ms2_annot_sub_mf[ +1518 ms2_annot_sub_mf["query_spectrum_id"] == query_scan +1519 ].copy() +1520 +1521 # New columns for ranking [HIGHER RANK = BETTER] +1522 ms2_annot_sub["entropy_max"] = ( +1523 ms2_annot_sub["entropy_similarity"] +1524 == 
ms2_annot_sub["entropy_similarity"].max() +1525 ) +1526 ms2_annot_sub["ref_match_fract_max"] = ( +1527 ms2_annot_sub["ref_mz_in_query_fract"] +1528 == ms2_annot_sub["ref_mz_in_query_fract"].max() +1529 ) +1530 ms2_annot_sub["frag_max"] = ms2_annot_sub["query_frag_types"].apply( +1531 lambda x: True if "MLF" in x else False +1532 ) +1533 +1534 # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks) +1535 ms2_annot_sub["consensus"] = ms2_annot_sub[ +1536 ["entropy_max", "ref_match_fract_max", "frag_max"] +1537 ].all(axis=1) +1538 ms2_annot_sub_con = ms2_annot_sub[ms2_annot_sub["consensus"]] +1539 id_sub.append(ms2_annot_sub_con) +1540 id_no_con.append(ms2_annot_sub) +1541 id_sub = pd.concat(id_sub) +1542 id_no_con = pd.concat(id_no_con) +1543 +1544 # Scenario 1: Multiple scans are being resolved to different MLFs [could be coelutions and should both be kept and annotated to MS level] +1545 if ( +1546 id_sub["query_frag_types"] +1547 .apply(lambda x: True if "MLF" in x else False) +1548 .all() +1549 and len(id_sub) > 0 +1550 ): +1551 idx = id_sub.groupby("name")["entropy_similarity"].idxmax() +1552 id_sub = id_sub.loc[idx] +1553 # Reorder so highest entropy_similarity is first +1554 id_sub = id_sub.sort_values("entropy_similarity", ascending=False) +1555 id_sub["annot_level"] = id_sub["structure_level"] +1556 no_consensus.append(id_sub) +1557 +1558 # Scenario 2: Multiple scans are being resolved to different species, keep both and annotate to appropriate level +1559 elif len(id_sub) == 0: +1560 for lipid_summed_name in id_no_con["lipid_summed_name"].unique(): +1561 summed_sub = id_no_con[ +1562 id_no_con["lipid_summed_name"] == lipid_summed_name +1563 ] +1564 # Any consensus to MLF? 
+1565 if summed_sub["consensus"].any(): +1566 summed_sub = summed_sub[summed_sub["consensus"]] +1567 summed_sub["annot_level"] = summed_sub["structure_level"] +1568 no_consensus.append(summed_sub) +1569 else: +1570 # Grab the highest entropy_similarity, if there are multiple, grab the first one +1571 summed_sub = summed_sub[ +1572 summed_sub["entropy_similarity"] +1573 == summed_sub["entropy_similarity"].max() +1574 ].head(1) +1575 # get first row +1576 summed_sub["annot_level"] = "species" +1577 summed_sub["name"] = "" +1578 no_consensus.append(summed_sub) +1579 else: +1580 raise ValueError("Unexpected scenario for summarizing mf_id: ", mf_id) +1581 +1582 if len(no_consensus) > 0: +1583 no_consensus = pd.concat(no_consensus) +1584 else: +1585 no_consensus = ms2_annot.head(0) +1586 +1587 # Combine all the consensus annotations and reformat the dataframe for output +1588 species_results_all = species_results_all.drop(columns=["name"]) +1589 species_results_all["lipid_molecular_species_id"] = "" +1590 mlf_results_all["lipid_molecular_species_id"] = mlf_results_all["name"] +1591 no_consensus["lipid_molecular_species_id"] = no_consensus["name"] +1592 consensus_annotations = pd.concat( +1593 [mlf_results_all, species_results_all, no_consensus] +1594 ) +1595 consensus_annotations = consensus_annotations.sort_values( +1596 "mf_id", ascending=True +1597 ) +1598 cols_to_keep = [ +1599 "mf_id", +1600 "ref_ion_type", +1601 "entropy_similarity", +1602 "ref_mz_in_query_fract", +1603 "lipid_molecular_species_id", +1604 "lipid_summed_name", +1605 "lipid_subclass", +1606 "lipid_class", +1607 "lipid_category", +1608 "formula", +1609 "annot_level", +1610 "n_spectra_contributing", +1611 ] +1612 consensus_annotations = consensus_annotations[cols_to_keep] +1613 consensus_annotations = consensus_annotations.set_index("mf_id") +1614 +1615 return consensus_annotations +1616 +1617 def clean_ms2_report(self, lipid_summary): +1618 """Clean the MS2 report. 
+1619 +1620 Parameters +1621 ---------- +1622 lipid_summary : DataFrame +1623 The full lipid summary DataFrame. +1624 +1625 Returns +1626 ------- +1627 DataFrame +1628 The cleaned lipid summary DataFrame. +1629 """ +1630 lipid_summary = lipid_summary.reset_index() +1631 lipid_summary["ion_formula"] = [ +1632 self.get_ion_formula(f, a) +1633 for f, a in zip(lipid_summary["formula"], lipid_summary["ref_ion_type"]) +1634 ] +1635 +1636 # Reorder columns +1637 lipid_summary = lipid_summary[ +1638 [ +1639 "mf_id", +1640 "ion_formula", +1641 "ref_ion_type", +1642 "formula", +1643 "annot_level", +1644 "lipid_molecular_species_id", +1645 "lipid_summed_name", +1646 "lipid_subclass", +1647 "lipid_class", +1648 "lipid_category", +1649 "entropy_similarity", +1650 "ref_mz_in_query_fract", +1651 "n_spectra_contributing", +1652 ] +1653 ] +1654 +1655 # Set the index to mf_id +1656 lipid_summary = lipid_summary.set_index("mf_id") +1657 +1658 return lipid_summary +1659 +1660 def to_report(self, molecular_metadata=None): +1661 """Create a report of the mass features and their annotations. +1662 +1663 Parameters +1664 ---------- +1665 molecular_metadata : dict, optional +1666 The molecular metadata. Default is None. +1667 +1668 Returns +1669 ------- +1670 DataFrame +1671 The report of the mass features and their annotations. +1672 +1673 Notes +1674 ----- +1675 The report will contain the mass features and their annotations from MS1 and MS2 (if available). 
+1676 """ +1677 # Get mass feature dataframe +1678 mf_report = self.mass_spectra.mass_features_to_df() +1679 mf_report = mf_report.reset_index(drop=False) +1680 +1681 # Get and clean ms1 annotation dataframe +1682 ms1_annot_report = self.mass_spectra.mass_features_ms1_annot_to_df().copy() +1683 ms1_annot_report = self.clean_ms1_report(ms1_annot_report) +1684 ms1_annot_report = ms1_annot_report.reset_index(drop=False) +1685 +1686 # Get, summarize, and clean ms2 annotation dataframe +1687 ms2_annot_report = self.mass_spectra.mass_features_ms2_annot_to_df( +1688 molecular_metadata=molecular_metadata +1689 ) +1690 if ms2_annot_report is not None: +1691 ms2_annot_report = self.summarize_lipid_report(ms2_annot_report) +1692 ms2_annot_report = self.clean_ms2_report(ms2_annot_report) +1693 ms2_annot_report = ms2_annot_report.dropna(axis=1, how="all") +1694 ms2_annot_report = ms2_annot_report.reset_index(drop=False) +1695 +1696 # Combine the reports +1697 if not ms1_annot_report.empty: +1698 # MS1 has been run and has molecular formula information +1699 mf_report = pd.merge( +1700 mf_report, +1701 ms1_annot_report, +1702 how="left", +1703 on=["mf_id", "isotopologue_type"], +1704 ) +1705 if ms2_annot_report is not None: +1706 # pull out the records with ion_formula and drop the ion_formula column (these should be empty if MS1 molecular formula assignment is working correctly) +1707 mf_no_ion_formula = mf_report[mf_report["ion_formula"].isna()] +1708 mf_no_ion_formula = mf_no_ion_formula.drop(columns=["ion_formula"]) +1709 mf_no_ion_formula = pd.merge( +1710 mf_no_ion_formula, ms2_annot_report, how="left", on=["mf_id"] 1711 ) 1712 -1713 # put back together -1714 mf_report = pd.concat([mf_no_ion_formula, mf_with_ion_formula]) -1715 -1716 # Rename colums -1717 rename_dict = { -1718 "mf_id": "Mass Feature ID", -1719 "scan_time": "Retention Time (min)", -1720 "mz": "m/z", -1721 "apex_scan": "Apex Scan Number", -1722 "intensity": "Intensity", -1723 "persistence": "Persistence", 
-1724 "area": "Area", -1725 "half_height_width": "Half Height Width (min)", -1726 "tailing_factor": "Tailing Factor", -1727 "dispersity_index": "Dispersity Index", -1728 "ms2_spectrum": "MS2 Spectrum", -1729 "monoisotopic_mf_id": "Monoisotopic Mass Feature ID", -1730 "isotopologue_type": "Isotopologue Type", -1731 "mass_spectrum_deconvoluted_parent": "Is Largest Ion after Deconvolution", -1732 "associated_mass_features": "Associated Mass Features after Deconvolution", -1733 "ion_formula": "Ion Formula", -1734 "formula": "Molecular Formula", -1735 "ref_ion_type": "Ion Type", -1736 "annot_level": "Lipid Annotation Level", -1737 "lipid_molecular_species_id": "Lipid Molecular Species", -1738 "lipid_summed_name": "Lipid Species", -1739 "lipid_subclass": "Lipid Subclass", -1740 "lipid_class": "Lipid Class", -1741 "lipid_category": "Lipid Category", -1742 "entropy_similarity": "Entropy Similarity", -1743 "ref_mz_in_query_fract": "Library mzs in Query (fraction)", -1744 "n_spectra_contributing": "Spectra with Annotation (n)", -1745 } -1746 mf_report = mf_report.rename(columns=rename_dict) -1747 mf_report["Sample Name"] = self.mass_spectra.sample_name -1748 mf_report["Polarity"] = self.mass_spectra.polarity -1749 mf_report = mf_report[ -1750 ["Mass Feature ID", "Sample Name", "Polarity"] -1751 + [ -1752 col -1753 for col in mf_report.columns -1754 if col not in ["Mass Feature ID", "Sample Name", "Polarity"] -1755 ] -1756 ] -1757 -1758 # Reorder rows by "Mass Feature ID" -1759 mf_report = mf_report.sort_values("Mass Feature ID") -1760 -1761 # Reset index -1762 mf_report = mf_report.reset_index(drop=True) -1763 -1764 return mf_report -1765 -1766 def report_to_csv(self, molecular_metadata=None): -1767 """Create a report of the mass features and their annotations and save it as a CSV file. -1768 -1769 Parameters -1770 ---------- -1771 molecular_metadata : dict, optional -1772 The molecular metadata. Default is None. 
-1773 """ -1774 report = self.to_report(molecular_metadata=molecular_metadata) -1775 out_file = self.output_file.with_suffix(".csv") -1776 report.to_csv(out_file, index=False) +1713 # pull out the records with ion_formula +1714 mf_with_ion_formula = mf_report[~mf_report["ion_formula"].isna()] +1715 mf_with_ion_formula = pd.merge( +1716 mf_with_ion_formula, +1717 ms2_annot_report, +1718 how="left", +1719 on=["mf_id", "ion_formula"], +1720 ) +1721 +1722 # put back together +1723 mf_report = pd.concat([mf_no_ion_formula, mf_with_ion_formula]) +1724 +1725 # Rename colums +1726 rename_dict = { +1727 "mf_id": "Mass Feature ID", +1728 "scan_time": "Retention Time (min)", +1729 "mz": "m/z", +1730 "apex_scan": "Apex Scan Number", +1731 "intensity": "Intensity", +1732 "persistence": "Persistence", +1733 "area": "Area", +1734 "half_height_width": "Half Height Width (min)", +1735 "tailing_factor": "Tailing Factor", +1736 "dispersity_index": "Dispersity Index", +1737 "ms2_spectrum": "MS2 Spectrum", +1738 "monoisotopic_mf_id": "Monoisotopic Mass Feature ID", +1739 "isotopologue_type": "Isotopologue Type", +1740 "mass_spectrum_deconvoluted_parent": "Is Largest Ion after Deconvolution", +1741 "associated_mass_features": "Associated Mass Features after Deconvolution", +1742 "ion_formula": "Ion Formula", +1743 "formula": "Molecular Formula", +1744 "ref_ion_type": "Ion Type", +1745 "annot_level": "Lipid Annotation Level", +1746 "lipid_molecular_species_id": "Lipid Molecular Species", +1747 "lipid_summed_name": "Lipid Species", +1748 "lipid_subclass": "Lipid Subclass", +1749 "lipid_class": "Lipid Class", +1750 "lipid_category": "Lipid Category", +1751 "entropy_similarity": "Entropy Similarity", +1752 "ref_mz_in_query_fract": "Library mzs in Query (fraction)", +1753 "n_spectra_contributing": "Spectra with Annotation (n)", +1754 } +1755 mf_report = mf_report.rename(columns=rename_dict) +1756 mf_report["Sample Name"] = self.mass_spectra.sample_name +1757 mf_report["Polarity"] = 
self.mass_spectra.polarity +1758 mf_report = mf_report[ +1759 ["Mass Feature ID", "Sample Name", "Polarity"] +1760 + [ +1761 col +1762 for col in mf_report.columns +1763 if col not in ["Mass Feature ID", "Sample Name", "Polarity"] +1764 ] +1765 ] +1766 +1767 # Reorder rows by "Mass Feature ID" +1768 mf_report = mf_report.sort_values("Mass Feature ID") +1769 +1770 # Reset index +1771 mf_report = mf_report.reset_index(drop=True) +1772 +1773 return mf_report +1774 +1775 def report_to_csv(self, molecular_metadata=None): +1776 """Create a report of the mass features and their annotations and save it as a CSV file. +1777 +1778 Parameters +1779 ---------- +1780 molecular_metadata : dict, optional +1781 The molecular metadata. Default is None. +1782 """ +1783 report = self.to_report(molecular_metadata=molecular_metadata) +1784 out_file = self.output_file.with_suffix(".csv") +1785 report.to_csv(out_file, index=False)

    @@ -1999,729 +2008,729 @@

    -
     56class LowResGCMSExport:
    - 57    """A class to export low resolution GC-MS data.
    - 58
    - 59    This class provides methods to export low resolution GC-MS data to various formats such as Excel, CSV, HDF5, and Pandas DataFrame.
    - 60
    - 61    Parameters:
    - 62    ----------
    - 63    out_file_path : str
    - 64        The output file path.
    - 65    gcms : object
    - 66        The low resolution GCMS object.
    - 67
    - 68    Attributes:
    - 69    ----------
    - 70    output_file : Path
    - 71        The output file path as a Path object.
    - 72    gcms : object
    - 73        The low resolution GCMS object.
    - 74
    - 75    Methods:
    - 76    -------
    - 77    * get_pandas_df(id_label="corems:"). Get the exported data as a Pandas DataFrame.
    - 78    * get_json(nan=False, id_label="corems:"). Get the exported data as a JSON string.
    - 79    * to_pandas(write_metadata=True, id_label="corems:"). Export the data to a Pandas DataFrame and save it as a pickle file.
    - 80    * to_excel(write_mode='a', write_metadata=True, id_label="corems:"),
    - 81        Export the data to an Excel file.
    - 82    * to_csv(separate_output=False, write_mode="w", write_metadata=True, id_label="corems:").
    - 83        Export the data to a CSV file.
    - 84    * to_hdf(id_label="corems:").
    - 85        Export the data to an HDF5 file.
    - 86    * get_data_stats(gcms).
    - 87        Get statistics about the GCMS data.
    - 88
    - 89    """
    - 90
    - 91    def __init__(self, out_file_path, gcms):
    - 92        self.output_file = Path(out_file_path)
    - 93
    - 94        self.gcms = gcms
    - 95
    - 96        self._init_columns()
    - 97
    - 98    def _init_columns(self):
    - 99        """Initialize the column names for the exported data.
    -100
    -101        Returns:
    -102        -------
    -103        list
    -104            The list of column names.
    -105        """
    -106
    -107        columns = [
    -108            "Sample name",
    -109            "Peak Index",
    -110            "Retention Time",
    -111            "Retention Time Ref",
    -112            "Peak Height",
    -113            "Peak Area",
    -114            "Retention index",
    -115            "Retention index Ref",
    -116            "Retention Index Score",
    -117            "Similarity Score",
    -118            "Spectral Similarity Score",
    -119            "Compound Name",
    -120            "Chebi ID",
    -121            "Kegg Compound ID",
    -122            "Inchi",
    -123            "Inchi Key",
    -124            "Smiles",
    -125            "Molecular Formula",
    -126            "IUPAC Name",
    -127            "Traditional Name",
    -128            "Common Name",
    -129            "Derivatization",
    -130        ]
    -131
    -132        if self.gcms.molecular_search_settings.exploratory_mode:
    -133            columns.extend(
    -134                [
    -135                    "Weighted Cosine Correlation",
    -136                    "Cosine Correlation",
    -137                    "Stein Scott Similarity",
    -138                    "Pearson Correlation",
    -139                    "Spearman Correlation",
    -140                    "Kendall Tau Correlation",
    -141                    "Euclidean Distance",
    -142                    "Manhattan Distance",
    -143                    "Jaccard Distance",
    -144                    "DWT Correlation",
    -145                    "DFT Correlation",
    -146                ]
    -147            )
    -148
    -149            columns.extend(list(methods_name.values()))
    -150
    -151        return columns
    -152
    -153    def get_pandas_df(self, id_label="corems:"):
    -154        """Get the exported data as a Pandas DataFrame.
    -155
    -156        Parameters:
    -157        ----------
    -158        id_label : str, optional
    -159            The ID label for the data. Default is "corems:".
    -160
    -161        Returns:
    -162        -------
    -163        DataFrame
    -164            The exported data as a Pandas DataFrame.
    -165        """
    -166
    -167        columns = self._init_columns()
    -168
    -169        dict_data_list = self.get_list_dict_data(self.gcms)
    -170
    -171        df = DataFrame(dict_data_list, columns=columns)
    -172
    -173        df.name = self.gcms.sample_name
    -174
    -175        return df
    -176
    -177    def get_json(self, nan=False, id_label="corems:"):
    -178        """Get the exported data as a JSON string.
    -179
    -180        Parameters:
    -181        ----------
    -182        nan : bool, optional
    -183            Whether to include NaN values in the JSON string. Default is False.
    -184        id_label : str, optional
    -185            The ID label for the data. Default is "corems:".
    -186
    -187        """
    -188
    -189        import json
    -190
    -191        dict_data_list = self.get_list_dict_data(self.gcms)
    -192
    -193        return json.dumps(
    -194            dict_data_list, sort_keys=False, indent=4, separators=(",", ": ")
    -195        )
    -196
    -197    def to_pandas(self, write_metadata=True, id_label="corems:"):
    -198        """Export the data to a Pandas DataFrame and save it as a pickle file.
    -199
    -200        Parameters:
    -201        ----------
    -202        write_metadata : bool, optional
    -203            Whether to write metadata to the output file.
    -204        id_label : str, optional
    -205            The ID label for the data.
    -206        """
    -207
    -208        columns = self._init_columns()
    -209
    -210        dict_data_list = self.get_list_dict_data(self.gcms)
    -211
    -212        df = DataFrame(dict_data_list, columns=columns)
    -213
    -214        df.to_pickle(self.output_file.with_suffix(".pkl"))
    -215
    -216        if write_metadata:
    -217            self.write_settings(
    -218                self.output_file.with_suffix(".pkl"), self.gcms, id_label="corems:"
    -219            )
    -220
    -221    def to_excel(self, write_mode="a", write_metadata=True, id_label="corems:"):
    -222        """Export the data to an Excel file.
    -223
    -224        Parameters:
    -225        ----------
    -226        write_mode : str, optional
    -227            The write mode for the Excel file. Default is 'a' (append).
    -228        write_metadata : bool, optional
    -229            Whether to write metadata to the output file. Default is True.
    -230        id_label : str, optional
    -231            The ID label for the data. Default is "corems:".
    -232        """
    -233
    -234        out_put_path = self.output_file.with_suffix(".xlsx")
    -235
    -236        columns = self._init_columns()
    -237
    -238        dict_data_list = self.get_list_dict_data(self.gcms)
    -239
    -240        df = DataFrame(dict_data_list, columns=columns)
    -241
    -242        if write_mode == "a" and out_put_path.exists():
    -243            writer = ExcelWriter(out_put_path, engine="openpyxl")
    -244            # try to open an existing workbook
    -245            writer.book = load_workbook(out_put_path)
    -246            # copy existing sheets
    -247            writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
    -248            # read existing file
    -249            reader = read_excel(out_put_path)
    -250            # write out the new sheet
    -251            df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1)
    -252
    -253            writer.close()
    -254        else:
    -255            df.to_excel(
    -256                self.output_file.with_suffix(".xlsx"), index=False, engine="openpyxl"
    -257            )
    -258
    -259        if write_metadata:
    -260            self.write_settings(out_put_path, self.gcms, id_label=id_label)
    -261
    -262    def to_csv(
    -263        self,
    -264        separate_output=False,
    -265        write_mode="w",
    -266        write_metadata=True,
    -267        id_label="corems:",
    -268    ):
    -269        """Export the data to a CSV file.
    -270
    -271        Parameters:
    -272        ----------
    -273        separate_output : bool, optional
    -274            Whether to separate the output into multiple files. Default is False.
    -275        write_mode : str, optional
    -276            The write mode for the CSV file. Default is 'w' (write).
    -277        write_metadata : bool, optional
    -278            Whether to write metadata to the output file. Default is True.
    -279        id_label : str, optional
    -280            The ID label for the data. Default is "corems:".
    -281        """
    -282
    -283        if separate_output:
    -284            # set write mode to write
    -285            # this mode will overwrite the file without warning
    -286            write_mode = "w"
    -287        else:
    -288            # set write mode to append
    -289            write_mode = "a"
    -290
    -291        columns = self._init_columns()
    -292
    -293        dict_data_list = self.get_list_dict_data(self.gcms)
    -294
    -295        out_put_path = self.output_file.with_suffix(".csv")
    -296
    -297        write_header = not out_put_path.exists()
    -298
    -299        try:
    -300            with open(out_put_path, write_mode, newline="") as csvfile:
    -301                writer = csv.DictWriter(csvfile, fieldnames=columns)
    -302                if write_header:
    -303                    writer.writeheader()
    -304                for data in dict_data_list:
    -305                    writer.writerow(data)
    -306
    -307            if write_metadata:
    -308                self.write_settings(out_put_path, self.gcms, id_label=id_label)
    -309
    -310        except IOError as ioerror:
    -311            print(ioerror)
    -312
    -313    def to_hdf(self, id_label="corems:"):
    -314        """Export the data to an HDF5 file.
    -315
    -316        Parameters:
    -317        ----------
    -318        id_label : str, optional
    -319            The ID label for the data. Default is "corems:".
    -320        """
    -321
    -322        # save sample at a time
    -323        def add_compound(gc_peak, compound_obj):
    -324            modifier = compound_obj.classify if compound_obj.classify else ""
    -325            compound_group = compound_obj.name.replace("/", "") + " " + modifier
    -326
    -327            if compound_group not in peak_group:
    -328                compound_group = peak_group.create_group(compound_group)
    -329
    -330                # compound_group.attrs["retention_time"] = compound_obj.retention_time
    -331                compound_group.attrs["retention_index"] = compound_obj.ri
    -332                compound_group.attrs["retention_index_score"] = compound_obj.ri_score
    -333                compound_group.attrs["spectral_similarity_score"] = (
    -334                    compound_obj.spectral_similarity_score
    -335                )
    -336                compound_group.attrs["similarity_score"] = compound_obj.similarity_score
    -337
    -338                compond_mz = compound_group.create_dataset(
    -339                    "mz", data=np.array(compound_obj.mz), dtype="f8"
    -340                )
    -341                compond_abundance = compound_group.create_dataset(
    -342                    "abundance", data=np.array(compound_obj.abundance), dtype="f8"
    -343                )
    -344
    -345                if self.gcms.molecular_search_settings.exploratory_mode:
    -346                    compound_group.attrs["Spectral Similarities"] = json.dumps(
    -347                        compound_obj.spectral_similarity_scores,
    -348                        sort_keys=False,
    -349                        indent=4,
    -350                        separators=(",", ":"),
    -351                    )
    -352            else:
    -353                warnings.warn("Skipping duplicate reference compound.")
    -354
    -355        import json
    -356        from datetime import datetime, timezone
    -357
    -358        import h5py
    -359        import numpy as np
    -360
    -361        output_path = self.output_file.with_suffix(".hdf5")
    -362
    -363        with h5py.File(output_path, "w") as hdf_handle:
    -364            timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    -365            hdf_handle.attrs["time_stamp"] = timenow
    -366            hdf_handle.attrs["data_structure"] = "gcms"
    -367            hdf_handle.attrs["analyzer"] = self.gcms.analyzer
    -368            hdf_handle.attrs["instrument_label"] = self.gcms.instrument_label
    -369
    -370            hdf_handle.attrs["sample_id"] = "self.gcms.id"
    -371            hdf_handle.attrs["sample_name"] = self.gcms.sample_name
    -372            hdf_handle.attrs["input_data"] = str(self.gcms.file_location)
    -373            hdf_handle.attrs["output_data"] = str(output_path)
    -374            hdf_handle.attrs["output_data_id"] = id_label + uuid.uuid4().hex
    -375            hdf_handle.attrs["corems_version"] = __version__
    -376
    -377            hdf_handle.attrs["Stats"] = json.dumps(
    -378                self.get_data_stats(self.gcms),
    -379                sort_keys=False,
    -380                indent=4,
    -381                separators=(",", ": "),
    -382            )
    -383            hdf_handle.attrs["Calibration"] = json.dumps(
    -384                self.get_calibration_stats(self.gcms, id_label),
    -385                sort_keys=False,
    -386                indent=4,
    -387                separators=(",", ": "),
    -388            )
    -389            hdf_handle.attrs["Blank"] = json.dumps(
    -390                self.get_blank_stats(self.gcms),
    -391                sort_keys=False,
    -392                indent=4,
    -393                separators=(",", ": "),
    -394            )
    -395
    -396            corems_dict_setting = parameter_to_dict.get_dict_data_gcms(self.gcms)
    -397            hdf_handle.attrs["CoreMSParameters"] = json.dumps(
    -398                corems_dict_setting, sort_keys=False, indent=4, separators=(",", ": ")
    -399            )
    -400
    -401            scans_dataset = hdf_handle.create_dataset(
    -402                "scans", data=np.array(self.gcms.scans_number), dtype="f8"
    -403            )
    -404            rt_dataset = hdf_handle.create_dataset(
    -405                "rt", data=np.array(self.gcms.retention_time), dtype="f8"
    -406            )
    -407            tic_dataset = hdf_handle.create_dataset(
    -408                "tic", data=np.array(self.gcms.tic), dtype="f8"
    -409            )
    -410            processed_tic_dataset = hdf_handle.create_dataset(
    -411                "processed_tic", data=np.array(self.gcms.processed_tic), dtype="f8"
    -412            )
    -413
    -414            output_score_method = (
    -415                self.gcms.molecular_search_settings.output_score_method
    -416            )
    -417
    -418            for gc_peak in self.gcms:
    -419                # print(gc_peak.retention_time)
    -420                # print(gc_peak.tic)
    -421
    -422                # check if there is a compound candidate
    -423                peak_group = hdf_handle.create_group(str(gc_peak.retention_time))
    -424                peak_group.attrs["deconvolution"] = int(
    -425                    self.gcms.chromatogram_settings.use_deconvolution
    -426                )
    -427
    -428                peak_group.attrs["start_scan"] = gc_peak.start_scan
    -429                peak_group.attrs["apex_scan"] = gc_peak.apex_scan
    -430                peak_group.attrs["final_scan"] = gc_peak.final_scan
    -431
    -432                peak_group.attrs["retention_index"] = gc_peak.ri
    -433                peak_group.attrs["retention_time"] = gc_peak.retention_time
    -434                peak_group.attrs["area"] = gc_peak.area
    -435
    -436                mz = peak_group.create_dataset(
    -437                    "mz", data=np.array(gc_peak.mass_spectrum.mz_exp), dtype="f8"
    -438                )
    -439                abundance = peak_group.create_dataset(
    -440                    "abundance",
    -441                    data=np.array(gc_peak.mass_spectrum.abundance),
    -442                    dtype="f8",
    -443                )
    -444
    -445                if gc_peak:
    -446                    if output_score_method == "highest_sim_score":
    -447                        compound_obj = gc_peak.highest_score_compound
    -448                        add_compound(gc_peak, compound_obj)
    -449
    -450                    elif output_score_method == "highest_ss":
    -451                        compound_obj = gc_peak.highest_ss_compound
    -452                        add_compound(gc_peak, compound_obj)
    -453
    -454                    else:
    -455                        for compound_obj in gc_peak:
    -456                            add_compound(gc_peak, compound_obj)
    -457
    -458    def get_data_stats(self, gcms):
    -459        """Get statistics about the GCMS data.
    -460
    -461        Parameters:
    -462        ----------
    -463        gcms : object
    -464            The low resolution GCMS object.
    -465
    -466        Returns:
    -467        -------
    -468        dict
    -469            A dictionary containing the data statistics.
    -470        """
    -471
    -472        matched_peaks = gcms.matched_peaks
    -473        no_matched_peaks = gcms.no_matched_peaks
    -474        unique_metabolites = gcms.unique_metabolites
    -475
    -476        peak_matchs_above_0p85 = 0
    -477        unique_peak_match_above_0p85 = 0
    -478        for match_peak in matched_peaks:
    -479            gc_peak_above_85 = 0
    -480            matches_above_85 = list(
    -481                filter(lambda m: m.similarity_score >= 0.85, match_peak)
    -482            )
    -483            if matches_above_85:
    -484                peak_matchs_above_0p85 += 1
    -485            if len(matches_above_85) == 1:
    -486                unique_peak_match_above_0p85 += 1
    -487
    -488        data_stats = {}
    -489        data_stats["average_signal_noise"] = "ni"
    -490        data_stats["chromatogram_dynamic_range"] = gcms.dynamic_range
    -491        data_stats["total_number_peaks"] = len(gcms)
    -492        data_stats["total_peaks_matched"] = len(matched_peaks)
    -493        data_stats["total_peaks_without_matches"] = len(no_matched_peaks)
    -494        data_stats["total_matches_above_similarity_score_0.85"] = peak_matchs_above_0p85
    -495        data_stats["single_matches_above_similarity_score_0.85"] = (
    -496            unique_peak_match_above_0p85
    -497        )
    -498        data_stats["unique_metabolites"] = len(unique_metabolites)
    -499
    -500        return data_stats
    -501
    -502    def get_calibration_stats(self, gcms, id_label):
    -503        """Get statistics about the GC-MS calibration.
    -504
    -505        Parameters:
    -506        ----------
    -507        """
    -508        calibration_parameters = {}
    -509
    -510        calibration_parameters["calibration_rt_ri_pairs_ref"] = gcms.ri_pairs_ref
    -511        calibration_parameters["data_url"] = str(gcms.cal_file_path)
    -512        calibration_parameters["has_input"] = id_label + corems_md5(gcms.cal_file_path)
    -513        calibration_parameters["data_name"] = str(gcms.cal_file_path.stem)
    -514        calibration_parameters["calibration_method"] = ""
    -515
    -516        return calibration_parameters
    -517
    -518    def get_blank_stats(self, gcms):
    -519        """Get statistics about the GC-MS blank."""
    -520        blank_parameters = {}
    -521
    -522        blank_parameters["data_name"] = "ni"
    -523        blank_parameters["blank_id"] = "ni"
    -524        blank_parameters["data_url"] = "ni"
    -525        blank_parameters["has_input"] = "ni"
    -526        blank_parameters["common_features_to_blank"] = "ni"
    -527
    -528        return blank_parameters
    -529
    -530    def get_instrument_metadata(self, gcms):
    -531        """Get metadata about the GC-MS instrument."""
    -532        instrument_metadata = {}
    -533
    -534        instrument_metadata["analyzer"] = gcms.analyzer
    -535        instrument_metadata["instrument_label"] = gcms.instrument_label
    -536        instrument_metadata["instrument_id"] = uuid.uuid4().hex
    -537
    -538        return instrument_metadata
    -539
    -540    def get_data_metadata(self, gcms, id_label, output_path):
    -541        """Get metadata about the GC-MS data.
    -542
    -543        Parameters:
    -544        ----------
    -545        gcms : object
    -546            The low resolution GCMS object.
    -547        id_label : str
    -548            The ID label for the data.
    -549        output_path : str
    -550            The output file path.
    -551
    -552        Returns:
    -553        -------
    -554        dict
    -555            A dictionary containing the data metadata.
    -556        """
    -557        if isinstance(output_path, str):
    -558            output_path = Path(output_path)
    -559
    -560        paramaters_path = output_path.with_suffix(".json")
    -561
    -562        if paramaters_path.exists():
    -563            with paramaters_path.open() as current_param:
    -564                metadata = json.load(current_param)
    -565                data_metadata = metadata.get("Data")
    -566        else:
    -567            data_metadata = {}
    -568            data_metadata["data_name"] = []
    -569            data_metadata["input_data_url"] = []
    -570            data_metadata["has_input"] = []
    -571
    -572        data_metadata["data_name"].append(gcms.sample_name)
    -573        data_metadata["input_data_url"].append(str(gcms.file_location))
    -574        data_metadata["has_input"].append(id_label + corems_md5(gcms.file_location))
    -575
    -576        data_metadata["output_data_name"] = str(output_path.stem)
    -577        data_metadata["output_data_url"] = str(output_path)
    -578        data_metadata["has_output"] = id_label + corems_md5(output_path)
    -579
    -580        return data_metadata
    -581
    -582    def get_parameters_json(self, gcms, id_label, output_path):
    -583        """Get the parameters as a JSON string.
    -584
    -585        Parameters:
    -586        ----------
    -587        gcms : GCMS object
    -588            The low resolution GCMS object.
    -589        id_label : str
    -590            The ID label for the data.
    -591        output_path : str
    -592            The output file path.
    -593
    -594        Returns:
    -595        -------
    -596        str
    -597            The parameters as a JSON string.
    -598        """
    -599
    -600        output_parameters_dict = {}
    -601        output_parameters_dict["Data"] = self.get_data_metadata(
    -602            gcms, id_label, output_path
    -603        )
    -604        output_parameters_dict["Stats"] = self.get_data_stats(gcms)
    -605        output_parameters_dict["Calibration"] = self.get_calibration_stats(
    -606            gcms, id_label
    -607        )
    -608        output_parameters_dict["Blank"] = self.get_blank_stats(gcms)
    -609        output_parameters_dict["Instrument"] = self.get_instrument_metadata(gcms)
    -610        corems_dict_setting = parameter_to_dict.get_dict_data_gcms(gcms)
    -611        corems_dict_setting["corems_version"] = __version__
    -612        output_parameters_dict["CoreMSParameters"] = corems_dict_setting
    -613        output_parameters_dict["has_metabolite"] = gcms.metabolites_data
    -614        output = json.dumps(
    -615            output_parameters_dict, sort_keys=False, indent=4, separators=(",", ": ")
    -616        )
    -617
    -618        return output
    -619
    -620    def write_settings(self, output_path, gcms, id_label="emsl:"):
    -621        """Write the settings to a JSON file.
    -622
    -623        Parameters:
    -624        ----------
    -625        output_path : str
    -626            The output file path.
    -627        gcms : GCMS object
    -628            The low resolution GCMS object.
    -629        id_label : str
    -630            The ID label for the data. Default is "emsl:".
    -631
    -632        """
    -633
    -634        output = self.get_parameters_json(gcms, id_label, output_path)
    -635
    -636        with open(
    -637            output_path.with_suffix(".json"),
    -638            "w",
    -639            encoding="utf8",
    -640        ) as outfile:
    -641            outfile.write(output)
    -642
    -643    def get_list_dict_data(self, gcms, include_no_match=True, no_match_inline=False):
    -644        """Get the exported data as a list of dictionaries.
    -645
    -646        Parameters:
    -647        ----------
    -648        gcms : object
    -649            The low resolution GCMS object.
    -650        include_no_match : bool, optional
    -651            Whether to include no match data. Default is True.
    -652        no_match_inline : bool, optional
    -653            Whether to include no match data inline. Default is False.
    -654
    -655        Returns:
    -656        -------
    -657        list
    -658            The exported data as a list of dictionaries.
    -659        """
    -660
    -661        output_score_method = gcms.molecular_search_settings.output_score_method
    -662
    -663        dict_data_list = []
    -664
    -665        def add_match_dict_data():
    -666            derivatization = "{}:{}:{}".format(
    -667                compound_obj.classify,
    -668                compound_obj.derivativenum,
    -669                compound_obj.derivatization,
    -670            )
    -671            out_dict = {
    -672                "Sample name": gcms.sample_name,
    -673                "Peak Index": gcpeak_index,
    -674                "Retention Time": gc_peak.retention_time,
    -675                "Retention Time Ref": compound_obj.retention_time,
    -676                "Peak Height": gc_peak.tic,
    -677                "Peak Area": gc_peak.area,
    -678                "Retention index": gc_peak.ri,
    -679                "Retention index Ref": compound_obj.ri,
    -680                "Retention Index Score": compound_obj.ri_score,
    -681                "Spectral Similarity Score": compound_obj.spectral_similarity_score,
    -682                "Similarity Score": compound_obj.similarity_score,
    -683                "Compound Name": compound_obj.name,
    -684                "Chebi ID": compound_obj.metadata.chebi,
    -685                "Kegg Compound ID": compound_obj.metadata.kegg,
    -686                "Inchi": compound_obj.metadata.inchi,
    -687                "Inchi Key": compound_obj.metadata.inchikey,
    -688                "Smiles": compound_obj.metadata.smiles,
    -689                "Molecular Formula": compound_obj.formula,
    -690                "IUPAC Name": compound_obj.metadata.iupac_name,
    -691                "Traditional Name": compound_obj.metadata.traditional_name,
    -692                "Common Name": compound_obj.metadata.common_name,
    -693                "Derivatization": derivatization,
    -694            }
    -695
    -696            if self.gcms.molecular_search_settings.exploratory_mode:
    -697                out_dict.update(
    -698                    {
    -699                        "Weighted Cosine Correlation": compound_obj.spectral_similarity_scores.get(
    -700                            "weighted_cosine_correlation"
    -701                        ),
    -702                        "Cosine Correlation": compound_obj.spectral_similarity_scores.get(
    -703                            "cosine_correlation"
    -704                        ),
    -705                        "Stein Scott Similarity": compound_obj.spectral_similarity_scores.get(
    -706                            "stein_scott_similarity"
    -707                        ),
    -708                        "Pearson Correlation": compound_obj.spectral_similarity_scores.get(
    -709                            "pearson_correlation"
    -710                        ),
    -711                        "Spearman Correlation": compound_obj.spectral_similarity_scores.get(
    -712                            "spearman_correlation"
    -713                        ),
    -714                        "Kendall Tau Correlation": compound_obj.spectral_similarity_scores.get(
    -715                            "kendall_tau_correlation"
    -716                        ),
    -717                        "DFT Correlation": compound_obj.spectral_similarity_scores.get(
    -718                            "dft_correlation"
    -719                        ),
    -720                        "DWT Correlation": compound_obj.spectral_similarity_scores.get(
    -721                            "dwt_correlation"
    -722                        ),
    -723                        "Euclidean Distance": compound_obj.spectral_similarity_scores.get(
    -724                            "euclidean_distance"
    -725                        ),
    -726                        "Manhattan Distance": compound_obj.spectral_similarity_scores.get(
    -727                            "manhattan_distance"
    -728                        ),
    -729                        "Jaccard Distance": compound_obj.spectral_similarity_scores.get(
    -730                            "jaccard_distance"
    -731                        ),
    -732                    }
    -733                )
    -734                for method in methods_name:
    -735                    out_dict[methods_name.get(method)] = (
    -736                        compound_obj.spectral_similarity_scores.get(method)
    -737                    )
    -738
    -739            dict_data_list.append(out_dict)
    -740
    -741        def add_no_match_dict_data():
    -742            dict_data_list.append(
    -743                {
    -744                    "Sample name": gcms.sample_name,
    -745                    "Peak Index": gcpeak_index,
    -746                    "Retention Time": gc_peak.retention_time,
    -747                    "Peak Height": gc_peak.tic,
    -748                    "Peak Area": gc_peak.area,
    -749                    "Retention index": gc_peak.ri,
    -750                }
    -751            )
    -752
    -753        for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks):
    -754            # check if there is a compound candidate
    -755            if gc_peak:
    -756                if output_score_method == "highest_sim_score":
    -757                    compound_obj = gc_peak.highest_score_compound
    -758                    add_match_dict_data()
    -759
    -760                elif output_score_method == "highest_ss":
    -761                    compound_obj = gc_peak.highest_ss_compound
    -762                    add_match_dict_data()
    -763
    -764                else:
    -765                    for compound_obj in gc_peak:
    -766                        add_match_dict_data()  # add monoisotopic peak
    -767
    -768            else:
    -769                # include not_match
    -770                if include_no_match and no_match_inline:
    -771                    add_no_match_dict_data()
    -772
    -773        if include_no_match and not no_match_inline:
    -774            for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks):
    -775                if not gc_peak:
    -776                    add_no_match_dict_data()
    -777
    -778        return dict_data_list
    +            
     55class LowResGCMSExport:
    + 56    """A class to export low resolution GC-MS data.
    + 57
    + 58    This class provides methods to export low resolution GC-MS data to various formats such as Excel, CSV, HDF5, and Pandas DataFrame.
    + 59
    + 60    Parameters:
    + 61    ----------
    + 62    out_file_path : str
    + 63        The output file path.
    + 64    gcms : object
    + 65        The low resolution GCMS object.
    + 66
    + 67    Attributes:
    + 68    ----------
    + 69    output_file : Path
    + 70        The output file path as a Path object.
    + 71    gcms : object
    + 72        The low resolution GCMS object.
    + 73
    + 74    Methods:
    + 75    -------
    + 76    * get_pandas_df(id_label="corems:"). Get the exported data as a Pandas DataFrame.
    + 77    * get_json(nan=False, id_label="corems:"). Get the exported data as a JSON string.
    + 78    * to_pandas(write_metadata=True, id_label="corems:"). Export the data to a Pandas DataFrame and save it as a pickle file.
    + 79    * to_excel(write_mode='a', write_metadata=True, id_label="corems:"),
    + 80        Export the data to an Excel file.
    + 81    * to_csv(separate_output=False, write_mode="w", write_metadata=True, id_label="corems:").
    + 82        Export the data to a CSV file.
    + 83    * to_hdf(id_label="corems:").
    + 84        Export the data to an HDF5 file.
    + 85    * get_data_stats(gcms).
    + 86        Get statistics about the GCMS data.
    + 87
    + 88    """
    + 89
    + 90    def __init__(self, out_file_path, gcms):
    + 91        self.output_file = Path(out_file_path)
    + 92
    + 93        self.gcms = gcms
    + 94
    + 95        self._init_columns()
    + 96
    + 97    def _init_columns(self):
    + 98        """Initialize the column names for the exported data.
    + 99
    +100        Returns:
    +101        -------
    +102        list
    +103            The list of column names.
    +104        """
    +105
    +106        columns = [
    +107            "Sample name",
    +108            "Peak Index",
    +109            "Retention Time",
    +110            "Retention Time Ref",
    +111            "Peak Height",
    +112            "Peak Area",
    +113            "Retention index",
    +114            "Retention index Ref",
    +115            "Retention Index Score",
    +116            "Similarity Score",
    +117            "Spectral Similarity Score",
    +118            "Compound Name",
    +119            "Chebi ID",
    +120            "Kegg Compound ID",
    +121            "Inchi",
    +122            "Inchi Key",
    +123            "Smiles",
    +124            "Molecular Formula",
    +125            "IUPAC Name",
    +126            "Traditional Name",
    +127            "Common Name",
    +128            "Derivatization",
    +129        ]
    +130
    +131        if self.gcms.molecular_search_settings.exploratory_mode:
    +132            columns.extend(
    +133                [
    +134                    "Weighted Cosine Correlation",
    +135                    "Cosine Correlation",
    +136                    "Stein Scott Similarity",
    +137                    "Pearson Correlation",
    +138                    "Spearman Correlation",
    +139                    "Kendall Tau Correlation",
    +140                    "Euclidean Distance",
    +141                    "Manhattan Distance",
    +142                    "Jaccard Distance",
    +143                    "DWT Correlation",
    +144                    "DFT Correlation",
    +145                ]
    +146            )
    +147
    +148            columns.extend(list(methods_name.values()))
    +149
    +150        return columns
    +151
    +152    def get_pandas_df(self, id_label="corems:"):
    +153        """Get the exported data as a Pandas DataFrame.
    +154
    +155        Parameters:
    +156        ----------
    +157        id_label : str, optional
    +158            The ID label for the data. Default is "corems:".
    +159
    +160        Returns:
    +161        -------
    +162        DataFrame
    +163            The exported data as a Pandas DataFrame.
    +164        """
    +165
    +166        columns = self._init_columns()
    +167
    +168        dict_data_list = self.get_list_dict_data(self.gcms)
    +169
    +170        df = DataFrame(dict_data_list, columns=columns)
    +171
    +172        df.name = self.gcms.sample_name
    +173
    +174        return df
    +175
    +176    def get_json(self, nan=False, id_label="corems:"):
    +177        """Get the exported data as a JSON string.
    +178
    +179        Parameters:
    +180        ----------
    +181        nan : bool, optional
    +182            Whether to include NaN values in the JSON string. Default is False.
    +183        id_label : str, optional
    +184            The ID label for the data. Default is "corems:".
    +185
    +186        """
    +187
    +188        import json
    +189
    +190        dict_data_list = self.get_list_dict_data(self.gcms)
    +191
    +192        return json.dumps(
    +193            dict_data_list, sort_keys=False, indent=4, separators=(",", ": ")
    +194        )
    +195
    +196    def to_pandas(self, write_metadata=True, id_label="corems:"):
    +197        """Export the data to a Pandas DataFrame and save it as a pickle file.
    +198
    +199        Parameters:
    +200        ----------
    +201        write_metadata : bool, optional
    +202            Whether to write metadata to the output file.
    +203        id_label : str, optional
    +204            The ID label for the data.
    +205        """
    +206
    +207        columns = self._init_columns()
    +208
    +209        dict_data_list = self.get_list_dict_data(self.gcms)
    +210
    +211        df = DataFrame(dict_data_list, columns=columns)
    +212
    +213        df.to_pickle(self.output_file.with_suffix(".pkl"))
    +214
    +215        if write_metadata:
    +216            self.write_settings(
    +217                self.output_file.with_suffix(".pkl"), self.gcms, id_label="corems:"
    +218            )
    +219
    +220    def to_excel(self, write_mode="a", write_metadata=True, id_label="corems:"):
    +221        """Export the data to an Excel file.
    +222
    +223        Parameters:
    +224        ----------
    +225        write_mode : str, optional
    +226            The write mode for the Excel file. Default is 'a' (append).
    +227        write_metadata : bool, optional
    +228            Whether to write metadata to the output file. Default is True.
    +229        id_label : str, optional
    +230            The ID label for the data. Default is "corems:".
    +231        """
    +232
    +233        out_put_path = self.output_file.with_suffix(".xlsx")
    +234
    +235        columns = self._init_columns()
    +236
    +237        dict_data_list = self.get_list_dict_data(self.gcms)
    +238
    +239        df = DataFrame(dict_data_list, columns=columns)
    +240
    +241        if write_mode == "a" and out_put_path.exists():
    +242            writer = ExcelWriter(out_put_path, engine="openpyxl")
    +243            # try to open an existing workbook
    +244            writer.book = load_workbook(out_put_path)
    +245            # copy existing sheets
    +246            writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
    +247            # read existing file
    +248            reader = read_excel(out_put_path)
    +249            # write out the new sheet
    +250            df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1)
    +251
    +252            writer.close()
    +253        else:
    +254            df.to_excel(
    +255                self.output_file.with_suffix(".xlsx"), index=False, engine="openpyxl"
    +256            )
    +257
    +258        if write_metadata:
    +259            self.write_settings(out_put_path, self.gcms, id_label=id_label)
    +260
    +261    def to_csv(
    +262        self,
    +263        separate_output=False,
    +264        write_mode="w",
    +265        write_metadata=True,
    +266        id_label="corems:",
    +267    ):
    +268        """Export the data to a CSV file.
    +269
    +270        Parameters:
    +271        ----------
    +272        separate_output : bool, optional
    +273            Whether to separate the output into multiple files. Default is False.
    +274        write_mode : str, optional
    +275            The write mode for the CSV file. Default is 'w' (write).
    +276        write_metadata : bool, optional
    +277            Whether to write metadata to the output file. Default is True.
    +278        id_label : str, optional
    +279            The ID label for the data. Default is "corems:".
    +280        """
    +281
    +282        if separate_output:
    +283            # set write mode to write
    +284            # this mode will overwrite the file without warning
    +285            write_mode = "w"
    +286        else:
    +287            # set write mode to append
    +288            write_mode = "a"
    +289
    +290        columns = self._init_columns()
    +291
    +292        dict_data_list = self.get_list_dict_data(self.gcms)
    +293
    +294        out_put_path = self.output_file.with_suffix(".csv")
    +295
    +296        write_header = not out_put_path.exists()
    +297
    +298        try:
    +299            with open(out_put_path, write_mode, newline="") as csvfile:
    +300                writer = csv.DictWriter(csvfile, fieldnames=columns)
    +301                if write_header:
    +302                    writer.writeheader()
    +303                for data in dict_data_list:
    +304                    writer.writerow(data)
    +305
    +306            if write_metadata:
    +307                self.write_settings(out_put_path, self.gcms, id_label=id_label)
    +308
    +309        except IOError as ioerror:
    +310            print(ioerror)
    +311
    +312    def to_hdf(self, id_label="corems:"):
    +313        """Export the data to an HDF5 file.
    +314
    +315        Parameters:
    +316        ----------
    +317        id_label : str, optional
    +318            The ID label for the data. Default is "corems:".
    +319        """
    +320
    +321        # save sample at a time
    +322        def add_compound(gc_peak, compound_obj):
    +323            modifier = compound_obj.classify if compound_obj.classify else ""
    +324            compound_group = compound_obj.name.replace("/", "") + " " + modifier
    +325
    +326            if compound_group not in peak_group:
    +327                compound_group = peak_group.create_group(compound_group)
    +328
    +329                # compound_group.attrs["retention_time"] = compound_obj.retention_time
    +330                compound_group.attrs["retention_index"] = compound_obj.ri
    +331                compound_group.attrs["retention_index_score"] = compound_obj.ri_score
    +332                compound_group.attrs["spectral_similarity_score"] = (
    +333                    compound_obj.spectral_similarity_score
    +334                )
    +335                compound_group.attrs["similarity_score"] = compound_obj.similarity_score
    +336
    +337                compond_mz = compound_group.create_dataset(
    +338                    "mz", data=np.array(compound_obj.mz), dtype="f8"
    +339                )
    +340                compond_abundance = compound_group.create_dataset(
    +341                    "abundance", data=np.array(compound_obj.abundance), dtype="f8"
    +342                )
    +343
    +344                if self.gcms.molecular_search_settings.exploratory_mode:
    +345                    compound_group.attrs["Spectral Similarities"] = json.dumps(
    +346                        compound_obj.spectral_similarity_scores,
    +347                        sort_keys=False,
    +348                        indent=4,
    +349                        separators=(",", ":"),
    +350                    )
    +351            else:
    +352                warnings.warn("Skipping duplicate reference compound.")
    +353
    +354        import json
    +355        from datetime import datetime, timezone
    +356
    +357        import h5py
    +358        import numpy as np
    +359
    +360        output_path = self.output_file.with_suffix(".hdf5")
    +361
    +362        with h5py.File(output_path, "w") as hdf_handle:
    +363            timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    +364            hdf_handle.attrs["time_stamp"] = timenow
    +365            hdf_handle.attrs["data_structure"] = "gcms"
    +366            hdf_handle.attrs["analyzer"] = self.gcms.analyzer
    +367            hdf_handle.attrs["instrument_label"] = self.gcms.instrument_label
    +368
    +369            hdf_handle.attrs["sample_id"] = "self.gcms.id"
    +370            hdf_handle.attrs["sample_name"] = self.gcms.sample_name
    +371            hdf_handle.attrs["input_data"] = str(self.gcms.file_location)
    +372            hdf_handle.attrs["output_data"] = str(output_path)
    +373            hdf_handle.attrs["output_data_id"] = id_label + uuid.uuid4().hex
    +374            hdf_handle.attrs["corems_version"] = __version__
    +375
    +376            hdf_handle.attrs["Stats"] = json.dumps(
    +377                self.get_data_stats(self.gcms),
    +378                sort_keys=False,
    +379                indent=4,
    +380                separators=(",", ": "),
    +381            )
    +382            hdf_handle.attrs["Calibration"] = json.dumps(
    +383                self.get_calibration_stats(self.gcms, id_label),
    +384                sort_keys=False,
    +385                indent=4,
    +386                separators=(",", ": "),
    +387            )
    +388            hdf_handle.attrs["Blank"] = json.dumps(
    +389                self.get_blank_stats(self.gcms),
    +390                sort_keys=False,
    +391                indent=4,
    +392                separators=(",", ": "),
    +393            )
    +394
    +395            corems_dict_setting = parameter_to_dict.get_dict_data_gcms(self.gcms)
    +396            hdf_handle.attrs["CoreMSParameters"] = json.dumps(
    +397                corems_dict_setting, sort_keys=False, indent=4, separators=(",", ": ")
    +398            )
    +399
    +400            scans_dataset = hdf_handle.create_dataset(
    +401                "scans", data=np.array(self.gcms.scans_number), dtype="f8"
    +402            )
    +403            rt_dataset = hdf_handle.create_dataset(
    +404                "rt", data=np.array(self.gcms.retention_time), dtype="f8"
    +405            )
    +406            tic_dataset = hdf_handle.create_dataset(
    +407                "tic", data=np.array(self.gcms.tic), dtype="f8"
    +408            )
    +409            processed_tic_dataset = hdf_handle.create_dataset(
    +410                "processed_tic", data=np.array(self.gcms.processed_tic), dtype="f8"
    +411            )
    +412
    +413            output_score_method = (
    +414                self.gcms.molecular_search_settings.output_score_method
    +415            )
    +416
    +417            for gc_peak in self.gcms:
    +418                # print(gc_peak.retention_time)
    +419                # print(gc_peak.tic)
    +420
    +421                # check if there is a compound candidate
    +422                peak_group = hdf_handle.create_group(str(gc_peak.retention_time))
    +423                peak_group.attrs["deconvolution"] = int(
    +424                    self.gcms.chromatogram_settings.use_deconvolution
    +425                )
    +426
    +427                peak_group.attrs["start_scan"] = gc_peak.start_scan
    +428                peak_group.attrs["apex_scan"] = gc_peak.apex_scan
    +429                peak_group.attrs["final_scan"] = gc_peak.final_scan
    +430
    +431                peak_group.attrs["retention_index"] = gc_peak.ri
    +432                peak_group.attrs["retention_time"] = gc_peak.retention_time
    +433                peak_group.attrs["area"] = gc_peak.area
    +434
    +435                mz = peak_group.create_dataset(
    +436                    "mz", data=np.array(gc_peak.mass_spectrum.mz_exp), dtype="f8"
    +437                )
    +438                abundance = peak_group.create_dataset(
    +439                    "abundance",
    +440                    data=np.array(gc_peak.mass_spectrum.abundance),
    +441                    dtype="f8",
    +442                )
    +443
    +444                if gc_peak:
    +445                    if output_score_method == "highest_sim_score":
    +446                        compound_obj = gc_peak.highest_score_compound
    +447                        add_compound(gc_peak, compound_obj)
    +448
    +449                    elif output_score_method == "highest_ss":
    +450                        compound_obj = gc_peak.highest_ss_compound
    +451                        add_compound(gc_peak, compound_obj)
    +452
    +453                    else:
    +454                        for compound_obj in gc_peak:
    +455                            add_compound(gc_peak, compound_obj)
    +456
    +457    def get_data_stats(self, gcms):
    +458        """Get statistics about the GCMS data.
    +459
    +460        Parameters:
    +461        ----------
    +462        gcms : object
    +463            The low resolution GCMS object.
    +464
    +465        Returns:
    +466        -------
    +467        dict
    +468            A dictionary containing the data statistics.
    +469        """
    +470
    +471        matched_peaks = gcms.matched_peaks
    +472        no_matched_peaks = gcms.no_matched_peaks
    +473        unique_metabolites = gcms.unique_metabolites
    +474
    +475        peak_matchs_above_0p85 = 0
    +476        unique_peak_match_above_0p85 = 0
    +477        for match_peak in matched_peaks:
    +478            gc_peak_above_85 = 0
    +479            matches_above_85 = list(
    +480                filter(lambda m: m.similarity_score >= 0.85, match_peak)
    +481            )
    +482            if matches_above_85:
    +483                peak_matchs_above_0p85 += 1
    +484            if len(matches_above_85) == 1:
    +485                unique_peak_match_above_0p85 += 1
    +486
    +487        data_stats = {}
    +488        data_stats["average_signal_noise"] = "ni"
    +489        data_stats["chromatogram_dynamic_range"] = gcms.dynamic_range
    +490        data_stats["total_number_peaks"] = len(gcms)
    +491        data_stats["total_peaks_matched"] = len(matched_peaks)
    +492        data_stats["total_peaks_without_matches"] = len(no_matched_peaks)
    +493        data_stats["total_matches_above_similarity_score_0.85"] = peak_matchs_above_0p85
    +494        data_stats["single_matches_above_similarity_score_0.85"] = (
    +495            unique_peak_match_above_0p85
    +496        )
    +497        data_stats["unique_metabolites"] = len(unique_metabolites)
    +498
    +499        return data_stats
    +500
    +501    def get_calibration_stats(self, gcms, id_label):
    +502        """Get statistics about the GC-MS calibration.
    +503
    +504        Parameters:
    +505        ----------
    +506        """
    +507        calibration_parameters = {}
    +508
    +509        calibration_parameters["calibration_rt_ri_pairs_ref"] = gcms.ri_pairs_ref
    +510        calibration_parameters["data_url"] = str(gcms.cal_file_path)
    +511        calibration_parameters["has_input"] = id_label + corems_md5(gcms.cal_file_path)
    +512        calibration_parameters["data_name"] = str(gcms.cal_file_path.stem)
    +513        calibration_parameters["calibration_method"] = ""
    +514
    +515        return calibration_parameters
    +516
    +517    def get_blank_stats(self, gcms):
    +518        """Get statistics about the GC-MS blank."""
    +519        blank_parameters = {}
    +520
    +521        blank_parameters["data_name"] = "ni"
    +522        blank_parameters["blank_id"] = "ni"
    +523        blank_parameters["data_url"] = "ni"
    +524        blank_parameters["has_input"] = "ni"
    +525        blank_parameters["common_features_to_blank"] = "ni"
    +526
    +527        return blank_parameters
    +528
    +529    def get_instrument_metadata(self, gcms):
    +530        """Get metadata about the GC-MS instrument."""
    +531        instrument_metadata = {}
    +532
    +533        instrument_metadata["analyzer"] = gcms.analyzer
    +534        instrument_metadata["instrument_label"] = gcms.instrument_label
    +535        instrument_metadata["instrument_id"] = uuid.uuid4().hex
    +536
    +537        return instrument_metadata
    +538
    +539    def get_data_metadata(self, gcms, id_label, output_path):
    +540        """Get metadata about the GC-MS data.
    +541
    +542        Parameters:
    +543        ----------
    +544        gcms : object
    +545            The low resolution GCMS object.
    +546        id_label : str
    +547            The ID label for the data.
    +548        output_path : str
    +549            The output file path.
    +550
    +551        Returns:
    +552        -------
    +553        dict
    +554            A dictionary containing the data metadata.
    +555        """
    +556        if isinstance(output_path, str):
    +557            output_path = Path(output_path)
    +558
    +559        paramaters_path = output_path.with_suffix(".json")
    +560
    +561        if paramaters_path.exists():
    +562            with paramaters_path.open() as current_param:
    +563                metadata = json.load(current_param)
    +564                data_metadata = metadata.get("Data")
    +565        else:
    +566            data_metadata = {}
    +567            data_metadata["data_name"] = []
    +568            data_metadata["input_data_url"] = []
    +569            data_metadata["has_input"] = []
    +570
    +571        data_metadata["data_name"].append(gcms.sample_name)
    +572        data_metadata["input_data_url"].append(str(gcms.file_location))
    +573        data_metadata["has_input"].append(id_label + corems_md5(gcms.file_location))
    +574
    +575        data_metadata["output_data_name"] = str(output_path.stem)
    +576        data_metadata["output_data_url"] = str(output_path)
    +577        data_metadata["has_output"] = id_label + corems_md5(output_path)
    +578
    +579        return data_metadata
    +580
    +581    def get_parameters_json(self, gcms, id_label, output_path):
    +582        """Get the parameters as a JSON string.
    +583
    +584        Parameters:
    +585        ----------
    +586        gcms : GCMS object
    +587            The low resolution GCMS object.
    +588        id_label : str
    +589            The ID label for the data.
    +590        output_path : str
    +591            The output file path.
    +592
    +593        Returns:
    +594        -------
    +595        str
    +596            The parameters as a JSON string.
    +597        """
    +598
    +599        output_parameters_dict = {}
    +600        output_parameters_dict["Data"] = self.get_data_metadata(
    +601            gcms, id_label, output_path
    +602        )
    +603        output_parameters_dict["Stats"] = self.get_data_stats(gcms)
    +604        output_parameters_dict["Calibration"] = self.get_calibration_stats(
    +605            gcms, id_label
    +606        )
    +607        output_parameters_dict["Blank"] = self.get_blank_stats(gcms)
    +608        output_parameters_dict["Instrument"] = self.get_instrument_metadata(gcms)
    +609        corems_dict_setting = parameter_to_dict.get_dict_data_gcms(gcms)
    +610        corems_dict_setting["corems_version"] = __version__
    +611        output_parameters_dict["CoreMSParameters"] = corems_dict_setting
    +612        output_parameters_dict["has_metabolite"] = gcms.metabolites_data
    +613        output = json.dumps(
    +614            output_parameters_dict, sort_keys=False, indent=4, separators=(",", ": ")
    +615        )
    +616
    +617        return output
    +618
    +619    def write_settings(self, output_path, gcms, id_label="emsl:"):
    +620        """Write the settings to a JSON file.
    +621
    +622        Parameters:
    +623        ----------
    +624        output_path : str
    +625            The output file path.
    +626        gcms : GCMS object
    +627            The low resolution GCMS object.
    +628        id_label : str
    +629            The ID label for the data. Default is "emsl:".
    +630
    +631        """
    +632
    +633        output = self.get_parameters_json(gcms, id_label, output_path)
    +634
    +635        with open(
    +636            output_path.with_suffix(".json"),
    +637            "w",
    +638            encoding="utf8",
    +639        ) as outfile:
    +640            outfile.write(output)
    +641
    +642    def get_list_dict_data(self, gcms, include_no_match=True, no_match_inline=False):
    +643        """Get the exported data as a list of dictionaries.
    +644
    +645        Parameters:
    +646        ----------
    +647        gcms : object
    +648            The low resolution GCMS object.
    +649        include_no_match : bool, optional
    +650            Whether to include no match data. Default is True.
    +651        no_match_inline : bool, optional
    +652            Whether to include no match data inline. Default is False.
    +653
    +654        Returns:
    +655        -------
    +656        list
    +657            The exported data as a list of dictionaries.
    +658        """
    +659
    +660        output_score_method = gcms.molecular_search_settings.output_score_method
    +661
    +662        dict_data_list = []
    +663
    +664        def add_match_dict_data():
    +665            derivatization = "{}:{}:{}".format(
    +666                compound_obj.classify,
    +667                compound_obj.derivativenum,
    +668                compound_obj.derivatization,
    +669            )
    +670            out_dict = {
    +671                "Sample name": gcms.sample_name,
    +672                "Peak Index": gcpeak_index,
    +673                "Retention Time": gc_peak.retention_time,
    +674                "Retention Time Ref": compound_obj.retention_time,
    +675                "Peak Height": gc_peak.tic,
    +676                "Peak Area": gc_peak.area,
    +677                "Retention index": gc_peak.ri,
    +678                "Retention index Ref": compound_obj.ri,
    +679                "Retention Index Score": compound_obj.ri_score,
    +680                "Spectral Similarity Score": compound_obj.spectral_similarity_score,
    +681                "Similarity Score": compound_obj.similarity_score,
    +682                "Compound Name": compound_obj.name,
    +683                "Chebi ID": compound_obj.metadata.chebi,
    +684                "Kegg Compound ID": compound_obj.metadata.kegg,
    +685                "Inchi": compound_obj.metadata.inchi,
    +686                "Inchi Key": compound_obj.metadata.inchikey,
    +687                "Smiles": compound_obj.metadata.smiles,
    +688                "Molecular Formula": compound_obj.formula,
    +689                "IUPAC Name": compound_obj.metadata.iupac_name,
    +690                "Traditional Name": compound_obj.metadata.traditional_name,
    +691                "Common Name": compound_obj.metadata.common_name,
    +692                "Derivatization": derivatization,
    +693            }
    +694
    +695            if self.gcms.molecular_search_settings.exploratory_mode:
    +696                out_dict.update(
    +697                    {
    +698                        "Weighted Cosine Correlation": compound_obj.spectral_similarity_scores.get(
    +699                            "weighted_cosine_correlation"
    +700                        ),
    +701                        "Cosine Correlation": compound_obj.spectral_similarity_scores.get(
    +702                            "cosine_correlation"
    +703                        ),
    +704                        "Stein Scott Similarity": compound_obj.spectral_similarity_scores.get(
    +705                            "stein_scott_similarity"
    +706                        ),
    +707                        "Pearson Correlation": compound_obj.spectral_similarity_scores.get(
    +708                            "pearson_correlation"
    +709                        ),
    +710                        "Spearman Correlation": compound_obj.spectral_similarity_scores.get(
    +711                            "spearman_correlation"
    +712                        ),
    +713                        "Kendall Tau Correlation": compound_obj.spectral_similarity_scores.get(
    +714                            "kendall_tau_correlation"
    +715                        ),
    +716                        "DFT Correlation": compound_obj.spectral_similarity_scores.get(
    +717                            "dft_correlation"
    +718                        ),
    +719                        "DWT Correlation": compound_obj.spectral_similarity_scores.get(
    +720                            "dwt_correlation"
    +721                        ),
    +722                        "Euclidean Distance": compound_obj.spectral_similarity_scores.get(
    +723                            "euclidean_distance"
    +724                        ),
    +725                        "Manhattan Distance": compound_obj.spectral_similarity_scores.get(
    +726                            "manhattan_distance"
    +727                        ),
    +728                        "Jaccard Distance": compound_obj.spectral_similarity_scores.get(
    +729                            "jaccard_distance"
    +730                        ),
    +731                    }
    +732                )
    +733                for method in methods_name:
    +734                    out_dict[methods_name.get(method)] = (
    +735                        compound_obj.spectral_similarity_scores.get(method)
    +736                    )
    +737
    +738            dict_data_list.append(out_dict)
    +739
    +740        def add_no_match_dict_data():
    +741            dict_data_list.append(
    +742                {
    +743                    "Sample name": gcms.sample_name,
    +744                    "Peak Index": gcpeak_index,
    +745                    "Retention Time": gc_peak.retention_time,
    +746                    "Peak Height": gc_peak.tic,
    +747                    "Peak Area": gc_peak.area,
    +748                    "Retention index": gc_peak.ri,
    +749                }
    +750            )
    +751
    +752        for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks):
    +753            # check if there is a compound candidate
    +754            if gc_peak:
    +755                if output_score_method == "highest_sim_score":
    +756                    compound_obj = gc_peak.highest_score_compound
    +757                    add_match_dict_data()
    +758
    +759                elif output_score_method == "highest_ss":
    +760                    compound_obj = gc_peak.highest_ss_compound
    +761                    add_match_dict_data()
    +762
    +763                else:
    +764                    for compound_obj in gc_peak:
    +765                        add_match_dict_data()  # add monoisotopic peak
    +766
    +767            else:
    +768                # include not_match
    +769                if include_no_match and no_match_inline:
    +770                    add_no_match_dict_data()
    +771
    +772        if include_no_match and not no_match_inline:
    +773            for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks):
    +774                if not gc_peak:
    +775                    add_no_match_dict_data()
    +776
    +777        return dict_data_list
     
    @@ -2771,12 +2780,12 @@

    Methods:

    -
    91    def __init__(self, out_file_path, gcms):
    -92        self.output_file = Path(out_file_path)
    -93
    -94        self.gcms = gcms
    -95
    -96        self._init_columns()
    +            
    90    def __init__(self, out_file_path, gcms):
    +91        self.output_file = Path(out_file_path)
    +92
    +93        self.gcms = gcms
    +94
    +95        self._init_columns()
     
    @@ -2816,29 +2825,29 @@

    Methods:

    -
    153    def get_pandas_df(self, id_label="corems:"):
    -154        """Get the exported data as a Pandas DataFrame.
    -155
    -156        Parameters:
    -157        ----------
    -158        id_label : str, optional
    -159            The ID label for the data. Default is "corems:".
    -160
    -161        Returns:
    -162        -------
    -163        DataFrame
    -164            The exported data as a Pandas DataFrame.
    -165        """
    -166
    -167        columns = self._init_columns()
    -168
    -169        dict_data_list = self.get_list_dict_data(self.gcms)
    -170
    -171        df = DataFrame(dict_data_list, columns=columns)
    -172
    -173        df.name = self.gcms.sample_name
    -174
    -175        return df
    +            
    152    def get_pandas_df(self, id_label="corems:"):
    +153        """Get the exported data as a Pandas DataFrame.
    +154
    +155        Parameters:
    +156        ----------
    +157        id_label : str, optional
    +158            The ID label for the data. Default is "corems:".
    +159
    +160        Returns:
    +161        -------
    +162        DataFrame
    +163            The exported data as a Pandas DataFrame.
    +164        """
    +165
    +166        columns = self._init_columns()
    +167
    +168        dict_data_list = self.get_list_dict_data(self.gcms)
    +169
    +170        df = DataFrame(dict_data_list, columns=columns)
    +171
    +172        df.name = self.gcms.sample_name
    +173
    +174        return df
     
    @@ -2868,25 +2877,25 @@

    Returns:

    -
    177    def get_json(self, nan=False, id_label="corems:"):
    -178        """Get the exported data as a JSON string.
    -179
    -180        Parameters:
    -181        ----------
    -182        nan : bool, optional
    -183            Whether to include NaN values in the JSON string. Default is False.
    -184        id_label : str, optional
    -185            The ID label for the data. Default is "corems:".
    -186
    -187        """
    -188
    -189        import json
    -190
    -191        dict_data_list = self.get_list_dict_data(self.gcms)
    -192
    -193        return json.dumps(
    -194            dict_data_list, sort_keys=False, indent=4, separators=(",", ": ")
    -195        )
    +            
    176    def get_json(self, nan=False, id_label="corems:"):
    +177        """Get the exported data as a JSON string.
    +178
    +179        Parameters:
    +180        ----------
    +181        nan : bool, optional
    +182            Whether to include NaN values in the JSON string. Default is False.
    +183        id_label : str, optional
    +184            The ID label for the data. Default is "corems:".
    +185
    +186        """
    +187
    +188        import json
    +189
    +190        dict_data_list = self.get_list_dict_data(self.gcms)
    +191
    +192        return json.dumps(
    +193            dict_data_list, sort_keys=False, indent=4, separators=(",", ": ")
    +194        )
     
    @@ -2913,29 +2922,29 @@

    Parameters:

    -
    197    def to_pandas(self, write_metadata=True, id_label="corems:"):
    -198        """Export the data to a Pandas DataFrame and save it as a pickle file.
    -199
    -200        Parameters:
    -201        ----------
    -202        write_metadata : bool, optional
    -203            Whether to write metadata to the output file.
    -204        id_label : str, optional
    -205            The ID label for the data.
    -206        """
    -207
    -208        columns = self._init_columns()
    -209
    -210        dict_data_list = self.get_list_dict_data(self.gcms)
    -211
    -212        df = DataFrame(dict_data_list, columns=columns)
    -213
    -214        df.to_pickle(self.output_file.with_suffix(".pkl"))
    -215
    -216        if write_metadata:
    -217            self.write_settings(
    -218                self.output_file.with_suffix(".pkl"), self.gcms, id_label="corems:"
    -219            )
    +            
    196    def to_pandas(self, write_metadata=True, id_label="corems:"):
    +197        """Export the data to a Pandas DataFrame and save it as a pickle file.
    +198
    +199        Parameters:
    +200        ----------
    +201        write_metadata : bool, optional
    +202            Whether to write metadata to the output file.
    +203        id_label : str, optional
    +204            The ID label for the data.
    +205        """
    +206
    +207        columns = self._init_columns()
    +208
    +209        dict_data_list = self.get_list_dict_data(self.gcms)
    +210
    +211        df = DataFrame(dict_data_list, columns=columns)
    +212
    +213        df.to_pickle(self.output_file.with_suffix(".pkl"))
    +214
    +215        if write_metadata:
    +216            self.write_settings(
    +217                self.output_file.with_suffix(".pkl"), self.gcms, id_label="corems:"
    +218            )
     
    @@ -2962,46 +2971,46 @@

    Parameters:

    -
    221    def to_excel(self, write_mode="a", write_metadata=True, id_label="corems:"):
    -222        """Export the data to an Excel file.
    -223
    -224        Parameters:
    -225        ----------
    -226        write_mode : str, optional
    -227            The write mode for the Excel file. Default is 'a' (append).
    -228        write_metadata : bool, optional
    -229            Whether to write metadata to the output file. Default is True.
    -230        id_label : str, optional
    -231            The ID label for the data. Default is "corems:".
    -232        """
    -233
    -234        out_put_path = self.output_file.with_suffix(".xlsx")
    -235
    -236        columns = self._init_columns()
    -237
    -238        dict_data_list = self.get_list_dict_data(self.gcms)
    -239
    -240        df = DataFrame(dict_data_list, columns=columns)
    -241
    -242        if write_mode == "a" and out_put_path.exists():
    -243            writer = ExcelWriter(out_put_path, engine="openpyxl")
    -244            # try to open an existing workbook
    -245            writer.book = load_workbook(out_put_path)
    -246            # copy existing sheets
    -247            writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
    -248            # read existing file
    -249            reader = read_excel(out_put_path)
    -250            # write out the new sheet
    -251            df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1)
    -252
    -253            writer.close()
    -254        else:
    -255            df.to_excel(
    -256                self.output_file.with_suffix(".xlsx"), index=False, engine="openpyxl"
    -257            )
    -258
    -259        if write_metadata:
    -260            self.write_settings(out_put_path, self.gcms, id_label=id_label)
    +            
    220    def to_excel(self, write_mode="a", write_metadata=True, id_label="corems:"):
    +221        """Export the data to an Excel file.
    +222
    +223        Parameters:
    +224        ----------
    +225        write_mode : str, optional
    +226            The write mode for the Excel file. Default is 'a' (append).
    +227        write_metadata : bool, optional
    +228            Whether to write metadata to the output file. Default is True.
    +229        id_label : str, optional
    +230            The ID label for the data. Default is "corems:".
    +231        """
    +232
    +233        out_put_path = self.output_file.with_suffix(".xlsx")
    +234
    +235        columns = self._init_columns()
    +236
    +237        dict_data_list = self.get_list_dict_data(self.gcms)
    +238
    +239        df = DataFrame(dict_data_list, columns=columns)
    +240
    +241        if write_mode == "a" and out_put_path.exists():
    +242            writer = ExcelWriter(out_put_path, engine="openpyxl")
    +243            # try to open an existing workbook
    +244            writer.book = load_workbook(out_put_path)
    +245            # copy existing sheets
    +246            writer.sheets = dict((ws.title, ws) for ws in writer.book.worksheets)
    +247            # read existing file
    +248            reader = read_excel(out_put_path)
    +249            # write out the new sheet
    +250            df.to_excel(writer, index=False, header=False, startrow=len(reader) + 1)
    +251
    +252            writer.close()
    +253        else:
    +254            df.to_excel(
    +255                self.output_file.with_suffix(".xlsx"), index=False, engine="openpyxl"
    +256            )
    +257
    +258        if write_metadata:
    +259            self.write_settings(out_put_path, self.gcms, id_label=id_label)
     
    @@ -3030,56 +3039,56 @@

    Parameters:

    -
    262    def to_csv(
    -263        self,
    -264        separate_output=False,
    -265        write_mode="w",
    -266        write_metadata=True,
    -267        id_label="corems:",
    -268    ):
    -269        """Export the data to a CSV file.
    -270
    -271        Parameters:
    -272        ----------
    -273        separate_output : bool, optional
    -274            Whether to separate the output into multiple files. Default is False.
    -275        write_mode : str, optional
    -276            The write mode for the CSV file. Default is 'w' (write).
    -277        write_metadata : bool, optional
    -278            Whether to write metadata to the output file. Default is True.
    -279        id_label : str, optional
    -280            The ID label for the data. Default is "corems:".
    -281        """
    -282
    -283        if separate_output:
    -284            # set write mode to write
    -285            # this mode will overwrite the file without warning
    -286            write_mode = "w"
    -287        else:
    -288            # set write mode to append
    -289            write_mode = "a"
    -290
    -291        columns = self._init_columns()
    -292
    -293        dict_data_list = self.get_list_dict_data(self.gcms)
    -294
    -295        out_put_path = self.output_file.with_suffix(".csv")
    -296
    -297        write_header = not out_put_path.exists()
    -298
    -299        try:
    -300            with open(out_put_path, write_mode, newline="") as csvfile:
    -301                writer = csv.DictWriter(csvfile, fieldnames=columns)
    -302                if write_header:
    -303                    writer.writeheader()
    -304                for data in dict_data_list:
    -305                    writer.writerow(data)
    -306
    -307            if write_metadata:
    -308                self.write_settings(out_put_path, self.gcms, id_label=id_label)
    -309
    -310        except IOError as ioerror:
    -311            print(ioerror)
    +            
    261    def to_csv(
    +262        self,
    +263        separate_output=False,
    +264        write_mode="w",
    +265        write_metadata=True,
    +266        id_label="corems:",
    +267    ):
    +268        """Export the data to a CSV file.
    +269
    +270        Parameters:
    +271        ----------
    +272        separate_output : bool, optional
    +273            Whether to separate the output into multiple files. Default is False.
    +274        write_mode : str, optional
    +275            The write mode for the CSV file. Default is 'w' (write).
    +276        write_metadata : bool, optional
    +277            Whether to write metadata to the output file. Default is True.
    +278        id_label : str, optional
    +279            The ID label for the data. Default is "corems:".
    +280        """
    +281
    +282        if separate_output:
    +283            # set write mode to write
    +284            # this mode will overwrite the file without warning
    +285            write_mode = "w"
    +286        else:
    +287            # set write mode to append
    +288            write_mode = "a"
    +289
    +290        columns = self._init_columns()
    +291
    +292        dict_data_list = self.get_list_dict_data(self.gcms)
    +293
    +294        out_put_path = self.output_file.with_suffix(".csv")
    +295
    +296        write_header = not out_put_path.exists()
    +297
    +298        try:
    +299            with open(out_put_path, write_mode, newline="") as csvfile:
    +300                writer = csv.DictWriter(csvfile, fieldnames=columns)
    +301                if write_header:
    +302                    writer.writeheader()
    +303                for data in dict_data_list:
    +304                    writer.writerow(data)
    +305
    +306            if write_metadata:
    +307                self.write_settings(out_put_path, self.gcms, id_label=id_label)
    +308
    +309        except IOError as ioerror:
    +310            print(ioerror)
     
    @@ -3110,150 +3119,150 @@

    Parameters:

    -
    313    def to_hdf(self, id_label="corems:"):
    -314        """Export the data to an HDF5 file.
    -315
    -316        Parameters:
    -317        ----------
    -318        id_label : str, optional
    -319            The ID label for the data. Default is "corems:".
    -320        """
    -321
    -322        # save sample at a time
    -323        def add_compound(gc_peak, compound_obj):
    -324            modifier = compound_obj.classify if compound_obj.classify else ""
    -325            compound_group = compound_obj.name.replace("/", "") + " " + modifier
    -326
    -327            if compound_group not in peak_group:
    -328                compound_group = peak_group.create_group(compound_group)
    -329
    -330                # compound_group.attrs["retention_time"] = compound_obj.retention_time
    -331                compound_group.attrs["retention_index"] = compound_obj.ri
    -332                compound_group.attrs["retention_index_score"] = compound_obj.ri_score
    -333                compound_group.attrs["spectral_similarity_score"] = (
    -334                    compound_obj.spectral_similarity_score
    -335                )
    -336                compound_group.attrs["similarity_score"] = compound_obj.similarity_score
    -337
    -338                compond_mz = compound_group.create_dataset(
    -339                    "mz", data=np.array(compound_obj.mz), dtype="f8"
    -340                )
    -341                compond_abundance = compound_group.create_dataset(
    -342                    "abundance", data=np.array(compound_obj.abundance), dtype="f8"
    -343                )
    -344
    -345                if self.gcms.molecular_search_settings.exploratory_mode:
    -346                    compound_group.attrs["Spectral Similarities"] = json.dumps(
    -347                        compound_obj.spectral_similarity_scores,
    -348                        sort_keys=False,
    -349                        indent=4,
    -350                        separators=(",", ":"),
    -351                    )
    -352            else:
    -353                warnings.warn("Skipping duplicate reference compound.")
    -354
    -355        import json
    -356        from datetime import datetime, timezone
    -357
    -358        import h5py
    -359        import numpy as np
    -360
    -361        output_path = self.output_file.with_suffix(".hdf5")
    -362
    -363        with h5py.File(output_path, "w") as hdf_handle:
    -364            timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    -365            hdf_handle.attrs["time_stamp"] = timenow
    -366            hdf_handle.attrs["data_structure"] = "gcms"
    -367            hdf_handle.attrs["analyzer"] = self.gcms.analyzer
    -368            hdf_handle.attrs["instrument_label"] = self.gcms.instrument_label
    -369
    -370            hdf_handle.attrs["sample_id"] = "self.gcms.id"
    -371            hdf_handle.attrs["sample_name"] = self.gcms.sample_name
    -372            hdf_handle.attrs["input_data"] = str(self.gcms.file_location)
    -373            hdf_handle.attrs["output_data"] = str(output_path)
    -374            hdf_handle.attrs["output_data_id"] = id_label + uuid.uuid4().hex
    -375            hdf_handle.attrs["corems_version"] = __version__
    -376
    -377            hdf_handle.attrs["Stats"] = json.dumps(
    -378                self.get_data_stats(self.gcms),
    -379                sort_keys=False,
    -380                indent=4,
    -381                separators=(",", ": "),
    -382            )
    -383            hdf_handle.attrs["Calibration"] = json.dumps(
    -384                self.get_calibration_stats(self.gcms, id_label),
    -385                sort_keys=False,
    -386                indent=4,
    -387                separators=(",", ": "),
    -388            )
    -389            hdf_handle.attrs["Blank"] = json.dumps(
    -390                self.get_blank_stats(self.gcms),
    -391                sort_keys=False,
    -392                indent=4,
    -393                separators=(",", ": "),
    -394            )
    -395
    -396            corems_dict_setting = parameter_to_dict.get_dict_data_gcms(self.gcms)
    -397            hdf_handle.attrs["CoreMSParameters"] = json.dumps(
    -398                corems_dict_setting, sort_keys=False, indent=4, separators=(",", ": ")
    -399            )
    -400
    -401            scans_dataset = hdf_handle.create_dataset(
    -402                "scans", data=np.array(self.gcms.scans_number), dtype="f8"
    -403            )
    -404            rt_dataset = hdf_handle.create_dataset(
    -405                "rt", data=np.array(self.gcms.retention_time), dtype="f8"
    -406            )
    -407            tic_dataset = hdf_handle.create_dataset(
    -408                "tic", data=np.array(self.gcms.tic), dtype="f8"
    -409            )
    -410            processed_tic_dataset = hdf_handle.create_dataset(
    -411                "processed_tic", data=np.array(self.gcms.processed_tic), dtype="f8"
    -412            )
    -413
    -414            output_score_method = (
    -415                self.gcms.molecular_search_settings.output_score_method
    -416            )
    -417
    -418            for gc_peak in self.gcms:
    -419                # print(gc_peak.retention_time)
    -420                # print(gc_peak.tic)
    -421
    -422                # check if there is a compound candidate
    -423                peak_group = hdf_handle.create_group(str(gc_peak.retention_time))
    -424                peak_group.attrs["deconvolution"] = int(
    -425                    self.gcms.chromatogram_settings.use_deconvolution
    -426                )
    -427
    -428                peak_group.attrs["start_scan"] = gc_peak.start_scan
    -429                peak_group.attrs["apex_scan"] = gc_peak.apex_scan
    -430                peak_group.attrs["final_scan"] = gc_peak.final_scan
    -431
    -432                peak_group.attrs["retention_index"] = gc_peak.ri
    -433                peak_group.attrs["retention_time"] = gc_peak.retention_time
    -434                peak_group.attrs["area"] = gc_peak.area
    -435
    -436                mz = peak_group.create_dataset(
    -437                    "mz", data=np.array(gc_peak.mass_spectrum.mz_exp), dtype="f8"
    -438                )
    -439                abundance = peak_group.create_dataset(
    -440                    "abundance",
    -441                    data=np.array(gc_peak.mass_spectrum.abundance),
    -442                    dtype="f8",
    -443                )
    -444
    -445                if gc_peak:
    -446                    if output_score_method == "highest_sim_score":
    -447                        compound_obj = gc_peak.highest_score_compound
    -448                        add_compound(gc_peak, compound_obj)
    -449
    -450                    elif output_score_method == "highest_ss":
    -451                        compound_obj = gc_peak.highest_ss_compound
    -452                        add_compound(gc_peak, compound_obj)
    -453
    -454                    else:
    -455                        for compound_obj in gc_peak:
    -456                            add_compound(gc_peak, compound_obj)
    +            
    312    def to_hdf(self, id_label="corems:"):
    +313        """Export the data to an HDF5 file.
    +314
    +315        Parameters:
    +316        ----------
    +317        id_label : str, optional
    +318            The ID label for the data. Default is "corems:".
    +319        """
    +320
    +321        # save sample at a time
    +322        def add_compound(gc_peak, compound_obj):
    +323            modifier = compound_obj.classify if compound_obj.classify else ""
    +324            compound_group = compound_obj.name.replace("/", "") + " " + modifier
    +325
    +326            if compound_group not in peak_group:
    +327                compound_group = peak_group.create_group(compound_group)
    +328
    +329                # compound_group.attrs["retention_time"] = compound_obj.retention_time
    +330                compound_group.attrs["retention_index"] = compound_obj.ri
    +331                compound_group.attrs["retention_index_score"] = compound_obj.ri_score
    +332                compound_group.attrs["spectral_similarity_score"] = (
    +333                    compound_obj.spectral_similarity_score
    +334                )
    +335                compound_group.attrs["similarity_score"] = compound_obj.similarity_score
    +336
    +337                compond_mz = compound_group.create_dataset(
    +338                    "mz", data=np.array(compound_obj.mz), dtype="f8"
    +339                )
    +340                compond_abundance = compound_group.create_dataset(
    +341                    "abundance", data=np.array(compound_obj.abundance), dtype="f8"
    +342                )
    +343
    +344                if self.gcms.molecular_search_settings.exploratory_mode:
    +345                    compound_group.attrs["Spectral Similarities"] = json.dumps(
    +346                        compound_obj.spectral_similarity_scores,
    +347                        sort_keys=False,
    +348                        indent=4,
    +349                        separators=(",", ":"),
    +350                    )
    +351            else:
    +352                warnings.warn("Skipping duplicate reference compound.")
    +353
    +354        import json
    +355        from datetime import datetime, timezone
    +356
    +357        import h5py
    +358        import numpy as np
    +359
    +360        output_path = self.output_file.with_suffix(".hdf5")
    +361
    +362        with h5py.File(output_path, "w") as hdf_handle:
    +363            timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    +364            hdf_handle.attrs["time_stamp"] = timenow
    +365            hdf_handle.attrs["data_structure"] = "gcms"
    +366            hdf_handle.attrs["analyzer"] = self.gcms.analyzer
    +367            hdf_handle.attrs["instrument_label"] = self.gcms.instrument_label
    +368
    +369            hdf_handle.attrs["sample_id"] = "self.gcms.id"
    +370            hdf_handle.attrs["sample_name"] = self.gcms.sample_name
    +371            hdf_handle.attrs["input_data"] = str(self.gcms.file_location)
    +372            hdf_handle.attrs["output_data"] = str(output_path)
    +373            hdf_handle.attrs["output_data_id"] = id_label + uuid.uuid4().hex
    +374            hdf_handle.attrs["corems_version"] = __version__
    +375
    +376            hdf_handle.attrs["Stats"] = json.dumps(
    +377                self.get_data_stats(self.gcms),
    +378                sort_keys=False,
    +379                indent=4,
    +380                separators=(",", ": "),
    +381            )
    +382            hdf_handle.attrs["Calibration"] = json.dumps(
    +383                self.get_calibration_stats(self.gcms, id_label),
    +384                sort_keys=False,
    +385                indent=4,
    +386                separators=(",", ": "),
    +387            )
    +388            hdf_handle.attrs["Blank"] = json.dumps(
    +389                self.get_blank_stats(self.gcms),
    +390                sort_keys=False,
    +391                indent=4,
    +392                separators=(",", ": "),
    +393            )
    +394
    +395            corems_dict_setting = parameter_to_dict.get_dict_data_gcms(self.gcms)
    +396            hdf_handle.attrs["CoreMSParameters"] = json.dumps(
    +397                corems_dict_setting, sort_keys=False, indent=4, separators=(",", ": ")
    +398            )
    +399
    +400            scans_dataset = hdf_handle.create_dataset(
    +401                "scans", data=np.array(self.gcms.scans_number), dtype="f8"
    +402            )
    +403            rt_dataset = hdf_handle.create_dataset(
    +404                "rt", data=np.array(self.gcms.retention_time), dtype="f8"
    +405            )
    +406            tic_dataset = hdf_handle.create_dataset(
    +407                "tic", data=np.array(self.gcms.tic), dtype="f8"
    +408            )
    +409            processed_tic_dataset = hdf_handle.create_dataset(
    +410                "processed_tic", data=np.array(self.gcms.processed_tic), dtype="f8"
    +411            )
    +412
    +413            output_score_method = (
    +414                self.gcms.molecular_search_settings.output_score_method
    +415            )
    +416
    +417            for gc_peak in self.gcms:
    +418                # print(gc_peak.retention_time)
    +419                # print(gc_peak.tic)
    +420
    +421                # check if there is a compound candidate
    +422                peak_group = hdf_handle.create_group(str(gc_peak.retention_time))
    +423                peak_group.attrs["deconvolution"] = int(
    +424                    self.gcms.chromatogram_settings.use_deconvolution
    +425                )
    +426
    +427                peak_group.attrs["start_scan"] = gc_peak.start_scan
    +428                peak_group.attrs["apex_scan"] = gc_peak.apex_scan
    +429                peak_group.attrs["final_scan"] = gc_peak.final_scan
    +430
    +431                peak_group.attrs["retention_index"] = gc_peak.ri
    +432                peak_group.attrs["retention_time"] = gc_peak.retention_time
    +433                peak_group.attrs["area"] = gc_peak.area
    +434
    +435                mz = peak_group.create_dataset(
    +436                    "mz", data=np.array(gc_peak.mass_spectrum.mz_exp), dtype="f8"
    +437                )
    +438                abundance = peak_group.create_dataset(
    +439                    "abundance",
    +440                    data=np.array(gc_peak.mass_spectrum.abundance),
    +441                    dtype="f8",
    +442                )
    +443
    +444                if gc_peak:
    +445                    if output_score_method == "highest_sim_score":
    +446                        compound_obj = gc_peak.highest_score_compound
    +447                        add_compound(gc_peak, compound_obj)
    +448
    +449                    elif output_score_method == "highest_ss":
    +450                        compound_obj = gc_peak.highest_ss_compound
    +451                        add_compound(gc_peak, compound_obj)
    +452
    +453                    else:
    +454                        for compound_obj in gc_peak:
    +455                            add_compound(gc_peak, compound_obj)
     
    @@ -3278,49 +3287,49 @@

    Parameters:

    -
    458    def get_data_stats(self, gcms):
    -459        """Get statistics about the GCMS data.
    -460
    -461        Parameters:
    -462        ----------
    -463        gcms : object
    -464            The low resolution GCMS object.
    -465
    -466        Returns:
    -467        -------
    -468        dict
    -469            A dictionary containing the data statistics.
    -470        """
    -471
    -472        matched_peaks = gcms.matched_peaks
    -473        no_matched_peaks = gcms.no_matched_peaks
    -474        unique_metabolites = gcms.unique_metabolites
    -475
    -476        peak_matchs_above_0p85 = 0
    -477        unique_peak_match_above_0p85 = 0
    -478        for match_peak in matched_peaks:
    -479            gc_peak_above_85 = 0
    -480            matches_above_85 = list(
    -481                filter(lambda m: m.similarity_score >= 0.85, match_peak)
    -482            )
    -483            if matches_above_85:
    -484                peak_matchs_above_0p85 += 1
    -485            if len(matches_above_85) == 1:
    -486                unique_peak_match_above_0p85 += 1
    -487
    -488        data_stats = {}
    -489        data_stats["average_signal_noise"] = "ni"
    -490        data_stats["chromatogram_dynamic_range"] = gcms.dynamic_range
    -491        data_stats["total_number_peaks"] = len(gcms)
    -492        data_stats["total_peaks_matched"] = len(matched_peaks)
    -493        data_stats["total_peaks_without_matches"] = len(no_matched_peaks)
    -494        data_stats["total_matches_above_similarity_score_0.85"] = peak_matchs_above_0p85
    -495        data_stats["single_matches_above_similarity_score_0.85"] = (
    -496            unique_peak_match_above_0p85
    -497        )
    -498        data_stats["unique_metabolites"] = len(unique_metabolites)
    -499
    -500        return data_stats
    +            
    457    def get_data_stats(self, gcms):
    +458        """Get statistics about the GCMS data.
    +459
    +460        Parameters:
    +461        ----------
    +462        gcms : object
    +463            The low resolution GCMS object.
    +464
    +465        Returns:
    +466        -------
    +467        dict
    +468            A dictionary containing the data statistics.
    +469        """
    +470
    +471        matched_peaks = gcms.matched_peaks
    +472        no_matched_peaks = gcms.no_matched_peaks
    +473        unique_metabolites = gcms.unique_metabolites
    +474
    +475        peak_matchs_above_0p85 = 0
    +476        unique_peak_match_above_0p85 = 0
    +477        for match_peak in matched_peaks:
    +478            gc_peak_above_85 = 0
    +479            matches_above_85 = list(
    +480                filter(lambda m: m.similarity_score >= 0.85, match_peak)
    +481            )
    +482            if matches_above_85:
    +483                peak_matchs_above_0p85 += 1
    +484            if len(matches_above_85) == 1:
    +485                unique_peak_match_above_0p85 += 1
    +486
    +487        data_stats = {}
    +488        data_stats["average_signal_noise"] = "ni"
    +489        data_stats["chromatogram_dynamic_range"] = gcms.dynamic_range
    +490        data_stats["total_number_peaks"] = len(gcms)
    +491        data_stats["total_peaks_matched"] = len(matched_peaks)
    +492        data_stats["total_peaks_without_matches"] = len(no_matched_peaks)
    +493        data_stats["total_matches_above_similarity_score_0.85"] = peak_matchs_above_0p85
    +494        data_stats["single_matches_above_similarity_score_0.85"] = (
    +495            unique_peak_match_above_0p85
    +496        )
    +497        data_stats["unique_metabolites"] = len(unique_metabolites)
    +498
    +499        return data_stats
     
    @@ -3350,21 +3359,21 @@

    Returns:

    -
    502    def get_calibration_stats(self, gcms, id_label):
    -503        """Get statistics about the GC-MS calibration.
    -504
    -505        Parameters:
    -506        ----------
    -507        """
    -508        calibration_parameters = {}
    -509
    -510        calibration_parameters["calibration_rt_ri_pairs_ref"] = gcms.ri_pairs_ref
    -511        calibration_parameters["data_url"] = str(gcms.cal_file_path)
    -512        calibration_parameters["has_input"] = id_label + corems_md5(gcms.cal_file_path)
    -513        calibration_parameters["data_name"] = str(gcms.cal_file_path.stem)
    -514        calibration_parameters["calibration_method"] = ""
    -515
    -516        return calibration_parameters
    +            
    501    def get_calibration_stats(self, gcms, id_label):
    +502        """Get statistics about the GC-MS calibration.
    +503
    +504        Parameters:
    +505        ----------
    +506        """
    +507        calibration_parameters = {}
    +508
    +509        calibration_parameters["calibration_rt_ri_pairs_ref"] = gcms.ri_pairs_ref
    +510        calibration_parameters["data_url"] = str(gcms.cal_file_path)
    +511        calibration_parameters["has_input"] = id_label + corems_md5(gcms.cal_file_path)
    +512        calibration_parameters["data_name"] = str(gcms.cal_file_path.stem)
    +513        calibration_parameters["calibration_method"] = ""
    +514
    +515        return calibration_parameters
     
    @@ -3386,17 +3395,17 @@

    Parameters:

    -
    518    def get_blank_stats(self, gcms):
    -519        """Get statistics about the GC-MS blank."""
    -520        blank_parameters = {}
    -521
    -522        blank_parameters["data_name"] = "ni"
    -523        blank_parameters["blank_id"] = "ni"
    -524        blank_parameters["data_url"] = "ni"
    -525        blank_parameters["has_input"] = "ni"
    -526        blank_parameters["common_features_to_blank"] = "ni"
    -527
    -528        return blank_parameters
    +            
    517    def get_blank_stats(self, gcms):
    +518        """Get statistics about the GC-MS blank."""
    +519        blank_parameters = {}
    +520
    +521        blank_parameters["data_name"] = "ni"
    +522        blank_parameters["blank_id"] = "ni"
    +523        blank_parameters["data_url"] = "ni"
    +524        blank_parameters["has_input"] = "ni"
    +525        blank_parameters["common_features_to_blank"] = "ni"
    +526
    +527        return blank_parameters
     
    @@ -3416,15 +3425,15 @@

    Parameters:

    -
    530    def get_instrument_metadata(self, gcms):
    -531        """Get metadata about the GC-MS instrument."""
    -532        instrument_metadata = {}
    -533
    -534        instrument_metadata["analyzer"] = gcms.analyzer
    -535        instrument_metadata["instrument_label"] = gcms.instrument_label
    -536        instrument_metadata["instrument_id"] = uuid.uuid4().hex
    -537
    -538        return instrument_metadata
    +            
    529    def get_instrument_metadata(self, gcms):
    +530        """Get metadata about the GC-MS instrument."""
    +531        instrument_metadata = {}
    +532
    +533        instrument_metadata["analyzer"] = gcms.analyzer
    +534        instrument_metadata["instrument_label"] = gcms.instrument_label
    +535        instrument_metadata["instrument_id"] = uuid.uuid4().hex
    +536
    +537        return instrument_metadata
     
    @@ -3444,47 +3453,47 @@

    Parameters:

    -
    540    def get_data_metadata(self, gcms, id_label, output_path):
    -541        """Get metadata about the GC-MS data.
    -542
    -543        Parameters:
    -544        ----------
    -545        gcms : object
    -546            The low resolution GCMS object.
    -547        id_label : str
    -548            The ID label for the data.
    -549        output_path : str
    -550            The output file path.
    -551
    -552        Returns:
    -553        -------
    -554        dict
    -555            A dictionary containing the data metadata.
    -556        """
    -557        if isinstance(output_path, str):
    -558            output_path = Path(output_path)
    -559
    -560        paramaters_path = output_path.with_suffix(".json")
    -561
    -562        if paramaters_path.exists():
    -563            with paramaters_path.open() as current_param:
    -564                metadata = json.load(current_param)
    -565                data_metadata = metadata.get("Data")
    -566        else:
    -567            data_metadata = {}
    -568            data_metadata["data_name"] = []
    -569            data_metadata["input_data_url"] = []
    -570            data_metadata["has_input"] = []
    -571
    -572        data_metadata["data_name"].append(gcms.sample_name)
    -573        data_metadata["input_data_url"].append(str(gcms.file_location))
    -574        data_metadata["has_input"].append(id_label + corems_md5(gcms.file_location))
    -575
    -576        data_metadata["output_data_name"] = str(output_path.stem)
    -577        data_metadata["output_data_url"] = str(output_path)
    -578        data_metadata["has_output"] = id_label + corems_md5(output_path)
    -579
    -580        return data_metadata
    +            
    539    def get_data_metadata(self, gcms, id_label, output_path):
    +540        """Get metadata about the GC-MS data.
    +541
    +542        Parameters:
    +543        ----------
    +544        gcms : object
    +545            The low resolution GCMS object.
    +546        id_label : str
    +547            The ID label for the data.
    +548        output_path : str
    +549            The output file path.
    +550
    +551        Returns:
    +552        -------
    +553        dict
    +554            A dictionary containing the data metadata.
    +555        """
    +556        if isinstance(output_path, str):
    +557            output_path = Path(output_path)
    +558
    +559        paramaters_path = output_path.with_suffix(".json")
    +560
    +561        if paramaters_path.exists():
    +562            with paramaters_path.open() as current_param:
    +563                metadata = json.load(current_param)
    +564                data_metadata = metadata.get("Data")
    +565        else:
    +566            data_metadata = {}
    +567            data_metadata["data_name"] = []
    +568            data_metadata["input_data_url"] = []
    +569            data_metadata["has_input"] = []
    +570
    +571        data_metadata["data_name"].append(gcms.sample_name)
    +572        data_metadata["input_data_url"].append(str(gcms.file_location))
    +573        data_metadata["has_input"].append(id_label + corems_md5(gcms.file_location))
    +574
    +575        data_metadata["output_data_name"] = str(output_path.stem)
    +576        data_metadata["output_data_url"] = str(output_path)
    +577        data_metadata["has_output"] = id_label + corems_md5(output_path)
    +578
    +579        return data_metadata
     
    @@ -3518,43 +3527,43 @@

    Returns:

    -
    582    def get_parameters_json(self, gcms, id_label, output_path):
    -583        """Get the parameters as a JSON string.
    -584
    -585        Parameters:
    -586        ----------
    -587        gcms : GCMS object
    -588            The low resolution GCMS object.
    -589        id_label : str
    -590            The ID label for the data.
    -591        output_path : str
    -592            The output file path.
    -593
    -594        Returns:
    -595        -------
    -596        str
    -597            The parameters as a JSON string.
    -598        """
    -599
    -600        output_parameters_dict = {}
    -601        output_parameters_dict["Data"] = self.get_data_metadata(
    -602            gcms, id_label, output_path
    -603        )
    -604        output_parameters_dict["Stats"] = self.get_data_stats(gcms)
    -605        output_parameters_dict["Calibration"] = self.get_calibration_stats(
    -606            gcms, id_label
    -607        )
    -608        output_parameters_dict["Blank"] = self.get_blank_stats(gcms)
    -609        output_parameters_dict["Instrument"] = self.get_instrument_metadata(gcms)
    -610        corems_dict_setting = parameter_to_dict.get_dict_data_gcms(gcms)
    -611        corems_dict_setting["corems_version"] = __version__
    -612        output_parameters_dict["CoreMSParameters"] = corems_dict_setting
    -613        output_parameters_dict["has_metabolite"] = gcms.metabolites_data
    -614        output = json.dumps(
    -615            output_parameters_dict, sort_keys=False, indent=4, separators=(",", ": ")
    -616        )
    -617
    -618        return output
    +            
    581    def get_parameters_json(self, gcms, id_label, output_path):
    +582        """Get the parameters as a JSON string.
    +583
    +584        Parameters:
    +585        ----------
    +586        gcms : GCMS object
    +587            The low resolution GCMS object.
    +588        id_label : str
    +589            The ID label for the data.
    +590        output_path : str
    +591            The output file path.
    +592
    +593        Returns:
    +594        -------
    +595        str
    +596            The parameters as a JSON string.
    +597        """
    +598
    +599        output_parameters_dict = {}
    +600        output_parameters_dict["Data"] = self.get_data_metadata(
    +601            gcms, id_label, output_path
    +602        )
    +603        output_parameters_dict["Stats"] = self.get_data_stats(gcms)
    +604        output_parameters_dict["Calibration"] = self.get_calibration_stats(
    +605            gcms, id_label
    +606        )
    +607        output_parameters_dict["Blank"] = self.get_blank_stats(gcms)
    +608        output_parameters_dict["Instrument"] = self.get_instrument_metadata(gcms)
    +609        corems_dict_setting = parameter_to_dict.get_dict_data_gcms(gcms)
    +610        corems_dict_setting["corems_version"] = __version__
    +611        output_parameters_dict["CoreMSParameters"] = corems_dict_setting
    +612        output_parameters_dict["has_metabolite"] = gcms.metabolites_data
    +613        output = json.dumps(
    +614            output_parameters_dict, sort_keys=False, indent=4, separators=(",", ": ")
    +615        )
    +616
    +617        return output
     
    @@ -3588,28 +3597,28 @@

    Returns:

    -
    620    def write_settings(self, output_path, gcms, id_label="emsl:"):
    -621        """Write the settings to a JSON file.
    -622
    -623        Parameters:
    -624        ----------
    -625        output_path : str
    -626            The output file path.
    -627        gcms : GCMS object
    -628            The low resolution GCMS object.
    -629        id_label : str
    -630            The ID label for the data. Default is "emsl:".
    -631
    -632        """
    -633
    -634        output = self.get_parameters_json(gcms, id_label, output_path)
    -635
    -636        with open(
    -637            output_path.with_suffix(".json"),
    -638            "w",
    -639            encoding="utf8",
    -640        ) as outfile:
    -641            outfile.write(output)
    +            
    619    def write_settings(self, output_path, gcms, id_label="emsl:"):
    +620        """Write the settings to a JSON file.
    +621
    +622        Parameters:
    +623        ----------
    +624        output_path : str
    +625            The output file path.
    +626        gcms : GCMS object
    +627            The low resolution GCMS object.
    +628        id_label : str
    +629            The ID label for the data. Default is "emsl:".
    +630
    +631        """
    +632
    +633        output = self.get_parameters_json(gcms, id_label, output_path)
    +634
    +635        with open(
    +636            output_path.with_suffix(".json"),
    +637            "w",
    +638            encoding="utf8",
    +639        ) as outfile:
    +640            outfile.write(output)
     
    @@ -3638,142 +3647,142 @@

    Parameters:

    -
    643    def get_list_dict_data(self, gcms, include_no_match=True, no_match_inline=False):
    -644        """Get the exported data as a list of dictionaries.
    -645
    -646        Parameters:
    -647        ----------
    -648        gcms : object
    -649            The low resolution GCMS object.
    -650        include_no_match : bool, optional
    -651            Whether to include no match data. Default is True.
    -652        no_match_inline : bool, optional
    -653            Whether to include no match data inline. Default is False.
    -654
    -655        Returns:
    -656        -------
    -657        list
    -658            The exported data as a list of dictionaries.
    -659        """
    -660
    -661        output_score_method = gcms.molecular_search_settings.output_score_method
    -662
    -663        dict_data_list = []
    -664
    -665        def add_match_dict_data():
    -666            derivatization = "{}:{}:{}".format(
    -667                compound_obj.classify,
    -668                compound_obj.derivativenum,
    -669                compound_obj.derivatization,
    -670            )
    -671            out_dict = {
    -672                "Sample name": gcms.sample_name,
    -673                "Peak Index": gcpeak_index,
    -674                "Retention Time": gc_peak.retention_time,
    -675                "Retention Time Ref": compound_obj.retention_time,
    -676                "Peak Height": gc_peak.tic,
    -677                "Peak Area": gc_peak.area,
    -678                "Retention index": gc_peak.ri,
    -679                "Retention index Ref": compound_obj.ri,
    -680                "Retention Index Score": compound_obj.ri_score,
    -681                "Spectral Similarity Score": compound_obj.spectral_similarity_score,
    -682                "Similarity Score": compound_obj.similarity_score,
    -683                "Compound Name": compound_obj.name,
    -684                "Chebi ID": compound_obj.metadata.chebi,
    -685                "Kegg Compound ID": compound_obj.metadata.kegg,
    -686                "Inchi": compound_obj.metadata.inchi,
    -687                "Inchi Key": compound_obj.metadata.inchikey,
    -688                "Smiles": compound_obj.metadata.smiles,
    -689                "Molecular Formula": compound_obj.formula,
    -690                "IUPAC Name": compound_obj.metadata.iupac_name,
    -691                "Traditional Name": compound_obj.metadata.traditional_name,
    -692                "Common Name": compound_obj.metadata.common_name,
    -693                "Derivatization": derivatization,
    -694            }
    -695
    -696            if self.gcms.molecular_search_settings.exploratory_mode:
    -697                out_dict.update(
    -698                    {
    -699                        "Weighted Cosine Correlation": compound_obj.spectral_similarity_scores.get(
    -700                            "weighted_cosine_correlation"
    -701                        ),
    -702                        "Cosine Correlation": compound_obj.spectral_similarity_scores.get(
    -703                            "cosine_correlation"
    -704                        ),
    -705                        "Stein Scott Similarity": compound_obj.spectral_similarity_scores.get(
    -706                            "stein_scott_similarity"
    -707                        ),
    -708                        "Pearson Correlation": compound_obj.spectral_similarity_scores.get(
    -709                            "pearson_correlation"
    -710                        ),
    -711                        "Spearman Correlation": compound_obj.spectral_similarity_scores.get(
    -712                            "spearman_correlation"
    -713                        ),
    -714                        "Kendall Tau Correlation": compound_obj.spectral_similarity_scores.get(
    -715                            "kendall_tau_correlation"
    -716                        ),
    -717                        "DFT Correlation": compound_obj.spectral_similarity_scores.get(
    -718                            "dft_correlation"
    -719                        ),
    -720                        "DWT Correlation": compound_obj.spectral_similarity_scores.get(
    -721                            "dwt_correlation"
    -722                        ),
    -723                        "Euclidean Distance": compound_obj.spectral_similarity_scores.get(
    -724                            "euclidean_distance"
    -725                        ),
    -726                        "Manhattan Distance": compound_obj.spectral_similarity_scores.get(
    -727                            "manhattan_distance"
    -728                        ),
    -729                        "Jaccard Distance": compound_obj.spectral_similarity_scores.get(
    -730                            "jaccard_distance"
    -731                        ),
    -732                    }
    -733                )
    -734                for method in methods_name:
    -735                    out_dict[methods_name.get(method)] = (
    -736                        compound_obj.spectral_similarity_scores.get(method)
    -737                    )
    -738
    -739            dict_data_list.append(out_dict)
    -740
    -741        def add_no_match_dict_data():
    -742            dict_data_list.append(
    -743                {
    -744                    "Sample name": gcms.sample_name,
    -745                    "Peak Index": gcpeak_index,
    -746                    "Retention Time": gc_peak.retention_time,
    -747                    "Peak Height": gc_peak.tic,
    -748                    "Peak Area": gc_peak.area,
    -749                    "Retention index": gc_peak.ri,
    -750                }
    -751            )
    -752
    -753        for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks):
    -754            # check if there is a compound candidate
    -755            if gc_peak:
    -756                if output_score_method == "highest_sim_score":
    -757                    compound_obj = gc_peak.highest_score_compound
    -758                    add_match_dict_data()
    -759
    -760                elif output_score_method == "highest_ss":
    -761                    compound_obj = gc_peak.highest_ss_compound
    -762                    add_match_dict_data()
    -763
    -764                else:
    -765                    for compound_obj in gc_peak:
    -766                        add_match_dict_data()  # add monoisotopic peak
    -767
    -768            else:
    -769                # include not_match
    -770                if include_no_match and no_match_inline:
    -771                    add_no_match_dict_data()
    -772
    -773        if include_no_match and not no_match_inline:
    -774            for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks):
    -775                if not gc_peak:
    -776                    add_no_match_dict_data()
    -777
    -778        return dict_data_list
    +            
    642    def get_list_dict_data(self, gcms, include_no_match=True, no_match_inline=False):
    +643        """Get the exported data as a list of dictionaries.
    +644
    +645        Parameters:
    +646        ----------
    +647        gcms : object
    +648            The low resolution GCMS object.
    +649        include_no_match : bool, optional
    +650            Whether to include no match data. Default is True.
    +651        no_match_inline : bool, optional
    +652            Whether to include no match data inline. Default is False.
    +653
    +654        Returns:
    +655        -------
    +656        list
    +657            The exported data as a list of dictionaries.
    +658        """
    +659
    +660        output_score_method = gcms.molecular_search_settings.output_score_method
    +661
    +662        dict_data_list = []
    +663
    +664        def add_match_dict_data():
    +665            derivatization = "{}:{}:{}".format(
    +666                compound_obj.classify,
    +667                compound_obj.derivativenum,
    +668                compound_obj.derivatization,
    +669            )
    +670            out_dict = {
    +671                "Sample name": gcms.sample_name,
    +672                "Peak Index": gcpeak_index,
    +673                "Retention Time": gc_peak.retention_time,
    +674                "Retention Time Ref": compound_obj.retention_time,
    +675                "Peak Height": gc_peak.tic,
    +676                "Peak Area": gc_peak.area,
    +677                "Retention index": gc_peak.ri,
    +678                "Retention index Ref": compound_obj.ri,
    +679                "Retention Index Score": compound_obj.ri_score,
    +680                "Spectral Similarity Score": compound_obj.spectral_similarity_score,
    +681                "Similarity Score": compound_obj.similarity_score,
    +682                "Compound Name": compound_obj.name,
    +683                "Chebi ID": compound_obj.metadata.chebi,
    +684                "Kegg Compound ID": compound_obj.metadata.kegg,
    +685                "Inchi": compound_obj.metadata.inchi,
    +686                "Inchi Key": compound_obj.metadata.inchikey,
    +687                "Smiles": compound_obj.metadata.smiles,
    +688                "Molecular Formula": compound_obj.formula,
    +689                "IUPAC Name": compound_obj.metadata.iupac_name,
    +690                "Traditional Name": compound_obj.metadata.traditional_name,
    +691                "Common Name": compound_obj.metadata.common_name,
    +692                "Derivatization": derivatization,
    +693            }
    +694
    +695            if self.gcms.molecular_search_settings.exploratory_mode:
    +696                out_dict.update(
    +697                    {
    +698                        "Weighted Cosine Correlation": compound_obj.spectral_similarity_scores.get(
    +699                            "weighted_cosine_correlation"
    +700                        ),
    +701                        "Cosine Correlation": compound_obj.spectral_similarity_scores.get(
    +702                            "cosine_correlation"
    +703                        ),
    +704                        "Stein Scott Similarity": compound_obj.spectral_similarity_scores.get(
    +705                            "stein_scott_similarity"
    +706                        ),
    +707                        "Pearson Correlation": compound_obj.spectral_similarity_scores.get(
    +708                            "pearson_correlation"
    +709                        ),
    +710                        "Spearman Correlation": compound_obj.spectral_similarity_scores.get(
    +711                            "spearman_correlation"
    +712                        ),
    +713                        "Kendall Tau Correlation": compound_obj.spectral_similarity_scores.get(
    +714                            "kendall_tau_correlation"
    +715                        ),
    +716                        "DFT Correlation": compound_obj.spectral_similarity_scores.get(
    +717                            "dft_correlation"
    +718                        ),
    +719                        "DWT Correlation": compound_obj.spectral_similarity_scores.get(
    +720                            "dwt_correlation"
    +721                        ),
    +722                        "Euclidean Distance": compound_obj.spectral_similarity_scores.get(
    +723                            "euclidean_distance"
    +724                        ),
    +725                        "Manhattan Distance": compound_obj.spectral_similarity_scores.get(
    +726                            "manhattan_distance"
    +727                        ),
    +728                        "Jaccard Distance": compound_obj.spectral_similarity_scores.get(
    +729                            "jaccard_distance"
    +730                        ),
    +731                    }
    +732                )
    +733                for method in methods_name:
    +734                    out_dict[methods_name.get(method)] = (
    +735                        compound_obj.spectral_similarity_scores.get(method)
    +736                    )
    +737
    +738            dict_data_list.append(out_dict)
    +739
    +740        def add_no_match_dict_data():
    +741            dict_data_list.append(
    +742                {
    +743                    "Sample name": gcms.sample_name,
    +744                    "Peak Index": gcpeak_index,
    +745                    "Retention Time": gc_peak.retention_time,
    +746                    "Peak Height": gc_peak.tic,
    +747                    "Peak Area": gc_peak.area,
    +748                    "Retention index": gc_peak.ri,
    +749                }
    +750            )
    +751
    +752        for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks):
    +753            # check if there is a compound candidate
    +754            if gc_peak:
    +755                if output_score_method == "highest_sim_score":
    +756                    compound_obj = gc_peak.highest_score_compound
    +757                    add_match_dict_data()
    +758
    +759                elif output_score_method == "highest_ss":
    +760                    compound_obj = gc_peak.highest_ss_compound
    +761                    add_match_dict_data()
    +762
    +763                else:
    +764                    for compound_obj in gc_peak:
    +765                        add_match_dict_data()  # add monoisotopic peak
    +766
    +767            else:
    +768                # include not_match
    +769                if include_no_match and no_match_inline:
    +770                    add_no_match_dict_data()
    +771
    +772        if include_no_match and not no_match_inline:
    +773            for gcpeak_index, gc_peak in enumerate(gcms.sorted_gcpeaks):
    +774                if not gc_peak:
    +775                    add_no_match_dict_data()
    +776
    +777        return dict_data_list
     
    @@ -3808,227 +3817,233 @@

    Returns:

    -
     781class HighResMassSpectraExport(HighResMassSpecExport):
    - 782    """A class to export high resolution mass spectra data.
    - 783
    - 784    This class provides methods to export high resolution mass spectra data to various formats
    - 785    such as Excel, CSV, HDF5, and Pandas DataFrame.
    - 786
    - 787    Parameters
    - 788    ----------
    - 789    out_file_path : str | Path
    - 790        The output file path.
    - 791    mass_spectra : object
    - 792        The high resolution mass spectra object.
    - 793    output_type : str, optional
    - 794        The output type. Default is 'excel'.
    - 795
    - 796    Attributes
    - 797    ----------
    - 798    output_file : Path
    - 799        The output file path without suffix
    - 800    dir_loc : Path
    - 801        The directory location for the output file,
    - 802        by default this will be the output_file + ".corems" and all output files will be
    - 803        written into this location
    - 804    mass_spectra : MassSpectraBase
    - 805        The high resolution mass spectra object.
    - 806    """
    - 807
    - 808    def __init__(self, out_file_path, mass_spectra, output_type="excel"):
    - 809        super().__init__(
    - 810            out_file_path=out_file_path, mass_spectrum=None, output_type=output_type
    - 811        )
    - 812
    - 813        self.dir_loc = Path(out_file_path + ".corems")
    - 814        self.dir_loc.mkdir(exist_ok=True)
    - 815        # Place the output file in the directory
    - 816        self.output_file = self.dir_loc / Path(out_file_path).name
    - 817        self._output_type = output_type  # 'excel', 'csv', 'pandas' or 'hdf5'
    - 818        self.mass_spectra = mass_spectra
    - 819        self.atoms_order_list = None
    - 820        self._init_columns()
    - 821
    - 822    def get_pandas_df(self):
    - 823        """Get the mass spectra as a list of Pandas DataFrames."""
    - 824
    - 825        list_df = []
    - 826
    - 827        for mass_spectrum in self.mass_spectra:
    - 828            columns = self.columns_label + self.get_all_used_atoms_in_order(
    - 829                mass_spectrum
    - 830            )
    - 831
    - 832            dict_data_list = self.get_list_dict_data(mass_spectrum)
    - 833
    - 834            df = DataFrame(dict_data_list, columns=columns)
    - 835
    - 836            scan_number = mass_spectrum.scan_number
    - 837
    - 838            df.name = str(self.output_file) + "_" + str(scan_number)
    - 839
    - 840            list_df.append(df)
    - 841
    - 842        return list_df
    - 843
    - 844    def to_pandas(self, write_metadata=True):
    - 845        """Export the data to a Pandas DataFrame and save it as a pickle file.
    - 846
    - 847        Parameters:
    - 848        ----------
    - 849        write_metadata : bool, optional
    - 850            Whether to write metadata to the output file. Default is True.
    - 851        """
    - 852
    - 853        for mass_spectrum in self.mass_spectra:
    - 854            columns = self.columns_label + self.get_all_used_atoms_in_order(
    - 855                mass_spectrum
    - 856            )
    - 857
    - 858            dict_data_list = self.get_list_dict_data(mass_spectrum)
    - 859
    - 860            df = DataFrame(dict_data_list, columns=columns)
    - 861
    - 862            scan_number = mass_spectrum.scan_number
    - 863
    - 864            out_filename = Path(
    - 865                "%s_scan%s%s" % (self.output_file, str(scan_number), ".pkl")
    - 866            )
    - 867
    - 868            df.to_pickle(self.dir_loc / out_filename)
    - 869
    - 870            if write_metadata:
    - 871                self.write_settings(
    - 872                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    - 873                )
    - 874
    - 875    def to_excel(self, write_metadata=True):
    - 876        """Export the data to an Excel file.
    - 877
    - 878        Parameters:
    - 879        ----------
    - 880        write_metadata : bool, optional
    - 881            Whether to write metadata to the output file. Default is True.
    - 882        """
    - 883        for mass_spectrum in self.mass_spectra:
    - 884            columns = self.columns_label + self.get_all_used_atoms_in_order(
    - 885                mass_spectrum
    - 886            )
    - 887
    - 888            dict_data_list = self.get_list_dict_data(mass_spectrum)
    - 889
    - 890            df = DataFrame(dict_data_list, columns=columns)
    - 891
    - 892            scan_number = mass_spectrum.scan_number
    - 893
    - 894            out_filename = Path(
    - 895                "%s_scan%s%s" % (self.output_file, str(scan_number), ".xlsx")
    - 896            )
    - 897
    - 898            df.to_excel(self.dir_loc / out_filename)
    - 899
    - 900            if write_metadata:
    - 901                self.write_settings(
    - 902                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    - 903                )
    - 904
    - 905    def to_csv(self, write_metadata=True):
    - 906        """Export the data to a CSV file.
    - 907
    - 908        Parameters:
    - 909        ----------
    - 910        write_metadata : bool, optional
    - 911            Whether to write metadata to the output file. Default is True.
    - 912        """
    - 913        import csv
    - 914
    - 915        for mass_spectrum in self.mass_spectra:
    - 916            columns = self.columns_label + self.get_all_used_atoms_in_order(
    - 917                mass_spectrum
    - 918            )
    - 919
    - 920            scan_number = mass_spectrum.scan_number
    - 921
    - 922            dict_data_list = self.get_list_dict_data(mass_spectrum)
    - 923
    - 924            out_filename = Path(
    - 925                "%s_scan%s%s" % (self.output_file, str(scan_number), ".csv")
    - 926            )
    - 927
    - 928            with open(self.dir_loc / out_filename, "w", newline="") as csvfile:
    - 929                writer = csv.DictWriter(csvfile, fieldnames=columns)
    - 930                writer.writeheader()
    - 931                for data in dict_data_list:
    - 932                    writer.writerow(data)
    - 933
    - 934            if write_metadata:
    - 935                self.write_settings(
    - 936                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    - 937                )
    - 938
    - 939    def get_mass_spectra_attrs(self):
    - 940        """Get the mass spectra attributes as a JSON string.
    - 941
    - 942        Parameters:
    - 943        ----------
    - 944        mass_spectra : object
    - 945            The high resolution mass spectra object.
    - 946
    - 947        Returns:
    - 948        -------
    - 949        str
    - 950            The mass spectra attributes as a JSON string.
    - 951        """
    - 952        dict_ms_attrs = {}
    - 953        dict_ms_attrs["analyzer"] = self.mass_spectra.analyzer
    - 954        dict_ms_attrs["instrument_label"] = self.mass_spectra.instrument_label
    - 955        dict_ms_attrs["sample_name"] = self.mass_spectra.sample_name
    - 956
    - 957        return json.dumps(
    - 958            dict_ms_attrs, sort_keys=False, indent=4, separators=(",", ": ")
    - 959        )
    - 960
    - 961    def to_hdf(self, overwrite=False, export_raw=True):
    - 962        """Export the data to an HDF5 file.
    - 963        
    - 964        Parameters
    - 965        ----------
    - 966        overwrite : bool, optional
    - 967            Whether to overwrite the output file. Default is False.
    - 968        export_raw : bool, optional
    - 969            Whether to export the raw mass spectra data. Default is True.
    - 970        """
    - 971        if overwrite:
    - 972            if self.output_file.with_suffix(".hdf5").exists():
    - 973                self.output_file.with_suffix(".hdf5").unlink()
    - 974
    - 975        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    - 976            if not hdf_handle.attrs.get("date_utc"):
    - 977                # Set metadata for all mass spectra
    - 978                timenow = str(
    - 979                    datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")
    - 980                )
    - 981                hdf_handle.attrs["date_utc"] = timenow
    - 982                hdf_handle.attrs["filename"] = self.mass_spectra.file_location.name
    - 983                hdf_handle.attrs["data_structure"] = "mass_spectra"
    - 984                hdf_handle.attrs["analyzer"] = self.mass_spectra.analyzer
    - 985                hdf_handle.attrs["instrument_label"] = (
    - 986                    self.mass_spectra.instrument_label
    - 987                )
    - 988                hdf_handle.attrs["sample_name"] = self.mass_spectra.sample_name
    - 989                hdf_handle.attrs["polarity"] = self.mass_spectra.polarity
    - 990                hdf_handle.attrs["parser_type"] = self.mass_spectra.spectra_parser_class.__name__
    - 991                hdf_handle.attrs["original_file_location"] = self.mass_spectra.file_location._str
    - 992
    - 993            if "mass_spectra" not in hdf_handle:
    - 994                mass_spectra_group = hdf_handle.create_group("mass_spectra")
    - 995            else:
    - 996                mass_spectra_group = hdf_handle.get("mass_spectra")
    - 997
    - 998            for mass_spectrum in self.mass_spectra:
    - 999                group_key = str(int(mass_spectrum.scan_number))
    +            
     780class HighResMassSpectraExport(HighResMassSpecExport):
    + 781    """A class to export high resolution mass spectra data.
    + 782
    + 783    This class provides methods to export high resolution mass spectra data to various formats
    + 784    such as Excel, CSV, HDF5, and Pandas DataFrame.
    + 785
    + 786    Parameters
    + 787    ----------
    + 788    out_file_path : str | Path
    + 789        The output file path.
    + 790    mass_spectra : object
    + 791        The high resolution mass spectra object.
    + 792    output_type : str, optional
    + 793        The output type. Default is 'excel'.
    + 794
    + 795    Attributes
    + 796    ----------
    + 797    output_file : Path
    + 798        The output file path without suffix
    + 799    dir_loc : Path
    + 800        The directory location for the output file,
    + 801        by default this will be the output_file + ".corems" and all output files will be
    + 802        written into this location
    + 803    mass_spectra : MassSpectraBase
    + 804        The high resolution mass spectra object.
    + 805    """
    + 806
    + 807    def __init__(self, out_file_path, mass_spectra, output_type="excel"):
    + 808        super().__init__(
    + 809            out_file_path=out_file_path, mass_spectrum=None, output_type=output_type
    + 810        )
    + 811
    + 812        self.dir_loc = Path(out_file_path + ".corems")
    + 813        self.dir_loc.mkdir(exist_ok=True)
    + 814        # Place the output file in the directory
    + 815        self.output_file = self.dir_loc / Path(out_file_path).name
    + 816        self._output_type = output_type  # 'excel', 'csv', 'pandas' or 'hdf5'
    + 817        self.mass_spectra = mass_spectra
    + 818        self.atoms_order_list = None
    + 819        self._init_columns()
    + 820
    + 821    def get_pandas_df(self):
    + 822        """Get the mass spectra as a list of Pandas DataFrames."""
    + 823
    + 824        list_df = []
    + 825
    + 826        for mass_spectrum in self.mass_spectra:
    + 827            columns = self.columns_label + self.get_all_used_atoms_in_order(
    + 828                mass_spectrum
    + 829            )
    + 830
    + 831            dict_data_list = self.get_list_dict_data(mass_spectrum)
    + 832
    + 833            df = DataFrame(dict_data_list, columns=columns)
    + 834
    + 835            scan_number = mass_spectrum.scan_number
    + 836
    + 837            df.name = str(self.output_file) + "_" + str(scan_number)
    + 838
    + 839            list_df.append(df)
    + 840
    + 841        return list_df
    + 842
    + 843    def to_pandas(self, write_metadata=True):
    + 844        """Export the data to a Pandas DataFrame and save it as a pickle file.
    + 845
    + 846        Parameters:
    + 847        ----------
    + 848        write_metadata : bool, optional
    + 849            Whether to write metadata to the output file. Default is True.
    + 850        """
    + 851
    + 852        for mass_spectrum in self.mass_spectra:
    + 853            columns = self.columns_label + self.get_all_used_atoms_in_order(
    + 854                mass_spectrum
    + 855            )
    + 856
    + 857            dict_data_list = self.get_list_dict_data(mass_spectrum)
    + 858
    + 859            df = DataFrame(dict_data_list, columns=columns)
    + 860
    + 861            scan_number = mass_spectrum.scan_number
    + 862
    + 863            out_filename = Path(
    + 864                "%s_scan%s%s" % (self.output_file, str(scan_number), ".pkl")
    + 865            )
    + 866
    + 867            df.to_pickle(self.dir_loc / out_filename)
    + 868
    + 869            if write_metadata:
    + 870                self.write_settings(
    + 871                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    + 872                )
    + 873
    + 874    def to_excel(self, write_metadata=True):
    + 875        """Export the data to an Excel file.
    + 876
    + 877        Parameters:
    + 878        ----------
    + 879        write_metadata : bool, optional
    + 880            Whether to write metadata to the output file. Default is True.
    + 881        """
    + 882        for mass_spectrum in self.mass_spectra:
    + 883            columns = self.columns_label + self.get_all_used_atoms_in_order(
    + 884                mass_spectrum
    + 885            )
    + 886
    + 887            dict_data_list = self.get_list_dict_data(mass_spectrum)
    + 888
    + 889            df = DataFrame(dict_data_list, columns=columns)
    + 890
    + 891            scan_number = mass_spectrum.scan_number
    + 892
    + 893            out_filename = Path(
    + 894                "%s_scan%s%s" % (self.output_file, str(scan_number), ".xlsx")
    + 895            )
    + 896
    + 897            df.to_excel(self.dir_loc / out_filename)
    + 898
    + 899            if write_metadata:
    + 900                self.write_settings(
    + 901                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    + 902                )
    + 903
    + 904    def to_csv(self, write_metadata=True):
    + 905        """Export the data to a CSV file.
    + 906
    + 907        Parameters:
    + 908        ----------
    + 909        write_metadata : bool, optional
    + 910            Whether to write metadata to the output file. Default is True.
    + 911        """
    + 912        import csv
    + 913
    + 914        for mass_spectrum in self.mass_spectra:
    + 915            columns = self.columns_label + self.get_all_used_atoms_in_order(
    + 916                mass_spectrum
    + 917            )
    + 918
    + 919            scan_number = mass_spectrum.scan_number
    + 920
    + 921            dict_data_list = self.get_list_dict_data(mass_spectrum)
    + 922
    + 923            out_filename = Path(
    + 924                "%s_scan%s%s" % (self.output_file, str(scan_number), ".csv")
    + 925            )
    + 926
    + 927            with open(self.dir_loc / out_filename, "w", newline="") as csvfile:
    + 928                writer = csv.DictWriter(csvfile, fieldnames=columns)
    + 929                writer.writeheader()
    + 930                for data in dict_data_list:
    + 931                    writer.writerow(data)
    + 932
    + 933            if write_metadata:
    + 934                self.write_settings(
    + 935                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    + 936                )
    + 937
    + 938    def get_mass_spectra_attrs(self):
    + 939        """Get the mass spectra attributes as a JSON string.
    + 940
    + 941        Parameters:
    + 942        ----------
    + 943        mass_spectra : object
    + 944            The high resolution mass spectra object.
    + 945
    + 946        Returns:
    + 947        -------
    + 948        str
    + 949            The mass spectra attributes as a JSON string.
    + 950        """
    + 951        dict_ms_attrs = {}
    + 952        dict_ms_attrs["analyzer"] = self.mass_spectra.analyzer
    + 953        dict_ms_attrs["instrument_label"] = self.mass_spectra.instrument_label
    + 954        dict_ms_attrs["sample_name"] = self.mass_spectra.sample_name
    + 955
    + 956        return json.dumps(
    + 957            dict_ms_attrs, sort_keys=False, indent=4, separators=(",", ": ")
    + 958        )
    + 959
    + 960    def to_hdf(self, overwrite=False, export_raw=True):
    + 961        """Export the data to an HDF5 file.
    + 962
    + 963        Parameters
    + 964        ----------
    + 965        overwrite : bool, optional
    + 966            Whether to overwrite the output file. Default is False.
    + 967        export_raw : bool, optional
    + 968            Whether to export the raw mass spectra data. Default is True.
    + 969        """
    + 970        if overwrite:
    + 971            if self.output_file.with_suffix(".hdf5").exists():
    + 972                self.output_file.with_suffix(".hdf5").unlink()
    + 973
    + 974        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    + 975            if not hdf_handle.attrs.get("date_utc"):
    + 976                # Set metadata for all mass spectra
    + 977                timenow = str(
    + 978                    datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")
    + 979                )
    + 980                hdf_handle.attrs["date_utc"] = timenow
    + 981                hdf_handle.attrs["filename"] = self.mass_spectra.file_location.name
    + 982                hdf_handle.attrs["data_structure"] = "mass_spectra"
    + 983                hdf_handle.attrs["analyzer"] = self.mass_spectra.analyzer
    + 984                hdf_handle.attrs["instrument_label"] = (
    + 985                    self.mass_spectra.instrument_label
    + 986                )
    + 987                hdf_handle.attrs["sample_name"] = self.mass_spectra.sample_name
    + 988                hdf_handle.attrs["polarity"] = self.mass_spectra.polarity
    + 989                hdf_handle.attrs["parser_type"] = (
    + 990                    self.mass_spectra.spectra_parser_class.__name__
    + 991                )
    + 992                hdf_handle.attrs["original_file_location"] = (
    + 993                    self.mass_spectra.file_location._str
    + 994                )
    + 995
    + 996            if "mass_spectra" not in hdf_handle:
    + 997                mass_spectra_group = hdf_handle.create_group("mass_spectra")
    + 998            else:
    + 999                mass_spectra_group = hdf_handle.get("mass_spectra")
     1000
    -1001                self.add_mass_spectrum_to_hdf5(hdf_handle, mass_spectrum, group_key, mass_spectra_group, export_raw)
    +1001            for mass_spectrum in self.mass_spectra:
    +1002                group_key = str(int(mass_spectrum.scan_number))
    +1003
    +1004                self.add_mass_spectrum_to_hdf5(
    +1005                    hdf_handle, mass_spectrum, group_key, mass_spectra_group, export_raw
    +1006                )
     
    @@ -4073,19 +4088,19 @@
    Attributes
    -
    808    def __init__(self, out_file_path, mass_spectra, output_type="excel"):
    -809        super().__init__(
    -810            out_file_path=out_file_path, mass_spectrum=None, output_type=output_type
    -811        )
    -812
    -813        self.dir_loc = Path(out_file_path + ".corems")
    -814        self.dir_loc.mkdir(exist_ok=True)
    -815        # Place the output file in the directory
    -816        self.output_file = self.dir_loc / Path(out_file_path).name
    -817        self._output_type = output_type  # 'excel', 'csv', 'pandas' or 'hdf5'
    -818        self.mass_spectra = mass_spectra
    -819        self.atoms_order_list = None
    -820        self._init_columns()
    +            
    807    def __init__(self, out_file_path, mass_spectra, output_type="excel"):
    +808        super().__init__(
    +809            out_file_path=out_file_path, mass_spectrum=None, output_type=output_type
    +810        )
    +811
    +812        self.dir_loc = Path(out_file_path + ".corems")
    +813        self.dir_loc.mkdir(exist_ok=True)
    +814        # Place the output file in the directory
    +815        self.output_file = self.dir_loc / Path(out_file_path).name
    +816        self._output_type = output_type  # 'excel', 'csv', 'pandas' or 'hdf5'
    +817        self.mass_spectra = mass_spectra
    +818        self.atoms_order_list = None
    +819        self._init_columns()
     
    @@ -4167,27 +4182,27 @@
    Attributes
    -
    822    def get_pandas_df(self):
    -823        """Get the mass spectra as a list of Pandas DataFrames."""
    -824
    -825        list_df = []
    -826
    -827        for mass_spectrum in self.mass_spectra:
    -828            columns = self.columns_label + self.get_all_used_atoms_in_order(
    -829                mass_spectrum
    -830            )
    -831
    -832            dict_data_list = self.get_list_dict_data(mass_spectrum)
    -833
    -834            df = DataFrame(dict_data_list, columns=columns)
    -835
    -836            scan_number = mass_spectrum.scan_number
    -837
    -838            df.name = str(self.output_file) + "_" + str(scan_number)
    -839
    -840            list_df.append(df)
    -841
    -842        return list_df
    +            
    821    def get_pandas_df(self):
    +822        """Get the mass spectra as a list of Pandas DataFrames."""
    +823
    +824        list_df = []
    +825
    +826        for mass_spectrum in self.mass_spectra:
    +827            columns = self.columns_label + self.get_all_used_atoms_in_order(
    +828                mass_spectrum
    +829            )
    +830
    +831            dict_data_list = self.get_list_dict_data(mass_spectrum)
    +832
    +833            df = DataFrame(dict_data_list, columns=columns)
    +834
    +835            scan_number = mass_spectrum.scan_number
    +836
    +837            df.name = str(self.output_file) + "_" + str(scan_number)
    +838
    +839            list_df.append(df)
    +840
    +841        return list_df
     
    @@ -4207,36 +4222,36 @@
    Attributes
    -
    844    def to_pandas(self, write_metadata=True):
    -845        """Export the data to a Pandas DataFrame and save it as a pickle file.
    -846
    -847        Parameters:
    -848        ----------
    -849        write_metadata : bool, optional
    -850            Whether to write metadata to the output file. Default is True.
    -851        """
    -852
    -853        for mass_spectrum in self.mass_spectra:
    -854            columns = self.columns_label + self.get_all_used_atoms_in_order(
    -855                mass_spectrum
    -856            )
    -857
    -858            dict_data_list = self.get_list_dict_data(mass_spectrum)
    -859
    -860            df = DataFrame(dict_data_list, columns=columns)
    -861
    -862            scan_number = mass_spectrum.scan_number
    -863
    -864            out_filename = Path(
    -865                "%s_scan%s%s" % (self.output_file, str(scan_number), ".pkl")
    -866            )
    -867
    -868            df.to_pickle(self.dir_loc / out_filename)
    -869
    -870            if write_metadata:
    -871                self.write_settings(
    -872                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    -873                )
    +            
    843    def to_pandas(self, write_metadata=True):
    +844        """Export the data to a Pandas DataFrame and save it as a pickle file.
    +845
    +846        Parameters:
    +847        ----------
    +848        write_metadata : bool, optional
    +849            Whether to write metadata to the output file. Default is True.
    +850        """
    +851
    +852        for mass_spectrum in self.mass_spectra:
    +853            columns = self.columns_label + self.get_all_used_atoms_in_order(
    +854                mass_spectrum
    +855            )
    +856
    +857            dict_data_list = self.get_list_dict_data(mass_spectrum)
    +858
    +859            df = DataFrame(dict_data_list, columns=columns)
    +860
    +861            scan_number = mass_spectrum.scan_number
    +862
    +863            out_filename = Path(
    +864                "%s_scan%s%s" % (self.output_file, str(scan_number), ".pkl")
    +865            )
    +866
    +867            df.to_pickle(self.dir_loc / out_filename)
    +868
    +869            if write_metadata:
    +870                self.write_settings(
    +871                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    +872                )
     
    @@ -4261,35 +4276,35 @@

    Parameters:

    -
    875    def to_excel(self, write_metadata=True):
    -876        """Export the data to an Excel file.
    -877
    -878        Parameters:
    -879        ----------
    -880        write_metadata : bool, optional
    -881            Whether to write metadata to the output file. Default is True.
    -882        """
    -883        for mass_spectrum in self.mass_spectra:
    -884            columns = self.columns_label + self.get_all_used_atoms_in_order(
    -885                mass_spectrum
    -886            )
    -887
    -888            dict_data_list = self.get_list_dict_data(mass_spectrum)
    -889
    -890            df = DataFrame(dict_data_list, columns=columns)
    -891
    -892            scan_number = mass_spectrum.scan_number
    -893
    -894            out_filename = Path(
    -895                "%s_scan%s%s" % (self.output_file, str(scan_number), ".xlsx")
    -896            )
    -897
    -898            df.to_excel(self.dir_loc / out_filename)
    -899
    -900            if write_metadata:
    -901                self.write_settings(
    -902                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    -903                )
    +            
    874    def to_excel(self, write_metadata=True):
    +875        """Export the data to an Excel file.
    +876
    +877        Parameters:
    +878        ----------
    +879        write_metadata : bool, optional
    +880            Whether to write metadata to the output file. Default is True.
    +881        """
    +882        for mass_spectrum in self.mass_spectra:
    +883            columns = self.columns_label + self.get_all_used_atoms_in_order(
    +884                mass_spectrum
    +885            )
    +886
    +887            dict_data_list = self.get_list_dict_data(mass_spectrum)
    +888
    +889            df = DataFrame(dict_data_list, columns=columns)
    +890
    +891            scan_number = mass_spectrum.scan_number
    +892
    +893            out_filename = Path(
    +894                "%s_scan%s%s" % (self.output_file, str(scan_number), ".xlsx")
    +895            )
    +896
    +897            df.to_excel(self.dir_loc / out_filename)
    +898
    +899            if write_metadata:
    +900                self.write_settings(
    +901                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    +902                )
     
    @@ -4314,39 +4329,39 @@

    Parameters:

    -
    905    def to_csv(self, write_metadata=True):
    -906        """Export the data to a CSV file.
    -907
    -908        Parameters:
    -909        ----------
    -910        write_metadata : bool, optional
    -911            Whether to write metadata to the output file. Default is True.
    -912        """
    -913        import csv
    -914
    -915        for mass_spectrum in self.mass_spectra:
    -916            columns = self.columns_label + self.get_all_used_atoms_in_order(
    -917                mass_spectrum
    -918            )
    -919
    -920            scan_number = mass_spectrum.scan_number
    -921
    -922            dict_data_list = self.get_list_dict_data(mass_spectrum)
    -923
    -924            out_filename = Path(
    -925                "%s_scan%s%s" % (self.output_file, str(scan_number), ".csv")
    -926            )
    -927
    -928            with open(self.dir_loc / out_filename, "w", newline="") as csvfile:
    -929                writer = csv.DictWriter(csvfile, fieldnames=columns)
    -930                writer.writeheader()
    -931                for data in dict_data_list:
    -932                    writer.writerow(data)
    -933
    -934            if write_metadata:
    -935                self.write_settings(
    -936                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    -937                )
    +            
    904    def to_csv(self, write_metadata=True):
    +905        """Export the data to a CSV file.
    +906
    +907        Parameters:
    +908        ----------
    +909        write_metadata : bool, optional
    +910            Whether to write metadata to the output file. Default is True.
    +911        """
    +912        import csv
    +913
    +914        for mass_spectrum in self.mass_spectra:
    +915            columns = self.columns_label + self.get_all_used_atoms_in_order(
    +916                mass_spectrum
    +917            )
    +918
    +919            scan_number = mass_spectrum.scan_number
    +920
    +921            dict_data_list = self.get_list_dict_data(mass_spectrum)
    +922
    +923            out_filename = Path(
    +924                "%s_scan%s%s" % (self.output_file, str(scan_number), ".csv")
    +925            )
    +926
    +927            with open(self.dir_loc / out_filename, "w", newline="") as csvfile:
    +928                writer = csv.DictWriter(csvfile, fieldnames=columns)
    +929                writer.writeheader()
    +930                for data in dict_data_list:
    +931                    writer.writerow(data)
    +932
    +933            if write_metadata:
    +934                self.write_settings(
    +935                    self.dir_loc / out_filename.with_suffix(""), mass_spectrum
    +936                )
     
    @@ -4371,27 +4386,27 @@

    Parameters:

    -
    939    def get_mass_spectra_attrs(self):
    -940        """Get the mass spectra attributes as a JSON string.
    -941
    -942        Parameters:
    -943        ----------
    -944        mass_spectra : object
    -945            The high resolution mass spectra object.
    -946
    -947        Returns:
    -948        -------
    -949        str
    -950            The mass spectra attributes as a JSON string.
    -951        """
    -952        dict_ms_attrs = {}
    -953        dict_ms_attrs["analyzer"] = self.mass_spectra.analyzer
    -954        dict_ms_attrs["instrument_label"] = self.mass_spectra.instrument_label
    -955        dict_ms_attrs["sample_name"] = self.mass_spectra.sample_name
    -956
    -957        return json.dumps(
    -958            dict_ms_attrs, sort_keys=False, indent=4, separators=(",", ": ")
    -959        )
    +            
    938    def get_mass_spectra_attrs(self):
    +939        """Get the mass spectra attributes as a JSON string.
    +940
    +941        Parameters:
    +942        ----------
    +943        mass_spectra : object
    +944            The high resolution mass spectra object.
    +945
    +946        Returns:
    +947        -------
    +948        str
    +949            The mass spectra attributes as a JSON string.
    +950        """
    +951        dict_ms_attrs = {}
    +952        dict_ms_attrs["analyzer"] = self.mass_spectra.analyzer
    +953        dict_ms_attrs["instrument_label"] = self.mass_spectra.instrument_label
    +954        dict_ms_attrs["sample_name"] = self.mass_spectra.sample_name
    +955
    +956        return json.dumps(
    +957            dict_ms_attrs, sort_keys=False, indent=4, separators=(",", ": ")
    +958        )
     
    @@ -4421,47 +4436,53 @@

    Returns:

    -
     961    def to_hdf(self, overwrite=False, export_raw=True):
    - 962        """Export the data to an HDF5 file.
    - 963        
    - 964        Parameters
    - 965        ----------
    - 966        overwrite : bool, optional
    - 967            Whether to overwrite the output file. Default is False.
    - 968        export_raw : bool, optional
    - 969            Whether to export the raw mass spectra data. Default is True.
    - 970        """
    - 971        if overwrite:
    - 972            if self.output_file.with_suffix(".hdf5").exists():
    - 973                self.output_file.with_suffix(".hdf5").unlink()
    - 974
    - 975        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    - 976            if not hdf_handle.attrs.get("date_utc"):
    - 977                # Set metadata for all mass spectra
    - 978                timenow = str(
    - 979                    datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")
    - 980                )
    - 981                hdf_handle.attrs["date_utc"] = timenow
    - 982                hdf_handle.attrs["filename"] = self.mass_spectra.file_location.name
    - 983                hdf_handle.attrs["data_structure"] = "mass_spectra"
    - 984                hdf_handle.attrs["analyzer"] = self.mass_spectra.analyzer
    - 985                hdf_handle.attrs["instrument_label"] = (
    - 986                    self.mass_spectra.instrument_label
    - 987                )
    - 988                hdf_handle.attrs["sample_name"] = self.mass_spectra.sample_name
    - 989                hdf_handle.attrs["polarity"] = self.mass_spectra.polarity
    - 990                hdf_handle.attrs["parser_type"] = self.mass_spectra.spectra_parser_class.__name__
    - 991                hdf_handle.attrs["original_file_location"] = self.mass_spectra.file_location._str
    - 992
    - 993            if "mass_spectra" not in hdf_handle:
    - 994                mass_spectra_group = hdf_handle.create_group("mass_spectra")
    - 995            else:
    - 996                mass_spectra_group = hdf_handle.get("mass_spectra")
    - 997
    - 998            for mass_spectrum in self.mass_spectra:
    - 999                group_key = str(int(mass_spectrum.scan_number))
    +            
     960    def to_hdf(self, overwrite=False, export_raw=True):
    + 961        """Export the data to an HDF5 file.
    + 962
    + 963        Parameters
    + 964        ----------
    + 965        overwrite : bool, optional
    + 966            Whether to overwrite the output file. Default is False.
    + 967        export_raw : bool, optional
    + 968            Whether to export the raw mass spectra data. Default is True.
    + 969        """
    + 970        if overwrite:
    + 971            if self.output_file.with_suffix(".hdf5").exists():
    + 972                self.output_file.with_suffix(".hdf5").unlink()
    + 973
    + 974        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    + 975            if not hdf_handle.attrs.get("date_utc"):
    + 976                # Set metadata for all mass spectra
    + 977                timenow = str(
    + 978                    datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")
    + 979                )
    + 980                hdf_handle.attrs["date_utc"] = timenow
    + 981                hdf_handle.attrs["filename"] = self.mass_spectra.file_location.name
    + 982                hdf_handle.attrs["data_structure"] = "mass_spectra"
    + 983                hdf_handle.attrs["analyzer"] = self.mass_spectra.analyzer
    + 984                hdf_handle.attrs["instrument_label"] = (
    + 985                    self.mass_spectra.instrument_label
    + 986                )
    + 987                hdf_handle.attrs["sample_name"] = self.mass_spectra.sample_name
    + 988                hdf_handle.attrs["polarity"] = self.mass_spectra.polarity
    + 989                hdf_handle.attrs["parser_type"] = (
    + 990                    self.mass_spectra.spectra_parser_class.__name__
    + 991                )
    + 992                hdf_handle.attrs["original_file_location"] = (
    + 993                    self.mass_spectra.file_location._str
    + 994                )
    + 995
    + 996            if "mass_spectra" not in hdf_handle:
    + 997                mass_spectra_group = hdf_handle.create_group("mass_spectra")
    + 998            else:
    + 999                mass_spectra_group = hdf_handle.get("mass_spectra")
     1000
    -1001                self.add_mass_spectrum_to_hdf5(hdf_handle, mass_spectrum, group_key, mass_spectra_group, export_raw)
    +1001            for mass_spectrum in self.mass_spectra:
    +1002                group_key = str(int(mass_spectrum.scan_number))
    +1003
    +1004                self.add_mass_spectrum_to_hdf5(
    +1005                    hdf_handle, mass_spectrum, group_key, mass_spectra_group, export_raw
    +1006                )
     
    @@ -4526,189 +4547,193 @@
    Inherited Members
    -
    1004class LCMSExport(HighResMassSpectraExport):
    -1005    """A class to export high resolution LC-MS data.
    -1006
    -1007    This class provides methods to export high resolution LC-MS data to HDF5.
    -1008
    -1009    Parameters
    -1010    ----------
    -1011    out_file_path : str | Path
    -1012        The output file path, do not include the file extension.
    -1013    lcms_object : LCMSBase
    -1014        The high resolution lc-ms object.
    -1015    """
    -1016
    -1017    def __init__(self, out_file_path, mass_spectra):
    -1018        super().__init__(out_file_path, mass_spectra, output_type="hdf5")
    -1019
    -1020    def to_hdf(self, overwrite=False, save_parameters=True, parameter_format="toml"):
    -1021        """Export the data to an HDF5.
    -1022
    -1023        Parameters
    -1024        ----------
    -1025        overwrite : bool, optional
    -1026            Whether to overwrite the output file. Default is False.
    -1027        save_parameters : bool, optional
    -1028            Whether to save the parameters as a separate json or toml file. Default is True.
    -1029        parameter_format : str, optional
    -1030            The format to save the parameters in. Default is 'toml'.
    -1031
    -1032        Raises
    -1033        ------
    -1034        ValueError
    -1035            If parameter_format is not 'json' or 'toml'.
    -1036        """
    -1037        export_profile_spectra = self.mass_spectra.parameters.lc_ms.export_profile_spectra
    -1038        
    -1039        # Write the mass spectra data to the hdf5 file
    -1040        super().to_hdf(overwrite=overwrite, export_raw=export_profile_spectra)
    -1041
    -1042        # Write scan info, ms_unprocessed, mass features, eics, and ms2_search results to the hdf5 file
    -1043        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    -1044            # Add scan_info to hdf5 file
    -1045            if "scan_info" not in hdf_handle:
    -1046                scan_info_group = hdf_handle.create_group("scan_info")
    -1047                for k, v in self.mass_spectra._scan_info.items():
    -1048                    array = np.array(list(v.values()))
    -1049                    if array.dtype.str[0:2] == "<U":
    -1050                        array = array.astype("S")
    -1051                    scan_info_group.create_dataset(k, data=array)
    -1052
    -1053            # Add ms_unprocessed to hdf5 file
    -1054            export_unprocessed_ms1 = self.mass_spectra.parameters.lc_ms.export_unprocessed_ms1
    -1055            if self.mass_spectra._ms_unprocessed and export_unprocessed_ms1:
    -1056                if "ms_unprocessed" not in hdf_handle:
    -1057                    ms_unprocessed_group = hdf_handle.create_group("ms_unprocessed")
    -1058                else:
    -1059                    ms_unprocessed_group = hdf_handle.get("ms_unprocessed")
    -1060                for k, v in self.mass_spectra._ms_unprocessed.items():
    -1061                    array = np.array(v)
    -1062                    ms_unprocessed_group.create_dataset(str(k), data=array)
    -1063
    -1064            # Add LCMS mass features to hdf5 file
    -1065            if len(self.mass_spectra.mass_features) > 0:
    -1066                if "mass_features" not in hdf_handle:
    -1067                    mass_features_group = hdf_handle.create_group("mass_features")
    -1068                else:
    -1069                    mass_features_group = hdf_handle.get("mass_features")
    -1070
    -1071                # Create group for each mass feature, with key as the mass feature id
    -1072                for k, v in self.mass_spectra.mass_features.items():
    -1073                    mass_features_group.create_group(str(k))
    -1074                    # Loop through each of the mass feature attributes and add them as attributes (if single value) or datasets (if array)
    -1075                    for k2, v2 in v.__dict__.items():
    -1076                        if v2 is not None:
    -1077                            # Check if the attribute is an integer or float and set as an attribute in the mass feature group
    -1078                            if k2 not in [
    -1079                                "chromatogram_parent",
    -1080                                "ms2_mass_spectra",
    -1081                                "mass_spectrum",
    -1082                                "_eic_data",
    -1083                                "ms2_similarity_results",
    -1084                            ]:
    -1085                                if k2 == "ms2_scan_numbers":
    -1086                                    array = np.array(v2)
    -1087                                    mass_features_group[str(k)].create_dataset(
    -1088                                        str(k2), data=array
    -1089                                    )
    -1090                                elif k2 == "_half_height_width":
    -1091                                    array = np.array(v2)
    -1092                                    mass_features_group[str(k)].create_dataset(
    -1093                                        str(k2), data=array
    -1094                                    )
    -1095                                elif k2 == "_ms_deconvoluted_idx":
    -1096                                    array = np.array(v2)
    -1097                                    mass_features_group[str(k)].create_dataset(
    -1098                                        str(k2), data=array
    -1099                                    )
    -1100                                elif k2 == "associated_mass_features_deconvoluted":
    -1101                                    array = np.array(v2)
    -1102                                    mass_features_group[str(k)].create_dataset(
    -1103                                        str(k2), data=array
    -1104                                    )
    -1105                                elif (
    -1106                                    isinstance(v2, int)
    -1107                                    or isinstance(v2, float)
    -1108                                    or isinstance(v2, str)
    -1109                                    or isinstance(v2, np.integer)
    -1110                                    or isinstance(v2, np.bool_)
    -1111                                ):
    -1112                                    mass_features_group[str(k)].attrs[str(k2)] = v2
    -1113                                else:
    -1114                                    raise TypeError(
    -1115                                        f"Attribute {k2} is not an integer, float, or string and cannot be added to the hdf5 file"
    -1116                                    )
    -1117
    -1118            # Add EIC data to hdf5 file
    -1119            export_eics = self.mass_spectra.parameters.lc_ms.export_eics
    -1120            if len(self.mass_spectra.eics) > 0 and export_eics:
    -1121                if "eics" not in hdf_handle:
    -1122                    eic_group = hdf_handle.create_group("eics")
    -1123                else:
    -1124                    eic_group = hdf_handle.get("eics")
    -1125
    -1126                # Create group for each eic
    -1127                for k, v in self.mass_spectra.eics.items():
    -1128                    eic_group.create_group(str(k))
    -1129                    eic_group[str(k)].attrs["mz"] = k
    -1130                    # Loop through each of the attributes and add them as datasets (if array)
    -1131                    for k2, v2 in v.__dict__.items():
    -1132                        if v2 is not None:
    -1133                            array = np.array(v2)
    -1134                            eic_group[str(k)].create_dataset(str(k2), data=array)
    -1135
    -1136            # Add ms2_search results to hdf5 file
    -1137            if len(self.mass_spectra.spectral_search_results) > 0:
    -1138                if "spectral_search_results" not in hdf_handle:
    -1139                    spectral_search_results = hdf_handle.create_group(
    -1140                        "spectral_search_results"
    -1141                    )
    -1142                else:
    -1143                    spectral_search_results = hdf_handle.get("spectral_search_results")
    -1144                # Create group for each search result by ms2_scan / precursor_mz
    -1145                for k, v in self.mass_spectra.spectral_search_results.items():
    -1146                    spectral_search_results.create_group(str(k))
    -1147                    for k2, v2 in v.items():
    -1148                        spectral_search_results[str(k)].create_group(str(k2))
    -1149                        spectral_search_results[str(k)][str(k2)].attrs[
    -1150                            "precursor_mz"
    -1151                        ] = v2.precursor_mz
    -1152                        spectral_search_results[str(k)][str(k2)].attrs[
    -1153                            "query_spectrum_id"
    -1154                        ] = v2.query_spectrum_id
    -1155                        # Loop through each of the attributes and add them as datasets (if array)
    -1156                        for k3, v3 in v2.__dict__.items():
    -1157                            if v3 is not None and k3 not in [
    -1158                                "query_spectrum",
    -1159                                "precursor_mz",
    -1160                                "query_spectrum_id",
    -1161                            ]:
    -1162                                if k3 == "query_frag_types" or k3 == "ref_frag_types":
    -1163                                    v3 = [", ".join(x) for x in v3]
    -1164                                array = np.array(v3)
    -1165                                if array.dtype.str[0:2] == "<U":
    -1166                                    array = array.astype("S")
    -1167                                spectral_search_results[str(k)][str(k2)].create_dataset(
    -1168                                    str(k3), data=array
    -1169                                )
    -1170
    -1171        # Save parameters as separate json
    -1172        if save_parameters:
    -1173            # Check if parameter_format is valid
    -1174            if parameter_format not in ["json", "toml"]:
    -1175                raise ValueError("parameter_format must be 'json' or 'toml'")
    -1176
    -1177            if parameter_format == "json":
    -1178                dump_lcms_settings_json(
    -1179                    filename=self.output_file.with_suffix(".json"),
    -1180                    lcms_obj=self.mass_spectra,
    -1181                )
    -1182            elif parameter_format == "toml":
    -1183                dump_lcms_settings_toml(
    -1184                    filename=self.output_file.with_suffix(".toml"),
    -1185                    lcms_obj=self.mass_spectra,
    -1186                )
    +            
    1009class LCMSExport(HighResMassSpectraExport):
    +1010    """A class to export high resolution LC-MS data.
    +1011
    +1012    This class provides methods to export high resolution LC-MS data to HDF5.
    +1013
    +1014    Parameters
    +1015    ----------
    +1016    out_file_path : str | Path
    +1017        The output file path, do not include the file extension.
    +1018    lcms_object : LCMSBase
    +1019        The high resolution lc-ms object.
    +1020    """
    +1021
    +1022    def __init__(self, out_file_path, mass_spectra):
    +1023        super().__init__(out_file_path, mass_spectra, output_type="hdf5")
    +1024
    +1025    def to_hdf(self, overwrite=False, save_parameters=True, parameter_format="toml"):
    +1026        """Export the data to an HDF5.
    +1027
    +1028        Parameters
    +1029        ----------
    +1030        overwrite : bool, optional
    +1031            Whether to overwrite the output file. Default is False.
    +1032        save_parameters : bool, optional
    +1033            Whether to save the parameters as a separate json or toml file. Default is True.
    +1034        parameter_format : str, optional
    +1035            The format to save the parameters in. Default is 'toml'.
    +1036
    +1037        Raises
    +1038        ------
    +1039        ValueError
    +1040            If parameter_format is not 'json' or 'toml'.
    +1041        """
    +1042        export_profile_spectra = (
    +1043            self.mass_spectra.parameters.lc_ms.export_profile_spectra
    +1044        )
    +1045
    +1046        # Write the mass spectra data to the hdf5 file
    +1047        super().to_hdf(overwrite=overwrite, export_raw=export_profile_spectra)
    +1048
    +1049        # Write scan info, ms_unprocessed, mass features, eics, and ms2_search results to the hdf5 file
    +1050        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    +1051            # Add scan_info to hdf5 file
    +1052            if "scan_info" not in hdf_handle:
    +1053                scan_info_group = hdf_handle.create_group("scan_info")
    +1054                for k, v in self.mass_spectra._scan_info.items():
    +1055                    array = np.array(list(v.values()))
    +1056                    if array.dtype.str[0:2] == "<U":
    +1057                        array = array.astype("S")
    +1058                    scan_info_group.create_dataset(k, data=array)
    +1059
    +1060            # Add ms_unprocessed to hdf5 file
    +1061            export_unprocessed_ms1 = (
    +1062                self.mass_spectra.parameters.lc_ms.export_unprocessed_ms1
    +1063            )
    +1064            if self.mass_spectra._ms_unprocessed and export_unprocessed_ms1:
    +1065                if "ms_unprocessed" not in hdf_handle:
    +1066                    ms_unprocessed_group = hdf_handle.create_group("ms_unprocessed")
    +1067                else:
    +1068                    ms_unprocessed_group = hdf_handle.get("ms_unprocessed")
    +1069                for k, v in self.mass_spectra._ms_unprocessed.items():
    +1070                    array = np.array(v)
    +1071                    ms_unprocessed_group.create_dataset(str(k), data=array)
    +1072
    +1073            # Add LCMS mass features to hdf5 file
    +1074            if len(self.mass_spectra.mass_features) > 0:
    +1075                if "mass_features" not in hdf_handle:
    +1076                    mass_features_group = hdf_handle.create_group("mass_features")
    +1077                else:
    +1078                    mass_features_group = hdf_handle.get("mass_features")
    +1079
    +1080                # Create group for each mass feature, with key as the mass feature id
    +1081                for k, v in self.mass_spectra.mass_features.items():
    +1082                    mass_features_group.create_group(str(k))
    +1083                    # Loop through each of the mass feature attributes and add them as attributes (if single value) or datasets (if array)
    +1084                    for k2, v2 in v.__dict__.items():
    +1085                        if v2 is not None:
    +1086                            # Check if the attribute is an integer or float and set as an attribute in the mass feature group
    +1087                            if k2 not in [
    +1088                                "chromatogram_parent",
    +1089                                "ms2_mass_spectra",
    +1090                                "mass_spectrum",
    +1091                                "_eic_data",
    +1092                                "ms2_similarity_results",
    +1093                            ]:
    +1094                                if k2 == "ms2_scan_numbers":
    +1095                                    array = np.array(v2)
    +1096                                    mass_features_group[str(k)].create_dataset(
    +1097                                        str(k2), data=array
    +1098                                    )
    +1099                                elif k2 == "_half_height_width":
    +1100                                    array = np.array(v2)
    +1101                                    mass_features_group[str(k)].create_dataset(
    +1102                                        str(k2), data=array
    +1103                                    )
    +1104                                elif k2 == "_ms_deconvoluted_idx":
    +1105                                    array = np.array(v2)
    +1106                                    mass_features_group[str(k)].create_dataset(
    +1107                                        str(k2), data=array
    +1108                                    )
    +1109                                elif k2 == "associated_mass_features_deconvoluted":
    +1110                                    array = np.array(v2)
    +1111                                    mass_features_group[str(k)].create_dataset(
    +1112                                        str(k2), data=array
    +1113                                    )
    +1114                                elif (
    +1115                                    isinstance(v2, int)
    +1116                                    or isinstance(v2, float)
    +1117                                    or isinstance(v2, str)
    +1118                                    or isinstance(v2, np.integer)
    +1119                                    or isinstance(v2, np.bool_)
    +1120                                ):
    +1121                                    mass_features_group[str(k)].attrs[str(k2)] = v2
    +1122                                else:
    +1123                                    raise TypeError(
    +1124                                        f"Attribute {k2} is not an integer, float, or string and cannot be added to the hdf5 file"
    +1125                                    )
    +1126
    +1127            # Add EIC data to hdf5 file
    +1128            export_eics = self.mass_spectra.parameters.lc_ms.export_eics
    +1129            if len(self.mass_spectra.eics) > 0 and export_eics:
    +1130                if "eics" not in hdf_handle:
    +1131                    eic_group = hdf_handle.create_group("eics")
    +1132                else:
    +1133                    eic_group = hdf_handle.get("eics")
    +1134
    +1135                # Create group for each eic
    +1136                for k, v in self.mass_spectra.eics.items():
    +1137                    eic_group.create_group(str(k))
    +1138                    eic_group[str(k)].attrs["mz"] = k
    +1139                    # Loop through each of the attributes and add them as datasets (if array)
    +1140                    for k2, v2 in v.__dict__.items():
    +1141                        if v2 is not None:
    +1142                            array = np.array(v2)
    +1143                            eic_group[str(k)].create_dataset(str(k2), data=array)
    +1144
    +1145            # Add ms2_search results to hdf5 file
    +1146            if len(self.mass_spectra.spectral_search_results) > 0:
    +1147                if "spectral_search_results" not in hdf_handle:
    +1148                    spectral_search_results = hdf_handle.create_group(
    +1149                        "spectral_search_results"
    +1150                    )
    +1151                else:
    +1152                    spectral_search_results = hdf_handle.get("spectral_search_results")
    +1153                # Create group for each search result by ms2_scan / precursor_mz
    +1154                for k, v in self.mass_spectra.spectral_search_results.items():
    +1155                    spectral_search_results.create_group(str(k))
    +1156                    for k2, v2 in v.items():
    +1157                        spectral_search_results[str(k)].create_group(str(k2))
    +1158                        spectral_search_results[str(k)][str(k2)].attrs[
    +1159                            "precursor_mz"
    +1160                        ] = v2.precursor_mz
    +1161                        spectral_search_results[str(k)][str(k2)].attrs[
    +1162                            "query_spectrum_id"
    +1163                        ] = v2.query_spectrum_id
    +1164                        # Loop through each of the attributes and add them as datasets (if array)
    +1165                        for k3, v3 in v2.__dict__.items():
    +1166                            if v3 is not None and k3 not in [
    +1167                                "query_spectrum",
    +1168                                "precursor_mz",
    +1169                                "query_spectrum_id",
    +1170                            ]:
    +1171                                if k3 == "query_frag_types" or k3 == "ref_frag_types":
    +1172                                    v3 = [", ".join(x) for x in v3]
    +1173                                array = np.array(v3)
    +1174                                if array.dtype.str[0:2] == "<U":
    +1175                                    array = array.astype("S")
    +1176                                spectral_search_results[str(k)][str(k2)].create_dataset(
    +1177                                    str(k3), data=array
    +1178                                )
    +1179
    +1180        # Save parameters as separate json
    +1181        if save_parameters:
    +1182            # Check if parameter_format is valid
    +1183            if parameter_format not in ["json", "toml"]:
    +1184                raise ValueError("parameter_format must be 'json' or 'toml'")
    +1185
    +1186            if parameter_format == "json":
    +1187                dump_lcms_settings_json(
    +1188                    filename=self.output_file.with_suffix(".json"),
    +1189                    lcms_obj=self.mass_spectra,
    +1190                )
    +1191            elif parameter_format == "toml":
    +1192                dump_lcms_settings_toml(
    +1193                    filename=self.output_file.with_suffix(".toml"),
    +1194                    lcms_obj=self.mass_spectra,
    +1195                )
     
    @@ -4737,8 +4762,8 @@
    Parameters
    -
    1017    def __init__(self, out_file_path, mass_spectra):
    -1018        super().__init__(out_file_path, mass_spectra, output_type="hdf5")
    +            
    1022    def __init__(self, out_file_path, mass_spectra):
    +1023        super().__init__(out_file_path, mass_spectra, output_type="hdf5")
     
    @@ -4776,173 +4801,177 @@
    Parameters
    -
    1020    def to_hdf(self, overwrite=False, save_parameters=True, parameter_format="toml"):
    -1021        """Export the data to an HDF5.
    -1022
    -1023        Parameters
    -1024        ----------
    -1025        overwrite : bool, optional
    -1026            Whether to overwrite the output file. Default is False.
    -1027        save_parameters : bool, optional
    -1028            Whether to save the parameters as a separate json or toml file. Default is True.
    -1029        parameter_format : str, optional
    -1030            The format to save the parameters in. Default is 'toml'.
    -1031
    -1032        Raises
    -1033        ------
    -1034        ValueError
    -1035            If parameter_format is not 'json' or 'toml'.
    -1036        """
    -1037        export_profile_spectra = self.mass_spectra.parameters.lc_ms.export_profile_spectra
    -1038        
    -1039        # Write the mass spectra data to the hdf5 file
    -1040        super().to_hdf(overwrite=overwrite, export_raw=export_profile_spectra)
    -1041
    -1042        # Write scan info, ms_unprocessed, mass features, eics, and ms2_search results to the hdf5 file
    -1043        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    -1044            # Add scan_info to hdf5 file
    -1045            if "scan_info" not in hdf_handle:
    -1046                scan_info_group = hdf_handle.create_group("scan_info")
    -1047                for k, v in self.mass_spectra._scan_info.items():
    -1048                    array = np.array(list(v.values()))
    -1049                    if array.dtype.str[0:2] == "<U":
    -1050                        array = array.astype("S")
    -1051                    scan_info_group.create_dataset(k, data=array)
    -1052
    -1053            # Add ms_unprocessed to hdf5 file
    -1054            export_unprocessed_ms1 = self.mass_spectra.parameters.lc_ms.export_unprocessed_ms1
    -1055            if self.mass_spectra._ms_unprocessed and export_unprocessed_ms1:
    -1056                if "ms_unprocessed" not in hdf_handle:
    -1057                    ms_unprocessed_group = hdf_handle.create_group("ms_unprocessed")
    -1058                else:
    -1059                    ms_unprocessed_group = hdf_handle.get("ms_unprocessed")
    -1060                for k, v in self.mass_spectra._ms_unprocessed.items():
    -1061                    array = np.array(v)
    -1062                    ms_unprocessed_group.create_dataset(str(k), data=array)
    -1063
    -1064            # Add LCMS mass features to hdf5 file
    -1065            if len(self.mass_spectra.mass_features) > 0:
    -1066                if "mass_features" not in hdf_handle:
    -1067                    mass_features_group = hdf_handle.create_group("mass_features")
    -1068                else:
    -1069                    mass_features_group = hdf_handle.get("mass_features")
    -1070
    -1071                # Create group for each mass feature, with key as the mass feature id
    -1072                for k, v in self.mass_spectra.mass_features.items():
    -1073                    mass_features_group.create_group(str(k))
    -1074                    # Loop through each of the mass feature attributes and add them as attributes (if single value) or datasets (if array)
    -1075                    for k2, v2 in v.__dict__.items():
    -1076                        if v2 is not None:
    -1077                            # Check if the attribute is an integer or float and set as an attribute in the mass feature group
    -1078                            if k2 not in [
    -1079                                "chromatogram_parent",
    -1080                                "ms2_mass_spectra",
    -1081                                "mass_spectrum",
    -1082                                "_eic_data",
    -1083                                "ms2_similarity_results",
    -1084                            ]:
    -1085                                if k2 == "ms2_scan_numbers":
    -1086                                    array = np.array(v2)
    -1087                                    mass_features_group[str(k)].create_dataset(
    -1088                                        str(k2), data=array
    -1089                                    )
    -1090                                elif k2 == "_half_height_width":
    -1091                                    array = np.array(v2)
    -1092                                    mass_features_group[str(k)].create_dataset(
    -1093                                        str(k2), data=array
    -1094                                    )
    -1095                                elif k2 == "_ms_deconvoluted_idx":
    -1096                                    array = np.array(v2)
    -1097                                    mass_features_group[str(k)].create_dataset(
    -1098                                        str(k2), data=array
    -1099                                    )
    -1100                                elif k2 == "associated_mass_features_deconvoluted":
    -1101                                    array = np.array(v2)
    -1102                                    mass_features_group[str(k)].create_dataset(
    -1103                                        str(k2), data=array
    -1104                                    )
    -1105                                elif (
    -1106                                    isinstance(v2, int)
    -1107                                    or isinstance(v2, float)
    -1108                                    or isinstance(v2, str)
    -1109                                    or isinstance(v2, np.integer)
    -1110                                    or isinstance(v2, np.bool_)
    -1111                                ):
    -1112                                    mass_features_group[str(k)].attrs[str(k2)] = v2
    -1113                                else:
    -1114                                    raise TypeError(
    -1115                                        f"Attribute {k2} is not an integer, float, or string and cannot be added to the hdf5 file"
    -1116                                    )
    -1117
    -1118            # Add EIC data to hdf5 file
    -1119            export_eics = self.mass_spectra.parameters.lc_ms.export_eics
    -1120            if len(self.mass_spectra.eics) > 0 and export_eics:
    -1121                if "eics" not in hdf_handle:
    -1122                    eic_group = hdf_handle.create_group("eics")
    -1123                else:
    -1124                    eic_group = hdf_handle.get("eics")
    -1125
    -1126                # Create group for each eic
    -1127                for k, v in self.mass_spectra.eics.items():
    -1128                    eic_group.create_group(str(k))
    -1129                    eic_group[str(k)].attrs["mz"] = k
    -1130                    # Loop through each of the attributes and add them as datasets (if array)
    -1131                    for k2, v2 in v.__dict__.items():
    -1132                        if v2 is not None:
    -1133                            array = np.array(v2)
    -1134                            eic_group[str(k)].create_dataset(str(k2), data=array)
    -1135
    -1136            # Add ms2_search results to hdf5 file
    -1137            if len(self.mass_spectra.spectral_search_results) > 0:
    -1138                if "spectral_search_results" not in hdf_handle:
    -1139                    spectral_search_results = hdf_handle.create_group(
    -1140                        "spectral_search_results"
    -1141                    )
    -1142                else:
    -1143                    spectral_search_results = hdf_handle.get("spectral_search_results")
    -1144                # Create group for each search result by ms2_scan / precursor_mz
    -1145                for k, v in self.mass_spectra.spectral_search_results.items():
    -1146                    spectral_search_results.create_group(str(k))
    -1147                    for k2, v2 in v.items():
    -1148                        spectral_search_results[str(k)].create_group(str(k2))
    -1149                        spectral_search_results[str(k)][str(k2)].attrs[
    -1150                            "precursor_mz"
    -1151                        ] = v2.precursor_mz
    -1152                        spectral_search_results[str(k)][str(k2)].attrs[
    -1153                            "query_spectrum_id"
    -1154                        ] = v2.query_spectrum_id
    -1155                        # Loop through each of the attributes and add them as datasets (if array)
    -1156                        for k3, v3 in v2.__dict__.items():
    -1157                            if v3 is not None and k3 not in [
    -1158                                "query_spectrum",
    -1159                                "precursor_mz",
    -1160                                "query_spectrum_id",
    -1161                            ]:
    -1162                                if k3 == "query_frag_types" or k3 == "ref_frag_types":
    -1163                                    v3 = [", ".join(x) for x in v3]
    -1164                                array = np.array(v3)
    -1165                                if array.dtype.str[0:2] == "<U":
    -1166                                    array = array.astype("S")
    -1167                                spectral_search_results[str(k)][str(k2)].create_dataset(
    -1168                                    str(k3), data=array
    -1169                                )
    -1170
    -1171        # Save parameters as separate json
    -1172        if save_parameters:
    -1173            # Check if parameter_format is valid
    -1174            if parameter_format not in ["json", "toml"]:
    -1175                raise ValueError("parameter_format must be 'json' or 'toml'")
    -1176
    -1177            if parameter_format == "json":
    -1178                dump_lcms_settings_json(
    -1179                    filename=self.output_file.with_suffix(".json"),
    -1180                    lcms_obj=self.mass_spectra,
    -1181                )
    -1182            elif parameter_format == "toml":
    -1183                dump_lcms_settings_toml(
    -1184                    filename=self.output_file.with_suffix(".toml"),
    -1185                    lcms_obj=self.mass_spectra,
    -1186                )
    +            
    1025    def to_hdf(self, overwrite=False, save_parameters=True, parameter_format="toml"):
    +1026        """Export the data to an HDF5.
    +1027
    +1028        Parameters
    +1029        ----------
    +1030        overwrite : bool, optional
    +1031            Whether to overwrite the output file. Default is False.
    +1032        save_parameters : bool, optional
    +1033            Whether to save the parameters as a separate json or toml file. Default is True.
    +1034        parameter_format : str, optional
    +1035            The format to save the parameters in. Default is 'toml'.
    +1036
    +1037        Raises
    +1038        ------
    +1039        ValueError
    +1040            If parameter_format is not 'json' or 'toml'.
    +1041        """
    +1042        export_profile_spectra = (
    +1043            self.mass_spectra.parameters.lc_ms.export_profile_spectra
    +1044        )
    +1045
    +1046        # Write the mass spectra data to the hdf5 file
    +1047        super().to_hdf(overwrite=overwrite, export_raw=export_profile_spectra)
    +1048
    +1049        # Write scan info, ms_unprocessed, mass features, eics, and ms2_search results to the hdf5 file
    +1050        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    +1051            # Add scan_info to hdf5 file
    +1052            if "scan_info" not in hdf_handle:
    +1053                scan_info_group = hdf_handle.create_group("scan_info")
    +1054                for k, v in self.mass_spectra._scan_info.items():
    +1055                    array = np.array(list(v.values()))
    +1056                    if array.dtype.str[0:2] == "<U":
    +1057                        array = array.astype("S")
    +1058                    scan_info_group.create_dataset(k, data=array)
    +1059
    +1060            # Add ms_unprocessed to hdf5 file
    +1061            export_unprocessed_ms1 = (
    +1062                self.mass_spectra.parameters.lc_ms.export_unprocessed_ms1
    +1063            )
    +1064            if self.mass_spectra._ms_unprocessed and export_unprocessed_ms1:
    +1065                if "ms_unprocessed" not in hdf_handle:
    +1066                    ms_unprocessed_group = hdf_handle.create_group("ms_unprocessed")
    +1067                else:
    +1068                    ms_unprocessed_group = hdf_handle.get("ms_unprocessed")
    +1069                for k, v in self.mass_spectra._ms_unprocessed.items():
    +1070                    array = np.array(v)
    +1071                    ms_unprocessed_group.create_dataset(str(k), data=array)
    +1072
    +1073            # Add LCMS mass features to hdf5 file
    +1074            if len(self.mass_spectra.mass_features) > 0:
    +1075                if "mass_features" not in hdf_handle:
    +1076                    mass_features_group = hdf_handle.create_group("mass_features")
    +1077                else:
    +1078                    mass_features_group = hdf_handle.get("mass_features")
    +1079
    +1080                # Create group for each mass feature, with key as the mass feature id
    +1081                for k, v in self.mass_spectra.mass_features.items():
    +1082                    mass_features_group.create_group(str(k))
    +1083                    # Loop through each of the mass feature attributes and add them as attributes (if single value) or datasets (if array)
    +1084                    for k2, v2 in v.__dict__.items():
    +1085                        if v2 is not None:
    +1086                            # Check if the attribute is an integer or float and set as an attribute in the mass feature group
    +1087                            if k2 not in [
    +1088                                "chromatogram_parent",
    +1089                                "ms2_mass_spectra",
    +1090                                "mass_spectrum",
    +1091                                "_eic_data",
    +1092                                "ms2_similarity_results",
    +1093                            ]:
    +1094                                if k2 == "ms2_scan_numbers":
    +1095                                    array = np.array(v2)
    +1096                                    mass_features_group[str(k)].create_dataset(
    +1097                                        str(k2), data=array
    +1098                                    )
    +1099                                elif k2 == "_half_height_width":
    +1100                                    array = np.array(v2)
    +1101                                    mass_features_group[str(k)].create_dataset(
    +1102                                        str(k2), data=array
    +1103                                    )
    +1104                                elif k2 == "_ms_deconvoluted_idx":
    +1105                                    array = np.array(v2)
    +1106                                    mass_features_group[str(k)].create_dataset(
    +1107                                        str(k2), data=array
    +1108                                    )
    +1109                                elif k2 == "associated_mass_features_deconvoluted":
    +1110                                    array = np.array(v2)
    +1111                                    mass_features_group[str(k)].create_dataset(
    +1112                                        str(k2), data=array
    +1113                                    )
    +1114                                elif (
    +1115                                    isinstance(v2, int)
    +1116                                    or isinstance(v2, float)
    +1117                                    or isinstance(v2, str)
    +1118                                    or isinstance(v2, np.integer)
    +1119                                    or isinstance(v2, np.bool_)
    +1120                                ):
    +1121                                    mass_features_group[str(k)].attrs[str(k2)] = v2
    +1122                                else:
    +1123                                    raise TypeError(
    +1124                                        f"Attribute {k2} is not an integer, float, or string and cannot be added to the hdf5 file"
    +1125                                    )
    +1126
    +1127            # Add EIC data to hdf5 file
    +1128            export_eics = self.mass_spectra.parameters.lc_ms.export_eics
    +1129            if len(self.mass_spectra.eics) > 0 and export_eics:
    +1130                if "eics" not in hdf_handle:
    +1131                    eic_group = hdf_handle.create_group("eics")
    +1132                else:
    +1133                    eic_group = hdf_handle.get("eics")
    +1134
    +1135                # Create group for each eic
    +1136                for k, v in self.mass_spectra.eics.items():
    +1137                    eic_group.create_group(str(k))
    +1138                    eic_group[str(k)].attrs["mz"] = k
    +1139                    # Loop through each of the attributes and add them as datasets (if array)
    +1140                    for k2, v2 in v.__dict__.items():
    +1141                        if v2 is not None:
    +1142                            array = np.array(v2)
    +1143                            eic_group[str(k)].create_dataset(str(k2), data=array)
    +1144
    +1145            # Add ms2_search results to hdf5 file
    +1146            if len(self.mass_spectra.spectral_search_results) > 0:
    +1147                if "spectral_search_results" not in hdf_handle:
    +1148                    spectral_search_results = hdf_handle.create_group(
    +1149                        "spectral_search_results"
    +1150                    )
    +1151                else:
    +1152                    spectral_search_results = hdf_handle.get("spectral_search_results")
    +1153                # Create group for each search result by ms2_scan / precursor_mz
    +1154                for k, v in self.mass_spectra.spectral_search_results.items():
    +1155                    spectral_search_results.create_group(str(k))
    +1156                    for k2, v2 in v.items():
    +1157                        spectral_search_results[str(k)].create_group(str(k2))
    +1158                        spectral_search_results[str(k)][str(k2)].attrs[
    +1159                            "precursor_mz"
    +1160                        ] = v2.precursor_mz
    +1161                        spectral_search_results[str(k)][str(k2)].attrs[
    +1162                            "query_spectrum_id"
    +1163                        ] = v2.query_spectrum_id
    +1164                        # Loop through each of the attributes and add them as datasets (if array)
    +1165                        for k3, v3 in v2.__dict__.items():
    +1166                            if v3 is not None and k3 not in [
    +1167                                "query_spectrum",
    +1168                                "precursor_mz",
    +1169                                "query_spectrum_id",
    +1170                            ]:
    +1171                                if k3 == "query_frag_types" or k3 == "ref_frag_types":
    +1172                                    v3 = [", ".join(x) for x in v3]
    +1173                                array = np.array(v3)
    +1174                                if array.dtype.str[0:2] == "<U":
    +1175                                    array = array.astype("S")
    +1176                                spectral_search_results[str(k)][str(k2)].create_dataset(
    +1177                                    str(k3), data=array
    +1178                                )
    +1179
    +1180        # Save parameters as separate json
    +1181        if save_parameters:
    +1182            # Check if parameter_format is valid
    +1183            if parameter_format not in ["json", "toml"]:
    +1184                raise ValueError("parameter_format must be 'json' or 'toml'")
    +1185
    +1186            if parameter_format == "json":
    +1187                dump_lcms_settings_json(
    +1188                    filename=self.output_file.with_suffix(".json"),
    +1189                    lcms_obj=self.mass_spectra,
    +1190                )
    +1191            elif parameter_format == "toml":
    +1192                dump_lcms_settings_toml(
    +1193                    filename=self.output_file.with_suffix(".toml"),
    +1194                    lcms_obj=self.mass_spectra,
    +1195                )
     
    @@ -5027,594 +5056,595 @@
    Inherited Members
    -
    1190class LipidomicsExport(LCMSExport):
    -1191    """A class to export lipidomics data.
    -1192
    -1193    This class provides methods to export lipidomics data to various formats and summarize the lipid report.
    -1194
    -1195    Parameters
    -1196    ----------
    -1197    out_file_path : str | Path
    -1198        The output file path, do not include the file extension.
    -1199    mass_spectra : object
    -1200        The high resolution mass spectra object.
    -1201    """
    -1202    def __init__(self, out_file_path, mass_spectra):
    -1203        super().__init__(out_file_path, mass_spectra)
    -1204        self.ion_type_dict = ion_type_dict
    -1205
    -1206    @staticmethod
    -1207    def get_ion_formula(neutral_formula, ion_type):
    -1208        """From a neutral formula and an ion type, return the formula of the ion.
    -1209
    -1210        Notes
    -1211        -----
    -1212        This is a static method.
    -1213        If the neutral_formula is not a string, this method will return None.
    +            
    1198class LipidomicsExport(LCMSExport):
    +1199    """A class to export lipidomics data.
    +1200
    +1201    This class provides methods to export lipidomics data to various formats and summarize the lipid report.
    +1202
    +1203    Parameters
    +1204    ----------
    +1205    out_file_path : str | Path
    +1206        The output file path, do not include the file extension.
    +1207    mass_spectra : object
    +1208        The high resolution mass spectra object.
    +1209    """
    +1210
    +1211    def __init__(self, out_file_path, mass_spectra):
    +1212        super().__init__(out_file_path, mass_spectra)
    +1213        self.ion_type_dict = ion_type_dict
     1214
    -1215        Parameters
    -1216        ----------
    -1217        neutral_formula : str
    -1218            The neutral formula, this should be a string form from the MolecularFormula class
    -1219            (e.g. 'C2 H4 O2', isotopes OK), or simple string (e.g. 'C2H4O2', no isotope handling in this case).
    -1220            In the case of a simple string, the atoms are parsed based on the presence of capital letters,
    -1221            e.g. MgCl2 is parsed as 'Mg Cl2.
    -1222        ion_type : str
    -1223            The ion type, e.g. 'protonated', '[M+H]+', '[M+Na]+', etc.
    -1224            See the self.ion_type_dict for the available ion types.
    -1225
    -1226        Returns
    -1227        -------
    -1228        str
    -1229            The formula of the ion as a string (like 'C2 H4 O2'); or None if the neutral_formula is not a string.
    -1230        """
    -1231        # If neutral_formula is not a string, return None
    -1232        if not isinstance(neutral_formula, str):
    -1233            return None
    +1215    @staticmethod
    +1216    def get_ion_formula(neutral_formula, ion_type):
    +1217        """From a neutral formula and an ion type, return the formula of the ion.
    +1218
    +1219        Notes
    +1220        -----
    +1221        This is a static method.
    +1222        If the neutral_formula is not a string, this method will return None.
    +1223
    +1224        Parameters
    +1225        ----------
    +1226        neutral_formula : str
    +1227            The neutral formula, this should be a string form from the MolecularFormula class
    +1228            (e.g. 'C2 H4 O2', isotopes OK), or simple string (e.g. 'C2H4O2', no isotope handling in this case).
    +1229            In the case of a simple string, the atoms are parsed based on the presence of capital letters,
    +1230            e.g. MgCl2 is parsed as 'Mg Cl2.
    +1231        ion_type : str
    +1232            The ion type, e.g. 'protonated', '[M+H]+', '[M+Na]+', etc.
    +1233            See the self.ion_type_dict for the available ion types.
     1234
    -1235        # Check if there are spaces in the formula (these are outputs of the MolecularFormula class and do not need to be processed before being passed to the class)
    -1236        if re.search(r"\s", neutral_formula):
    -1237            neutral_formula = MolecularFormula(neutral_formula, ion_charge=0)
    -1238        else:
    -1239            form_pre = re.sub(r"([A-Z])", r" \1", neutral_formula)[1:]
    -1240            elements = [re.findall(r"[A-Z][a-z]*", x) for x in form_pre.split()]
    -1241            counts = [re.findall(r"\d+", x) for x in form_pre.split()]
    -1242            neutral_formula = MolecularFormula(
    -1243                dict(
    -1244                    zip(
    -1245                        [x[0] for x in elements],
    -1246                        [int(x[0]) if x else 1 for x in counts],
    -1247                    )
    -1248                ),
    -1249                ion_charge=0,
    -1250            )
    -1251        neutral_formula_dict = neutral_formula.to_dict().copy()
    -1252
    -1253        adduct_add_dict = ion_type_dict[ion_type][0]
    -1254        for key in adduct_add_dict:
    -1255            if key in neutral_formula_dict.keys():
    -1256                neutral_formula_dict[key] += adduct_add_dict[key]
    -1257            else:
    -1258                neutral_formula_dict[key] = adduct_add_dict[key]
    -1259
    -1260        adduct_subtract = ion_type_dict[ion_type][1]
    -1261        for key in adduct_subtract:
    -1262            neutral_formula_dict[key] -= adduct_subtract[key]
    -1263
    -1264        return MolecularFormula(neutral_formula_dict, ion_charge=0).string
    -1265
    -1266    @staticmethod
    -1267    def get_isotope_type(ion_formula):
    -1268        """From an ion formula, return the 13C isotope type of the ion.
    -1269
    -1270        Notes
    -1271        -----
    -1272        This is a static method.
    -1273        If the ion_formula is not a string, this method will return None.
    -1274        This is currently only functional for 13C isotopes.
    -1275
    -1276        Parameters
    -1277        ----------
    -1278        ion_formula : str
    -1279            The formula of the ion, expected to be a string like 'C2 H4 O2'.
    -1280
    -1281        Returns
    -1282        -------
    -1283        str
    -1284            The isotope type of the ion, e.g. '13C1', '13C2', etc; or None if the ion_formula does not contain a 13C isotope.
    -1285
    -1286        Raises
    -1287        ------
    -1288        ValueError
    -1289            If the ion_formula is not a string.
    -1290        """
    -1291        if not isinstance(ion_formula, str):
    -1292            return None
    -1293
    -1294        if re.search(r"\s", ion_formula):
    -1295            ion_formula = MolecularFormula(ion_formula, ion_charge=0)
    -1296        else:
    -1297            raise ValueError('ion_formula should be a string like "C2 H4 O2"')
    -1298        ion_formula_dict = ion_formula.to_dict().copy()
    -1299
    -1300        try:
    -1301            iso_class = "13C" + str(ion_formula_dict.pop("13C"))
    -1302        except KeyError:
    -1303            iso_class = None
    -1304
    -1305        return iso_class
    -1306
    -1307    def clean_ms1_report(self, ms1_summary_full):
    -1308        """Clean the MS1 report.
    -1309
    -1310        Parameters
    -1311        ----------
    -1312        ms1_summary_full : DataFrame
    -1313            The full MS1 summary DataFrame.
    -1314
    -1315        Returns
    -1316        -------
    -1317        DataFrame
    -1318            The cleaned MS1 summary DataFrame.
    -1319        """
    -1320        ms1_summary_full = ms1_summary_full.reset_index()
    -1321        cols_to_keep = [
    -1322            "mf_id",
    -1323            "Molecular Formula",
    -1324            "Ion Type",
    -1325            "Calculated m/z",
    -1326            "m/z Error (ppm)",
    -1327            "m/z Error Score",
    -1328            "Is Isotopologue",
    -1329            "Isotopologue Similarity",
    -1330            "Confidence Score",
    -1331        ]
    -1332        ms1_summary = ms1_summary_full[cols_to_keep].copy()
    -1333        ms1_summary["ion_formula"] = [
    -1334            self.get_ion_formula(f, a)
    -1335            for f, a in zip(ms1_summary["Molecular Formula"], ms1_summary["Ion Type"])
    -1336        ]
    -1337        ms1_summary["isotopologue_type"] = [
    -1338            self.get_isotope_type(f) for f in ms1_summary["ion_formula"].tolist()
    -1339        ]
    -1340
    -1341        # Reorder columns
    -1342        ms1_summary = ms1_summary[
    -1343            [
    -1344                "mf_id",
    -1345                "ion_formula",
    -1346                "isotopologue_type",
    -1347                "Calculated m/z",
    -1348                "m/z Error (ppm)",
    -1349                "m/z Error Score",
    -1350                "Isotopologue Similarity",
    -1351                "Confidence Score",
    -1352            ]
    -1353        ]
    -1354
    -1355        # Set the index to mf_id
    -1356        ms1_summary = ms1_summary.set_index("mf_id")
    -1357
    -1358        return ms1_summary
    -1359
    -1360    def summarize_lipid_report(self, ms2_annot):
    -1361        """Summarize the lipid report.
    -1362
    -1363        Parameters
    -1364        ----------
    -1365        ms2_annot : DataFrame
    -1366            The MS2 annotation DataFrame with all annotations.
    -1367
    -1368        Returns
    -1369        -------
    -1370        DataFrame
    -1371            The summarized lipid report.
    -1372        """
    -1373        # Drop unnecessary columns for easier viewing
    -1374        columns_to_drop = [
    -1375            "precursor_mz",
    -1376            "precursor_mz_error_ppm",
    -1377            "metabref_mol_id",
    -1378            "metabref_precursor_mz",
    -1379            "cas",
    -1380            "inchikey",
    -1381            "inchi",
    -1382            "chebi",
    -1383            "smiles",
    -1384            "kegg",
    -1385            "data_id",
    -1386            "iupac_name",
    -1387            "traditional_name",
    -1388            "common_name",
    -1389            "casno",
    -1390        ]
    -1391        ms2_annot = ms2_annot.drop(
    -1392            columns=[col for col in columns_to_drop if col in ms2_annot.columns]
    -1393        )
    -1394
    -1395        # If ion_types_excluded is not empty, remove those ion types
    -1396        ion_types_excluded = (
    -1397            self.mass_spectra.parameters.mass_spectrum['ms2'].molecular_search.ion_types_excluded
    -1398        )
    -1399        if len(ion_types_excluded) > 0:
    -1400            ms2_annot = ms2_annot[~ms2_annot["ref_ion_type"].isin(ion_types_excluded)]
    -1401
    -1402        # If mf_id is not present, check that the index name is mf_id and reset the index
    -1403        if "mf_id" not in ms2_annot.columns:
    -1404            if ms2_annot.index.name == "mf_id":
    -1405                ms2_annot = ms2_annot.reset_index()
    -1406            else:
    -1407                raise ValueError("mf_id is not present in the dataframe")
    -1408
    -1409        # Attempt to get consensus annotations to the MLF level
    -1410        mlf_results_all = []
    -1411        for mf_id in ms2_annot["mf_id"].unique():
    -1412            mlf_results_perid = []
    -1413            ms2_annot_mf = ms2_annot[ms2_annot["mf_id"] == mf_id].copy()
    -1414            ms2_annot_mf["n_spectra_contributing"] = len(ms2_annot_mf)
    -1415
    -1416            for query_scan in ms2_annot["query_spectrum_id"].unique():
    -1417                ms2_annot_sub = ms2_annot_mf[
    -1418                    ms2_annot_mf["query_spectrum_id"] == query_scan
    -1419                ].copy()
    -1420
    -1421                if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    -1422                    # If there is only one lipid_summed_name, let's try to get consensus molecular species annotation
    -1423                    if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    -1424                        ms2_annot_sub["entropy_max"] = (
    -1425                            ms2_annot_sub["entropy_similarity"]
    -1426                            == ms2_annot_sub["entropy_similarity"].max()
    -1427                        )
    -1428                        ms2_annot_sub["ref_match_fract_max"] = (
    -1429                            ms2_annot_sub["ref_mz_in_query_fract"]
    -1430                            == ms2_annot_sub["ref_mz_in_query_fract"].max()
    -1431                        )
    -1432                        ms2_annot_sub["frag_max"] = ms2_annot_sub["query_frag_types"].apply(
    -1433                            lambda x: True if "MLF" in x else False
    -1434                        )
    -1435
    -1436                        # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks)
    -1437                        ms2_annot_sub["consensus"] = ms2_annot_sub[
    -1438                            ["entropy_max", "ref_match_fract_max", "frag_max"]
    -1439                        ].all(axis=1)
    -1440
    -1441                        # If there is a consensus, take the row with the highest entropy_similarity
    -1442                        if ms2_annot_sub["consensus"].any():
    -1443                            ms2_annot_sub = ms2_annot_sub[
    -1444                                ms2_annot_sub["entropy_similarity"]
    -1445                                == ms2_annot_sub["entropy_similarity"].max()
    -1446                            ].head(1)
    -1447                            mlf_results_perid.append(ms2_annot_sub)
    -1448            if len(mlf_results_perid) == 0:
    -1449                mlf_results_perid = pd.DataFrame()
    -1450            else:
    -1451                mlf_results_perid = pd.concat(mlf_results_perid)
    -1452                if mlf_results_perid["name"].nunique() == 1:
    -1453                    mlf_results_perid = mlf_results_perid[
    -1454                        mlf_results_perid["entropy_similarity"]
    -1455                        == mlf_results_perid["entropy_similarity"].max()
    -1456                    ].head(1)
    -1457                else:
    -1458                    mlf_results_perid = pd.DataFrame()
    -1459                mlf_results_all.append(mlf_results_perid)
    -1460
    -1461        # These are the consensus annotations to the MLF level
    -1462        if len(mlf_results_all) > 0:
    -1463            mlf_results_all = pd.concat(mlf_results_all)
    -1464            mlf_results_all["annot_level"] = mlf_results_all["structure_level"]
    -1465        else:
    -1466            # Make an empty dataframe
    -1467            mlf_results_all = ms2_annot.head(0)
    -1468
    -1469        # For remaining mf_ids, try to get a consensus annotation to the species level
    -1470        species_results_all = []
    -1471        # Remove mf_ids that have consensus annotations to the MLF level
    -1472        ms2_annot_spec = ms2_annot[
    -1473            ~ms2_annot["mf_id"].isin(mlf_results_all["mf_id"].unique())
    -1474        ]
    -1475        for mf_id in ms2_annot_spec["mf_id"].unique():
    -1476            # Do all the hits have the same lipid_summed_name?
    -1477            ms2_annot_sub = ms2_annot_spec[ms2_annot_spec["mf_id"] == mf_id].copy()
    -1478            ms2_annot_sub["n_spectra_contributing"] = len(ms2_annot_sub)
    -1479
    -1480            if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    -1481                # Grab the highest entropy_similarity result
    -1482                ms2_annot_sub = ms2_annot_sub[
    -1483                    ms2_annot_sub["entropy_similarity"]
    -1484                    == ms2_annot_sub["entropy_similarity"].max()
    -1485                ].head(1)
    -1486                species_results_all.append(ms2_annot_sub)
    -1487
    -1488        # These are the consensus annotations to the species level
    -1489        if len(species_results_all) > 0:
    -1490            species_results_all = pd.concat(species_results_all)
    -1491            species_results_all["annot_level"] = "species"
    -1492        else:
    -1493            # Make an empty dataframe
    -1494            species_results_all = ms2_annot.head(0)
    -1495
    -1496        # Deal with the remaining mf_ids that do not have consensus annotations to the species level or MLF level
    -1497        # Remove mf_ids that have consensus annotations to the species level
    -1498        ms2_annot_remaining = ms2_annot_spec[
    -1499            ~ms2_annot_spec["mf_id"].isin(species_results_all["mf_id"].unique())
    -1500        ]
    -1501        no_consensus = []
    -1502        for mf_id in ms2_annot_remaining["mf_id"].unique():
    -1503            id_sub = []
    -1504            id_no_con = []
    -1505            ms2_annot_sub_mf = ms2_annot_remaining[
    -1506                ms2_annot_remaining["mf_id"] == mf_id
    -1507            ].copy()
    -1508            for query_scan in ms2_annot_sub_mf["query_spectrum_id"].unique():
    -1509                ms2_annot_sub = ms2_annot_sub_mf[
    -1510                    ms2_annot_sub_mf["query_spectrum_id"] == query_scan
    -1511                ].copy()
    -1512
    -1513                # New columns for ranking [HIGHER RANK = BETTER]
    -1514                ms2_annot_sub["entropy_max"] = (
    -1515                    ms2_annot_sub["entropy_similarity"]
    -1516                    == ms2_annot_sub["entropy_similarity"].max()
    -1517                )
    -1518                ms2_annot_sub["ref_match_fract_max"] = (
    -1519                    ms2_annot_sub["ref_mz_in_query_fract"]
    -1520                    == ms2_annot_sub["ref_mz_in_query_fract"].max()
    -1521                )
    -1522                ms2_annot_sub["frag_max"] = ms2_annot_sub["query_frag_types"].apply(
    -1523                    lambda x: True if "MLF" in x else False
    -1524                )
    -1525
    -1526                # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks)
    -1527                ms2_annot_sub["consensus"] = ms2_annot_sub[
    -1528                    ["entropy_max", "ref_match_fract_max", "frag_max"]
    -1529                ].all(axis=1)
    -1530                ms2_annot_sub_con = ms2_annot_sub[ms2_annot_sub["consensus"]]
    -1531                id_sub.append(ms2_annot_sub_con)
    -1532                id_no_con.append(ms2_annot_sub)
    -1533            id_sub = pd.concat(id_sub)
    -1534            id_no_con = pd.concat(id_no_con)
    -1535
    -1536            # Scenario 1: Multiple scans are being resolved to different MLFs [could be coelutions and should both be kept and annotated to MS level]
    -1537            if (
    -1538                id_sub["query_frag_types"]
    -1539                .apply(lambda x: True if "MLF" in x else False)
    -1540                .all()
    -1541                and len(id_sub) > 0
    -1542            ):
    -1543                idx = id_sub.groupby("name")["entropy_similarity"].idxmax()
    -1544                id_sub = id_sub.loc[idx]
    -1545                # Reorder so highest entropy_similarity is first
    -1546                id_sub = id_sub.sort_values("entropy_similarity", ascending=False)
    -1547                id_sub["annot_level"] = id_sub["structure_level"]
    -1548                no_consensus.append(id_sub)
    -1549
    -1550            # Scenario 2: Multiple scans are being resolved to different species, keep both and annotate to appropriate level
    -1551            elif len(id_sub) == 0:
    -1552                for lipid_summed_name in id_no_con["lipid_summed_name"].unique():
    -1553                    summed_sub = id_no_con[
    -1554                        id_no_con["lipid_summed_name"] == lipid_summed_name
    -1555                    ]
    -1556                    # Any consensus to MLF?
    -1557                    if summed_sub["consensus"].any():
    -1558                        summed_sub = summed_sub[summed_sub["consensus"]]
    -1559                        summed_sub["annot_level"] = summed_sub["structure_level"]
    -1560                        no_consensus.append(summed_sub)
    -1561                    else:
    -1562                        # Grab the highest entropy_similarity, if there are multiple, grab the first one
    -1563                        summed_sub = summed_sub[
    -1564                            summed_sub["entropy_similarity"]
    -1565                            == summed_sub["entropy_similarity"].max()
    -1566                        ].head(1)
    -1567                        # get first row
    -1568                        summed_sub["annot_level"] = "species"
    -1569                        summed_sub["name"] = ""
    -1570                        no_consensus.append(summed_sub)
    -1571            else:
    -1572                raise ValueError("Unexpected scenario for summarizing mf_id: ", mf_id)
    -1573
    -1574        if len(no_consensus) > 0:
    -1575            no_consensus = pd.concat(no_consensus)
    -1576        else:
    -1577            no_consensus = ms2_annot.head(0)
    -1578
    -1579        # Combine all the consensus annotations and reformat the dataframe for output
    -1580        species_results_all = species_results_all.drop(columns=["name"])
    -1581        species_results_all["lipid_molecular_species_id"] = ""
    -1582        mlf_results_all["lipid_molecular_species_id"] = mlf_results_all["name"]
    -1583        no_consensus["lipid_molecular_species_id"] = no_consensus["name"]
    -1584        consensus_annotations = pd.concat(
    -1585            [mlf_results_all, species_results_all, no_consensus]
    -1586        )
    -1587        consensus_annotations = consensus_annotations.sort_values(
    -1588            "mf_id", ascending=True
    -1589        )
    -1590        cols_to_keep = [
    -1591            "mf_id",
    -1592            "ref_ion_type",
    -1593            "entropy_similarity",
    -1594            "ref_mz_in_query_fract",
    -1595            "lipid_molecular_species_id",
    -1596            "lipid_summed_name",
    -1597            "lipid_subclass",
    -1598            "lipid_class",
    -1599            "lipid_category",
    -1600            "formula",
    -1601            "annot_level",
    -1602            "n_spectra_contributing",
    -1603        ]
    -1604        consensus_annotations = consensus_annotations[cols_to_keep]
    -1605        consensus_annotations = consensus_annotations.set_index("mf_id")
    -1606
    -1607        return consensus_annotations
    -1608
    -1609    def clean_ms2_report(self, lipid_summary):
    -1610        """Clean the MS2 report.
    -1611
    -1612        Parameters
    -1613        ----------
    -1614        lipid_summary : DataFrame
    -1615            The full lipid summary DataFrame.
    -1616
    -1617        Returns
    -1618        -------
    -1619        DataFrame
    -1620            The cleaned lipid summary DataFrame.
    -1621        """
    -1622        lipid_summary = lipid_summary.reset_index()
    -1623        lipid_summary["ion_formula"] = [
    -1624            self.get_ion_formula(f, a)
    -1625            for f, a in zip(lipid_summary["formula"], lipid_summary["ref_ion_type"])
    -1626        ]
    -1627
    -1628        # Reorder columns
    -1629        lipid_summary = lipid_summary[
    -1630            [
    -1631                "mf_id",
    -1632                "ion_formula",
    -1633                "ref_ion_type",
    -1634                "formula",
    -1635                "annot_level",
    -1636                "lipid_molecular_species_id",
    -1637                "lipid_summed_name",
    -1638                "lipid_subclass",
    -1639                "lipid_class",
    -1640                "lipid_category",
    -1641                "entropy_similarity",
    -1642                "ref_mz_in_query_fract",
    -1643                "n_spectra_contributing",
    -1644            ]
    -1645        ]
    -1646
    -1647        # Set the index to mf_id
    -1648        lipid_summary = lipid_summary.set_index("mf_id")
    -1649
    -1650        return lipid_summary
    -1651
    -1652    def to_report(self, molecular_metadata=None):
    -1653        """Create a report of the mass features and their annotations.
    -1654
    -1655        Parameters
    -1656        ----------
    -1657        molecular_metadata : dict, optional
    -1658            The molecular metadata. Default is None.
    -1659
    -1660        Returns
    -1661        -------
    -1662        DataFrame
    -1663            The report of the mass features and their annotations.
    -1664
    -1665        Notes
    -1666        -----
    -1667        The report will contain the mass features and their annotations from MS1 and MS2 (if available).
    -1668        """
    -1669        # Get mass feature dataframe
    -1670        mf_report = self.mass_spectra.mass_features_to_df()
    -1671        mf_report = mf_report.reset_index(drop=False)
    -1672
    -1673        # Get and clean ms1 annotation dataframe
    -1674        ms1_annot_report = self.mass_spectra.mass_features_ms1_annot_to_df().copy()
    -1675        ms1_annot_report = self.clean_ms1_report(ms1_annot_report)
    -1676        ms1_annot_report = ms1_annot_report.reset_index(drop=False)
    -1677
    -1678        # Get, summarize, and clean ms2 annotation dataframe
    -1679        ms2_annot_report = self.mass_spectra.mass_features_ms2_annot_to_df(
    -1680            molecular_metadata=molecular_metadata
    -1681        )
    -1682        if ms2_annot_report is not None:
    -1683            ms2_annot_report = self.summarize_lipid_report(ms2_annot_report)
    -1684            ms2_annot_report = self.clean_ms2_report(ms2_annot_report)
    -1685            ms2_annot_report = ms2_annot_report.dropna(axis=1, how="all")
    -1686            ms2_annot_report = ms2_annot_report.reset_index(drop=False)
    -1687
    -1688        # Combine the reports
    -1689        if not ms1_annot_report.empty:
    -1690            # MS1 has been run and has molecular formula information
    -1691            mf_report = pd.merge(
    -1692                mf_report,
    -1693                ms1_annot_report,
    -1694                how="left",
    -1695                on=["mf_id", "isotopologue_type"],
    -1696            )
    -1697        if ms2_annot_report is not None:
    -1698            # pull out the records with ion_formula and drop the ion_formula column (these should be empty if MS1 molecular formula assignment is working correctly)
    -1699            mf_no_ion_formula = mf_report[mf_report["ion_formula"].isna()]
    -1700            mf_no_ion_formula = mf_no_ion_formula.drop(columns=["ion_formula"])
    -1701            mf_no_ion_formula = pd.merge(
    -1702                mf_no_ion_formula, ms2_annot_report, how="left", on=["mf_id"]
    -1703            )
    -1704
    -1705            # pull out the records with ion_formula
    -1706            mf_with_ion_formula = mf_report[~mf_report["ion_formula"].isna()]
    -1707            mf_with_ion_formula = pd.merge(
    -1708                mf_with_ion_formula,
    -1709                ms2_annot_report,
    -1710                how="left",
    -1711                on=["mf_id", "ion_formula"],
    +1235        Returns
    +1236        -------
    +1237        str
    +1238            The formula of the ion as a string (like 'C2 H4 O2'); or None if the neutral_formula is not a string.
    +1239        """
    +1240        # If neutral_formula is not a string, return None
    +1241        if not isinstance(neutral_formula, str):
    +1242            return None
    +1243
    +1244        # Check if there are spaces in the formula (these are outputs of the MolecularFormula class and do not need to be processed before being passed to the class)
    +1245        if re.search(r"\s", neutral_formula):
    +1246            neutral_formula = MolecularFormula(neutral_formula, ion_charge=0)
    +1247        else:
    +1248            form_pre = re.sub(r"([A-Z])", r" \1", neutral_formula)[1:]
    +1249            elements = [re.findall(r"[A-Z][a-z]*", x) for x in form_pre.split()]
    +1250            counts = [re.findall(r"\d+", x) for x in form_pre.split()]
    +1251            neutral_formula = MolecularFormula(
    +1252                dict(
    +1253                    zip(
    +1254                        [x[0] for x in elements],
    +1255                        [int(x[0]) if x else 1 for x in counts],
    +1256                    )
    +1257                ),
    +1258                ion_charge=0,
    +1259            )
    +1260        neutral_formula_dict = neutral_formula.to_dict().copy()
    +1261
    +1262        adduct_add_dict = ion_type_dict[ion_type][0]
    +1263        for key in adduct_add_dict:
    +1264            if key in neutral_formula_dict.keys():
    +1265                neutral_formula_dict[key] += adduct_add_dict[key]
    +1266            else:
    +1267                neutral_formula_dict[key] = adduct_add_dict[key]
    +1268
    +1269        adduct_subtract = ion_type_dict[ion_type][1]
    +1270        for key in adduct_subtract:
    +1271            neutral_formula_dict[key] -= adduct_subtract[key]
    +1272
    +1273        return MolecularFormula(neutral_formula_dict, ion_charge=0).string
    +1274
    +1275    @staticmethod
    +1276    def get_isotope_type(ion_formula):
    +1277        """From an ion formula, return the 13C isotope type of the ion.
    +1278
    +1279        Notes
    +1280        -----
    +1281        This is a static method.
    +1282        If the ion_formula is not a string, this method will return None.
    +1283        This is currently only functional for 13C isotopes.
    +1284
    +1285        Parameters
    +1286        ----------
    +1287        ion_formula : str
    +1288            The formula of the ion, expected to be a string like 'C2 H4 O2'.
    +1289
    +1290        Returns
    +1291        -------
    +1292        str
    +1293            The isotope type of the ion, e.g. '13C1', '13C2', etc; or None if the ion_formula does not contain a 13C isotope.
    +1294
    +1295        Raises
    +1296        ------
    +1297        ValueError
    +1298            If the ion_formula is not a string.
    +1299        """
    +1300        if not isinstance(ion_formula, str):
    +1301            return None
    +1302
    +1303        if re.search(r"\s", ion_formula):
    +1304            ion_formula = MolecularFormula(ion_formula, ion_charge=0)
    +1305        else:
    +1306            raise ValueError('ion_formula should be a string like "C2 H4 O2"')
    +1307        ion_formula_dict = ion_formula.to_dict().copy()
    +1308
    +1309        try:
    +1310            iso_class = "13C" + str(ion_formula_dict.pop("13C"))
    +1311        except KeyError:
    +1312            iso_class = None
    +1313
    +1314        return iso_class
    +1315
    +1316    def clean_ms1_report(self, ms1_summary_full):
    +1317        """Clean the MS1 report.
    +1318
    +1319        Parameters
    +1320        ----------
    +1321        ms1_summary_full : DataFrame
    +1322            The full MS1 summary DataFrame.
    +1323
    +1324        Returns
    +1325        -------
    +1326        DataFrame
    +1327            The cleaned MS1 summary DataFrame.
    +1328        """
    +1329        ms1_summary_full = ms1_summary_full.reset_index()
    +1330        cols_to_keep = [
    +1331            "mf_id",
    +1332            "Molecular Formula",
    +1333            "Ion Type",
    +1334            "Calculated m/z",
    +1335            "m/z Error (ppm)",
    +1336            "m/z Error Score",
    +1337            "Is Isotopologue",
    +1338            "Isotopologue Similarity",
    +1339            "Confidence Score",
    +1340        ]
    +1341        ms1_summary = ms1_summary_full[cols_to_keep].copy()
    +1342        ms1_summary["ion_formula"] = [
    +1343            self.get_ion_formula(f, a)
    +1344            for f, a in zip(ms1_summary["Molecular Formula"], ms1_summary["Ion Type"])
    +1345        ]
    +1346        ms1_summary["isotopologue_type"] = [
    +1347            self.get_isotope_type(f) for f in ms1_summary["ion_formula"].tolist()
    +1348        ]
    +1349
    +1350        # Reorder columns
    +1351        ms1_summary = ms1_summary[
    +1352            [
    +1353                "mf_id",
    +1354                "ion_formula",
    +1355                "isotopologue_type",
    +1356                "Calculated m/z",
    +1357                "m/z Error (ppm)",
    +1358                "m/z Error Score",
    +1359                "Isotopologue Similarity",
    +1360                "Confidence Score",
    +1361            ]
    +1362        ]
    +1363
    +1364        # Set the index to mf_id
    +1365        ms1_summary = ms1_summary.set_index("mf_id")
    +1366
    +1367        return ms1_summary
    +1368
    +1369    def summarize_lipid_report(self, ms2_annot):
    +1370        """Summarize the lipid report.
    +1371
    +1372        Parameters
    +1373        ----------
    +1374        ms2_annot : DataFrame
    +1375            The MS2 annotation DataFrame with all annotations.
    +1376
    +1377        Returns
    +1378        -------
    +1379        DataFrame
    +1380            The summarized lipid report.
    +1381        """
    +1382        # Drop unnecessary columns for easier viewing
    +1383        columns_to_drop = [
    +1384            "precursor_mz",
    +1385            "precursor_mz_error_ppm",
    +1386            "metabref_mol_id",
    +1387            "metabref_precursor_mz",
    +1388            "cas",
    +1389            "inchikey",
    +1390            "inchi",
    +1391            "chebi",
    +1392            "smiles",
    +1393            "kegg",
    +1394            "data_id",
    +1395            "iupac_name",
    +1396            "traditional_name",
    +1397            "common_name",
    +1398            "casno",
    +1399        ]
    +1400        ms2_annot = ms2_annot.drop(
    +1401            columns=[col for col in columns_to_drop if col in ms2_annot.columns]
    +1402        )
    +1403
    +1404        # If ion_types_excluded is not empty, remove those ion types
    +1405        ion_types_excluded = self.mass_spectra.parameters.mass_spectrum[
    +1406            "ms2"
    +1407        ].molecular_search.ion_types_excluded
    +1408        if len(ion_types_excluded) > 0:
    +1409            ms2_annot = ms2_annot[~ms2_annot["ref_ion_type"].isin(ion_types_excluded)]
    +1410
    +1411        # If mf_id is not present, check that the index name is mf_id and reset the index
    +1412        if "mf_id" not in ms2_annot.columns:
    +1413            if ms2_annot.index.name == "mf_id":
    +1414                ms2_annot = ms2_annot.reset_index()
    +1415            else:
    +1416                raise ValueError("mf_id is not present in the dataframe")
    +1417
    +1418        # Attempt to get consensus annotations to the MLF level
    +1419        mlf_results_all = []
    +1420        for mf_id in ms2_annot["mf_id"].unique():
    +1421            mlf_results_perid = []
    +1422            ms2_annot_mf = ms2_annot[ms2_annot["mf_id"] == mf_id].copy()
    +1423            ms2_annot_mf["n_spectra_contributing"] = len(ms2_annot_mf)
    +1424
    +1425            for query_scan in ms2_annot["query_spectrum_id"].unique():
    +1426                ms2_annot_sub = ms2_annot_mf[
    +1427                    ms2_annot_mf["query_spectrum_id"] == query_scan
    +1428                ].copy()
    +1429
    +1430                if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    +1431                    # If there is only one lipid_summed_name, let's try to get consensus molecular species annotation
    +1432                    if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    +1433                        ms2_annot_sub["entropy_max"] = (
    +1434                            ms2_annot_sub["entropy_similarity"]
    +1435                            == ms2_annot_sub["entropy_similarity"].max()
    +1436                        )
    +1437                        ms2_annot_sub["ref_match_fract_max"] = (
    +1438                            ms2_annot_sub["ref_mz_in_query_fract"]
    +1439                            == ms2_annot_sub["ref_mz_in_query_fract"].max()
    +1440                        )
    +1441                        ms2_annot_sub["frag_max"] = ms2_annot_sub[
    +1442                            "query_frag_types"
    +1443                        ].apply(lambda x: True if "MLF" in x else False)
    +1444
    +1445                        # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks)
    +1446                        ms2_annot_sub["consensus"] = ms2_annot_sub[
    +1447                            ["entropy_max", "ref_match_fract_max", "frag_max"]
    +1448                        ].all(axis=1)
    +1449
    +1450                        # If there is a consensus, take the row with the highest entropy_similarity
    +1451                        if ms2_annot_sub["consensus"].any():
    +1452                            ms2_annot_sub = ms2_annot_sub[
    +1453                                ms2_annot_sub["entropy_similarity"]
    +1454                                == ms2_annot_sub["entropy_similarity"].max()
    +1455                            ].head(1)
    +1456                            mlf_results_perid.append(ms2_annot_sub)
    +1457            if len(mlf_results_perid) == 0:
    +1458                mlf_results_perid = pd.DataFrame()
    +1459            else:
    +1460                mlf_results_perid = pd.concat(mlf_results_perid)
    +1461                if mlf_results_perid["name"].nunique() == 1:
    +1462                    mlf_results_perid = mlf_results_perid[
    +1463                        mlf_results_perid["entropy_similarity"]
    +1464                        == mlf_results_perid["entropy_similarity"].max()
    +1465                    ].head(1)
    +1466                else:
    +1467                    mlf_results_perid = pd.DataFrame()
    +1468                mlf_results_all.append(mlf_results_perid)
    +1469
    +1470        # These are the consensus annotations to the MLF level
    +1471        if len(mlf_results_all) > 0:
    +1472            mlf_results_all = pd.concat(mlf_results_all)
    +1473            mlf_results_all["annot_level"] = mlf_results_all["structure_level"]
    +1474        else:
    +1475            # Make an empty dataframe
    +1476            mlf_results_all = ms2_annot.head(0)
    +1477
    +1478        # For remaining mf_ids, try to get a consensus annotation to the species level
    +1479        species_results_all = []
    +1480        # Remove mf_ids that have consensus annotations to the MLF level
    +1481        ms2_annot_spec = ms2_annot[
    +1482            ~ms2_annot["mf_id"].isin(mlf_results_all["mf_id"].unique())
    +1483        ]
    +1484        for mf_id in ms2_annot_spec["mf_id"].unique():
    +1485            # Do all the hits have the same lipid_summed_name?
    +1486            ms2_annot_sub = ms2_annot_spec[ms2_annot_spec["mf_id"] == mf_id].copy()
    +1487            ms2_annot_sub["n_spectra_contributing"] = len(ms2_annot_sub)
    +1488
    +1489            if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    +1490                # Grab the highest entropy_similarity result
    +1491                ms2_annot_sub = ms2_annot_sub[
    +1492                    ms2_annot_sub["entropy_similarity"]
    +1493                    == ms2_annot_sub["entropy_similarity"].max()
    +1494                ].head(1)
    +1495                species_results_all.append(ms2_annot_sub)
    +1496
    +1497        # These are the consensus annotations to the species level
    +1498        if len(species_results_all) > 0:
    +1499            species_results_all = pd.concat(species_results_all)
    +1500            species_results_all["annot_level"] = "species"
    +1501        else:
    +1502            # Make an empty dataframe
    +1503            species_results_all = ms2_annot.head(0)
    +1504
    +1505        # Deal with the remaining mf_ids that do not have consensus annotations to the species level or MLF level
    +1506        # Remove mf_ids that have consensus annotations to the species level
    +1507        ms2_annot_remaining = ms2_annot_spec[
    +1508            ~ms2_annot_spec["mf_id"].isin(species_results_all["mf_id"].unique())
    +1509        ]
    +1510        no_consensus = []
    +1511        for mf_id in ms2_annot_remaining["mf_id"].unique():
    +1512            id_sub = []
    +1513            id_no_con = []
    +1514            ms2_annot_sub_mf = ms2_annot_remaining[
    +1515                ms2_annot_remaining["mf_id"] == mf_id
    +1516            ].copy()
    +1517            for query_scan in ms2_annot_sub_mf["query_spectrum_id"].unique():
    +1518                ms2_annot_sub = ms2_annot_sub_mf[
    +1519                    ms2_annot_sub_mf["query_spectrum_id"] == query_scan
    +1520                ].copy()
    +1521
    +1522                # New columns for ranking [HIGHER RANK = BETTER]
    +1523                ms2_annot_sub["entropy_max"] = (
    +1524                    ms2_annot_sub["entropy_similarity"]
    +1525                    == ms2_annot_sub["entropy_similarity"].max()
    +1526                )
    +1527                ms2_annot_sub["ref_match_fract_max"] = (
    +1528                    ms2_annot_sub["ref_mz_in_query_fract"]
    +1529                    == ms2_annot_sub["ref_mz_in_query_fract"].max()
    +1530                )
    +1531                ms2_annot_sub["frag_max"] = ms2_annot_sub["query_frag_types"].apply(
    +1532                    lambda x: True if "MLF" in x else False
    +1533                )
    +1534
    +1535                # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks)
    +1536                ms2_annot_sub["consensus"] = ms2_annot_sub[
    +1537                    ["entropy_max", "ref_match_fract_max", "frag_max"]
    +1538                ].all(axis=1)
    +1539                ms2_annot_sub_con = ms2_annot_sub[ms2_annot_sub["consensus"]]
    +1540                id_sub.append(ms2_annot_sub_con)
    +1541                id_no_con.append(ms2_annot_sub)
    +1542            id_sub = pd.concat(id_sub)
    +1543            id_no_con = pd.concat(id_no_con)
    +1544
    +1545            # Scenario 1: Multiple scans are being resolved to different MLFs [could be coelutions and should both be kept and annotated to MS level]
    +1546            if (
    +1547                id_sub["query_frag_types"]
    +1548                .apply(lambda x: True if "MLF" in x else False)
    +1549                .all()
    +1550                and len(id_sub) > 0
    +1551            ):
    +1552                idx = id_sub.groupby("name")["entropy_similarity"].idxmax()
    +1553                id_sub = id_sub.loc[idx]
    +1554                # Reorder so highest entropy_similarity is first
    +1555                id_sub = id_sub.sort_values("entropy_similarity", ascending=False)
    +1556                id_sub["annot_level"] = id_sub["structure_level"]
    +1557                no_consensus.append(id_sub)
    +1558
    +1559            # Scenario 2: Multiple scans are being resolved to different species, keep both and annotate to appropriate level
    +1560            elif len(id_sub) == 0:
    +1561                for lipid_summed_name in id_no_con["lipid_summed_name"].unique():
    +1562                    summed_sub = id_no_con[
    +1563                        id_no_con["lipid_summed_name"] == lipid_summed_name
    +1564                    ]
    +1565                    # Any consensus to MLF?
    +1566                    if summed_sub["consensus"].any():
    +1567                        summed_sub = summed_sub[summed_sub["consensus"]]
    +1568                        summed_sub["annot_level"] = summed_sub["structure_level"]
    +1569                        no_consensus.append(summed_sub)
    +1570                    else:
    +1571                        # Grab the highest entropy_similarity, if there are multiple, grab the first one
    +1572                        summed_sub = summed_sub[
    +1573                            summed_sub["entropy_similarity"]
    +1574                            == summed_sub["entropy_similarity"].max()
    +1575                        ].head(1)
    +1576                        # get first row
    +1577                        summed_sub["annot_level"] = "species"
    +1578                        summed_sub["name"] = ""
    +1579                        no_consensus.append(summed_sub)
    +1580            else:
    +1581                raise ValueError("Unexpected scenario for summarizing mf_id: ", mf_id)
    +1582
    +1583        if len(no_consensus) > 0:
    +1584            no_consensus = pd.concat(no_consensus)
    +1585        else:
    +1586            no_consensus = ms2_annot.head(0)
    +1587
    +1588        # Combine all the consensus annotations and reformat the dataframe for output
    +1589        species_results_all = species_results_all.drop(columns=["name"])
    +1590        species_results_all["lipid_molecular_species_id"] = ""
    +1591        mlf_results_all["lipid_molecular_species_id"] = mlf_results_all["name"]
    +1592        no_consensus["lipid_molecular_species_id"] = no_consensus["name"]
    +1593        consensus_annotations = pd.concat(
    +1594            [mlf_results_all, species_results_all, no_consensus]
    +1595        )
    +1596        consensus_annotations = consensus_annotations.sort_values(
    +1597            "mf_id", ascending=True
    +1598        )
    +1599        cols_to_keep = [
    +1600            "mf_id",
    +1601            "ref_ion_type",
    +1602            "entropy_similarity",
    +1603            "ref_mz_in_query_fract",
    +1604            "lipid_molecular_species_id",
    +1605            "lipid_summed_name",
    +1606            "lipid_subclass",
    +1607            "lipid_class",
    +1608            "lipid_category",
    +1609            "formula",
    +1610            "annot_level",
    +1611            "n_spectra_contributing",
    +1612        ]
    +1613        consensus_annotations = consensus_annotations[cols_to_keep]
    +1614        consensus_annotations = consensus_annotations.set_index("mf_id")
    +1615
    +1616        return consensus_annotations
    +1617
    +1618    def clean_ms2_report(self, lipid_summary):
    +1619        """Clean the MS2 report.
    +1620
    +1621        Parameters
    +1622        ----------
    +1623        lipid_summary : DataFrame
    +1624            The full lipid summary DataFrame.
    +1625
    +1626        Returns
    +1627        -------
    +1628        DataFrame
    +1629            The cleaned lipid summary DataFrame.
    +1630        """
    +1631        lipid_summary = lipid_summary.reset_index()
    +1632        lipid_summary["ion_formula"] = [
    +1633            self.get_ion_formula(f, a)
    +1634            for f, a in zip(lipid_summary["formula"], lipid_summary["ref_ion_type"])
    +1635        ]
    +1636
    +1637        # Reorder columns
    +1638        lipid_summary = lipid_summary[
    +1639            [
    +1640                "mf_id",
    +1641                "ion_formula",
    +1642                "ref_ion_type",
    +1643                "formula",
    +1644                "annot_level",
    +1645                "lipid_molecular_species_id",
    +1646                "lipid_summed_name",
    +1647                "lipid_subclass",
    +1648                "lipid_class",
    +1649                "lipid_category",
    +1650                "entropy_similarity",
    +1651                "ref_mz_in_query_fract",
    +1652                "n_spectra_contributing",
    +1653            ]
    +1654        ]
    +1655
    +1656        # Set the index to mf_id
    +1657        lipid_summary = lipid_summary.set_index("mf_id")
    +1658
    +1659        return lipid_summary
    +1660
    +1661    def to_report(self, molecular_metadata=None):
    +1662        """Create a report of the mass features and their annotations.
    +1663
    +1664        Parameters
    +1665        ----------
    +1666        molecular_metadata : dict, optional
    +1667            The molecular metadata. Default is None.
    +1668
    +1669        Returns
    +1670        -------
    +1671        DataFrame
    +1672            The report of the mass features and their annotations.
    +1673
    +1674        Notes
    +1675        -----
    +1676        The report will contain the mass features and their annotations from MS1 and MS2 (if available).
    +1677        """
    +1678        # Get mass feature dataframe
    +1679        mf_report = self.mass_spectra.mass_features_to_df()
    +1680        mf_report = mf_report.reset_index(drop=False)
    +1681
    +1682        # Get and clean ms1 annotation dataframe
    +1683        ms1_annot_report = self.mass_spectra.mass_features_ms1_annot_to_df().copy()
    +1684        ms1_annot_report = self.clean_ms1_report(ms1_annot_report)
    +1685        ms1_annot_report = ms1_annot_report.reset_index(drop=False)
    +1686
    +1687        # Get, summarize, and clean ms2 annotation dataframe
    +1688        ms2_annot_report = self.mass_spectra.mass_features_ms2_annot_to_df(
    +1689            molecular_metadata=molecular_metadata
    +1690        )
    +1691        if ms2_annot_report is not None:
    +1692            ms2_annot_report = self.summarize_lipid_report(ms2_annot_report)
    +1693            ms2_annot_report = self.clean_ms2_report(ms2_annot_report)
    +1694            ms2_annot_report = ms2_annot_report.dropna(axis=1, how="all")
    +1695            ms2_annot_report = ms2_annot_report.reset_index(drop=False)
    +1696
    +1697        # Combine the reports
    +1698        if not ms1_annot_report.empty:
    +1699            # MS1 has been run and has molecular formula information
    +1700            mf_report = pd.merge(
    +1701                mf_report,
    +1702                ms1_annot_report,
    +1703                how="left",
    +1704                on=["mf_id", "isotopologue_type"],
    +1705            )
    +1706        if ms2_annot_report is not None:
    +1707            # pull out the records with ion_formula and drop the ion_formula column (these should be empty if MS1 molecular formula assignment is working correctly)
    +1708            mf_no_ion_formula = mf_report[mf_report["ion_formula"].isna()]
    +1709            mf_no_ion_formula = mf_no_ion_formula.drop(columns=["ion_formula"])
    +1710            mf_no_ion_formula = pd.merge(
    +1711                mf_no_ion_formula, ms2_annot_report, how="left", on=["mf_id"]
     1712            )
     1713
    -1714            # put back together
    -1715            mf_report = pd.concat([mf_no_ion_formula, mf_with_ion_formula])
    -1716
    -1717        # Rename colums
    -1718        rename_dict = {
    -1719            "mf_id": "Mass Feature ID",
    -1720            "scan_time": "Retention Time (min)",
    -1721            "mz": "m/z",
    -1722            "apex_scan": "Apex Scan Number",
    -1723            "intensity": "Intensity",
    -1724            "persistence": "Persistence",
    -1725            "area": "Area",
    -1726            "half_height_width": "Half Height Width (min)",
    -1727            "tailing_factor": "Tailing Factor",
    -1728            "dispersity_index": "Dispersity Index",
    -1729            "ms2_spectrum": "MS2 Spectrum",
    -1730            "monoisotopic_mf_id": "Monoisotopic Mass Feature ID",
    -1731            "isotopologue_type": "Isotopologue Type",
    -1732            "mass_spectrum_deconvoluted_parent": "Is Largest Ion after Deconvolution",
    -1733            "associated_mass_features": "Associated Mass Features after Deconvolution",
    -1734            "ion_formula": "Ion Formula",
    -1735            "formula": "Molecular Formula",
    -1736            "ref_ion_type": "Ion Type",
    -1737            "annot_level": "Lipid Annotation Level",
    -1738            "lipid_molecular_species_id": "Lipid Molecular Species",
    -1739            "lipid_summed_name": "Lipid Species",
    -1740            "lipid_subclass": "Lipid Subclass",
    -1741            "lipid_class": "Lipid Class",
    -1742            "lipid_category": "Lipid Category",
    -1743            "entropy_similarity": "Entropy Similarity",
    -1744            "ref_mz_in_query_fract": "Library mzs in Query (fraction)",
    -1745            "n_spectra_contributing": "Spectra with Annotation (n)",
    -1746        }
    -1747        mf_report = mf_report.rename(columns=rename_dict)
    -1748        mf_report["Sample Name"] = self.mass_spectra.sample_name
    -1749        mf_report["Polarity"] = self.mass_spectra.polarity
    -1750        mf_report = mf_report[
    -1751            ["Mass Feature ID", "Sample Name", "Polarity"]
    -1752            + [
    -1753                col
    -1754                for col in mf_report.columns
    -1755                if col not in ["Mass Feature ID", "Sample Name", "Polarity"]
    -1756            ]
    -1757        ]
    -1758
    -1759        # Reorder rows by "Mass Feature ID"
    -1760        mf_report = mf_report.sort_values("Mass Feature ID")
    -1761
    -1762        # Reset index
    -1763        mf_report = mf_report.reset_index(drop=True)
    -1764
    -1765        return mf_report
    -1766
    -1767    def report_to_csv(self, molecular_metadata=None):
    -1768        """Create a report of the mass features and their annotations and save it as a CSV file.
    -1769
    -1770        Parameters
    -1771        ----------
    -1772        molecular_metadata : dict, optional
    -1773            The molecular metadata. Default is None.
    -1774        """
    -1775        report = self.to_report(molecular_metadata=molecular_metadata)
    -1776        out_file = self.output_file.with_suffix(".csv")
    -1777        report.to_csv(out_file, index=False)
    +1714            # pull out the records with ion_formula
    +1715            mf_with_ion_formula = mf_report[~mf_report["ion_formula"].isna()]
    +1716            mf_with_ion_formula = pd.merge(
    +1717                mf_with_ion_formula,
    +1718                ms2_annot_report,
    +1719                how="left",
    +1720                on=["mf_id", "ion_formula"],
    +1721            )
    +1722
    +1723            # put back together
    +1724            mf_report = pd.concat([mf_no_ion_formula, mf_with_ion_formula])
    +1725
    +1726        # Rename colums
    +1727        rename_dict = {
    +1728            "mf_id": "Mass Feature ID",
    +1729            "scan_time": "Retention Time (min)",
    +1730            "mz": "m/z",
    +1731            "apex_scan": "Apex Scan Number",
    +1732            "intensity": "Intensity",
    +1733            "persistence": "Persistence",
    +1734            "area": "Area",
    +1735            "half_height_width": "Half Height Width (min)",
    +1736            "tailing_factor": "Tailing Factor",
    +1737            "dispersity_index": "Dispersity Index",
    +1738            "ms2_spectrum": "MS2 Spectrum",
    +1739            "monoisotopic_mf_id": "Monoisotopic Mass Feature ID",
    +1740            "isotopologue_type": "Isotopologue Type",
    +1741            "mass_spectrum_deconvoluted_parent": "Is Largest Ion after Deconvolution",
    +1742            "associated_mass_features": "Associated Mass Features after Deconvolution",
    +1743            "ion_formula": "Ion Formula",
    +1744            "formula": "Molecular Formula",
    +1745            "ref_ion_type": "Ion Type",
    +1746            "annot_level": "Lipid Annotation Level",
    +1747            "lipid_molecular_species_id": "Lipid Molecular Species",
    +1748            "lipid_summed_name": "Lipid Species",
    +1749            "lipid_subclass": "Lipid Subclass",
    +1750            "lipid_class": "Lipid Class",
    +1751            "lipid_category": "Lipid Category",
    +1752            "entropy_similarity": "Entropy Similarity",
    +1753            "ref_mz_in_query_fract": "Library mzs in Query (fraction)",
    +1754            "n_spectra_contributing": "Spectra with Annotation (n)",
    +1755        }
    +1756        mf_report = mf_report.rename(columns=rename_dict)
    +1757        mf_report["Sample Name"] = self.mass_spectra.sample_name
    +1758        mf_report["Polarity"] = self.mass_spectra.polarity
    +1759        mf_report = mf_report[
    +1760            ["Mass Feature ID", "Sample Name", "Polarity"]
    +1761            + [
    +1762                col
    +1763                for col in mf_report.columns
    +1764                if col not in ["Mass Feature ID", "Sample Name", "Polarity"]
    +1765            ]
    +1766        ]
    +1767
    +1768        # Reorder rows by "Mass Feature ID"
    +1769        mf_report = mf_report.sort_values("Mass Feature ID")
    +1770
    +1771        # Reset index
    +1772        mf_report = mf_report.reset_index(drop=True)
    +1773
    +1774        return mf_report
    +1775
    +1776    def report_to_csv(self, molecular_metadata=None):
    +1777        """Create a report of the mass features and their annotations and save it as a CSV file.
    +1778
    +1779        Parameters
    +1780        ----------
    +1781        molecular_metadata : dict, optional
    +1782            The molecular metadata. Default is None.
    +1783        """
    +1784        report = self.to_report(molecular_metadata=molecular_metadata)
    +1785        out_file = self.output_file.with_suffix(".csv")
    +1786        report.to_csv(out_file, index=False)
     
    @@ -5643,9 +5673,9 @@
    Parameters
    -
    1202    def __init__(self, out_file_path, mass_spectra):
    -1203        super().__init__(out_file_path, mass_spectra)
    -1204        self.ion_type_dict = ion_type_dict
    +            
    1211    def __init__(self, out_file_path, mass_spectra):
    +1212        super().__init__(out_file_path, mass_spectra)
    +1213        self.ion_type_dict = ion_type_dict
     
    @@ -5695,65 +5725,65 @@
    Parameters
    -
    1206    @staticmethod
    -1207    def get_ion_formula(neutral_formula, ion_type):
    -1208        """From a neutral formula and an ion type, return the formula of the ion.
    -1209
    -1210        Notes
    -1211        -----
    -1212        This is a static method.
    -1213        If the neutral_formula is not a string, this method will return None.
    -1214
    -1215        Parameters
    -1216        ----------
    -1217        neutral_formula : str
    -1218            The neutral formula, this should be a string form from the MolecularFormula class
    -1219            (e.g. 'C2 H4 O2', isotopes OK), or simple string (e.g. 'C2H4O2', no isotope handling in this case).
    -1220            In the case of a simple string, the atoms are parsed based on the presence of capital letters,
    -1221            e.g. MgCl2 is parsed as 'Mg Cl2.
    -1222        ion_type : str
    -1223            The ion type, e.g. 'protonated', '[M+H]+', '[M+Na]+', etc.
    -1224            See the self.ion_type_dict for the available ion types.
    -1225
    -1226        Returns
    -1227        -------
    -1228        str
    -1229            The formula of the ion as a string (like 'C2 H4 O2'); or None if the neutral_formula is not a string.
    -1230        """
    -1231        # If neutral_formula is not a string, return None
    -1232        if not isinstance(neutral_formula, str):
    -1233            return None
    +            
    1215    @staticmethod
    +1216    def get_ion_formula(neutral_formula, ion_type):
    +1217        """From a neutral formula and an ion type, return the formula of the ion.
    +1218
    +1219        Notes
    +1220        -----
    +1221        This is a static method.
    +1222        If the neutral_formula is not a string, this method will return None.
    +1223
    +1224        Parameters
    +1225        ----------
    +1226        neutral_formula : str
    +1227            The neutral formula, this should be a string form from the MolecularFormula class
    +1228            (e.g. 'C2 H4 O2', isotopes OK), or simple string (e.g. 'C2H4O2', no isotope handling in this case).
    +1229            In the case of a simple string, the atoms are parsed based on the presence of capital letters,
    +1230            e.g. MgCl2 is parsed as 'Mg Cl2.
    +1231        ion_type : str
    +1232            The ion type, e.g. 'protonated', '[M+H]+', '[M+Na]+', etc.
    +1233            See the self.ion_type_dict for the available ion types.
     1234
    -1235        # Check if there are spaces in the formula (these are outputs of the MolecularFormula class and do not need to be processed before being passed to the class)
    -1236        if re.search(r"\s", neutral_formula):
    -1237            neutral_formula = MolecularFormula(neutral_formula, ion_charge=0)
    -1238        else:
    -1239            form_pre = re.sub(r"([A-Z])", r" \1", neutral_formula)[1:]
    -1240            elements = [re.findall(r"[A-Z][a-z]*", x) for x in form_pre.split()]
    -1241            counts = [re.findall(r"\d+", x) for x in form_pre.split()]
    -1242            neutral_formula = MolecularFormula(
    -1243                dict(
    -1244                    zip(
    -1245                        [x[0] for x in elements],
    -1246                        [int(x[0]) if x else 1 for x in counts],
    -1247                    )
    -1248                ),
    -1249                ion_charge=0,
    -1250            )
    -1251        neutral_formula_dict = neutral_formula.to_dict().copy()
    -1252
    -1253        adduct_add_dict = ion_type_dict[ion_type][0]
    -1254        for key in adduct_add_dict:
    -1255            if key in neutral_formula_dict.keys():
    -1256                neutral_formula_dict[key] += adduct_add_dict[key]
    -1257            else:
    -1258                neutral_formula_dict[key] = adduct_add_dict[key]
    -1259
    -1260        adduct_subtract = ion_type_dict[ion_type][1]
    -1261        for key in adduct_subtract:
    -1262            neutral_formula_dict[key] -= adduct_subtract[key]
    -1263
    -1264        return MolecularFormula(neutral_formula_dict, ion_charge=0).string
    +1235        Returns
    +1236        -------
    +1237        str
    +1238            The formula of the ion as a string (like 'C2 H4 O2'); or None if the neutral_formula is not a string.
    +1239        """
    +1240        # If neutral_formula is not a string, return None
    +1241        if not isinstance(neutral_formula, str):
    +1242            return None
    +1243
    +1244        # Check if there are spaces in the formula (these are outputs of the MolecularFormula class and do not need to be processed before being passed to the class)
    +1245        if re.search(r"\s", neutral_formula):
    +1246            neutral_formula = MolecularFormula(neutral_formula, ion_charge=0)
    +1247        else:
    +1248            form_pre = re.sub(r"([A-Z])", r" \1", neutral_formula)[1:]
    +1249            elements = [re.findall(r"[A-Z][a-z]*", x) for x in form_pre.split()]
    +1250            counts = [re.findall(r"\d+", x) for x in form_pre.split()]
    +1251            neutral_formula = MolecularFormula(
    +1252                dict(
    +1253                    zip(
    +1254                        [x[0] for x in elements],
    +1255                        [int(x[0]) if x else 1 for x in counts],
    +1256                    )
    +1257                ),
    +1258                ion_charge=0,
    +1259            )
    +1260        neutral_formula_dict = neutral_formula.to_dict().copy()
    +1261
    +1262        adduct_add_dict = ion_type_dict[ion_type][0]
    +1263        for key in adduct_add_dict:
    +1264            if key in neutral_formula_dict.keys():
    +1265                neutral_formula_dict[key] += adduct_add_dict[key]
    +1266            else:
    +1267                neutral_formula_dict[key] = adduct_add_dict[key]
    +1268
    +1269        adduct_subtract = ion_type_dict[ion_type][1]
    +1270        for key in adduct_subtract:
    +1271            neutral_formula_dict[key] -= adduct_subtract[key]
    +1272
    +1273        return MolecularFormula(neutral_formula_dict, ion_charge=0).string
     
    @@ -5798,46 +5828,46 @@
    Returns
    -
    1266    @staticmethod
    -1267    def get_isotope_type(ion_formula):
    -1268        """From an ion formula, return the 13C isotope type of the ion.
    -1269
    -1270        Notes
    -1271        -----
    -1272        This is a static method.
    -1273        If the ion_formula is not a string, this method will return None.
    -1274        This is currently only functional for 13C isotopes.
    -1275
    -1276        Parameters
    -1277        ----------
    -1278        ion_formula : str
    -1279            The formula of the ion, expected to be a string like 'C2 H4 O2'.
    -1280
    -1281        Returns
    -1282        -------
    -1283        str
    -1284            The isotope type of the ion, e.g. '13C1', '13C2', etc; or None if the ion_formula does not contain a 13C isotope.
    -1285
    -1286        Raises
    -1287        ------
    -1288        ValueError
    -1289            If the ion_formula is not a string.
    -1290        """
    -1291        if not isinstance(ion_formula, str):
    -1292            return None
    -1293
    -1294        if re.search(r"\s", ion_formula):
    -1295            ion_formula = MolecularFormula(ion_formula, ion_charge=0)
    -1296        else:
    -1297            raise ValueError('ion_formula should be a string like "C2 H4 O2"')
    -1298        ion_formula_dict = ion_formula.to_dict().copy()
    -1299
    -1300        try:
    -1301            iso_class = "13C" + str(ion_formula_dict.pop("13C"))
    -1302        except KeyError:
    -1303            iso_class = None
    -1304
    -1305        return iso_class
    +            
    1275    @staticmethod
    +1276    def get_isotope_type(ion_formula):
    +1277        """From an ion formula, return the 13C isotope type of the ion.
    +1278
    +1279        Notes
    +1280        -----
    +1281        This is a static method.
    +1282        If the ion_formula is not a string, this method will return None.
    +1283        This is currently only functional for 13C isotopes.
    +1284
    +1285        Parameters
    +1286        ----------
    +1287        ion_formula : str
    +1288            The formula of the ion, expected to be a string like 'C2 H4 O2'.
    +1289
    +1290        Returns
    +1291        -------
    +1292        str
    +1293            The isotope type of the ion, e.g. '13C1', '13C2', etc; or None if the ion_formula does not contain a 13C isotope.
    +1294
    +1295        Raises
    +1296        ------
    +1297        ValueError
    +1298            If the ion_formula is not a string.
    +1299        """
    +1300        if not isinstance(ion_formula, str):
    +1301            return None
    +1302
    +1303        if re.search(r"\s", ion_formula):
    +1304            ion_formula = MolecularFormula(ion_formula, ion_charge=0)
    +1305        else:
    +1306            raise ValueError('ion_formula should be a string like "C2 H4 O2"')
    +1307        ion_formula_dict = ion_formula.to_dict().copy()
    +1308
    +1309        try:
    +1310            iso_class = "13C" + str(ion_formula_dict.pop("13C"))
    +1311        except KeyError:
    +1312            iso_class = None
    +1313
    +1314        return iso_class
     
    @@ -5882,58 +5912,58 @@
    Raises
    -
    1307    def clean_ms1_report(self, ms1_summary_full):
    -1308        """Clean the MS1 report.
    -1309
    -1310        Parameters
    -1311        ----------
    -1312        ms1_summary_full : DataFrame
    -1313            The full MS1 summary DataFrame.
    -1314
    -1315        Returns
    -1316        -------
    -1317        DataFrame
    -1318            The cleaned MS1 summary DataFrame.
    -1319        """
    -1320        ms1_summary_full = ms1_summary_full.reset_index()
    -1321        cols_to_keep = [
    -1322            "mf_id",
    -1323            "Molecular Formula",
    -1324            "Ion Type",
    -1325            "Calculated m/z",
    -1326            "m/z Error (ppm)",
    -1327            "m/z Error Score",
    -1328            "Is Isotopologue",
    -1329            "Isotopologue Similarity",
    -1330            "Confidence Score",
    -1331        ]
    -1332        ms1_summary = ms1_summary_full[cols_to_keep].copy()
    -1333        ms1_summary["ion_formula"] = [
    -1334            self.get_ion_formula(f, a)
    -1335            for f, a in zip(ms1_summary["Molecular Formula"], ms1_summary["Ion Type"])
    -1336        ]
    -1337        ms1_summary["isotopologue_type"] = [
    -1338            self.get_isotope_type(f) for f in ms1_summary["ion_formula"].tolist()
    -1339        ]
    -1340
    -1341        # Reorder columns
    -1342        ms1_summary = ms1_summary[
    -1343            [
    -1344                "mf_id",
    -1345                "ion_formula",
    -1346                "isotopologue_type",
    -1347                "Calculated m/z",
    -1348                "m/z Error (ppm)",
    -1349                "m/z Error Score",
    -1350                "Isotopologue Similarity",
    -1351                "Confidence Score",
    -1352            ]
    -1353        ]
    -1354
    -1355        # Set the index to mf_id
    -1356        ms1_summary = ms1_summary.set_index("mf_id")
    -1357
    -1358        return ms1_summary
    +            
    1316    def clean_ms1_report(self, ms1_summary_full):
    +1317        """Clean the MS1 report.
    +1318
    +1319        Parameters
    +1320        ----------
    +1321        ms1_summary_full : DataFrame
    +1322            The full MS1 summary DataFrame.
    +1323
    +1324        Returns
    +1325        -------
    +1326        DataFrame
    +1327            The cleaned MS1 summary DataFrame.
    +1328        """
    +1329        ms1_summary_full = ms1_summary_full.reset_index()
    +1330        cols_to_keep = [
    +1331            "mf_id",
    +1332            "Molecular Formula",
    +1333            "Ion Type",
    +1334            "Calculated m/z",
    +1335            "m/z Error (ppm)",
    +1336            "m/z Error Score",
    +1337            "Is Isotopologue",
    +1338            "Isotopologue Similarity",
    +1339            "Confidence Score",
    +1340        ]
    +1341        ms1_summary = ms1_summary_full[cols_to_keep].copy()
    +1342        ms1_summary["ion_formula"] = [
    +1343            self.get_ion_formula(f, a)
    +1344            for f, a in zip(ms1_summary["Molecular Formula"], ms1_summary["Ion Type"])
    +1345        ]
    +1346        ms1_summary["isotopologue_type"] = [
    +1347            self.get_isotope_type(f) for f in ms1_summary["ion_formula"].tolist()
    +1348        ]
    +1349
    +1350        # Reorder columns
    +1351        ms1_summary = ms1_summary[
    +1352            [
    +1353                "mf_id",
    +1354                "ion_formula",
    +1355                "isotopologue_type",
    +1356                "Calculated m/z",
    +1357                "m/z Error (ppm)",
    +1358                "m/z Error Score",
    +1359                "Isotopologue Similarity",
    +1360                "Confidence Score",
    +1361            ]
    +1362        ]
    +1363
    +1364        # Set the index to mf_id
    +1365        ms1_summary = ms1_summary.set_index("mf_id")
    +1366
    +1367        return ms1_summary
     
    @@ -5966,254 +5996,254 @@
    Returns
    -
    1360    def summarize_lipid_report(self, ms2_annot):
    -1361        """Summarize the lipid report.
    -1362
    -1363        Parameters
    -1364        ----------
    -1365        ms2_annot : DataFrame
    -1366            The MS2 annotation DataFrame with all annotations.
    -1367
    -1368        Returns
    -1369        -------
    -1370        DataFrame
    -1371            The summarized lipid report.
    -1372        """
    -1373        # Drop unnecessary columns for easier viewing
    -1374        columns_to_drop = [
    -1375            "precursor_mz",
    -1376            "precursor_mz_error_ppm",
    -1377            "metabref_mol_id",
    -1378            "metabref_precursor_mz",
    -1379            "cas",
    -1380            "inchikey",
    -1381            "inchi",
    -1382            "chebi",
    -1383            "smiles",
    -1384            "kegg",
    -1385            "data_id",
    -1386            "iupac_name",
    -1387            "traditional_name",
    -1388            "common_name",
    -1389            "casno",
    -1390        ]
    -1391        ms2_annot = ms2_annot.drop(
    -1392            columns=[col for col in columns_to_drop if col in ms2_annot.columns]
    -1393        )
    -1394
    -1395        # If ion_types_excluded is not empty, remove those ion types
    -1396        ion_types_excluded = (
    -1397            self.mass_spectra.parameters.mass_spectrum['ms2'].molecular_search.ion_types_excluded
    -1398        )
    -1399        if len(ion_types_excluded) > 0:
    -1400            ms2_annot = ms2_annot[~ms2_annot["ref_ion_type"].isin(ion_types_excluded)]
    -1401
    -1402        # If mf_id is not present, check that the index name is mf_id and reset the index
    -1403        if "mf_id" not in ms2_annot.columns:
    -1404            if ms2_annot.index.name == "mf_id":
    -1405                ms2_annot = ms2_annot.reset_index()
    -1406            else:
    -1407                raise ValueError("mf_id is not present in the dataframe")
    -1408
    -1409        # Attempt to get consensus annotations to the MLF level
    -1410        mlf_results_all = []
    -1411        for mf_id in ms2_annot["mf_id"].unique():
    -1412            mlf_results_perid = []
    -1413            ms2_annot_mf = ms2_annot[ms2_annot["mf_id"] == mf_id].copy()
    -1414            ms2_annot_mf["n_spectra_contributing"] = len(ms2_annot_mf)
    -1415
    -1416            for query_scan in ms2_annot["query_spectrum_id"].unique():
    -1417                ms2_annot_sub = ms2_annot_mf[
    -1418                    ms2_annot_mf["query_spectrum_id"] == query_scan
    -1419                ].copy()
    -1420
    -1421                if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    -1422                    # If there is only one lipid_summed_name, let's try to get consensus molecular species annotation
    -1423                    if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    -1424                        ms2_annot_sub["entropy_max"] = (
    -1425                            ms2_annot_sub["entropy_similarity"]
    -1426                            == ms2_annot_sub["entropy_similarity"].max()
    -1427                        )
    -1428                        ms2_annot_sub["ref_match_fract_max"] = (
    -1429                            ms2_annot_sub["ref_mz_in_query_fract"]
    -1430                            == ms2_annot_sub["ref_mz_in_query_fract"].max()
    -1431                        )
    -1432                        ms2_annot_sub["frag_max"] = ms2_annot_sub["query_frag_types"].apply(
    -1433                            lambda x: True if "MLF" in x else False
    -1434                        )
    -1435
    -1436                        # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks)
    -1437                        ms2_annot_sub["consensus"] = ms2_annot_sub[
    -1438                            ["entropy_max", "ref_match_fract_max", "frag_max"]
    -1439                        ].all(axis=1)
    -1440
    -1441                        # If there is a consensus, take the row with the highest entropy_similarity
    -1442                        if ms2_annot_sub["consensus"].any():
    -1443                            ms2_annot_sub = ms2_annot_sub[
    -1444                                ms2_annot_sub["entropy_similarity"]
    -1445                                == ms2_annot_sub["entropy_similarity"].max()
    -1446                            ].head(1)
    -1447                            mlf_results_perid.append(ms2_annot_sub)
    -1448            if len(mlf_results_perid) == 0:
    -1449                mlf_results_perid = pd.DataFrame()
    -1450            else:
    -1451                mlf_results_perid = pd.concat(mlf_results_perid)
    -1452                if mlf_results_perid["name"].nunique() == 1:
    -1453                    mlf_results_perid = mlf_results_perid[
    -1454                        mlf_results_perid["entropy_similarity"]
    -1455                        == mlf_results_perid["entropy_similarity"].max()
    -1456                    ].head(1)
    -1457                else:
    -1458                    mlf_results_perid = pd.DataFrame()
    -1459                mlf_results_all.append(mlf_results_perid)
    -1460
    -1461        # These are the consensus annotations to the MLF level
    -1462        if len(mlf_results_all) > 0:
    -1463            mlf_results_all = pd.concat(mlf_results_all)
    -1464            mlf_results_all["annot_level"] = mlf_results_all["structure_level"]
    -1465        else:
    -1466            # Make an empty dataframe
    -1467            mlf_results_all = ms2_annot.head(0)
    -1468
    -1469        # For remaining mf_ids, try to get a consensus annotation to the species level
    -1470        species_results_all = []
    -1471        # Remove mf_ids that have consensus annotations to the MLF level
    -1472        ms2_annot_spec = ms2_annot[
    -1473            ~ms2_annot["mf_id"].isin(mlf_results_all["mf_id"].unique())
    -1474        ]
    -1475        for mf_id in ms2_annot_spec["mf_id"].unique():
    -1476            # Do all the hits have the same lipid_summed_name?
    -1477            ms2_annot_sub = ms2_annot_spec[ms2_annot_spec["mf_id"] == mf_id].copy()
    -1478            ms2_annot_sub["n_spectra_contributing"] = len(ms2_annot_sub)
    -1479
    -1480            if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    -1481                # Grab the highest entropy_similarity result
    -1482                ms2_annot_sub = ms2_annot_sub[
    -1483                    ms2_annot_sub["entropy_similarity"]
    -1484                    == ms2_annot_sub["entropy_similarity"].max()
    -1485                ].head(1)
    -1486                species_results_all.append(ms2_annot_sub)
    -1487
    -1488        # These are the consensus annotations to the species level
    -1489        if len(species_results_all) > 0:
    -1490            species_results_all = pd.concat(species_results_all)
    -1491            species_results_all["annot_level"] = "species"
    -1492        else:
    -1493            # Make an empty dataframe
    -1494            species_results_all = ms2_annot.head(0)
    -1495
    -1496        # Deal with the remaining mf_ids that do not have consensus annotations to the species level or MLF level
    -1497        # Remove mf_ids that have consensus annotations to the species level
    -1498        ms2_annot_remaining = ms2_annot_spec[
    -1499            ~ms2_annot_spec["mf_id"].isin(species_results_all["mf_id"].unique())
    -1500        ]
    -1501        no_consensus = []
    -1502        for mf_id in ms2_annot_remaining["mf_id"].unique():
    -1503            id_sub = []
    -1504            id_no_con = []
    -1505            ms2_annot_sub_mf = ms2_annot_remaining[
    -1506                ms2_annot_remaining["mf_id"] == mf_id
    -1507            ].copy()
    -1508            for query_scan in ms2_annot_sub_mf["query_spectrum_id"].unique():
    -1509                ms2_annot_sub = ms2_annot_sub_mf[
    -1510                    ms2_annot_sub_mf["query_spectrum_id"] == query_scan
    -1511                ].copy()
    -1512
    -1513                # New columns for ranking [HIGHER RANK = BETTER]
    -1514                ms2_annot_sub["entropy_max"] = (
    -1515                    ms2_annot_sub["entropy_similarity"]
    -1516                    == ms2_annot_sub["entropy_similarity"].max()
    -1517                )
    -1518                ms2_annot_sub["ref_match_fract_max"] = (
    -1519                    ms2_annot_sub["ref_mz_in_query_fract"]
    -1520                    == ms2_annot_sub["ref_mz_in_query_fract"].max()
    -1521                )
    -1522                ms2_annot_sub["frag_max"] = ms2_annot_sub["query_frag_types"].apply(
    -1523                    lambda x: True if "MLF" in x else False
    -1524                )
    -1525
    -1526                # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks)
    -1527                ms2_annot_sub["consensus"] = ms2_annot_sub[
    -1528                    ["entropy_max", "ref_match_fract_max", "frag_max"]
    -1529                ].all(axis=1)
    -1530                ms2_annot_sub_con = ms2_annot_sub[ms2_annot_sub["consensus"]]
    -1531                id_sub.append(ms2_annot_sub_con)
    -1532                id_no_con.append(ms2_annot_sub)
    -1533            id_sub = pd.concat(id_sub)
    -1534            id_no_con = pd.concat(id_no_con)
    -1535
    -1536            # Scenario 1: Multiple scans are being resolved to different MLFs [could be coelutions and should both be kept and annotated to MS level]
    -1537            if (
    -1538                id_sub["query_frag_types"]
    -1539                .apply(lambda x: True if "MLF" in x else False)
    -1540                .all()
    -1541                and len(id_sub) > 0
    -1542            ):
    -1543                idx = id_sub.groupby("name")["entropy_similarity"].idxmax()
    -1544                id_sub = id_sub.loc[idx]
    -1545                # Reorder so highest entropy_similarity is first
    -1546                id_sub = id_sub.sort_values("entropy_similarity", ascending=False)
    -1547                id_sub["annot_level"] = id_sub["structure_level"]
    -1548                no_consensus.append(id_sub)
    -1549
    -1550            # Scenario 2: Multiple scans are being resolved to different species, keep both and annotate to appropriate level
    -1551            elif len(id_sub) == 0:
    -1552                for lipid_summed_name in id_no_con["lipid_summed_name"].unique():
    -1553                    summed_sub = id_no_con[
    -1554                        id_no_con["lipid_summed_name"] == lipid_summed_name
    -1555                    ]
    -1556                    # Any consensus to MLF?
    -1557                    if summed_sub["consensus"].any():
    -1558                        summed_sub = summed_sub[summed_sub["consensus"]]
    -1559                        summed_sub["annot_level"] = summed_sub["structure_level"]
    -1560                        no_consensus.append(summed_sub)
    -1561                    else:
    -1562                        # Grab the highest entropy_similarity, if there are multiple, grab the first one
    -1563                        summed_sub = summed_sub[
    -1564                            summed_sub["entropy_similarity"]
    -1565                            == summed_sub["entropy_similarity"].max()
    -1566                        ].head(1)
    -1567                        # get first row
    -1568                        summed_sub["annot_level"] = "species"
    -1569                        summed_sub["name"] = ""
    -1570                        no_consensus.append(summed_sub)
    -1571            else:
    -1572                raise ValueError("Unexpected scenario for summarizing mf_id: ", mf_id)
    -1573
    -1574        if len(no_consensus) > 0:
    -1575            no_consensus = pd.concat(no_consensus)
    -1576        else:
    -1577            no_consensus = ms2_annot.head(0)
    -1578
    -1579        # Combine all the consensus annotations and reformat the dataframe for output
    -1580        species_results_all = species_results_all.drop(columns=["name"])
    -1581        species_results_all["lipid_molecular_species_id"] = ""
    -1582        mlf_results_all["lipid_molecular_species_id"] = mlf_results_all["name"]
    -1583        no_consensus["lipid_molecular_species_id"] = no_consensus["name"]
    -1584        consensus_annotations = pd.concat(
    -1585            [mlf_results_all, species_results_all, no_consensus]
    -1586        )
    -1587        consensus_annotations = consensus_annotations.sort_values(
    -1588            "mf_id", ascending=True
    -1589        )
    -1590        cols_to_keep = [
    -1591            "mf_id",
    -1592            "ref_ion_type",
    -1593            "entropy_similarity",
    -1594            "ref_mz_in_query_fract",
    -1595            "lipid_molecular_species_id",
    -1596            "lipid_summed_name",
    -1597            "lipid_subclass",
    -1598            "lipid_class",
    -1599            "lipid_category",
    -1600            "formula",
    -1601            "annot_level",
    -1602            "n_spectra_contributing",
    -1603        ]
    -1604        consensus_annotations = consensus_annotations[cols_to_keep]
    -1605        consensus_annotations = consensus_annotations.set_index("mf_id")
    -1606
    -1607        return consensus_annotations
    +            
    1369    def summarize_lipid_report(self, ms2_annot):
    +1370        """Summarize the lipid report.
    +1371
    +1372        Parameters
    +1373        ----------
    +1374        ms2_annot : DataFrame
    +1375            The MS2 annotation DataFrame with all annotations.
    +1376
    +1377        Returns
    +1378        -------
    +1379        DataFrame
    +1380            The summarized lipid report.
    +1381        """
    +1382        # Drop unnecessary columns for easier viewing
    +1383        columns_to_drop = [
    +1384            "precursor_mz",
    +1385            "precursor_mz_error_ppm",
    +1386            "metabref_mol_id",
    +1387            "metabref_precursor_mz",
    +1388            "cas",
    +1389            "inchikey",
    +1390            "inchi",
    +1391            "chebi",
    +1392            "smiles",
    +1393            "kegg",
    +1394            "data_id",
    +1395            "iupac_name",
    +1396            "traditional_name",
    +1397            "common_name",
    +1398            "casno",
    +1399        ]
    +1400        ms2_annot = ms2_annot.drop(
    +1401            columns=[col for col in columns_to_drop if col in ms2_annot.columns]
    +1402        )
    +1403
    +1404        # If ion_types_excluded is not empty, remove those ion types
    +1405        ion_types_excluded = self.mass_spectra.parameters.mass_spectrum[
    +1406            "ms2"
    +1407        ].molecular_search.ion_types_excluded
    +1408        if len(ion_types_excluded) > 0:
    +1409            ms2_annot = ms2_annot[~ms2_annot["ref_ion_type"].isin(ion_types_excluded)]
    +1410
    +1411        # If mf_id is not present, check that the index name is mf_id and reset the index
    +1412        if "mf_id" not in ms2_annot.columns:
    +1413            if ms2_annot.index.name == "mf_id":
    +1414                ms2_annot = ms2_annot.reset_index()
    +1415            else:
    +1416                raise ValueError("mf_id is not present in the dataframe")
    +1417
    +1418        # Attempt to get consensus annotations to the MLF level
    +1419        mlf_results_all = []
    +1420        for mf_id in ms2_annot["mf_id"].unique():
    +1421            mlf_results_perid = []
    +1422            ms2_annot_mf = ms2_annot[ms2_annot["mf_id"] == mf_id].copy()
    +1423            ms2_annot_mf["n_spectra_contributing"] = len(ms2_annot_mf)
    +1424
    +1425            for query_scan in ms2_annot["query_spectrum_id"].unique():
    +1426                ms2_annot_sub = ms2_annot_mf[
    +1427                    ms2_annot_mf["query_spectrum_id"] == query_scan
    +1428                ].copy()
    +1429
    +1430                if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    +1431                    # If there is only one lipid_summed_name, let's try to get consensus molecular species annotation
    +1432                    if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    +1433                        ms2_annot_sub["entropy_max"] = (
    +1434                            ms2_annot_sub["entropy_similarity"]
    +1435                            == ms2_annot_sub["entropy_similarity"].max()
    +1436                        )
    +1437                        ms2_annot_sub["ref_match_fract_max"] = (
    +1438                            ms2_annot_sub["ref_mz_in_query_fract"]
    +1439                            == ms2_annot_sub["ref_mz_in_query_fract"].max()
    +1440                        )
    +1441                        ms2_annot_sub["frag_max"] = ms2_annot_sub[
    +1442                            "query_frag_types"
    +1443                        ].apply(lambda x: True if "MLF" in x else False)
    +1444
    +1445                        # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks)
    +1446                        ms2_annot_sub["consensus"] = ms2_annot_sub[
    +1447                            ["entropy_max", "ref_match_fract_max", "frag_max"]
    +1448                        ].all(axis=1)
    +1449
    +1450                        # If there is a consensus, take the row with the highest entropy_similarity
    +1451                        if ms2_annot_sub["consensus"].any():
    +1452                            ms2_annot_sub = ms2_annot_sub[
    +1453                                ms2_annot_sub["entropy_similarity"]
    +1454                                == ms2_annot_sub["entropy_similarity"].max()
    +1455                            ].head(1)
    +1456                            mlf_results_perid.append(ms2_annot_sub)
    +1457            if len(mlf_results_perid) == 0:
    +1458                mlf_results_perid = pd.DataFrame()
    +1459            else:
    +1460                mlf_results_perid = pd.concat(mlf_results_perid)
    +1461                if mlf_results_perid["name"].nunique() == 1:
    +1462                    mlf_results_perid = mlf_results_perid[
    +1463                        mlf_results_perid["entropy_similarity"]
    +1464                        == mlf_results_perid["entropy_similarity"].max()
    +1465                    ].head(1)
    +1466                else:
    +1467                    mlf_results_perid = pd.DataFrame()
    +1468                mlf_results_all.append(mlf_results_perid)
    +1469
    +1470        # These are the consensus annotations to the MLF level
    +1471        if len(mlf_results_all) > 0:
    +1472            mlf_results_all = pd.concat(mlf_results_all)
    +1473            mlf_results_all["annot_level"] = mlf_results_all["structure_level"]
    +1474        else:
    +1475            # Make an empty dataframe
    +1476            mlf_results_all = ms2_annot.head(0)
    +1477
    +1478        # For remaining mf_ids, try to get a consensus annotation to the species level
    +1479        species_results_all = []
    +1480        # Remove mf_ids that have consensus annotations to the MLF level
    +1481        ms2_annot_spec = ms2_annot[
    +1482            ~ms2_annot["mf_id"].isin(mlf_results_all["mf_id"].unique())
    +1483        ]
    +1484        for mf_id in ms2_annot_spec["mf_id"].unique():
    +1485            # Do all the hits have the same lipid_summed_name?
    +1486            ms2_annot_sub = ms2_annot_spec[ms2_annot_spec["mf_id"] == mf_id].copy()
    +1487            ms2_annot_sub["n_spectra_contributing"] = len(ms2_annot_sub)
    +1488
    +1489            if ms2_annot_sub["lipid_summed_name"].nunique() == 1:
    +1490                # Grab the highest entropy_similarity result
    +1491                ms2_annot_sub = ms2_annot_sub[
    +1492                    ms2_annot_sub["entropy_similarity"]
    +1493                    == ms2_annot_sub["entropy_similarity"].max()
    +1494                ].head(1)
    +1495                species_results_all.append(ms2_annot_sub)
    +1496
    +1497        # These are the consensus annotations to the species level
    +1498        if len(species_results_all) > 0:
    +1499            species_results_all = pd.concat(species_results_all)
    +1500            species_results_all["annot_level"] = "species"
    +1501        else:
    +1502            # Make an empty dataframe
    +1503            species_results_all = ms2_annot.head(0)
    +1504
    +1505        # Deal with the remaining mf_ids that do not have consensus annotations to the species level or MLF level
    +1506        # Remove mf_ids that have consensus annotations to the species level
    +1507        ms2_annot_remaining = ms2_annot_spec[
    +1508            ~ms2_annot_spec["mf_id"].isin(species_results_all["mf_id"].unique())
    +1509        ]
    +1510        no_consensus = []
    +1511        for mf_id in ms2_annot_remaining["mf_id"].unique():
    +1512            id_sub = []
    +1513            id_no_con = []
    +1514            ms2_annot_sub_mf = ms2_annot_remaining[
    +1515                ms2_annot_remaining["mf_id"] == mf_id
    +1516            ].copy()
    +1517            for query_scan in ms2_annot_sub_mf["query_spectrum_id"].unique():
    +1518                ms2_annot_sub = ms2_annot_sub_mf[
    +1519                    ms2_annot_sub_mf["query_spectrum_id"] == query_scan
    +1520                ].copy()
    +1521
    +1522                # New columns for ranking [HIGHER RANK = BETTER]
    +1523                ms2_annot_sub["entropy_max"] = (
    +1524                    ms2_annot_sub["entropy_similarity"]
    +1525                    == ms2_annot_sub["entropy_similarity"].max()
    +1526                )
    +1527                ms2_annot_sub["ref_match_fract_max"] = (
    +1528                    ms2_annot_sub["ref_mz_in_query_fract"]
    +1529                    == ms2_annot_sub["ref_mz_in_query_fract"].max()
    +1530                )
    +1531                ms2_annot_sub["frag_max"] = ms2_annot_sub["query_frag_types"].apply(
    +1532                    lambda x: True if "MLF" in x else False
    +1533                )
    +1534
    +1535                # New column that looks if there is a consensus between the ranks (one row that is highest in all ranks)
    +1536                ms2_annot_sub["consensus"] = ms2_annot_sub[
    +1537                    ["entropy_max", "ref_match_fract_max", "frag_max"]
    +1538                ].all(axis=1)
    +1539                ms2_annot_sub_con = ms2_annot_sub[ms2_annot_sub["consensus"]]
    +1540                id_sub.append(ms2_annot_sub_con)
    +1541                id_no_con.append(ms2_annot_sub)
    +1542            id_sub = pd.concat(id_sub)
    +1543            id_no_con = pd.concat(id_no_con)
    +1544
    +1545            # Scenario 1: Multiple scans are being resolved to different MLFs [could be coelutions and should both be kept and annotated to MS level]
    +1546            if (
    +1547                id_sub["query_frag_types"]
    +1548                .apply(lambda x: True if "MLF" in x else False)
    +1549                .all()
    +1550                and len(id_sub) > 0
    +1551            ):
    +1552                idx = id_sub.groupby("name")["entropy_similarity"].idxmax()
    +1553                id_sub = id_sub.loc[idx]
    +1554                # Reorder so highest entropy_similarity is first
    +1555                id_sub = id_sub.sort_values("entropy_similarity", ascending=False)
    +1556                id_sub["annot_level"] = id_sub["structure_level"]
    +1557                no_consensus.append(id_sub)
    +1558
    +1559            # Scenario 2: Multiple scans are being resolved to different species, keep both and annotate to appropriate level
    +1560            elif len(id_sub) == 0:
    +1561                for lipid_summed_name in id_no_con["lipid_summed_name"].unique():
    +1562                    summed_sub = id_no_con[
    +1563                        id_no_con["lipid_summed_name"] == lipid_summed_name
    +1564                    ]
    +1565                    # Any consensus to MLF?
    +1566                    if summed_sub["consensus"].any():
    +1567                        summed_sub = summed_sub[summed_sub["consensus"]]
    +1568                        summed_sub["annot_level"] = summed_sub["structure_level"]
    +1569                        no_consensus.append(summed_sub)
    +1570                    else:
    +1571                        # Grab the highest entropy_similarity, if there are multiple, grab the first one
    +1572                        summed_sub = summed_sub[
    +1573                            summed_sub["entropy_similarity"]
    +1574                            == summed_sub["entropy_similarity"].max()
    +1575                        ].head(1)
    +1576                        # get first row
    +1577                        summed_sub["annot_level"] = "species"
    +1578                        summed_sub["name"] = ""
    +1579                        no_consensus.append(summed_sub)
    +1580            else:
    +1581                raise ValueError("Unexpected scenario for summarizing mf_id: ", mf_id)
    +1582
    +1583        if len(no_consensus) > 0:
    +1584            no_consensus = pd.concat(no_consensus)
    +1585        else:
    +1586            no_consensus = ms2_annot.head(0)
    +1587
    +1588        # Combine all the consensus annotations and reformat the dataframe for output
    +1589        species_results_all = species_results_all.drop(columns=["name"])
    +1590        species_results_all["lipid_molecular_species_id"] = ""
    +1591        mlf_results_all["lipid_molecular_species_id"] = mlf_results_all["name"]
    +1592        no_consensus["lipid_molecular_species_id"] = no_consensus["name"]
    +1593        consensus_annotations = pd.concat(
    +1594            [mlf_results_all, species_results_all, no_consensus]
    +1595        )
    +1596        consensus_annotations = consensus_annotations.sort_values(
    +1597            "mf_id", ascending=True
    +1598        )
    +1599        cols_to_keep = [
    +1600            "mf_id",
    +1601            "ref_ion_type",
    +1602            "entropy_similarity",
    +1603            "ref_mz_in_query_fract",
    +1604            "lipid_molecular_species_id",
    +1605            "lipid_summed_name",
    +1606            "lipid_subclass",
    +1607            "lipid_class",
    +1608            "lipid_category",
    +1609            "formula",
    +1610            "annot_level",
    +1611            "n_spectra_contributing",
    +1612        ]
    +1613        consensus_annotations = consensus_annotations[cols_to_keep]
    +1614        consensus_annotations = consensus_annotations.set_index("mf_id")
    +1615
    +1616        return consensus_annotations
     
    @@ -6246,48 +6276,48 @@
    Returns
    -
    1609    def clean_ms2_report(self, lipid_summary):
    -1610        """Clean the MS2 report.
    -1611
    -1612        Parameters
    -1613        ----------
    -1614        lipid_summary : DataFrame
    -1615            The full lipid summary DataFrame.
    -1616
    -1617        Returns
    -1618        -------
    -1619        DataFrame
    -1620            The cleaned lipid summary DataFrame.
    -1621        """
    -1622        lipid_summary = lipid_summary.reset_index()
    -1623        lipid_summary["ion_formula"] = [
    -1624            self.get_ion_formula(f, a)
    -1625            for f, a in zip(lipid_summary["formula"], lipid_summary["ref_ion_type"])
    -1626        ]
    -1627
    -1628        # Reorder columns
    -1629        lipid_summary = lipid_summary[
    -1630            [
    -1631                "mf_id",
    -1632                "ion_formula",
    -1633                "ref_ion_type",
    -1634                "formula",
    -1635                "annot_level",
    -1636                "lipid_molecular_species_id",
    -1637                "lipid_summed_name",
    -1638                "lipid_subclass",
    -1639                "lipid_class",
    -1640                "lipid_category",
    -1641                "entropy_similarity",
    -1642                "ref_mz_in_query_fract",
    -1643                "n_spectra_contributing",
    -1644            ]
    -1645        ]
    -1646
    -1647        # Set the index to mf_id
    -1648        lipid_summary = lipid_summary.set_index("mf_id")
    -1649
    -1650        return lipid_summary
    +            
    1618    def clean_ms2_report(self, lipid_summary):
    +1619        """Clean the MS2 report.
    +1620
    +1621        Parameters
    +1622        ----------
    +1623        lipid_summary : DataFrame
    +1624            The full lipid summary DataFrame.
    +1625
    +1626        Returns
    +1627        -------
    +1628        DataFrame
    +1629            The cleaned lipid summary DataFrame.
    +1630        """
    +1631        lipid_summary = lipid_summary.reset_index()
    +1632        lipid_summary["ion_formula"] = [
    +1633            self.get_ion_formula(f, a)
    +1634            for f, a in zip(lipid_summary["formula"], lipid_summary["ref_ion_type"])
    +1635        ]
    +1636
    +1637        # Reorder columns
    +1638        lipid_summary = lipid_summary[
    +1639            [
    +1640                "mf_id",
    +1641                "ion_formula",
    +1642                "ref_ion_type",
    +1643                "formula",
    +1644                "annot_level",
    +1645                "lipid_molecular_species_id",
    +1646                "lipid_summed_name",
    +1647                "lipid_subclass",
    +1648                "lipid_class",
    +1649                "lipid_category",
    +1650                "entropy_similarity",
    +1651                "ref_mz_in_query_fract",
    +1652                "n_spectra_contributing",
    +1653            ]
    +1654        ]
    +1655
    +1656        # Set the index to mf_id
    +1657        lipid_summary = lipid_summary.set_index("mf_id")
    +1658
    +1659        return lipid_summary
     
    @@ -6320,120 +6350,120 @@
    Returns
    -
    1652    def to_report(self, molecular_metadata=None):
    -1653        """Create a report of the mass features and their annotations.
    -1654
    -1655        Parameters
    -1656        ----------
    -1657        molecular_metadata : dict, optional
    -1658            The molecular metadata. Default is None.
    -1659
    -1660        Returns
    -1661        -------
    -1662        DataFrame
    -1663            The report of the mass features and their annotations.
    -1664
    -1665        Notes
    -1666        -----
    -1667        The report will contain the mass features and their annotations from MS1 and MS2 (if available).
    -1668        """
    -1669        # Get mass feature dataframe
    -1670        mf_report = self.mass_spectra.mass_features_to_df()
    -1671        mf_report = mf_report.reset_index(drop=False)
    -1672
    -1673        # Get and clean ms1 annotation dataframe
    -1674        ms1_annot_report = self.mass_spectra.mass_features_ms1_annot_to_df().copy()
    -1675        ms1_annot_report = self.clean_ms1_report(ms1_annot_report)
    -1676        ms1_annot_report = ms1_annot_report.reset_index(drop=False)
    -1677
    -1678        # Get, summarize, and clean ms2 annotation dataframe
    -1679        ms2_annot_report = self.mass_spectra.mass_features_ms2_annot_to_df(
    -1680            molecular_metadata=molecular_metadata
    -1681        )
    -1682        if ms2_annot_report is not None:
    -1683            ms2_annot_report = self.summarize_lipid_report(ms2_annot_report)
    -1684            ms2_annot_report = self.clean_ms2_report(ms2_annot_report)
    -1685            ms2_annot_report = ms2_annot_report.dropna(axis=1, how="all")
    -1686            ms2_annot_report = ms2_annot_report.reset_index(drop=False)
    -1687
    -1688        # Combine the reports
    -1689        if not ms1_annot_report.empty:
    -1690            # MS1 has been run and has molecular formula information
    -1691            mf_report = pd.merge(
    -1692                mf_report,
    -1693                ms1_annot_report,
    -1694                how="left",
    -1695                on=["mf_id", "isotopologue_type"],
    -1696            )
    -1697        if ms2_annot_report is not None:
    -1698            # pull out the records with ion_formula and drop the ion_formula column (these should be empty if MS1 molecular formula assignment is working correctly)
    -1699            mf_no_ion_formula = mf_report[mf_report["ion_formula"].isna()]
    -1700            mf_no_ion_formula = mf_no_ion_formula.drop(columns=["ion_formula"])
    -1701            mf_no_ion_formula = pd.merge(
    -1702                mf_no_ion_formula, ms2_annot_report, how="left", on=["mf_id"]
    -1703            )
    -1704
    -1705            # pull out the records with ion_formula
    -1706            mf_with_ion_formula = mf_report[~mf_report["ion_formula"].isna()]
    -1707            mf_with_ion_formula = pd.merge(
    -1708                mf_with_ion_formula,
    -1709                ms2_annot_report,
    -1710                how="left",
    -1711                on=["mf_id", "ion_formula"],
    +            
    1661    def to_report(self, molecular_metadata=None):
    +1662        """Create a report of the mass features and their annotations.
    +1663
    +1664        Parameters
    +1665        ----------
    +1666        molecular_metadata : dict, optional
    +1667            The molecular metadata. Default is None.
    +1668
    +1669        Returns
    +1670        -------
    +1671        DataFrame
    +1672            The report of the mass features and their annotations.
    +1673
    +1674        Notes
    +1675        -----
    +1676        The report will contain the mass features and their annotations from MS1 and MS2 (if available).
    +1677        """
    +1678        # Get mass feature dataframe
    +1679        mf_report = self.mass_spectra.mass_features_to_df()
    +1680        mf_report = mf_report.reset_index(drop=False)
    +1681
    +1682        # Get and clean ms1 annotation dataframe
    +1683        ms1_annot_report = self.mass_spectra.mass_features_ms1_annot_to_df().copy()
    +1684        ms1_annot_report = self.clean_ms1_report(ms1_annot_report)
    +1685        ms1_annot_report = ms1_annot_report.reset_index(drop=False)
    +1686
    +1687        # Get, summarize, and clean ms2 annotation dataframe
    +1688        ms2_annot_report = self.mass_spectra.mass_features_ms2_annot_to_df(
    +1689            molecular_metadata=molecular_metadata
    +1690        )
    +1691        if ms2_annot_report is not None:
    +1692            ms2_annot_report = self.summarize_lipid_report(ms2_annot_report)
    +1693            ms2_annot_report = self.clean_ms2_report(ms2_annot_report)
    +1694            ms2_annot_report = ms2_annot_report.dropna(axis=1, how="all")
    +1695            ms2_annot_report = ms2_annot_report.reset_index(drop=False)
    +1696
    +1697        # Combine the reports
    +1698        if not ms1_annot_report.empty:
    +1699            # MS1 has been run and has molecular formula information
    +1700            mf_report = pd.merge(
    +1701                mf_report,
    +1702                ms1_annot_report,
    +1703                how="left",
    +1704                on=["mf_id", "isotopologue_type"],
    +1705            )
    +1706        if ms2_annot_report is not None:
    +1707            # pull out the records with ion_formula and drop the ion_formula column (these should be empty if MS1 molecular formula assignment is working correctly)
    +1708            mf_no_ion_formula = mf_report[mf_report["ion_formula"].isna()]
    +1709            mf_no_ion_formula = mf_no_ion_formula.drop(columns=["ion_formula"])
    +1710            mf_no_ion_formula = pd.merge(
    +1711                mf_no_ion_formula, ms2_annot_report, how="left", on=["mf_id"]
     1712            )
     1713
    -1714            # put back together
    -1715            mf_report = pd.concat([mf_no_ion_formula, mf_with_ion_formula])
    -1716
    -1717        # Rename colums
    -1718        rename_dict = {
    -1719            "mf_id": "Mass Feature ID",
    -1720            "scan_time": "Retention Time (min)",
    -1721            "mz": "m/z",
    -1722            "apex_scan": "Apex Scan Number",
    -1723            "intensity": "Intensity",
    -1724            "persistence": "Persistence",
    -1725            "area": "Area",
    -1726            "half_height_width": "Half Height Width (min)",
    -1727            "tailing_factor": "Tailing Factor",
    -1728            "dispersity_index": "Dispersity Index",
    -1729            "ms2_spectrum": "MS2 Spectrum",
    -1730            "monoisotopic_mf_id": "Monoisotopic Mass Feature ID",
    -1731            "isotopologue_type": "Isotopologue Type",
    -1732            "mass_spectrum_deconvoluted_parent": "Is Largest Ion after Deconvolution",
    -1733            "associated_mass_features": "Associated Mass Features after Deconvolution",
    -1734            "ion_formula": "Ion Formula",
    -1735            "formula": "Molecular Formula",
    -1736            "ref_ion_type": "Ion Type",
    -1737            "annot_level": "Lipid Annotation Level",
    -1738            "lipid_molecular_species_id": "Lipid Molecular Species",
    -1739            "lipid_summed_name": "Lipid Species",
    -1740            "lipid_subclass": "Lipid Subclass",
    -1741            "lipid_class": "Lipid Class",
    -1742            "lipid_category": "Lipid Category",
    -1743            "entropy_similarity": "Entropy Similarity",
    -1744            "ref_mz_in_query_fract": "Library mzs in Query (fraction)",
    -1745            "n_spectra_contributing": "Spectra with Annotation (n)",
    -1746        }
    -1747        mf_report = mf_report.rename(columns=rename_dict)
    -1748        mf_report["Sample Name"] = self.mass_spectra.sample_name
    -1749        mf_report["Polarity"] = self.mass_spectra.polarity
    -1750        mf_report = mf_report[
    -1751            ["Mass Feature ID", "Sample Name", "Polarity"]
    -1752            + [
    -1753                col
    -1754                for col in mf_report.columns
    -1755                if col not in ["Mass Feature ID", "Sample Name", "Polarity"]
    -1756            ]
    -1757        ]
    -1758
    -1759        # Reorder rows by "Mass Feature ID"
    -1760        mf_report = mf_report.sort_values("Mass Feature ID")
    -1761
    -1762        # Reset index
    -1763        mf_report = mf_report.reset_index(drop=True)
    -1764
    -1765        return mf_report
    +1714            # pull out the records with ion_formula
    +1715            mf_with_ion_formula = mf_report[~mf_report["ion_formula"].isna()]
    +1716            mf_with_ion_formula = pd.merge(
    +1717                mf_with_ion_formula,
    +1718                ms2_annot_report,
    +1719                how="left",
    +1720                on=["mf_id", "ion_formula"],
    +1721            )
    +1722
    +1723            # put back together
    +1724            mf_report = pd.concat([mf_no_ion_formula, mf_with_ion_formula])
    +1725
    +1726        # Rename colums
    +1727        rename_dict = {
    +1728            "mf_id": "Mass Feature ID",
    +1729            "scan_time": "Retention Time (min)",
    +1730            "mz": "m/z",
    +1731            "apex_scan": "Apex Scan Number",
    +1732            "intensity": "Intensity",
    +1733            "persistence": "Persistence",
    +1734            "area": "Area",
    +1735            "half_height_width": "Half Height Width (min)",
    +1736            "tailing_factor": "Tailing Factor",
    +1737            "dispersity_index": "Dispersity Index",
    +1738            "ms2_spectrum": "MS2 Spectrum",
    +1739            "monoisotopic_mf_id": "Monoisotopic Mass Feature ID",
    +1740            "isotopologue_type": "Isotopologue Type",
    +1741            "mass_spectrum_deconvoluted_parent": "Is Largest Ion after Deconvolution",
    +1742            "associated_mass_features": "Associated Mass Features after Deconvolution",
    +1743            "ion_formula": "Ion Formula",
    +1744            "formula": "Molecular Formula",
    +1745            "ref_ion_type": "Ion Type",
    +1746            "annot_level": "Lipid Annotation Level",
    +1747            "lipid_molecular_species_id": "Lipid Molecular Species",
    +1748            "lipid_summed_name": "Lipid Species",
    +1749            "lipid_subclass": "Lipid Subclass",
    +1750            "lipid_class": "Lipid Class",
    +1751            "lipid_category": "Lipid Category",
    +1752            "entropy_similarity": "Entropy Similarity",
    +1753            "ref_mz_in_query_fract": "Library mzs in Query (fraction)",
    +1754            "n_spectra_contributing": "Spectra with Annotation (n)",
    +1755        }
    +1756        mf_report = mf_report.rename(columns=rename_dict)
    +1757        mf_report["Sample Name"] = self.mass_spectra.sample_name
    +1758        mf_report["Polarity"] = self.mass_spectra.polarity
    +1759        mf_report = mf_report[
    +1760            ["Mass Feature ID", "Sample Name", "Polarity"]
    +1761            + [
    +1762                col
    +1763                for col in mf_report.columns
    +1764                if col not in ["Mass Feature ID", "Sample Name", "Polarity"]
    +1765            ]
    +1766        ]
    +1767
    +1768        # Reorder rows by "Mass Feature ID"
    +1769        mf_report = mf_report.sort_values("Mass Feature ID")
    +1770
    +1771        # Reset index
    +1772        mf_report = mf_report.reset_index(drop=True)
    +1773
    +1774        return mf_report
     
    @@ -6470,17 +6500,17 @@
    Notes
    -
    1767    def report_to_csv(self, molecular_metadata=None):
    -1768        """Create a report of the mass features and their annotations and save it as a CSV file.
    -1769
    -1770        Parameters
    -1771        ----------
    -1772        molecular_metadata : dict, optional
    -1773            The molecular metadata. Default is None.
    -1774        """
    -1775        report = self.to_report(molecular_metadata=molecular_metadata)
    -1776        out_file = self.output_file.with_suffix(".csv")
    -1777        report.to_csv(out_file, index=False)
    +            
    1776    def report_to_csv(self, molecular_metadata=None):
    +1777        """Create a report of the mass features and their annotations and save it as a CSV file.
    +1778
    +1779        Parameters
    +1780        ----------
    +1781        molecular_metadata : dict, optional
    +1782            The molecular metadata. Default is None.
    +1783        """
    +1784        report = self.to_report(molecular_metadata=molecular_metadata)
    +1785        out_file = self.output_file.with_suffix(".csv")
    +1786        report.to_csv(out_file, index=False)
     
    diff --git a/docs/corems/mass_spectrum/calc/AutoRecalibration.html b/docs/corems/mass_spectrum/calc/AutoRecalibration.html index d77e4827..2820fdab 100644 --- a/docs/corems/mass_spectrum/calc/AutoRecalibration.html +++ b/docs/corems/mass_spectrum/calc/AutoRecalibration.html @@ -103,216 +103,230 @@

    8""" 9 10from lmfit.models import GaussianModel - 11from scipy import stats - 12import seaborn as sns - 13import pandas as pd - 14import numpy as np - 15import matplotlib.pyplot as plt - 16from corems.encapsulation.factory.parameters import MSParameters - 17from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas - 18import copy - 19 - 20class HighResRecalibration: - 21 """ - 22 This class is designed for high resolution (FTICR, Orbitrap) data of complex mixture, e.g. Organic matter - 23 - 24 The tool first does a broad mass range search for the most commonly expected ion type (i.e. CHO, deprotonated - for negative ESI) - 25 And then the assigned data mass error distribution is searched, with a gaussian fit to the most prominent range. - 26 This tool works when the data are of sufficient quality, and not outwith the typical expected range of the mass analyzer - 27 It presumes the mean error is out by 0-several ppm, but that the spread of error values is modest (<2ppm) - 28 - 29 Parameters - 30 ---------- - 31 mass_spectrum : MassSpectrum - 32 CoreMS mass spectrum object - 33 plot : bool, optional - 34 Whether to plot the error distribution. The default is False. - 35 docker : bool, optional - 36 Whether to use the docker database. The default is True. If not, it uses a dynamically generated sqlite database. - 37 ppmFWHMprior : float, optional - 38 The FWHM of the prior distribution (ppm). The default is 3. - 39 ppmRangeprior : float, optional - 40 The range of the prior distribution (ppm). The default is 15. - 41 - 42 Methods - 43 -------- - 44 * determine_error_boundaries(). Determine the error boundaries for recalibration space. - 45 - 46 Notes - 47 ----- - 48 This initialisation function creates a copy of the MassSpectrum object to avoid over-writing assignments. - 49 Possible future task is to make the base class copyable. 
- 50 - 51 """ - 52 - 53 def __init__(self, mass_spectrum, plot : bool=False, docker : bool=True, - 54 ppmFWHMprior : float=3, ppmRangeprior : float=15): - 55 - 56 self.mass_spectrum = copy.deepcopy(mass_spectrum) - 57 self.plot = plot - 58 self.docker = docker - 59 self.ppmFWHMprior = ppmFWHMprior - 60 self.ppmRangeprior = ppmRangeprior - 61 - 62 - 63 def set_uncal_settings(self): - 64 """ Set uncalibrated formula search settings + 11import seaborn as sns + 12import pandas as pd + 13import numpy as np + 14import matplotlib.pyplot as plt + 15from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas + 16import copy + 17 + 18 + 19class HighResRecalibration: + 20 """ + 21 This class is designed for high resolution (FTICR, Orbitrap) data of complex mixture, e.g. Organic matter + 22 + 23 The tool first does a broad mass range search for the most commonly expected ion type (i.e. CHO, deprotonated - for negative ESI) + 24 And then the assigned data mass error distribution is searched, with a gaussian fit to the most prominent range. + 25 This tool works when the data are of sufficient quality, and not outwith the typical expected range of the mass analyzer + 26 It presumes the mean error is out by 0-several ppm, but that the spread of error values is modest (<2ppm) + 27 + 28 Parameters + 29 ---------- + 30 mass_spectrum : MassSpectrum + 31 CoreMS mass spectrum object + 32 plot : bool, optional + 33 Whether to plot the error distribution. The default is False. + 34 docker : bool, optional + 35 Whether to use the docker database. The default is True. If not, it uses a dynamically generated sqlite database. + 36 ppmFWHMprior : float, optional + 37 The FWHM of the prior distribution (ppm). The default is 3. + 38 ppmRangeprior : float, optional + 39 The range of the prior distribution (ppm). The default is 15. + 40 + 41 Methods + 42 -------- + 43 * determine_error_boundaries(). Determine the error boundaries for recalibration space. 
+ 44 + 45 Notes + 46 ----- + 47 This initialisation function creates a copy of the MassSpectrum object to avoid over-writing assignments. + 48 Possible future task is to make the base class copyable. + 49 + 50 """ + 51 + 52 def __init__( + 53 self, + 54 mass_spectrum, + 55 plot: bool = False, + 56 docker: bool = True, + 57 ppmFWHMprior: float = 3, + 58 ppmRangeprior: float = 15, + 59 ): + 60 self.mass_spectrum = copy.deepcopy(mass_spectrum) + 61 self.plot = plot + 62 self.docker = docker + 63 self.ppmFWHMprior = ppmFWHMprior + 64 self.ppmRangeprior = ppmRangeprior 65 - 66 This function serves the uncalibrated data (hence broad error tolerance) - 67 It only allows CHO formula in deprotonated ion type- as most common for SRFA ESI negative mode + 66 def set_uncal_settings(self): + 67 """Set uncalibrated formula search settings 68 - 69 This will not work for positive mode data, or for other ion types, or other expected elemental searches. - 70 - 71 """ - 72 #TODO rework this. + 69 This function serves the uncalibrated data (hence broad error tolerance) + 70 It only allows CHO formula in deprotonated ion type- as most common for SRFA ESI negative mode + 71 + 72 This will not work for positive mode data, or for other ion types, or other expected elemental searches. 73 - 74 if self.docker: - 75 self.mass_spectrum.molecular_search_settings.url_database = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp" - 76 else: - 77 self.mass_spectrum.molecular_search_settings.url_database = None - 78 self.mass_spectrum.molecular_search_settings.error_method = None - 79 self.mass_spectrum.molecular_search_settings.score_method = 'prob_score' - 80 - 81 self.mass_spectrum.molecular_search_settings.min_ppm_error = -1*self.ppmRangeprior/2 #-7.5 - 82 self.mass_spectrum.molecular_search_settings.max_ppm_error = self.ppmRangeprior/2 #7.5 + 74 """ + 75 # TODO rework this. 
+ 76 + 77 if self.docker: + 78 self.mass_spectrum.molecular_search_settings.url_database = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp" + 79 else: + 80 self.mass_spectrum.molecular_search_settings.url_database = None + 81 self.mass_spectrum.molecular_search_settings.error_method = None + 82 self.mass_spectrum.molecular_search_settings.score_method = "prob_score" 83 - 84 self.mass_spectrum.molecular_search_settings.min_dbe = 0 - 85 self.mass_spectrum.molecular_search_settings.max_dbe = 50 - 86 - 87 self.mass_spectrum.molecular_search_settings.use_isotopologue_filter = False - 88 self.mass_spectrum.molecular_search_settings.min_abun_error = -30 - 89 self.mass_spectrum.molecular_search_settings.max_abun_error = 70 - 90 - 91 self.mass_spectrum.molecular_search_settings.use_min_peaks_filter = True - 92 self.mass_spectrum.molecular_search_settings.min_peaks_per_class = 10 #default is 15 + 84 self.mass_spectrum.molecular_search_settings.min_ppm_error = ( + 85 -1 * self.ppmRangeprior / 2 + 86 ) # -7.5 + 87 self.mass_spectrum.molecular_search_settings.max_ppm_error = ( + 88 self.ppmRangeprior / 2 + 89 ) # 7.5 + 90 + 91 self.mass_spectrum.molecular_search_settings.min_dbe = 0 + 92 self.mass_spectrum.molecular_search_settings.max_dbe = 50 93 - 94 self.mass_spectrum.molecular_search_settings.usedAtoms['C'] = (1,90) - 95 self.mass_spectrum.molecular_search_settings.usedAtoms['H'] = (4,200) - 96 self.mass_spectrum.molecular_search_settings.usedAtoms['O'] = (1,23) - 97 self.mass_spectrum.molecular_search_settings.usedAtoms['N'] = (0,0) - 98 self.mass_spectrum.molecular_search_settings.usedAtoms['S'] = (0,0) - 99 self.mass_spectrum.molecular_search_settings.usedAtoms['P'] = (0,0) -100 -101 self.mass_spectrum.molecular_search_settings.isProtonated = True -102 self.mass_spectrum.molecular_search_settings.isRadical= False -103 self.mass_spectrum.molecular_search_settings.isAdduct = False -104 -105 def positive_search_settings(self): -106 """ Set the 
positive mode elemental search settings -107 """ -108 self.mass_spectrum.molecular_search_settings.isProtonated = False -109 self.mass_spectrum.molecular_search_settings.isAdduct = True -110 self.mass_spectrum.molecular_search_settings.adduct_atoms_pos = ['Na'] -111 -112 @staticmethod -113 def get_error_range(errors: list, ppmFWHMprior: float=3, plot_logic: bool=False): -114 """ Get the error range from the error distribution -115 -116 Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species. -117 -118 Parameters -119 ---------- -120 errors : list -121 list of the errors of the assigned species (ppm) -122 ppmFWHMprior : float, optional -123 The FWHM of the prior distribution (ppm). The default is 3. -124 plot_logic : bool, optional -125 Whether to plot the error distribution. The default is False. -126 -127 Returns -128 ------- -129 mean_error : float -130 mean mass error of the Gaussian distribution (ppm) -131 fwhm_error : float -132 full width half max of the gaussian error distribution (ppm) -133 ppm_thresh : list -134 recommended thresholds for the recalibration parameters (ppm) -135 Consists of [mean_error-fwhm_error,mean_error+fwhm_error] -136 -137 """ -138 kde = sns.kdeplot(errors) -139 -140 kde_data = kde.get_lines()[0].get_data() -141 -142 tmpdf = pd.Series(index=kde_data[0],data=kde_data[1]) -143 kde_apex_ppm = tmpdf.idxmax() -144 kde_apex_val = tmpdf.max() -145 -146 plt.close(kde.figure) -147 plt.close('all') -148 -149 lmmodel = GaussianModel() -150 lmpars = lmmodel.guess(kde_data[1], x=kde_data[0]) -151 lmpars['sigma'].value = 2.3548/ppmFWHMprior -152 lmpars['center'].value = kde_apex_ppm -153 lmpars['amplitude'].value = kde_apex_val -154 lmout = lmmodel.fit(kde_data[1], lmpars, x=kde_data[0]) -155 -156 if plot_logic: -157 fig,ax = plt.subplots(figsize=(8,4)) -158 lmout.plot_fit(ax=ax,data_kws ={'color':'tab:blue'},fit_kws ={'color':'tab:red'}) -159 ax.set_xlabel('$m/z$ Error (ppm)') -160 
ax.set_ylabel('Density') -161 plt.legend(facecolor='white', framealpha=0) -162 -163 mean_error = lmout.best_values['center'] -164 std_error = lmout.best_values['sigma'] -165 # FWHM from Sigma = approx. 2.355*sigma -166 #fwhm_error = 2*np.sqrt(2*np.log(2))*std_error -167 fwhm_error = std_error * np.sqrt(8*np.log(2)) -168 -169 ppm_thresh = [mean_error-fwhm_error,mean_error+fwhm_error] -170 return mean_error,fwhm_error,ppm_thresh -171 -172 def determine_error_boundaries(self): -173 """ Determine the error boundaries for recalibration space + 94 self.mass_spectrum.molecular_search_settings.use_isotopologue_filter = False + 95 self.mass_spectrum.molecular_search_settings.min_abun_error = -30 + 96 self.mass_spectrum.molecular_search_settings.max_abun_error = 70 + 97 + 98 self.mass_spectrum.molecular_search_settings.use_min_peaks_filter = True + 99 self.mass_spectrum.molecular_search_settings.min_peaks_per_class = ( +100 10 # default is 15 +101 ) +102 +103 self.mass_spectrum.molecular_search_settings.usedAtoms["C"] = (1, 90) +104 self.mass_spectrum.molecular_search_settings.usedAtoms["H"] = (4, 200) +105 self.mass_spectrum.molecular_search_settings.usedAtoms["O"] = (1, 23) +106 self.mass_spectrum.molecular_search_settings.usedAtoms["N"] = (0, 0) +107 self.mass_spectrum.molecular_search_settings.usedAtoms["S"] = (0, 0) +108 self.mass_spectrum.molecular_search_settings.usedAtoms["P"] = (0, 0) +109 +110 self.mass_spectrum.molecular_search_settings.isProtonated = True +111 self.mass_spectrum.molecular_search_settings.isRadical = False +112 self.mass_spectrum.molecular_search_settings.isAdduct = False +113 +114 def positive_search_settings(self): +115 """Set the positive mode elemental search settings""" +116 self.mass_spectrum.molecular_search_settings.isProtonated = False +117 self.mass_spectrum.molecular_search_settings.isAdduct = True +118 self.mass_spectrum.molecular_search_settings.adduct_atoms_pos = ["Na"] +119 +120 @staticmethod +121 def get_error_range( +122 errors: 
list, ppmFWHMprior: float = 3, plot_logic: bool = False +123 ): +124 """Get the error range from the error distribution +125 +126 Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species. +127 +128 Parameters +129 ---------- +130 errors : list +131 list of the errors of the assigned species (ppm) +132 ppmFWHMprior : float, optional +133 The FWHM of the prior distribution (ppm). The default is 3. +134 plot_logic : bool, optional +135 Whether to plot the error distribution. The default is False. +136 +137 Returns +138 ------- +139 mean_error : float +140 mean mass error of the Gaussian distribution (ppm) +141 fwhm_error : float +142 full width half max of the gaussian error distribution (ppm) +143 ppm_thresh : list +144 recommended thresholds for the recalibration parameters (ppm) +145 Consists of [mean_error-fwhm_error,mean_error+fwhm_error] +146 +147 """ +148 kde = sns.kdeplot(errors) +149 +150 kde_data = kde.get_lines()[0].get_data() +151 +152 tmpdf = pd.Series(index=kde_data[0], data=kde_data[1]) +153 kde_apex_ppm = tmpdf.idxmax() +154 kde_apex_val = tmpdf.max() +155 +156 plt.close(kde.figure) +157 plt.close("all") +158 +159 lmmodel = GaussianModel() +160 lmpars = lmmodel.guess(kde_data[1], x=kde_data[0]) +161 lmpars["sigma"].value = 2.3548 / ppmFWHMprior +162 lmpars["center"].value = kde_apex_ppm +163 lmpars["amplitude"].value = kde_apex_val +164 lmout = lmmodel.fit(kde_data[1], lmpars, x=kde_data[0]) +165 +166 if plot_logic: +167 fig, ax = plt.subplots(figsize=(8, 4)) +168 lmout.plot_fit( +169 ax=ax, data_kws={"color": "tab:blue"}, fit_kws={"color": "tab:red"} +170 ) +171 ax.set_xlabel("$m/z$ Error (ppm)") +172 ax.set_ylabel("Density") +173 plt.legend(facecolor="white", framealpha=0) 174 -175 This is the main function in this class -176 Sets the Molecular Formulas search settings, performs the initial formula search -177 Converts the data to a dataframe, and gets the error range -178 Returns the error 
thresholds. -179 -180 Returns -181 ------- -182 mean_error : float -183 mean mass error of the Gaussian distribution (ppm) -184 fwhm_error : float -185 full width half max of the gaussian error distribution (ppm) -186 ppm_thresh : list -187 recommended thresholds for the recalibration parameters (ppm) -188 Consists of [mean_error-fwhm_error,mean_error+fwhm_error] -189 """ -190 -191 # Set the search settings -192 self.set_uncal_settings() -193 -194 # Set the positive mode settings -195 # To do - have user defineable settings? -196 if self.mass_spectrum.polarity == 1: -197 self.positive_search_settings() -198 -199 # Search MFs -200 SearchMolecularFormulas(self.mass_spectrum, first_hit=True).run_worker_mass_spectrum() -201 -202 -203 # Exporting to a DF is ~30x slower than just getting the errors, so this is fast. -204 errors = [] -205 for mspeak in self.mass_spectrum.mspeaks: -206 if len(mspeak.molecular_formulas)>0: -207 errors.append(mspeak.best_molecular_formula_candidate.mz_error) -208 -209 -210 # If there are NO assignments, it'll fail on the next step. Need to check for that -211 nassign = len(errors) -212 # Here we say at least 5 features assigned are needed - it probably should be greater, but we are just trying to stop it breaking the code -213 # We want to make sure the spectrum is capture in the database though - so we return the stats entries (0 assignments) and the number of assignments -214 if nassign <5: -215 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -216 print("fewer than 5 peaks assigned, cannot determine error range") -217 return np.nan,np.nan,[np.nan,np.nan] -218 else: -219 mean_error,fwhm_error,ppm_thresh = self.get_error_range(errors, self.ppmFWHMprior, self.plot) -220 return mean_error,fwhm_error,ppm_thresh +175 mean_error = lmout.best_values["center"] +176 std_error = lmout.best_values["sigma"] +177 # FWHM from Sigma = approx. 
2.355*sigma +178 # fwhm_error = 2*np.sqrt(2*np.log(2))*std_error +179 fwhm_error = std_error * np.sqrt(8 * np.log(2)) +180 +181 ppm_thresh = [mean_error - fwhm_error, mean_error + fwhm_error] +182 return mean_error, fwhm_error, ppm_thresh +183 +184 def determine_error_boundaries(self): +185 """Determine the error boundaries for recalibration space +186 +187 This is the main function in this class +188 Sets the Molecular Formulas search settings, performs the initial formula search +189 Converts the data to a dataframe, and gets the error range +190 Returns the error thresholds. +191 +192 Returns +193 ------- +194 mean_error : float +195 mean mass error of the Gaussian distribution (ppm) +196 fwhm_error : float +197 full width half max of the gaussian error distribution (ppm) +198 ppm_thresh : list +199 recommended thresholds for the recalibration parameters (ppm) +200 Consists of [mean_error-fwhm_error,mean_error+fwhm_error] +201 """ +202 +203 # Set the search settings +204 self.set_uncal_settings() +205 +206 # Set the positive mode settings +207 # To do - have user defineable settings? +208 if self.mass_spectrum.polarity == 1: +209 self.positive_search_settings() +210 +211 # Search MFs +212 SearchMolecularFormulas( +213 self.mass_spectrum, first_hit=True +214 ).run_worker_mass_spectrum() +215 +216 # Exporting to a DF is ~30x slower than just getting the errors, so this is fast. +217 errors = [] +218 for mspeak in self.mass_spectrum.mspeaks: +219 if len(mspeak.molecular_formulas) > 0: +220 errors.append(mspeak.best_molecular_formula_candidate.mz_error) +221 +222 # If there are NO assignments, it'll fail on the next step. 
Need to check for that +223 nassign = len(errors) +224 # Here we say at least 5 features assigned are needed - it probably should be greater, but we are just trying to stop it breaking the code +225 # We want to make sure the spectrum is capture in the database though - so we return the stats entries (0 assignments) and the number of assignments +226 if nassign < 5: +227 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: +228 print("fewer than 5 peaks assigned, cannot determine error range") +229 return np.nan, np.nan, [np.nan, np.nan] +230 else: +231 mean_error, fwhm_error, ppm_thresh = self.get_error_range( +232 errors, self.ppmFWHMprior, self.plot +233 ) +234 return mean_error, fwhm_error, ppm_thresh

    @@ -328,214 +342,229 @@

    -
     21class HighResRecalibration:
    - 22    """
    - 23    This class is designed for high resolution (FTICR, Orbitrap) data of complex mixture, e.g. Organic matter
    - 24
    - 25    The tool first does a broad mass range search for the most commonly expected ion type (i.e. CHO, deprotonated - for negative ESI)
    - 26    And then the assigned data mass error distribution is searched, with a gaussian fit to the most prominent range. 
    - 27    This tool works when the data are of sufficient quality, and not outwith the typical expected range of the mass analyzer
    - 28    It presumes the mean error is out by 0-several ppm, but that the spread of error values is modest (<2ppm)
    - 29
    - 30    Parameters
    - 31    ----------
    - 32    mass_spectrum : MassSpectrum
    - 33        CoreMS mass spectrum object
    - 34    plot : bool, optional
    - 35        Whether to plot the error distribution. The default is False.
    - 36    docker : bool, optional
    - 37        Whether to use the docker database. The default is True. If not, it uses a dynamically generated sqlite database.
    - 38    ppmFWHMprior : float, optional  
    - 39        The FWHM of the prior distribution (ppm). The default is 3.
    - 40    ppmRangeprior : float, optional
    - 41        The range of the prior distribution (ppm). The default is 15.
    - 42
    - 43    Methods
    - 44    --------
    - 45    * determine_error_boundaries(). Determine the error boundaries for recalibration space.    
    - 46
    - 47    Notes
    - 48    -----
    - 49    This initialisation function creates a copy of the MassSpectrum object to avoid over-writing assignments. 
    - 50    Possible future task is to make the base class copyable. 
    - 51
    - 52    """
    - 53
    - 54    def __init__(self, mass_spectrum, plot : bool=False, docker : bool=True, 
    - 55                    ppmFWHMprior : float=3, ppmRangeprior : float=15):
    - 56       
    - 57        self.mass_spectrum = copy.deepcopy(mass_spectrum) 
    - 58        self.plot = plot
    - 59        self.docker = docker
    - 60        self.ppmFWHMprior = ppmFWHMprior
    - 61        self.ppmRangeprior = ppmRangeprior
    - 62
    - 63    
    - 64    def set_uncal_settings(self):
    - 65        """ Set uncalibrated formula search settings
    +            
     20class HighResRecalibration:
    + 21    """
    + 22    This class is designed for high resolution (FTICR, Orbitrap) data of complex mixture, e.g. Organic matter
    + 23
    + 24    The tool first does a broad mass range search for the most commonly expected ion type (i.e. CHO, deprotonated - for negative ESI)
    + 25    And then the assigned data mass error distribution is searched, with a gaussian fit to the most prominent range.
    + 26    This tool works when the data are of sufficient quality, and not outwith the typical expected range of the mass analyzer
    + 27    It presumes the mean error is out by 0-several ppm, but that the spread of error values is modest (<2ppm)
    + 28
    + 29    Parameters
    + 30    ----------
    + 31    mass_spectrum : MassSpectrum
    + 32        CoreMS mass spectrum object
    + 33    plot : bool, optional
    + 34        Whether to plot the error distribution. The default is False.
    + 35    docker : bool, optional
    + 36        Whether to use the docker database. The default is True. If not, it uses a dynamically generated sqlite database.
    + 37    ppmFWHMprior : float, optional
    + 38        The FWHM of the prior distribution (ppm). The default is 3.
    + 39    ppmRangeprior : float, optional
    + 40        The range of the prior distribution (ppm). The default is 15.
    + 41
    + 42    Methods
    + 43    --------
    + 44    * determine_error_boundaries(). Determine the error boundaries for recalibration space.
    + 45
    + 46    Notes
    + 47    -----
    + 48    This initialisation function creates a copy of the MassSpectrum object to avoid over-writing assignments.
    + 49    Possible future task is to make the base class copyable.
    + 50
    + 51    """
    + 52
    + 53    def __init__(
    + 54        self,
    + 55        mass_spectrum,
    + 56        plot: bool = False,
    + 57        docker: bool = True,
    + 58        ppmFWHMprior: float = 3,
    + 59        ppmRangeprior: float = 15,
    + 60    ):
    + 61        self.mass_spectrum = copy.deepcopy(mass_spectrum)
    + 62        self.plot = plot
    + 63        self.docker = docker
    + 64        self.ppmFWHMprior = ppmFWHMprior
    + 65        self.ppmRangeprior = ppmRangeprior
      66
    - 67        This function serves the uncalibrated data (hence broad error tolerance)
    - 68        It only allows CHO formula in deprotonated ion type- as most common for SRFA ESI negative mode
    + 67    def set_uncal_settings(self):
    + 68        """Set uncalibrated formula search settings
      69
    - 70        This will not work for positive mode data, or for other ion types, or other expected elemental searches.
    - 71
    - 72        """
    - 73        #TODO rework this.
    + 70        This function serves the uncalibrated data (hence broad error tolerance)
    + 71        It only allows CHO formula in deprotonated ion type- as most common for SRFA ESI negative mode
    + 72
    + 73        This will not work for positive mode data, or for other ion types, or other expected elemental searches.
      74
    - 75        if self.docker:
    - 76            self.mass_spectrum.molecular_search_settings.url_database = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    - 77        else:
    - 78            self.mass_spectrum.molecular_search_settings.url_database = None
    - 79        self.mass_spectrum.molecular_search_settings.error_method = None
    - 80        self.mass_spectrum.molecular_search_settings.score_method = 'prob_score'
    - 81
    - 82        self.mass_spectrum.molecular_search_settings.min_ppm_error  = -1*self.ppmRangeprior/2   #-7.5
    - 83        self.mass_spectrum.molecular_search_settings.max_ppm_error = self.ppmRangeprior/2   #7.5
    + 75        """
    + 76        # TODO rework this.
    + 77
    + 78        if self.docker:
    + 79            self.mass_spectrum.molecular_search_settings.url_database = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    + 80        else:
    + 81            self.mass_spectrum.molecular_search_settings.url_database = None
    + 82        self.mass_spectrum.molecular_search_settings.error_method = None
    + 83        self.mass_spectrum.molecular_search_settings.score_method = "prob_score"
      84
    - 85        self.mass_spectrum.molecular_search_settings.min_dbe = 0
    - 86        self.mass_spectrum.molecular_search_settings.max_dbe = 50
    - 87        
    - 88        self.mass_spectrum.molecular_search_settings.use_isotopologue_filter = False
    - 89        self.mass_spectrum.molecular_search_settings.min_abun_error = -30
    - 90        self.mass_spectrum.molecular_search_settings.max_abun_error = 70
    - 91        
    - 92        self.mass_spectrum.molecular_search_settings.use_min_peaks_filter = True
    - 93        self.mass_spectrum.molecular_search_settings.min_peaks_per_class = 10 #default is 15
    + 85        self.mass_spectrum.molecular_search_settings.min_ppm_error = (
    + 86            -1 * self.ppmRangeprior / 2
    + 87        )  # -7.5
    + 88        self.mass_spectrum.molecular_search_settings.max_ppm_error = (
    + 89            self.ppmRangeprior / 2
    + 90        )  # 7.5
    + 91
    + 92        self.mass_spectrum.molecular_search_settings.min_dbe = 0
    + 93        self.mass_spectrum.molecular_search_settings.max_dbe = 50
      94
    - 95        self.mass_spectrum.molecular_search_settings.usedAtoms['C'] = (1,90)
    - 96        self.mass_spectrum.molecular_search_settings.usedAtoms['H'] = (4,200)
    - 97        self.mass_spectrum.molecular_search_settings.usedAtoms['O'] = (1,23)
    - 98        self.mass_spectrum.molecular_search_settings.usedAtoms['N'] = (0,0)
    - 99        self.mass_spectrum.molecular_search_settings.usedAtoms['S'] = (0,0)
    -100        self.mass_spectrum.molecular_search_settings.usedAtoms['P'] = (0,0)
    -101
    -102        self.mass_spectrum.molecular_search_settings.isProtonated = True
    -103        self.mass_spectrum.molecular_search_settings.isRadical= False
    -104        self.mass_spectrum.molecular_search_settings.isAdduct = False
    -105
    -106    def positive_search_settings(self):
    -107        """ Set the positive mode elemental search settings
    -108        """
    -109        self.mass_spectrum.molecular_search_settings.isProtonated = False
    -110        self.mass_spectrum.molecular_search_settings.isAdduct = True
    -111        self.mass_spectrum.molecular_search_settings.adduct_atoms_pos = ['Na']
    -112
    -113    @staticmethod
    -114    def get_error_range(errors: list, ppmFWHMprior: float=3, plot_logic: bool=False):
    -115        """ Get the error range from the error distribution
    -116
    -117        Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species. 
    -118
    -119        Parameters
    -120        ----------
    -121        errors : list
    -122            list of the errors of the assigned species (ppm)
    -123        ppmFWHMprior : float, optional
    -124            The FWHM of the prior distribution (ppm). The default is 3.
    -125        plot_logic : bool, optional
    -126            Whether to plot the error distribution. The default is False.
    -127        
    -128        Returns
    -129        -------
    -130        mean_error : float
    -131            mean mass error of the Gaussian distribution (ppm)
    -132        fwhm_error : float
    -133            full width half max of the gaussian error distribution (ppm)
    -134        ppm_thresh : list
    -135            recommended thresholds for the recalibration parameters (ppm)
    -136            Consists of [mean_error-fwhm_error,mean_error+fwhm_error]
    -137        
    -138        """
    -139        kde = sns.kdeplot(errors) 
    -140
    -141        kde_data = kde.get_lines()[0].get_data()
    -142        
    -143        tmpdf = pd.Series(index=kde_data[0],data=kde_data[1])
    -144        kde_apex_ppm = tmpdf.idxmax()
    -145        kde_apex_val = tmpdf.max()
    -146
    -147        plt.close(kde.figure)
    -148        plt.close('all')
    -149        
    -150        lmmodel = GaussianModel()
    -151        lmpars = lmmodel.guess(kde_data[1], x=kde_data[0])
    -152        lmpars['sigma'].value = 2.3548/ppmFWHMprior
    -153        lmpars['center'].value = kde_apex_ppm
    -154        lmpars['amplitude'].value = kde_apex_val
    -155        lmout = lmmodel.fit(kde_data[1], lmpars, x=kde_data[0])
    -156        
    -157        if plot_logic:
    -158            fig,ax = plt.subplots(figsize=(8,4))
    -159            lmout.plot_fit(ax=ax,data_kws ={'color':'tab:blue'},fit_kws ={'color':'tab:red'})
    -160            ax.set_xlabel('$m/z$ Error (ppm)')
    -161            ax.set_ylabel('Density')
    -162            plt.legend(facecolor='white', framealpha=0)
    -163
    -164        mean_error = lmout.best_values['center']
    -165        std_error = lmout.best_values['sigma']
    -166        # FWHM from Sigma = approx. 2.355*sigma
    -167        #fwhm_error = 2*np.sqrt(2*np.log(2))*std_error
    -168        fwhm_error = std_error * np.sqrt(8*np.log(2))
    -169        
    -170        ppm_thresh = [mean_error-fwhm_error,mean_error+fwhm_error]
    -171        return mean_error,fwhm_error,ppm_thresh
    -172    
    -173    def determine_error_boundaries(self):
    -174        """ Determine the error boundaries for recalibration space
    + 95        self.mass_spectrum.molecular_search_settings.use_isotopologue_filter = False
    + 96        self.mass_spectrum.molecular_search_settings.min_abun_error = -30
    + 97        self.mass_spectrum.molecular_search_settings.max_abun_error = 70
    + 98
    + 99        self.mass_spectrum.molecular_search_settings.use_min_peaks_filter = True
    +100        self.mass_spectrum.molecular_search_settings.min_peaks_per_class = (
    +101            10  # default is 15
    +102        )
    +103
    +104        self.mass_spectrum.molecular_search_settings.usedAtoms["C"] = (1, 90)
    +105        self.mass_spectrum.molecular_search_settings.usedAtoms["H"] = (4, 200)
    +106        self.mass_spectrum.molecular_search_settings.usedAtoms["O"] = (1, 23)
    +107        self.mass_spectrum.molecular_search_settings.usedAtoms["N"] = (0, 0)
    +108        self.mass_spectrum.molecular_search_settings.usedAtoms["S"] = (0, 0)
    +109        self.mass_spectrum.molecular_search_settings.usedAtoms["P"] = (0, 0)
    +110
    +111        self.mass_spectrum.molecular_search_settings.isProtonated = True
    +112        self.mass_spectrum.molecular_search_settings.isRadical = False
    +113        self.mass_spectrum.molecular_search_settings.isAdduct = False
    +114
    +115    def positive_search_settings(self):
    +116        """Set the positive mode elemental search settings"""
    +117        self.mass_spectrum.molecular_search_settings.isProtonated = False
    +118        self.mass_spectrum.molecular_search_settings.isAdduct = True
    +119        self.mass_spectrum.molecular_search_settings.adduct_atoms_pos = ["Na"]
    +120
    +121    @staticmethod
    +122    def get_error_range(
    +123        errors: list, ppmFWHMprior: float = 3, plot_logic: bool = False
    +124    ):
    +125        """Get the error range from the error distribution
    +126
    +127        Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species.
    +128
    +129        Parameters
    +130        ----------
    +131        errors : list
    +132            list of the errors of the assigned species (ppm)
    +133        ppmFWHMprior : float, optional
    +134            The FWHM of the prior distribution (ppm). The default is 3.
    +135        plot_logic : bool, optional
    +136            Whether to plot the error distribution. The default is False.
    +137
    +138        Returns
    +139        -------
    +140        mean_error : float
    +141            mean mass error of the Gaussian distribution (ppm)
    +142        fwhm_error : float
    +143            full width half max of the gaussian error distribution (ppm)
    +144        ppm_thresh : list
    +145            recommended thresholds for the recalibration parameters (ppm)
    +146            Consists of [mean_error-fwhm_error,mean_error+fwhm_error]
    +147
    +148        """
    +149        kde = sns.kdeplot(errors)
    +150
    +151        kde_data = kde.get_lines()[0].get_data()
    +152
    +153        tmpdf = pd.Series(index=kde_data[0], data=kde_data[1])
    +154        kde_apex_ppm = tmpdf.idxmax()
    +155        kde_apex_val = tmpdf.max()
    +156
    +157        plt.close(kde.figure)
    +158        plt.close("all")
    +159
    +160        lmmodel = GaussianModel()
    +161        lmpars = lmmodel.guess(kde_data[1], x=kde_data[0])
    +162        lmpars["sigma"].value = 2.3548 / ppmFWHMprior
    +163        lmpars["center"].value = kde_apex_ppm
    +164        lmpars["amplitude"].value = kde_apex_val
    +165        lmout = lmmodel.fit(kde_data[1], lmpars, x=kde_data[0])
    +166
    +167        if plot_logic:
    +168            fig, ax = plt.subplots(figsize=(8, 4))
    +169            lmout.plot_fit(
    +170                ax=ax, data_kws={"color": "tab:blue"}, fit_kws={"color": "tab:red"}
    +171            )
    +172            ax.set_xlabel("$m/z$ Error (ppm)")
    +173            ax.set_ylabel("Density")
    +174            plt.legend(facecolor="white", framealpha=0)
     175
    -176        This is the main function in this class
    -177        Sets the Molecular Formulas search settings, performs the initial formula search
    -178        Converts the data to a dataframe, and gets the error range
    -179        Returns the error thresholds. 
    -180
    -181        Returns
    -182        -------
    -183        mean_error : float
    -184            mean mass error of the Gaussian distribution (ppm)
    -185        fwhm_error : float
    -186            full width half max of the gaussian error distribution (ppm)
    -187        ppm_thresh : list
    -188            recommended thresholds for the recalibration parameters (ppm)
    -189            Consists of [mean_error-fwhm_error,mean_error+fwhm_error]
    -190        """
    -191        
    -192        # Set the search settings 
    -193        self.set_uncal_settings()
    -194
    -195        # Set the positive mode settings
    -196        # To do - have user defineable settings?
    -197        if self.mass_spectrum.polarity == 1:
    -198            self.positive_search_settings()
    -199
    -200        # Search MFs
    -201        SearchMolecularFormulas(self.mass_spectrum, first_hit=True).run_worker_mass_spectrum()
    -202        
    -203        
    -204        # Exporting to a DF is ~30x slower than just getting the errors, so this is fast.
    -205        errors = []
    -206        for mspeak in self.mass_spectrum.mspeaks:
    -207            if len(mspeak.molecular_formulas)>0:
    -208                errors.append(mspeak.best_molecular_formula_candidate.mz_error)
    -209
    -210                
    -211        # If there are NO assignments, it'll fail on the next step. Need to check for that
    -212        nassign = len(errors)
    -213        # Here we say at least 5 features assigned are needed - it probably should be greater, but we are just trying to stop it breaking the code
    -214        # We want to make sure the spectrum is capture in the database though - so we return the stats entries (0 assignments) and the number of assignments
    -215        if nassign <5:
    -216            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -217                print("fewer than 5 peaks assigned, cannot determine error range")
    -218            return np.nan,np.nan,[np.nan,np.nan]
    -219        else:
    -220            mean_error,fwhm_error,ppm_thresh = self.get_error_range(errors, self.ppmFWHMprior, self.plot)
    -221            return mean_error,fwhm_error,ppm_thresh
    +176        mean_error = lmout.best_values["center"]
    +177        std_error = lmout.best_values["sigma"]
    +178        # FWHM from Sigma = approx. 2.355*sigma
    +179        # fwhm_error = 2*np.sqrt(2*np.log(2))*std_error
    +180        fwhm_error = std_error * np.sqrt(8 * np.log(2))
    +181
    +182        ppm_thresh = [mean_error - fwhm_error, mean_error + fwhm_error]
    +183        return mean_error, fwhm_error, ppm_thresh
    +184
    +185    def determine_error_boundaries(self):
    +186        """Determine the error boundaries for recalibration space
    +187
    +188        This is the main function in this class
    +189        Sets the Molecular Formulas search settings, performs the initial formula search
    +190        Converts the data to a dataframe, and gets the error range
    +191        Returns the error thresholds.
    +192
    +193        Returns
    +194        -------
    +195        mean_error : float
    +196            mean mass error of the Gaussian distribution (ppm)
    +197        fwhm_error : float
    +198            full width half max of the gaussian error distribution (ppm)
    +199        ppm_thresh : list
    +200            recommended thresholds for the recalibration parameters (ppm)
    +201            Consists of [mean_error-fwhm_error,mean_error+fwhm_error]
    +202        """
    +203
    +204        # Set the search settings
    +205        self.set_uncal_settings()
    +206
    +207        # Set the positive mode settings
    +208        # To do - have user defineable settings?
    +209        if self.mass_spectrum.polarity == 1:
    +210            self.positive_search_settings()
    +211
    +212        # Search MFs
    +213        SearchMolecularFormulas(
    +214            self.mass_spectrum, first_hit=True
    +215        ).run_worker_mass_spectrum()
    +216
    +217        # Exporting to a DF is ~30x slower than just getting the errors, so this is fast.
    +218        errors = []
    +219        for mspeak in self.mass_spectrum.mspeaks:
    +220            if len(mspeak.molecular_formulas) > 0:
    +221                errors.append(mspeak.best_molecular_formula_candidate.mz_error)
    +222
    +223        # If there are NO assignments, it'll fail on the next step. Need to check for that
    +224        nassign = len(errors)
    +225        # Here we say at least 5 features assigned are needed - it probably should be greater, but we are just trying to stop it breaking the code
    +226        # We want to make sure the spectrum is capture in the database though - so we return the stats entries (0 assignments) and the number of assignments
    +227        if nassign < 5:
    +228            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +229                print("fewer than 5 peaks assigned, cannot determine error range")
    +230            return np.nan, np.nan, [np.nan, np.nan]
    +231        else:
    +232            mean_error, fwhm_error, ppm_thresh = self.get_error_range(
    +233                errors, self.ppmFWHMprior, self.plot
    +234            )
    +235            return mean_error, fwhm_error, ppm_thresh
     

    This class is designed for high resolution (FTICR, Orbitrap) data of complex mixture, e.g. Organic matter

    The tool first does a broad mass range search for the most commonly expected ion type (i.e. CHO, deprotonated - for negative ESI) -And then the assigned data mass error distribution is searched, with a gaussian fit to the most prominent range. +And then the assigned data mass error distribution is searched, with a gaussian fit to the most prominent range. This tool works when the data are of sufficient quality, and not outwith the typical expected range of the mass analyzer It presumes the mean error is out by 0-several ppm, but that the spread of error values is modest (<2ppm)

    @@ -557,12 +586,12 @@
    Parameters
    Methods
      -
    • determine_error_boundaries(). Determine the error boundaries for recalibration space.
    • +
    • determine_error_boundaries(). Determine the error boundaries for recalibration space.
    Notes
    -

    This initialisation function creates a copy of the MassSpectrum object to avoid over-writing assignments. +

    This initialisation function creates a copy of the MassSpectrum object to avoid over-writing assignments. Possible future task is to make the base class copyable.

    @@ -577,14 +606,19 @@
    Notes
    -
    54    def __init__(self, mass_spectrum, plot : bool=False, docker : bool=True, 
    -55                    ppmFWHMprior : float=3, ppmRangeprior : float=15):
    -56       
    -57        self.mass_spectrum = copy.deepcopy(mass_spectrum) 
    -58        self.plot = plot
    -59        self.docker = docker
    -60        self.ppmFWHMprior = ppmFWHMprior
    -61        self.ppmRangeprior = ppmRangeprior
    +            
    53    def __init__(
    +54        self,
    +55        mass_spectrum,
    +56        plot: bool = False,
    +57        docker: bool = True,
    +58        ppmFWHMprior: float = 3,
    +59        ppmRangeprior: float = 15,
    +60    ):
    +61        self.mass_spectrum = copy.deepcopy(mass_spectrum)
    +62        self.plot = plot
    +63        self.docker = docker
    +64        self.ppmFWHMprior = ppmFWHMprior
    +65        self.ppmRangeprior = ppmRangeprior
     
    @@ -657,47 +691,53 @@
    Notes
    -
     64    def set_uncal_settings(self):
    - 65        """ Set uncalibrated formula search settings
    - 66
    - 67        This function serves the uncalibrated data (hence broad error tolerance)
    - 68        It only allows CHO formula in deprotonated ion type- as most common for SRFA ESI negative mode
    +            
     67    def set_uncal_settings(self):
    + 68        """Set uncalibrated formula search settings
      69
    - 70        This will not work for positive mode data, or for other ion types, or other expected elemental searches.
    - 71
    - 72        """
    - 73        #TODO rework this.
    + 70        This function serves the uncalibrated data (hence broad error tolerance)
    + 71        It only allows CHO formula in deprotonated ion type- as most common for SRFA ESI negative mode
    + 72
    + 73        This will not work for positive mode data, or for other ion types, or other expected elemental searches.
      74
    - 75        if self.docker:
    - 76            self.mass_spectrum.molecular_search_settings.url_database = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    - 77        else:
    - 78            self.mass_spectrum.molecular_search_settings.url_database = None
    - 79        self.mass_spectrum.molecular_search_settings.error_method = None
    - 80        self.mass_spectrum.molecular_search_settings.score_method = 'prob_score'
    - 81
    - 82        self.mass_spectrum.molecular_search_settings.min_ppm_error  = -1*self.ppmRangeprior/2   #-7.5
    - 83        self.mass_spectrum.molecular_search_settings.max_ppm_error = self.ppmRangeprior/2   #7.5
    + 75        """
    + 76        # TODO rework this.
    + 77
    + 78        if self.docker:
    + 79            self.mass_spectrum.molecular_search_settings.url_database = "postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp"
    + 80        else:
    + 81            self.mass_spectrum.molecular_search_settings.url_database = None
    + 82        self.mass_spectrum.molecular_search_settings.error_method = None
    + 83        self.mass_spectrum.molecular_search_settings.score_method = "prob_score"
      84
    - 85        self.mass_spectrum.molecular_search_settings.min_dbe = 0
    - 86        self.mass_spectrum.molecular_search_settings.max_dbe = 50
    - 87        
    - 88        self.mass_spectrum.molecular_search_settings.use_isotopologue_filter = False
    - 89        self.mass_spectrum.molecular_search_settings.min_abun_error = -30
    - 90        self.mass_spectrum.molecular_search_settings.max_abun_error = 70
    - 91        
    - 92        self.mass_spectrum.molecular_search_settings.use_min_peaks_filter = True
    - 93        self.mass_spectrum.molecular_search_settings.min_peaks_per_class = 10 #default is 15
    + 85        self.mass_spectrum.molecular_search_settings.min_ppm_error = (
    + 86            -1 * self.ppmRangeprior / 2
    + 87        )  # -7.5
    + 88        self.mass_spectrum.molecular_search_settings.max_ppm_error = (
    + 89            self.ppmRangeprior / 2
    + 90        )  # 7.5
    + 91
    + 92        self.mass_spectrum.molecular_search_settings.min_dbe = 0
    + 93        self.mass_spectrum.molecular_search_settings.max_dbe = 50
      94
    - 95        self.mass_spectrum.molecular_search_settings.usedAtoms['C'] = (1,90)
    - 96        self.mass_spectrum.molecular_search_settings.usedAtoms['H'] = (4,200)
    - 97        self.mass_spectrum.molecular_search_settings.usedAtoms['O'] = (1,23)
    - 98        self.mass_spectrum.molecular_search_settings.usedAtoms['N'] = (0,0)
    - 99        self.mass_spectrum.molecular_search_settings.usedAtoms['S'] = (0,0)
    -100        self.mass_spectrum.molecular_search_settings.usedAtoms['P'] = (0,0)
    -101
    -102        self.mass_spectrum.molecular_search_settings.isProtonated = True
    -103        self.mass_spectrum.molecular_search_settings.isRadical= False
    -104        self.mass_spectrum.molecular_search_settings.isAdduct = False
    + 95        self.mass_spectrum.molecular_search_settings.use_isotopologue_filter = False
    + 96        self.mass_spectrum.molecular_search_settings.min_abun_error = -30
    + 97        self.mass_spectrum.molecular_search_settings.max_abun_error = 70
    + 98
    + 99        self.mass_spectrum.molecular_search_settings.use_min_peaks_filter = True
    +100        self.mass_spectrum.molecular_search_settings.min_peaks_per_class = (
    +101            10  # default is 15
    +102        )
    +103
    +104        self.mass_spectrum.molecular_search_settings.usedAtoms["C"] = (1, 90)
    +105        self.mass_spectrum.molecular_search_settings.usedAtoms["H"] = (4, 200)
    +106        self.mass_spectrum.molecular_search_settings.usedAtoms["O"] = (1, 23)
    +107        self.mass_spectrum.molecular_search_settings.usedAtoms["N"] = (0, 0)
    +108        self.mass_spectrum.molecular_search_settings.usedAtoms["S"] = (0, 0)
    +109        self.mass_spectrum.molecular_search_settings.usedAtoms["P"] = (0, 0)
    +110
    +111        self.mass_spectrum.molecular_search_settings.isProtonated = True
    +112        self.mass_spectrum.molecular_search_settings.isRadical = False
    +113        self.mass_spectrum.molecular_search_settings.isAdduct = False
     
    @@ -722,12 +762,11 @@
    Notes
    -
    106    def positive_search_settings(self):
    -107        """ Set the positive mode elemental search settings
    -108        """
    -109        self.mass_spectrum.molecular_search_settings.isProtonated = False
    -110        self.mass_spectrum.molecular_search_settings.isAdduct = True
    -111        self.mass_spectrum.molecular_search_settings.adduct_atoms_pos = ['Na']
    +            
    115    def positive_search_settings(self):
    +116        """Set the positive mode elemental search settings"""
    +117        self.mass_spectrum.molecular_search_settings.isProtonated = False
    +118        self.mass_spectrum.molecular_search_settings.isAdduct = True
    +119        self.mass_spectrum.molecular_search_settings.adduct_atoms_pos = ["Na"]
     
    @@ -748,71 +787,75 @@
    Notes
    -
    113    @staticmethod
    -114    def get_error_range(errors: list, ppmFWHMprior: float=3, plot_logic: bool=False):
    -115        """ Get the error range from the error distribution
    -116
    -117        Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species. 
    -118
    -119        Parameters
    -120        ----------
    -121        errors : list
    -122            list of the errors of the assigned species (ppm)
    -123        ppmFWHMprior : float, optional
    -124            The FWHM of the prior distribution (ppm). The default is 3.
    -125        plot_logic : bool, optional
    -126            Whether to plot the error distribution. The default is False.
    -127        
    -128        Returns
    -129        -------
    -130        mean_error : float
    -131            mean mass error of the Gaussian distribution (ppm)
    -132        fwhm_error : float
    -133            full width half max of the gaussian error distribution (ppm)
    -134        ppm_thresh : list
    -135            recommended thresholds for the recalibration parameters (ppm)
    -136            Consists of [mean_error-fwhm_error,mean_error+fwhm_error]
    -137        
    -138        """
    -139        kde = sns.kdeplot(errors) 
    -140
    -141        kde_data = kde.get_lines()[0].get_data()
    -142        
    -143        tmpdf = pd.Series(index=kde_data[0],data=kde_data[1])
    -144        kde_apex_ppm = tmpdf.idxmax()
    -145        kde_apex_val = tmpdf.max()
    -146
    -147        plt.close(kde.figure)
    -148        plt.close('all')
    -149        
    -150        lmmodel = GaussianModel()
    -151        lmpars = lmmodel.guess(kde_data[1], x=kde_data[0])
    -152        lmpars['sigma'].value = 2.3548/ppmFWHMprior
    -153        lmpars['center'].value = kde_apex_ppm
    -154        lmpars['amplitude'].value = kde_apex_val
    -155        lmout = lmmodel.fit(kde_data[1], lmpars, x=kde_data[0])
    -156        
    -157        if plot_logic:
    -158            fig,ax = plt.subplots(figsize=(8,4))
    -159            lmout.plot_fit(ax=ax,data_kws ={'color':'tab:blue'},fit_kws ={'color':'tab:red'})
    -160            ax.set_xlabel('$m/z$ Error (ppm)')
    -161            ax.set_ylabel('Density')
    -162            plt.legend(facecolor='white', framealpha=0)
    -163
    -164        mean_error = lmout.best_values['center']
    -165        std_error = lmout.best_values['sigma']
    -166        # FWHM from Sigma = approx. 2.355*sigma
    -167        #fwhm_error = 2*np.sqrt(2*np.log(2))*std_error
    -168        fwhm_error = std_error * np.sqrt(8*np.log(2))
    -169        
    -170        ppm_thresh = [mean_error-fwhm_error,mean_error+fwhm_error]
    -171        return mean_error,fwhm_error,ppm_thresh
    +            
    121    @staticmethod
    +122    def get_error_range(
    +123        errors: list, ppmFWHMprior: float = 3, plot_logic: bool = False
    +124    ):
    +125        """Get the error range from the error distribution
    +126
    +127        Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species.
    +128
    +129        Parameters
    +130        ----------
    +131        errors : list
    +132            list of the errors of the assigned species (ppm)
    +133        ppmFWHMprior : float, optional
    +134            The FWHM of the prior distribution (ppm). The default is 3.
    +135        plot_logic : bool, optional
    +136            Whether to plot the error distribution. The default is False.
    +137
    +138        Returns
    +139        -------
    +140        mean_error : float
    +141            mean mass error of the Gaussian distribution (ppm)
    +142        fwhm_error : float
    +143            full width half max of the gaussian error distribution (ppm)
    +144        ppm_thresh : list
    +145            recommended thresholds for the recalibration parameters (ppm)
    +146            Consists of [mean_error-fwhm_error,mean_error+fwhm_error]
    +147
    +148        """
    +149        kde = sns.kdeplot(errors)
    +150
    +151        kde_data = kde.get_lines()[0].get_data()
    +152
    +153        tmpdf = pd.Series(index=kde_data[0], data=kde_data[1])
    +154        kde_apex_ppm = tmpdf.idxmax()
    +155        kde_apex_val = tmpdf.max()
    +156
    +157        plt.close(kde.figure)
    +158        plt.close("all")
    +159
    +160        lmmodel = GaussianModel()
    +161        lmpars = lmmodel.guess(kde_data[1], x=kde_data[0])
    +162        lmpars["sigma"].value = 2.3548 / ppmFWHMprior
    +163        lmpars["center"].value = kde_apex_ppm
    +164        lmpars["amplitude"].value = kde_apex_val
    +165        lmout = lmmodel.fit(kde_data[1], lmpars, x=kde_data[0])
    +166
    +167        if plot_logic:
    +168            fig, ax = plt.subplots(figsize=(8, 4))
    +169            lmout.plot_fit(
    +170                ax=ax, data_kws={"color": "tab:blue"}, fit_kws={"color": "tab:red"}
    +171            )
    +172            ax.set_xlabel("$m/z$ Error (ppm)")
    +173            ax.set_ylabel("Density")
    +174            plt.legend(facecolor="white", framealpha=0)
    +175
    +176        mean_error = lmout.best_values["center"]
    +177        std_error = lmout.best_values["sigma"]
    +178        # FWHM from Sigma = approx. 2.355*sigma
    +179        # fwhm_error = 2*np.sqrt(2*np.log(2))*std_error
    +180        fwhm_error = std_error * np.sqrt(8 * np.log(2))
    +181
    +182        ppm_thresh = [mean_error - fwhm_error, mean_error + fwhm_error]
    +183        return mean_error, fwhm_error, ppm_thresh
     

    Get the error range from the error distribution

    -

    Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species.

    +

    Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species.

    Parameters
    @@ -851,55 +894,57 @@
    Returns
    -
    173    def determine_error_boundaries(self):
    -174        """ Determine the error boundaries for recalibration space
    -175
    -176        This is the main function in this class
    -177        Sets the Molecular Formulas search settings, performs the initial formula search
    -178        Converts the data to a dataframe, and gets the error range
    -179        Returns the error thresholds. 
    -180
    -181        Returns
    -182        -------
    -183        mean_error : float
    -184            mean mass error of the Gaussian distribution (ppm)
    -185        fwhm_error : float
    -186            full width half max of the gaussian error distribution (ppm)
    -187        ppm_thresh : list
    -188            recommended thresholds for the recalibration parameters (ppm)
    -189            Consists of [mean_error-fwhm_error,mean_error+fwhm_error]
    -190        """
    -191        
    -192        # Set the search settings 
    -193        self.set_uncal_settings()
    -194
    -195        # Set the positive mode settings
    -196        # To do - have user defineable settings?
    -197        if self.mass_spectrum.polarity == 1:
    -198            self.positive_search_settings()
    -199
    -200        # Search MFs
    -201        SearchMolecularFormulas(self.mass_spectrum, first_hit=True).run_worker_mass_spectrum()
    -202        
    -203        
    -204        # Exporting to a DF is ~30x slower than just getting the errors, so this is fast.
    -205        errors = []
    -206        for mspeak in self.mass_spectrum.mspeaks:
    -207            if len(mspeak.molecular_formulas)>0:
    -208                errors.append(mspeak.best_molecular_formula_candidate.mz_error)
    -209
    -210                
    -211        # If there are NO assignments, it'll fail on the next step. Need to check for that
    -212        nassign = len(errors)
    -213        # Here we say at least 5 features assigned are needed - it probably should be greater, but we are just trying to stop it breaking the code
    -214        # We want to make sure the spectrum is capture in the database though - so we return the stats entries (0 assignments) and the number of assignments
    -215        if nassign <5:
    -216            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -217                print("fewer than 5 peaks assigned, cannot determine error range")
    -218            return np.nan,np.nan,[np.nan,np.nan]
    -219        else:
    -220            mean_error,fwhm_error,ppm_thresh = self.get_error_range(errors, self.ppmFWHMprior, self.plot)
    -221            return mean_error,fwhm_error,ppm_thresh
    +            
    185    def determine_error_boundaries(self):
    +186        """Determine the error boundaries for recalibration space
    +187
    +188        This is the main function in this class
    +189        Sets the Molecular Formulas search settings, performs the initial formula search
    +190        Converts the data to a dataframe, and gets the error range
    +191        Returns the error thresholds.
    +192
    +193        Returns
    +194        -------
    +195        mean_error : float
    +196            mean mass error of the Gaussian distribution (ppm)
    +197        fwhm_error : float
    +198            full width half max of the gaussian error distribution (ppm)
    +199        ppm_thresh : list
    +200            recommended thresholds for the recalibration parameters (ppm)
    +201            Consists of [mean_error-fwhm_error,mean_error+fwhm_error]
    +202        """
    +203
    +204        # Set the search settings
    +205        self.set_uncal_settings()
    +206
    +207        # Set the positive mode settings
    +208        # To do - have user defineable settings?
    +209        if self.mass_spectrum.polarity == 1:
    +210            self.positive_search_settings()
    +211
    +212        # Search MFs
    +213        SearchMolecularFormulas(
    +214            self.mass_spectrum, first_hit=True
    +215        ).run_worker_mass_spectrum()
    +216
    +217        # Exporting to a DF is ~30x slower than just getting the errors, so this is fast.
    +218        errors = []
    +219        for mspeak in self.mass_spectrum.mspeaks:
    +220            if len(mspeak.molecular_formulas) > 0:
    +221                errors.append(mspeak.best_molecular_formula_candidate.mz_error)
    +222
    +223        # If there are NO assignments, it'll fail on the next step. Need to check for that
    +224        nassign = len(errors)
    +225        # Here we say at least 5 features assigned are needed - it probably should be greater, but we are just trying to stop it breaking the code
    +226        # We want to make sure the spectrum is capture in the database though - so we return the stats entries (0 assignments) and the number of assignments
    +227        if nassign < 5:
    +228            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +229                print("fewer than 5 peaks assigned, cannot determine error range")
    +230            return np.nan, np.nan, [np.nan, np.nan]
    +231        else:
    +232            mean_error, fwhm_error, ppm_thresh = self.get_error_range(
    +233                errors, self.ppmFWHMprior, self.plot
    +234            )
    +235            return mean_error, fwhm_error, ppm_thresh
     
    @@ -908,7 +953,7 @@
    Returns

    This is the main function in this class Sets the Molecular Formulas search settings, performs the initial formula search Converts the data to a dataframe, and gets the error range -Returns the error thresholds.

    +Returns the error thresholds.

    Returns
    diff --git a/docs/corems/mass_spectrum/calc/Calibration.html b/docs/corems/mass_spectrum/calc/Calibration.html index 45029353..f7db63c2 100644 --- a/docs/corems/mass_spectrum/calc/Calibration.html +++ b/docs/corems/mass_spectrum/calc/Calibration.html @@ -99,477 +99,559 @@

    6""" 7 8# import modules - 9import pandas as pd - 10import numpy as np - 11import os - 12import csv - 13from io import BytesIO - 14from pathlib import Path - 15import warnings - 16 - 17from s3path import S3Path - 18# import corems modules - 19from corems.transient.input.brukerSolarix import ReadBrukerSolarix - 20from corems.encapsulation.factory.parameters import MSParameters - 21from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas - 22 - 23# import scipy modules for calibration - 24from scipy.optimize import minimize - 25 - 26class MzDomainCalibration: - 27 """ MzDomainCalibration class for recalibrating mass spectra - 28 - 29 Parameters - 30 ---------- - 31 mass_spectrum : CoreMS MassSpectrum Object - 32 The mass spectrum to be calibrated. - 33 ref_masslist : str - 34 The path to a reference mass list. - 35 mzsegment : tuple of floats, optional - 36 The mz range to recalibrate, or None. Used for calibration of specific parts of the mz domain at a time. - 37 Future work - allow multiple mzsegments to be passed. - 38 - 39 Attributes - 40 ---------- - 41 mass_spectrum : CoreMS MassSpectrum Object - 42 The mass spectrum to be calibrated. - 43 mzsegment : tuple of floats or None - 44 The mz range to recalibrate, or None. - 45 ref_mass_list_path : str or Path - 46 The path to the reference mass list. - 47 - 48 Methods - 49 ------- - 50 * run(). - 51 Main function to run this class. - 52 * load_ref_mass_list(). - 53 Load reference mass list (Bruker format). - 54 * gen_ref_mass_list_from_assigned(min_conf=0.7). - 55 Generate reference mass list from assigned masses. - 56 * find_calibration_points(df_ref, calib_ppm_error_threshold=(-1, 1), calib_snr_threshold=5). - 57 Find calibration points in the mass spectrum based on the reference mass list. - 58 * robust_calib(param, cal_peaks_mz, cal_refs_mz, order=1). - 59 Recalibration function. - 60 * recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=1, diagnostic=False). 
- 61 Main recalibration function which uses a robust linear regression. - 62 - 63 - 64 """ + 9import csv + 10import warnings + 11from io import BytesIO + 12from pathlib import Path + 13 + 14import numpy as np + 15import pandas as pd + 16from s3path import S3Path + 17 + 18# import scipy modules for calibration + 19from scipy.optimize import minimize + 20 + 21 + 22class MzDomainCalibration: + 23 """MzDomainCalibration class for recalibrating mass spectra + 24 + 25 Parameters + 26 ---------- + 27 mass_spectrum : CoreMS MassSpectrum Object + 28 The mass spectrum to be calibrated. + 29 ref_masslist : str + 30 The path to a reference mass list. + 31 mzsegment : tuple of floats, optional + 32 The mz range to recalibrate, or None. Used for calibration of specific parts of the mz domain at a time. + 33 Future work - allow multiple mzsegments to be passed. + 34 + 35 Attributes + 36 ---------- + 37 mass_spectrum : CoreMS MassSpectrum Object + 38 The mass spectrum to be calibrated. + 39 mzsegment : tuple of floats or None + 40 The mz range to recalibrate, or None. + 41 ref_mass_list_path : str or Path + 42 The path to the reference mass list. + 43 + 44 Methods + 45 ------- + 46 * run(). + 47 Main function to run this class. + 48 * load_ref_mass_list(). + 49 Load reference mass list (Bruker format). + 50 * gen_ref_mass_list_from_assigned(min_conf=0.7). + 51 Generate reference mass list from assigned masses. + 52 * find_calibration_points(df_ref, calib_ppm_error_threshold=(-1, 1), calib_snr_threshold=5). + 53 Find calibration points in the mass spectrum based on the reference mass list. + 54 * robust_calib(param, cal_peaks_mz, cal_refs_mz, order=1). + 55 Recalibration function. + 56 * recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=1, diagnostic=False). + 57 Main recalibration function which uses a robust linear regression. 
+ 58 + 59 + 60 """ + 61 + 62 def __init__(self, mass_spectrum, ref_masslist, mzsegment=None): + 63 self.mass_spectrum = mass_spectrum + 64 self.mzsegment = mzsegment 65 - 66 def __init__(self, mass_spectrum, ref_masslist,mzsegment=None): - 67 - 68 self.mass_spectrum = mass_spectrum - 69 self.mzsegment = mzsegment - 70 - 71 # define reference mass list - bruker .ref format - 72 self.ref_mass_list_path = ref_masslist - 73 if self.mass_spectrum.percentile_assigned()[0]!=0: - 74 warnings.warn('Warning: calibrating spectra which have already been assigned may yield erroneous results') - 75 self.mass_spectrum.mz_cal = self.mass_spectrum.mz_exp - 76 self.mass_spectrum.mz_cal_profile = self.mass_spectrum._mz_exp - 77 - 78 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: - 79 print("MS Obj loaded - "+str(len(mass_spectrum.mspeaks))+" peaks found.") - 80 - 81 print("MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found.") - 82 - 83 def load_ref_mass_list(self): - 84 """ Load reference mass list (Bruker format) - 85 - 86 Loads in a reference mass list from a .ref file - 87 Note that some versions of Bruker's software produce .ref files with a different format. - 88 As such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format. - 89 CoreMS includes an example .ref file with the correct format for reference. - 90 - 91 Returns - 92 ------- - 93 df_ref : Pandas DataFrame - 94 reference mass list object. 
- 95 - 96 """ - 97 refmasslist = Path(self.ref_mass_list_path) if isinstance(self.ref_mass_list_path, str) else self.ref_mass_list_path - 98 - 99 if not refmasslist.exists(): -100 raise FileExistsError("File does not exist: %s" % refmasslist) -101 -102 with refmasslist.open('r') as csvfile: -103 dialect = csv.Sniffer().sniff(csvfile.read(1024)) -104 delimiter = dialect.delimiter -105 -106 if isinstance(refmasslist, S3Path): -107 # data = self.file_location.open('rb').read() -108 data = BytesIO(refmasslist.open('rb').read()) -109 -110 else: -111 data = refmasslist -112 -113 df_ref = pd.read_csv(data, sep=delimiter, header=None, skiprows=1) + 66 # define reference mass list - bruker .ref format + 67 self.ref_mass_list_path = ref_masslist + 68 if self.mass_spectrum.percentile_assigned()[0] != 0: + 69 warnings.warn( + 70 "Warning: calibrating spectra which have already been assigned may yield erroneous results" + 71 ) + 72 self.mass_spectrum.mz_cal = self.mass_spectrum.mz_exp + 73 self.mass_spectrum.mz_cal_profile = self.mass_spectrum._mz_exp + 74 + 75 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: + 76 print( + 77 "MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found." + 78 ) + 79 + 80 print( + 81 "MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found." + 82 ) + 83 + 84 def load_ref_mass_list(self): + 85 """Load reference mass list (Bruker format) + 86 + 87 Loads in a reference mass list from a .ref file + 88 Note that some versions of Bruker's software produce .ref files with a different format. + 89 As such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format. + 90 CoreMS includes an example .ref file with the correct format for reference. + 91 + 92 Returns + 93 ------- + 94 df_ref : Pandas DataFrame + 95 reference mass list object. 
+ 96 + 97 """ + 98 refmasslist = ( + 99 Path(self.ref_mass_list_path) +100 if isinstance(self.ref_mass_list_path, str) +101 else self.ref_mass_list_path +102 ) +103 +104 if not refmasslist.exists(): +105 raise FileExistsError("File does not exist: %s" % refmasslist) +106 +107 with refmasslist.open("r") as csvfile: +108 dialect = csv.Sniffer().sniff(csvfile.read(1024)) +109 delimiter = dialect.delimiter +110 +111 if isinstance(refmasslist, S3Path): +112 # data = self.file_location.open('rb').read() +113 data = BytesIO(refmasslist.open("rb").read()) 114 -115 df_ref = df_ref.rename({0: 'Formula', -116 1: 'm/z', -117 2: 'Charge', -118 3: 'Form2' -119 }, axis=1) -120 -121 df_ref.sort_values(by='m/z', ascending=True,inplace=True) -122 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -123 print("Reference mass list loaded - " + str(len(df_ref)) + " calibration masses loaded.") -124 -125 return df_ref -126 -127 def gen_ref_mass_list_from_assigned(self, min_conf : float=0.7): -128 """ Generate reference mass list from assigned masses -129 -130 This function will generate a ref mass dataframe object from an assigned corems mass spec obj -131 using assigned masses above a certain minimum confidence threshold. -132 -133 This function needs to be retested and check it is covered in the unit tests. -134 -135 Parameters -136 ---------- -137 min_conf : float, optional -138 minimum confidence score. The default is 0.7. +115 else: +116 data = refmasslist +117 +118 df_ref = pd.read_csv(data, sep=delimiter, header=None, skiprows=1) +119 +120 df_ref = df_ref.rename( +121 {0: "Formula", 1: "m/z", 2: "Charge", 3: "Form2"}, axis=1 +122 ) +123 +124 df_ref.sort_values(by="m/z", ascending=True, inplace=True) +125 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: +126 print( +127 "Reference mass list loaded - " +128 + str(len(df_ref)) +129 + " calibration masses loaded." 
+130 ) +131 +132 return df_ref +133 +134 def gen_ref_mass_list_from_assigned(self, min_conf: float = 0.7): +135 """Generate reference mass list from assigned masses +136 +137 This function will generate a ref mass dataframe object from an assigned corems mass spec obj +138 using assigned masses above a certain minimum confidence threshold. 139 -140 Returns -141 ------- -142 df_ref : Pandas DataFrame -143 reference mass list - based on calculated masses. -144 -145 """ -146 #TODO this function needs to be retested and check it is covered in the unit tests -147 df = self.mass_spectrum.to_dataframe() -148 df = df[df['Confidence Score'] > min_conf] -149 df_ref = pd.DataFrame(columns=['m/z']) -150 df_ref['m/z'] = df['Calculated m/z'] -151 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -152 print("Reference mass list generated - " + str(len(df_ref)) + " calibration masses.") -153 return df_ref -154 -155 def find_calibration_points(self, df_ref, -156 calib_ppm_error_threshold : tuple[float, float]=(-1, 1), -157 calib_snr_threshold : float=5, -158 calibration_ref_match_method : str='legacy', -159 calibration_ref_match_tolerance : float=0.003, -160 calibration_ref_match_std_raw_error_limit: float=1.5): -161 """Function to find calibration points in the mass spectrum -162 -163 Based on the reference mass list. -164 -165 Parameters -166 ---------- -167 df_ref : Pandas DataFrame -168 reference mass list for recalibration. -169 calib_ppm_error_threshold : tuple of floats, optional -170 ppm error for finding calibration masses in the spectrum. The default is -1,1. -171 Note: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6. -172 Some software does this the other way around and value signs must be inverted for that to work. -173 calib_snr_threshold : float, optional -174 snr threshold for finding calibration masses in the spectrum. The default is 5. 
-175 -176 Returns -177 ------- -178 cal_peaks_mz : list of floats -179 masses of measured ions to use in calibration routine -180 cal_refs_mz : list of floats -181 reference mz values of found calibration points. -182 -183 """ -184 -185 # This approach is much more efficient and expedient than the original implementation. -186 peaks_mz = [] -187 for x in self.mass_spectrum.mspeaks: -188 if x.signal_to_noise > calib_snr_threshold: -189 if self.mzsegment: -190 if (min(self.mzsegment) <= x.mz_exp <= max(self.mzsegment)): -191 peaks_mz.append(x.mz_exp) -192 else: -193 peaks_mz.append(x.mz_exp) -194 peaks_mz = np.asarray(peaks_mz) -195 -196 if calibration_ref_match_method == 'legacy': -197 # This legacy approach iterates through each reference match and finds the entries within 1 mz and within the user defined PPM error threshold -198 # Then it removes ambiguities - which means the calibration threshold hasto be very tight. -199 cal_peaks_mz = [] -200 cal_refs_mz = [] -201 for mzref in df_ref['m/z']: -202 tmp_peaks_mz = peaks_mz[abs(peaks_mz-mzref)<1] -203 for mzmeas in tmp_peaks_mz: -204 delta_mass = ((mzmeas-mzref)/mzref)*1e6 -205 if delta_mass < max(calib_ppm_error_threshold): -206 if delta_mass > min(calib_ppm_error_threshold): -207 cal_peaks_mz.append(mzmeas) -208 cal_refs_mz.append(mzref) +140 This function needs to be retested and check it is covered in the unit tests. +141 +142 Parameters +143 ---------- +144 min_conf : float, optional +145 minimum confidence score. The default is 0.7. +146 +147 Returns +148 ------- +149 df_ref : Pandas DataFrame +150 reference mass list - based on calculated masses. 
+151 +152 """ +153 # TODO this function needs to be retested and check it is covered in the unit tests +154 df = self.mass_spectrum.to_dataframe() +155 df = df[df["Confidence Score"] > min_conf] +156 df_ref = pd.DataFrame(columns=["m/z"]) +157 df_ref["m/z"] = df["Calculated m/z"] +158 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: +159 print( +160 "Reference mass list generated - " +161 + str(len(df_ref)) +162 + " calibration masses." +163 ) +164 return df_ref +165 +166 def find_calibration_points( +167 self, +168 df_ref, +169 calib_ppm_error_threshold: tuple[float, float] = (-1, 1), +170 calib_snr_threshold: float = 5, +171 calibration_ref_match_method: str = "legacy", +172 calibration_ref_match_tolerance: float = 0.003, +173 calibration_ref_match_std_raw_error_limit: float = 1.5, +174 ): +175 """Function to find calibration points in the mass spectrum +176 +177 Based on the reference mass list. +178 +179 Parameters +180 ---------- +181 df_ref : Pandas DataFrame +182 reference mass list for recalibration. +183 calib_ppm_error_threshold : tuple of floats, optional +184 ppm error for finding calibration masses in the spectrum. The default is -1,1. +185 Note: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6. +186 Some software does this the other way around and value signs must be inverted for that to work. +187 calib_snr_threshold : float, optional +188 snr threshold for finding calibration masses in the spectrum. The default is 5. +189 +190 Returns +191 ------- +192 cal_peaks_mz : list of floats +193 masses of measured ions to use in calibration routine +194 cal_refs_mz : list of floats +195 reference mz values of found calibration points. +196 +197 """ +198 +199 # This approach is much more efficient and expedient than the original implementation. 
+200 peaks_mz = [] +201 for x in self.mass_spectrum.mspeaks: +202 if x.signal_to_noise > calib_snr_threshold: +203 if self.mzsegment: +204 if min(self.mzsegment) <= x.mz_exp <= max(self.mzsegment): +205 peaks_mz.append(x.mz_exp) +206 else: +207 peaks_mz.append(x.mz_exp) +208 peaks_mz = np.asarray(peaks_mz) 209 -210 # To remove entries with duplicated indices (reference masses matching multiple peaks) -211 tmpdf = pd.Series(index = cal_refs_mz,data = cal_peaks_mz,dtype=float) -212 tmpdf = tmpdf[~tmpdf.index.duplicated(keep=False)] -213 -214 cal_peaks_mz = list(tmpdf.values) -215 cal_refs_mz = list(tmpdf.index) -216 elif calibration_ref_match_method == 'merged': -217 warnings.warn('Using experimental new reference mass list merging') -218 # This is a new approach (August 2024) which uses Pandas 'merged_asof' to find the peaks closest in m/z between -219 # reference and measured masses. This is a quicker way to match, and seems to get more matches. -220 # It may not work as well when the data are far from correc initial mass -221 # e.g. if the correct peak is further from the reference than an incorrect peak. 
-222 meas_df = pd.DataFrame(columns=['meas_m/z'],data = peaks_mz) -223 tolerance = calibration_ref_match_tolerance -224 merged_df = pd.merge_asof(df_ref, meas_df, left_on='m/z', right_on = 'meas_m/z',tolerance=tolerance,direction='nearest') -225 merged_df.dropna(how='any',inplace=True) -226 merged_df['Error_ppm'] = ((merged_df['meas_m/z']-merged_df['m/z'])/merged_df['m/z'])*1e6 -227 median_raw_error = merged_df['Error_ppm'].median() -228 std_raw_error = merged_df['Error_ppm'].std() -229 if std_raw_error > calibration_ref_match_std_raw_error_limit: -230 std_raw_error = calibration_ref_match_std_raw_error_limit -231 self.mass_spectrum.calibration_raw_error_median = median_raw_error -232 self.mass_spectrum.calibration_raw_error_stdev = std_raw_error -233 merged_df= merged_df[(merged_df['Error_ppm']>(median_raw_error-1.5*std_raw_error))&(merged_df['Error_ppm']<(median_raw_error+1.5*std_raw_error))] -234 #merged_df= merged_df[(merged_df['Error_ppm']>min(calib_ppm_error_threshold))&(merged_df['Error_ppm']<max(calib_ppm_error_threshold))] -235 cal_peaks_mz = list(merged_df['meas_m/z']) -236 cal_refs_mz = list(merged_df['m/z']) -237 else: -238 raise ValueError(f'{calibration_ref_match_method} not allowed.') -239 -240 if False: -241 min_calib_ppm_error = calib_ppm_error_threshold[0] -242 max_calib_ppm_error = calib_ppm_error_threshold[1] -243 df_raw = self.mass_spectrum.to_dataframe() -244 -245 df_raw = df_raw[df_raw['S/N'] > calib_snr_threshold] -246 # optionally further subset that based on minimum S/N, RP, Peak Height -247 # to ensure only valid points are utilized -248 # in this example, only a S/N threshold is implemented. 
-249 imzmeas = [] -250 mzrefs = [] -251 -252 for mzref in df_ref['m/z']: -253 -254 # find all peaks within a defined ppm error threshold -255 tmpdf = df_raw[((df_raw['m/z']-mzref)/mzref)*1e6<max_calib_ppm_error] -256 # Error is relative to the theoretical, so the divisor should be divisor -257 -258 tmpdf = tmpdf[((tmpdf['m/z']-mzref)/mzref)*1e6>min_calib_ppm_error] -259 -260 # only use the calibration point if only one peak is within the thresholds -261 # This may require some optimization of the threshold tolerances -262 if len(tmpdf) == 1: -263 imzmeas.append(int(tmpdf.index.values)) -264 mzrefs.append(mzref) +210 if calibration_ref_match_method == "legacy": +211 # This legacy approach iterates through each reference match and finds the entries within 1 mz and within the user defined PPM error threshold +212 # Then it removes ambiguities - which means the calibration threshold hasto be very tight. +213 cal_peaks_mz = [] +214 cal_refs_mz = [] +215 for mzref in df_ref["m/z"]: +216 tmp_peaks_mz = peaks_mz[abs(peaks_mz - mzref) < 1] +217 for mzmeas in tmp_peaks_mz: +218 delta_mass = ((mzmeas - mzref) / mzref) * 1e6 +219 if delta_mass < max(calib_ppm_error_threshold): +220 if delta_mass > min(calib_ppm_error_threshold): +221 cal_peaks_mz.append(mzmeas) +222 cal_refs_mz.append(mzref) +223 +224 # To remove entries with duplicated indices (reference masses matching multiple peaks) +225 tmpdf = pd.Series(index=cal_refs_mz, data=cal_peaks_mz, dtype=float) +226 tmpdf = tmpdf[~tmpdf.index.duplicated(keep=False)] +227 +228 cal_peaks_mz = list(tmpdf.values) +229 cal_refs_mz = list(tmpdf.index) +230 elif calibration_ref_match_method == "merged": +231 warnings.warn("Using experimental new reference mass list merging") +232 # This is a new approach (August 2024) which uses Pandas 'merged_asof' to find the peaks closest in m/z between +233 # reference and measured masses. This is a quicker way to match, and seems to get more matches. 
+234 # It may not work as well when the data are far from correc initial mass +235 # e.g. if the correct peak is further from the reference than an incorrect peak. +236 meas_df = pd.DataFrame(columns=["meas_m/z"], data=peaks_mz) +237 tolerance = calibration_ref_match_tolerance +238 merged_df = pd.merge_asof( +239 df_ref, +240 meas_df, +241 left_on="m/z", +242 right_on="meas_m/z", +243 tolerance=tolerance, +244 direction="nearest", +245 ) +246 merged_df.dropna(how="any", inplace=True) +247 merged_df["Error_ppm"] = ( +248 (merged_df["meas_m/z"] - merged_df["m/z"]) / merged_df["m/z"] +249 ) * 1e6 +250 median_raw_error = merged_df["Error_ppm"].median() +251 std_raw_error = merged_df["Error_ppm"].std() +252 if std_raw_error > calibration_ref_match_std_raw_error_limit: +253 std_raw_error = calibration_ref_match_std_raw_error_limit +254 self.mass_spectrum.calibration_raw_error_median = median_raw_error +255 self.mass_spectrum.calibration_raw_error_stdev = std_raw_error +256 merged_df = merged_df[ +257 (merged_df["Error_ppm"] > (median_raw_error - 1.5 * std_raw_error)) +258 & (merged_df["Error_ppm"] < (median_raw_error + 1.5 * std_raw_error)) +259 ] +260 # merged_df= merged_df[(merged_df['Error_ppm']>min(calib_ppm_error_threshold))&(merged_df['Error_ppm']<max(calib_ppm_error_threshold))] +261 cal_peaks_mz = list(merged_df["meas_m/z"]) +262 cal_refs_mz = list(merged_df["m/z"]) +263 else: +264 raise ValueError(f"{calibration_ref_match_method} not allowed.") 265 -266 # it is crucial the mass lists are in same order -267 # corems likes to do masses from high to low. 
-268 cal_refs_mz.sort(reverse=False) -269 cal_peaks_mz.sort(reverse=False) -270 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -271 print(str(len(cal_peaks_mz)) + " calibration points matched within thresholds.") -272 return cal_peaks_mz, cal_refs_mz -273 -274 def robust_calib(self, param : list[float], -275 cal_peaks_mz : list[float], cal_refs_mz : list[float], -276 order : int=1): -277 """ Recalibration function -278 -279 Computes the rms of m/z errors to minimize when calibrating. -280 This is adapted from from spike. -281 -282 Parameters -283 ---------- -284 param : list of floats -285 generated by minimize function from scipy optimize. -286 cal_peaks_mz : list of floats -287 masses of measured peaks to use in mass calibration. -288 cal_peaks_mz : list of floats -289 reference mz values of found calibration points. -290 order : int, optional -291 order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1. -292 -293 Returns -294 ------- -295 rmserror : float -296 root mean square mass error for calibration points. -297 -298 """ -299 Aterm = param[0] -300 Bterm = param[1] -301 try: -302 Cterm = param[2] -303 except IndexError: -304 pass +266 if False: +267 min_calib_ppm_error = calib_ppm_error_threshold[0] +268 max_calib_ppm_error = calib_ppm_error_threshold[1] +269 df_raw = self.mass_spectrum.to_dataframe() +270 +271 df_raw = df_raw[df_raw["S/N"] > calib_snr_threshold] +272 # optionally further subset that based on minimum S/N, RP, Peak Height +273 # to ensure only valid points are utilized +274 # in this example, only a S/N threshold is implemented. 
+275 imzmeas = [] +276 mzrefs = [] +277 +278 for mzref in df_ref["m/z"]: +279 # find all peaks within a defined ppm error threshold +280 tmpdf = df_raw[ +281 ((df_raw["m/z"] - mzref) / mzref) * 1e6 < max_calib_ppm_error +282 ] +283 # Error is relative to the theoretical, so the divisor should be divisor +284 +285 tmpdf = tmpdf[ +286 ((tmpdf["m/z"] - mzref) / mzref) * 1e6 > min_calib_ppm_error +287 ] +288 +289 # only use the calibration point if only one peak is within the thresholds +290 # This may require some optimization of the threshold tolerances +291 if len(tmpdf) == 1: +292 imzmeas.append(int(tmpdf.index.values)) +293 mzrefs.append(mzref) +294 +295 # it is crucial the mass lists are in same order +296 # corems likes to do masses from high to low. +297 cal_refs_mz.sort(reverse=False) +298 cal_peaks_mz.sort(reverse=False) +299 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: +300 print( +301 str(len(cal_peaks_mz)) +302 + " calibration points matched within thresholds." 
+303 ) +304 return cal_peaks_mz, cal_refs_mz 305 -306 # get the mspeaks from the mass spectrum object which were calibration points -307 #mspeaks = [self.mass_spectrum.mspeaks[x] for x in imzmeas] -308 # get their calibrated mass values -309 #mspeakmzs = [x.mz_cal for x in mspeaks] -310 cal_peaks_mz = np.asarray(cal_peaks_mz) -311 -312 # linearz -313 if order == 1: -314 ref_recal_points = (Aterm * cal_peaks_mz) + Bterm -315 # quadratic -316 elif order == 2: -317 ref_recal_points = (Aterm * (cal_peaks_mz)) + \ -318 (Bterm * np.power((cal_peaks_mz), 2) + Cterm) -319 -320 # sort both the calibration points (measured, recalibrated) -321 ref_recal_points.sort() -322 # and sort the calibration points (theoretical, predefined) -323 cal_refs_mz.sort() -324 -325 # calculate the ppm error for each calibration point -326 error = ((ref_recal_points - cal_refs_mz) / cal_refs_mz) * 1e6 -327 # calculate the root mean square error - this is our target to minimize -328 rmserror = np.sqrt(np.mean(error**2)) -329 return rmserror -330 -331 def recalibrate_mass_spectrum(self, cal_peaks_mz : list[float], cal_refs_mz : list[float], -332 order : int=1, diagnostic : bool=False): +306 def robust_calib( +307 self, +308 param: list[float], +309 cal_peaks_mz: list[float], +310 cal_refs_mz: list[float], +311 order: int = 1, +312 ): +313 """Recalibration function +314 +315 Computes the rms of m/z errors to minimize when calibrating. +316 This is adapted from from spike. +317 +318 Parameters +319 ---------- +320 param : list of floats +321 generated by minimize function from scipy optimize. +322 cal_peaks_mz : list of floats +323 masses of measured peaks to use in mass calibration. +324 cal_peaks_mz : list of floats +325 reference mz values of found calibration points. +326 order : int, optional +327 order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1. +328 +329 Returns +330 ------- +331 rmserror : float +332 root mean square mass error for calibration points. 
333 -334 """ Main recalibration function which uses a robust linear regression -335 -336 This function performs the recalibration of the mass spectrum object. -337 It iteratively applies -338 -339 Parameters -340 ---------- -341 cal_peaks_mz : list of float -342 masses of measured peaks to use in mass calibration. -343 cal_refs_mz : list of float -344 reference mz values of found calibration points. -345 order : int, optional -346 order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1. +334 """ +335 Aterm = param[0] +336 Bterm = param[1] +337 try: +338 Cterm = param[2] +339 except IndexError: +340 pass +341 +342 # get the mspeaks from the mass spectrum object which were calibration points +343 # mspeaks = [self.mass_spectrum.mspeaks[x] for x in imzmeas] +344 # get their calibrated mass values +345 # mspeakmzs = [x.mz_cal for x in mspeaks] +346 cal_peaks_mz = np.asarray(cal_peaks_mz) 347 -348 Returns -349 ------- -350 mass_spectrum : CoreMS mass spectrum object -351 Calibrated mass spectrum object -352 -353 -354 Notes -355 ----- -356 This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method. -357 -358 References -359 ---------- -360 1. Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A. -361 SPIKE a Processing Software dedicated to Fourier Spectroscopies -362 https://arxiv.org/abs/1608.06777 (2016) -363 2. 
SPIKE - https://github.com/spike-project/spike -364 -365 """ -366 # initialise parameters for recalibration -367 # these are the 'Aterm, Bterm, Cterm' -368 # as spectra are already freq->mz calibrated, these terms are very small -369 # may be beneficial to formally separate them from the freq->mz terms -370 if order == 1: -371 Po = [1, 0] -372 elif order == 2: -373 Po = [1, 0, 0] -374 -375 if len(cal_peaks_mz) >= 2: -376 if self.mzsegment: #If only part of the spectrum is to be recalibrated -377 mz_exp_peaks = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum]) -378 # Split the array into two parts - one to recailbrate, one to keep unchanged. -379 mz_exp_peaks_tocal = mz_exp_peaks[(mz_exp_peaks>=min(self.mzsegment)) & (mz_exp_peaks<=max(self.mzsegment))] -380 mz_exp_peaks_unchanged = mz_exp_peaks[~(mz_exp_peaks>=min(self.mzsegment)) | ~(mz_exp_peaks<=max(self.mzsegment))] -381 # TODO: - segmented calibration needs a way to better track the calibration args/values... -382 if not self.mass_spectrum.is_centroid: -383 mz_exp_profile = np.array(self.mass_spectrum.mz_exp_profile) -384 # Split the array into two parts - one to recailbrate, one to keep unchanged. 
-385 mz_exp_profile_tocal = mz_exp_profile[(mz_exp_profile>=min(self.mzsegment)) & (mz_exp_profile<=max(self.mzsegment))] -386 mz_exp_profile_unchanged = mz_exp_profile[~(mz_exp_profile>=min(self.mzsegment)) | ~(mz_exp_profile<=max(self.mzsegment))] -387 else: #if just recalibrating the whole spectrum -388 mz_exp_peaks_tocal = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum]) -389 if not self.mass_spectrum.is_centroid: -390 mz_exp_profile_tocal = np.array(self.mass_spectrum.mz_exp_profile) -391 -392 -393 minimize_method = self.mass_spectrum.settings.calib_minimize_method -394 res = minimize(self.robust_calib, Po, args=(cal_peaks_mz, cal_refs_mz, order), method=minimize_method) -395 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -396 print("minimize function completed with RMS error of: {:0.3f} ppm".format(res['fun'])) -397 print("minimize function performed {:1d} fn evals and {:1d} iterations".format(res['nfev'], res['nit'])) -398 Pn = res.x -399 -400 #mz_exp_ms = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum]) -401 -402 if order == 1: -403 mz_domain = (Pn[0] * mz_exp_peaks_tocal) + Pn[1] -404 if not self.mass_spectrum.is_centroid: -405 mz_profile_calc = (Pn[0] * mz_exp_profile_tocal) + Pn[1] -406 -407 elif order == 2: -408 mz_domain = (Pn[0] * (mz_exp_peaks_tocal)) + \ -409 (Pn[1] * np.power((mz_exp_peaks_tocal), 2) + Pn[2]) -410 -411 if not self.mass_spectrum.is_centroid: -412 mz_profile_calc = (Pn[0] * (mz_exp_profile_tocal)) + \ -413 (Pn[1] * np.power((mz_exp_profile_tocal), 2) + Pn[2]) -414 -415 if self.mzsegment: -416 # Recombine the mass domains -417 mz_domain = np.concatenate([mz_domain,mz_exp_peaks_unchanged]) -418 mz_domain.sort() -419 if not self.mass_spectrum.is_centroid: -420 mz_profile_calc = np.concatenate([mz_profile_calc,mz_exp_profile_unchanged]) -421 mz_profile_calc.sort() -422 # Sort them -423 if mz_exp_peaks[0] > mz_exp_peaks[1]: #If originally descending mass order -424 mz_domain = mz_domain[::-1] -425 
if not self.mass_spectrum.is_centroid: -426 mz_profile_calc = mz_profile_calc[::-1] -427 -428 self.mass_spectrum.mz_cal = mz_domain -429 if not self.mass_spectrum.is_centroid: -430 self.mass_spectrum.mz_cal_profile = mz_profile_calc -431 -432 self.mass_spectrum.calibration_order = order -433 self.mass_spectrum.calibration_RMS = float(res['fun']) -434 self.mass_spectrum.calibration_points = int(len(cal_refs_mz)) -435 self.mass_spectrum.calibration_ref_mzs = cal_refs_mz -436 self.mass_spectrum.calibration_meas_mzs = cal_peaks_mz -437 -438 self.mass_spectrum.calibration_segment = self.mzsegment -439 -440 if diagnostic: -441 return self.mass_spectrum,res -442 return self.mass_spectrum -443 else: -444 warnings.warn("Too few calibration points - aborting.") -445 return self.mass_spectrum -446 -447 def run(self): -448 """ Run the calibration routine -449 -450 This function runs the calibration routine. -451 -452 """ -453 calib_ppm_error_threshold = self.mass_spectrum.settings.calib_sn_threshold -454 max_calib_ppm_error = self.mass_spectrum.settings.max_calib_ppm_error -455 min_calib_ppm_error = self.mass_spectrum.settings.min_calib_ppm_error -456 calib_pol_order = self.mass_spectrum.settings.calib_pol_order -457 calibration_ref_match_method = self.mass_spectrum.settings.calibration_ref_match_method -458 calibration_ref_match_tolerance = self.mass_spectrum.settings.calibration_ref_match_tolerance -459 calibration_ref_match_std_raw_error_limit = self.mass_spectrum.settings.calibration_ref_match_std_raw_error_limit -460 -461 # load reference mass list -462 df_ref = self.load_ref_mass_list() -463 -464 # find calibration points -465 cal_peaks_mz, cal_refs_mz = self.find_calibration_points(df_ref, -466 calib_ppm_error_threshold=(min_calib_ppm_error, -467 max_calib_ppm_error), -468 calib_snr_threshold=calib_ppm_error_threshold, -469 calibration_ref_match_method = calibration_ref_match_method, -470 calibration_ref_match_tolerance = calibration_ref_match_tolerance, -471 
calibration_ref_match_std_raw_error_limit = calibration_ref_match_std_raw_error_limit) -472 if len(cal_peaks_mz)==2: -473 self.mass_spectrum.settings.calib_pol_order = 1 -474 calib_pol_order = 1 -475 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -476 print('Only 2 calibration points found, forcing a linear recalibration') -477 elif len(cal_peaks_mz)<2: -478 warnings.warn('Too few calibration points found, function will fail') -479 self.recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=calib_pol_order) +348 # linearz +349 if order == 1: +350 ref_recal_points = (Aterm * cal_peaks_mz) + Bterm +351 # quadratic +352 elif order == 2: +353 ref_recal_points = (Aterm * (cal_peaks_mz)) + ( +354 Bterm * np.power((cal_peaks_mz), 2) + Cterm +355 ) +356 +357 # sort both the calibration points (measured, recalibrated) +358 ref_recal_points.sort() +359 # and sort the calibration points (theoretical, predefined) +360 cal_refs_mz.sort() +361 +362 # calculate the ppm error for each calibration point +363 error = ((ref_recal_points - cal_refs_mz) / cal_refs_mz) * 1e6 +364 # calculate the root mean square error - this is our target to minimize +365 rmserror = np.sqrt(np.mean(error**2)) +366 return rmserror +367 +368 def recalibrate_mass_spectrum( +369 self, +370 cal_peaks_mz: list[float], +371 cal_refs_mz: list[float], +372 order: int = 1, +373 diagnostic: bool = False, +374 ): +375 """Main recalibration function which uses a robust linear regression +376 +377 This function performs the recalibration of the mass spectrum object. +378 It iteratively applies +379 +380 Parameters +381 ---------- +382 cal_peaks_mz : list of float +383 masses of measured peaks to use in mass calibration. +384 cal_refs_mz : list of float +385 reference mz values of found calibration points. +386 order : int, optional +387 order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1. 
+388 +389 Returns +390 ------- +391 mass_spectrum : CoreMS mass spectrum object +392 Calibrated mass spectrum object +393 +394 +395 Notes +396 ----- +397 This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method. +398 +399 References +400 ---------- +401 1. Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A. +402 SPIKE a Processing Software dedicated to Fourier Spectroscopies +403 https://arxiv.org/abs/1608.06777 (2016) +404 2. SPIKE - https://github.com/spike-project/spike +405 +406 """ +407 # initialise parameters for recalibration +408 # these are the 'Aterm, Bterm, Cterm' +409 # as spectra are already freq->mz calibrated, these terms are very small +410 # may be beneficial to formally separate them from the freq->mz terms +411 if order == 1: +412 Po = [1, 0] +413 elif order == 2: +414 Po = [1, 0, 0] +415 +416 if len(cal_peaks_mz) >= 2: +417 if self.mzsegment: # If only part of the spectrum is to be recalibrated +418 mz_exp_peaks = np.array( +419 [mspeak.mz_exp for mspeak in self.mass_spectrum] +420 ) +421 # Split the array into two parts - one to recailbrate, one to keep unchanged. +422 mz_exp_peaks_tocal = mz_exp_peaks[ +423 (mz_exp_peaks >= min(self.mzsegment)) +424 & (mz_exp_peaks <= max(self.mzsegment)) +425 ] +426 mz_exp_peaks_unchanged = mz_exp_peaks[ +427 ~(mz_exp_peaks >= min(self.mzsegment)) +428 | ~(mz_exp_peaks <= max(self.mzsegment)) +429 ] +430 # TODO: - segmented calibration needs a way to better track the calibration args/values... +431 if not self.mass_spectrum.is_centroid: +432 mz_exp_profile = np.array(self.mass_spectrum.mz_exp_profile) +433 # Split the array into two parts - one to recailbrate, one to keep unchanged. 
+434 mz_exp_profile_tocal = mz_exp_profile[ +435 (mz_exp_profile >= min(self.mzsegment)) +436 & (mz_exp_profile <= max(self.mzsegment)) +437 ] +438 mz_exp_profile_unchanged = mz_exp_profile[ +439 ~(mz_exp_profile >= min(self.mzsegment)) +440 | ~(mz_exp_profile <= max(self.mzsegment)) +441 ] +442 else: # if just recalibrating the whole spectrum +443 mz_exp_peaks_tocal = np.array( +444 [mspeak.mz_exp for mspeak in self.mass_spectrum] +445 ) +446 if not self.mass_spectrum.is_centroid: +447 mz_exp_profile_tocal = np.array(self.mass_spectrum.mz_exp_profile) +448 +449 minimize_method = self.mass_spectrum.settings.calib_minimize_method +450 res = minimize( +451 self.robust_calib, +452 Po, +453 args=(cal_peaks_mz, cal_refs_mz, order), +454 method=minimize_method, +455 ) +456 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: +457 print( +458 "minimize function completed with RMS error of: {:0.3f} ppm".format( +459 res["fun"] +460 ) +461 ) +462 print( +463 "minimize function performed {:1d} fn evals and {:1d} iterations".format( +464 res["nfev"], res["nit"] +465 ) +466 ) +467 Pn = res.x +468 +469 # mz_exp_ms = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum]) +470 +471 if order == 1: +472 mz_domain = (Pn[0] * mz_exp_peaks_tocal) + Pn[1] +473 if not self.mass_spectrum.is_centroid: +474 mz_profile_calc = (Pn[0] * mz_exp_profile_tocal) + Pn[1] +475 +476 elif order == 2: +477 mz_domain = (Pn[0] * (mz_exp_peaks_tocal)) + ( +478 Pn[1] * np.power((mz_exp_peaks_tocal), 2) + Pn[2] +479 ) +480 +481 if not self.mass_spectrum.is_centroid: +482 mz_profile_calc = (Pn[0] * (mz_exp_profile_tocal)) + ( +483 Pn[1] * np.power((mz_exp_profile_tocal), 2) + Pn[2] +484 ) +485 +486 if self.mzsegment: +487 # Recombine the mass domains +488 mz_domain = np.concatenate([mz_domain, mz_exp_peaks_unchanged]) +489 mz_domain.sort() +490 if not self.mass_spectrum.is_centroid: +491 mz_profile_calc = np.concatenate( +492 [mz_profile_calc, mz_exp_profile_unchanged] +493 ) +494 
mz_profile_calc.sort() +495 # Sort them +496 if ( +497 mz_exp_peaks[0] > mz_exp_peaks[1] +498 ): # If originally descending mass order +499 mz_domain = mz_domain[::-1] +500 if not self.mass_spectrum.is_centroid: +501 mz_profile_calc = mz_profile_calc[::-1] +502 +503 self.mass_spectrum.mz_cal = mz_domain +504 if not self.mass_spectrum.is_centroid: +505 self.mass_spectrum.mz_cal_profile = mz_profile_calc +506 +507 self.mass_spectrum.calibration_order = order +508 self.mass_spectrum.calibration_RMS = float(res["fun"]) +509 self.mass_spectrum.calibration_points = int(len(cal_refs_mz)) +510 self.mass_spectrum.calibration_ref_mzs = cal_refs_mz +511 self.mass_spectrum.calibration_meas_mzs = cal_peaks_mz +512 +513 self.mass_spectrum.calibration_segment = self.mzsegment +514 +515 if diagnostic: +516 return self.mass_spectrum, res +517 return self.mass_spectrum +518 else: +519 warnings.warn("Too few calibration points - aborting.") +520 return self.mass_spectrum +521 +522 def run(self): +523 """Run the calibration routine +524 +525 This function runs the calibration routine. 
+526 +527 """ +528 calib_ppm_error_threshold = self.mass_spectrum.settings.calib_sn_threshold +529 max_calib_ppm_error = self.mass_spectrum.settings.max_calib_ppm_error +530 min_calib_ppm_error = self.mass_spectrum.settings.min_calib_ppm_error +531 calib_pol_order = self.mass_spectrum.settings.calib_pol_order +532 calibration_ref_match_method = ( +533 self.mass_spectrum.settings.calibration_ref_match_method +534 ) +535 calibration_ref_match_tolerance = ( +536 self.mass_spectrum.settings.calibration_ref_match_tolerance +537 ) +538 calibration_ref_match_std_raw_error_limit = ( +539 self.mass_spectrum.settings.calibration_ref_match_std_raw_error_limit +540 ) +541 +542 # load reference mass list +543 df_ref = self.load_ref_mass_list() +544 +545 # find calibration points +546 cal_peaks_mz, cal_refs_mz = self.find_calibration_points( +547 df_ref, +548 calib_ppm_error_threshold=(min_calib_ppm_error, max_calib_ppm_error), +549 calib_snr_threshold=calib_ppm_error_threshold, +550 calibration_ref_match_method=calibration_ref_match_method, +551 calibration_ref_match_tolerance=calibration_ref_match_tolerance, +552 calibration_ref_match_std_raw_error_limit=calibration_ref_match_std_raw_error_limit, +553 ) +554 if len(cal_peaks_mz) == 2: +555 self.mass_spectrum.settings.calib_pol_order = 1 +556 calib_pol_order = 1 +557 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: +558 print("Only 2 calibration points found, forcing a linear recalibration") +559 elif len(cal_peaks_mz) < 2: +560 warnings.warn("Too few calibration points found, function will fail") +561 self.recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=calib_pol_order)

    @@ -585,460 +667,546 @@

    -
     27class MzDomainCalibration:
    - 28    """ MzDomainCalibration class for recalibrating mass spectra
    - 29    
    - 30    Parameters
    - 31    ----------
    - 32    mass_spectrum : CoreMS MassSpectrum Object
    - 33        The mass spectrum to be calibrated.
    - 34    ref_masslist : str
    - 35        The path to a reference mass list.
    - 36    mzsegment : tuple of floats, optional
    - 37        The mz range to recalibrate, or None. Used for calibration of specific parts of the mz domain at a time.
    - 38        Future work - allow multiple mzsegments to be passed.
    - 39    
    - 40    Attributes
    - 41    ----------
    - 42    mass_spectrum : CoreMS MassSpectrum Object
    - 43        The mass spectrum to be calibrated.
    - 44    mzsegment : tuple of floats or None
    - 45        The mz range to recalibrate, or None.
    - 46    ref_mass_list_path : str or Path
    - 47        The path to the reference mass list.
    - 48    
    - 49    Methods
    - 50    -------
    - 51    * run(). 
    - 52        Main function to run this class.  
    - 53    * load_ref_mass_list(). 
    - 54        Load reference mass list (Bruker format).  
    - 55    * gen_ref_mass_list_from_assigned(min_conf=0.7). 
    - 56        Generate reference mass list from assigned masses.  
    - 57    * find_calibration_points(df_ref, calib_ppm_error_threshold=(-1, 1), calib_snr_threshold=5). 
    - 58        Find calibration points in the mass spectrum based on the reference mass list.  
    - 59    * robust_calib(param, cal_peaks_mz, cal_refs_mz, order=1). 
    - 60        Recalibration function.  
    - 61    * recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=1, diagnostic=False). 
    - 62        Main recalibration function which uses a robust linear regression.
    - 63    
    - 64
    - 65    """
    +            
     23class MzDomainCalibration:
    + 24    """MzDomainCalibration class for recalibrating mass spectra
    + 25
    + 26    Parameters
    + 27    ----------
    + 28    mass_spectrum : CoreMS MassSpectrum Object
    + 29        The mass spectrum to be calibrated.
    + 30    ref_masslist : str
    + 31        The path to a reference mass list.
    + 32    mzsegment : tuple of floats, optional
    + 33        The mz range to recalibrate, or None. Used for calibration of specific parts of the mz domain at a time.
    + 34        Future work - allow multiple mzsegments to be passed.
    + 35
    + 36    Attributes
    + 37    ----------
    + 38    mass_spectrum : CoreMS MassSpectrum Object
    + 39        The mass spectrum to be calibrated.
    + 40    mzsegment : tuple of floats or None
    + 41        The mz range to recalibrate, or None.
    + 42    ref_mass_list_path : str or Path
    + 43        The path to the reference mass list.
    + 44
    + 45    Methods
    + 46    -------
    + 47    * run().
    + 48        Main function to run this class.
    + 49    * load_ref_mass_list().
    + 50        Load reference mass list (Bruker format).
    + 51    * gen_ref_mass_list_from_assigned(min_conf=0.7).
    + 52        Generate reference mass list from assigned masses.
    + 53    * find_calibration_points(df_ref, calib_ppm_error_threshold=(-1, 1), calib_snr_threshold=5).
    + 54        Find calibration points in the mass spectrum based on the reference mass list.
    + 55    * robust_calib(param, cal_peaks_mz, cal_refs_mz, order=1).
    + 56        Recalibration function.
    + 57    * recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=1, diagnostic=False).
    + 58        Main recalibration function which uses a robust linear regression.
    + 59
    + 60
    + 61    """
    + 62
    + 63    def __init__(self, mass_spectrum, ref_masslist, mzsegment=None):
    + 64        self.mass_spectrum = mass_spectrum
    + 65        self.mzsegment = mzsegment
      66
    - 67    def __init__(self, mass_spectrum, ref_masslist,mzsegment=None):
    - 68        
    - 69        self.mass_spectrum = mass_spectrum
    - 70        self.mzsegment = mzsegment
    - 71
    - 72        # define reference mass list - bruker .ref format
    - 73        self.ref_mass_list_path = ref_masslist
    - 74        if self.mass_spectrum.percentile_assigned()[0]!=0:
    - 75            warnings.warn('Warning: calibrating spectra which have already been assigned may yield erroneous results')
    - 76        self.mass_spectrum.mz_cal = self.mass_spectrum.mz_exp    
    - 77        self.mass_spectrum.mz_cal_profile = self.mass_spectrum._mz_exp  
    - 78        
    - 79        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    - 80            print("MS Obj loaded - "+str(len(mass_spectrum.mspeaks))+" peaks found.")
    - 81
    - 82            print("MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found.")
    - 83
    - 84    def load_ref_mass_list(self):
    - 85        """ Load reference mass list (Bruker format)
    - 86
    - 87        Loads in a reference mass list from a .ref file
    - 88        Note that some versions of Bruker's software produce .ref files with a different format. 
    - 89        As such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format. 
    - 90        CoreMS includes an example .ref file with the correct format for reference. 
    - 91
    - 92        Returns
    - 93        -------
    - 94        df_ref : Pandas DataFrame
    - 95            reference mass list object.
    - 96
    - 97        """
    - 98        refmasslist = Path(self.ref_mass_list_path) if isinstance(self.ref_mass_list_path, str) else self.ref_mass_list_path
    - 99
    -100        if not refmasslist.exists():
    -101            raise FileExistsError("File does not exist: %s" % refmasslist)
    -102
    -103        with refmasslist.open('r') as csvfile:
    -104            dialect = csv.Sniffer().sniff(csvfile.read(1024))
    -105            delimiter = dialect.delimiter
    -106
    -107        if isinstance(refmasslist, S3Path):
    -108            # data = self.file_location.open('rb').read()
    -109            data = BytesIO(refmasslist.open('rb').read())
    -110
    -111        else:
    -112            data = refmasslist
    -113
    -114        df_ref = pd.read_csv(data, sep=delimiter, header=None, skiprows=1)
    + 67        # define reference mass list - bruker .ref format
    + 68        self.ref_mass_list_path = ref_masslist
    + 69        if self.mass_spectrum.percentile_assigned()[0] != 0:
    + 70            warnings.warn(
    + 71                "Warning: calibrating spectra which have already been assigned may yield erroneous results"
    + 72            )
    + 73        self.mass_spectrum.mz_cal = self.mass_spectrum.mz_exp
    + 74        self.mass_spectrum.mz_cal_profile = self.mass_spectrum._mz_exp
    + 75
    + 76        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    + 77            print(
    + 78                "MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found."
    + 79            )
    + 80
    + 81            print(
    + 82                "MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found."
    + 83            )
    + 84
    + 85    def load_ref_mass_list(self):
    + 86        """Load reference mass list (Bruker format)
    + 87
    + 88        Loads in a reference mass list from a .ref file
    + 89        Note that some versions of Bruker's software produce .ref files with a different format.
    + 90        As such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format.
    + 91        CoreMS includes an example .ref file with the correct format for reference.
    + 92
    + 93        Returns
    + 94        -------
    + 95        df_ref : Pandas DataFrame
    + 96            reference mass list object.
    + 97
    + 98        """
    + 99        refmasslist = (
    +100            Path(self.ref_mass_list_path)
    +101            if isinstance(self.ref_mass_list_path, str)
    +102            else self.ref_mass_list_path
    +103        )
    +104
    +105        if not refmasslist.exists():
    +106            raise FileExistsError("File does not exist: %s" % refmasslist)
    +107
    +108        with refmasslist.open("r") as csvfile:
    +109            dialect = csv.Sniffer().sniff(csvfile.read(1024))
    +110            delimiter = dialect.delimiter
    +111
    +112        if isinstance(refmasslist, S3Path):
    +113            # data = self.file_location.open('rb').read()
    +114            data = BytesIO(refmasslist.open("rb").read())
     115
    -116        df_ref = df_ref.rename({0: 'Formula',
    -117                                1: 'm/z',
    -118                                2: 'Charge',
    -119                                3: 'Form2'
    -120                                }, axis=1)
    -121
    -122        df_ref.sort_values(by='m/z', ascending=True,inplace=True)
    -123        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -124            print("Reference mass list loaded - " + str(len(df_ref)) + " calibration masses loaded.")
    -125
    -126        return df_ref
    -127
    -128    def gen_ref_mass_list_from_assigned(self, min_conf : float=0.7):
    -129        """ Generate reference mass list from assigned masses
    -130
    -131        This function will generate a ref mass dataframe object from an assigned corems mass spec obj
    -132        using assigned masses above a certain minimum confidence threshold.
    -133
    -134        This function needs to be retested and check it is covered in the unit tests.
    -135
    -136        Parameters
    -137        ----------
    -138        min_conf : float, optional
    -139            minimum confidence score. The default is 0.7.
    +116        else:
    +117            data = refmasslist
    +118
    +119        df_ref = pd.read_csv(data, sep=delimiter, header=None, skiprows=1)
    +120
    +121        df_ref = df_ref.rename(
    +122            {0: "Formula", 1: "m/z", 2: "Charge", 3: "Form2"}, axis=1
    +123        )
    +124
    +125        df_ref.sort_values(by="m/z", ascending=True, inplace=True)
    +126        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +127            print(
    +128                "Reference mass list loaded - "
    +129                + str(len(df_ref))
    +130                + " calibration masses loaded."
    +131            )
    +132
    +133        return df_ref
    +134
    +135    def gen_ref_mass_list_from_assigned(self, min_conf: float = 0.7):
    +136        """Generate reference mass list from assigned masses
    +137
    +138        This function will generate a ref mass dataframe object from an assigned corems mass spec obj
    +139        using assigned masses above a certain minimum confidence threshold.
     140
    -141        Returns
    -142        -------
    -143        df_ref : Pandas DataFrame
    -144            reference mass list - based on calculated masses.
    -145
    -146        """
    -147        #TODO this function needs to be retested and check it is covered in the unit tests
    -148        df = self.mass_spectrum.to_dataframe()
    -149        df = df[df['Confidence Score'] > min_conf]
    -150        df_ref = pd.DataFrame(columns=['m/z'])
    -151        df_ref['m/z'] = df['Calculated m/z']
    -152        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -153            print("Reference mass list generated - " + str(len(df_ref)) + " calibration masses.")
    -154        return df_ref
    -155
    -156    def find_calibration_points(self, df_ref,
    -157                                calib_ppm_error_threshold : tuple[float, float]=(-1, 1),
    -158                                calib_snr_threshold : float=5,
    -159                                calibration_ref_match_method : str='legacy',
    -160                                calibration_ref_match_tolerance : float=0.003,
    -161                                calibration_ref_match_std_raw_error_limit: float=1.5):
    -162        """Function to find calibration points in the mass spectrum 
    -163        
    -164        Based on the reference mass list.
    -165
    -166        Parameters
    -167        ----------
    -168        df_ref : Pandas DataFrame
    -169            reference mass list for recalibration.
    -170        calib_ppm_error_threshold : tuple of floats, optional
    -171            ppm error for finding calibration masses in the spectrum. The default is -1,1.
    -172            Note: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6. 
    -173                Some software does this the other way around and value signs must be inverted for that to work. 
    -174        calib_snr_threshold : float, optional
    -175            snr threshold for finding calibration masses in the spectrum. The default is 5.
    -176
    -177        Returns
    -178        -------
    -179        cal_peaks_mz : list of floats
    -180            masses of measured ions to use in calibration routine
    -181        cal_refs_mz : list of floats
    -182            reference mz values of found calibration points.
    -183
    -184        """
    -185
    -186        # This approach is much more efficient and expedient than the original implementation.
    -187        peaks_mz = []
    -188        for x in self.mass_spectrum.mspeaks:
    -189            if x.signal_to_noise > calib_snr_threshold:
    -190                if self.mzsegment:
    -191                    if (min(self.mzsegment) <= x.mz_exp <= max(self.mzsegment)):
    -192                        peaks_mz.append(x.mz_exp)
    -193                else:
    -194                    peaks_mz.append(x.mz_exp)
    -195        peaks_mz = np.asarray(peaks_mz)
    -196        
    -197        if calibration_ref_match_method == 'legacy':
    -198            # This legacy approach iterates through each reference match and finds the entries within 1 mz and within the user defined PPM error threshold
    -199            # Then it removes ambiguities - which means the calibration threshold hasto be very tight.
    -200            cal_peaks_mz = []
    -201            cal_refs_mz = []
    -202            for mzref in df_ref['m/z']:
    -203                tmp_peaks_mz = peaks_mz[abs(peaks_mz-mzref)<1]
    -204                for mzmeas in tmp_peaks_mz:
    -205                    delta_mass = ((mzmeas-mzref)/mzref)*1e6
    -206                    if delta_mass < max(calib_ppm_error_threshold):
    -207                        if delta_mass > min(calib_ppm_error_threshold):
    -208                            cal_peaks_mz.append(mzmeas)
    -209                            cal_refs_mz.append(mzref)
    +141        This function needs to be retested and check it is covered in the unit tests.
    +142
    +143        Parameters
    +144        ----------
    +145        min_conf : float, optional
    +146            minimum confidence score. The default is 0.7.
    +147
    +148        Returns
    +149        -------
    +150        df_ref : Pandas DataFrame
    +151            reference mass list - based on calculated masses.
    +152
    +153        """
    +154        # TODO this function needs to be retested and check it is covered in the unit tests
    +155        df = self.mass_spectrum.to_dataframe()
    +156        df = df[df["Confidence Score"] > min_conf]
    +157        df_ref = pd.DataFrame(columns=["m/z"])
    +158        df_ref["m/z"] = df["Calculated m/z"]
    +159        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +160            print(
    +161                "Reference mass list generated - "
    +162                + str(len(df_ref))
    +163                + " calibration masses."
    +164            )
    +165        return df_ref
    +166
    +167    def find_calibration_points(
    +168        self,
    +169        df_ref,
    +170        calib_ppm_error_threshold: tuple[float, float] = (-1, 1),
    +171        calib_snr_threshold: float = 5,
    +172        calibration_ref_match_method: str = "legacy",
    +173        calibration_ref_match_tolerance: float = 0.003,
    +174        calibration_ref_match_std_raw_error_limit: float = 1.5,
    +175    ):
    +176        """Function to find calibration points in the mass spectrum
    +177
    +178        Based on the reference mass list.
    +179
    +180        Parameters
    +181        ----------
    +182        df_ref : Pandas DataFrame
    +183            reference mass list for recalibration.
    +184        calib_ppm_error_threshold : tuple of floats, optional
    +185            ppm error for finding calibration masses in the spectrum. The default is -1,1.
    +186            Note: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6.
    +187                Some software does this the other way around and value signs must be inverted for that to work.
    +188        calib_snr_threshold : float, optional
    +189            snr threshold for finding calibration masses in the spectrum. The default is 5.
    +190
    +191        Returns
    +192        -------
    +193        cal_peaks_mz : list of floats
    +194            masses of measured ions to use in calibration routine
    +195        cal_refs_mz : list of floats
    +196            reference mz values of found calibration points.
    +197
    +198        """
    +199
    +200        # This approach is much more efficient and expedient than the original implementation.
    +201        peaks_mz = []
    +202        for x in self.mass_spectrum.mspeaks:
    +203            if x.signal_to_noise > calib_snr_threshold:
    +204                if self.mzsegment:
    +205                    if min(self.mzsegment) <= x.mz_exp <= max(self.mzsegment):
    +206                        peaks_mz.append(x.mz_exp)
    +207                else:
    +208                    peaks_mz.append(x.mz_exp)
    +209        peaks_mz = np.asarray(peaks_mz)
     210
    -211            # To remove entries with duplicated indices (reference masses matching multiple peaks)
    -212            tmpdf = pd.Series(index = cal_refs_mz,data = cal_peaks_mz,dtype=float)
    -213            tmpdf = tmpdf[~tmpdf.index.duplicated(keep=False)]
    -214
    -215            cal_peaks_mz = list(tmpdf.values)
    -216            cal_refs_mz = list(tmpdf.index)
    -217        elif calibration_ref_match_method == 'merged':
    -218            warnings.warn('Using experimental new reference mass list merging')
    -219            # This is a new approach (August 2024) which uses Pandas 'merged_asof' to find the peaks closest in m/z between 
    -220            # reference and measured masses. This is a quicker way to match, and seems to get more matches.
    -221            # It may not work as well when the data are far from correc initial mass
    -222            # e.g. if the correct peak is further from the reference than an incorrect peak.
    -223            meas_df = pd.DataFrame(columns=['meas_m/z'],data = peaks_mz)
    -224            tolerance = calibration_ref_match_tolerance
    -225            merged_df = pd.merge_asof(df_ref, meas_df, left_on='m/z', right_on = 'meas_m/z',tolerance=tolerance,direction='nearest')
    -226            merged_df.dropna(how='any',inplace=True)
    -227            merged_df['Error_ppm'] = ((merged_df['meas_m/z']-merged_df['m/z'])/merged_df['m/z'])*1e6
    -228            median_raw_error = merged_df['Error_ppm'].median()
    -229            std_raw_error = merged_df['Error_ppm'].std()
    -230            if std_raw_error > calibration_ref_match_std_raw_error_limit:
    -231                std_raw_error = calibration_ref_match_std_raw_error_limit
    -232            self.mass_spectrum.calibration_raw_error_median = median_raw_error
    -233            self.mass_spectrum.calibration_raw_error_stdev = std_raw_error
    -234            merged_df= merged_df[(merged_df['Error_ppm']>(median_raw_error-1.5*std_raw_error))&(merged_df['Error_ppm']<(median_raw_error+1.5*std_raw_error))]
    -235            #merged_df= merged_df[(merged_df['Error_ppm']>min(calib_ppm_error_threshold))&(merged_df['Error_ppm']<max(calib_ppm_error_threshold))]
    -236            cal_peaks_mz = list(merged_df['meas_m/z'])
    -237            cal_refs_mz = list(merged_df['m/z'])   
    -238        else:
    -239            raise ValueError(f'{calibration_ref_match_method} not allowed.')
    -240
    -241        if False:
    -242            min_calib_ppm_error = calib_ppm_error_threshold[0]
    -243            max_calib_ppm_error = calib_ppm_error_threshold[1]
    -244            df_raw = self.mass_spectrum.to_dataframe()
    -245
    -246            df_raw = df_raw[df_raw['S/N'] > calib_snr_threshold]
    -247            # optionally further subset that based on minimum S/N, RP, Peak Height
    -248            # to ensure only valid points are utilized
    -249            # in this example, only a S/N threshold is implemented.        
    -250            imzmeas = []
    -251            mzrefs = []
    -252
    -253            for mzref in df_ref['m/z']:
    -254
    -255                # find all peaks within a defined ppm error threshold
    -256                tmpdf = df_raw[((df_raw['m/z']-mzref)/mzref)*1e6<max_calib_ppm_error]
    -257                # Error is relative to the theoretical, so the divisor should be divisor
    -258
    -259                tmpdf = tmpdf[((tmpdf['m/z']-mzref)/mzref)*1e6>min_calib_ppm_error]
    -260                        
    -261                # only use the calibration point if only one peak is within the thresholds
    -262                # This may require some optimization of the threshold tolerances
    -263                if len(tmpdf) == 1:
    -264                    imzmeas.append(int(tmpdf.index.values))
    -265                    mzrefs.append(mzref)
    +211        if calibration_ref_match_method == "legacy":
    +212            # This legacy approach iterates through each reference match and finds the entries within 1 mz and within the user defined PPM error threshold
    +213            # Then it removes ambiguities - which means the calibration threshold has to be very tight.
    +214            cal_peaks_mz = []
    +215            cal_refs_mz = []
    +216            for mzref in df_ref["m/z"]:
    +217                tmp_peaks_mz = peaks_mz[abs(peaks_mz - mzref) < 1]
    +218                for mzmeas in tmp_peaks_mz:
    +219                    delta_mass = ((mzmeas - mzref) / mzref) * 1e6
    +220                    if delta_mass < max(calib_ppm_error_threshold):
    +221                        if delta_mass > min(calib_ppm_error_threshold):
    +222                            cal_peaks_mz.append(mzmeas)
    +223                            cal_refs_mz.append(mzref)
    +224
    +225            # To remove entries with duplicated indices (reference masses matching multiple peaks)
    +226            tmpdf = pd.Series(index=cal_refs_mz, data=cal_peaks_mz, dtype=float)
    +227            tmpdf = tmpdf[~tmpdf.index.duplicated(keep=False)]
    +228
    +229            cal_peaks_mz = list(tmpdf.values)
    +230            cal_refs_mz = list(tmpdf.index)
    +231        elif calibration_ref_match_method == "merged":
    +232            warnings.warn("Using experimental new reference mass list merging")
    +233            # This is a new approach (August 2024) which uses Pandas 'merged_asof' to find the peaks closest in m/z between
    +234            # reference and measured masses. This is a quicker way to match, and seems to get more matches.
    +235            # It may not work as well when the data are far from the correct initial mass,
    +236            # e.g. if the correct peak is further from the reference than an incorrect peak.
    +237            meas_df = pd.DataFrame(columns=["meas_m/z"], data=peaks_mz)
    +238            tolerance = calibration_ref_match_tolerance
    +239            merged_df = pd.merge_asof(
    +240                df_ref,
    +241                meas_df,
    +242                left_on="m/z",
    +243                right_on="meas_m/z",
    +244                tolerance=tolerance,
    +245                direction="nearest",
    +246            )
    +247            merged_df.dropna(how="any", inplace=True)
    +248            merged_df["Error_ppm"] = (
    +249                (merged_df["meas_m/z"] - merged_df["m/z"]) / merged_df["m/z"]
    +250            ) * 1e6
    +251            median_raw_error = merged_df["Error_ppm"].median()
    +252            std_raw_error = merged_df["Error_ppm"].std()
    +253            if std_raw_error > calibration_ref_match_std_raw_error_limit:
    +254                std_raw_error = calibration_ref_match_std_raw_error_limit
    +255            self.mass_spectrum.calibration_raw_error_median = median_raw_error
    +256            self.mass_spectrum.calibration_raw_error_stdev = std_raw_error
    +257            merged_df = merged_df[
    +258                (merged_df["Error_ppm"] > (median_raw_error - 1.5 * std_raw_error))
    +259                & (merged_df["Error_ppm"] < (median_raw_error + 1.5 * std_raw_error))
    +260            ]
    +261            # merged_df= merged_df[(merged_df['Error_ppm']>min(calib_ppm_error_threshold))&(merged_df['Error_ppm']<max(calib_ppm_error_threshold))]
    +262            cal_peaks_mz = list(merged_df["meas_m/z"])
    +263            cal_refs_mz = list(merged_df["m/z"])
    +264        else:
    +265            raise ValueError(f"{calibration_ref_match_method} not allowed.")
     266
    -267        # it is crucial the mass lists are in same order
    -268        # corems likes to do masses from high to low.
    -269        cal_refs_mz.sort(reverse=False)
    -270        cal_peaks_mz.sort(reverse=False)
    -271        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -272            print(str(len(cal_peaks_mz)) + " calibration points matched within thresholds.")
    -273        return cal_peaks_mz, cal_refs_mz
    -274
    -275    def robust_calib(self, param : list[float], 
    -276                     cal_peaks_mz : list[float], cal_refs_mz : list[float], 
    -277                     order : int=1):
    -278        """ Recalibration function
    -279
    -280        Computes the rms of m/z errors to minimize when calibrating.
    -281        This is adapted from from spike.
    -282
    -283        Parameters
    -284        ----------
    -285        param : list of floats
    -286            generated by minimize function from scipy optimize.
    -287        cal_peaks_mz : list of floats
    -288            masses of measured peaks to use in mass calibration.
    -289        cal_peaks_mz : list of floats
    -290            reference mz values of found calibration points.
    -291        order : int, optional
    -292            order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    -293
    -294        Returns
    -295        -------
    -296        rmserror : float
    -297            root mean square mass error for calibration points.
    -298
    -299        """
    -300        Aterm = param[0]
    -301        Bterm = param[1]
    -302        try:
    -303            Cterm = param[2]
    -304        except IndexError:
    -305            pass
    +267        if False:
    +268            min_calib_ppm_error = calib_ppm_error_threshold[0]
    +269            max_calib_ppm_error = calib_ppm_error_threshold[1]
    +270            df_raw = self.mass_spectrum.to_dataframe()
    +271
    +272            df_raw = df_raw[df_raw["S/N"] > calib_snr_threshold]
    +273            # optionally further subset that based on minimum S/N, RP, Peak Height
    +274            # to ensure only valid points are utilized
    +275            # in this example, only a S/N threshold is implemented.
    +276            imzmeas = []
    +277            mzrefs = []
    +278
    +279            for mzref in df_ref["m/z"]:
    +280                # find all peaks within a defined ppm error threshold
    +281                tmpdf = df_raw[
    +282                    ((df_raw["m/z"] - mzref) / mzref) * 1e6 < max_calib_ppm_error
    +283                ]
    +284                # Error is relative to the theoretical m/z, so the divisor is the reference mass (mzref)
    +285
    +286                tmpdf = tmpdf[
    +287                    ((tmpdf["m/z"] - mzref) / mzref) * 1e6 > min_calib_ppm_error
    +288                ]
    +289
    +290                # only use the calibration point if only one peak is within the thresholds
    +291                # This may require some optimization of the threshold tolerances
    +292                if len(tmpdf) == 1:
    +293                    imzmeas.append(int(tmpdf.index.values))
    +294                    mzrefs.append(mzref)
    +295
    +296        # it is crucial the mass lists are in same order
    +297        # corems likes to do masses from high to low.
    +298        cal_refs_mz.sort(reverse=False)
    +299        cal_peaks_mz.sort(reverse=False)
    +300        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +301            print(
    +302                str(len(cal_peaks_mz))
    +303                + " calibration points matched within thresholds."
    +304            )
    +305        return cal_peaks_mz, cal_refs_mz
     306
    -307        # get the mspeaks from the mass spectrum object which were calibration points
    -308        #mspeaks = [self.mass_spectrum.mspeaks[x] for x in imzmeas]
    -309        # get their calibrated mass values
    -310        #mspeakmzs = [x.mz_cal for x in mspeaks]
    -311        cal_peaks_mz = np.asarray(cal_peaks_mz)
    -312
    -313        # linearz
    -314        if order == 1:
    -315            ref_recal_points = (Aterm * cal_peaks_mz) + Bterm
    -316        # quadratic
    -317        elif order == 2:
    -318            ref_recal_points = (Aterm * (cal_peaks_mz)) + \
    -319                (Bterm * np.power((cal_peaks_mz), 2) + Cterm)
    -320
    -321        # sort both the calibration points (measured, recalibrated)
    -322        ref_recal_points.sort()
    -323        # and sort the calibration points (theoretical, predefined)
    -324        cal_refs_mz.sort()
    -325
    -326        # calculate the ppm error for each calibration point
    -327        error = ((ref_recal_points - cal_refs_mz) / cal_refs_mz) * 1e6
    -328        # calculate the root mean square error - this is our target to minimize
    -329        rmserror = np.sqrt(np.mean(error**2))
    -330        return rmserror
    -331
    -332    def recalibrate_mass_spectrum(self, cal_peaks_mz : list[float], cal_refs_mz : list[float], 
    -333                                        order : int=1, diagnostic : bool=False):
    +307    def robust_calib(
    +308        self,
    +309        param: list[float],
    +310        cal_peaks_mz: list[float],
    +311        cal_refs_mz: list[float],
    +312        order: int = 1,
    +313    ):
    +314        """Recalibration function
    +315
    +316        Computes the rms of m/z errors to minimize when calibrating.
    +317        This is adapted from SPIKE.
    +318
    +319        Parameters
    +320        ----------
    +321        param : list of floats
    +322            generated by minimize function from scipy optimize.
    +323        cal_peaks_mz : list of floats
    +324            masses of measured peaks to use in mass calibration.
    +325        cal_refs_mz : list of floats
    +326            reference mz values of found calibration points.
    +327        order : int, optional
    +328            order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    +329
    +330        Returns
    +331        -------
    +332        rmserror : float
    +333            root mean square mass error for calibration points.
     334
    -335        """ Main recalibration function which uses a robust linear regression
    -336
    -337        This function performs the recalibration of the mass spectrum object. 
    -338        It iteratively applies 
    -339
    -340        Parameters
    -341        ----------
    -342        cal_peaks_mz : list of float
    -343            masses of measured peaks to use in mass calibration.
    -344        cal_refs_mz : list of float
    -345            reference mz values of found calibration points.
    -346        order : int, optional
    -347            order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    +335        """
    +336        Aterm = param[0]
    +337        Bterm = param[1]
    +338        try:
    +339            Cterm = param[2]
    +340        except IndexError:
    +341            pass
    +342
    +343        # get the mspeaks from the mass spectrum object which were calibration points
    +344        # mspeaks = [self.mass_spectrum.mspeaks[x] for x in imzmeas]
    +345        # get their calibrated mass values
    +346        # mspeakmzs = [x.mz_cal for x in mspeaks]
    +347        cal_peaks_mz = np.asarray(cal_peaks_mz)
     348
    -349        Returns
    -350        -------
    -351        mass_spectrum : CoreMS mass spectrum object
    -352            Calibrated mass spectrum object
    -353
    -354
    -355        Notes 
    -356        -----
    -357        This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method. 
    -358
    -359        References
    -360        ----------
    -361        1.  Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A. 
    -362            SPIKE a Processing Software dedicated to Fourier Spectroscopies 
    -363            https://arxiv.org/abs/1608.06777 (2016)
    -364        2.  SPIKE - https://github.com/spike-project/spike 
    -365
    -366        """
    -367        # initialise parameters for recalibration
    -368        # these are the 'Aterm, Bterm, Cterm'
    -369        # as spectra are already freq->mz calibrated, these terms are very small
    -370        # may be beneficial to formally separate them from the freq->mz terms
    -371        if order == 1:
    -372            Po = [1, 0]
    -373        elif order == 2:
    -374            Po = [1, 0, 0]
    -375
    -376        if len(cal_peaks_mz) >= 2:
    -377            if self.mzsegment: #If only part of the spectrum is to be recalibrated
    -378                mz_exp_peaks = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum])
    -379                # Split the array into two parts - one to recailbrate, one to keep unchanged. 
    -380                mz_exp_peaks_tocal = mz_exp_peaks[(mz_exp_peaks>=min(self.mzsegment)) & (mz_exp_peaks<=max(self.mzsegment))]
    -381                mz_exp_peaks_unchanged = mz_exp_peaks[~(mz_exp_peaks>=min(self.mzsegment)) | ~(mz_exp_peaks<=max(self.mzsegment))]
    -382                # TODO: - segmented calibration needs a way to better track the calibration args/values... 
    -383                if not self.mass_spectrum.is_centroid:
    -384                    mz_exp_profile = np.array(self.mass_spectrum.mz_exp_profile)
    -385                    # Split the array into two parts - one to recailbrate, one to keep unchanged. 
    -386                    mz_exp_profile_tocal = mz_exp_profile[(mz_exp_profile>=min(self.mzsegment)) & (mz_exp_profile<=max(self.mzsegment))]
    -387                    mz_exp_profile_unchanged = mz_exp_profile[~(mz_exp_profile>=min(self.mzsegment)) | ~(mz_exp_profile<=max(self.mzsegment))]
    -388            else: #if just recalibrating the whole spectrum
    -389                mz_exp_peaks_tocal = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum])
    -390                if not self.mass_spectrum.is_centroid:
    -391                    mz_exp_profile_tocal = np.array(self.mass_spectrum.mz_exp_profile)
    -392
    -393
    -394            minimize_method = self.mass_spectrum.settings.calib_minimize_method
    -395            res = minimize(self.robust_calib, Po, args=(cal_peaks_mz, cal_refs_mz, order), method=minimize_method)
    -396            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -397                print("minimize function completed with RMS error of: {:0.3f} ppm".format(res['fun']))
    -398                print("minimize function performed {:1d} fn evals and {:1d} iterations".format(res['nfev'], res['nit']))
    -399            Pn = res.x
    -400
    -401            #mz_exp_ms = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum])
    -402
    -403            if order == 1:
    -404                mz_domain = (Pn[0] * mz_exp_peaks_tocal) + Pn[1]
    -405                if not self.mass_spectrum.is_centroid:
    -406                    mz_profile_calc = (Pn[0] * mz_exp_profile_tocal) + Pn[1]
    -407
    -408            elif order == 2:
    -409                mz_domain = (Pn[0] * (mz_exp_peaks_tocal)) + \
    -410                    (Pn[1] * np.power((mz_exp_peaks_tocal), 2) + Pn[2])
    -411
    -412                if not self.mass_spectrum.is_centroid:
    -413                    mz_profile_calc = (Pn[0] * (mz_exp_profile_tocal)) + \
    -414                        (Pn[1] * np.power((mz_exp_profile_tocal), 2) + Pn[2])
    -415
    -416            if self.mzsegment:
    -417                # Recombine the mass domains
    -418                mz_domain = np.concatenate([mz_domain,mz_exp_peaks_unchanged])
    -419                mz_domain.sort()
    -420                if not self.mass_spectrum.is_centroid:
    -421                    mz_profile_calc = np.concatenate([mz_profile_calc,mz_exp_profile_unchanged])
    -422                    mz_profile_calc.sort()
    -423                # Sort them 
    -424                if mz_exp_peaks[0] > mz_exp_peaks[1]: #If originally descending mass order
    -425                    mz_domain = mz_domain[::-1]
    -426                    if not self.mass_spectrum.is_centroid:
    -427                        mz_profile_calc = mz_profile_calc[::-1]
    -428
    -429            self.mass_spectrum.mz_cal = mz_domain
    -430            if not self.mass_spectrum.is_centroid:
    -431                self.mass_spectrum.mz_cal_profile = mz_profile_calc
    -432
    -433            self.mass_spectrum.calibration_order = order
    -434            self.mass_spectrum.calibration_RMS = float(res['fun'])
    -435            self.mass_spectrum.calibration_points = int(len(cal_refs_mz))
    -436            self.mass_spectrum.calibration_ref_mzs = cal_refs_mz
    -437            self.mass_spectrum.calibration_meas_mzs = cal_peaks_mz
    -438
    -439            self.mass_spectrum.calibration_segment = self.mzsegment
    -440
    -441            if diagnostic:
    -442                return self.mass_spectrum,res
    -443            return self.mass_spectrum
    -444        else:
    -445            warnings.warn("Too few calibration points - aborting.")
    -446            return self.mass_spectrum
    -447
    -448    def run(self):
    -449        """ Run the calibration routine
    -450        
    -451        This function runs the calibration routine.
    -452        
    -453        """
    -454        calib_ppm_error_threshold = self.mass_spectrum.settings.calib_sn_threshold
    -455        max_calib_ppm_error = self.mass_spectrum.settings.max_calib_ppm_error
    -456        min_calib_ppm_error = self.mass_spectrum.settings.min_calib_ppm_error
    -457        calib_pol_order = self.mass_spectrum.settings.calib_pol_order
    -458        calibration_ref_match_method = self.mass_spectrum.settings.calibration_ref_match_method
    -459        calibration_ref_match_tolerance = self.mass_spectrum.settings.calibration_ref_match_tolerance
    -460        calibration_ref_match_std_raw_error_limit = self.mass_spectrum.settings.calibration_ref_match_std_raw_error_limit
    -461
    -462        # load reference mass list
    -463        df_ref = self.load_ref_mass_list()
    -464
    -465        # find calibration points
    -466        cal_peaks_mz, cal_refs_mz = self.find_calibration_points(df_ref,
    -467                                                       calib_ppm_error_threshold=(min_calib_ppm_error,
    -468                                                                                  max_calib_ppm_error),
    -469                                                       calib_snr_threshold=calib_ppm_error_threshold,
    -470                                                       calibration_ref_match_method = calibration_ref_match_method,
    -471                                                       calibration_ref_match_tolerance = calibration_ref_match_tolerance,
    -472                                                       calibration_ref_match_std_raw_error_limit = calibration_ref_match_std_raw_error_limit)
    -473        if len(cal_peaks_mz)==2:
    -474            self.mass_spectrum.settings.calib_pol_order = 1
    -475            calib_pol_order = 1
    -476            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -477                print('Only 2 calibration points found, forcing a linear recalibration')
    -478        elif len(cal_peaks_mz)<2:
    -479            warnings.warn('Too few calibration points found, function will fail')
    -480        self.recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=calib_pol_order)
    +349        # linear
    +350        if order == 1:
    +351            ref_recal_points = (Aterm * cal_peaks_mz) + Bterm
    +352        # quadratic
    +353        elif order == 2:
    +354            ref_recal_points = (Aterm * (cal_peaks_mz)) + (
    +355                Bterm * np.power((cal_peaks_mz), 2) + Cterm
    +356            )
    +357
    +358        # sort both the calibration points (measured, recalibrated)
    +359        ref_recal_points.sort()
    +360        # and sort the calibration points (theoretical, predefined)
    +361        cal_refs_mz.sort()
    +362
    +363        # calculate the ppm error for each calibration point
    +364        error = ((ref_recal_points - cal_refs_mz) / cal_refs_mz) * 1e6
    +365        # calculate the root mean square error - this is our target to minimize
    +366        rmserror = np.sqrt(np.mean(error**2))
    +367        return rmserror
    +368
    +369    def recalibrate_mass_spectrum(
    +370        self,
    +371        cal_peaks_mz: list[float],
    +372        cal_refs_mz: list[float],
    +373        order: int = 1,
    +374        diagnostic: bool = False,
    +375    ):
    +376        """Main recalibration function which uses a robust linear regression
    +377
    +378        This function performs the recalibration of the mass spectrum object.
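    +379        It fits the recalibration terms with scipy's minimize (objective: robust_calib) and applies them to the measured m/z values.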
    +380
    +381        Parameters
    +382        ----------
    +383        cal_peaks_mz : list of float
    +384            masses of measured peaks to use in mass calibration.
    +385        cal_refs_mz : list of float
    +386            reference mz values of found calibration points.
    +387        order : int, optional
    +388            order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    +389
    +390        Returns
    +391        -------
    +392        mass_spectrum : CoreMS mass spectrum object
    +393            Calibrated mass spectrum object
    +394
    +395
    +396        Notes
    +397        -----
    +398        This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method.
    +399
    +400        References
    +401        ----------
    +402        1.  Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A.
    +403            SPIKE a Processing Software dedicated to Fourier Spectroscopies
    +404            https://arxiv.org/abs/1608.06777 (2016)
    +405        2.  SPIKE - https://github.com/spike-project/spike
    +406
    +407        """
    +408        # initialise parameters for recalibration
    +409        # these are the 'Aterm, Bterm, Cterm'
    +410        # as spectra are already freq->mz calibrated, these terms are very small
    +411        # may be beneficial to formally separate them from the freq->mz terms
    +412        if order == 1:
    +413            Po = [1, 0]
    +414        elif order == 2:
    +415            Po = [1, 0, 0]
    +416
    +417        if len(cal_peaks_mz) >= 2:
    +418            if self.mzsegment:  # If only part of the spectrum is to be recalibrated
    +419                mz_exp_peaks = np.array(
    +420                    [mspeak.mz_exp for mspeak in self.mass_spectrum]
    +421                )
    +422                # Split the array into two parts - one to recalibrate, one to keep unchanged.
    +423                mz_exp_peaks_tocal = mz_exp_peaks[
    +424                    (mz_exp_peaks >= min(self.mzsegment))
    +425                    & (mz_exp_peaks <= max(self.mzsegment))
    +426                ]
    +427                mz_exp_peaks_unchanged = mz_exp_peaks[
    +428                    ~(mz_exp_peaks >= min(self.mzsegment))
    +429                    | ~(mz_exp_peaks <= max(self.mzsegment))
    +430                ]
    +431                # TODO: - segmented calibration needs a way to better track the calibration args/values...
    +432                if not self.mass_spectrum.is_centroid:
    +433                    mz_exp_profile = np.array(self.mass_spectrum.mz_exp_profile)
    +434                    # Split the array into two parts - one to recalibrate, one to keep unchanged.
    +435                    mz_exp_profile_tocal = mz_exp_profile[
    +436                        (mz_exp_profile >= min(self.mzsegment))
    +437                        & (mz_exp_profile <= max(self.mzsegment))
    +438                    ]
    +439                    mz_exp_profile_unchanged = mz_exp_profile[
    +440                        ~(mz_exp_profile >= min(self.mzsegment))
    +441                        | ~(mz_exp_profile <= max(self.mzsegment))
    +442                    ]
    +443            else:  # if just recalibrating the whole spectrum
    +444                mz_exp_peaks_tocal = np.array(
    +445                    [mspeak.mz_exp for mspeak in self.mass_spectrum]
    +446                )
    +447                if not self.mass_spectrum.is_centroid:
    +448                    mz_exp_profile_tocal = np.array(self.mass_spectrum.mz_exp_profile)
    +449
    +450            minimize_method = self.mass_spectrum.settings.calib_minimize_method
    +451            res = minimize(
    +452                self.robust_calib,
    +453                Po,
    +454                args=(cal_peaks_mz, cal_refs_mz, order),
    +455                method=minimize_method,
    +456            )
    +457            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +458                print(
    +459                    "minimize function completed with RMS error of: {:0.3f} ppm".format(
    +460                        res["fun"]
    +461                    )
    +462                )
    +463                print(
    +464                    "minimize function performed {:1d} fn evals and {:1d} iterations".format(
    +465                        res["nfev"], res["nit"]
    +466                    )
    +467                )
    +468            Pn = res.x
    +469
    +470            # mz_exp_ms = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum])
    +471
    +472            if order == 1:
    +473                mz_domain = (Pn[0] * mz_exp_peaks_tocal) + Pn[1]
    +474                if not self.mass_spectrum.is_centroid:
    +475                    mz_profile_calc = (Pn[0] * mz_exp_profile_tocal) + Pn[1]
    +476
    +477            elif order == 2:
    +478                mz_domain = (Pn[0] * (mz_exp_peaks_tocal)) + (
    +479                    Pn[1] * np.power((mz_exp_peaks_tocal), 2) + Pn[2]
    +480                )
    +481
    +482                if not self.mass_spectrum.is_centroid:
    +483                    mz_profile_calc = (Pn[0] * (mz_exp_profile_tocal)) + (
    +484                        Pn[1] * np.power((mz_exp_profile_tocal), 2) + Pn[2]
    +485                    )
    +486
    +487            if self.mzsegment:
    +488                # Recombine the mass domains
    +489                mz_domain = np.concatenate([mz_domain, mz_exp_peaks_unchanged])
    +490                mz_domain.sort()
    +491                if not self.mass_spectrum.is_centroid:
    +492                    mz_profile_calc = np.concatenate(
    +493                        [mz_profile_calc, mz_exp_profile_unchanged]
    +494                    )
    +495                    mz_profile_calc.sort()
    +496                # Sort them
    +497                if (
    +498                    mz_exp_peaks[0] > mz_exp_peaks[1]
    +499                ):  # If originally descending mass order
    +500                    mz_domain = mz_domain[::-1]
    +501                    if not self.mass_spectrum.is_centroid:
    +502                        mz_profile_calc = mz_profile_calc[::-1]
    +503
    +504            self.mass_spectrum.mz_cal = mz_domain
    +505            if not self.mass_spectrum.is_centroid:
    +506                self.mass_spectrum.mz_cal_profile = mz_profile_calc
    +507
    +508            self.mass_spectrum.calibration_order = order
    +509            self.mass_spectrum.calibration_RMS = float(res["fun"])
    +510            self.mass_spectrum.calibration_points = int(len(cal_refs_mz))
    +511            self.mass_spectrum.calibration_ref_mzs = cal_refs_mz
    +512            self.mass_spectrum.calibration_meas_mzs = cal_peaks_mz
    +513
    +514            self.mass_spectrum.calibration_segment = self.mzsegment
    +515
    +516            if diagnostic:
    +517                return self.mass_spectrum, res
    +518            return self.mass_spectrum
    +519        else:
    +520            warnings.warn("Too few calibration points - aborting.")
    +521            return self.mass_spectrum
    +522
    +523    def run(self):
    +524        """Run the calibration routine
    +525
    +526        This function runs the calibration routine.
    +527
    +528        """
    +529        calib_ppm_error_threshold = self.mass_spectrum.settings.calib_sn_threshold
    +530        max_calib_ppm_error = self.mass_spectrum.settings.max_calib_ppm_error
    +531        min_calib_ppm_error = self.mass_spectrum.settings.min_calib_ppm_error
    +532        calib_pol_order = self.mass_spectrum.settings.calib_pol_order
    +533        calibration_ref_match_method = (
    +534            self.mass_spectrum.settings.calibration_ref_match_method
    +535        )
    +536        calibration_ref_match_tolerance = (
    +537            self.mass_spectrum.settings.calibration_ref_match_tolerance
    +538        )
    +539        calibration_ref_match_std_raw_error_limit = (
    +540            self.mass_spectrum.settings.calibration_ref_match_std_raw_error_limit
    +541        )
    +542
    +543        # load reference mass list
    +544        df_ref = self.load_ref_mass_list()
    +545
    +546        # find calibration points
    +547        cal_peaks_mz, cal_refs_mz = self.find_calibration_points(
    +548            df_ref,
    +549            calib_ppm_error_threshold=(min_calib_ppm_error, max_calib_ppm_error),
    +550            calib_snr_threshold=calib_ppm_error_threshold,
    +551            calibration_ref_match_method=calibration_ref_match_method,
    +552            calibration_ref_match_tolerance=calibration_ref_match_tolerance,
    +553            calibration_ref_match_std_raw_error_limit=calibration_ref_match_std_raw_error_limit,
    +554        )
    +555        if len(cal_peaks_mz) == 2:
    +556            self.mass_spectrum.settings.calib_pol_order = 1
    +557            calib_pol_order = 1
    +558            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +559                print("Only 2 calibration points found, forcing a linear recalibration")
    +560        elif len(cal_peaks_mz) < 2:
    +561            warnings.warn("Too few calibration points found, function will fail")
    +562        self.recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=calib_pol_order)
     
    @@ -1070,17 +1238,17 @@
    Attributes
    Methods
      -
    • run(). -Main function to run this class.
    • -
    • load_ref_mass_list(). -Load reference mass list (Bruker format).
    • -
    • gen_ref_mass_list_from_assigned(min_conf=0.7). -Generate reference mass list from assigned masses.
    • -
    • find_calibration_points(df_ref, calib_ppm_error_threshold=(-1, 1), calib_snr_threshold=5). -Find calibration points in the mass spectrum based on the reference mass list.
    • -
    • robust_calib(param, cal_peaks_mz, cal_refs_mz, order=1). -Recalibration function.
    • -
    • recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=1, diagnostic=False). +
    • run(). +Main function to run this class.
    • +
    • load_ref_mass_list(). +Load reference mass list (Bruker format).
    • +
    • gen_ref_mass_list_from_assigned(min_conf=0.7). +Generate reference mass list from assigned masses.
    • +
    • find_calibration_points(df_ref, calib_ppm_error_threshold=(-1, 1), calib_snr_threshold=5). +Find calibration points in the mass spectrum based on the reference mass list.
    • +
    • robust_calib(param, cal_peaks_mz, cal_refs_mz, order=1). +Recalibration function.
    • +
    • recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=1, diagnostic=False). Main recalibration function which uses a robust linear regression.
    @@ -1096,22 +1264,27 @@

    Methods
    -
    67    def __init__(self, mass_spectrum, ref_masslist,mzsegment=None):
    -68        
    -69        self.mass_spectrum = mass_spectrum
    -70        self.mzsegment = mzsegment
    -71
    -72        # define reference mass list - bruker .ref format
    -73        self.ref_mass_list_path = ref_masslist
    -74        if self.mass_spectrum.percentile_assigned()[0]!=0:
    -75            warnings.warn('Warning: calibrating spectra which have already been assigned may yield erroneous results')
    -76        self.mass_spectrum.mz_cal = self.mass_spectrum.mz_exp    
    -77        self.mass_spectrum.mz_cal_profile = self.mass_spectrum._mz_exp  
    -78        
    -79        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -80            print("MS Obj loaded - "+str(len(mass_spectrum.mspeaks))+" peaks found.")
    -81
    -82            print("MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found.")
    +            
    63    def __init__(self, mass_spectrum, ref_masslist, mzsegment=None):
    +64        self.mass_spectrum = mass_spectrum
    +65        self.mzsegment = mzsegment
    +66
    +67        # define reference mass list - bruker .ref format
    +68        self.ref_mass_list_path = ref_masslist
    +69        if self.mass_spectrum.percentile_assigned()[0] != 0:
    +70            warnings.warn(
    +71                "Warning: calibrating spectra which have already been assigned may yield erroneous results"
    +72            )
    +73        self.mass_spectrum.mz_cal = self.mass_spectrum.mz_exp
    +74        self.mass_spectrum.mz_cal_profile = self.mass_spectrum._mz_exp
    +75
    +76        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +77            print(
    +78                "MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found."
    +79            )
    +80
    +81            print(
    +82                "MS Obj loaded - " + str(len(mass_spectrum.mspeaks)) + " peaks found."
    +83            )
     
    @@ -1162,58 +1335,64 @@
    Methods
    -
     84    def load_ref_mass_list(self):
    - 85        """ Load reference mass list (Bruker format)
    - 86
    - 87        Loads in a reference mass list from a .ref file
    - 88        Note that some versions of Bruker's software produce .ref files with a different format. 
    - 89        As such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format. 
    - 90        CoreMS includes an example .ref file with the correct format for reference. 
    - 91
    - 92        Returns
    - 93        -------
    - 94        df_ref : Pandas DataFrame
    - 95            reference mass list object.
    - 96
    - 97        """
    - 98        refmasslist = Path(self.ref_mass_list_path) if isinstance(self.ref_mass_list_path, str) else self.ref_mass_list_path
    - 99
    -100        if not refmasslist.exists():
    -101            raise FileExistsError("File does not exist: %s" % refmasslist)
    -102
    -103        with refmasslist.open('r') as csvfile:
    -104            dialect = csv.Sniffer().sniff(csvfile.read(1024))
    -105            delimiter = dialect.delimiter
    -106
    -107        if isinstance(refmasslist, S3Path):
    -108            # data = self.file_location.open('rb').read()
    -109            data = BytesIO(refmasslist.open('rb').read())
    -110
    -111        else:
    -112            data = refmasslist
    -113
    -114        df_ref = pd.read_csv(data, sep=delimiter, header=None, skiprows=1)
    +            
     85    def load_ref_mass_list(self):
    + 86        """Load reference mass list (Bruker format)
    + 87
    + 88        Loads in a reference mass list from a .ref file
    + 89        Note that some versions of Bruker's software produce .ref files with a different format.
    + 90        As such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format.
    + 91        CoreMS includes an example .ref file with the correct format for reference.
    + 92
    + 93        Returns
    + 94        -------
    + 95        df_ref : Pandas DataFrame
    + 96            reference mass list object.
    + 97
    + 98        """
    + 99        refmasslist = (
    +100            Path(self.ref_mass_list_path)
    +101            if isinstance(self.ref_mass_list_path, str)
    +102            else self.ref_mass_list_path
    +103        )
    +104
    +105        if not refmasslist.exists():
    +106            raise FileExistsError("File does not exist: %s" % refmasslist)
    +107
    +108        with refmasslist.open("r") as csvfile:
    +109            dialect = csv.Sniffer().sniff(csvfile.read(1024))
    +110            delimiter = dialect.delimiter
    +111
    +112        if isinstance(refmasslist, S3Path):
    +113            # data = self.file_location.open('rb').read()
    +114            data = BytesIO(refmasslist.open("rb").read())
     115
    -116        df_ref = df_ref.rename({0: 'Formula',
    -117                                1: 'm/z',
    -118                                2: 'Charge',
    -119                                3: 'Form2'
    -120                                }, axis=1)
    -121
    -122        df_ref.sort_values(by='m/z', ascending=True,inplace=True)
    -123        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -124            print("Reference mass list loaded - " + str(len(df_ref)) + " calibration masses loaded.")
    -125
    -126        return df_ref
    +116        else:
    +117            data = refmasslist
    +118
    +119        df_ref = pd.read_csv(data, sep=delimiter, header=None, skiprows=1)
    +120
    +121        df_ref = df_ref.rename(
    +122            {0: "Formula", 1: "m/z", 2: "Charge", 3: "Form2"}, axis=1
    +123        )
    +124
    +125        df_ref.sort_values(by="m/z", ascending=True, inplace=True)
    +126        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +127            print(
    +128                "Reference mass list loaded - "
    +129                + str(len(df_ref))
    +130                + " calibration masses loaded."
    +131            )
    +132
    +133        return df_ref
     

    Load reference mass list (Bruker format)

    Loads in a reference mass list from a .ref file -Note that some versions of Bruker's software produce .ref files with a different format. -As such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format. -CoreMS includes an example .ref file with the correct format for reference.

    +Note that some versions of Bruker's software produce .ref files with a different format. +As such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format. +CoreMS includes an example .ref file with the correct format for reference.

    Returns
    @@ -1236,33 +1415,37 @@
    Returns
    -
    128    def gen_ref_mass_list_from_assigned(self, min_conf : float=0.7):
    -129        """ Generate reference mass list from assigned masses
    -130
    -131        This function will generate a ref mass dataframe object from an assigned corems mass spec obj
    -132        using assigned masses above a certain minimum confidence threshold.
    -133
    -134        This function needs to be retested and check it is covered in the unit tests.
    -135
    -136        Parameters
    -137        ----------
    -138        min_conf : float, optional
    -139            minimum confidence score. The default is 0.7.
    +            
    135    def gen_ref_mass_list_from_assigned(self, min_conf: float = 0.7):
    +136        """Generate reference mass list from assigned masses
    +137
    +138        This function will generate a ref mass dataframe object from an assigned corems mass spec obj
    +139        using assigned masses above a certain minimum confidence threshold.
     140
    -141        Returns
    -142        -------
    -143        df_ref : Pandas DataFrame
    -144            reference mass list - based on calculated masses.
    -145
    -146        """
    -147        #TODO this function needs to be retested and check it is covered in the unit tests
    -148        df = self.mass_spectrum.to_dataframe()
    -149        df = df[df['Confidence Score'] > min_conf]
    -150        df_ref = pd.DataFrame(columns=['m/z'])
    -151        df_ref['m/z'] = df['Calculated m/z']
    -152        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -153            print("Reference mass list generated - " + str(len(df_ref)) + " calibration masses.")
    -154        return df_ref
    +141        This function needs to be retested and check it is covered in the unit tests.
    +142
    +143        Parameters
    +144        ----------
    +145        min_conf : float, optional
    +146            minimum confidence score. The default is 0.7.
    +147
    +148        Returns
    +149        -------
    +150        df_ref : Pandas DataFrame
    +151            reference mass list - based on calculated masses.
    +152
    +153        """
    +154        # TODO this function needs to be retested and check it is covered in the unit tests
    +155        df = self.mass_spectrum.to_dataframe()
    +156        df = df[df["Confidence Score"] > min_conf]
    +157        df_ref = pd.DataFrame(columns=["m/z"])
    +158        df_ref["m/z"] = df["Calculated m/z"]
    +159        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +160            print(
    +161                "Reference mass list generated - "
    +162                + str(len(df_ref))
    +163                + " calibration masses."
    +164            )
    +165        return df_ref
     
    @@ -1301,128 +1484,149 @@
    Returns
    -
    156    def find_calibration_points(self, df_ref,
    -157                                calib_ppm_error_threshold : tuple[float, float]=(-1, 1),
    -158                                calib_snr_threshold : float=5,
    -159                                calibration_ref_match_method : str='legacy',
    -160                                calibration_ref_match_tolerance : float=0.003,
    -161                                calibration_ref_match_std_raw_error_limit: float=1.5):
    -162        """Function to find calibration points in the mass spectrum 
    -163        
    -164        Based on the reference mass list.
    -165
    -166        Parameters
    -167        ----------
    -168        df_ref : Pandas DataFrame
    -169            reference mass list for recalibration.
    -170        calib_ppm_error_threshold : tuple of floats, optional
    -171            ppm error for finding calibration masses in the spectrum. The default is -1,1.
    -172            Note: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6. 
    -173                Some software does this the other way around and value signs must be inverted for that to work. 
    -174        calib_snr_threshold : float, optional
    -175            snr threshold for finding calibration masses in the spectrum. The default is 5.
    -176
    -177        Returns
    -178        -------
    -179        cal_peaks_mz : list of floats
    -180            masses of measured ions to use in calibration routine
    -181        cal_refs_mz : list of floats
    -182            reference mz values of found calibration points.
    -183
    -184        """
    -185
    -186        # This approach is much more efficient and expedient than the original implementation.
    -187        peaks_mz = []
    -188        for x in self.mass_spectrum.mspeaks:
    -189            if x.signal_to_noise > calib_snr_threshold:
    -190                if self.mzsegment:
    -191                    if (min(self.mzsegment) <= x.mz_exp <= max(self.mzsegment)):
    -192                        peaks_mz.append(x.mz_exp)
    -193                else:
    -194                    peaks_mz.append(x.mz_exp)
    -195        peaks_mz = np.asarray(peaks_mz)
    -196        
    -197        if calibration_ref_match_method == 'legacy':
    -198            # This legacy approach iterates through each reference match and finds the entries within 1 mz and within the user defined PPM error threshold
    -199            # Then it removes ambiguities - which means the calibration threshold hasto be very tight.
    -200            cal_peaks_mz = []
    -201            cal_refs_mz = []
    -202            for mzref in df_ref['m/z']:
    -203                tmp_peaks_mz = peaks_mz[abs(peaks_mz-mzref)<1]
    -204                for mzmeas in tmp_peaks_mz:
    -205                    delta_mass = ((mzmeas-mzref)/mzref)*1e6
    -206                    if delta_mass < max(calib_ppm_error_threshold):
    -207                        if delta_mass > min(calib_ppm_error_threshold):
    -208                            cal_peaks_mz.append(mzmeas)
    -209                            cal_refs_mz.append(mzref)
    +            
    167    def find_calibration_points(
    +168        self,
    +169        df_ref,
    +170        calib_ppm_error_threshold: tuple[float, float] = (-1, 1),
    +171        calib_snr_threshold: float = 5,
    +172        calibration_ref_match_method: str = "legacy",
    +173        calibration_ref_match_tolerance: float = 0.003,
    +174        calibration_ref_match_std_raw_error_limit: float = 1.5,
    +175    ):
    +176        """Function to find calibration points in the mass spectrum
    +177
    +178        Based on the reference mass list.
    +179
    +180        Parameters
    +181        ----------
    +182        df_ref : Pandas DataFrame
    +183            reference mass list for recalibration.
    +184        calib_ppm_error_threshold : tuple of floats, optional
    +185            ppm error for finding calibration masses in the spectrum. The default is -1,1.
    +186            Note: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6.
    +187                Some software does this the other way around and value signs must be inverted for that to work.
    +188        calib_snr_threshold : float, optional
    +189            snr threshold for finding calibration masses in the spectrum. The default is 5.
    +190
    +191        Returns
    +192        -------
    +193        cal_peaks_mz : list of floats
    +194            masses of measured ions to use in calibration routine
    +195        cal_refs_mz : list of floats
    +196            reference mz values of found calibration points.
    +197
    +198        """
    +199
    +200        # This approach is much more efficient and expedient than the original implementation.
    +201        peaks_mz = []
    +202        for x in self.mass_spectrum.mspeaks:
    +203            if x.signal_to_noise > calib_snr_threshold:
    +204                if self.mzsegment:
    +205                    if min(self.mzsegment) <= x.mz_exp <= max(self.mzsegment):
    +206                        peaks_mz.append(x.mz_exp)
    +207                else:
    +208                    peaks_mz.append(x.mz_exp)
    +209        peaks_mz = np.asarray(peaks_mz)
     210
    -211            # To remove entries with duplicated indices (reference masses matching multiple peaks)
    -212            tmpdf = pd.Series(index = cal_refs_mz,data = cal_peaks_mz,dtype=float)
    -213            tmpdf = tmpdf[~tmpdf.index.duplicated(keep=False)]
    -214
    -215            cal_peaks_mz = list(tmpdf.values)
    -216            cal_refs_mz = list(tmpdf.index)
    -217        elif calibration_ref_match_method == 'merged':
    -218            warnings.warn('Using experimental new reference mass list merging')
    -219            # This is a new approach (August 2024) which uses Pandas 'merged_asof' to find the peaks closest in m/z between 
    -220            # reference and measured masses. This is a quicker way to match, and seems to get more matches.
    -221            # It may not work as well when the data are far from correc initial mass
    -222            # e.g. if the correct peak is further from the reference than an incorrect peak.
    -223            meas_df = pd.DataFrame(columns=['meas_m/z'],data = peaks_mz)
    -224            tolerance = calibration_ref_match_tolerance
    -225            merged_df = pd.merge_asof(df_ref, meas_df, left_on='m/z', right_on = 'meas_m/z',tolerance=tolerance,direction='nearest')
    -226            merged_df.dropna(how='any',inplace=True)
    -227            merged_df['Error_ppm'] = ((merged_df['meas_m/z']-merged_df['m/z'])/merged_df['m/z'])*1e6
    -228            median_raw_error = merged_df['Error_ppm'].median()
    -229            std_raw_error = merged_df['Error_ppm'].std()
    -230            if std_raw_error > calibration_ref_match_std_raw_error_limit:
    -231                std_raw_error = calibration_ref_match_std_raw_error_limit
    -232            self.mass_spectrum.calibration_raw_error_median = median_raw_error
    -233            self.mass_spectrum.calibration_raw_error_stdev = std_raw_error
    -234            merged_df= merged_df[(merged_df['Error_ppm']>(median_raw_error-1.5*std_raw_error))&(merged_df['Error_ppm']<(median_raw_error+1.5*std_raw_error))]
    -235            #merged_df= merged_df[(merged_df['Error_ppm']>min(calib_ppm_error_threshold))&(merged_df['Error_ppm']<max(calib_ppm_error_threshold))]
    -236            cal_peaks_mz = list(merged_df['meas_m/z'])
    -237            cal_refs_mz = list(merged_df['m/z'])   
    -238        else:
    -239            raise ValueError(f'{calibration_ref_match_method} not allowed.')
    -240
    -241        if False:
    -242            min_calib_ppm_error = calib_ppm_error_threshold[0]
    -243            max_calib_ppm_error = calib_ppm_error_threshold[1]
    -244            df_raw = self.mass_spectrum.to_dataframe()
    -245
    -246            df_raw = df_raw[df_raw['S/N'] > calib_snr_threshold]
    -247            # optionally further subset that based on minimum S/N, RP, Peak Height
    -248            # to ensure only valid points are utilized
    -249            # in this example, only a S/N threshold is implemented.        
    -250            imzmeas = []
    -251            mzrefs = []
    -252
    -253            for mzref in df_ref['m/z']:
    -254
    -255                # find all peaks within a defined ppm error threshold
    -256                tmpdf = df_raw[((df_raw['m/z']-mzref)/mzref)*1e6<max_calib_ppm_error]
    -257                # Error is relative to the theoretical, so the divisor should be divisor
    -258
    -259                tmpdf = tmpdf[((tmpdf['m/z']-mzref)/mzref)*1e6>min_calib_ppm_error]
    -260                        
    -261                # only use the calibration point if only one peak is within the thresholds
    -262                # This may require some optimization of the threshold tolerances
    -263                if len(tmpdf) == 1:
    -264                    imzmeas.append(int(tmpdf.index.values))
    -265                    mzrefs.append(mzref)
    +211        if calibration_ref_match_method == "legacy":
    +212            # This legacy approach iterates through each reference match and finds the entries within 1 mz and within the user defined PPM error threshold
    +213            # Then it removes ambiguities - which means the calibration threshold hasto be very tight.
    +214            cal_peaks_mz = []
    +215            cal_refs_mz = []
    +216            for mzref in df_ref["m/z"]:
    +217                tmp_peaks_mz = peaks_mz[abs(peaks_mz - mzref) < 1]
    +218                for mzmeas in tmp_peaks_mz:
    +219                    delta_mass = ((mzmeas - mzref) / mzref) * 1e6
    +220                    if delta_mass < max(calib_ppm_error_threshold):
    +221                        if delta_mass > min(calib_ppm_error_threshold):
    +222                            cal_peaks_mz.append(mzmeas)
    +223                            cal_refs_mz.append(mzref)
    +224
    +225            # To remove entries with duplicated indices (reference masses matching multiple peaks)
    +226            tmpdf = pd.Series(index=cal_refs_mz, data=cal_peaks_mz, dtype=float)
    +227            tmpdf = tmpdf[~tmpdf.index.duplicated(keep=False)]
    +228
    +229            cal_peaks_mz = list(tmpdf.values)
    +230            cal_refs_mz = list(tmpdf.index)
    +231        elif calibration_ref_match_method == "merged":
    +232            warnings.warn("Using experimental new reference mass list merging")
    +233            # This is a new approach (August 2024) which uses Pandas 'merged_asof' to find the peaks closest in m/z between
    +234            # reference and measured masses. This is a quicker way to match, and seems to get more matches.
    +235            # It may not work as well when the data are far from correc initial mass
    +236            # e.g. if the correct peak is further from the reference than an incorrect peak.
    +237            meas_df = pd.DataFrame(columns=["meas_m/z"], data=peaks_mz)
    +238            tolerance = calibration_ref_match_tolerance
    +239            merged_df = pd.merge_asof(
    +240                df_ref,
    +241                meas_df,
    +242                left_on="m/z",
    +243                right_on="meas_m/z",
    +244                tolerance=tolerance,
    +245                direction="nearest",
    +246            )
    +247            merged_df.dropna(how="any", inplace=True)
    +248            merged_df["Error_ppm"] = (
    +249                (merged_df["meas_m/z"] - merged_df["m/z"]) / merged_df["m/z"]
    +250            ) * 1e6
    +251            median_raw_error = merged_df["Error_ppm"].median()
    +252            std_raw_error = merged_df["Error_ppm"].std()
    +253            if std_raw_error > calibration_ref_match_std_raw_error_limit:
    +254                std_raw_error = calibration_ref_match_std_raw_error_limit
    +255            self.mass_spectrum.calibration_raw_error_median = median_raw_error
    +256            self.mass_spectrum.calibration_raw_error_stdev = std_raw_error
    +257            merged_df = merged_df[
    +258                (merged_df["Error_ppm"] > (median_raw_error - 1.5 * std_raw_error))
    +259                & (merged_df["Error_ppm"] < (median_raw_error + 1.5 * std_raw_error))
    +260            ]
    +261            # merged_df= merged_df[(merged_df['Error_ppm']>min(calib_ppm_error_threshold))&(merged_df['Error_ppm']<max(calib_ppm_error_threshold))]
    +262            cal_peaks_mz = list(merged_df["meas_m/z"])
    +263            cal_refs_mz = list(merged_df["m/z"])
    +264        else:
    +265            raise ValueError(f"{calibration_ref_match_method} not allowed.")
     266
    -267        # it is crucial the mass lists are in same order
    -268        # corems likes to do masses from high to low.
    -269        cal_refs_mz.sort(reverse=False)
    -270        cal_peaks_mz.sort(reverse=False)
    -271        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -272            print(str(len(cal_peaks_mz)) + " calibration points matched within thresholds.")
    -273        return cal_peaks_mz, cal_refs_mz
    +267        if False:
    +268            min_calib_ppm_error = calib_ppm_error_threshold[0]
    +269            max_calib_ppm_error = calib_ppm_error_threshold[1]
    +270            df_raw = self.mass_spectrum.to_dataframe()
    +271
    +272            df_raw = df_raw[df_raw["S/N"] > calib_snr_threshold]
    +273            # optionally further subset that based on minimum S/N, RP, Peak Height
    +274            # to ensure only valid points are utilized
    +275            # in this example, only a S/N threshold is implemented.
    +276            imzmeas = []
    +277            mzrefs = []
    +278
    +279            for mzref in df_ref["m/z"]:
    +280                # find all peaks within a defined ppm error threshold
    +281                tmpdf = df_raw[
    +282                    ((df_raw["m/z"] - mzref) / mzref) * 1e6 < max_calib_ppm_error
    +283                ]
    +284                # Error is relative to the theoretical, so the divisor should be divisor
    +285
    +286                tmpdf = tmpdf[
    +287                    ((tmpdf["m/z"] - mzref) / mzref) * 1e6 > min_calib_ppm_error
    +288                ]
    +289
    +290                # only use the calibration point if only one peak is within the thresholds
    +291                # This may require some optimization of the threshold tolerances
    +292                if len(tmpdf) == 1:
    +293                    imzmeas.append(int(tmpdf.index.values))
    +294                    mzrefs.append(mzref)
    +295
    +296        # it is crucial the mass lists are in same order
    +297        # corems likes to do masses from high to low.
    +298        cal_refs_mz.sort(reverse=False)
    +299        cal_peaks_mz.sort(reverse=False)
    +300        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +301            print(
    +302                str(len(cal_peaks_mz))
    +303                + " calibration points matched within thresholds."
    +304            )
    +305        return cal_peaks_mz, cal_refs_mz
     
    -

    Function to find calibration points in the mass spectrum

    +

    Function to find calibration points in the mass spectrum

    Based on the reference mass list.

    @@ -1433,7 +1637,7 @@
    Parameters
    reference mass list for recalibration.
  • calib_ppm_error_threshold (tuple of floats, optional): ppm error for finding calibration masses in the spectrum. The default is -1,1. -Note: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6. +Note: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6. Some software does this the other way around and value signs must be inverted for that to work.
  • calib_snr_threshold (float, optional): snr threshold for finding calibration masses in the spectrum. The default is 5.
  • @@ -1462,62 +1666,67 @@
    Returns
    -
    275    def robust_calib(self, param : list[float], 
    -276                     cal_peaks_mz : list[float], cal_refs_mz : list[float], 
    -277                     order : int=1):
    -278        """ Recalibration function
    -279
    -280        Computes the rms of m/z errors to minimize when calibrating.
    -281        This is adapted from from spike.
    -282
    -283        Parameters
    -284        ----------
    -285        param : list of floats
    -286            generated by minimize function from scipy optimize.
    -287        cal_peaks_mz : list of floats
    -288            masses of measured peaks to use in mass calibration.
    -289        cal_peaks_mz : list of floats
    -290            reference mz values of found calibration points.
    -291        order : int, optional
    -292            order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    -293
    -294        Returns
    -295        -------
    -296        rmserror : float
    -297            root mean square mass error for calibration points.
    -298
    -299        """
    -300        Aterm = param[0]
    -301        Bterm = param[1]
    -302        try:
    -303            Cterm = param[2]
    -304        except IndexError:
    -305            pass
    -306
    -307        # get the mspeaks from the mass spectrum object which were calibration points
    -308        #mspeaks = [self.mass_spectrum.mspeaks[x] for x in imzmeas]
    -309        # get their calibrated mass values
    -310        #mspeakmzs = [x.mz_cal for x in mspeaks]
    -311        cal_peaks_mz = np.asarray(cal_peaks_mz)
    -312
    -313        # linearz
    -314        if order == 1:
    -315            ref_recal_points = (Aterm * cal_peaks_mz) + Bterm
    -316        # quadratic
    -317        elif order == 2:
    -318            ref_recal_points = (Aterm * (cal_peaks_mz)) + \
    -319                (Bterm * np.power((cal_peaks_mz), 2) + Cterm)
    -320
    -321        # sort both the calibration points (measured, recalibrated)
    -322        ref_recal_points.sort()
    -323        # and sort the calibration points (theoretical, predefined)
    -324        cal_refs_mz.sort()
    -325
    -326        # calculate the ppm error for each calibration point
    -327        error = ((ref_recal_points - cal_refs_mz) / cal_refs_mz) * 1e6
    -328        # calculate the root mean square error - this is our target to minimize
    -329        rmserror = np.sqrt(np.mean(error**2))
    -330        return rmserror
    +            
    307    def robust_calib(
    +308        self,
    +309        param: list[float],
    +310        cal_peaks_mz: list[float],
    +311        cal_refs_mz: list[float],
    +312        order: int = 1,
    +313    ):
    +314        """Recalibration function
    +315
    +316        Computes the rms of m/z errors to minimize when calibrating.
    +317        This is adapted from from spike.
    +318
    +319        Parameters
    +320        ----------
    +321        param : list of floats
    +322            generated by minimize function from scipy optimize.
    +323        cal_peaks_mz : list of floats
    +324            masses of measured peaks to use in mass calibration.
    +325        cal_peaks_mz : list of floats
    +326            reference mz values of found calibration points.
    +327        order : int, optional
    +328            order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    +329
    +330        Returns
    +331        -------
    +332        rmserror : float
    +333            root mean square mass error for calibration points.
    +334
    +335        """
    +336        Aterm = param[0]
    +337        Bterm = param[1]
    +338        try:
    +339            Cterm = param[2]
    +340        except IndexError:
    +341            pass
    +342
    +343        # get the mspeaks from the mass spectrum object which were calibration points
    +344        # mspeaks = [self.mass_spectrum.mspeaks[x] for x in imzmeas]
    +345        # get their calibrated mass values
    +346        # mspeakmzs = [x.mz_cal for x in mspeaks]
    +347        cal_peaks_mz = np.asarray(cal_peaks_mz)
    +348
    +349        # linearz
    +350        if order == 1:
    +351            ref_recal_points = (Aterm * cal_peaks_mz) + Bterm
    +352        # quadratic
    +353        elif order == 2:
    +354            ref_recal_points = (Aterm * (cal_peaks_mz)) + (
    +355                Bterm * np.power((cal_peaks_mz), 2) + Cterm
    +356            )
    +357
    +358        # sort both the calibration points (measured, recalibrated)
    +359        ref_recal_points.sort()
    +360        # and sort the calibration points (theoretical, predefined)
    +361        cal_refs_mz.sort()
    +362
    +363        # calculate the ppm error for each calibration point
    +364        error = ((ref_recal_points - cal_refs_mz) / cal_refs_mz) * 1e6
    +365        # calculate the root mean square error - this is our target to minimize
    +366        rmserror = np.sqrt(np.mean(error**2))
    +367        return rmserror
     
    @@ -1560,128 +1769,166 @@
    Returns
    -
    332    def recalibrate_mass_spectrum(self, cal_peaks_mz : list[float], cal_refs_mz : list[float], 
    -333                                        order : int=1, diagnostic : bool=False):
    -334
    -335        """ Main recalibration function which uses a robust linear regression
    -336
    -337        This function performs the recalibration of the mass spectrum object. 
    -338        It iteratively applies 
    -339
    -340        Parameters
    -341        ----------
    -342        cal_peaks_mz : list of float
    -343            masses of measured peaks to use in mass calibration.
    -344        cal_refs_mz : list of float
    -345            reference mz values of found calibration points.
    -346        order : int, optional
    -347            order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    -348
    -349        Returns
    -350        -------
    -351        mass_spectrum : CoreMS mass spectrum object
    -352            Calibrated mass spectrum object
    -353
    -354
    -355        Notes 
    -356        -----
    -357        This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method. 
    -358
    -359        References
    -360        ----------
    -361        1.  Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A. 
    -362            SPIKE a Processing Software dedicated to Fourier Spectroscopies 
    -363            https://arxiv.org/abs/1608.06777 (2016)
    -364        2.  SPIKE - https://github.com/spike-project/spike 
    -365
    -366        """
    -367        # initialise parameters for recalibration
    -368        # these are the 'Aterm, Bterm, Cterm'
    -369        # as spectra are already freq->mz calibrated, these terms are very small
    -370        # may be beneficial to formally separate them from the freq->mz terms
    -371        if order == 1:
    -372            Po = [1, 0]
    -373        elif order == 2:
    -374            Po = [1, 0, 0]
    -375
    -376        if len(cal_peaks_mz) >= 2:
    -377            if self.mzsegment: #If only part of the spectrum is to be recalibrated
    -378                mz_exp_peaks = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum])
    -379                # Split the array into two parts - one to recailbrate, one to keep unchanged. 
    -380                mz_exp_peaks_tocal = mz_exp_peaks[(mz_exp_peaks>=min(self.mzsegment)) & (mz_exp_peaks<=max(self.mzsegment))]
    -381                mz_exp_peaks_unchanged = mz_exp_peaks[~(mz_exp_peaks>=min(self.mzsegment)) | ~(mz_exp_peaks<=max(self.mzsegment))]
    -382                # TODO: - segmented calibration needs a way to better track the calibration args/values... 
    -383                if not self.mass_spectrum.is_centroid:
    -384                    mz_exp_profile = np.array(self.mass_spectrum.mz_exp_profile)
    -385                    # Split the array into two parts - one to recailbrate, one to keep unchanged. 
    -386                    mz_exp_profile_tocal = mz_exp_profile[(mz_exp_profile>=min(self.mzsegment)) & (mz_exp_profile<=max(self.mzsegment))]
    -387                    mz_exp_profile_unchanged = mz_exp_profile[~(mz_exp_profile>=min(self.mzsegment)) | ~(mz_exp_profile<=max(self.mzsegment))]
    -388            else: #if just recalibrating the whole spectrum
    -389                mz_exp_peaks_tocal = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum])
    -390                if not self.mass_spectrum.is_centroid:
    -391                    mz_exp_profile_tocal = np.array(self.mass_spectrum.mz_exp_profile)
    -392
    -393
    -394            minimize_method = self.mass_spectrum.settings.calib_minimize_method
    -395            res = minimize(self.robust_calib, Po, args=(cal_peaks_mz, cal_refs_mz, order), method=minimize_method)
    -396            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -397                print("minimize function completed with RMS error of: {:0.3f} ppm".format(res['fun']))
    -398                print("minimize function performed {:1d} fn evals and {:1d} iterations".format(res['nfev'], res['nit']))
    -399            Pn = res.x
    -400
    -401            #mz_exp_ms = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum])
    -402
    -403            if order == 1:
    -404                mz_domain = (Pn[0] * mz_exp_peaks_tocal) + Pn[1]
    -405                if not self.mass_spectrum.is_centroid:
    -406                    mz_profile_calc = (Pn[0] * mz_exp_profile_tocal) + Pn[1]
    -407
    -408            elif order == 2:
    -409                mz_domain = (Pn[0] * (mz_exp_peaks_tocal)) + \
    -410                    (Pn[1] * np.power((mz_exp_peaks_tocal), 2) + Pn[2])
    -411
    -412                if not self.mass_spectrum.is_centroid:
    -413                    mz_profile_calc = (Pn[0] * (mz_exp_profile_tocal)) + \
    -414                        (Pn[1] * np.power((mz_exp_profile_tocal), 2) + Pn[2])
    -415
    -416            if self.mzsegment:
    -417                # Recombine the mass domains
    -418                mz_domain = np.concatenate([mz_domain,mz_exp_peaks_unchanged])
    -419                mz_domain.sort()
    -420                if not self.mass_spectrum.is_centroid:
    -421                    mz_profile_calc = np.concatenate([mz_profile_calc,mz_exp_profile_unchanged])
    -422                    mz_profile_calc.sort()
    -423                # Sort them 
    -424                if mz_exp_peaks[0] > mz_exp_peaks[1]: #If originally descending mass order
    -425                    mz_domain = mz_domain[::-1]
    -426                    if not self.mass_spectrum.is_centroid:
    -427                        mz_profile_calc = mz_profile_calc[::-1]
    -428
    -429            self.mass_spectrum.mz_cal = mz_domain
    -430            if not self.mass_spectrum.is_centroid:
    -431                self.mass_spectrum.mz_cal_profile = mz_profile_calc
    -432
    -433            self.mass_spectrum.calibration_order = order
    -434            self.mass_spectrum.calibration_RMS = float(res['fun'])
    -435            self.mass_spectrum.calibration_points = int(len(cal_refs_mz))
    -436            self.mass_spectrum.calibration_ref_mzs = cal_refs_mz
    -437            self.mass_spectrum.calibration_meas_mzs = cal_peaks_mz
    -438
    -439            self.mass_spectrum.calibration_segment = self.mzsegment
    -440
    -441            if diagnostic:
    -442                return self.mass_spectrum,res
    -443            return self.mass_spectrum
    -444        else:
    -445            warnings.warn("Too few calibration points - aborting.")
    -446            return self.mass_spectrum
    +            
    369    def recalibrate_mass_spectrum(
    +370        self,
    +371        cal_peaks_mz: list[float],
    +372        cal_refs_mz: list[float],
    +373        order: int = 1,
    +374        diagnostic: bool = False,
    +375    ):
    +376        """Main recalibration function which uses a robust linear regression
    +377
    +378        This function performs the recalibration of the mass spectrum object.
    +379        It iteratively applies
    +380
    +381        Parameters
    +382        ----------
    +383        cal_peaks_mz : list of float
    +384            masses of measured peaks to use in mass calibration.
    +385        cal_refs_mz : list of float
    +386            reference mz values of found calibration points.
    +387        order : int, optional
    +388            order of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    +389
    +390        Returns
    +391        -------
    +392        mass_spectrum : CoreMS mass spectrum object
    +393            Calibrated mass spectrum object
    +394
    +395
    +396        Notes
    +397        -----
    +398        This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method.
    +399
    +400        References
    +401        ----------
    +402        1.  Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A.
    +403            SPIKE a Processing Software dedicated to Fourier Spectroscopies
    +404            https://arxiv.org/abs/1608.06777 (2016)
    +405        2.  SPIKE - https://github.com/spike-project/spike
    +406
    +407        """
    +408        # initialise parameters for recalibration
    +409        # these are the 'Aterm, Bterm, Cterm'
    +410        # as spectra are already freq->mz calibrated, these terms are very small
    +411        # may be beneficial to formally separate them from the freq->mz terms
    +412        if order == 1:
    +413            Po = [1, 0]
    +414        elif order == 2:
    +415            Po = [1, 0, 0]
    +416
    +417        if len(cal_peaks_mz) >= 2:
    +418            if self.mzsegment:  # If only part of the spectrum is to be recalibrated
    +419                mz_exp_peaks = np.array(
    +420                    [mspeak.mz_exp for mspeak in self.mass_spectrum]
    +421                )
    +422                # Split the array into two parts - one to recailbrate, one to keep unchanged.
    +423                mz_exp_peaks_tocal = mz_exp_peaks[
    +424                    (mz_exp_peaks >= min(self.mzsegment))
    +425                    & (mz_exp_peaks <= max(self.mzsegment))
    +426                ]
    +427                mz_exp_peaks_unchanged = mz_exp_peaks[
    +428                    ~(mz_exp_peaks >= min(self.mzsegment))
    +429                    | ~(mz_exp_peaks <= max(self.mzsegment))
    +430                ]
    +431                # TODO: - segmented calibration needs a way to better track the calibration args/values...
    +432                if not self.mass_spectrum.is_centroid:
    +433                    mz_exp_profile = np.array(self.mass_spectrum.mz_exp_profile)
    +434                    # Split the array into two parts - one to recailbrate, one to keep unchanged.
    +435                    mz_exp_profile_tocal = mz_exp_profile[
    +436                        (mz_exp_profile >= min(self.mzsegment))
    +437                        & (mz_exp_profile <= max(self.mzsegment))
    +438                    ]
    +439                    mz_exp_profile_unchanged = mz_exp_profile[
    +440                        ~(mz_exp_profile >= min(self.mzsegment))
    +441                        | ~(mz_exp_profile <= max(self.mzsegment))
    +442                    ]
    +443            else:  # if just recalibrating the whole spectrum
    +444                mz_exp_peaks_tocal = np.array(
    +445                    [mspeak.mz_exp for mspeak in self.mass_spectrum]
    +446                )
    +447                if not self.mass_spectrum.is_centroid:
    +448                    mz_exp_profile_tocal = np.array(self.mass_spectrum.mz_exp_profile)
    +449
    +450            minimize_method = self.mass_spectrum.settings.calib_minimize_method
    +451            res = minimize(
    +452                self.robust_calib,
    +453                Po,
    +454                args=(cal_peaks_mz, cal_refs_mz, order),
    +455                method=minimize_method,
    +456            )
    +457            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +458                print(
    +459                    "minimize function completed with RMS error of: {:0.3f} ppm".format(
    +460                        res["fun"]
    +461                    )
    +462                )
    +463                print(
    +464                    "minimize function performed {:1d} fn evals and {:1d} iterations".format(
    +465                        res["nfev"], res["nit"]
    +466                    )
    +467                )
    +468            Pn = res.x
    +469
    +470            # mz_exp_ms = np.array([mspeak.mz_exp for mspeak in self.mass_spectrum])
    +471
    +472            if order == 1:
    +473                mz_domain = (Pn[0] * mz_exp_peaks_tocal) + Pn[1]
    +474                if not self.mass_spectrum.is_centroid:
    +475                    mz_profile_calc = (Pn[0] * mz_exp_profile_tocal) + Pn[1]
    +476
    +477            elif order == 2:
    +478                mz_domain = (Pn[0] * (mz_exp_peaks_tocal)) + (
    +479                    Pn[1] * np.power((mz_exp_peaks_tocal), 2) + Pn[2]
    +480                )
    +481
    +482                if not self.mass_spectrum.is_centroid:
    +483                    mz_profile_calc = (Pn[0] * (mz_exp_profile_tocal)) + (
    +484                        Pn[1] * np.power((mz_exp_profile_tocal), 2) + Pn[2]
    +485                    )
    +486
    +487            if self.mzsegment:
    +488                # Recombine the mass domains
    +489                mz_domain = np.concatenate([mz_domain, mz_exp_peaks_unchanged])
    +490                mz_domain.sort()
    +491                if not self.mass_spectrum.is_centroid:
    +492                    mz_profile_calc = np.concatenate(
    +493                        [mz_profile_calc, mz_exp_profile_unchanged]
    +494                    )
    +495                    mz_profile_calc.sort()
    +496                # Sort them
    +497                if (
    +498                    mz_exp_peaks[0] > mz_exp_peaks[1]
    +499                ):  # If originally descending mass order
    +500                    mz_domain = mz_domain[::-1]
    +501                    if not self.mass_spectrum.is_centroid:
    +502                        mz_profile_calc = mz_profile_calc[::-1]
    +503
    +504            self.mass_spectrum.mz_cal = mz_domain
    +505            if not self.mass_spectrum.is_centroid:
    +506                self.mass_spectrum.mz_cal_profile = mz_profile_calc
    +507
    +508            self.mass_spectrum.calibration_order = order
    +509            self.mass_spectrum.calibration_RMS = float(res["fun"])
    +510            self.mass_spectrum.calibration_points = int(len(cal_refs_mz))
    +511            self.mass_spectrum.calibration_ref_mzs = cal_refs_mz
    +512            self.mass_spectrum.calibration_meas_mzs = cal_peaks_mz
    +513
    +514            self.mass_spectrum.calibration_segment = self.mzsegment
    +515
    +516            if diagnostic:
    +517                return self.mass_spectrum, res
    +518            return self.mass_spectrum
    +519        else:
    +520            warnings.warn("Too few calibration points - aborting.")
    +521            return self.mass_spectrum
     

    Main recalibration function which uses a robust linear regression

    -

    This function performs the recalibration of the mass spectrum object. -It iteratively applies

    +

    This function performs the recalibration of the mass spectrum object. +It iteratively applies

    Parameters
    @@ -1703,13 +1950,13 @@
    Returns
    Notes
    -

    This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method.

    +

    This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method.

    References
      -
    1. Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A. -SPIKE a Processing Software dedicated to Fourier Spectroscopies +
    2. Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A. +SPIKE a Processing Software dedicated to Fourier Spectroscopies https://arxiv.org/abs/1608.06777 (2016)
    3. SPIKE - https://github.com/spike-project/spike
    @@ -1728,39 +1975,46 @@
    References
    -
    448    def run(self):
    -449        """ Run the calibration routine
    -450        
    -451        This function runs the calibration routine.
    -452        
    -453        """
    -454        calib_ppm_error_threshold = self.mass_spectrum.settings.calib_sn_threshold
    -455        max_calib_ppm_error = self.mass_spectrum.settings.max_calib_ppm_error
    -456        min_calib_ppm_error = self.mass_spectrum.settings.min_calib_ppm_error
    -457        calib_pol_order = self.mass_spectrum.settings.calib_pol_order
    -458        calibration_ref_match_method = self.mass_spectrum.settings.calibration_ref_match_method
    -459        calibration_ref_match_tolerance = self.mass_spectrum.settings.calibration_ref_match_tolerance
    -460        calibration_ref_match_std_raw_error_limit = self.mass_spectrum.settings.calibration_ref_match_std_raw_error_limit
    -461
    -462        # load reference mass list
    -463        df_ref = self.load_ref_mass_list()
    -464
    -465        # find calibration points
    -466        cal_peaks_mz, cal_refs_mz = self.find_calibration_points(df_ref,
    -467                                                       calib_ppm_error_threshold=(min_calib_ppm_error,
    -468                                                                                  max_calib_ppm_error),
    -469                                                       calib_snr_threshold=calib_ppm_error_threshold,
    -470                                                       calibration_ref_match_method = calibration_ref_match_method,
    -471                                                       calibration_ref_match_tolerance = calibration_ref_match_tolerance,
    -472                                                       calibration_ref_match_std_raw_error_limit = calibration_ref_match_std_raw_error_limit)
    -473        if len(cal_peaks_mz)==2:
    -474            self.mass_spectrum.settings.calib_pol_order = 1
    -475            calib_pol_order = 1
    -476            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -477                print('Only 2 calibration points found, forcing a linear recalibration')
    -478        elif len(cal_peaks_mz)<2:
    -479            warnings.warn('Too few calibration points found, function will fail')
    -480        self.recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=calib_pol_order)
    +            
    523    def run(self):
    +524        """Run the calibration routine
    +525
    +526        This function runs the calibration routine.
    +527
    +528        """
    +529        calib_ppm_error_threshold = self.mass_spectrum.settings.calib_sn_threshold
    +530        max_calib_ppm_error = self.mass_spectrum.settings.max_calib_ppm_error
    +531        min_calib_ppm_error = self.mass_spectrum.settings.min_calib_ppm_error
    +532        calib_pol_order = self.mass_spectrum.settings.calib_pol_order
    +533        calibration_ref_match_method = (
    +534            self.mass_spectrum.settings.calibration_ref_match_method
    +535        )
    +536        calibration_ref_match_tolerance = (
    +537            self.mass_spectrum.settings.calibration_ref_match_tolerance
    +538        )
    +539        calibration_ref_match_std_raw_error_limit = (
    +540            self.mass_spectrum.settings.calibration_ref_match_std_raw_error_limit
    +541        )
    +542
    +543        # load reference mass list
    +544        df_ref = self.load_ref_mass_list()
    +545
    +546        # find calibration points
    +547        cal_peaks_mz, cal_refs_mz = self.find_calibration_points(
    +548            df_ref,
    +549            calib_ppm_error_threshold=(min_calib_ppm_error, max_calib_ppm_error),
    +550            calib_snr_threshold=calib_ppm_error_threshold,
    +551            calibration_ref_match_method=calibration_ref_match_method,
    +552            calibration_ref_match_tolerance=calibration_ref_match_tolerance,
    +553            calibration_ref_match_std_raw_error_limit=calibration_ref_match_std_raw_error_limit,
    +554        )
    +555        if len(cal_peaks_mz) == 2:
    +556            self.mass_spectrum.settings.calib_pol_order = 1
    +557            calib_pol_order = 1
    +558            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +559                print("Only 2 calibration points found, forcing a linear recalibration")
    +560        elif len(cal_peaks_mz) < 2:
    +561            warnings.warn("Too few calibration points found, function will fail")
    +562        self.recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=calib_pol_order)
     
    diff --git a/docs/corems/mass_spectrum/calc/CalibrationCalc.html b/docs/corems/mass_spectrum/calc/CalibrationCalc.html index 7ee554ae..ddf1dae9 100644 --- a/docs/corems/mass_spectrum/calc/CalibrationCalc.html +++ b/docs/corems/mass_spectrum/calc/CalibrationCalc.html @@ -95,48 +95,48 @@

      1import numpy as np
       2
    -  3class FreqDomain_Calibration:
    -  4    """ Frequency Domain Calibration class for mass spectrum.
    -  5
    -  6    Parameters
    -  7    ----------
    -  8    mass_spectrum : MassSpectrum
    -  9        The mass spectrum object.
    - 10    selected_mass_peaks : list
    - 11        List of selected mass peaks.
    - 12    include_isotopologue : bool, optional
    - 13        Flag to include isotopologues, by default False.
    - 14
    - 15    Attributes
    - 16    ----------
    - 17    mz_exp : ndarray
    - 18        Array of experimental m/z values.
    - 19    mz_calc : ndarray
    - 20        Array of calculated m/z values.
    - 21    freq_exp : ndarray
    - 22        Array of experimental frequencies.
    - 23    mass_spectrum : MassSpectrum
    - 24        The mass spectrum object.
    - 25    freq_exp_ms : ndarray
    - 26        Array of experimental frequencies for mass spectrum.
    - 27
    - 28    Methods
    - 29    -------
    - 30    * recal_mass_spec(mz_domain, Aterm, Bterm, Cterm). 
    - 31        Recalibrate the mass spectrum with the given parameters.  
    - 32    * linear(). 
    - 33        Perform linear calibration.  
    - 34    * quadratic(iteration=False). 
    - 35        Perform quadratic calibration.  
    - 36    * ledford_calibration(iteration=False). 
    - 37        Perform Ledford calibration.  
    - 38    * step_fit(steps=4).   
    - 39        Perform step fit calibration.  
    - 40
    - 41    """
    - 42
    - 43    def __init__(self, mass_spectrum, selected_mass_peaks, include_isotopologue=False):
    - 44        
    +  3
    +  4class FreqDomain_Calibration:
    +  5    """Frequency Domain Calibration class for mass spectrum.
    +  6
    +  7    Parameters
    +  8    ----------
    +  9    mass_spectrum : MassSpectrum
    + 10        The mass spectrum object.
    + 11    selected_mass_peaks : list
    + 12        List of selected mass peaks.
    + 13    include_isotopologue : bool, optional
    + 14        Flag to include isotopologues, by default False.
    + 15
    + 16    Attributes
    + 17    ----------
    + 18    mz_exp : ndarray
    + 19        Array of experimental m/z values.
    + 20    mz_calc : ndarray
    + 21        Array of calculated m/z values.
    + 22    freq_exp : ndarray
    + 23        Array of experimental frequencies.
    + 24    mass_spectrum : MassSpectrum
    + 25        The mass spectrum object.
    + 26    freq_exp_ms : ndarray
    + 27        Array of experimental frequencies for mass spectrum.
    + 28
    + 29    Methods
    + 30    -------
    + 31    * recal_mass_spec(mz_domain, Aterm, Bterm, Cterm).
    + 32        Recalibrate the mass spectrum with the given parameters.
    + 33    * linear().
    + 34        Perform linear calibration.
    + 35    * quadratic(iteration=False).
    + 36        Perform quadratic calibration.
    + 37    * ledford_calibration(iteration=False).
    + 38        Perform Ledford calibration.
    + 39    * step_fit(steps=4).
    + 40        Perform step fit calibration.
    + 41
    + 42    """
    + 43
    + 44    def __init__(self, mass_spectrum, selected_mass_peaks, include_isotopologue=False):
      45        self.selected_mspeaks = selected_mass_peaks
      46        error = list()
      47        freq_exp = list()
    @@ -144,198 +144,208 @@ 

    49 mz_exp = list() 50 51 for mspeak in selected_mass_peaks: - 52 - 53 if not include_isotopologue: - 54 molecular_formulas = [ - 55 formula for formula in mspeak if not formula.is_isotopologue] + 52 if not include_isotopologue: + 53 molecular_formulas = [ + 54 formula for formula in mspeak if not formula.is_isotopologue + 55 ] 56 else: 57 molecular_formulas = mspeak 58 59 for molecular_formula in molecular_formulas: - 60 - 61 freq_exp.append(mspeak.freq_exp) - 62 error.append( - 63 molecular_formula.mz_error) - 64 mz_calc.append(molecular_formula.mz_calc) - 65 mz_exp.append(mspeak.mz_exp) - 66 - 67 self.mz_exp = np.array(mz_exp) - 68 self.mz_calc = np.array(mz_calc) - 69 self.freq_exp = np.array(freq_exp) - 70 self.mass_spectrum = mass_spectrum - 71 self.freq_exp_ms = np.array( - 72 [mspeak.freq_exp for mspeak in mass_spectrum]) + 60 freq_exp.append(mspeak.freq_exp) + 61 error.append(molecular_formula.mz_error) + 62 mz_calc.append(molecular_formula.mz_calc) + 63 mz_exp.append(mspeak.mz_exp) + 64 + 65 self.mz_exp = np.array(mz_exp) + 66 self.mz_calc = np.array(mz_calc) + 67 self.freq_exp = np.array(freq_exp) + 68 self.mass_spectrum = mass_spectrum + 69 self.freq_exp_ms = np.array([mspeak.freq_exp for mspeak in mass_spectrum]) + 70 + 71 def recal_mass_spec(self, mz_domain, Aterm, Bterm, Cterm): + 72 """Recalibrate the mass spectrum with the given parameters. 73 - 74 def recal_mass_spec(self, mz_domain, Aterm, Bterm, Cterm): - 75 """ Recalibrate the mass spectrum with the given parameters. - 76 - 77 Parameters - 78 ---------- - 79 mz_domain : ndarray - 80 Array of m/z values for recalibration. - 81 Aterm : float - 82 Aterm parameter for recalibration. - 83 Bterm : float - 84 Bterm parameter for recalibration. - 85 Cterm : float - 86 Cterm parameter for recalibration. - 87 - 88 """ - 89 self.mass_spectrum._calibration_terms = (Aterm, Bterm, 0) - 90 self.mass_spectrum.mz_cal = mz_domain - 91 - 92 def linear(self): - 93 """ Perform linear calibration. 
- 94 - 95 """ - 96 matrix = np.vstack([1/self.freq_exp, np.ones(len(self.freq_exp))]).T - 97 Aterm, Bterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0] - 98 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: - 99 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) -100 print('Linear Calibration %.2f Aterm, %.2f Bterm ' %(Aterm, Bterm)) -101 mz_domain = (Aterm/self.freq_exp_ms) + Bterm -102 self.recal_mass_spec(mz_domain, Aterm, Bterm, 0) -103 -104 def quadratic(self, iteration : bool=False): -105 """ Perform quadratic calibration. + 74 Parameters + 75 ---------- + 76 mz_domain : ndarray + 77 Array of m/z values for recalibration. + 78 Aterm : float + 79 Aterm parameter for recalibration. + 80 Bterm : float + 81 Bterm parameter for recalibration. + 82 Cterm : float + 83 Cterm parameter for recalibration. + 84 + 85 """ + 86 self.mass_spectrum._calibration_terms = (Aterm, Bterm, 0) + 87 self.mass_spectrum.mz_cal = mz_domain + 88 + 89 def linear(self): + 90 """Perform linear calibration.""" + 91 matrix = np.vstack([1 / self.freq_exp, np.ones(len(self.freq_exp))]).T + 92 Aterm, Bterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0] + 93 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: + 94 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) + 95 print("Linear Calibration %.2f Aterm, %.2f Bterm " % (Aterm, Bterm)) + 96 mz_domain = (Aterm / self.freq_exp_ms) + Bterm + 97 self.recal_mass_spec(mz_domain, Aterm, Bterm, 0) + 98 + 99 def quadratic(self, iteration: bool = False): +100 """Perform quadratic calibration. +101 +102 Parameters +103 ---------- +104 iteration : bool, optional +105 Flag to perform iterative calibration, by default False. 106 -107 Parameters -108 ---------- -109 iteration : bool, optional -110 Flag to perform iterative calibration, by default False. 
+107 """ +108 mz_calc = self.mz_calc +109 freq_exp = self.freq_exp +110 mz_exp = self.mz_exp 111 -112 """ -113 mz_calc = self.mz_calc -114 freq_exp = self.freq_exp -115 mz_exp = self.mz_exp -116 -117 error = ((mz_exp-mz_calc)/mz_calc) * 1000000 -118 last_rms = np.sqrt(np.mean(error**2)) -119 while True: -120 -121 matrix = np.vstack( -122 [1/freq_exp, 1/np.power(freq_exp, 2), np.ones(len(freq_exp))]).T -123 Aterm, Bterm, Cterm = np.linalg.lstsq( -124 matrix, self.mz_calc, rcond=None)[0] -125 mz_exp = (Aterm / (freq_exp)) + \ -126 (Bterm / np.power((freq_exp), 2)) + Cterm -127 error = ((mz_exp-mz_calc)/mz_calc)*1000000 -128 rms = np.sqrt(np.mean(error**2)) -129 std = np.std(error) -130 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -131 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) -132 print('Quadratic Calibration %.2f RMS, %.2f std, %.2f Aterm, %.2f Bterm ' %(rms, std, Aterm, Bterm)) -133 if rms < last_rms: -134 last_rms = rms -135 freq_exp = (Aterm + np.sqrt(np.power(-Aterm, 2) - -136 (4*Cterm*(mz_exp-Bterm)))) / (2*mz_exp) -137 -138 mz_domain = (Aterm / (self.freq_exp_ms)) + \ -139 (Bterm / np.power((self.freq_exp_ms), 2)) + Cterm -140 self.recal_mass_spec(mz_domain, Aterm, Bterm, Cterm) -141 if not iteration: -142 break -143 else: -144 break -145 -146 def ledford_calibration(self, iteration : bool=False): -147 """ Perform Ledford calibration. -148 -149 Parameters -150 ---------- -151 iteration : bool, optional -152 Flag to perform iterative calibration, by default False. 
-153 -154 """ -155 mz_calc = self.mz_calc -156 freq_exp = self.freq_exp -157 mz_exp = self.mz_exp -158 -159 error = ((mz_exp-self.mz_calc)/self.mz_calc) * 1000000 -160 last_rms = np.sqrt(np.mean(error**2)) -161 while True: -162 -163 matrix = np.vstack([1/freq_exp, 1/np.power(freq_exp, 2)]).T +112 error = ((mz_exp - mz_calc) / mz_calc) * 1000000 +113 last_rms = np.sqrt(np.mean(error**2)) +114 while True: +115 matrix = np.vstack( +116 [1 / freq_exp, 1 / np.power(freq_exp, 2), np.ones(len(freq_exp))] +117 ).T +118 Aterm, Bterm, Cterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0] +119 mz_exp = (Aterm / (freq_exp)) + (Bterm / np.power((freq_exp), 2)) + Cterm +120 error = ((mz_exp - mz_calc) / mz_calc) * 1000000 +121 rms = np.sqrt(np.mean(error**2)) +122 std = np.std(error) +123 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: +124 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) +125 print( +126 "Quadratic Calibration %.2f RMS, %.2f std, %.2f Aterm, %.2f Bterm " +127 % (rms, std, Aterm, Bterm) +128 ) +129 if rms < last_rms: +130 last_rms = rms +131 freq_exp = ( +132 Aterm +133 + np.sqrt(np.power(-Aterm, 2) - (4 * Cterm * (mz_exp - Bterm))) +134 ) / (2 * mz_exp) +135 +136 mz_domain = ( +137 (Aterm / (self.freq_exp_ms)) +138 + (Bterm / np.power((self.freq_exp_ms), 2)) +139 + Cterm +140 ) +141 self.recal_mass_spec(mz_domain, Aterm, Bterm, Cterm) +142 if not iteration: +143 break +144 else: +145 break +146 +147 def ledford_calibration(self, iteration: bool = False): +148 """Perform Ledford calibration. +149 +150 Parameters +151 ---------- +152 iteration : bool, optional +153 Flag to perform iterative calibration, by default False. 
+154 +155 """ +156 mz_calc = self.mz_calc +157 freq_exp = self.freq_exp +158 mz_exp = self.mz_exp +159 +160 error = ((mz_exp - self.mz_calc) / self.mz_calc) * 1000000 +161 last_rms = np.sqrt(np.mean(error**2)) +162 while True: +163 matrix = np.vstack([1 / freq_exp, 1 / np.power(freq_exp, 2)]).T 164 Aterm, Bterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0] 165 166 mz_exp = (Aterm / (freq_exp)) + (Bterm / np.power((freq_exp), 2)) -167 error = ((mz_exp-mz_calc)/mz_calc)*1000000 +167 error = ((mz_exp - mz_calc) / mz_calc) * 1000000 168 rms = np.sqrt(np.mean(error**2)) 169 std = np.std(error) 170 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -171 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) -172 print('Ledford Calibration %.2f RMS, %.2f std, %.2f Aterm, %.2f Bterm ' %(rms, std, Aterm, Bterm)) -173 if rms < last_rms: -174 last_rms = rms -175 freq_exp = (Aterm + np.sqrt(np.power(-Aterm, 2) - -176 (4*mz_exp-Bterm))) / (2*mz_exp) -177 mz_domain = (Aterm / (self.freq_exp_ms)) + \ -178 (Bterm / np.power((self.freq_exp_ms), 2)) -179 self.recal_mass_spec(mz_domain, Aterm, Bterm, 0) -180 if not iteration: -181 break -182 else: -183 break -184 -185 def step_fit(self, steps : int=4): -186 """ Perform step fit calibration. -187 -188 Parameters -189 ---------- -190 steps : int, optional -191 Number of steps for step fit calibration, by default 4. 
+171 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) +172 print( +173 "Ledford Calibration %.2f RMS, %.2f std, %.2f Aterm, %.2f Bterm " +174 % (rms, std, Aterm, Bterm) +175 ) +176 if rms < last_rms: +177 last_rms = rms +178 freq_exp = ( +179 Aterm + np.sqrt(np.power(-Aterm, 2) - (4 * mz_exp - Bterm)) +180 ) / (2 * mz_exp) +181 mz_domain = (Aterm / (self.freq_exp_ms)) + ( +182 Bterm / np.power((self.freq_exp_ms), 2) +183 ) +184 self.recal_mass_spec(mz_domain, Aterm, Bterm, 0) +185 if not iteration: +186 break +187 else: +188 break +189 +190 def step_fit(self, steps: int = 4): +191 """Perform step fit calibration. 192 -193 """ -194 def f_to_mz(f, A, B, C, a): -195 return (A / f) + (B / np.power(f, 2)) + (C*a / np.power(f, 2)) -196 -197 def mz_to_f(m, A, B, C): return (-A-m/B) -198 -199 tuple_indexes = [(i, i+steps) for i in range(0, len(self.selected_mspeaks)-steps, steps)] -200 -201 for current_index, tuple_index in enumerate(tuple_indexes): -202 -203 mspeak_ii, mspeak_fi = tuple_index -204 freq_exp = list() -205 mz_calc = list() -206 mz_exp = list() -207 abu = list() -208 -209 for i in range(mspeak_ii, mspeak_fi+1): -210 -211 best_formula = self.selected_mspeaks[i].best_molecular_formula_candidate -212 -213 freq_exp.append(self.selected_mspeaks[i].freq_exp) -214 mz_calc.append(best_formula.mz_calc) -215 mz_exp.append(self.selected_mspeaks[i].mz_exp) -216 abu.append(self.selected_mspeaks[i].abundance) -217 -218 -219 freq_exp = np.array(freq_exp) -220 mz_calc = np.array(mz_calc) -221 mz_exp = np.array(mz_exp) -222 abu = np.array(abu) -223 -224 if current_index == len(tuple_indexes)-1: -225 ms_peaks_indexes = (self.selected_mspeaks[mspeak_ii].index, 0) -226 -227 elif current_index == 0: -228 ms_peaks_indexes = (len(self.mass_spectrum)-1, -229 self.selected_mspeaks[mspeak_fi].index-1) -230 else: -231 ms_peaks_indexes = ( -232 self.selected_mspeaks[mspeak_ii].index, self.selected_mspeaks[mspeak_fi].index-1) -233 -234 final_index, start_index = ms_peaks_indexes -235 
-236 matrix = np.vstack([1/freq_exp, 1/np.power(freq_exp, 2)]).T -237 A, B = np.linalg.lstsq(matrix, mz_calc, rcond=None)[0] -238 C = 0 -239 -240 for mspeak in self.mass_spectrum[start_index:final_index]: -241 mspeak.mz_cal = f_to_mz(mspeak.freq_exp, A, B, C, 0) -242 -243 self.mass_spectrum.is_calibrated = True +193 Parameters +194 ---------- +195 steps : int, optional +196 Number of steps for step fit calibration, by default 4. +197 +198 """ +199 +200 def f_to_mz(f, A, B, C, a): +201 return (A / f) + (B / np.power(f, 2)) + (C * a / np.power(f, 2)) +202 +203 def mz_to_f(m, A, B, C): +204 return -A - m / B +205 +206 tuple_indexes = [ +207 (i, i + steps) for i in range(0, len(self.selected_mspeaks) - steps, steps) +208 ] +209 +210 for current_index, tuple_index in enumerate(tuple_indexes): +211 mspeak_ii, mspeak_fi = tuple_index +212 freq_exp = list() +213 mz_calc = list() +214 mz_exp = list() +215 abu = list() +216 +217 for i in range(mspeak_ii, mspeak_fi + 1): +218 best_formula = self.selected_mspeaks[i].best_molecular_formula_candidate +219 +220 freq_exp.append(self.selected_mspeaks[i].freq_exp) +221 mz_calc.append(best_formula.mz_calc) +222 mz_exp.append(self.selected_mspeaks[i].mz_exp) +223 abu.append(self.selected_mspeaks[i].abundance) +224 +225 freq_exp = np.array(freq_exp) +226 mz_calc = np.array(mz_calc) +227 mz_exp = np.array(mz_exp) +228 abu = np.array(abu) +229 +230 if current_index == len(tuple_indexes) - 1: +231 ms_peaks_indexes = (self.selected_mspeaks[mspeak_ii].index, 0) +232 +233 elif current_index == 0: +234 ms_peaks_indexes = ( +235 len(self.mass_spectrum) - 1, +236 self.selected_mspeaks[mspeak_fi].index - 1, +237 ) +238 else: +239 ms_peaks_indexes = ( +240 self.selected_mspeaks[mspeak_ii].index, +241 self.selected_mspeaks[mspeak_fi].index - 1, +242 ) +243 +244 final_index, start_index = ms_peaks_indexes +245 +246 matrix = np.vstack([1 / freq_exp, 1 / np.power(freq_exp, 2)]).T +247 A, B = np.linalg.lstsq(matrix, mz_calc, rcond=None)[0] +248 C = 0 
+249 +250 for mspeak in self.mass_spectrum[start_index:final_index]: +251 mspeak.mz_cal = f_to_mz(mspeak.freq_exp, A, B, C, 0) +252 +253 self.mass_spectrum.is_calibrated = True

    @@ -351,48 +361,47 @@

    -
      4class FreqDomain_Calibration:
    -  5    """ Frequency Domain Calibration class for mass spectrum.
    -  6
    -  7    Parameters
    -  8    ----------
    -  9    mass_spectrum : MassSpectrum
    - 10        The mass spectrum object.
    - 11    selected_mass_peaks : list
    - 12        List of selected mass peaks.
    - 13    include_isotopologue : bool, optional
    - 14        Flag to include isotopologues, by default False.
    - 15
    - 16    Attributes
    - 17    ----------
    - 18    mz_exp : ndarray
    - 19        Array of experimental m/z values.
    - 20    mz_calc : ndarray
    - 21        Array of calculated m/z values.
    - 22    freq_exp : ndarray
    - 23        Array of experimental frequencies.
    - 24    mass_spectrum : MassSpectrum
    - 25        The mass spectrum object.
    - 26    freq_exp_ms : ndarray
    - 27        Array of experimental frequencies for mass spectrum.
    - 28
    - 29    Methods
    - 30    -------
    - 31    * recal_mass_spec(mz_domain, Aterm, Bterm, Cterm). 
    - 32        Recalibrate the mass spectrum with the given parameters.  
    - 33    * linear(). 
    - 34        Perform linear calibration.  
    - 35    * quadratic(iteration=False). 
    - 36        Perform quadratic calibration.  
    - 37    * ledford_calibration(iteration=False). 
    - 38        Perform Ledford calibration.  
    - 39    * step_fit(steps=4).   
    - 40        Perform step fit calibration.  
    - 41
    - 42    """
    - 43
    - 44    def __init__(self, mass_spectrum, selected_mass_peaks, include_isotopologue=False):
    - 45        
    +            
      5class FreqDomain_Calibration:
    +  6    """Frequency Domain Calibration class for mass spectrum.
    +  7
    +  8    Parameters
    +  9    ----------
    + 10    mass_spectrum : MassSpectrum
    + 11        The mass spectrum object.
    + 12    selected_mass_peaks : list
    + 13        List of selected mass peaks.
    + 14    include_isotopologue : bool, optional
    + 15        Flag to include isotopologues, by default False.
    + 16
    + 17    Attributes
    + 18    ----------
    + 19    mz_exp : ndarray
    + 20        Array of experimental m/z values.
    + 21    mz_calc : ndarray
    + 22        Array of calculated m/z values.
    + 23    freq_exp : ndarray
    + 24        Array of experimental frequencies.
    + 25    mass_spectrum : MassSpectrum
    + 26        The mass spectrum object.
    + 27    freq_exp_ms : ndarray
    + 28        Array of experimental frequencies for mass spectrum.
    + 29
    + 30    Methods
    + 31    -------
    + 32    * recal_mass_spec(mz_domain, Aterm, Bterm, Cterm).
    + 33        Recalibrate the mass spectrum with the given parameters.
    + 34    * linear().
    + 35        Perform linear calibration.
    + 36    * quadratic(iteration=False).
    + 37        Perform quadratic calibration.
    + 38    * ledford_calibration(iteration=False).
    + 39        Perform Ledford calibration.
    + 40    * step_fit(steps=4).
    + 41        Perform step fit calibration.
    + 42
    + 43    """
    + 44
    + 45    def __init__(self, mass_spectrum, selected_mass_peaks, include_isotopologue=False):
      46        self.selected_mspeaks = selected_mass_peaks
      47        error = list()
      48        freq_exp = list()
    @@ -400,198 +409,208 @@ 

    50 mz_exp = list() 51 52 for mspeak in selected_mass_peaks: - 53 - 54 if not include_isotopologue: - 55 molecular_formulas = [ - 56 formula for formula in mspeak if not formula.is_isotopologue] + 53 if not include_isotopologue: + 54 molecular_formulas = [ + 55 formula for formula in mspeak if not formula.is_isotopologue + 56 ] 57 else: 58 molecular_formulas = mspeak 59 60 for molecular_formula in molecular_formulas: - 61 - 62 freq_exp.append(mspeak.freq_exp) - 63 error.append( - 64 molecular_formula.mz_error) - 65 mz_calc.append(molecular_formula.mz_calc) - 66 mz_exp.append(mspeak.mz_exp) - 67 - 68 self.mz_exp = np.array(mz_exp) - 69 self.mz_calc = np.array(mz_calc) - 70 self.freq_exp = np.array(freq_exp) - 71 self.mass_spectrum = mass_spectrum - 72 self.freq_exp_ms = np.array( - 73 [mspeak.freq_exp for mspeak in mass_spectrum]) + 61 freq_exp.append(mspeak.freq_exp) + 62 error.append(molecular_formula.mz_error) + 63 mz_calc.append(molecular_formula.mz_calc) + 64 mz_exp.append(mspeak.mz_exp) + 65 + 66 self.mz_exp = np.array(mz_exp) + 67 self.mz_calc = np.array(mz_calc) + 68 self.freq_exp = np.array(freq_exp) + 69 self.mass_spectrum = mass_spectrum + 70 self.freq_exp_ms = np.array([mspeak.freq_exp for mspeak in mass_spectrum]) + 71 + 72 def recal_mass_spec(self, mz_domain, Aterm, Bterm, Cterm): + 73 """Recalibrate the mass spectrum with the given parameters. 74 - 75 def recal_mass_spec(self, mz_domain, Aterm, Bterm, Cterm): - 76 """ Recalibrate the mass spectrum with the given parameters. - 77 - 78 Parameters - 79 ---------- - 80 mz_domain : ndarray - 81 Array of m/z values for recalibration. - 82 Aterm : float - 83 Aterm parameter for recalibration. - 84 Bterm : float - 85 Bterm parameter for recalibration. - 86 Cterm : float - 87 Cterm parameter for recalibration. - 88 - 89 """ - 90 self.mass_spectrum._calibration_terms = (Aterm, Bterm, 0) - 91 self.mass_spectrum.mz_cal = mz_domain - 92 - 93 def linear(self): - 94 """ Perform linear calibration. 
- 95 - 96 """ - 97 matrix = np.vstack([1/self.freq_exp, np.ones(len(self.freq_exp))]).T - 98 Aterm, Bterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0] - 99 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -100 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) -101 print('Linear Calibration %.2f Aterm, %.2f Bterm ' %(Aterm, Bterm)) -102 mz_domain = (Aterm/self.freq_exp_ms) + Bterm -103 self.recal_mass_spec(mz_domain, Aterm, Bterm, 0) -104 -105 def quadratic(self, iteration : bool=False): -106 """ Perform quadratic calibration. + 75 Parameters + 76 ---------- + 77 mz_domain : ndarray + 78 Array of m/z values for recalibration. + 79 Aterm : float + 80 Aterm parameter for recalibration. + 81 Bterm : float + 82 Bterm parameter for recalibration. + 83 Cterm : float + 84 Cterm parameter for recalibration. + 85 + 86 """ + 87 self.mass_spectrum._calibration_terms = (Aterm, Bterm, 0) + 88 self.mass_spectrum.mz_cal = mz_domain + 89 + 90 def linear(self): + 91 """Perform linear calibration.""" + 92 matrix = np.vstack([1 / self.freq_exp, np.ones(len(self.freq_exp))]).T + 93 Aterm, Bterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0] + 94 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: + 95 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) + 96 print("Linear Calibration %.2f Aterm, %.2f Bterm " % (Aterm, Bterm)) + 97 mz_domain = (Aterm / self.freq_exp_ms) + Bterm + 98 self.recal_mass_spec(mz_domain, Aterm, Bterm, 0) + 99 +100 def quadratic(self, iteration: bool = False): +101 """Perform quadratic calibration. +102 +103 Parameters +104 ---------- +105 iteration : bool, optional +106 Flag to perform iterative calibration, by default False. 107 -108 Parameters -109 ---------- -110 iteration : bool, optional -111 Flag to perform iterative calibration, by default False. 
+108 """ +109 mz_calc = self.mz_calc +110 freq_exp = self.freq_exp +111 mz_exp = self.mz_exp 112 -113 """ -114 mz_calc = self.mz_calc -115 freq_exp = self.freq_exp -116 mz_exp = self.mz_exp -117 -118 error = ((mz_exp-mz_calc)/mz_calc) * 1000000 -119 last_rms = np.sqrt(np.mean(error**2)) -120 while True: -121 -122 matrix = np.vstack( -123 [1/freq_exp, 1/np.power(freq_exp, 2), np.ones(len(freq_exp))]).T -124 Aterm, Bterm, Cterm = np.linalg.lstsq( -125 matrix, self.mz_calc, rcond=None)[0] -126 mz_exp = (Aterm / (freq_exp)) + \ -127 (Bterm / np.power((freq_exp), 2)) + Cterm -128 error = ((mz_exp-mz_calc)/mz_calc)*1000000 -129 rms = np.sqrt(np.mean(error**2)) -130 std = np.std(error) -131 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -132 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) -133 print('Quadratic Calibration %.2f RMS, %.2f std, %.2f Aterm, %.2f Bterm ' %(rms, std, Aterm, Bterm)) -134 if rms < last_rms: -135 last_rms = rms -136 freq_exp = (Aterm + np.sqrt(np.power(-Aterm, 2) - -137 (4*Cterm*(mz_exp-Bterm)))) / (2*mz_exp) -138 -139 mz_domain = (Aterm / (self.freq_exp_ms)) + \ -140 (Bterm / np.power((self.freq_exp_ms), 2)) + Cterm -141 self.recal_mass_spec(mz_domain, Aterm, Bterm, Cterm) -142 if not iteration: -143 break -144 else: -145 break -146 -147 def ledford_calibration(self, iteration : bool=False): -148 """ Perform Ledford calibration. -149 -150 Parameters -151 ---------- -152 iteration : bool, optional -153 Flag to perform iterative calibration, by default False. 
-154 -155 """ -156 mz_calc = self.mz_calc -157 freq_exp = self.freq_exp -158 mz_exp = self.mz_exp -159 -160 error = ((mz_exp-self.mz_calc)/self.mz_calc) * 1000000 -161 last_rms = np.sqrt(np.mean(error**2)) -162 while True: -163 -164 matrix = np.vstack([1/freq_exp, 1/np.power(freq_exp, 2)]).T +113 error = ((mz_exp - mz_calc) / mz_calc) * 1000000 +114 last_rms = np.sqrt(np.mean(error**2)) +115 while True: +116 matrix = np.vstack( +117 [1 / freq_exp, 1 / np.power(freq_exp, 2), np.ones(len(freq_exp))] +118 ).T +119 Aterm, Bterm, Cterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0] +120 mz_exp = (Aterm / (freq_exp)) + (Bterm / np.power((freq_exp), 2)) + Cterm +121 error = ((mz_exp - mz_calc) / mz_calc) * 1000000 +122 rms = np.sqrt(np.mean(error**2)) +123 std = np.std(error) +124 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: +125 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) +126 print( +127 "Quadratic Calibration %.2f RMS, %.2f std, %.2f Aterm, %.2f Bterm " +128 % (rms, std, Aterm, Bterm) +129 ) +130 if rms < last_rms: +131 last_rms = rms +132 freq_exp = ( +133 Aterm +134 + np.sqrt(np.power(-Aterm, 2) - (4 * Cterm * (mz_exp - Bterm))) +135 ) / (2 * mz_exp) +136 +137 mz_domain = ( +138 (Aterm / (self.freq_exp_ms)) +139 + (Bterm / np.power((self.freq_exp_ms), 2)) +140 + Cterm +141 ) +142 self.recal_mass_spec(mz_domain, Aterm, Bterm, Cterm) +143 if not iteration: +144 break +145 else: +146 break +147 +148 def ledford_calibration(self, iteration: bool = False): +149 """Perform Ledford calibration. +150 +151 Parameters +152 ---------- +153 iteration : bool, optional +154 Flag to perform iterative calibration, by default False. 
+155 +156 """ +157 mz_calc = self.mz_calc +158 freq_exp = self.freq_exp +159 mz_exp = self.mz_exp +160 +161 error = ((mz_exp - self.mz_calc) / self.mz_calc) * 1000000 +162 last_rms = np.sqrt(np.mean(error**2)) +163 while True: +164 matrix = np.vstack([1 / freq_exp, 1 / np.power(freq_exp, 2)]).T 165 Aterm, Bterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0] 166 167 mz_exp = (Aterm / (freq_exp)) + (Bterm / np.power((freq_exp), 2)) -168 error = ((mz_exp-mz_calc)/mz_calc)*1000000 +168 error = ((mz_exp - mz_calc) / mz_calc) * 1000000 169 rms = np.sqrt(np.mean(error**2)) 170 std = np.std(error) 171 if self.mass_spectrum.parameters.mass_spectrum.verbose_processing: -172 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) -173 print('Ledford Calibration %.2f RMS, %.2f std, %.2f Aterm, %.2f Bterm ' %(rms, std, Aterm, Bterm)) -174 if rms < last_rms: -175 last_rms = rms -176 freq_exp = (Aterm + np.sqrt(np.power(-Aterm, 2) - -177 (4*mz_exp-Bterm))) / (2*mz_exp) -178 mz_domain = (Aterm / (self.freq_exp_ms)) + \ -179 (Bterm / np.power((self.freq_exp_ms), 2)) -180 self.recal_mass_spec(mz_domain, Aterm, Bterm, 0) -181 if not iteration: -182 break -183 else: -184 break -185 -186 def step_fit(self, steps : int=4): -187 """ Perform step fit calibration. -188 -189 Parameters -190 ---------- -191 steps : int, optional -192 Number of steps for step fit calibration, by default 4. 
+172 print("%.2f Aterm, %.2f Bterm" % (Aterm, Bterm)) +173 print( +174 "Ledford Calibration %.2f RMS, %.2f std, %.2f Aterm, %.2f Bterm " +175 % (rms, std, Aterm, Bterm) +176 ) +177 if rms < last_rms: +178 last_rms = rms +179 freq_exp = ( +180 Aterm + np.sqrt(np.power(-Aterm, 2) - (4 * mz_exp - Bterm)) +181 ) / (2 * mz_exp) +182 mz_domain = (Aterm / (self.freq_exp_ms)) + ( +183 Bterm / np.power((self.freq_exp_ms), 2) +184 ) +185 self.recal_mass_spec(mz_domain, Aterm, Bterm, 0) +186 if not iteration: +187 break +188 else: +189 break +190 +191 def step_fit(self, steps: int = 4): +192 """Perform step fit calibration. 193 -194 """ -195 def f_to_mz(f, A, B, C, a): -196 return (A / f) + (B / np.power(f, 2)) + (C*a / np.power(f, 2)) -197 -198 def mz_to_f(m, A, B, C): return (-A-m/B) -199 -200 tuple_indexes = [(i, i+steps) for i in range(0, len(self.selected_mspeaks)-steps, steps)] -201 -202 for current_index, tuple_index in enumerate(tuple_indexes): -203 -204 mspeak_ii, mspeak_fi = tuple_index -205 freq_exp = list() -206 mz_calc = list() -207 mz_exp = list() -208 abu = list() -209 -210 for i in range(mspeak_ii, mspeak_fi+1): -211 -212 best_formula = self.selected_mspeaks[i].best_molecular_formula_candidate -213 -214 freq_exp.append(self.selected_mspeaks[i].freq_exp) -215 mz_calc.append(best_formula.mz_calc) -216 mz_exp.append(self.selected_mspeaks[i].mz_exp) -217 abu.append(self.selected_mspeaks[i].abundance) -218 -219 -220 freq_exp = np.array(freq_exp) -221 mz_calc = np.array(mz_calc) -222 mz_exp = np.array(mz_exp) -223 abu = np.array(abu) -224 -225 if current_index == len(tuple_indexes)-1: -226 ms_peaks_indexes = (self.selected_mspeaks[mspeak_ii].index, 0) -227 -228 elif current_index == 0: -229 ms_peaks_indexes = (len(self.mass_spectrum)-1, -230 self.selected_mspeaks[mspeak_fi].index-1) -231 else: -232 ms_peaks_indexes = ( -233 self.selected_mspeaks[mspeak_ii].index, self.selected_mspeaks[mspeak_fi].index-1) -234 -235 final_index, start_index = ms_peaks_indexes -236 
-237 matrix = np.vstack([1/freq_exp, 1/np.power(freq_exp, 2)]).T -238 A, B = np.linalg.lstsq(matrix, mz_calc, rcond=None)[0] -239 C = 0 -240 -241 for mspeak in self.mass_spectrum[start_index:final_index]: -242 mspeak.mz_cal = f_to_mz(mspeak.freq_exp, A, B, C, 0) -243 -244 self.mass_spectrum.is_calibrated = True +194 Parameters +195 ---------- +196 steps : int, optional +197 Number of steps for step fit calibration, by default 4. +198 +199 """ +200 +201 def f_to_mz(f, A, B, C, a): +202 return (A / f) + (B / np.power(f, 2)) + (C * a / np.power(f, 2)) +203 +204 def mz_to_f(m, A, B, C): +205 return -A - m / B +206 +207 tuple_indexes = [ +208 (i, i + steps) for i in range(0, len(self.selected_mspeaks) - steps, steps) +209 ] +210 +211 for current_index, tuple_index in enumerate(tuple_indexes): +212 mspeak_ii, mspeak_fi = tuple_index +213 freq_exp = list() +214 mz_calc = list() +215 mz_exp = list() +216 abu = list() +217 +218 for i in range(mspeak_ii, mspeak_fi + 1): +219 best_formula = self.selected_mspeaks[i].best_molecular_formula_candidate +220 +221 freq_exp.append(self.selected_mspeaks[i].freq_exp) +222 mz_calc.append(best_formula.mz_calc) +223 mz_exp.append(self.selected_mspeaks[i].mz_exp) +224 abu.append(self.selected_mspeaks[i].abundance) +225 +226 freq_exp = np.array(freq_exp) +227 mz_calc = np.array(mz_calc) +228 mz_exp = np.array(mz_exp) +229 abu = np.array(abu) +230 +231 if current_index == len(tuple_indexes) - 1: +232 ms_peaks_indexes = (self.selected_mspeaks[mspeak_ii].index, 0) +233 +234 elif current_index == 0: +235 ms_peaks_indexes = ( +236 len(self.mass_spectrum) - 1, +237 self.selected_mspeaks[mspeak_fi].index - 1, +238 ) +239 else: +240 ms_peaks_indexes = ( +241 self.selected_mspeaks[mspeak_ii].index, +242 self.selected_mspeaks[mspeak_fi].index - 1, +243 ) +244 +245 final_index, start_index = ms_peaks_indexes +246 +247 matrix = np.vstack([1 / freq_exp, 1 / np.power(freq_exp, 2)]).T +248 A, B = np.linalg.lstsq(matrix, mz_calc, rcond=None)[0] +249 C = 0 
+250 +251 for mspeak in self.mass_spectrum[start_index:final_index]: +252 mspeak.mz_cal = f_to_mz(mspeak.freq_exp, A, B, C, 0) +253 +254 self.mass_spectrum.is_calibrated = True

    @@ -626,15 +645,15 @@
    Attributes
    Methods
      -
    • recal_mass_spec(mz_domain, Aterm, Bterm, Cterm). -Recalibrate the mass spectrum with the given parameters.
    • -
    • linear(). -Perform linear calibration.
    • -
    • quadratic(iteration=False). -Perform quadratic calibration.
    • -
    • ledford_calibration(iteration=False). -Perform Ledford calibration.
    • -
    • step_fit(steps=4).
      +
    • recal_mass_spec(mz_domain, Aterm, Bterm, Cterm). +Recalibrate the mass spectrum with the given parameters.
    • +
    • linear(). +Perform linear calibration.
    • +
    • quadratic(iteration=False). +Perform quadratic calibration.
    • +
    • ledford_calibration(iteration=False). +Perform Ledford calibration.
    • +
    • step_fit(steps=4). Perform step fit calibration.
    @@ -650,8 +669,7 @@
    Methods
    -
    44    def __init__(self, mass_spectrum, selected_mass_peaks, include_isotopologue=False):
    -45        
    +            
    45    def __init__(self, mass_spectrum, selected_mass_peaks, include_isotopologue=False):
     46        self.selected_mspeaks = selected_mass_peaks
     47        error = list()
     48        freq_exp = list()
    @@ -659,27 +677,24 @@ 
    Methods
    50 mz_exp = list() 51 52 for mspeak in selected_mass_peaks: -53 -54 if not include_isotopologue: -55 molecular_formulas = [ -56 formula for formula in mspeak if not formula.is_isotopologue] +53 if not include_isotopologue: +54 molecular_formulas = [ +55 formula for formula in mspeak if not formula.is_isotopologue +56 ] 57 else: 58 molecular_formulas = mspeak 59 60 for molecular_formula in molecular_formulas: -61 -62 freq_exp.append(mspeak.freq_exp) -63 error.append( -64 molecular_formula.mz_error) -65 mz_calc.append(molecular_formula.mz_calc) -66 mz_exp.append(mspeak.mz_exp) -67 -68 self.mz_exp = np.array(mz_exp) -69 self.mz_calc = np.array(mz_calc) -70 self.freq_exp = np.array(freq_exp) -71 self.mass_spectrum = mass_spectrum -72 self.freq_exp_ms = np.array( -73 [mspeak.freq_exp for mspeak in mass_spectrum]) +61 freq_exp.append(mspeak.freq_exp) +62 error.append(molecular_formula.mz_error) +63 mz_calc.append(molecular_formula.mz_calc) +64 mz_exp.append(mspeak.mz_exp) +65 +66 self.mz_exp = np.array(mz_exp) +67 self.mz_calc = np.array(mz_calc) +68 self.freq_exp = np.array(freq_exp) +69 self.mass_spectrum = mass_spectrum +70 self.freq_exp_ms = np.array([mspeak.freq_exp for mspeak in mass_spectrum])
    @@ -763,23 +778,23 @@
    Methods
    -
    75    def recal_mass_spec(self, mz_domain, Aterm, Bterm, Cterm):
    -76        """ Recalibrate the mass spectrum with the given parameters.
    -77
    -78        Parameters
    -79        ----------
    -80        mz_domain : ndarray
    -81            Array of m/z values for recalibration.
    -82        Aterm : float
    -83            Aterm parameter for recalibration.
    -84        Bterm : float
    -85            Bterm parameter for recalibration.
    -86        Cterm : float
    -87            Cterm parameter for recalibration.
    -88
    -89        """
    -90        self.mass_spectrum._calibration_terms = (Aterm, Bterm, 0)
    -91        self.mass_spectrum.mz_cal = mz_domain
    +            
    72    def recal_mass_spec(self, mz_domain, Aterm, Bterm, Cterm):
    +73        """Recalibrate the mass spectrum with the given parameters.
    +74
    +75        Parameters
    +76        ----------
    +77        mz_domain : ndarray
    +78            Array of m/z values for recalibration.
    +79        Aterm : float
    +80            Aterm parameter for recalibration.
    +81        Bterm : float
    +82            Bterm parameter for recalibration.
    +83        Cterm : float
    +84            Cterm parameter for recalibration.
    +85
    +86        """
    +87        self.mass_spectrum._calibration_terms = (Aterm, Bterm, 0)
    +88        self.mass_spectrum.mz_cal = mz_domain
     
    @@ -812,17 +827,15 @@
    Parameters
    -
     93    def linear(self):
    - 94        """ Perform linear calibration.
    - 95
    - 96        """
    - 97        matrix = np.vstack([1/self.freq_exp, np.ones(len(self.freq_exp))]).T
    - 98        Aterm, Bterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0]
    - 99        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -100            print("%.2f Aterm,  %.2f Bterm" %  (Aterm, Bterm))
    -101            print('Linear Calibration %.2f Aterm,  %.2f Bterm ' %(Aterm, Bterm))
    -102        mz_domain = (Aterm/self.freq_exp_ms) + Bterm
    -103        self.recal_mass_spec(mz_domain, Aterm, Bterm, 0)
    +            
    90    def linear(self):
    +91        """Perform linear calibration."""
    +92        matrix = np.vstack([1 / self.freq_exp, np.ones(len(self.freq_exp))]).T
    +93        Aterm, Bterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0]
    +94        if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +95            print("%.2f Aterm,  %.2f Bterm" % (Aterm, Bterm))
    +96            print("Linear Calibration %.2f Aterm,  %.2f Bterm " % (Aterm, Bterm))
    +97        mz_domain = (Aterm / self.freq_exp_ms) + Bterm
    +98        self.recal_mass_spec(mz_domain, Aterm, Bterm, 0)
     
    @@ -842,47 +855,53 @@
    Parameters
    -
    105    def quadratic(self, iteration : bool=False):
    -106        """ Perform quadratic calibration.
    +            
    100    def quadratic(self, iteration: bool = False):
    +101        """Perform quadratic calibration.
    +102
    +103        Parameters
    +104        ----------
    +105        iteration : bool, optional
    +106            Flag to perform iterative calibration, by default False.
     107
    -108        Parameters
    -109        ----------
    -110        iteration : bool, optional
    -111            Flag to perform iterative calibration, by default False.
    +108        """
    +109        mz_calc = self.mz_calc
    +110        freq_exp = self.freq_exp
    +111        mz_exp = self.mz_exp
     112
    -113        """
    -114        mz_calc = self.mz_calc
    -115        freq_exp = self.freq_exp
    -116        mz_exp = self.mz_exp
    -117
    -118        error = ((mz_exp-mz_calc)/mz_calc) * 1000000
    -119        last_rms = np.sqrt(np.mean(error**2))
    -120        while True:
    -121
    -122            matrix = np.vstack(
    -123                [1/freq_exp, 1/np.power(freq_exp, 2), np.ones(len(freq_exp))]).T
    -124            Aterm, Bterm, Cterm = np.linalg.lstsq(
    -125                matrix, self.mz_calc, rcond=None)[0]
    -126            mz_exp = (Aterm / (freq_exp)) + \
    -127                (Bterm / np.power((freq_exp), 2)) + Cterm
    -128            error = ((mz_exp-mz_calc)/mz_calc)*1000000
    -129            rms = np.sqrt(np.mean(error**2))
    -130            std = np.std(error)
    -131            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -132                print("%.2f Aterm,  %.2f Bterm" %  (Aterm, Bterm))
    -133                print('Quadratic Calibration %.2f RMS,  %.2f std,  %.2f Aterm,  %.2f Bterm ' %(rms, std, Aterm, Bterm))
    -134            if rms < last_rms:
    -135                last_rms = rms
    -136                freq_exp = (Aterm + np.sqrt(np.power(-Aterm, 2) -
    -137                                            (4*Cterm*(mz_exp-Bterm)))) / (2*mz_exp)
    -138
    -139                mz_domain = (Aterm / (self.freq_exp_ms)) + \
    -140                    (Bterm / np.power((self.freq_exp_ms), 2)) + Cterm
    -141                self.recal_mass_spec(mz_domain, Aterm, Bterm, Cterm)
    -142                if not iteration:
    -143                    break
    -144            else:
    -145                break
    +113        error = ((mz_exp - mz_calc) / mz_calc) * 1000000
    +114        last_rms = np.sqrt(np.mean(error**2))
    +115        while True:
    +116            matrix = np.vstack(
    +117                [1 / freq_exp, 1 / np.power(freq_exp, 2), np.ones(len(freq_exp))]
    +118            ).T
    +119            Aterm, Bterm, Cterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0]
    +120            mz_exp = (Aterm / (freq_exp)) + (Bterm / np.power((freq_exp), 2)) + Cterm
    +121            error = ((mz_exp - mz_calc) / mz_calc) * 1000000
    +122            rms = np.sqrt(np.mean(error**2))
    +123            std = np.std(error)
    +124            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    +125                print("%.2f Aterm,  %.2f Bterm" % (Aterm, Bterm))
    +126                print(
    +127                    "Quadratic Calibration %.2f RMS,  %.2f std,  %.2f Aterm,  %.2f Bterm "
    +128                    % (rms, std, Aterm, Bterm)
    +129                )
    +130            if rms < last_rms:
    +131                last_rms = rms
    +132                freq_exp = (
    +133                    Aterm
    +134                    + np.sqrt(np.power(-Aterm, 2) - (4 * Cterm * (mz_exp - Bterm)))
    +135                ) / (2 * mz_exp)
    +136
    +137                mz_domain = (
    +138                    (Aterm / (self.freq_exp_ms))
    +139                    + (Bterm / np.power((self.freq_exp_ms), 2))
    +140                    + Cterm
    +141                )
    +142                self.recal_mass_spec(mz_domain, Aterm, Bterm, Cterm)
    +143                if not iteration:
    +144                    break
    +145            else:
    +146                break
     
    @@ -909,44 +928,48 @@
    Parameters
    -
    147    def ledford_calibration(self, iteration : bool=False):
    -148        """ Perform Ledford calibration.
    -149
    -150        Parameters
    -151        ----------
    -152        iteration : bool, optional
    -153            Flag to perform iterative calibration, by default False.
    -154
    -155        """
    -156        mz_calc = self.mz_calc
    -157        freq_exp = self.freq_exp
    -158        mz_exp = self.mz_exp
    -159
    -160        error = ((mz_exp-self.mz_calc)/self.mz_calc) * 1000000
    -161        last_rms = np.sqrt(np.mean(error**2))
    -162        while True:
    -163
    -164            matrix = np.vstack([1/freq_exp, 1/np.power(freq_exp, 2)]).T
    +            
    148    def ledford_calibration(self, iteration: bool = False):
    +149        """Perform Ledford calibration.
    +150
    +151        Parameters
    +152        ----------
    +153        iteration : bool, optional
    +154            Flag to perform iterative calibration, by default False.
    +155
    +156        """
    +157        mz_calc = self.mz_calc
    +158        freq_exp = self.freq_exp
    +159        mz_exp = self.mz_exp
    +160
    +161        error = ((mz_exp - self.mz_calc) / self.mz_calc) * 1000000
    +162        last_rms = np.sqrt(np.mean(error**2))
    +163        while True:
    +164            matrix = np.vstack([1 / freq_exp, 1 / np.power(freq_exp, 2)]).T
     165            Aterm, Bterm = np.linalg.lstsq(matrix, self.mz_calc, rcond=None)[0]
     166
     167            mz_exp = (Aterm / (freq_exp)) + (Bterm / np.power((freq_exp), 2))
    -168            error = ((mz_exp-mz_calc)/mz_calc)*1000000
    +168            error = ((mz_exp - mz_calc) / mz_calc) * 1000000
     169            rms = np.sqrt(np.mean(error**2))
     170            std = np.std(error)
     171            if self.mass_spectrum.parameters.mass_spectrum.verbose_processing:
    -172                print("%.2f Aterm,  %.2f Bterm" %  (Aterm, Bterm))
    -173                print('Ledford Calibration %.2f RMS,  %.2f std,  %.2f Aterm,  %.2f Bterm ' %(rms, std, Aterm, Bterm))
    -174            if rms < last_rms:
    -175                last_rms = rms
    -176                freq_exp = (Aterm + np.sqrt(np.power(-Aterm, 2) -
    -177                                            (4*mz_exp-Bterm))) / (2*mz_exp)
    -178                mz_domain = (Aterm / (self.freq_exp_ms)) + \
    -179                    (Bterm / np.power((self.freq_exp_ms), 2))
    -180                self.recal_mass_spec(mz_domain, Aterm, Bterm, 0)
    -181                if not iteration:
    -182                    break
    -183            else:
    -184                break
    +172                print("%.2f Aterm,  %.2f Bterm" % (Aterm, Bterm))
    +173                print(
    +174                    "Ledford Calibration %.2f RMS,  %.2f std,  %.2f Aterm,  %.2f Bterm "
    +175                    % (rms, std, Aterm, Bterm)
    +176                )
    +177            if rms < last_rms:
    +178                last_rms = rms
    +179                freq_exp = (
    +180                    Aterm + np.sqrt(np.power(-Aterm, 2) - (4 * mz_exp - Bterm))
    +181                ) / (2 * mz_exp)
    +182                mz_domain = (Aterm / (self.freq_exp_ms)) + (
    +183                    Bterm / np.power((self.freq_exp_ms), 2)
    +184                )
    +185                self.recal_mass_spec(mz_domain, Aterm, Bterm, 0)
    +186                if not iteration:
    +187                    break
    +188            else:
    +189                break
     
    @@ -973,65 +996,70 @@
    Parameters
    -
    186    def step_fit(self, steps : int=4):
    -187        """ Perform step fit calibration.
    -188
    -189        Parameters
    -190        ----------
    -191        steps : int, optional
    -192            Number of steps for step fit calibration, by default 4.
    +            
    191    def step_fit(self, steps: int = 4):
    +192        """Perform step fit calibration.
     193
    -194        """
    -195        def f_to_mz(f, A, B, C, a): 
    -196                return (A / f) + (B / np.power(f, 2)) + (C*a / np.power(f, 2))
    -197        
    -198        def mz_to_f(m, A, B, C): return (-A-m/B)
    -199        
    -200        tuple_indexes = [(i, i+steps) for i in range(0, len(self.selected_mspeaks)-steps, steps)]
    -201
    -202        for current_index, tuple_index in enumerate(tuple_indexes):
    -203            
    -204            mspeak_ii, mspeak_fi = tuple_index
    -205            freq_exp = list()
    -206            mz_calc = list()
    -207            mz_exp = list()
    -208            abu = list()
    -209            
    -210            for i in range(mspeak_ii, mspeak_fi+1):
    -211
    -212                best_formula = self.selected_mspeaks[i].best_molecular_formula_candidate
    -213
    -214                freq_exp.append(self.selected_mspeaks[i].freq_exp)
    -215                mz_calc.append(best_formula.mz_calc)
    -216                mz_exp.append(self.selected_mspeaks[i].mz_exp)
    -217                abu.append(self.selected_mspeaks[i].abundance)
    -218                        
    -219            
    -220            freq_exp = np.array(freq_exp)
    -221            mz_calc = np.array(mz_calc)
    -222            mz_exp = np.array(mz_exp)
    -223            abu = np.array(abu)
    -224
    -225            if current_index == len(tuple_indexes)-1:
    -226                ms_peaks_indexes = (self.selected_mspeaks[mspeak_ii].index, 0)
    -227            
    -228            elif current_index == 0:
    -229                ms_peaks_indexes = (len(self.mass_spectrum)-1,
    -230                                    self.selected_mspeaks[mspeak_fi].index-1)
    -231            else:
    -232                ms_peaks_indexes = (
    -233                    self.selected_mspeaks[mspeak_ii].index, self.selected_mspeaks[mspeak_fi].index-1)
    -234
    -235            final_index, start_index = ms_peaks_indexes
    -236                           
    -237            matrix = np.vstack([1/freq_exp, 1/np.power(freq_exp, 2)]).T
    -238            A, B = np.linalg.lstsq(matrix, mz_calc, rcond=None)[0]
    -239            C = 0
    -240            
    -241            for mspeak in self.mass_spectrum[start_index:final_index]:
    -242                mspeak.mz_cal = f_to_mz(mspeak.freq_exp, A, B, C, 0)
    -243        
    -244        self.mass_spectrum.is_calibrated = True
    +194        Parameters
    +195        ----------
    +196        steps : int, optional
    +197            Number of steps for step fit calibration, by default 4.
    +198
    +199        """
    +200
    +201        def f_to_mz(f, A, B, C, a):
    +202            return (A / f) + (B / np.power(f, 2)) + (C * a / np.power(f, 2))
    +203
    +204        def mz_to_f(m, A, B, C):
    +205            return -A - m / B
    +206
    +207        tuple_indexes = [
    +208            (i, i + steps) for i in range(0, len(self.selected_mspeaks) - steps, steps)
    +209        ]
    +210
    +211        for current_index, tuple_index in enumerate(tuple_indexes):
    +212            mspeak_ii, mspeak_fi = tuple_index
    +213            freq_exp = list()
    +214            mz_calc = list()
    +215            mz_exp = list()
    +216            abu = list()
    +217
    +218            for i in range(mspeak_ii, mspeak_fi + 1):
    +219                best_formula = self.selected_mspeaks[i].best_molecular_formula_candidate
    +220
    +221                freq_exp.append(self.selected_mspeaks[i].freq_exp)
    +222                mz_calc.append(best_formula.mz_calc)
    +223                mz_exp.append(self.selected_mspeaks[i].mz_exp)
    +224                abu.append(self.selected_mspeaks[i].abundance)
    +225
    +226            freq_exp = np.array(freq_exp)
    +227            mz_calc = np.array(mz_calc)
    +228            mz_exp = np.array(mz_exp)
    +229            abu = np.array(abu)
    +230
    +231            if current_index == len(tuple_indexes) - 1:
    +232                ms_peaks_indexes = (self.selected_mspeaks[mspeak_ii].index, 0)
    +233
    +234            elif current_index == 0:
    +235                ms_peaks_indexes = (
    +236                    len(self.mass_spectrum) - 1,
    +237                    self.selected_mspeaks[mspeak_fi].index - 1,
    +238                )
    +239            else:
    +240                ms_peaks_indexes = (
    +241                    self.selected_mspeaks[mspeak_ii].index,
    +242                    self.selected_mspeaks[mspeak_fi].index - 1,
    +243                )
    +244
    +245            final_index, start_index = ms_peaks_indexes
    +246
    +247            matrix = np.vstack([1 / freq_exp, 1 / np.power(freq_exp, 2)]).T
    +248            A, B = np.linalg.lstsq(matrix, mz_calc, rcond=None)[0]
    +249            C = 0
    +250
    +251            for mspeak in self.mass_spectrum[start_index:final_index]:
    +252                mspeak.mz_cal = f_to_mz(mspeak.freq_exp, A, B, C, 0)
    +253
    +254        self.mass_spectrum.is_calibrated = True
     
    diff --git a/docs/corems/mass_spectrum/calc/KendrickGroup.html b/docs/corems/mass_spectrum/calc/KendrickGroup.html index 76c97a6c..0b87df0e 100644 --- a/docs/corems/mass_spectrum/calc/KendrickGroup.html +++ b/docs/corems/mass_spectrum/calc/KendrickGroup.html @@ -76,27 +76,27 @@

      1class KendrickGrouping:
    -  2    """ Class for Kendrick grouping of mass spectra.
    +  2    """Class for Kendrick grouping of mass spectra.
       3
       4    Methods
       5    -------
    -  6    * mz_odd_even_index_lists(). 
    +  6    * mz_odd_even_index_lists().
       7        Get odd and even indexes lists.
    -  8    * calc_error(current, test). 
    +  8    * calc_error(current, test).
       9        Calculate the error between two values.
    - 10    * populate_kendrick_index_dict_error(list_indexes, sort=True). 
    + 10    * populate_kendrick_index_dict_error(list_indexes, sort=True).
      11        Populate the Kendrick index dictionary based on error.
    - 12    * populate_kendrick_index_dict_rounding(list_indexes, sort=True). 
    + 12    * populate_kendrick_index_dict_rounding(list_indexes, sort=True).
      13        Populate the Kendrick index dictionary based on rounding.
    - 14    * sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index). 
    + 14    * sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index).
      15        Sort the Kendrick index dictionary based on abundance.
    - 16    * kendrick_groups_indexes(sort=True). 
    + 16    * kendrick_groups_indexes(sort=True).
      17        Get the Kendrick groups indexes dictionary.
      18
      19    """
      20
      21    def mz_odd_even_index_lists(self):
    - 22        """ Get odd and even indexes lists.
    + 22        """Get odd and even indexes lists.
      23
      24        Returns
      25        -------
    @@ -104,157 +104,157 @@ 

    27 A tuple containing the lists of even and odd indexes. 28 29 """ - 30 even_idx = [] + 30 even_idx = [] 31 odd_idx = [] - 32 + 32 33 for i, mspeak in enumerate(self.mspeaks): - 34 - 35 if mspeak.nominal_mz_exp % 2 == 0: - 36 even_idx.append(i) - 37 else: - 38 odd_idx.append(i) - 39 - 40 return even_idx, odd_idx - 41 - 42 def calc_error(self, current : float, test : float): - 43 """ Calculate the error between two values. - 44 - 45 Parameters - 46 ---------- - 47 current : float - 48 The current value. - 49 test : float - 50 The test value. - 51 - 52 Returns - 53 ------- - 54 float - 55 The calculated error. - 56 - 57 """ - 58 return ((test-current)/current)*1e6 - 59 - 60 - 61 def populate_kendrick_index_dict_error(self, list_indexes : list, sort : bool=True): - 62 """ Populate the Kendrick index dictionary based on error. - 63 - 64 Parameters - 65 ---------- - 66 list_indexes : list - 67 The list of indexes. - 68 sort : bool, optional - 69 Whether to sort the dictionary by abundance (default is True). - 70 - 71 Returns - 72 ------- - 73 dict - 74 The Kendrick index dictionary. + 34 if mspeak.nominal_mz_exp % 2 == 0: + 35 even_idx.append(i) + 36 else: + 37 odd_idx.append(i) + 38 + 39 return even_idx, odd_idx + 40 + 41 def calc_error(self, current: float, test: float): + 42 """Calculate the error between two values. + 43 + 44 Parameters + 45 ---------- + 46 current : float + 47 The current value. + 48 test : float + 49 The test value. + 50 + 51 Returns + 52 ------- + 53 float + 54 The calculated error. + 55 + 56 """ + 57 return ((test - current) / current) * 1e6 + 58 + 59 def populate_kendrick_index_dict_error(self, list_indexes: list, sort: bool = True): + 60 """Populate the Kendrick index dictionary based on error. + 61 + 62 Parameters + 63 ---------- + 64 list_indexes : list + 65 The list of indexes. + 66 sort : bool, optional + 67 Whether to sort the dictionary by abundance (default is True). 
+ 68 + 69 Returns + 70 ------- + 71 dict + 72 The Kendrick index dictionary. + 73 + 74 """ 75 - 76 """ - 77 def error(): - 78 - 79 return abs(current_kmd_reference - next_mspeak.kmd) + 76 def error(): + 77 return abs(current_kmd_reference - next_mspeak.kmd) + 78 + 79 already_found = [] 80 - 81 already_found = [] + 81 all_results = [] 82 - 83 all_results = [] - 84 - 85 for i in list_indexes: - 86 - 87 result_indexes = [] - 88 - 89 mspeak = self.mspeaks[i] - 90 - 91 current_kmd_reference = mspeak.kmd - 92 - 93 for j in list_indexes: - 94 if j not in already_found and j != i: - 95 - 96 next_mspeak = self.mspeaks[j] - 97 - 98 if error() <= 0.001: + 83 for i in list_indexes: + 84 result_indexes = [] + 85 + 86 mspeak = self.mspeaks[i] + 87 + 88 current_kmd_reference = mspeak.kmd + 89 + 90 for j in list_indexes: + 91 if j not in already_found and j != i: + 92 next_mspeak = self.mspeaks[j] + 93 + 94 if error() <= 0.001: + 95 result_indexes.append(j) + 96 already_found.append(j) + 97 + 98 current_kmd_reference = next_mspeak.kmd 99 -100 result_indexes.append(j) -101 already_found.append(j) +100 if result_indexes and len(result_indexes) > 3: +101 already_found.append(i) 102 -103 current_kmd_reference = next_mspeak.kmd -104 -105 if result_indexes and len(result_indexes) > 3: -106 -107 already_found.append(i) -108 -109 result_indexes.insert(0,i) -110 -111 all_results.append(result_indexes) -112 else: +103 result_indexes.insert(0, i) +104 +105 all_results.append(result_indexes) +106 else: +107 for w in result_indexes: +108 already_found.remove(w) +109 +110 kendrick_group_index = { +111 i: indexes_list for i, indexes_list in enumerate(all_results) +112 } 113 -114 for w in result_indexes: -115 -116 already_found.remove(w) -117 -118 kendrick_group_index = { i : indexes_list for i, indexes_list in enumerate(all_results) } -119 -120 -121 #return dictionary with the keys sorted by sum of the abundances -122 if sort: -123 -124 return dict(sorted(kendrick_group_index.items(), key = 
lambda it: sum([self.mspeaks[i].abundance for i in it[1]]), reverse=False )) -125 -126 else: -127 -128 return kendrick_group_index -129 -130 def populate_kendrick_index_dict_rounding(self, list_indexes : list, sort : bool=True): -131 """ Populate the Kendrick index dictionary based on rounding. -132 -133 Parameters -134 ---------- -135 list_indexes : list -136 The list of indexes. -137 sort : bool, optional -138 Whether to sort the dictionary by abundance (default is True). -139 -140 Returns -141 ------- -142 dict -143 The Kendrick index dictionary. -144 -145 """ -146 kendrick_group_index = {} -147 -148 for i in list_indexes: +114 # return dictionary with the keys sorted by sum of the abundances +115 if sort: +116 return dict( +117 sorted( +118 kendrick_group_index.items(), +119 key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]), +120 reverse=False, +121 ) +122 ) +123 +124 else: +125 return kendrick_group_index +126 +127 def populate_kendrick_index_dict_rounding( +128 self, list_indexes: list, sort: bool = True +129 ): +130 """Populate the Kendrick index dictionary based on rounding. +131 +132 Parameters +133 ---------- +134 list_indexes : list +135 The list of indexes. +136 sort : bool, optional +137 Whether to sort the dictionary by abundance (default is True). +138 +139 Returns +140 ------- +141 dict +142 The Kendrick index dictionary. 
+143 +144 """ +145 kendrick_group_index = {} +146 +147 for i in list_indexes: +148 mspeak = self.mspeaks[i] 149 -150 mspeak = self.mspeaks[i] +150 group = round(mspeak.kmd * 100) 151 -152 group = round(mspeak.kmd * 100) -153 -154 if group not in kendrick_group_index: -155 -156 kendrick_group_index[group] = [i] +152 if group not in kendrick_group_index: +153 kendrick_group_index[group] = [i] +154 +155 else: +156 last_index = kendrick_group_index[group][-1] 157 -158 else: -159 -160 last_index = kendrick_group_index[group][-1] -161 -162 if self.parameters.mass_spectrum.verbose_processing: -163 print(abs(mspeak.kmd - self.mspeaks[last_index].kmd )) -164 -165 if abs(mspeak.kmd - self.mspeaks[last_index].kmd ) < 0.001: -166 -167 kendrick_group_index[group].append(i) -168 -169 -170 -171 -172 #return dictionary with the keys sorted by sum of the abundances -173 if sort: -174 return dict(sorted(kendrick_group_index.items(), key = lambda it: sum([self.mspeaks[i].abundance for i in it[1]]), reverse=True )) -175 -176 else: -177 return kendrick_group_index -178 -179 def sort_abundance_kendrick_dict(self, even_kendrick_group_index : dict, odd_kendrick_group_index : dict): -180 """ Sort the Kendrick index dictionary based on abundance. +158 if self.parameters.mass_spectrum.verbose_processing: +159 print(abs(mspeak.kmd - self.mspeaks[last_index].kmd)) +160 +161 if abs(mspeak.kmd - self.mspeaks[last_index].kmd) < 0.001: +162 kendrick_group_index[group].append(i) +163 +164 # return dictionary with the keys sorted by sum of the abundances +165 if sort: +166 return dict( +167 sorted( +168 kendrick_group_index.items(), +169 key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]), +170 reverse=True, +171 ) +172 ) +173 +174 else: +175 return kendrick_group_index +176 +177 def sort_abundance_kendrick_dict( +178 self, even_kendrick_group_index: dict, odd_kendrick_group_index: dict +179 ): +180 """Sort the Kendrick index dictionary based on abundance. 
181 182 Parameters 183 ---------- @@ -276,41 +276,44 @@

    199 sum_even = sum([self.mspeaks[i].abundance for i in all_even_indexes]) 200 201 sum_odd = sum([self.mspeaks[i].abundance for i in all_odd_indexes]) -202 +202 203 if sum_even >= sum_odd: -204 -205 even_kendrick_group_index.update(odd_kendrick_group_index) -206 -207 return even_kendrick_group_index -208 -209 else: +204 even_kendrick_group_index.update(odd_kendrick_group_index) +205 +206 return even_kendrick_group_index +207 +208 else: +209 odd_kendrick_group_index.update(even_kendrick_group_index) 210 -211 odd_kendrick_group_index.update(even_kendrick_group_index) +211 return odd_kendrick_group_index 212 -213 return odd_kendrick_group_index -214 -215 -216 def kendrick_groups_indexes(self, sort : bool=True): -217 """ Get the Kendrick groups indexes dictionary. -218 -219 Parameters -220 ---------- -221 sort : bool, optional -222 Whether to sort the dictionary by abundance (default is True). -223 -224 Returns -225 ------- -226 dict -227 The Kendrick groups indexes dictionary. +213 def kendrick_groups_indexes(self, sort: bool = True): +214 """Get the Kendrick groups indexes dictionary. +215 +216 Parameters +217 ---------- +218 sort : bool, optional +219 Whether to sort the dictionary by abundance (default is True). +220 +221 Returns +222 ------- +223 dict +224 The Kendrick groups indexes dictionary. 
+225 +226 """ +227 even_idx, odd_idx = self.mz_odd_even_index_lists() 228 -229 """ -230 even_idx, odd_idx = self.mz_odd_even_index_lists() -231 -232 even_kendrick_group_index = self.populate_kendrick_index_dict_error(even_idx, sort=sort) -233 -234 odd_kendrick_group_index = self.populate_kendrick_index_dict_error(odd_idx, sort=sort) -235 -236 return self.sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index) +229 even_kendrick_group_index = self.populate_kendrick_index_dict_error( +230 even_idx, sort=sort +231 ) +232 +233 odd_kendrick_group_index = self.populate_kendrick_index_dict_error( +234 odd_idx, sort=sort +235 ) +236 +237 return self.sort_abundance_kendrick_dict( +238 even_kendrick_group_index, odd_kendrick_group_index +239 )

    @@ -326,242 +329,245 @@

    -
      3class KendrickGrouping:
    -  4    """ Class for Kendrick grouping of mass spectra.
    -  5
    -  6    Methods
    -  7    -------
    -  8    * mz_odd_even_index_lists(). 
    -  9        Get odd and even indexes lists.
    - 10    * calc_error(current, test). 
    - 11        Calculate the error between two values.
    - 12    * populate_kendrick_index_dict_error(list_indexes, sort=True). 
    - 13        Populate the Kendrick index dictionary based on error.
    - 14    * populate_kendrick_index_dict_rounding(list_indexes, sort=True). 
    - 15        Populate the Kendrick index dictionary based on rounding.
    - 16    * sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index). 
    - 17        Sort the Kendrick index dictionary based on abundance.
    - 18    * kendrick_groups_indexes(sort=True). 
    - 19        Get the Kendrick groups indexes dictionary.
    - 20
    - 21    """
    - 22
    - 23    def mz_odd_even_index_lists(self):
    - 24        """ Get odd and even indexes lists.
    - 25
    - 26        Returns
    - 27        -------
    - 28        tuple
    - 29            A tuple containing the lists of even and odd indexes.
    - 30
    - 31        """
    - 32        even_idx = [] 
    - 33        odd_idx = []
    - 34        
    - 35        for i, mspeak in enumerate(self.mspeaks):
    - 36
    - 37            if mspeak.nominal_mz_exp % 2 == 0:
    - 38                even_idx.append(i)
    - 39            else:
    - 40                odd_idx.append(i)
    - 41        
    - 42        return even_idx, odd_idx 
    - 43
    - 44    def calc_error(self, current : float, test : float):
    - 45        """ Calculate the error between two values.
    - 46
    - 47        Parameters
    - 48        ----------
    - 49        current : float
    - 50            The current value.
    - 51        test : float
    - 52            The test value.
    - 53
    - 54        Returns
    - 55        -------
    - 56        float
    - 57            The calculated error.
    - 58
    - 59        """
    - 60        return ((test-current)/current)*1e6
    - 61  
    - 62    
    - 63    def populate_kendrick_index_dict_error(self, list_indexes : list, sort : bool=True):
    - 64        """ Populate the Kendrick index dictionary based on error.
    - 65
    - 66        Parameters
    - 67        ----------
    - 68        list_indexes : list
    - 69            The list of indexes.
    - 70        sort : bool, optional
    - 71            Whether to sort the dictionary by abundance (default is True).
    - 72
    - 73        Returns
    - 74        -------
    - 75        dict
    - 76            The Kendrick index dictionary.
    - 77
    - 78        """
    - 79        def error():
    - 80            
    - 81            return  abs(current_kmd_reference - next_mspeak.kmd)
    - 82
    - 83        already_found = []
    - 84
    - 85        all_results = []
    - 86        
    - 87        for i in list_indexes:
    +            
      2class KendrickGrouping:
    +  3    """Class for Kendrick grouping of mass spectra.
    +  4
    +  5    Methods
    +  6    -------
    +  7    * mz_odd_even_index_lists().
    +  8        Get odd and even indexes lists.
    +  9    * calc_error(current, test).
    + 10        Calculate the error between two values.
    + 11    * populate_kendrick_index_dict_error(list_indexes, sort=True).
    + 12        Populate the Kendrick index dictionary based on error.
    + 13    * populate_kendrick_index_dict_rounding(list_indexes, sort=True).
    + 14        Populate the Kendrick index dictionary based on rounding.
    + 15    * sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index).
    + 16        Sort the Kendrick index dictionary based on abundance.
    + 17    * kendrick_groups_indexes(sort=True).
    + 18        Get the Kendrick groups indexes dictionary.
    + 19
    + 20    """
    + 21
    + 22    def mz_odd_even_index_lists(self):
    + 23        """Get odd and even indexes lists.
    + 24
    + 25        Returns
    + 26        -------
    + 27        tuple
    + 28            A tuple containing the lists of even and odd indexes.
    + 29
    + 30        """
    + 31        even_idx = []
    + 32        odd_idx = []
    + 33
    + 34        for i, mspeak in enumerate(self.mspeaks):
    + 35            if mspeak.nominal_mz_exp % 2 == 0:
    + 36                even_idx.append(i)
    + 37            else:
    + 38                odd_idx.append(i)
    + 39
    + 40        return even_idx, odd_idx
    + 41
    + 42    def calc_error(self, current: float, test: float):
    + 43        """Calculate the error between two values.
    + 44
    + 45        Parameters
    + 46        ----------
    + 47        current : float
    + 48            The current value.
    + 49        test : float
    + 50            The test value.
    + 51
    + 52        Returns
    + 53        -------
    + 54        float
    + 55            The calculated error.
    + 56
    + 57        """
    + 58        return ((test - current) / current) * 1e6
    + 59
    + 60    def populate_kendrick_index_dict_error(self, list_indexes: list, sort: bool = True):
    + 61        """Populate the Kendrick index dictionary based on error.
    + 62
    + 63        Parameters
    + 64        ----------
    + 65        list_indexes : list
    + 66            The list of indexes.
    + 67        sort : bool, optional
    + 68            Whether to sort the dictionary by abundance (default is True).
    + 69
    + 70        Returns
    + 71        -------
    + 72        dict
    + 73            The Kendrick index dictionary.
    + 74
    + 75        """
    + 76
    + 77        def error():
    + 78            return abs(current_kmd_reference - next_mspeak.kmd)
    + 79
    + 80        already_found = []
    + 81
    + 82        all_results = []
    + 83
    + 84        for i in list_indexes:
    + 85            result_indexes = []
    + 86
    + 87            mspeak = self.mspeaks[i]
      88
    - 89            result_indexes = []
    + 89            current_kmd_reference = mspeak.kmd
      90
    - 91            mspeak = self.mspeaks[i]    
    - 92
    - 93            current_kmd_reference = mspeak.kmd
    - 94            
    - 95            for j in list_indexes:
    - 96                if j not in already_found and j != i:
    - 97
    - 98                    next_mspeak = self.mspeaks[j]
    - 99                    
    -100                    if  error() <= 0.001:
    -101
    -102                        result_indexes.append(j)    
    -103                        already_found.append(j)
    -104
    -105                        current_kmd_reference = next_mspeak.kmd
    -106            
    -107            if result_indexes and len(result_indexes) > 3:
    -108
    -109                already_found.append(i)
    -110                
    -111                result_indexes.insert(0,i)
    -112
    -113                all_results.append(result_indexes)        
    -114            else:
    -115
    -116                for w in result_indexes:
    -117
    -118                    already_found.remove(w)        
    -119
    -120        kendrick_group_index = { i : indexes_list for i, indexes_list in enumerate(all_results) }
    -121
    -122        
    -123        #return dictionary with the keys sorted by sum of the abundances
    -124        if sort:
    -125            
    -126            return dict(sorted(kendrick_group_index.items(), key = lambda it: sum([self.mspeaks[i].abundance for i in it[1]]), reverse=False ))
    + 91            for j in list_indexes:
    + 92                if j not in already_found and j != i:
    + 93                    next_mspeak = self.mspeaks[j]
    + 94
    + 95                    if error() <= 0.001:
    + 96                        result_indexes.append(j)
    + 97                        already_found.append(j)
    + 98
    + 99                        current_kmd_reference = next_mspeak.kmd
    +100
    +101            if result_indexes and len(result_indexes) > 3:
    +102                already_found.append(i)
    +103
    +104                result_indexes.insert(0, i)
    +105
    +106                all_results.append(result_indexes)
    +107            else:
    +108                for w in result_indexes:
    +109                    already_found.remove(w)
    +110
    +111        kendrick_group_index = {
    +112            i: indexes_list for i, indexes_list in enumerate(all_results)
    +113        }
    +114
    +115        # return dictionary with the keys sorted by sum of the abundances
    +116        if sort:
    +117            return dict(
    +118                sorted(
    +119                    kendrick_group_index.items(),
    +120                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
    +121                    reverse=False,
    +122                )
    +123            )
    +124
    +125        else:
    +126            return kendrick_group_index
     127
    -128        else:
    -129
    -130            return kendrick_group_index
    -131        
    -132    def populate_kendrick_index_dict_rounding(self, list_indexes : list, sort : bool=True):
    -133        """ Populate the Kendrick index dictionary based on rounding.
    -134
    -135        Parameters
    -136        ----------
    -137        list_indexes : list
    -138            The list of indexes.
    -139        sort : bool, optional
    -140            Whether to sort the dictionary by abundance (default is True).
    -141
    -142        Returns
    -143        -------
    -144        dict
    -145            The Kendrick index dictionary.
    -146
    -147        """
    -148        kendrick_group_index = {}
    -149        
    -150        for i in list_indexes:
    -151
    -152            mspeak = self.mspeaks[i]
    -153
    -154            group = round(mspeak.kmd * 100)
    -155            
    -156            if group not in kendrick_group_index:
    -157
    -158                kendrick_group_index[group] = [i]
    -159
    -160            else: 
    +128    def populate_kendrick_index_dict_rounding(
    +129        self, list_indexes: list, sort: bool = True
    +130    ):
    +131        """Populate the Kendrick index dictionary based on rounding.
    +132
    +133        Parameters
    +134        ----------
    +135        list_indexes : list
    +136            The list of indexes.
    +137        sort : bool, optional
    +138            Whether to sort the dictionary by abundance (default is True).
    +139
    +140        Returns
    +141        -------
    +142        dict
    +143            The Kendrick index dictionary.
    +144
    +145        """
    +146        kendrick_group_index = {}
    +147
    +148        for i in list_indexes:
    +149            mspeak = self.mspeaks[i]
    +150
    +151            group = round(mspeak.kmd * 100)
    +152
    +153            if group not in kendrick_group_index:
    +154                kendrick_group_index[group] = [i]
    +155
    +156            else:
    +157                last_index = kendrick_group_index[group][-1]
    +158
    +159                if self.parameters.mass_spectrum.verbose_processing:
    +160                    print(abs(mspeak.kmd - self.mspeaks[last_index].kmd))
     161
    -162                last_index = kendrick_group_index[group][-1]
    -163                
    -164                if self.parameters.mass_spectrum.verbose_processing:
    -165                    print(abs(mspeak.kmd - self.mspeaks[last_index].kmd )) 
    -166                
    -167                if abs(mspeak.kmd - self.mspeaks[last_index].kmd ) < 0.001:
    -168
    -169                    kendrick_group_index[group].append(i)
    -170                
    -171
    -172
    -173
    -174            #return dictionary with the keys sorted by sum of the abundances
    -175        if sort:
    -176            return dict(sorted(kendrick_group_index.items(), key = lambda it: sum([self.mspeaks[i].abundance for i in it[1]]), reverse=True ))
    +162                if abs(mspeak.kmd - self.mspeaks[last_index].kmd) < 0.001:
    +163                    kendrick_group_index[group].append(i)
    +164
    +165            # return dictionary with the keys sorted by sum of the abundances
    +166        if sort:
    +167            return dict(
    +168                sorted(
    +169                    kendrick_group_index.items(),
    +170                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
    +171                    reverse=True,
    +172                )
    +173            )
    +174
    +175        else:
    +176            return kendrick_group_index
     177
    -178        else:
    -179            return kendrick_group_index
    -180
    -181    def sort_abundance_kendrick_dict(self, even_kendrick_group_index : dict, odd_kendrick_group_index : dict):
    -182        """ Sort the Kendrick index dictionary based on abundance.
    -183
    -184        Parameters
    -185        ----------
    -186        even_kendrick_group_index : dict
    -187            The Kendrick index dictionary for even indexes.
    -188        odd_kendrick_group_index : dict
    -189            The Kendrick index dictionary for odd indexes.
    -190
    -191        Returns
    -192        -------
    -193        dict
    -194            The sorted Kendrick index dictionary.
    -195
    -196        """
    -197        all_even_indexes = [i for v in even_kendrick_group_index.values() for i in v]
    -198
    -199        all_odd_indexes = [i for v in odd_kendrick_group_index.values() for i in v]
    -200
    -201        sum_even = sum([self.mspeaks[i].abundance for i in all_even_indexes])
    -202
    -203        sum_odd = sum([self.mspeaks[i].abundance for i in all_odd_indexes])
    -204        
    -205        if sum_even >= sum_odd:
    +178    def sort_abundance_kendrick_dict(
    +179        self, even_kendrick_group_index: dict, odd_kendrick_group_index: dict
    +180    ):
    +181        """Sort the Kendrick index dictionary based on abundance.
    +182
    +183        Parameters
    +184        ----------
    +185        even_kendrick_group_index : dict
    +186            The Kendrick index dictionary for even indexes.
    +187        odd_kendrick_group_index : dict
    +188            The Kendrick index dictionary for odd indexes.
    +189
    +190        Returns
    +191        -------
    +192        dict
    +193            The sorted Kendrick index dictionary.
    +194
    +195        """
    +196        all_even_indexes = [i for v in even_kendrick_group_index.values() for i in v]
    +197
    +198        all_odd_indexes = [i for v in odd_kendrick_group_index.values() for i in v]
    +199
    +200        sum_even = sum([self.mspeaks[i].abundance for i in all_even_indexes])
    +201
    +202        sum_odd = sum([self.mspeaks[i].abundance for i in all_odd_indexes])
    +203
    +204        if sum_even >= sum_odd:
    +205            even_kendrick_group_index.update(odd_kendrick_group_index)
     206
    -207            even_kendrick_group_index.update(odd_kendrick_group_index)
    +207            return even_kendrick_group_index
     208
    -209            return even_kendrick_group_index
    -210
    -211        else: 
    -212
    -213            odd_kendrick_group_index.update(even_kendrick_group_index)
    -214
    -215            return odd_kendrick_group_index  
    +209        else:
    +210            odd_kendrick_group_index.update(even_kendrick_group_index)
    +211
    +212            return odd_kendrick_group_index
    +213
    +214    def kendrick_groups_indexes(self, sort: bool = True):
    +215        """Get the Kendrick groups indexes dictionary.
     216
    -217    
    -218    def kendrick_groups_indexes(self, sort : bool=True):
    -219        """ Get the Kendrick groups indexes dictionary.
    -220
    -221        Parameters
    -222        ----------
    -223        sort : bool, optional
    -224            Whether to sort the dictionary by abundance (default is True).
    -225
    -226        Returns
    -227        -------
    -228        dict
    -229            The Kendrick groups indexes dictionary.
    -230
    -231        """
    -232        even_idx, odd_idx = self.mz_odd_even_index_lists()
    +217        Parameters
    +218        ----------
    +219        sort : bool, optional
    +220            Whether to sort the dictionary by abundance (default is True).
    +221
    +222        Returns
    +223        -------
    +224        dict
    +225            The Kendrick groups indexes dictionary.
    +226
    +227        """
    +228        even_idx, odd_idx = self.mz_odd_even_index_lists()
    +229
    +230        even_kendrick_group_index = self.populate_kendrick_index_dict_error(
    +231            even_idx, sort=sort
    +232        )
     233
    -234        even_kendrick_group_index = self.populate_kendrick_index_dict_error(even_idx, sort=sort)
    -235
    -236        odd_kendrick_group_index = self.populate_kendrick_index_dict_error(odd_idx, sort=sort)
    +234        odd_kendrick_group_index = self.populate_kendrick_index_dict_error(
    +235            odd_idx, sort=sort
    +236        )
     237
    -238        return self.sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index)
    +238        return self.sort_abundance_kendrick_dict(
    +239            even_kendrick_group_index, odd_kendrick_group_index
    +240        )
     
    @@ -570,17 +576,17 @@

    Methods
      -
    • mz_odd_even_index_lists(). +
    • mz_odd_even_index_lists(). Get odd and even indexes lists.
    • -
    • calc_error(current, test). +
    • calc_error(current, test). Calculate the error between two values.
    • -
    • populate_kendrick_index_dict_error(list_indexes, sort=True). +
    • populate_kendrick_index_dict_error(list_indexes, sort=True). Populate the Kendrick index dictionary based on error.
    • -
    • populate_kendrick_index_dict_rounding(list_indexes, sort=True). +
    • populate_kendrick_index_dict_rounding(list_indexes, sort=True). Populate the Kendrick index dictionary based on rounding.
    • -
    • sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index). +
    • sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index). Sort the Kendrick index dictionary based on abundance.
    • -
    • kendrick_groups_indexes(sort=True). +
    • kendrick_groups_indexes(sort=True). Get the Kendrick groups indexes dictionary.
    @@ -597,26 +603,25 @@
    Methods
    -
    23    def mz_odd_even_index_lists(self):
    -24        """ Get odd and even indexes lists.
    -25
    -26        Returns
    -27        -------
    -28        tuple
    -29            A tuple containing the lists of even and odd indexes.
    -30
    -31        """
    -32        even_idx = [] 
    -33        odd_idx = []
    -34        
    -35        for i, mspeak in enumerate(self.mspeaks):
    -36
    -37            if mspeak.nominal_mz_exp % 2 == 0:
    -38                even_idx.append(i)
    -39            else:
    -40                odd_idx.append(i)
    -41        
    -42        return even_idx, odd_idx 
    +            
    22    def mz_odd_even_index_lists(self):
    +23        """Get odd and even indexes lists.
    +24
    +25        Returns
    +26        -------
    +27        tuple
    +28            A tuple containing the lists of even and odd indexes.
    +29
    +30        """
    +31        even_idx = []
    +32        odd_idx = []
    +33
    +34        for i, mspeak in enumerate(self.mspeaks):
    +35            if mspeak.nominal_mz_exp % 2 == 0:
    +36                even_idx.append(i)
    +37            else:
    +38                odd_idx.append(i)
    +39
    +40        return even_idx, odd_idx
     
    @@ -642,23 +647,23 @@
    Returns
    -
    44    def calc_error(self, current : float, test : float):
    -45        """ Calculate the error between two values.
    -46
    -47        Parameters
    -48        ----------
    -49        current : float
    -50            The current value.
    -51        test : float
    -52            The test value.
    -53
    -54        Returns
    -55        -------
    -56        float
    -57            The calculated error.
    -58
    -59        """
    -60        return ((test-current)/current)*1e6
    +            
    42    def calc_error(self, current: float, test: float):
    +43        """Calculate the error between two values.
    +44
    +45        Parameters
    +46        ----------
    +47        current : float
    +48            The current value.
    +49        test : float
    +50            The test value.
    +51
    +52        Returns
    +53        -------
    +54        float
    +55            The calculated error.
    +56
    +57        """
    +58        return ((test - current) / current) * 1e6
     
    @@ -693,74 +698,73 @@
    Returns
    -
     63    def populate_kendrick_index_dict_error(self, list_indexes : list, sort : bool=True):
    - 64        """ Populate the Kendrick index dictionary based on error.
    - 65
    - 66        Parameters
    - 67        ----------
    - 68        list_indexes : list
    - 69            The list of indexes.
    - 70        sort : bool, optional
    - 71            Whether to sort the dictionary by abundance (default is True).
    - 72
    - 73        Returns
    - 74        -------
    - 75        dict
    - 76            The Kendrick index dictionary.
    - 77
    - 78        """
    - 79        def error():
    - 80            
    - 81            return  abs(current_kmd_reference - next_mspeak.kmd)
    - 82
    - 83        already_found = []
    - 84
    - 85        all_results = []
    - 86        
    - 87        for i in list_indexes:
    +            
     60    def populate_kendrick_index_dict_error(self, list_indexes: list, sort: bool = True):
    + 61        """Populate the Kendrick index dictionary based on error.
    + 62
    + 63        Parameters
    + 64        ----------
    + 65        list_indexes : list
    + 66            The list of indexes.
    + 67        sort : bool, optional
    + 68            Whether to sort the dictionary by abundance (default is True).
    + 69
    + 70        Returns
    + 71        -------
    + 72        dict
    + 73            The Kendrick index dictionary.
    + 74
    + 75        """
    + 76
    + 77        def error():
    + 78            return abs(current_kmd_reference - next_mspeak.kmd)
    + 79
    + 80        already_found = []
    + 81
    + 82        all_results = []
    + 83
    + 84        for i in list_indexes:
    + 85            result_indexes = []
    + 86
    + 87            mspeak = self.mspeaks[i]
      88
    - 89            result_indexes = []
    + 89            current_kmd_reference = mspeak.kmd
      90
    - 91            mspeak = self.mspeaks[i]    
    - 92
    - 93            current_kmd_reference = mspeak.kmd
    - 94            
    - 95            for j in list_indexes:
    - 96                if j not in already_found and j != i:
    - 97
    - 98                    next_mspeak = self.mspeaks[j]
    - 99                    
    -100                    if  error() <= 0.001:
    -101
    -102                        result_indexes.append(j)    
    -103                        already_found.append(j)
    -104
    -105                        current_kmd_reference = next_mspeak.kmd
    -106            
    -107            if result_indexes and len(result_indexes) > 3:
    -108
    -109                already_found.append(i)
    -110                
    -111                result_indexes.insert(0,i)
    -112
    -113                all_results.append(result_indexes)        
    -114            else:
    -115
    -116                for w in result_indexes:
    -117
    -118                    already_found.remove(w)        
    -119
    -120        kendrick_group_index = { i : indexes_list for i, indexes_list in enumerate(all_results) }
    -121
    -122        
    -123        #return dictionary with the keys sorted by sum of the abundances
    -124        if sort:
    -125            
    -126            return dict(sorted(kendrick_group_index.items(), key = lambda it: sum([self.mspeaks[i].abundance for i in it[1]]), reverse=False ))
    -127
    -128        else:
    -129
    -130            return kendrick_group_index
    + 91            for j in list_indexes:
    + 92                if j not in already_found and j != i:
    + 93                    next_mspeak = self.mspeaks[j]
    + 94
    + 95                    if error() <= 0.001:
    + 96                        result_indexes.append(j)
    + 97                        already_found.append(j)
    + 98
    + 99                        current_kmd_reference = next_mspeak.kmd
    +100
    +101            if result_indexes and len(result_indexes) > 3:
    +102                already_found.append(i)
    +103
    +104                result_indexes.insert(0, i)
    +105
    +106                all_results.append(result_indexes)
    +107            else:
    +108                for w in result_indexes:
    +109                    already_found.remove(w)
    +110
    +111        kendrick_group_index = {
    +112            i: indexes_list for i, indexes_list in enumerate(all_results)
    +113        }
    +114
    +115        # return dictionary with the keys sorted by sum of the abundances
    +116        if sort:
    +117            return dict(
    +118                sorted(
    +119                    kendrick_group_index.items(),
    +120                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
    +121                    reverse=False,
    +122                )
    +123            )
    +124
    +125        else:
    +126            return kendrick_group_index
     
    @@ -795,54 +799,55 @@
    Returns
    -
    132    def populate_kendrick_index_dict_rounding(self, list_indexes : list, sort : bool=True):
    -133        """ Populate the Kendrick index dictionary based on rounding.
    -134
    -135        Parameters
    -136        ----------
    -137        list_indexes : list
    -138            The list of indexes.
    -139        sort : bool, optional
    -140            Whether to sort the dictionary by abundance (default is True).
    -141
    -142        Returns
    -143        -------
    -144        dict
    -145            The Kendrick index dictionary.
    -146
    -147        """
    -148        kendrick_group_index = {}
    -149        
    -150        for i in list_indexes:
    -151
    -152            mspeak = self.mspeaks[i]
    -153
    -154            group = round(mspeak.kmd * 100)
    -155            
    -156            if group not in kendrick_group_index:
    -157
    -158                kendrick_group_index[group] = [i]
    -159
    -160            else: 
    +            
    128    def populate_kendrick_index_dict_rounding(
    +129        self, list_indexes: list, sort: bool = True
    +130    ):
    +131        """Populate the Kendrick index dictionary based on rounding.
    +132
    +133        Parameters
    +134        ----------
    +135        list_indexes : list
    +136            The list of indexes.
    +137        sort : bool, optional
    +138            Whether to sort the dictionary by abundance (default is True).
    +139
    +140        Returns
    +141        -------
    +142        dict
    +143            The Kendrick index dictionary.
    +144
    +145        """
    +146        kendrick_group_index = {}
    +147
    +148        for i in list_indexes:
    +149            mspeak = self.mspeaks[i]
    +150
    +151            group = round(mspeak.kmd * 100)
    +152
    +153            if group not in kendrick_group_index:
    +154                kendrick_group_index[group] = [i]
    +155
    +156            else:
    +157                last_index = kendrick_group_index[group][-1]
    +158
    +159                if self.parameters.mass_spectrum.verbose_processing:
    +160                    print(abs(mspeak.kmd - self.mspeaks[last_index].kmd))
     161
    -162                last_index = kendrick_group_index[group][-1]
    -163                
    -164                if self.parameters.mass_spectrum.verbose_processing:
    -165                    print(abs(mspeak.kmd - self.mspeaks[last_index].kmd )) 
    -166                
    -167                if abs(mspeak.kmd - self.mspeaks[last_index].kmd ) < 0.001:
    -168
    -169                    kendrick_group_index[group].append(i)
    -170                
    -171
    -172
    -173
    -174            #return dictionary with the keys sorted by sum of the abundances
    -175        if sort:
    -176            return dict(sorted(kendrick_group_index.items(), key = lambda it: sum([self.mspeaks[i].abundance for i in it[1]]), reverse=True ))
    -177
    -178        else:
    -179            return kendrick_group_index
    +162                if abs(mspeak.kmd - self.mspeaks[last_index].kmd) < 0.001:
    +163                    kendrick_group_index[group].append(i)
    +164
    +165            # return dictionary with the keys sorted by sum of the abundances
    +166        if sort:
    +167            return dict(
    +168                sorted(
    +169                    kendrick_group_index.items(),
    +170                    key=lambda it: sum([self.mspeaks[i].abundance for i in it[1]]),
    +171                    reverse=True,
    +172                )
    +173            )
    +174
    +175        else:
    +176            return kendrick_group_index
     
    @@ -877,41 +882,41 @@
    Returns
    -
    181    def sort_abundance_kendrick_dict(self, even_kendrick_group_index : dict, odd_kendrick_group_index : dict):
    -182        """ Sort the Kendrick index dictionary based on abundance.
    -183
    -184        Parameters
    -185        ----------
    -186        even_kendrick_group_index : dict
    -187            The Kendrick index dictionary for even indexes.
    -188        odd_kendrick_group_index : dict
    -189            The Kendrick index dictionary for odd indexes.
    -190
    -191        Returns
    -192        -------
    -193        dict
    -194            The sorted Kendrick index dictionary.
    -195
    -196        """
    -197        all_even_indexes = [i for v in even_kendrick_group_index.values() for i in v]
    -198
    -199        all_odd_indexes = [i for v in odd_kendrick_group_index.values() for i in v]
    -200
    -201        sum_even = sum([self.mspeaks[i].abundance for i in all_even_indexes])
    -202
    -203        sum_odd = sum([self.mspeaks[i].abundance for i in all_odd_indexes])
    -204        
    -205        if sum_even >= sum_odd:
    +            
    178    def sort_abundance_kendrick_dict(
    +179        self, even_kendrick_group_index: dict, odd_kendrick_group_index: dict
    +180    ):
    +181        """Sort the Kendrick index dictionary based on abundance.
    +182
    +183        Parameters
    +184        ----------
    +185        even_kendrick_group_index : dict
    +186            The Kendrick index dictionary for even indexes.
    +187        odd_kendrick_group_index : dict
    +188            The Kendrick index dictionary for odd indexes.
    +189
    +190        Returns
    +191        -------
    +192        dict
    +193            The sorted Kendrick index dictionary.
    +194
    +195        """
    +196        all_even_indexes = [i for v in even_kendrick_group_index.values() for i in v]
    +197
    +198        all_odd_indexes = [i for v in odd_kendrick_group_index.values() for i in v]
    +199
    +200        sum_even = sum([self.mspeaks[i].abundance for i in all_even_indexes])
    +201
    +202        sum_odd = sum([self.mspeaks[i].abundance for i in all_odd_indexes])
    +203
    +204        if sum_even >= sum_odd:
    +205            even_kendrick_group_index.update(odd_kendrick_group_index)
     206
    -207            even_kendrick_group_index.update(odd_kendrick_group_index)
    +207            return even_kendrick_group_index
     208
    -209            return even_kendrick_group_index
    -210
    -211        else: 
    -212
    -213            odd_kendrick_group_index.update(even_kendrick_group_index)
    -214
    -215            return odd_kendrick_group_index  
    +209        else:
    +210            odd_kendrick_group_index.update(even_kendrick_group_index)
    +211
    +212            return odd_kendrick_group_index
     
    @@ -946,27 +951,33 @@
    Returns
    -
    218    def kendrick_groups_indexes(self, sort : bool=True):
    -219        """ Get the Kendrick groups indexes dictionary.
    -220
    -221        Parameters
    -222        ----------
    -223        sort : bool, optional
    -224            Whether to sort the dictionary by abundance (default is True).
    -225
    -226        Returns
    -227        -------
    -228        dict
    -229            The Kendrick groups indexes dictionary.
    -230
    -231        """
    -232        even_idx, odd_idx = self.mz_odd_even_index_lists()
    +            
    214    def kendrick_groups_indexes(self, sort: bool = True):
    +215        """Get the Kendrick groups indexes dictionary.
    +216
    +217        Parameters
    +218        ----------
    +219        sort : bool, optional
    +220            Whether to sort the dictionary by abundance (default is True).
    +221
    +222        Returns
    +223        -------
    +224        dict
    +225            The Kendrick groups indexes dictionary.
    +226
    +227        """
    +228        even_idx, odd_idx = self.mz_odd_even_index_lists()
    +229
    +230        even_kendrick_group_index = self.populate_kendrick_index_dict_error(
    +231            even_idx, sort=sort
    +232        )
     233
    -234        even_kendrick_group_index = self.populate_kendrick_index_dict_error(even_idx, sort=sort)
    -235
    -236        odd_kendrick_group_index = self.populate_kendrick_index_dict_error(odd_idx, sort=sort)
    +234        odd_kendrick_group_index = self.populate_kendrick_index_dict_error(
    +235            odd_idx, sort=sort
    +236        )
     237
    -238        return self.sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index)
    +238        return self.sort_abundance_kendrick_dict(
    +239            even_kendrick_group_index, odd_kendrick_group_index
    +240        )
     
    diff --git a/docs/corems/mass_spectrum/calc/MassErrorPrediction.html b/docs/corems/mass_spectrum/calc/MassErrorPrediction.html index d77c29ef..8cd2fe83 100644 --- a/docs/corems/mass_spectrum/calc/MassErrorPrediction.html +++ b/docs/corems/mass_spectrum/calc/MassErrorPrediction.html @@ -102,7 +102,7 @@

    -
      1__author__ = 'Yuri E. Corilo'
    +                        
      1__author__ = "Yuri E. Corilo"
       2__date__ = "03/31/2020"
       3
       4from threading import Thread
    @@ -110,375 +110,434 @@ 

    6from numpy import hstack, inf, isnan, where, array 7from tqdm import tqdm 8 - 9class MassErrorPrediction(Thread): - 10 """ Class for mass error prediction. - 11 - 12 Parameters - 13 ---------- - 14 mass_spectrum : list - 15 List of mass spectrum objects. - 16 mz_overlay : int, optional - 17 The mz overlay value for peak simulation. Default is 10. - 18 rp_increments : int, optional - 19 The resolving power increments for peak simulation. Default is 10000. - 20 base_line_target : float, optional - 21 The target value for the baseline resolution. Default is 0.01. - 22 max_interation : int, optional - 23 The maximum number of iterations for peak simulation. Default is 1000. - 24 interpolation : str, optional - 25 The interpolation method for missing data. Default is 'linear'. - 26 - 27 Attributes - 28 ---------- - 29 mass_spectrum_obj : list - 30 List of mass spectrum objects. - 31 mz_overlay : int - 32 The mz overlay value for peak simulation. - 33 rp_increments : int - 34 The resolving power increments for peak simulation. - 35 base_line_target : float - 36 The target value for the baseline resolution. - 37 max_interation : int - 38 The maximum number of iterations for peak simulation. - 39 df : DataFrame or None - 40 The calculated error distribution dataframe. - 41 interpolation : str - 42 The interpolation method for missing data. - 43 - 44 Methods - 45 ------- - 46 * run(). - 47 Runs the mass error prediction calculation. - 48 * get_results(). - 49 Returns the calculated error distribution dataframe. 
- 50 - 51 """ - 52 def __init__(self, mass_spectrum, mz_overlay=10, rp_increments=10000, - 53 base_line_target : float=0.01, max_interation=1000, interpolation='linear'): - 54 - 55 Thread.__init__(self) - 56 - 57 self.mass_spectrum_obj = mass_spectrum - 58 - 59 self.mz_overlay = mz_overlay - 60 - 61 self.rp_increments = rp_increments - 62 - 63 self.base_line_target = base_line_target + 9 + 10class MassErrorPrediction(Thread): + 11 """Class for mass error prediction. + 12 + 13 Parameters + 14 ---------- + 15 mass_spectrum : list + 16 List of mass spectrum objects. + 17 mz_overlay : int, optional + 18 The mz overlay value for peak simulation. Default is 10. + 19 rp_increments : int, optional + 20 The resolving power increments for peak simulation. Default is 10000. + 21 base_line_target : float, optional + 22 The target value for the baseline resolution. Default is 0.01. + 23 max_interation : int, optional + 24 The maximum number of iterations for peak simulation. Default is 1000. + 25 interpolation : str, optional + 26 The interpolation method for missing data. Default is 'linear'. + 27 + 28 Attributes + 29 ---------- + 30 mass_spectrum_obj : list + 31 List of mass spectrum objects. + 32 mz_overlay : int + 33 The mz overlay value for peak simulation. + 34 rp_increments : int + 35 The resolving power increments for peak simulation. + 36 base_line_target : float + 37 The target value for the baseline resolution. + 38 max_interation : int + 39 The maximum number of iterations for peak simulation. + 40 df : DataFrame or None + 41 The calculated error distribution dataframe. + 42 interpolation : str + 43 The interpolation method for missing data. + 44 + 45 Methods + 46 ------- + 47 * run(). + 48 Runs the mass error prediction calculation. + 49 * get_results(). + 50 Returns the calculated error distribution dataframe. 
+ 51 + 52 """ + 53 + 54 def __init__( + 55 self, + 56 mass_spectrum, + 57 mz_overlay=10, + 58 rp_increments=10000, + 59 base_line_target: float = 0.01, + 60 max_interation=1000, + 61 interpolation="linear", + 62 ): + 63 Thread.__init__(self) 64 - 65 self.max_interation = max_interation + 65 self.mass_spectrum_obj = mass_spectrum 66 - 67 self.df = None + 67 self.mz_overlay = mz_overlay 68 - 69 self.interpolation = interpolation - 70 - 71 def run(self): - 72 """ Runs the mass error prediction calculation. - 73 """ - 74 self.df = self.calc_error_dist() - 75 - 76 def get_results(self): - 77 """ Returns the calculated error distribution dataframe. - 78 """ - 79 - 80 if not self.df: - 81 self.run() + 69 self.rp_increments = rp_increments + 70 + 71 self.base_line_target = base_line_target + 72 + 73 self.max_interation = max_interation + 74 + 75 self.df = None + 76 + 77 self.interpolation = interpolation + 78 + 79 def run(self): + 80 """Runs the mass error prediction calculation.""" + 81 self.df = self.calc_error_dist() 82 - 83 return self.df - 84 - 85 def calc_error_dist(self): - 86 """ Calculate the error distribution. 
- 87 """ - 88 results_list = [] - 89 - 90 indexes_without_results = list(range(len(self.mass_spectrum_obj))) - 91 # loop trough mass spectrum - 92 - 93 for peak_obj_idx, peak_obj in enumerate(tqdm(self.mass_spectrum_obj)): - 94 - 95 # access ms peaks triplets ( peak_obj_idx -1, peak_obj_idx, and peak_obj_idx + 1) - 96 # check lower and upper boundaries to not excesses mass spectrum range - 97 - 98 if peak_obj_idx != 0 and peak_obj_idx != len(self.mass_spectrum_obj)-1: - 99 -100 # current peak_obj initialted in the loop expression -101 # geting the peak on the left (previous_peak_obj) and the one in the right position (next_peak_obj) -102 next_peak_obj = self.mass_spectrum_obj[peak_obj_idx + 1] -103 previous_peak_obj = self.mass_spectrum_obj[peak_obj_idx - 1] -104 -105 # check mz range defined in max_mz variable and check if peaks have same nominal mz -106 # keeping same mz for better plotting representation only, remove it for production -107 if peak_obj.nominal_mz_exp == next_peak_obj.nominal_mz_exp and peak_obj.nominal_mz_exp == previous_peak_obj.nominal_mz_exp: -108 -109 #simulate peak shape -110 sim_mz, sim_abun = peak_obj.gaussian(mz_overlay=self.mz_overlay) -111 #update_plot(sim_mz,sim_abun, 0.5) -112 -113 #simulate peak shape -114 next_sim_mz, next_sim_abun = next_peak_obj.gaussian(mz_overlay=self.mz_overlay) -115 #update_plot(next_sim_mz, next_sim_abun, 0.5) -116 -117 -118 #simulate peak shape -119 previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(mz_overlay=self.mz_overlay) -120 #update_plot(previous_sim_mz, previous_sim_abun, 0.5) -121 -122 sim_mz_domain, summed_peaks_abun = self.sum_data( ((previous_sim_mz,previous_sim_abun), (sim_mz,sim_abun), (next_sim_mz, next_sim_abun)) ) -123 #update_plot(sim_mz_domain,summed_peaks_abun, 0.5) -124 -125 #sum simulated abundances -126 #summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun) -127 -128 #normalize abundances to 0-1 -129 #summed_peaks_abun = 
summed_peaks_abun/(max(summed_peaks_abun)) -130 -131 #find appexes location (mz) and magnitude -132 mz_centroid, abund_centroid = self.find_peak_apex(sim_mz_domain,summed_peaks_abun) -133 -134 #find valley location (mz_min_valley) and magnitude (abund_min_valley) -135 mz_min_valley, abund_min_valley = self.find_peak_valley(sim_mz_domain, summed_peaks_abun) -136 -137 # clear delta_rp (global implementation) and store choose resolving power increments -138 delta_rp = self.rp_increments -139 -140 # used to limited number of iterations -141 i = 0 -142 j = 0 -143 -144 # TODO: fit peak shape and decide best fit #gaussian, lorentz and voigt -145 #plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun ) -146 if len(mz_centroid) == 2 : -147 -148 while len(mz_centroid) < 3 and i <= self.max_interation: -149 -150 previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay) -151 -152 sim_mz, sim_abun = peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay) -153 -154 next_sim_mz, next_sim_abun = next_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay) -155 -156 sim_mz_domain, summed_peaks_abun = self.sum_data( ((previous_sim_mz,previous_sim_abun), (sim_mz,sim_abun), (next_sim_mz, next_sim_abun)) ) -157 -158 #update_plot(sim_mz_domain, summed_peaks_abun, 0.01) -159 -160 mz_centroid, abund_centroid = self.find_peak_apex(sim_mz_domain,summed_peaks_abun) + 83 def get_results(self): + 84 """Returns the calculated error distribution dataframe.""" + 85 + 86 if not self.df: + 87 self.run() + 88 + 89 return self.df + 90 + 91 def calc_error_dist(self): + 92 """Calculate the error distribution.""" + 93 results_list = [] + 94 + 95 indexes_without_results = list(range(len(self.mass_spectrum_obj))) + 96 # loop trough mass spectrum + 97 + 98 for peak_obj_idx, peak_obj in enumerate(tqdm(self.mass_spectrum_obj)): + 99 # access ms peaks triplets ( peak_obj_idx -1, 
peak_obj_idx, and peak_obj_idx + 1) +100 # check lower and upper boundaries to not excesses mass spectrum range +101 +102 if peak_obj_idx != 0 and peak_obj_idx != len(self.mass_spectrum_obj) - 1: +103 # current peak_obj initialted in the loop expression +104 # geting the peak on the left (previous_peak_obj) and the one in the right position (next_peak_obj) +105 next_peak_obj = self.mass_spectrum_obj[peak_obj_idx + 1] +106 previous_peak_obj = self.mass_spectrum_obj[peak_obj_idx - 1] +107 +108 # check mz range defined in max_mz variable and check if peaks have same nominal mz +109 # keeping same mz for better plotting representation only, remove it for production +110 if ( +111 peak_obj.nominal_mz_exp == next_peak_obj.nominal_mz_exp +112 and peak_obj.nominal_mz_exp == previous_peak_obj.nominal_mz_exp +113 ): +114 # simulate peak shape +115 sim_mz, sim_abun = peak_obj.gaussian(mz_overlay=self.mz_overlay) +116 # update_plot(sim_mz,sim_abun, 0.5) +117 +118 # simulate peak shape +119 next_sim_mz, next_sim_abun = next_peak_obj.gaussian( +120 mz_overlay=self.mz_overlay +121 ) +122 # update_plot(next_sim_mz, next_sim_abun, 0.5) +123 +124 # simulate peak shape +125 previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian( +126 mz_overlay=self.mz_overlay +127 ) +128 # update_plot(previous_sim_mz, previous_sim_abun, 0.5) +129 +130 sim_mz_domain, summed_peaks_abun = self.sum_data( +131 ( +132 (previous_sim_mz, previous_sim_abun), +133 (sim_mz, sim_abun), +134 (next_sim_mz, next_sim_abun), +135 ) +136 ) +137 # update_plot(sim_mz_domain,summed_peaks_abun, 0.5) +138 +139 # sum simulated abundances +140 # summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun) +141 +142 # normalize abundances to 0-1 +143 # summed_peaks_abun = summed_peaks_abun/(max(summed_peaks_abun)) +144 +145 # find appexes location (mz) and magnitude +146 mz_centroid, abund_centroid = self.find_peak_apex( +147 sim_mz_domain, summed_peaks_abun +148 ) +149 +150 # find valley location 
(mz_min_valley) and magnitude (abund_min_valley) +151 mz_min_valley, abund_min_valley = self.find_peak_valley( +152 sim_mz_domain, summed_peaks_abun +153 ) +154 +155 # clear delta_rp (global implementation) and store choose resolving power increments +156 delta_rp = self.rp_increments +157 +158 # used to limited number of iterations +159 i = 0 +160 j = 0 161 -162 delta_rp += self.rp_increments -163 -164 i += 1 -165 -166 mz_min_valley, abund_min_valley = self.find_peak_valley(sim_mz_domain, summed_peaks_abun) -167 -168 if len(mz_centroid) == 3 and len(abund_min_valley) == 2: -169 # increase all three peak resolving power until both valley magnitude is bellow the defined target -170 # calculate peak shapes with the needed resolving power to have a baseline resolution for all peaks -171 # calculate mass difference (ppm) between original centroid and the new simulated peak. -172 -173 while abund_min_valley[0] > self.base_line_target or abund_min_valley[1] > self.base_line_target and j <= self.max_interation: -174 -175 previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay) -176 -177 sim_mz, sim_abun = peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay) -178 -179 next_sim_mz, next_sim_abun = next_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay) -180 -181 sim_mz_domain, summed_peaks_abun = self.sum_data( ((previous_sim_mz,previous_sim_abun), (sim_mz,sim_abun), (next_sim_mz, next_sim_abun)) ) -182 -183 #update_plot(sim_mz_domain, summed_peaks_abun, 0.001) -184 -185 #summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun) -186 -187 -188 #find appexes location (mz) and magnitude -189 mz_centroid, abund_centroid = self.find_peak_apex(sim_mz_domain,summed_peaks_abun) -190 -191 #find valley location (mz_min_valley) and magnitude (abund_min_valley) -192 summed_peaks_abun = summed_peaks_abun/(summed_peaks_abun.max()) -193 mz_min_valley, abund_min_valley = 
self.find_peak_valley(sim_mz_domain, summed_peaks_abun) -194 -195 if len(abund_min_valley) != 2: -196 break -197 -198 delta_rp += self.rp_increments -199 j += 1 -200 -201 #plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun ) -202 -203 -204 #plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun ) -205 -206 mass_shift_ppp = self.calc_error(mz_centroid[1], peak_obj.mz_exp, 1000000) -207 #delta_mz = mz_centroid[1] - peak_obj.mz_exp -208 height_shift_per = self.calc_error(abund_centroid[1], peak_obj.abundance, 100) -209 #excitation_amplitude = str(mass_spectrum_obj.filename.stem).split("ex")[1].split("pc")[0] -210 #ion_time = str(mass_spectrum_obj.filename.stem).split("0pt")[1].split("s")[0] -211 peak_obj.predicted_std = mass_shift_ppp -212 -213 results_list.append( { -214 "ms_index_position" : peak_obj_idx, -215 "predicted_std": mass_shift_ppp, -216 "mz_exp": peak_obj.mz_exp, -217 "nominal_mz_exp": peak_obj.nominal_mz_exp, -218 "predicted_mz": mz_centroid[1], -219 "s2n" : peak_obj.signal_to_noise, -220 "peak_height" : peak_obj.abundance, -221 "predicted_peak_height" : abund_centroid[1], -222 "peak_height_error" : height_shift_per, -223 "resolving_power" : peak_obj.resolving_power, -224 #"excitation_amplitude" : excitation_amplitude, -225 #"ion_time" : ion_time -226 }) -227 -228 indexes_without_results.remove(peak_obj_idx) -229 #elif len(mz_centroid) == 3 and len(abund_min_valley) != 2: -230 -231 for peak_obj_idx in indexes_without_results: -232 -233 results_list.append( { -234 "ms_index_position" : peak_obj_idx, -235 "mz_exp": self.mass_spectrum_obj[peak_obj_idx].mz_exp, -236 "nominal_mz_exp": self.mass_spectrum_obj[peak_obj_idx].nominal_mz_exp, -237 "s2n" : self.mass_spectrum_obj[peak_obj_idx].signal_to_noise, -238 "peak_height" : self.mass_spectrum_obj[peak_obj_idx].abundance, -239 "resolving_power" : self.mass_spectrum_obj[peak_obj_idx].resolving_power, 
-240 #"excitation_amplitude" : excitation_amplitude, -241 #"ion_time" : ion_time -242 } ) -243 -244 df = DataFrame(results_list).sort_values("mz_exp") -245 -246 df.interpolate(method ='linear', limit_direction ='backward', inplace=True) -247 df.interpolate(method ='linear', limit_direction ='forward', inplace=True) -248 -249 #TODO improve interpolation for missing data -250 #f1 = interpolate.interp1d(x1, y1, kind='quadratic',fill_value="extrapolate") -251 -252 -253 for peak_obj_idx in indexes_without_results: -254 -255 predicted_std = df.loc[peak_obj_idx].predicted_std -256 -257 self.mass_spectrum_obj[peak_obj_idx].predicted_std = predicted_std +162 # TODO: fit peak shape and decide best fit #gaussian, lorentz and voigt +163 # plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun ) +164 if len(mz_centroid) == 2: +165 while len(mz_centroid) < 3 and i <= self.max_interation: +166 previous_sim_mz, previous_sim_abun = ( +167 previous_peak_obj.gaussian( +168 delta_rp=delta_rp, mz_overlay=self.mz_overlay +169 ) +170 ) +171 +172 sim_mz, sim_abun = peak_obj.gaussian( +173 delta_rp=delta_rp, mz_overlay=self.mz_overlay +174 ) +175 +176 next_sim_mz, next_sim_abun = next_peak_obj.gaussian( +177 delta_rp=delta_rp, mz_overlay=self.mz_overlay +178 ) +179 +180 sim_mz_domain, summed_peaks_abun = self.sum_data( +181 ( +182 (previous_sim_mz, previous_sim_abun), +183 (sim_mz, sim_abun), +184 (next_sim_mz, next_sim_abun), +185 ) +186 ) +187 +188 # update_plot(sim_mz_domain, summed_peaks_abun, 0.01) +189 +190 mz_centroid, abund_centroid = self.find_peak_apex( +191 sim_mz_domain, summed_peaks_abun +192 ) +193 +194 delta_rp += self.rp_increments +195 +196 i += 1 +197 +198 mz_min_valley, abund_min_valley = self.find_peak_valley( +199 sim_mz_domain, summed_peaks_abun +200 ) +201 +202 if len(mz_centroid) == 3 and len(abund_min_valley) == 2: +203 # increase all three peak resolving power until both valley magnitude is bellow the defined 
target +204 # calculate peak shapes with the needed resolving power to have a baseline resolution for all peaks +205 # calculate mass difference (ppm) between original centroid and the new simulated peak. +206 +207 while ( +208 abund_min_valley[0] > self.base_line_target +209 or abund_min_valley[1] > self.base_line_target +210 and j <= self.max_interation +211 ): +212 previous_sim_mz, previous_sim_abun = ( +213 previous_peak_obj.gaussian( +214 delta_rp=delta_rp, mz_overlay=self.mz_overlay +215 ) +216 ) +217 +218 sim_mz, sim_abun = peak_obj.gaussian( +219 delta_rp=delta_rp, mz_overlay=self.mz_overlay +220 ) +221 +222 next_sim_mz, next_sim_abun = next_peak_obj.gaussian( +223 delta_rp=delta_rp, mz_overlay=self.mz_overlay +224 ) +225 +226 sim_mz_domain, summed_peaks_abun = self.sum_data( +227 ( +228 (previous_sim_mz, previous_sim_abun), +229 (sim_mz, sim_abun), +230 (next_sim_mz, next_sim_abun), +231 ) +232 ) +233 +234 # update_plot(sim_mz_domain, summed_peaks_abun, 0.001) +235 +236 # summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun) +237 +238 # find appexes location (mz) and magnitude +239 mz_centroid, abund_centroid = self.find_peak_apex( +240 sim_mz_domain, summed_peaks_abun +241 ) +242 +243 # find valley location (mz_min_valley) and magnitude (abund_min_valley) +244 summed_peaks_abun = summed_peaks_abun / ( +245 summed_peaks_abun.max() +246 ) +247 mz_min_valley, abund_min_valley = self.find_peak_valley( +248 sim_mz_domain, summed_peaks_abun +249 ) +250 +251 if len(abund_min_valley) != 2: +252 break +253 +254 delta_rp += self.rp_increments +255 j += 1 +256 +257 # plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun ) 258 -259 return df +259 # plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun ) 260 -261 def sum_data(self, tuple_mz_abun_list : tuple): -262 """ Sum the abundances of the simulated peaks. 
-263 -264 Parameters -265 ------ -266 tuple_mz_abun_list : tuple -267 A tuple containing the mz and abundance lists. -268 -269 Returns -270 ------- -271 tuple -272 A tuple containing the summed mz and abundance lists. -273 -274 """ -275 all_mz = {} -276 -277 for mz_list, abun_list in tuple_mz_abun_list: -278 -279 for index, mz in enumerate(mz_list): -280 -281 abundance = abun_list[index] -282 -283 if mz in all_mz: -284 all_mz[mz] = all_mz[mz] + abundance -285 else: -286 all_mz[mz] = abundance -287 -288 mz_all = [] -289 abun_all = [] -290 -291 for mz in sorted (all_mz) : -292 mz_all.append(mz) -293 abun_all.append(all_mz[mz]) -294 -295 return array(mz_all), array(abun_all) -296 -297 def calc_error(self, mass_ref, mass_sim, factor): -298 """ Calculate the error between two values. -299 -300 Parameters -301 ---------- -302 mass_ref : float -303 The reference value. -304 mass_sim : float -305 The simulated value. -306 factor : float -307 The factor to multiply the error by. -308 -309 Returns -310 ------- -311 float -312 The calculated error. 
-313 +261 mass_shift_ppp = self.calc_error( +262 mz_centroid[1], peak_obj.mz_exp, 1000000 +263 ) +264 # delta_mz = mz_centroid[1] - peak_obj.mz_exp +265 height_shift_per = self.calc_error( +266 abund_centroid[1], peak_obj.abundance, 100 +267 ) +268 # excitation_amplitude = str(mass_spectrum_obj.filename.stem).split("ex")[1].split("pc")[0] +269 # ion_time = str(mass_spectrum_obj.filename.stem).split("0pt")[1].split("s")[0] +270 peak_obj.predicted_std = mass_shift_ppp +271 +272 results_list.append( +273 { +274 "ms_index_position": peak_obj_idx, +275 "predicted_std": mass_shift_ppp, +276 "mz_exp": peak_obj.mz_exp, +277 "nominal_mz_exp": peak_obj.nominal_mz_exp, +278 "predicted_mz": mz_centroid[1], +279 "s2n": peak_obj.signal_to_noise, +280 "peak_height": peak_obj.abundance, +281 "predicted_peak_height": abund_centroid[1], +282 "peak_height_error": height_shift_per, +283 "resolving_power": peak_obj.resolving_power, +284 # "excitation_amplitude" : excitation_amplitude, +285 # "ion_time" : ion_time +286 } +287 ) +288 +289 indexes_without_results.remove(peak_obj_idx) +290 # elif len(mz_centroid) == 3 and len(abund_min_valley) != 2: +291 +292 for peak_obj_idx in indexes_without_results: +293 results_list.append( +294 { +295 "ms_index_position": peak_obj_idx, +296 "mz_exp": self.mass_spectrum_obj[peak_obj_idx].mz_exp, +297 "nominal_mz_exp": self.mass_spectrum_obj[ +298 peak_obj_idx +299 ].nominal_mz_exp, +300 "s2n": self.mass_spectrum_obj[peak_obj_idx].signal_to_noise, +301 "peak_height": self.mass_spectrum_obj[peak_obj_idx].abundance, +302 "resolving_power": self.mass_spectrum_obj[ +303 peak_obj_idx +304 ].resolving_power, +305 # "excitation_amplitude" : excitation_amplitude, +306 # "ion_time" : ion_time +307 } +308 ) +309 +310 df = DataFrame(results_list).sort_values("mz_exp") +311 +312 df.interpolate(method="linear", limit_direction="backward", inplace=True) +313 df.interpolate(method="linear", limit_direction="forward", inplace=True) 314 -315 """ -316 return 
(mass_sim-mass_ref/mass_ref)*factor +315 # TODO improve interpolation for missing data +316 # f1 = interpolate.interp1d(x1, y1, kind='quadratic',fill_value="extrapolate") 317 -318 def find_peak_apex(self, mz, abund): -319 """ Find the peak apex. +318 for peak_obj_idx in indexes_without_results: +319 predicted_std = df.loc[peak_obj_idx].predicted_std 320 -321 Parameters -322 ------ -323 mz : array -324 The mz array. -325 abund : array -326 The abundance array. +321 self.mass_spectrum_obj[peak_obj_idx].predicted_std = predicted_std +322 +323 return df +324 +325 def sum_data(self, tuple_mz_abun_list: tuple): +326 """Sum the abundances of the simulated peaks. 327 -328 Returns -329 ------- -330 tuple -331 A tuple containing the peak apex mass and abundance. +328 Parameters +329 ------ +330 tuple_mz_abun_list : tuple +331 A tuple containing the mz and abundance lists. 332 -333 """ -334 dy = abund[1:] - abund[:-1] -335 -336 #replaces nan for infinity''' -337 indices_nan = where(isnan(abund))[0] -338 -339 if indices_nan.size: -340 -341 abund[indices_nan] = inf -342 dy[where(isnan(dy))[0]] = inf -343 -344 indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0] -345 -346 if indexes.size: -347 -348 return mz[indexes], abund[indexes] +333 Returns +334 ------- +335 tuple +336 A tuple containing the summed mz and abundance lists. +337 +338 """ +339 all_mz = {} +340 +341 for mz_list, abun_list in tuple_mz_abun_list: +342 for index, mz in enumerate(mz_list): +343 abundance = abun_list[index] +344 +345 if mz in all_mz: +346 all_mz[mz] = all_mz[mz] + abundance +347 else: +348 all_mz[mz] = abundance 349 -350 def find_peak_valley(self, mz, abund): -351 """ Find the peak valley. +350 mz_all = [] +351 abun_all = [] 352 -353 Parameters -354 ------ -355 mz : array -356 The mz array. -357 abund : array -358 The abundance array. -359 -360 Returns -361 ------- -362 tuple -363 A tuple containing the peak valley mz and abundance. 
-364 """ -365 dy = abund[1:] - abund[:-1] -366 -367 #replaces nan for infinity -368 indices_nan = where(isnan(abund))[0] -369 -370 if indices_nan.size: -371 -372 abund[indices_nan] = inf -373 dy[where(isnan(dy))[0]] = inf -374 -375 indexes = where((hstack((dy, 0)) > 0) & (hstack((0, dy)) < 0))[0] +353 for mz in sorted(all_mz): +354 mz_all.append(mz) +355 abun_all.append(all_mz[mz]) +356 +357 return array(mz_all), array(abun_all) +358 +359 def calc_error(self, mass_ref, mass_sim, factor): +360 """Calculate the error between two values. +361 +362 Parameters +363 ---------- +364 mass_ref : float +365 The reference value. +366 mass_sim : float +367 The simulated value. +368 factor : float +369 The factor to multiply the error by. +370 +371 Returns +372 ------- +373 float +374 The calculated error. +375 376 -377 return mz[indexes], abund[indexes] +377 """ +378 return (mass_sim - mass_ref / mass_ref) * factor +379 +380 def find_peak_apex(self, mz, abund): +381 """Find the peak apex. +382 +383 Parameters +384 ------ +385 mz : array +386 The mz array. +387 abund : array +388 The abundance array. +389 +390 Returns +391 ------- +392 tuple +393 A tuple containing the peak apex mass and abundance. +394 +395 """ +396 dy = abund[1:] - abund[:-1] +397 +398 # replaces nan for infinity''' +399 indices_nan = where(isnan(abund))[0] +400 +401 if indices_nan.size: +402 abund[indices_nan] = inf +403 dy[where(isnan(dy))[0]] = inf +404 +405 indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0] +406 +407 if indexes.size: +408 return mz[indexes], abund[indexes] +409 +410 def find_peak_valley(self, mz, abund): +411 """Find the peak valley. +412 +413 Parameters +414 ------ +415 mz : array +416 The mz array. +417 abund : array +418 The abundance array. +419 +420 Returns +421 ------- +422 tuple +423 A tuple containing the peak valley mz and abundance. 
+424 """ +425 dy = abund[1:] - abund[:-1] +426 +427 # replaces nan for infinity +428 indices_nan = where(isnan(abund))[0] +429 +430 if indices_nan.size: +431 abund[indices_nan] = inf +432 dy[where(isnan(dy))[0]] = inf +433 +434 indexes = where((hstack((dy, 0)) > 0) & (hstack((0, dy)) < 0))[0] +435 +436 return mz[indexes], abund[indexes]

    @@ -494,375 +553,433 @@

    -
     10class MassErrorPrediction(Thread):
    - 11    """ Class for mass error prediction.
    - 12
    - 13    Parameters
    - 14    ----------
    - 15    mass_spectrum : list
    - 16        List of mass spectrum objects.
    - 17    mz_overlay : int, optional
    - 18        The mz overlay value for peak simulation. Default is 10.
    - 19    rp_increments : int, optional
    - 20        The resolving power increments for peak simulation. Default is 10000.
    - 21    base_line_target : float, optional
    - 22        The target value for the baseline resolution. Default is 0.01.
    - 23    max_interation : int, optional
    - 24        The maximum number of iterations for peak simulation. Default is 1000.
    - 25    interpolation : str, optional
    - 26        The interpolation method for missing data. Default is 'linear'.
    - 27
    - 28    Attributes
    - 29    ----------
    - 30    mass_spectrum_obj : list
    - 31        List of mass spectrum objects.
    - 32    mz_overlay : int
    - 33        The mz overlay value for peak simulation.
    - 34    rp_increments : int
    - 35        The resolving power increments for peak simulation.
    - 36    base_line_target : float
    - 37        The target value for the baseline resolution.
    - 38    max_interation : int
    - 39        The maximum number of iterations for peak simulation.
    - 40    df : DataFrame or None
    - 41        The calculated error distribution dataframe.
    - 42    interpolation : str
    - 43        The interpolation method for missing data.
    - 44
    - 45    Methods
    - 46    -------
    - 47    * run().
    - 48        Runs the mass error prediction calculation.
    - 49    * get_results().
    - 50        Returns the calculated error distribution dataframe.
    - 51
    - 52    """
    - 53    def __init__(self, mass_spectrum, mz_overlay=10, rp_increments=10000, 
    - 54                 base_line_target : float=0.01, max_interation=1000, interpolation='linear'):
    - 55        
    - 56        Thread.__init__(self)
    - 57        
    - 58        self.mass_spectrum_obj = mass_spectrum
    - 59
    - 60        self.mz_overlay = mz_overlay
    - 61
    - 62        self.rp_increments = rp_increments
    - 63
    - 64        self.base_line_target = base_line_target 
    +            
     11class MassErrorPrediction(Thread):
    + 12    """Class for mass error prediction.
    + 13
    + 14    Parameters
    + 15    ----------
    + 16    mass_spectrum : list
    + 17        List of mass spectrum objects.
    + 18    mz_overlay : int, optional
    + 19        The mz overlay value for peak simulation. Default is 10.
    + 20    rp_increments : int, optional
    + 21        The resolving power increments for peak simulation. Default is 10000.
    + 22    base_line_target : float, optional
    + 23        The target value for the baseline resolution. Default is 0.01.
    + 24    max_interation : int, optional
    + 25        The maximum number of iterations for peak simulation. Default is 1000.
    + 26    interpolation : str, optional
    + 27        The interpolation method for missing data. Default is 'linear'.
    + 28
    + 29    Attributes
    + 30    ----------
    + 31    mass_spectrum_obj : list
    + 32        List of mass spectrum objects.
    + 33    mz_overlay : int
    + 34        The mz overlay value for peak simulation.
    + 35    rp_increments : int
    + 36        The resolving power increments for peak simulation.
    + 37    base_line_target : float
    + 38        The target value for the baseline resolution.
    + 39    max_interation : int
    + 40        The maximum number of iterations for peak simulation.
    + 41    df : DataFrame or None
    + 42        The calculated error distribution dataframe.
    + 43    interpolation : str
    + 44        The interpolation method for missing data.
    + 45
    + 46    Methods
    + 47    -------
    + 48    * run().
    + 49        Runs the mass error prediction calculation.
    + 50    * get_results().
    + 51        Returns the calculated error distribution dataframe.
    + 52
    + 53    """
    + 54
    + 55    def __init__(
    + 56        self,
    + 57        mass_spectrum,
    + 58        mz_overlay=10,
    + 59        rp_increments=10000,
    + 60        base_line_target: float = 0.01,
    + 61        max_interation=1000,
    + 62        interpolation="linear",
    + 63    ):
    + 64        Thread.__init__(self)
      65
    - 66        self.max_interation = max_interation
    + 66        self.mass_spectrum_obj = mass_spectrum
      67
    - 68        self.df = None
    + 68        self.mz_overlay = mz_overlay
      69
    - 70        self.interpolation = interpolation
    - 71    
    - 72    def run(self):
    - 73        """ Runs the mass error prediction calculation.
    - 74        """    
    - 75        self.df = self.calc_error_dist()
    - 76
    - 77    def get_results(self):
    - 78        """ Returns the calculated error distribution dataframe.
    - 79        """
    - 80
    - 81        if not self.df:
    - 82            self.run()
    + 70        self.rp_increments = rp_increments
    + 71
    + 72        self.base_line_target = base_line_target
    + 73
    + 74        self.max_interation = max_interation
    + 75
    + 76        self.df = None
    + 77
    + 78        self.interpolation = interpolation
    + 79
    + 80    def run(self):
    + 81        """Runs the mass error prediction calculation."""
    + 82        self.df = self.calc_error_dist()
      83
    - 84        return self.df
    - 85
    - 86    def calc_error_dist(self):
    - 87        """ Calculate the error distribution.
    - 88        """
    - 89        results_list = []
    - 90        
    - 91        indexes_without_results = list(range(len(self.mass_spectrum_obj)))
    - 92        # loop trough mass spectrum
    - 93
    - 94        for peak_obj_idx, peak_obj in enumerate(tqdm(self.mass_spectrum_obj)):
    - 95            
    - 96            # access ms peaks triplets ( peak_obj_idx -1, peak_obj_idx, and peak_obj_idx + 1)
    - 97            # check lower and upper boundaries to not excesses mass spectrum range
    - 98            
    - 99            if  peak_obj_idx != 0 and peak_obj_idx != len(self.mass_spectrum_obj)-1:
    -100                
    -101                # current peak_obj initialted in the loop expression
    -102                # geting the peak on the left (previous_peak_obj) and the one in the right position (next_peak_obj)
    -103                next_peak_obj = self.mass_spectrum_obj[peak_obj_idx + 1]
    -104                previous_peak_obj = self.mass_spectrum_obj[peak_obj_idx - 1]
    -105                
    -106                # check mz range defined in max_mz variable and check if peaks have same nominal mz
    -107                # keeping same mz for better plotting representation only, remove it for production
    -108                if  peak_obj.nominal_mz_exp == next_peak_obj.nominal_mz_exp and peak_obj.nominal_mz_exp == previous_peak_obj.nominal_mz_exp:
    -109                    
    -110                    #simulate peak shape
    -111                    sim_mz, sim_abun = peak_obj.gaussian(mz_overlay=self.mz_overlay)
    -112                    #update_plot(sim_mz,sim_abun, 0.5)
    -113                    
    -114                    #simulate peak shape
    -115                    next_sim_mz, next_sim_abun = next_peak_obj.gaussian(mz_overlay=self.mz_overlay)
    -116                    #update_plot(next_sim_mz, next_sim_abun, 0.5)
    -117                    
    -118                    
    -119                    #simulate peak shape
    -120                    previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(mz_overlay=self.mz_overlay)
    -121                    #update_plot(previous_sim_mz,  previous_sim_abun, 0.5)
    -122                    
    -123                    sim_mz_domain,  summed_peaks_abun = self.sum_data( ((previous_sim_mz,previous_sim_abun),  (sim_mz,sim_abun), (next_sim_mz, next_sim_abun)) )
    -124                    #update_plot(sim_mz_domain,summed_peaks_abun, 0.5)
    -125                    
    -126                    #sum simulated abundances 
    -127                    #summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun) 
    -128                    
    -129                    #normalize abundances to 0-1
    -130                    #summed_peaks_abun = summed_peaks_abun/(max(summed_peaks_abun))
    -131
    -132                    #find appexes location (mz) and magnitude
    -133                    mz_centroid, abund_centroid = self.find_peak_apex(sim_mz_domain,summed_peaks_abun)    
    -134
    -135                    #find valley location (mz_min_valley) and magnitude (abund_min_valley)
    -136                    mz_min_valley, abund_min_valley = self.find_peak_valley(sim_mz_domain, summed_peaks_abun)  
    -137
    -138                    # clear delta_rp (global implementation) and store choose resolving power increments   
    -139                    delta_rp = self.rp_increments
    -140                    
    -141                    # used to limited number of iterations
    -142                    i = 0
    -143                    j = 0
    -144                    
    -145                    # TODO: fit peak shape and decide best fit #gaussian, lorentz and voigt 
    -146                    #plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
    -147                    if len(mz_centroid) == 2 :
    -148                            
    -149                        while len(mz_centroid) < 3 and i <= self.max_interation:
    -150                            
    -151                            previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -152                            
    -153                            sim_mz, sim_abun = peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -154                            
    -155                            next_sim_mz, next_sim_abun = next_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -156
    -157                            sim_mz_domain,  summed_peaks_abun = self.sum_data( ((previous_sim_mz,previous_sim_abun),  (sim_mz,sim_abun), (next_sim_mz, next_sim_abun)) )
    -158                            
    -159                            #update_plot(sim_mz_domain,  summed_peaks_abun, 0.01)
    -160
    -161                            mz_centroid, abund_centroid = self.find_peak_apex(sim_mz_domain,summed_peaks_abun)    
    + 84    def get_results(self):
    + 85        """Returns the calculated error distribution dataframe."""
    + 86
    + 87        if not self.df:
    + 88            self.run()
    + 89
    + 90        return self.df
    + 91
    + 92    def calc_error_dist(self):
    + 93        """Calculate the error distribution."""
    + 94        results_list = []
    + 95
    + 96        indexes_without_results = list(range(len(self.mass_spectrum_obj)))
    + 97        # loop trough mass spectrum
    + 98
    + 99        for peak_obj_idx, peak_obj in enumerate(tqdm(self.mass_spectrum_obj)):
    +100            # access ms peaks triplets ( peak_obj_idx -1, peak_obj_idx, and peak_obj_idx + 1)
    +101            # check lower and upper boundaries to not excesses mass spectrum range
    +102
    +103            if peak_obj_idx != 0 and peak_obj_idx != len(self.mass_spectrum_obj) - 1:
    +104                # current peak_obj initialted in the loop expression
    +105                # geting the peak on the left (previous_peak_obj) and the one in the right position (next_peak_obj)
    +106                next_peak_obj = self.mass_spectrum_obj[peak_obj_idx + 1]
    +107                previous_peak_obj = self.mass_spectrum_obj[peak_obj_idx - 1]
    +108
    +109                # check mz range defined in max_mz variable and check if peaks have same nominal mz
    +110                # keeping same mz for better plotting representation only, remove it for production
    +111                if (
    +112                    peak_obj.nominal_mz_exp == next_peak_obj.nominal_mz_exp
    +113                    and peak_obj.nominal_mz_exp == previous_peak_obj.nominal_mz_exp
    +114                ):
    +115                    # simulate peak shape
    +116                    sim_mz, sim_abun = peak_obj.gaussian(mz_overlay=self.mz_overlay)
    +117                    # update_plot(sim_mz,sim_abun, 0.5)
    +118
    +119                    # simulate peak shape
    +120                    next_sim_mz, next_sim_abun = next_peak_obj.gaussian(
    +121                        mz_overlay=self.mz_overlay
    +122                    )
    +123                    # update_plot(next_sim_mz, next_sim_abun, 0.5)
    +124
    +125                    # simulate peak shape
    +126                    previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(
    +127                        mz_overlay=self.mz_overlay
    +128                    )
    +129                    # update_plot(previous_sim_mz,  previous_sim_abun, 0.5)
    +130
    +131                    sim_mz_domain, summed_peaks_abun = self.sum_data(
    +132                        (
    +133                            (previous_sim_mz, previous_sim_abun),
    +134                            (sim_mz, sim_abun),
    +135                            (next_sim_mz, next_sim_abun),
    +136                        )
    +137                    )
    +138                    # update_plot(sim_mz_domain,summed_peaks_abun, 0.5)
    +139
    +140                    # sum simulated abundances
    +141                    # summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun)
    +142
    +143                    # normalize abundances to 0-1
    +144                    # summed_peaks_abun = summed_peaks_abun/(max(summed_peaks_abun))
    +145
    +146                    # find appexes location (mz) and magnitude
    +147                    mz_centroid, abund_centroid = self.find_peak_apex(
    +148                        sim_mz_domain, summed_peaks_abun
    +149                    )
    +150
    +151                    # find valley location (mz_min_valley) and magnitude (abund_min_valley)
    +152                    mz_min_valley, abund_min_valley = self.find_peak_valley(
    +153                        sim_mz_domain, summed_peaks_abun
    +154                    )
    +155
    +156                    # clear delta_rp (global implementation) and store choose resolving power increments
    +157                    delta_rp = self.rp_increments
    +158
    +159                    # used to limited number of iterations
    +160                    i = 0
    +161                    j = 0
     162
    -163                            delta_rp += self.rp_increments
    -164                            
    -165                            i += 1
    -166
    -167                        mz_min_valley, abund_min_valley = self.find_peak_valley(sim_mz_domain, summed_peaks_abun)      
    -168
    -169                    if len(mz_centroid) == 3 and len(abund_min_valley) == 2:
    -170                        # increase all three peak resolving power until both valley magnitude is bellow the defined target
    -171                        # calculate peak shapes with the needed resolving power to have a baseline resolution for all peaks
    -172                        # calculate mass difference (ppm) between original centroid and the new simulated peak. 
    -173                        
    -174                        while  abund_min_valley[0] > self.base_line_target or abund_min_valley[1] > self.base_line_target and j <= self.max_interation:
    -175                            
    -176                            previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -177                            
    -178                            sim_mz, sim_abun = peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -179                            
    -180                            next_sim_mz, next_sim_abun = next_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -181
    -182                            sim_mz_domain,  summed_peaks_abun = self.sum_data( ((previous_sim_mz,previous_sim_abun),  (sim_mz,sim_abun), (next_sim_mz, next_sim_abun)) )
    -183                            
    -184                            #update_plot(sim_mz_domain,  summed_peaks_abun, 0.001)
    -185                            
    -186                            #summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun) 
    -187                            
    -188                            
    -189                            #find appexes location (mz) and magnitude
    -190                            mz_centroid, abund_centroid = self.find_peak_apex(sim_mz_domain,summed_peaks_abun)    
    -191                            
    -192                            #find valley location (mz_min_valley) and magnitude (abund_min_valley)
    -193                            summed_peaks_abun = summed_peaks_abun/(summed_peaks_abun.max())
    -194                            mz_min_valley, abund_min_valley = self.find_peak_valley(sim_mz_domain, summed_peaks_abun)  
    -195
    -196                            if len(abund_min_valley) != 2:
    -197                                break
    -198                            
    -199                            delta_rp += self.rp_increments
    -200                            j += 1
    -201                            
    -202                            #plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
    -203                        
    -204                        
    -205                        #plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
    -206
    -207                        mass_shift_ppp = self.calc_error(mz_centroid[1], peak_obj.mz_exp, 1000000)
    -208                        #delta_mz = mz_centroid[1] - peak_obj.mz_exp
    -209                        height_shift_per = self.calc_error(abund_centroid[1], peak_obj.abundance, 100)
    -210                        #excitation_amplitude = str(mass_spectrum_obj.filename.stem).split("ex")[1].split("pc")[0]
    -211                        #ion_time = str(mass_spectrum_obj.filename.stem).split("0pt")[1].split("s")[0]
    -212                        peak_obj.predicted_std = mass_shift_ppp
    -213                        
    -214                        results_list.append( {
    -215                        "ms_index_position" : peak_obj_idx,
    -216                        "predicted_std": mass_shift_ppp,
    -217                        "mz_exp": peak_obj.mz_exp,
    -218                        "nominal_mz_exp": peak_obj.nominal_mz_exp,
    -219                        "predicted_mz": mz_centroid[1],
    -220                        "s2n" : peak_obj.signal_to_noise,
    -221                        "peak_height" : peak_obj.abundance,
    -222                        "predicted_peak_height" : abund_centroid[1],
    -223                        "peak_height_error" : height_shift_per,
    -224                        "resolving_power" : peak_obj.resolving_power,
    -225                        #"excitation_amplitude" : excitation_amplitude,
    -226                        #"ion_time" : ion_time
    -227                        })
    -228                        
    -229                        indexes_without_results.remove(peak_obj_idx)
    -230                    #elif len(mz_centroid) == 3 and len(abund_min_valley) != 2:
    -231
    -232        for peak_obj_idx in indexes_without_results:
    -233
    -234            results_list.append( {
    -235            "ms_index_position" : peak_obj_idx,
    -236            "mz_exp": self.mass_spectrum_obj[peak_obj_idx].mz_exp,
    -237            "nominal_mz_exp": self.mass_spectrum_obj[peak_obj_idx].nominal_mz_exp,
    -238            "s2n" : self.mass_spectrum_obj[peak_obj_idx].signal_to_noise,
    -239            "peak_height" : self.mass_spectrum_obj[peak_obj_idx].abundance,
    -240            "resolving_power" : self.mass_spectrum_obj[peak_obj_idx].resolving_power,
    -241            #"excitation_amplitude" : excitation_amplitude,
    -242            #"ion_time" : ion_time
    -243            } )
    -244
    -245        df = DataFrame(results_list).sort_values("mz_exp")
    -246        
    -247        df.interpolate(method ='linear', limit_direction ='backward',  inplace=True)
    -248        df.interpolate(method ='linear', limit_direction ='forward',  inplace=True)
    -249
    -250        #TODO improve interpolation for missing data
    -251        #f1 = interpolate.interp1d(x1, y1, kind='quadratic',fill_value="extrapolate")
    -252
    -253        
    -254        for peak_obj_idx in indexes_without_results:
    -255
    -256            predicted_std = df.loc[peak_obj_idx].predicted_std
    -257            
    -258            self.mass_spectrum_obj[peak_obj_idx].predicted_std = predicted_std
    +163                    # TODO: fit peak shape and decide best fit #gaussian, lorentz and voigt
    +164                    # plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
    +165                    if len(mz_centroid) == 2:
    +166                        while len(mz_centroid) < 3 and i <= self.max_interation:
    +167                            previous_sim_mz, previous_sim_abun = (
    +168                                previous_peak_obj.gaussian(
    +169                                    delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +170                                )
    +171                            )
    +172
    +173                            sim_mz, sim_abun = peak_obj.gaussian(
    +174                                delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +175                            )
    +176
    +177                            next_sim_mz, next_sim_abun = next_peak_obj.gaussian(
    +178                                delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +179                            )
    +180
    +181                            sim_mz_domain, summed_peaks_abun = self.sum_data(
    +182                                (
    +183                                    (previous_sim_mz, previous_sim_abun),
    +184                                    (sim_mz, sim_abun),
    +185                                    (next_sim_mz, next_sim_abun),
    +186                                )
    +187                            )
    +188
    +189                            # update_plot(sim_mz_domain,  summed_peaks_abun, 0.01)
    +190
    +191                            mz_centroid, abund_centroid = self.find_peak_apex(
    +192                                sim_mz_domain, summed_peaks_abun
    +193                            )
    +194
    +195                            delta_rp += self.rp_increments
    +196
    +197                            i += 1
    +198
    +199                        mz_min_valley, abund_min_valley = self.find_peak_valley(
    +200                            sim_mz_domain, summed_peaks_abun
    +201                        )
    +202
    +203                    if len(mz_centroid) == 3 and len(abund_min_valley) == 2:
    +204                        # increase all three peak resolving power until both valley magnitude is bellow the defined target
    +205                        # calculate peak shapes with the needed resolving power to have a baseline resolution for all peaks
    +206                        # calculate mass difference (ppm) between original centroid and the new simulated peak.
    +207
    +208                        while (
    +209                            abund_min_valley[0] > self.base_line_target
    +210                            or abund_min_valley[1] > self.base_line_target
    +211                            and j <= self.max_interation
    +212                        ):
    +213                            previous_sim_mz, previous_sim_abun = (
    +214                                previous_peak_obj.gaussian(
    +215                                    delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +216                                )
    +217                            )
    +218
    +219                            sim_mz, sim_abun = peak_obj.gaussian(
    +220                                delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +221                            )
    +222
    +223                            next_sim_mz, next_sim_abun = next_peak_obj.gaussian(
    +224                                delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +225                            )
    +226
    +227                            sim_mz_domain, summed_peaks_abun = self.sum_data(
    +228                                (
    +229                                    (previous_sim_mz, previous_sim_abun),
    +230                                    (sim_mz, sim_abun),
    +231                                    (next_sim_mz, next_sim_abun),
    +232                                )
    +233                            )
    +234
    +235                            # update_plot(sim_mz_domain,  summed_peaks_abun, 0.001)
    +236
    +237                            # summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun)
    +238
    +239                            # find appexes location (mz) and magnitude
    +240                            mz_centroid, abund_centroid = self.find_peak_apex(
    +241                                sim_mz_domain, summed_peaks_abun
    +242                            )
    +243
    +244                            # find valley location (mz_min_valley) and magnitude (abund_min_valley)
    +245                            summed_peaks_abun = summed_peaks_abun / (
    +246                                summed_peaks_abun.max()
    +247                            )
    +248                            mz_min_valley, abund_min_valley = self.find_peak_valley(
    +249                                sim_mz_domain, summed_peaks_abun
    +250                            )
    +251
    +252                            if len(abund_min_valley) != 2:
    +253                                break
    +254
    +255                            delta_rp += self.rp_increments
    +256                            j += 1
    +257
    +258                            # plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
     259
    -260        return df
    +260                        # plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
     261
    -262    def sum_data(self, tuple_mz_abun_list : tuple):
    -263        """ Sum the abundances of the simulated peaks.
    -264
    -265        Parameters
    -266        ------
    -267        tuple_mz_abun_list : tuple
    -268            A tuple containing the mz and abundance lists.
    -269        
    -270        Returns
    -271        -------
    -272        tuple
    -273            A tuple containing the summed mz and abundance lists.
    -274        
    -275        """
    -276        all_mz = {}
    -277
    -278        for mz_list, abun_list in tuple_mz_abun_list:
    -279            
    -280            for index, mz in enumerate(mz_list):
    -281
    -282                abundance = abun_list[index]
    -283
    -284                if mz in all_mz:
    -285                    all_mz[mz] = all_mz[mz] + abundance    
    -286                else: 
    -287                    all_mz[mz] = abundance
    -288        
    -289        mz_all = []
    -290        abun_all = []
    -291
    -292        for mz in sorted (all_mz) : 
    -293            mz_all.append(mz)
    -294            abun_all.append(all_mz[mz])
    -295
    -296        return array(mz_all), array(abun_all)    
    -297
    -298    def calc_error(self, mass_ref, mass_sim, factor):
    -299        """ Calculate the error between two values.
    -300
    -301        Parameters
    -302        ----------
    -303        mass_ref : float
    -304            The reference value.
    -305        mass_sim : float
    -306            The simulated value.
    -307        factor : float
    -308            The factor to multiply the error by.
    -309
    -310        Returns
    -311        -------
    -312        float
    -313            The calculated error.
    -314             
    +262                        mass_shift_ppp = self.calc_error(
    +263                            mz_centroid[1], peak_obj.mz_exp, 1000000
    +264                        )
    +265                        # delta_mz = mz_centroid[1] - peak_obj.mz_exp
    +266                        height_shift_per = self.calc_error(
    +267                            abund_centroid[1], peak_obj.abundance, 100
    +268                        )
    +269                        # excitation_amplitude = str(mass_spectrum_obj.filename.stem).split("ex")[1].split("pc")[0]
    +270                        # ion_time = str(mass_spectrum_obj.filename.stem).split("0pt")[1].split("s")[0]
    +271                        peak_obj.predicted_std = mass_shift_ppp
    +272
    +273                        results_list.append(
    +274                            {
    +275                                "ms_index_position": peak_obj_idx,
    +276                                "predicted_std": mass_shift_ppp,
    +277                                "mz_exp": peak_obj.mz_exp,
    +278                                "nominal_mz_exp": peak_obj.nominal_mz_exp,
    +279                                "predicted_mz": mz_centroid[1],
    +280                                "s2n": peak_obj.signal_to_noise,
    +281                                "peak_height": peak_obj.abundance,
    +282                                "predicted_peak_height": abund_centroid[1],
    +283                                "peak_height_error": height_shift_per,
    +284                                "resolving_power": peak_obj.resolving_power,
    +285                                # "excitation_amplitude" : excitation_amplitude,
    +286                                # "ion_time" : ion_time
    +287                            }
    +288                        )
    +289
    +290                        indexes_without_results.remove(peak_obj_idx)
    +291                    # elif len(mz_centroid) == 3 and len(abund_min_valley) != 2:
    +292
    +293        for peak_obj_idx in indexes_without_results:
    +294            results_list.append(
    +295                {
    +296                    "ms_index_position": peak_obj_idx,
    +297                    "mz_exp": self.mass_spectrum_obj[peak_obj_idx].mz_exp,
    +298                    "nominal_mz_exp": self.mass_spectrum_obj[
    +299                        peak_obj_idx
    +300                    ].nominal_mz_exp,
    +301                    "s2n": self.mass_spectrum_obj[peak_obj_idx].signal_to_noise,
    +302                    "peak_height": self.mass_spectrum_obj[peak_obj_idx].abundance,
    +303                    "resolving_power": self.mass_spectrum_obj[
    +304                        peak_obj_idx
    +305                    ].resolving_power,
    +306                    # "excitation_amplitude" : excitation_amplitude,
    +307                    # "ion_time" : ion_time
    +308                }
    +309            )
    +310
    +311        df = DataFrame(results_list).sort_values("mz_exp")
    +312
    +313        df.interpolate(method="linear", limit_direction="backward", inplace=True)
    +314        df.interpolate(method="linear", limit_direction="forward", inplace=True)
     315
    -316        """
    -317        return (mass_sim-mass_ref/mass_ref)*factor
    +316        # TODO improve interpolation for missing data
    +317        # f1 = interpolate.interp1d(x1, y1, kind='quadratic',fill_value="extrapolate")
     318
    -319    def find_peak_apex(self, mz, abund):
    -320        """ Find the peak apex.
    +319        for peak_obj_idx in indexes_without_results:
    +320            predicted_std = df.loc[peak_obj_idx].predicted_std
     321
    -322        Parameters
    -323        ------
    -324        mz : array
    -325            The mz array.
    -326        abund : array
    -327            The abundance array.
    +322            self.mass_spectrum_obj[peak_obj_idx].predicted_std = predicted_std
    +323
    +324        return df
    +325
    +326    def sum_data(self, tuple_mz_abun_list: tuple):
    +327        """Sum the abundances of the simulated peaks.
     328
    -329        Returns
    -330        -------
    -331        tuple
    -332            A tuple containing the peak apex mass and abundance.
    +329        Parameters
    +330        ------
    +331        tuple_mz_abun_list : tuple
    +332            A tuple containing the mz and abundance lists.
     333
    -334        """
    -335        dy = abund[1:] - abund[:-1]
    -336
    -337        #replaces nan for infinity'''
    -338        indices_nan = where(isnan(abund))[0]
    -339
    -340        if indices_nan.size:
    -341            
    -342            abund[indices_nan] = inf
    -343            dy[where(isnan(dy))[0]] = inf
    -344
    -345        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    -346
    -347        if indexes.size:
    -348            
    -349            return mz[indexes], abund[indexes]
    +334        Returns
    +335        -------
    +336        tuple
    +337            A tuple containing the summed mz and abundance lists.
    +338
    +339        """
    +340        all_mz = {}
    +341
    +342        for mz_list, abun_list in tuple_mz_abun_list:
    +343            for index, mz in enumerate(mz_list):
    +344                abundance = abun_list[index]
    +345
    +346                if mz in all_mz:
    +347                    all_mz[mz] = all_mz[mz] + abundance
    +348                else:
    +349                    all_mz[mz] = abundance
     350
    -351    def find_peak_valley(self, mz, abund):
    -352        """ Find the peak valley.
    +351        mz_all = []
    +352        abun_all = []
     353
    -354        Parameters
    -355        ------
    -356        mz : array
    -357            The mz array.
    -358        abund : array
    -359            The abundance array.
    -360        
    -361        Returns
    -362        -------
    -363        tuple
    -364            A tuple containing the peak valley mz and abundance.
    -365        """
    -366        dy = abund[1:] - abund[:-1]
    -367        
    -368        #replaces nan for infinity
    -369        indices_nan = where(isnan(abund))[0]
    -370        
    -371        if indices_nan.size:
    -372            
    -373            abund[indices_nan] = inf
    -374            dy[where(isnan(dy))[0]] = inf
    -375        
    -376        indexes = where((hstack((dy, 0)) > 0) & (hstack((0, dy)) < 0))[0]
    +354        for mz in sorted(all_mz):
    +355            mz_all.append(mz)
    +356            abun_all.append(all_mz[mz])
    +357
    +358        return array(mz_all), array(abun_all)
    +359
    +360    def calc_error(self, mass_ref, mass_sim, factor):
    +361        """Calculate the error between two values.
    +362
    +363        Parameters
    +364        ----------
    +365        mass_ref : float
    +366            The reference value.
    +367        mass_sim : float
    +368            The simulated value.
    +369        factor : float
    +370            The factor to multiply the error by.
    +371
    +372        Returns
    +373        -------
    +374        float
    +375            The calculated error.
    +376
     377
    -378        return mz[indexes], abund[indexes]    
    +378        """
    +379        return (mass_sim - mass_ref / mass_ref) * factor
    +380
    +381    def find_peak_apex(self, mz, abund):
    +382        """Find the peak apex.
    +383
    +384        Parameters
    +385        ------
    +386        mz : array
    +387            The mz array.
    +388        abund : array
    +389            The abundance array.
    +390
    +391        Returns
    +392        -------
    +393        tuple
    +394            A tuple containing the peak apex mass and abundance.
    +395
    +396        """
    +397        dy = abund[1:] - abund[:-1]
    +398
    +399        # replaces nan for infinity'''
    +400        indices_nan = where(isnan(abund))[0]
    +401
    +402        if indices_nan.size:
    +403            abund[indices_nan] = inf
    +404            dy[where(isnan(dy))[0]] = inf
    +405
    +406        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +407
    +408        if indexes.size:
    +409            return mz[indexes], abund[indexes]
    +410
    +411    def find_peak_valley(self, mz, abund):
    +412        """Find the peak valley.
    +413
    +414        Parameters
    +415        ------
    +416        mz : array
    +417            The mz array.
    +418        abund : array
    +419            The abundance array.
    +420
    +421        Returns
    +422        -------
    +423        tuple
    +424            A tuple containing the peak valley mz and abundance.
    +425        """
    +426        dy = abund[1:] - abund[:-1]
    +427
    +428        # replaces nan for infinity
    +429        indices_nan = where(isnan(abund))[0]
    +430
    +431        if indices_nan.size:
    +432            abund[indices_nan] = inf
    +433            dy[where(isnan(dy))[0]] = inf
    +434
    +435        indexes = where((hstack((dy, 0)) > 0) & (hstack((0, dy)) < 0))[0]
    +436
    +437        return mz[indexes], abund[indexes]
     
    @@ -925,24 +1042,30 @@
    Methods
    -
    53    def __init__(self, mass_spectrum, mz_overlay=10, rp_increments=10000, 
    -54                 base_line_target : float=0.01, max_interation=1000, interpolation='linear'):
    -55        
    -56        Thread.__init__(self)
    -57        
    -58        self.mass_spectrum_obj = mass_spectrum
    -59
    -60        self.mz_overlay = mz_overlay
    -61
    -62        self.rp_increments = rp_increments
    -63
    -64        self.base_line_target = base_line_target 
    +            
    55    def __init__(
    +56        self,
    +57        mass_spectrum,
    +58        mz_overlay=10,
    +59        rp_increments=10000,
    +60        base_line_target: float = 0.01,
    +61        max_interation=1000,
    +62        interpolation="linear",
    +63    ):
    +64        Thread.__init__(self)
     65
    -66        self.max_interation = max_interation
    +66        self.mass_spectrum_obj = mass_spectrum
     67
    -68        self.df = None
    +68        self.mz_overlay = mz_overlay
     69
    -70        self.interpolation = interpolation
    +70        self.rp_increments = rp_increments
    +71
    +72        self.base_line_target = base_line_target
    +73
    +74        self.max_interation = max_interation
    +75
    +76        self.df = None
    +77
    +78        self.interpolation = interpolation
     
    @@ -1057,10 +1180,9 @@
    Methods
    -
    72    def run(self):
    -73        """ Runs the mass error prediction calculation.
    -74        """    
    -75        self.df = self.calc_error_dist()
    +            
    80    def run(self):
    +81        """Runs the mass error prediction calculation."""
    +82        self.df = self.calc_error_dist()
     
    @@ -1080,14 +1202,13 @@
    Methods
    -
    77    def get_results(self):
    -78        """ Returns the calculated error distribution dataframe.
    -79        """
    -80
    -81        if not self.df:
    -82            self.run()
    -83
    -84        return self.df
    +            
    84    def get_results(self):
    +85        """Returns the calculated error distribution dataframe."""
    +86
    +87        if not self.df:
    +88            self.run()
    +89
    +90        return self.df
     
    @@ -1107,181 +1228,239 @@
    Methods
    -
     86    def calc_error_dist(self):
    - 87        """ Calculate the error distribution.
    - 88        """
    - 89        results_list = []
    - 90        
    - 91        indexes_without_results = list(range(len(self.mass_spectrum_obj)))
    - 92        # loop trough mass spectrum
    - 93
    - 94        for peak_obj_idx, peak_obj in enumerate(tqdm(self.mass_spectrum_obj)):
    - 95            
    - 96            # access ms peaks triplets ( peak_obj_idx -1, peak_obj_idx, and peak_obj_idx + 1)
    - 97            # check lower and upper boundaries to not excesses mass spectrum range
    - 98            
    - 99            if  peak_obj_idx != 0 and peak_obj_idx != len(self.mass_spectrum_obj)-1:
    -100                
    -101                # current peak_obj initialted in the loop expression
    -102                # geting the peak on the left (previous_peak_obj) and the one in the right position (next_peak_obj)
    -103                next_peak_obj = self.mass_spectrum_obj[peak_obj_idx + 1]
    -104                previous_peak_obj = self.mass_spectrum_obj[peak_obj_idx - 1]
    -105                
    -106                # check mz range defined in max_mz variable and check if peaks have same nominal mz
    -107                # keeping same mz for better plotting representation only, remove it for production
    -108                if  peak_obj.nominal_mz_exp == next_peak_obj.nominal_mz_exp and peak_obj.nominal_mz_exp == previous_peak_obj.nominal_mz_exp:
    -109                    
    -110                    #simulate peak shape
    -111                    sim_mz, sim_abun = peak_obj.gaussian(mz_overlay=self.mz_overlay)
    -112                    #update_plot(sim_mz,sim_abun, 0.5)
    -113                    
    -114                    #simulate peak shape
    -115                    next_sim_mz, next_sim_abun = next_peak_obj.gaussian(mz_overlay=self.mz_overlay)
    -116                    #update_plot(next_sim_mz, next_sim_abun, 0.5)
    -117                    
    -118                    
    -119                    #simulate peak shape
    -120                    previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(mz_overlay=self.mz_overlay)
    -121                    #update_plot(previous_sim_mz,  previous_sim_abun, 0.5)
    -122                    
    -123                    sim_mz_domain,  summed_peaks_abun = self.sum_data( ((previous_sim_mz,previous_sim_abun),  (sim_mz,sim_abun), (next_sim_mz, next_sim_abun)) )
    -124                    #update_plot(sim_mz_domain,summed_peaks_abun, 0.5)
    -125                    
    -126                    #sum simulated abundances 
    -127                    #summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun) 
    -128                    
    -129                    #normalize abundances to 0-1
    -130                    #summed_peaks_abun = summed_peaks_abun/(max(summed_peaks_abun))
    -131
    -132                    #find appexes location (mz) and magnitude
    -133                    mz_centroid, abund_centroid = self.find_peak_apex(sim_mz_domain,summed_peaks_abun)    
    -134
    -135                    #find valley location (mz_min_valley) and magnitude (abund_min_valley)
    -136                    mz_min_valley, abund_min_valley = self.find_peak_valley(sim_mz_domain, summed_peaks_abun)  
    -137
    -138                    # clear delta_rp (global implementation) and store choose resolving power increments   
    -139                    delta_rp = self.rp_increments
    -140                    
    -141                    # used to limited number of iterations
    -142                    i = 0
    -143                    j = 0
    -144                    
    -145                    # TODO: fit peak shape and decide best fit #gaussian, lorentz and voigt 
    -146                    #plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
    -147                    if len(mz_centroid) == 2 :
    -148                            
    -149                        while len(mz_centroid) < 3 and i <= self.max_interation:
    -150                            
    -151                            previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -152                            
    -153                            sim_mz, sim_abun = peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -154                            
    -155                            next_sim_mz, next_sim_abun = next_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -156
    -157                            sim_mz_domain,  summed_peaks_abun = self.sum_data( ((previous_sim_mz,previous_sim_abun),  (sim_mz,sim_abun), (next_sim_mz, next_sim_abun)) )
    -158                            
    -159                            #update_plot(sim_mz_domain,  summed_peaks_abun, 0.01)
    -160
    -161                            mz_centroid, abund_centroid = self.find_peak_apex(sim_mz_domain,summed_peaks_abun)    
    +            
     92    def calc_error_dist(self):
    + 93        """Calculate the error distribution."""
    + 94        results_list = []
    + 95
    + 96        indexes_without_results = list(range(len(self.mass_spectrum_obj)))
    + 97        # loop trough mass spectrum
    + 98
    + 99        for peak_obj_idx, peak_obj in enumerate(tqdm(self.mass_spectrum_obj)):
    +100            # access ms peaks triplets ( peak_obj_idx -1, peak_obj_idx, and peak_obj_idx + 1)
    +101            # check lower and upper boundaries to not excesses mass spectrum range
    +102
    +103            if peak_obj_idx != 0 and peak_obj_idx != len(self.mass_spectrum_obj) - 1:
    +104                # current peak_obj initialted in the loop expression
    +105                # geting the peak on the left (previous_peak_obj) and the one in the right position (next_peak_obj)
    +106                next_peak_obj = self.mass_spectrum_obj[peak_obj_idx + 1]
    +107                previous_peak_obj = self.mass_spectrum_obj[peak_obj_idx - 1]
    +108
    +109                # check mz range defined in max_mz variable and check if peaks have same nominal mz
    +110                # keeping same mz for better plotting representation only, remove it for production
    +111                if (
    +112                    peak_obj.nominal_mz_exp == next_peak_obj.nominal_mz_exp
    +113                    and peak_obj.nominal_mz_exp == previous_peak_obj.nominal_mz_exp
    +114                ):
    +115                    # simulate peak shape
    +116                    sim_mz, sim_abun = peak_obj.gaussian(mz_overlay=self.mz_overlay)
    +117                    # update_plot(sim_mz,sim_abun, 0.5)
    +118
    +119                    # simulate peak shape
    +120                    next_sim_mz, next_sim_abun = next_peak_obj.gaussian(
    +121                        mz_overlay=self.mz_overlay
    +122                    )
    +123                    # update_plot(next_sim_mz, next_sim_abun, 0.5)
    +124
    +125                    # simulate peak shape
    +126                    previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(
    +127                        mz_overlay=self.mz_overlay
    +128                    )
    +129                    # update_plot(previous_sim_mz,  previous_sim_abun, 0.5)
    +130
    +131                    sim_mz_domain, summed_peaks_abun = self.sum_data(
    +132                        (
    +133                            (previous_sim_mz, previous_sim_abun),
    +134                            (sim_mz, sim_abun),
    +135                            (next_sim_mz, next_sim_abun),
    +136                        )
    +137                    )
    +138                    # update_plot(sim_mz_domain,summed_peaks_abun, 0.5)
    +139
    +140                    # sum simulated abundances
    +141                    # summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun)
    +142
    +143                    # normalize abundances to 0-1
    +144                    # summed_peaks_abun = summed_peaks_abun/(max(summed_peaks_abun))
    +145
    +146                    # find appexes location (mz) and magnitude
    +147                    mz_centroid, abund_centroid = self.find_peak_apex(
    +148                        sim_mz_domain, summed_peaks_abun
    +149                    )
    +150
    +151                    # find valley location (mz_min_valley) and magnitude (abund_min_valley)
    +152                    mz_min_valley, abund_min_valley = self.find_peak_valley(
    +153                        sim_mz_domain, summed_peaks_abun
    +154                    )
    +155
    +156                    # clear delta_rp (global implementation) and store choose resolving power increments
    +157                    delta_rp = self.rp_increments
    +158
    +159                    # used to limited number of iterations
    +160                    i = 0
    +161                    j = 0
     162
    -163                            delta_rp += self.rp_increments
    -164                            
    -165                            i += 1
    -166
    -167                        mz_min_valley, abund_min_valley = self.find_peak_valley(sim_mz_domain, summed_peaks_abun)      
    -168
    -169                    if len(mz_centroid) == 3 and len(abund_min_valley) == 2:
    -170                        # increase all three peak resolving power until both valley magnitude is bellow the defined target
    -171                        # calculate peak shapes with the needed resolving power to have a baseline resolution for all peaks
    -172                        # calculate mass difference (ppm) between original centroid and the new simulated peak. 
    -173                        
    -174                        while  abund_min_valley[0] > self.base_line_target or abund_min_valley[1] > self.base_line_target and j <= self.max_interation:
    -175                            
    -176                            previous_sim_mz, previous_sim_abun = previous_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -177                            
    -178                            sim_mz, sim_abun = peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -179                            
    -180                            next_sim_mz, next_sim_abun = next_peak_obj.gaussian(delta_rp=delta_rp, mz_overlay=self.mz_overlay)
    -181
    -182                            sim_mz_domain,  summed_peaks_abun = self.sum_data( ((previous_sim_mz,previous_sim_abun),  (sim_mz,sim_abun), (next_sim_mz, next_sim_abun)) )
    -183                            
    -184                            #update_plot(sim_mz_domain,  summed_peaks_abun, 0.001)
    -185                            
    -186                            #summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun) 
    -187                            
    -188                            
    -189                            #find appexes location (mz) and magnitude
    -190                            mz_centroid, abund_centroid = self.find_peak_apex(sim_mz_domain,summed_peaks_abun)    
    -191                            
    -192                            #find valley location (mz_min_valley) and magnitude (abund_min_valley)
    -193                            summed_peaks_abun = summed_peaks_abun/(summed_peaks_abun.max())
    -194                            mz_min_valley, abund_min_valley = self.find_peak_valley(sim_mz_domain, summed_peaks_abun)  
    -195
    -196                            if len(abund_min_valley) != 2:
    -197                                break
    -198                            
    -199                            delta_rp += self.rp_increments
    -200                            j += 1
    -201                            
    -202                            #plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
    -203                        
    -204                        
    -205                        #plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
    -206
    -207                        mass_shift_ppp = self.calc_error(mz_centroid[1], peak_obj.mz_exp, 1000000)
    -208                        #delta_mz = mz_centroid[1] - peak_obj.mz_exp
    -209                        height_shift_per = self.calc_error(abund_centroid[1], peak_obj.abundance, 100)
    -210                        #excitation_amplitude = str(mass_spectrum_obj.filename.stem).split("ex")[1].split("pc")[0]
    -211                        #ion_time = str(mass_spectrum_obj.filename.stem).split("0pt")[1].split("s")[0]
    -212                        peak_obj.predicted_std = mass_shift_ppp
    -213                        
    -214                        results_list.append( {
    -215                        "ms_index_position" : peak_obj_idx,
    -216                        "predicted_std": mass_shift_ppp,
    -217                        "mz_exp": peak_obj.mz_exp,
    -218                        "nominal_mz_exp": peak_obj.nominal_mz_exp,
    -219                        "predicted_mz": mz_centroid[1],
    -220                        "s2n" : peak_obj.signal_to_noise,
    -221                        "peak_height" : peak_obj.abundance,
    -222                        "predicted_peak_height" : abund_centroid[1],
    -223                        "peak_height_error" : height_shift_per,
    -224                        "resolving_power" : peak_obj.resolving_power,
    -225                        #"excitation_amplitude" : excitation_amplitude,
    -226                        #"ion_time" : ion_time
    -227                        })
    -228                        
    -229                        indexes_without_results.remove(peak_obj_idx)
    -230                    #elif len(mz_centroid) == 3 and len(abund_min_valley) != 2:
    -231
    -232        for peak_obj_idx in indexes_without_results:
    -233
    -234            results_list.append( {
    -235            "ms_index_position" : peak_obj_idx,
    -236            "mz_exp": self.mass_spectrum_obj[peak_obj_idx].mz_exp,
    -237            "nominal_mz_exp": self.mass_spectrum_obj[peak_obj_idx].nominal_mz_exp,
    -238            "s2n" : self.mass_spectrum_obj[peak_obj_idx].signal_to_noise,
    -239            "peak_height" : self.mass_spectrum_obj[peak_obj_idx].abundance,
    -240            "resolving_power" : self.mass_spectrum_obj[peak_obj_idx].resolving_power,
    -241            #"excitation_amplitude" : excitation_amplitude,
    -242            #"ion_time" : ion_time
    -243            } )
    -244
    -245        df = DataFrame(results_list).sort_values("mz_exp")
    -246        
    -247        df.interpolate(method ='linear', limit_direction ='backward',  inplace=True)
    -248        df.interpolate(method ='linear', limit_direction ='forward',  inplace=True)
    -249
    -250        #TODO improve interpolation for missing data
    -251        #f1 = interpolate.interp1d(x1, y1, kind='quadratic',fill_value="extrapolate")
    -252
    -253        
    -254        for peak_obj_idx in indexes_without_results:
    -255
    -256            predicted_std = df.loc[peak_obj_idx].predicted_std
    -257            
    -258            self.mass_spectrum_obj[peak_obj_idx].predicted_std = predicted_std
    +163                    # TODO: fit peak shape and decide best fit #gaussian, lorentz and voigt
    +164                    # plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
    +165                    if len(mz_centroid) == 2:
    +166                        while len(mz_centroid) < 3 and i <= self.max_interation:
    +167                            previous_sim_mz, previous_sim_abun = (
    +168                                previous_peak_obj.gaussian(
    +169                                    delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +170                                )
    +171                            )
    +172
    +173                            sim_mz, sim_abun = peak_obj.gaussian(
    +174                                delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +175                            )
    +176
    +177                            next_sim_mz, next_sim_abun = next_peak_obj.gaussian(
    +178                                delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +179                            )
    +180
    +181                            sim_mz_domain, summed_peaks_abun = self.sum_data(
    +182                                (
    +183                                    (previous_sim_mz, previous_sim_abun),
    +184                                    (sim_mz, sim_abun),
    +185                                    (next_sim_mz, next_sim_abun),
    +186                                )
    +187                            )
    +188
    +189                            # update_plot(sim_mz_domain,  summed_peaks_abun, 0.01)
    +190
    +191                            mz_centroid, abund_centroid = self.find_peak_apex(
    +192                                sim_mz_domain, summed_peaks_abun
    +193                            )
    +194
    +195                            delta_rp += self.rp_increments
    +196
    +197                            i += 1
    +198
    +199                        mz_min_valley, abund_min_valley = self.find_peak_valley(
    +200                            sim_mz_domain, summed_peaks_abun
    +201                        )
    +202
    +203                    if len(mz_centroid) == 3 and len(abund_min_valley) == 2:
    +204                        # increase all three peak resolving power until both valley magnitude is bellow the defined target
    +205                        # calculate peak shapes with the needed resolving power to have a baseline resolution for all peaks
    +206                        # calculate mass difference (ppm) between original centroid and the new simulated peak.
    +207
    +208                        while (
    +209                            abund_min_valley[0] > self.base_line_target
    +210                            or abund_min_valley[1] > self.base_line_target
    +211                            and j <= self.max_interation
    +212                        ):
    +213                            previous_sim_mz, previous_sim_abun = (
    +214                                previous_peak_obj.gaussian(
    +215                                    delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +216                                )
    +217                            )
    +218
    +219                            sim_mz, sim_abun = peak_obj.gaussian(
    +220                                delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +221                            )
    +222
    +223                            next_sim_mz, next_sim_abun = next_peak_obj.gaussian(
    +224                                delta_rp=delta_rp, mz_overlay=self.mz_overlay
    +225                            )
    +226
    +227                            sim_mz_domain, summed_peaks_abun = self.sum_data(
    +228                                (
    +229                                    (previous_sim_mz, previous_sim_abun),
    +230                                    (sim_mz, sim_abun),
    +231                                    (next_sim_mz, next_sim_abun),
    +232                                )
    +233                            )
    +234
    +235                            # update_plot(sim_mz_domain,  summed_peaks_abun, 0.001)
    +236
    +237                            # summed_peaks_abun = (sim_abun + next_sim_abun + previous_sim_abun)
    +238
    +239                            # find appexes location (mz) and magnitude
    +240                            mz_centroid, abund_centroid = self.find_peak_apex(
    +241                                sim_mz_domain, summed_peaks_abun
    +242                            )
    +243
    +244                            # find valley location (mz_min_valley) and magnitude (abund_min_valley)
    +245                            summed_peaks_abun = summed_peaks_abun / (
    +246                                summed_peaks_abun.max()
    +247                            )
    +248                            mz_min_valley, abund_min_valley = self.find_peak_valley(
    +249                                sim_mz_domain, summed_peaks_abun
    +250                            )
    +251
    +252                            if len(abund_min_valley) != 2:
    +253                                break
    +254
    +255                            delta_rp += self.rp_increments
    +256                            j += 1
    +257
    +258                            # plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
     259
    -260        return df
    +260                        # plot_triplets(mz_centroid,abund_centroid, mz_min_valley, abund_min_valley, sim_mz_domain, summed_peaks_abun )
    +261
    +262                        mass_shift_ppp = self.calc_error(
    +263                            mz_centroid[1], peak_obj.mz_exp, 1000000
    +264                        )
    +265                        # delta_mz = mz_centroid[1] - peak_obj.mz_exp
    +266                        height_shift_per = self.calc_error(
    +267                            abund_centroid[1], peak_obj.abundance, 100
    +268                        )
    +269                        # excitation_amplitude = str(mass_spectrum_obj.filename.stem).split("ex")[1].split("pc")[0]
    +270                        # ion_time = str(mass_spectrum_obj.filename.stem).split("0pt")[1].split("s")[0]
    +271                        peak_obj.predicted_std = mass_shift_ppp
    +272
    +273                        results_list.append(
    +274                            {
    +275                                "ms_index_position": peak_obj_idx,
    +276                                "predicted_std": mass_shift_ppp,
    +277                                "mz_exp": peak_obj.mz_exp,
    +278                                "nominal_mz_exp": peak_obj.nominal_mz_exp,
    +279                                "predicted_mz": mz_centroid[1],
    +280                                "s2n": peak_obj.signal_to_noise,
    +281                                "peak_height": peak_obj.abundance,
    +282                                "predicted_peak_height": abund_centroid[1],
    +283                                "peak_height_error": height_shift_per,
    +284                                "resolving_power": peak_obj.resolving_power,
    +285                                # "excitation_amplitude" : excitation_amplitude,
    +286                                # "ion_time" : ion_time
    +287                            }
    +288                        )
    +289
    +290                        indexes_without_results.remove(peak_obj_idx)
    +291                    # elif len(mz_centroid) == 3 and len(abund_min_valley) != 2:
    +292
    +293        for peak_obj_idx in indexes_without_results:
    +294            results_list.append(
    +295                {
    +296                    "ms_index_position": peak_obj_idx,
    +297                    "mz_exp": self.mass_spectrum_obj[peak_obj_idx].mz_exp,
    +298                    "nominal_mz_exp": self.mass_spectrum_obj[
    +299                        peak_obj_idx
    +300                    ].nominal_mz_exp,
    +301                    "s2n": self.mass_spectrum_obj[peak_obj_idx].signal_to_noise,
    +302                    "peak_height": self.mass_spectrum_obj[peak_obj_idx].abundance,
    +303                    "resolving_power": self.mass_spectrum_obj[
    +304                        peak_obj_idx
    +305                    ].resolving_power,
    +306                    # "excitation_amplitude" : excitation_amplitude,
    +307                    # "ion_time" : ion_time
    +308                }
    +309            )
    +310
    +311        df = DataFrame(results_list).sort_values("mz_exp")
    +312
    +313        df.interpolate(method="linear", limit_direction="backward", inplace=True)
    +314        df.interpolate(method="linear", limit_direction="forward", inplace=True)
    +315
    +316        # TODO improve interpolation for missing data
    +317        # f1 = interpolate.interp1d(x1, y1, kind='quadratic',fill_value="extrapolate")
    +318
    +319        for peak_obj_idx in indexes_without_results:
    +320            predicted_std = df.loc[peak_obj_idx].predicted_std
    +321
    +322            self.mass_spectrum_obj[peak_obj_idx].predicted_std = predicted_std
    +323
    +324        return df
     
    @@ -1301,41 +1480,39 @@
    Methods
    -
    262    def sum_data(self, tuple_mz_abun_list : tuple):
    -263        """ Sum the abundances of the simulated peaks.
    -264
    -265        Parameters
    -266        ------
    -267        tuple_mz_abun_list : tuple
    -268            A tuple containing the mz and abundance lists.
    -269        
    -270        Returns
    -271        -------
    -272        tuple
    -273            A tuple containing the summed mz and abundance lists.
    -274        
    -275        """
    -276        all_mz = {}
    -277
    -278        for mz_list, abun_list in tuple_mz_abun_list:
    -279            
    -280            for index, mz in enumerate(mz_list):
    -281
    -282                abundance = abun_list[index]
    -283
    -284                if mz in all_mz:
    -285                    all_mz[mz] = all_mz[mz] + abundance    
    -286                else: 
    -287                    all_mz[mz] = abundance
    -288        
    -289        mz_all = []
    -290        abun_all = []
    -291
    -292        for mz in sorted (all_mz) : 
    -293            mz_all.append(mz)
    -294            abun_all.append(all_mz[mz])
    -295
    -296        return array(mz_all), array(abun_all)    
    +            
    326    def sum_data(self, tuple_mz_abun_list: tuple):
    +327        """Sum the abundances of the simulated peaks.
    +328
    +329        Parameters
    +330        ------
    +331        tuple_mz_abun_list : tuple
    +332            A tuple containing the mz and abundance lists.
    +333
    +334        Returns
    +335        -------
    +336        tuple
    +337            A tuple containing the summed mz and abundance lists.
    +338
    +339        """
    +340        all_mz = {}
    +341
    +342        for mz_list, abun_list in tuple_mz_abun_list:
    +343            for index, mz in enumerate(mz_list):
    +344                abundance = abun_list[index]
    +345
    +346                if mz in all_mz:
    +347                    all_mz[mz] = all_mz[mz] + abundance
    +348                else:
    +349                    all_mz[mz] = abundance
    +350
    +351        mz_all = []
    +352        abun_all = []
    +353
    +354        for mz in sorted(all_mz):
    +355            mz_all.append(mz)
    +356            abun_all.append(all_mz[mz])
    +357
    +358        return array(mz_all), array(abun_all)
     
    @@ -1368,26 +1545,26 @@
    Returns
    -
    298    def calc_error(self, mass_ref, mass_sim, factor):
    -299        """ Calculate the error between two values.
    -300
    -301        Parameters
    -302        ----------
    -303        mass_ref : float
    -304            The reference value.
    -305        mass_sim : float
    -306            The simulated value.
    -307        factor : float
    -308            The factor to multiply the error by.
    -309
    -310        Returns
    -311        -------
    -312        float
    -313            The calculated error.
    -314             
    -315
    -316        """
    -317        return (mass_sim-mass_ref/mass_ref)*factor
    +            
    360    def calc_error(self, mass_ref, mass_sim, factor):
    +361        """Calculate the error between two values.
    +362
    +363        Parameters
    +364        ----------
    +365        mass_ref : float
    +366            The reference value.
    +367        mass_sim : float
    +368            The simulated value.
    +369        factor : float
    +370            The factor to multiply the error by.
    +371
    +372        Returns
    +373        -------
    +374        float
    +375            The calculated error.
    +376
    +377
    +378        """
    +379        return (mass_sim - mass_ref / mass_ref) * factor
     
    @@ -1424,37 +1601,35 @@
    Returns
    -
    319    def find_peak_apex(self, mz, abund):
    -320        """ Find the peak apex.
    -321
    -322        Parameters
    -323        ------
    -324        mz : array
    -325            The mz array.
    -326        abund : array
    -327            The abundance array.
    -328
    -329        Returns
    -330        -------
    -331        tuple
    -332            A tuple containing the peak apex mass and abundance.
    -333
    -334        """
    -335        dy = abund[1:] - abund[:-1]
    -336
    -337        #replaces nan for infinity'''
    -338        indices_nan = where(isnan(abund))[0]
    -339
    -340        if indices_nan.size:
    -341            
    -342            abund[indices_nan] = inf
    -343            dy[where(isnan(dy))[0]] = inf
    -344
    -345        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    -346
    -347        if indexes.size:
    -348            
    -349            return mz[indexes], abund[indexes]
    +            
    381    def find_peak_apex(self, mz, abund):
    +382        """Find the peak apex.
    +383
    +384        Parameters
    +385        ------
    +386        mz : array
    +387            The mz array.
    +388        abund : array
    +389            The abundance array.
    +390
    +391        Returns
    +392        -------
    +393        tuple
    +394            A tuple containing the peak apex mass and abundance.
    +395
    +396        """
    +397        dy = abund[1:] - abund[:-1]
    +398
    +399        # replaces nan for infinity'''
    +400        indices_nan = where(isnan(abund))[0]
    +401
    +402        if indices_nan.size:
    +403            abund[indices_nan] = inf
    +404            dy[where(isnan(dy))[0]] = inf
    +405
    +406        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +407
    +408        if indexes.size:
    +409            return mz[indexes], abund[indexes]
     
    @@ -1489,34 +1664,33 @@
    Returns
    -
    351    def find_peak_valley(self, mz, abund):
    -352        """ Find the peak valley.
    -353
    -354        Parameters
    -355        ------
    -356        mz : array
    -357            The mz array.
    -358        abund : array
    -359            The abundance array.
    -360        
    -361        Returns
    -362        -------
    -363        tuple
    -364            A tuple containing the peak valley mz and abundance.
    -365        """
    -366        dy = abund[1:] - abund[:-1]
    -367        
    -368        #replaces nan for infinity
    -369        indices_nan = where(isnan(abund))[0]
    -370        
    -371        if indices_nan.size:
    -372            
    -373            abund[indices_nan] = inf
    -374            dy[where(isnan(dy))[0]] = inf
    -375        
    -376        indexes = where((hstack((dy, 0)) > 0) & (hstack((0, dy)) < 0))[0]
    -377
    -378        return mz[indexes], abund[indexes]    
    +            
    411    def find_peak_valley(self, mz, abund):
    +412        """Find the peak valley.
    +413
    +414        Parameters
    +415        ------
    +416        mz : array
    +417            The mz array.
    +418        abund : array
    +419            The abundance array.
    +420
    +421        Returns
    +422        -------
    +423        tuple
    +424            A tuple containing the peak valley mz and abundance.
    +425        """
    +426        dy = abund[1:] - abund[:-1]
    +427
    +428        # replaces nan for infinity
    +429        indices_nan = where(isnan(abund))[0]
    +430
    +431        if indices_nan.size:
    +432            abund[indices_nan] = inf
    +433            dy[where(isnan(dy))[0]] = inf
    +434
    +435        indexes = where((hstack((dy, 0)) > 0) & (hstack((0, dy)) < 0))[0]
    +436
    +437        return mz[indexes], abund[indexes]
     
    diff --git a/docs/corems/mass_spectrum/calc/MassSpectrumCalc.html b/docs/corems/mass_spectrum/calc/MassSpectrumCalc.html index 03f93462..0b679090 100644 --- a/docs/corems/mass_spectrum/calc/MassSpectrumCalc.html +++ b/docs/corems/mass_spectrum/calc/MassSpectrumCalc.html @@ -72,59 +72,59 @@

      1__author__ = "Yuri E. Corilo"
       2__date__ = "Jun 27, 2019"
       3
    -  4from numpy import power, multiply, sqrt, multiply, array, mean
    +  4from numpy import power, multiply, sqrt, array, mean
       5from corems.mass_spectrum.calc.NoiseCalc import NoiseThresholdCalc
       6from corems.mass_spectrum.calc.PeakPicking import PeakPicking
       7
    -  8class MassSpecCalc(PeakPicking, NoiseThresholdCalc ):
    -  9    """ Class for Mass Spectrum Calculations
    - 10
    - 11    Class including numerical calculations related to mass spectrum class
    - 12    Inherited PeakPicking and NoiseThresholdCalc ensuring its methods are 
    - 13    available to the instantiated mass spectrum class object
    - 14
    - 15    Parameters
    - 16    -------
    - 17    mass_spectrum : MassSpectrum
    - 18        CoreMS mass spectrum object
    - 19    
    - 20    Attributes
    - 21    --------
    - 22    All Attributes are derivative from the MassSpecBase Class
    - 23
    - 24    Methods 
    - 25    --------
    - 26    * check_mspeaks(). 
    - 27        Check if the mspeaks attribute is populated
    - 28    * sort_by_abundance(). 
    - 29        Sort the mspeaks by abundance
    - 30    * percentile_assigned(report_error=False). 
    - 31        Calculate the percentage of assigned peaks
    - 32    * resolving_power_calc(B, T). 
    - 33        Calculate the resolving power
    - 34    * number_average_molecular_weight(profile=False). 
    - 35        Calculate the number average molecular weight
    - 36    * weight_average_molecular_weight(profile=False). 
    - 37        Calculate the weight average molecular weight
    - 38    """
    - 39
    - 40    def percentile_assigned(self, report_error : bool=False):
    - 41        """ Percentage of peaks which are assigned
    - 42
    - 43        Parameters
    - 44        -----------
    - 45        report_error: bool, optional
    - 46            Report the error of the assigned peaks. Default is False.
    - 47        """
    - 48        verbose = self.parameters.mass_spectrum.verbose_processing
    - 49        assign_abun = 0
    - 50        not_assign_abun = 0
    - 51        i = 0
    - 52        j = 0
    - 53        if report_error:
    - 54            error = []
    - 55        for mspeak in self.sort_by_abundance():
    - 56            
    +  8
    +  9class MassSpecCalc(PeakPicking, NoiseThresholdCalc):
    + 10    """Class for Mass Spectrum Calculations
    + 11
    + 12    Class including numerical calculations related to mass spectrum class
    + 13    Inherited PeakPicking and NoiseThresholdCalc ensuring its methods are
    + 14    available to the instantiated mass spectrum class object
    + 15
    + 16    Parameters
    + 17    -------
    + 18    mass_spectrum : MassSpectrum
    + 19        CoreMS mass spectrum object
    + 20
    + 21    Attributes
    + 22    --------
    + 23    All Attributes are derivative from the MassSpecBase Class
    + 24
    + 25    Methods
    + 26    --------
    + 27    * check_mspeaks().
    + 28        Check if the mspeaks attribute is populated
    + 29    * sort_by_abundance().
    + 30        Sort the mspeaks by abundance
    + 31    * percentile_assigned(report_error=False).
    + 32        Calculate the percentage of assigned peaks
    + 33    * resolving_power_calc(B, T).
    + 34        Calculate the resolving power
    + 35    * number_average_molecular_weight(profile=False).
    + 36        Calculate the number average molecular weight
    + 37    * weight_average_molecular_weight(profile=False).
    + 38        Calculate the weight average molecular weight
    + 39    """
    + 40
    + 41    def percentile_assigned(self, report_error: bool = False):
    + 42        """Percentage of peaks which are assigned
    + 43
    + 44        Parameters
    + 45        -----------
    + 46        report_error: bool, optional
    + 47            Report the error of the assigned peaks. Default is False.
    + 48        """
    + 49        verbose = self.parameters.mass_spectrum.verbose_processing
    + 50        assign_abun = 0
    + 51        not_assign_abun = 0
    + 52        i = 0
    + 53        j = 0
    + 54        if report_error:
    + 55            error = []
    + 56        for mspeak in self.sort_by_abundance():
      57            if mspeak.is_assigned:
      58                i += 1
      59                assign_abun += mspeak.abundance
    @@ -138,147 +138,161 @@ 

    67 total_percent = (i / (i + j)) * 100 68 total_relative_abundance = (assign_abun / (not_assign_abun + assign_abun)) * 100 69 if report_error: - 70 rms_error = sqrt(mean(array(error)**2)) + 70 rms_error = sqrt(mean(array(error) ** 2)) 71 if verbose: - 72 print('%i assigned peaks and %i unassigned peaks, total = %.2f %%, relative abundance = %.2f %%, RMS error (best candidate) (ppm) = %.3f' % (i, j, total_percent, total_relative_abundance, rms_error)) - 73 return i, j, total_percent, total_relative_abundance, rms_error - 74 - 75 else: - 76 if verbose: - 77 print('%i assigned peaks and %i unassigned peaks , total = %.2f %%, relative abundance = %.2f %%' % (i, j, total_percent, total_relative_abundance,)) - 78 return i, j, total_percent, total_relative_abundance - 79 - 80 def resolving_power_calc(self, B : float, T : float): - 81 """ Calculate the theoretical resolving power - 82 - 83 Calls on the MSPeak object function to calculate the resolving power of a peak, this calcs for all peaks in a spectrum. - 84 - 85 Parameters - 86 ----------- - 87 T : float - 88 transient time - 89 B : float - 90 Magnetic Filed Strength (Tesla) - 91 - 92 References - 93 ---------- - 94 1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.) 
- 95 DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K - 96 - 97 """ - 98 - 99 self.check_mspeaks() -100 return array([mspeak.resolving_power_calc(B, T) for mspeak in self.mspeaks]) -101 -102 def _f_to_mz(self): -103 """ Ledford equation for converting frequency(Hz) to m/z -104 -105 Returns -106 ---------- -107 mz_domain : numpy array -108 m/z domain after conversion from frequency -109 """ -110 Aterm, Bterm, Cterm = self.Aterm, self.Bterm, self.Cterm -111 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage -112 -113 if Cterm == 0: -114 -115 if Bterm == 0: -116 #uncalibrated data -117 mz_domain = Aterm / self.freq_exp_profile -118 -119 else: -120 -121 mz_domain = (Aterm / (self.freq_exp_profile)) + (Bterm / power((self.freq_exp_profile), 2)) -122 -123 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to -124 else: -125 -126 mz_domain = (Aterm / self.freq_exp_profile) + (Bterm / power(self.freq_exp_profile, 2)) + Cterm -127 -128 return mz_domain -129 -130 def _f_to_mz_bruker(self): -131 """ Frequency to m/z conversion (Bruker) -132 Bruker equations for converting frequency (Hz) to m/z, -133 nOmega acquisition is not yet implemented here. 
-134 However, nOmega should work for commerical Bruker 2xR systems as A Term is automatically defined for 2X or 1X by the instrument -135 -136 -137 Returns -138 ---------- -139 numpy.array(float) -140 m/z domain after conversion from frequency -141 """ -142 Aterm, Bterm, Cterm = self.Aterm, self.Bterm, self.Cterm -143 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage -144 if Cterm == 0: -145 -146 if Bterm == 0: -147 #uncalibrated data -148 return Aterm / self.freq_exp_profile -149 -150 else: -151 #calc2 -152 return Aterm / (self.freq_exp_profile + Bterm) -153 -154 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to -155 else: -156 diff = Aterm * Aterm -157 -158 #this sign(diff + 4) changes on older aquistion software -159 diff = diff + 4 * Cterm * (self.freq_exp_profile - Bterm) -160 diff = sqrt(diff) -161 diff = -Aterm+diff -162 #calc3 -163 return (2*Cterm)/diff -164 return diff/2* (self.freq_exp_profile - Bterm) -165 -166 def number_average_molecular_weight(self, profile : bool=False): -167 """ Average molecular weight calculation -168 -169 Parameters -170 ---------- -171 profile : bool, optional -172 is data profile or centroid mode. The default is False (e.g. Centroid data) -173 -174 Returns -175 ------- -176 float -177 The average molecular weight. 
+ 72 print( + 73 "%i assigned peaks and %i unassigned peaks, total = %.2f %%, relative abundance = %.2f %%, RMS error (best candidate) (ppm) = %.3f" + 74 % (i, j, total_percent, total_relative_abundance, rms_error) + 75 ) + 76 return i, j, total_percent, total_relative_abundance, rms_error + 77 + 78 else: + 79 if verbose: + 80 print( + 81 "%i assigned peaks and %i unassigned peaks , total = %.2f %%, relative abundance = %.2f %%" + 82 % ( + 83 i, + 84 j, + 85 total_percent, + 86 total_relative_abundance, + 87 ) + 88 ) + 89 return i, j, total_percent, total_relative_abundance + 90 + 91 def resolving_power_calc(self, B: float, T: float): + 92 """Calculate the theoretical resolving power + 93 + 94 Calls on the MSPeak object function to calculate the resolving power of a peak, this calcs for all peaks in a spectrum. + 95 + 96 Parameters + 97 ----------- + 98 T : float + 99 transient time +100 B : float +101 Magnetic Filed Strength (Tesla) +102 +103 References +104 ---------- +105 1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.) 
+106 DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K +107 +108 """ +109 +110 self.check_mspeaks() +111 return array([mspeak.resolving_power_calc(B, T) for mspeak in self.mspeaks]) +112 +113 def _f_to_mz(self): +114 """Ledford equation for converting frequency(Hz) to m/z +115 +116 Returns +117 ---------- +118 mz_domain : numpy array +119 m/z domain after conversion from frequency +120 """ +121 Aterm, Bterm, Cterm = self.Aterm, self.Bterm, self.Cterm +122 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage +123 +124 if Cterm == 0: +125 if Bterm == 0: +126 # uncalibrated data +127 mz_domain = Aterm / self.freq_exp_profile +128 +129 else: +130 mz_domain = (Aterm / (self.freq_exp_profile)) + ( +131 Bterm / power((self.freq_exp_profile), 2) +132 ) +133 +134 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to +135 else: +136 mz_domain = ( +137 (Aterm / self.freq_exp_profile) +138 + (Bterm / power(self.freq_exp_profile, 2)) +139 + Cterm +140 ) +141 +142 return mz_domain +143 +144 def _f_to_mz_bruker(self): +145 """Frequency to m/z conversion (Bruker) +146 Bruker equations for converting frequency (Hz) to m/z, +147 nOmega acquisition is not yet implemented here. 
+148 However, nOmega should work for commerical Bruker 2xR systems as A Term is automatically defined for 2X or 1X by the instrument +149 +150 +151 Returns +152 ---------- +153 numpy.array(float) +154 m/z domain after conversion from frequency +155 """ +156 Aterm, Bterm, Cterm = self.Aterm, self.Bterm, self.Cterm +157 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage +158 if Cterm == 0: +159 if Bterm == 0: +160 # uncalibrated data +161 return Aterm / self.freq_exp_profile +162 +163 else: +164 # calc2 +165 return Aterm / (self.freq_exp_profile + Bterm) +166 +167 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to +168 else: +169 diff = Aterm * Aterm +170 +171 # this sign(diff + 4) changes on older aquistion software +172 diff = diff + 4 * Cterm * (self.freq_exp_profile - Bterm) +173 diff = sqrt(diff) +174 diff = -Aterm + diff +175 # calc3 +176 return (2 * Cterm) / diff +177 return diff / 2 * (self.freq_exp_profile - Bterm) 178 -179 """ -180 # mode is profile or centroid data -181 if profile: -182 a = multiply(self.mz_exp_profile, self.abundance_profile) -183 b = self.abundance_profile -184 return a.sum()/b.sum() -185 -186 else: -187 -188 return sum(self.mz_exp*self.abundance)/sum(self.abundance) -189 -190 def weight_average_molecular_weight(self, profile : bool=False): -191 """ -192 Weighted Average molecular weight calculation -193 -194 -195 Returns -196 ------- -197 float -198 The weight average molecular weight. 
-199 -200 """ -201 -202 # implement from MassSpectralPeaks objs -203 -204 if profile: -205 a = multiply(power(self.mz_exp_profile, 2), self.abundance_profile) -206 b = self.mz_exp_profile*self.abundance_profile -207 return a.sum() / b.sum() -208 -209 else: -210 return sum(power(self.mz_exp, 2)*self.abundance)/sum(self.mz_exp*self.abundance) +179 def number_average_molecular_weight(self, profile: bool = False): +180 """Average molecular weight calculation +181 +182 Parameters +183 ---------- +184 profile : bool, optional +185 is data profile or centroid mode. The default is False (e.g. Centroid data) +186 +187 Returns +188 ------- +189 float +190 The average molecular weight. +191 +192 """ +193 # mode is profile or centroid data +194 if profile: +195 a = multiply(self.mz_exp_profile, self.abundance_profile) +196 b = self.abundance_profile +197 return a.sum() / b.sum() +198 +199 else: +200 return sum(self.mz_exp * self.abundance) / sum(self.abundance) +201 +202 def weight_average_molecular_weight(self, profile: bool = False): +203 """ +204 Weighted Average molecular weight calculation +205 +206 +207 Returns +208 ------- +209 float +210 The weight average molecular weight. +211 +212 """ +213 +214 # implement from MassSpectralPeaks objs +215 +216 if profile: +217 a = multiply(power(self.mz_exp_profile, 2), self.abundance_profile) +218 b = self.mz_exp_profile * self.abundance_profile +219 return a.sum() / b.sum() +220 +221 else: +222 return sum(power(self.mz_exp, 2) * self.abundance) / sum( +223 self.mz_exp * self.abundance +224 )

    @@ -294,55 +308,54 @@

    -
      9class MassSpecCalc(PeakPicking, NoiseThresholdCalc ):
    - 10    """ Class for Mass Spectrum Calculations
    - 11
    - 12    Class including numerical calculations related to mass spectrum class
    - 13    Inherited PeakPicking and NoiseThresholdCalc ensuring its methods are 
    - 14    available to the instantiated mass spectrum class object
    - 15
    - 16    Parameters
    - 17    -------
    - 18    mass_spectrum : MassSpectrum
    - 19        CoreMS mass spectrum object
    - 20    
    - 21    Attributes
    - 22    --------
    - 23    All Attributes are derivative from the MassSpecBase Class
    - 24
    - 25    Methods 
    - 26    --------
    - 27    * check_mspeaks(). 
    - 28        Check if the mspeaks attribute is populated
    - 29    * sort_by_abundance(). 
    - 30        Sort the mspeaks by abundance
    - 31    * percentile_assigned(report_error=False). 
    - 32        Calculate the percentage of assigned peaks
    - 33    * resolving_power_calc(B, T). 
    - 34        Calculate the resolving power
    - 35    * number_average_molecular_weight(profile=False). 
    - 36        Calculate the number average molecular weight
    - 37    * weight_average_molecular_weight(profile=False). 
    - 38        Calculate the weight average molecular weight
    - 39    """
    - 40
    - 41    def percentile_assigned(self, report_error : bool=False):
    - 42        """ Percentage of peaks which are assigned
    - 43
    - 44        Parameters
    - 45        -----------
    - 46        report_error: bool, optional
    - 47            Report the error of the assigned peaks. Default is False.
    - 48        """
    - 49        verbose = self.parameters.mass_spectrum.verbose_processing
    - 50        assign_abun = 0
    - 51        not_assign_abun = 0
    - 52        i = 0
    - 53        j = 0
    - 54        if report_error:
    - 55            error = []
    - 56        for mspeak in self.sort_by_abundance():
    - 57            
    +            
     10class MassSpecCalc(PeakPicking, NoiseThresholdCalc):
    + 11    """Class for Mass Spectrum Calculations
    + 12
    + 13    Class including numerical calculations related to mass spectrum class
    + 14    Inherited PeakPicking and NoiseThresholdCalc ensuring its methods are
    + 15    available to the instantiated mass spectrum class object
    + 16
    + 17    Parameters
    + 18    -------
    + 19    mass_spectrum : MassSpectrum
    + 20        CoreMS mass spectrum object
    + 21
    + 22    Attributes
    + 23    --------
    + 24    All Attributes are derivative from the MassSpecBase Class
    + 25
    + 26    Methods
    + 27    --------
    + 28    * check_mspeaks().
    + 29        Check if the mspeaks attribute is populated
    + 30    * sort_by_abundance().
    + 31        Sort the mspeaks by abundance
    + 32    * percentile_assigned(report_error=False).
    + 33        Calculate the percentage of assigned peaks
    + 34    * resolving_power_calc(B, T).
    + 35        Calculate the resolving power
    + 36    * number_average_molecular_weight(profile=False).
    + 37        Calculate the number average molecular weight
    + 38    * weight_average_molecular_weight(profile=False).
    + 39        Calculate the weight average molecular weight
    + 40    """
    + 41
    + 42    def percentile_assigned(self, report_error: bool = False):
    + 43        """Percentage of peaks which are assigned
    + 44
    + 45        Parameters
    + 46        -----------
    + 47        report_error: bool, optional
    + 48            Report the error of the assigned peaks. Default is False.
    + 49        """
    + 50        verbose = self.parameters.mass_spectrum.verbose_processing
    + 51        assign_abun = 0
    + 52        not_assign_abun = 0
    + 53        i = 0
    + 54        j = 0
    + 55        if report_error:
    + 56            error = []
    + 57        for mspeak in self.sort_by_abundance():
      58            if mspeak.is_assigned:
      59                i += 1
      60                assign_abun += mspeak.abundance
    @@ -356,154 +369,168 @@ 

    68 total_percent = (i / (i + j)) * 100 69 total_relative_abundance = (assign_abun / (not_assign_abun + assign_abun)) * 100 70 if report_error: - 71 rms_error = sqrt(mean(array(error)**2)) + 71 rms_error = sqrt(mean(array(error) ** 2)) 72 if verbose: - 73 print('%i assigned peaks and %i unassigned peaks, total = %.2f %%, relative abundance = %.2f %%, RMS error (best candidate) (ppm) = %.3f' % (i, j, total_percent, total_relative_abundance, rms_error)) - 74 return i, j, total_percent, total_relative_abundance, rms_error - 75 - 76 else: - 77 if verbose: - 78 print('%i assigned peaks and %i unassigned peaks , total = %.2f %%, relative abundance = %.2f %%' % (i, j, total_percent, total_relative_abundance,)) - 79 return i, j, total_percent, total_relative_abundance - 80 - 81 def resolving_power_calc(self, B : float, T : float): - 82 """ Calculate the theoretical resolving power - 83 - 84 Calls on the MSPeak object function to calculate the resolving power of a peak, this calcs for all peaks in a spectrum. - 85 - 86 Parameters - 87 ----------- - 88 T : float - 89 transient time - 90 B : float - 91 Magnetic Filed Strength (Tesla) - 92 - 93 References - 94 ---------- - 95 1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.) 
- 96 DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K - 97 - 98 """ - 99 -100 self.check_mspeaks() -101 return array([mspeak.resolving_power_calc(B, T) for mspeak in self.mspeaks]) -102 -103 def _f_to_mz(self): -104 """ Ledford equation for converting frequency(Hz) to m/z -105 -106 Returns -107 ---------- -108 mz_domain : numpy array -109 m/z domain after conversion from frequency -110 """ -111 Aterm, Bterm, Cterm = self.Aterm, self.Bterm, self.Cterm -112 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage -113 -114 if Cterm == 0: -115 -116 if Bterm == 0: -117 #uncalibrated data -118 mz_domain = Aterm / self.freq_exp_profile -119 -120 else: -121 -122 mz_domain = (Aterm / (self.freq_exp_profile)) + (Bterm / power((self.freq_exp_profile), 2)) -123 -124 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to -125 else: -126 -127 mz_domain = (Aterm / self.freq_exp_profile) + (Bterm / power(self.freq_exp_profile, 2)) + Cterm -128 -129 return mz_domain -130 -131 def _f_to_mz_bruker(self): -132 """ Frequency to m/z conversion (Bruker) -133 Bruker equations for converting frequency (Hz) to m/z, -134 nOmega acquisition is not yet implemented here. 
-135 However, nOmega should work for commerical Bruker 2xR systems as A Term is automatically defined for 2X or 1X by the instrument -136 -137 -138 Returns -139 ---------- -140 numpy.array(float) -141 m/z domain after conversion from frequency -142 """ -143 Aterm, Bterm, Cterm = self.Aterm, self.Bterm, self.Cterm -144 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage -145 if Cterm == 0: -146 -147 if Bterm == 0: -148 #uncalibrated data -149 return Aterm / self.freq_exp_profile -150 -151 else: -152 #calc2 -153 return Aterm / (self.freq_exp_profile + Bterm) -154 -155 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to -156 else: -157 diff = Aterm * Aterm -158 -159 #this sign(diff + 4) changes on older aquistion software -160 diff = diff + 4 * Cterm * (self.freq_exp_profile - Bterm) -161 diff = sqrt(diff) -162 diff = -Aterm+diff -163 #calc3 -164 return (2*Cterm)/diff -165 return diff/2* (self.freq_exp_profile - Bterm) -166 -167 def number_average_molecular_weight(self, profile : bool=False): -168 """ Average molecular weight calculation -169 -170 Parameters -171 ---------- -172 profile : bool, optional -173 is data profile or centroid mode. The default is False (e.g. Centroid data) -174 -175 Returns -176 ------- -177 float -178 The average molecular weight. 
+ 73 print( + 74 "%i assigned peaks and %i unassigned peaks, total = %.2f %%, relative abundance = %.2f %%, RMS error (best candidate) (ppm) = %.3f" + 75 % (i, j, total_percent, total_relative_abundance, rms_error) + 76 ) + 77 return i, j, total_percent, total_relative_abundance, rms_error + 78 + 79 else: + 80 if verbose: + 81 print( + 82 "%i assigned peaks and %i unassigned peaks , total = %.2f %%, relative abundance = %.2f %%" + 83 % ( + 84 i, + 85 j, + 86 total_percent, + 87 total_relative_abundance, + 88 ) + 89 ) + 90 return i, j, total_percent, total_relative_abundance + 91 + 92 def resolving_power_calc(self, B: float, T: float): + 93 """Calculate the theoretical resolving power + 94 + 95 Calls on the MSPeak object function to calculate the resolving power of a peak, this calcs for all peaks in a spectrum. + 96 + 97 Parameters + 98 ----------- + 99 T : float +100 transient time +101 B : float +102 Magnetic Filed Strength (Tesla) +103 +104 References +105 ---------- +106 1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.) 
+107 DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K +108 +109 """ +110 +111 self.check_mspeaks() +112 return array([mspeak.resolving_power_calc(B, T) for mspeak in self.mspeaks]) +113 +114 def _f_to_mz(self): +115 """Ledford equation for converting frequency(Hz) to m/z +116 +117 Returns +118 ---------- +119 mz_domain : numpy array +120 m/z domain after conversion from frequency +121 """ +122 Aterm, Bterm, Cterm = self.Aterm, self.Bterm, self.Cterm +123 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage +124 +125 if Cterm == 0: +126 if Bterm == 0: +127 # uncalibrated data +128 mz_domain = Aterm / self.freq_exp_profile +129 +130 else: +131 mz_domain = (Aterm / (self.freq_exp_profile)) + ( +132 Bterm / power((self.freq_exp_profile), 2) +133 ) +134 +135 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to +136 else: +137 mz_domain = ( +138 (Aterm / self.freq_exp_profile) +139 + (Bterm / power(self.freq_exp_profile, 2)) +140 + Cterm +141 ) +142 +143 return mz_domain +144 +145 def _f_to_mz_bruker(self): +146 """Frequency to m/z conversion (Bruker) +147 Bruker equations for converting frequency (Hz) to m/z, +148 nOmega acquisition is not yet implemented here. 
+149 However, nOmega should work for commerical Bruker 2xR systems as A Term is automatically defined for 2X or 1X by the instrument +150 +151 +152 Returns +153 ---------- +154 numpy.array(float) +155 m/z domain after conversion from frequency +156 """ +157 Aterm, Bterm, Cterm = self.Aterm, self.Bterm, self.Cterm +158 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage +159 if Cterm == 0: +160 if Bterm == 0: +161 # uncalibrated data +162 return Aterm / self.freq_exp_profile +163 +164 else: +165 # calc2 +166 return Aterm / (self.freq_exp_profile + Bterm) +167 +168 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to +169 else: +170 diff = Aterm * Aterm +171 +172 # this sign(diff + 4) changes on older aquistion software +173 diff = diff + 4 * Cterm * (self.freq_exp_profile - Bterm) +174 diff = sqrt(diff) +175 diff = -Aterm + diff +176 # calc3 +177 return (2 * Cterm) / diff +178 return diff / 2 * (self.freq_exp_profile - Bterm) 179 -180 """ -181 # mode is profile or centroid data -182 if profile: -183 a = multiply(self.mz_exp_profile, self.abundance_profile) -184 b = self.abundance_profile -185 return a.sum()/b.sum() -186 -187 else: -188 -189 return sum(self.mz_exp*self.abundance)/sum(self.abundance) -190 -191 def weight_average_molecular_weight(self, profile : bool=False): -192 """ -193 Weighted Average molecular weight calculation -194 -195 -196 Returns -197 ------- -198 float -199 The weight average molecular weight. 
-200 -201 """ -202 -203 # implement from MassSpectralPeaks objs -204 -205 if profile: -206 a = multiply(power(self.mz_exp_profile, 2), self.abundance_profile) -207 b = self.mz_exp_profile*self.abundance_profile -208 return a.sum() / b.sum() -209 -210 else: -211 return sum(power(self.mz_exp, 2)*self.abundance)/sum(self.mz_exp*self.abundance) +180 def number_average_molecular_weight(self, profile: bool = False): +181 """Average molecular weight calculation +182 +183 Parameters +184 ---------- +185 profile : bool, optional +186 is data profile or centroid mode. The default is False (e.g. Centroid data) +187 +188 Returns +189 ------- +190 float +191 The average molecular weight. +192 +193 """ +194 # mode is profile or centroid data +195 if profile: +196 a = multiply(self.mz_exp_profile, self.abundance_profile) +197 b = self.abundance_profile +198 return a.sum() / b.sum() +199 +200 else: +201 return sum(self.mz_exp * self.abundance) / sum(self.abundance) +202 +203 def weight_average_molecular_weight(self, profile: bool = False): +204 """ +205 Weighted Average molecular weight calculation +206 +207 +208 Returns +209 ------- +210 float +211 The weight average molecular weight. +212 +213 """ +214 +215 # implement from MassSpectralPeaks objs +216 +217 if profile: +218 a = multiply(power(self.mz_exp_profile, 2), self.abundance_profile) +219 b = self.mz_exp_profile * self.abundance_profile +220 return a.sum() / b.sum() +221 +222 else: +223 return sum(power(self.mz_exp, 2) * self.abundance) / sum( +224 self.mz_exp * self.abundance +225 )

    Class for Mass Spectrum Calculations

    Class including numerical calculations related to mass spectrum class -Inherited PeakPicking and NoiseThresholdCalc ensuring its methods are +Inherited PeakPicking and NoiseThresholdCalc ensuring its methods are available to the instantiated mass spectrum class object

    Parameters
    @@ -522,17 +549,17 @@
    Attributes
    Methods
      -
    • check_mspeaks(). +
    • check_mspeaks(). Check if the mspeaks attribute is populated
    • -
    • sort_by_abundance(). +
    • sort_by_abundance(). Sort the mspeaks by abundance
    • -
    • percentile_assigned(report_error=False). +
    • percentile_assigned(report_error=False). Calculate the percentage of assigned peaks
    • -
    • resolving_power_calc(B, T). +
    • resolving_power_calc(B, T). Calculate the resolving power
    • -
    • number_average_molecular_weight(profile=False). +
    • number_average_molecular_weight(profile=False). Calculate the number average molecular weight
    • -
    • weight_average_molecular_weight(profile=False). +
    • weight_average_molecular_weight(profile=False). Calculate the weight average molecular weight
    @@ -549,23 +576,22 @@
    Methods
    -
    41    def percentile_assigned(self, report_error : bool=False):
    -42        """ Percentage of peaks which are assigned
    -43
    -44        Parameters
    -45        -----------
    -46        report_error: bool, optional
    -47            Report the error of the assigned peaks. Default is False.
    -48        """
    -49        verbose = self.parameters.mass_spectrum.verbose_processing
    -50        assign_abun = 0
    -51        not_assign_abun = 0
    -52        i = 0
    -53        j = 0
    -54        if report_error:
    -55            error = []
    -56        for mspeak in self.sort_by_abundance():
    -57            
    +            
    42    def percentile_assigned(self, report_error: bool = False):
    +43        """Percentage of peaks which are assigned
    +44
    +45        Parameters
    +46        -----------
    +47        report_error: bool, optional
    +48            Report the error of the assigned peaks. Default is False.
    +49        """
    +50        verbose = self.parameters.mass_spectrum.verbose_processing
    +51        assign_abun = 0
    +52        not_assign_abun = 0
    +53        i = 0
    +54        j = 0
    +55        if report_error:
    +56            error = []
    +57        for mspeak in self.sort_by_abundance():
     58            if mspeak.is_assigned:
     59                i += 1
     60                assign_abun += mspeak.abundance
    @@ -579,15 +605,26 @@ 
    Methods
    68 total_percent = (i / (i + j)) * 100 69 total_relative_abundance = (assign_abun / (not_assign_abun + assign_abun)) * 100 70 if report_error: -71 rms_error = sqrt(mean(array(error)**2)) +71 rms_error = sqrt(mean(array(error) ** 2)) 72 if verbose: -73 print('%i assigned peaks and %i unassigned peaks, total = %.2f %%, relative abundance = %.2f %%, RMS error (best candidate) (ppm) = %.3f' % (i, j, total_percent, total_relative_abundance, rms_error)) -74 return i, j, total_percent, total_relative_abundance, rms_error -75 -76 else: -77 if verbose: -78 print('%i assigned peaks and %i unassigned peaks , total = %.2f %%, relative abundance = %.2f %%' % (i, j, total_percent, total_relative_abundance,)) -79 return i, j, total_percent, total_relative_abundance +73 print( +74 "%i assigned peaks and %i unassigned peaks, total = %.2f %%, relative abundance = %.2f %%, RMS error (best candidate) (ppm) = %.3f" +75 % (i, j, total_percent, total_relative_abundance, rms_error) +76 ) +77 return i, j, total_percent, total_relative_abundance, rms_error +78 +79 else: +80 if verbose: +81 print( +82 "%i assigned peaks and %i unassigned peaks , total = %.2f %%, relative abundance = %.2f %%" +83 % ( +84 i, +85 j, +86 total_percent, +87 total_relative_abundance, +88 ) +89 ) +90 return i, j, total_percent, total_relative_abundance
    @@ -614,27 +651,27 @@
    Parameters
    -
     81    def resolving_power_calc(self, B : float, T : float):
    - 82        """ Calculate the theoretical resolving power
    - 83
    - 84        Calls on the MSPeak object function to calculate the resolving power of a peak, this calcs for all peaks in a spectrum.
    - 85
    - 86        Parameters
    - 87        -----------
    - 88        T : float 
    - 89            transient time
    - 90        B : float
    - 91            Magnetic Filed Strength (Tesla)    
    - 92        
    - 93        References
    - 94        ----------
    - 95        1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)
    - 96                DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    - 97        
    - 98        """
    - 99       
    -100        self.check_mspeaks()
    -101        return array([mspeak.resolving_power_calc(B, T) for mspeak in self.mspeaks])
    +            
     92    def resolving_power_calc(self, B: float, T: float):
    + 93        """Calculate the theoretical resolving power
    + 94
    + 95        Calls on the MSPeak object function to calculate the resolving power of a peak, this calcs for all peaks in a spectrum.
    + 96
    + 97        Parameters
    + 98        -----------
    + 99        T : float
    +100            transient time
    +101        B : float
    +102            Magnetic Filed Strength (Tesla)
    +103
    +104        References
    +105        ----------
    +106        1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)
    +107                DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    +108
    +109        """
    +110
    +111        self.check_mspeaks()
    +112        return array([mspeak.resolving_power_calc(B, T) for mspeak in self.mspeaks])
     
    @@ -672,33 +709,32 @@
    References
    -
    167    def number_average_molecular_weight(self, profile : bool=False):
    -168        """ Average molecular weight calculation 
    -169        
    -170        Parameters
    -171        ----------
    -172        profile : bool, optional
    -173            is data profile or centroid mode. The default is False (e.g. Centroid data)
    -174
    -175        Returns
    -176        -------
    -177        float
    -178            The average molecular weight.
    -179
    -180        """
    -181        # mode is profile or centroid data
    -182        if profile:
    -183            a = multiply(self.mz_exp_profile, self.abundance_profile)
    -184            b = self.abundance_profile
    -185            return a.sum()/b.sum()
    -186
    -187        else:
    -188
    -189            return sum(self.mz_exp*self.abundance)/sum(self.abundance)
    +            
    180    def number_average_molecular_weight(self, profile: bool = False):
    +181        """Average molecular weight calculation
    +182
    +183        Parameters
    +184        ----------
    +185        profile : bool, optional
    +186            is data profile or centroid mode. The default is False (e.g. Centroid data)
    +187
    +188        Returns
    +189        -------
    +190        float
    +191            The average molecular weight.
    +192
    +193        """
    +194        # mode is profile or centroid data
    +195        if profile:
    +196            a = multiply(self.mz_exp_profile, self.abundance_profile)
    +197            b = self.abundance_profile
    +198            return a.sum() / b.sum()
    +199
    +200        else:
    +201            return sum(self.mz_exp * self.abundance) / sum(self.abundance)
     
    -

    Average molecular weight calculation

    +

    Average molecular weight calculation

    Parameters
    @@ -727,31 +763,33 @@
    Returns
    -
    191    def weight_average_molecular_weight(self, profile : bool=False):
    -192        """ 
    -193        Weighted Average molecular weight calculation 
    -194        
    -195
    -196        Returns
    -197        -------
    -198        float
    -199            The weight average molecular weight.
    -200
    -201        """
    -202        
    -203        # implement from MassSpectralPeaks objs
    -204
    -205        if profile:
    -206            a = multiply(power(self.mz_exp_profile, 2), self.abundance_profile)
    -207            b = self.mz_exp_profile*self.abundance_profile
    -208            return a.sum() / b.sum()
    -209
    -210        else:
    -211            return sum(power(self.mz_exp, 2)*self.abundance)/sum(self.mz_exp*self.abundance)
    +            
    203    def weight_average_molecular_weight(self, profile: bool = False):
    +204        """
    +205        Weighted Average molecular weight calculation
    +206
    +207
    +208        Returns
    +209        -------
    +210        float
    +211            The weight average molecular weight.
    +212
    +213        """
    +214
    +215        # implement from MassSpectralPeaks objs
    +216
    +217        if profile:
    +218            a = multiply(power(self.mz_exp_profile, 2), self.abundance_profile)
    +219            b = self.mz_exp_profile * self.abundance_profile
    +220            return a.sum() / b.sum()
    +221
    +222        else:
    +223            return sum(power(self.mz_exp, 2) * self.abundance) / sum(
    +224                self.mz_exp * self.abundance
    +225            )
     
    -

    Weighted Average molecular weight calculation

    +

    Weighted Average molecular weight calculation

    Returns
    diff --git a/docs/corems/mass_spectrum/calc/MeanResolvingPowerFilter.html b/docs/corems/mass_spectrum/calc/MeanResolvingPowerFilter.html index 9d46d127..3e70b4d9 100644 --- a/docs/corems/mass_spectrum/calc/MeanResolvingPowerFilter.html +++ b/docs/corems/mass_spectrum/calc/MeanResolvingPowerFilter.html @@ -87,10 +87,10 @@

    @author: Will Kew

    Module for mean resolving power filtration -Based upon the work in:

    +Based upon the work in:

    -

    Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. -Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. +

    Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. +Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. Rapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940

    Calculates a m/z normalised resolving power, fits a gaussian distribution to this, and then filters out peaks which are outside of the user defined number of standard deviations

    @@ -106,187 +106,201 @@

    4@author: Will Kew 5 6Module for mean resolving power filtration - 7Based upon the work in: + 7Based upon the work in: 8 - 9Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. - 10Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. + 9Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. + 10Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. 11Rapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940 12 13Calculates a m/z normalised resolving power, fits a gaussian distribution to this, and then filters out peaks which are outside of the user defined number of standard deviations 14 15 16""" - 17import warnings - 18from lmfit.models import GaussianModel - 19from scipy import stats + 17 + 18import warnings + 19from lmfit.models import GaussianModel 20import seaborn as sns 21import pandas as pd 22import numpy as np 23import matplotlib.pyplot as plt 24 - 25class MeanResolvingPowerFilter(): - 26 """ Class for for mean resolving power filtration. - 27 - 28 This module implements a mean resolving power filter based on the work described [1] - 29 - 30 The MeanResolvingPowerFilter class provides methods to calculate the m/z normalized resolving power, fit a Gaussian distribution to it, and filter out peaks that are outside of the user-defined number of standard deviations. - 31 - 32 Attributes - 33 ------- - 34 mass_spectrum (object): The mass spectrum object. - 35 ndeviations (int): The number of standard deviations used for filtering. - 36 plot (bool): Flag indicating whether to plot the results. - 37 guess_pars (bool): Flag indicating whether to guess the parameters for the Gaussian model. - 38 - 39 Methods - 40 ------ - 41 * extract_peaks(): Extracts the peaks from the mass spectrum. - 42 * normalise_rps(tmpdf_ms): Normalizes the resolving powers to be independent of m/z. 
- 43 * calculate_distribution(tmpdf_ms): Calculates the distribution of the resolving powers. - 44 * create_index_list_to_remove(tmpdf_ms, rps_thresh): Creates an index list of peaks to remove based on the calculated thresholds. - 45 * main(): Executes the main filtering process and returns the index list of peaks to remove. - 46 - 47 References - 48 ---------- - 49 1. Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. - 50 Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. - 51 Rapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940 - 52 """ - 53 - 54 def __init__(self, mass_spectrum, ndeviations : float=3, plot : bool=False, guess_pars : bool=False): - 55 # we dont want the assignments made in this exploratory class to copy to the original object, so we make a copy of it. - 56 # Possible future task - make mass spectrum base class copyable... - 57 #TODO see if there is redundancy in the AutoRecalibration function we can minimise here? - 58 #self.mass_spectrum - 59 self.mass_spectrum = mass_spectrum - 60 self.plot = plot - 61 self.ndeviations = ndeviations - 62 self.guess_pars = guess_pars - 63 - 64 def extract_peaks(self): - 65 """ Extracts the peaks from the mass spectrum. - 66 - 67 Returns - 68 ---------- - 69 tmpdf_ms : Pandas DataFrame - 70 A DataFrame containing the extracted peaks. - 71 """ - 72 ids = [] - 73 mzs = [] - 74 rps = [] - 75 for mspeak in self.mass_spectrum.mspeaks: - 76 ids.append(mspeak.index) - 77 mzs.append(mspeak.mz_exp) - 78 rps.append(mspeak.resolving_power) - 79 mzs = np.array(mzs) - 80 rps = np.array(rps) - 81 - 82 tmpdf_ms = pd.DataFrame(index=ids,columns=['mz','rp','crp']) - 83 tmpdf_ms['mz'] = mzs - 84 tmpdf_ms['rp'] = rps - 85 return tmpdf_ms - 86 - 87 def normalise_rps(self, tmpdf_ms): - 88 """ Normalizes the resolving powers to be independent of m/z. 
- 89 - 90 Parameters - 91 ------ - 92 tmpdf_ms : Pandas DataFrame - 93 A DataFrame containing the extracted peaks. - 94 - 95 Returns - 96 -------- - 97 tmpdf_ms : Pandas DataFrame - 98 A DataFrame with the resolving powers normalized. - 99 """ -100 -101 if self.mass_spectrum.analyzer == 'ICR': -102 tmpdf_ms['crp'] = tmpdf_ms['rp'] * np.sqrt(tmpdf_ms['mz']**2) -103 else: -104 warnings.warn(f'Analyzer type {self.mass_spectrum.analyzer} not yet supported.', UserWarning) -105 return tmpdf_ms -106 -107 def calculate_distribution(self, tmpdf_ms): -108 """ Calculates the distribution of the resolving powers. -109 -110 Parameters -111 -------- -112 tmpdf_ms : Pandas DataFrame -113 A DataFrame containing the extracted peaks with normalized resolving powers. -114 -115 Returns -116 -------- -117 rps_thresh : list -118 A list of the calculated thresholds for filtering. -119 """ -120 -121 # Use Seaborn to create a KDE of the normalised resolving powers -122 rps = sns.kdeplot(tmpdf_ms['crp']) -123 rps_data = rps.get_lines()[0].get_data() -124 tmpdf = pd.Series(index=rps_data[0],data=rps_data[1]) -125 rps_apex_ppm = tmpdf.idxmax() -126 rps_apex_val = tmpdf.max() -127 plt.close(rps.figure) -128 plt.close('all') -129 -130 # Use LMFIT to create a gaussian model of the distribution -131 lmmodel = GaussianModel() -132 lmpars = lmmodel.guess(rps_data[1], x=rps_data[0]) -133 if self.guess_pars: -134 lmpars['sigma'].value = rps_data[0][-1]*0.01 -135 lmpars['center'].value = rps_apex_ppm -136 lmpars['amplitude'].value = rps_apex_val -137 lmout = lmmodel.fit(rps_data[1], lmpars, x=rps_data[0]) -138 -139 if self.plot: -140 fig,ax = plt.subplots(figsize=(8,4)) -141 lmout.plot_fit(ax=ax,data_kws ={'color':'tab:blue'},fit_kws ={'color':'tab:red'}) -142 ax.set_xlabel('Normalised Resolving Power') -143 ax.set_ylabel('Density') -144 plt.legend(facecolor='white', framealpha=0) -145 -146 mean_res = lmout.best_values['center'] -147 std_res = lmout.best_values['sigma'] -148 fwhm_res = 
std_res*np.sqrt(8*np.log(2)) -149 -150 ndevs = self.ndeviations/2 -151 rps_thresh = [mean_res-(fwhm_res*ndevs),mean_res+(fwhm_res*ndevs)] -152 return rps_thresh -153 -154 def create_index_list_to_remove(self, tmpdf_ms, rps_thresh : list): -155 """ Creates an index list of peaks to remove based on the calculated thresholds. -156 -157 Parameters -158 --------- -159 tmpdf_ms : Pandas DataFrame -160 A DataFrame containing the extracted peaks with normalized resolving powers. -161 rps_thresh : list -162 A list of the calculated thresholds for filtering. -163 -164 Returns -165 ---------- -166 index_to_keep :list -167 A list of indices of peaks to keep. -168 """ -169 #Subset the list of mspeaks to only the ones to keep, return an index list which can be passed back to the main -170 -171 tmpdf_ms = tmpdf_ms[(tmpdf_ms['crp']<min(rps_thresh))|(tmpdf_ms['crp']>max(rps_thresh))] -172 index_to_keep = list(tmpdf_ms.index) -173 return index_to_keep -174 -175 def main(self): -176 """ Executes the main filtering process and returns the index list of peaks to remove. -177 -178 Returns -179 -------- -180 index_to_remove : list -181 A list of indices of peaks to remove. -182 """ -183 tmpdf_ms = self.extract_peaks() -184 tmpdf_ms = self.normalise_rps(tmpdf_ms) -185 rps_thresh = self.calculate_distribution(tmpdf_ms) -186 index_to_remove = self.create_index_list_to_remove(tmpdf_ms,rps_thresh) -187 return index_to_remove + 25 + 26class MeanResolvingPowerFilter: + 27 """Class for for mean resolving power filtration. + 28 + 29 This module implements a mean resolving power filter based on the work described [1] + 30 + 31 The MeanResolvingPowerFilter class provides methods to calculate the m/z normalized resolving power, fit a Gaussian distribution to it, and filter out peaks that are outside of the user-defined number of standard deviations. + 32 + 33 Attributes + 34 ------- + 35 mass_spectrum (object): The mass spectrum object. 
+ 36 ndeviations (int): The number of standard deviations used for filtering. + 37 plot (bool): Flag indicating whether to plot the results. + 38 guess_pars (bool): Flag indicating whether to guess the parameters for the Gaussian model. + 39 + 40 Methods + 41 ------ + 42 * extract_peaks(): Extracts the peaks from the mass spectrum. + 43 * normalise_rps(tmpdf_ms): Normalizes the resolving powers to be independent of m/z. + 44 * calculate_distribution(tmpdf_ms): Calculates the distribution of the resolving powers. + 45 * create_index_list_to_remove(tmpdf_ms, rps_thresh): Creates an index list of peaks to remove based on the calculated thresholds. + 46 * main(): Executes the main filtering process and returns the index list of peaks to remove. + 47 + 48 References + 49 ---------- + 50 1. Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. + 51 Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. + 52 Rapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940 + 53 """ + 54 + 55 def __init__( + 56 self, + 57 mass_spectrum, + 58 ndeviations: float = 3, + 59 plot: bool = False, + 60 guess_pars: bool = False, + 61 ): + 62 # we dont want the assignments made in this exploratory class to copy to the original object, so we make a copy of it. + 63 # Possible future task - make mass spectrum base class copyable... + 64 # TODO see if there is redundancy in the AutoRecalibration function we can minimise here? + 65 # self.mass_spectrum + 66 self.mass_spectrum = mass_spectrum + 67 self.plot = plot + 68 self.ndeviations = ndeviations + 69 self.guess_pars = guess_pars + 70 + 71 def extract_peaks(self): + 72 """Extracts the peaks from the mass spectrum. + 73 + 74 Returns + 75 ---------- + 76 tmpdf_ms : Pandas DataFrame + 77 A DataFrame containing the extracted peaks. 
+ 78 """ + 79 ids = [] + 80 mzs = [] + 81 rps = [] + 82 for mspeak in self.mass_spectrum.mspeaks: + 83 ids.append(mspeak.index) + 84 mzs.append(mspeak.mz_exp) + 85 rps.append(mspeak.resolving_power) + 86 mzs = np.array(mzs) + 87 rps = np.array(rps) + 88 + 89 tmpdf_ms = pd.DataFrame(index=ids, columns=["mz", "rp", "crp"]) + 90 tmpdf_ms["mz"] = mzs + 91 tmpdf_ms["rp"] = rps + 92 return tmpdf_ms + 93 + 94 def normalise_rps(self, tmpdf_ms): + 95 """Normalizes the resolving powers to be independent of m/z. + 96 + 97 Parameters + 98 ------ + 99 tmpdf_ms : Pandas DataFrame +100 A DataFrame containing the extracted peaks. +101 +102 Returns +103 -------- +104 tmpdf_ms : Pandas DataFrame +105 A DataFrame with the resolving powers normalized. +106 """ +107 +108 if self.mass_spectrum.analyzer == "ICR": +109 tmpdf_ms["crp"] = tmpdf_ms["rp"] * np.sqrt(tmpdf_ms["mz"] ** 2) +110 else: +111 warnings.warn( +112 f"Analyzer type {self.mass_spectrum.analyzer} not yet supported.", +113 UserWarning, +114 ) +115 return tmpdf_ms +116 +117 def calculate_distribution(self, tmpdf_ms): +118 """Calculates the distribution of the resolving powers. +119 +120 Parameters +121 -------- +122 tmpdf_ms : Pandas DataFrame +123 A DataFrame containing the extracted peaks with normalized resolving powers. +124 +125 Returns +126 -------- +127 rps_thresh : list +128 A list of the calculated thresholds for filtering. 
+129 """ +130 +131 # Use Seaborn to create a KDE of the normalised resolving powers +132 rps = sns.kdeplot(tmpdf_ms["crp"]) +133 rps_data = rps.get_lines()[0].get_data() +134 tmpdf = pd.Series(index=rps_data[0], data=rps_data[1]) +135 rps_apex_ppm = tmpdf.idxmax() +136 rps_apex_val = tmpdf.max() +137 plt.close(rps.figure) +138 plt.close("all") +139 +140 # Use LMFIT to create a gaussian model of the distribution +141 lmmodel = GaussianModel() +142 lmpars = lmmodel.guess(rps_data[1], x=rps_data[0]) +143 if self.guess_pars: +144 lmpars["sigma"].value = rps_data[0][-1] * 0.01 +145 lmpars["center"].value = rps_apex_ppm +146 lmpars["amplitude"].value = rps_apex_val +147 lmout = lmmodel.fit(rps_data[1], lmpars, x=rps_data[0]) +148 +149 if self.plot: +150 fig, ax = plt.subplots(figsize=(8, 4)) +151 lmout.plot_fit( +152 ax=ax, data_kws={"color": "tab:blue"}, fit_kws={"color": "tab:red"} +153 ) +154 ax.set_xlabel("Normalised Resolving Power") +155 ax.set_ylabel("Density") +156 plt.legend(facecolor="white", framealpha=0) +157 +158 mean_res = lmout.best_values["center"] +159 std_res = lmout.best_values["sigma"] +160 fwhm_res = std_res * np.sqrt(8 * np.log(2)) +161 +162 ndevs = self.ndeviations / 2 +163 rps_thresh = [mean_res - (fwhm_res * ndevs), mean_res + (fwhm_res * ndevs)] +164 return rps_thresh +165 +166 def create_index_list_to_remove(self, tmpdf_ms, rps_thresh: list): +167 """Creates an index list of peaks to remove based on the calculated thresholds. +168 +169 Parameters +170 --------- +171 tmpdf_ms : Pandas DataFrame +172 A DataFrame containing the extracted peaks with normalized resolving powers. +173 rps_thresh : list +174 A list of the calculated thresholds for filtering. +175 +176 Returns +177 ---------- +178 index_to_keep :list +179 A list of indices of peaks to keep. 
+180 """ +181 # Subset the list of mspeaks to only the ones to keep, return an index list which can be passed back to the main +182 +183 tmpdf_ms = tmpdf_ms[ +184 (tmpdf_ms["crp"] < min(rps_thresh)) | (tmpdf_ms["crp"] > max(rps_thresh)) +185 ] +186 index_to_keep = list(tmpdf_ms.index) +187 return index_to_keep +188 +189 def main(self): +190 """Executes the main filtering process and returns the index list of peaks to remove. +191 +192 Returns +193 -------- +194 index_to_remove : list +195 A list of indices of peaks to remove. +196 """ +197 tmpdf_ms = self.extract_peaks() +198 tmpdf_ms = self.normalise_rps(tmpdf_ms) +199 rps_thresh = self.calculate_distribution(tmpdf_ms) +200 index_to_remove = self.create_index_list_to_remove(tmpdf_ms, rps_thresh) +201 return index_to_remove

    @@ -302,169 +316,182 @@

    -
     26class MeanResolvingPowerFilter():
    - 27    """ Class for for mean resolving power filtration.
    - 28
    - 29    This module implements a mean resolving power filter based on the work described [1]
    - 30
    - 31    The MeanResolvingPowerFilter class provides methods to calculate the m/z normalized resolving power, fit a Gaussian distribution to it, and filter out peaks that are outside of the user-defined number of standard deviations.
    - 32
    - 33    Attributes
    - 34    -------
    - 35    mass_spectrum (object): The mass spectrum object.
    - 36    ndeviations (int): The number of standard deviations used for filtering.
    - 37    plot (bool): Flag indicating whether to plot the results.
    - 38    guess_pars (bool): Flag indicating whether to guess the parameters for the Gaussian model.
    - 39
    - 40    Methods
    - 41    ------
    - 42    * extract_peaks(): Extracts the peaks from the mass spectrum.  
    - 43    * normalise_rps(tmpdf_ms): Normalizes the resolving powers to be independent of m/z.  
    - 44    * calculate_distribution(tmpdf_ms): Calculates the distribution of the resolving powers.  
    - 45    * create_index_list_to_remove(tmpdf_ms, rps_thresh): Creates an index list of peaks to remove based on the calculated thresholds.  
    - 46    * main(): Executes the main filtering process and returns the index list of peaks to remove.  
    - 47
    - 48    References
    - 49    ----------
    - 50    1.  Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. 
    - 51        Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. 
    - 52        Rapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940
    - 53    """
    - 54
    - 55    def __init__(self, mass_spectrum, ndeviations : float=3, plot : bool=False, guess_pars : bool=False):
    - 56        # we dont want the assignments made in this exploratory class to copy to the original object, so we make a copy of it.  
    - 57        # Possible future task - make mass spectrum base class copyable...
    - 58        #TODO see if there is redundancy in the AutoRecalibration function we can minimise here? 
    - 59        #self.mass_spectrum
    - 60        self.mass_spectrum = mass_spectrum
    - 61        self.plot = plot
    - 62        self.ndeviations = ndeviations
    - 63        self.guess_pars = guess_pars
    - 64
    - 65    def extract_peaks(self):
    - 66        """ Extracts the peaks from the mass spectrum.
    - 67
    - 68        Returns
    - 69        ----------
    - 70        tmpdf_ms : Pandas DataFrame
    - 71            A DataFrame containing the extracted peaks.
    - 72        """
    - 73        ids = []
    - 74        mzs = []
    - 75        rps = []
    - 76        for mspeak in self.mass_spectrum.mspeaks:
    - 77            ids.append(mspeak.index)
    - 78            mzs.append(mspeak.mz_exp)
    - 79            rps.append(mspeak.resolving_power)
    - 80        mzs = np.array(mzs)
    - 81        rps = np.array(rps)  
    - 82
    - 83        tmpdf_ms = pd.DataFrame(index=ids,columns=['mz','rp','crp'])
    - 84        tmpdf_ms['mz'] = mzs
    - 85        tmpdf_ms['rp'] = rps
    - 86        return tmpdf_ms
    - 87    
    - 88    def normalise_rps(self, tmpdf_ms):
    - 89        """ Normalizes the resolving powers to be independent of m/z.
    - 90
    - 91        Parameters
    - 92        ------
    - 93        tmpdf_ms : Pandas DataFrame
    - 94            A DataFrame containing the extracted peaks.
    - 95
    - 96        Returns
    - 97        --------
    - 98        tmpdf_ms : Pandas DataFrame
    - 99            A DataFrame with the resolving powers normalized.
    -100        """
    -101        
    -102        if self.mass_spectrum.analyzer == 'ICR':
    -103            tmpdf_ms['crp'] = tmpdf_ms['rp'] * np.sqrt(tmpdf_ms['mz']**2)
    -104        else:
    -105            warnings.warn(f'Analyzer type {self.mass_spectrum.analyzer} not yet supported.', UserWarning)
    -106        return tmpdf_ms
    -107
    -108    def calculate_distribution(self, tmpdf_ms):
    -109        """ Calculates the distribution of the resolving powers.
    -110
    -111        Parameters
    -112        --------
    -113        tmpdf_ms : Pandas DataFrame
    -114            A DataFrame containing the extracted peaks with normalized resolving powers.
    -115
    -116        Returns
    -117        --------
    -118        rps_thresh : list 
    -119            A list of the calculated thresholds for filtering.
    -120        """
    -121        
    -122        # Use Seaborn to create a KDE of the normalised resolving powers
    -123        rps = sns.kdeplot(tmpdf_ms['crp']) 
    -124        rps_data = rps.get_lines()[0].get_data()
    -125        tmpdf = pd.Series(index=rps_data[0],data=rps_data[1])
    -126        rps_apex_ppm = tmpdf.idxmax()
    -127        rps_apex_val = tmpdf.max()
    -128        plt.close(rps.figure)
    -129        plt.close('all')
    -130
    -131        # Use LMFIT to create a gaussian model of the distribution
    -132        lmmodel = GaussianModel()
    -133        lmpars = lmmodel.guess(rps_data[1], x=rps_data[0])
    -134        if self.guess_pars:
    -135            lmpars['sigma'].value = rps_data[0][-1]*0.01
    -136            lmpars['center'].value = rps_apex_ppm
    -137            lmpars['amplitude'].value = rps_apex_val
    -138        lmout = lmmodel.fit(rps_data[1], lmpars, x=rps_data[0])
    -139
    -140        if self.plot:
    -141            fig,ax = plt.subplots(figsize=(8,4))
    -142            lmout.plot_fit(ax=ax,data_kws ={'color':'tab:blue'},fit_kws ={'color':'tab:red'})
    -143            ax.set_xlabel('Normalised Resolving Power')
    -144            ax.set_ylabel('Density')
    -145            plt.legend(facecolor='white', framealpha=0)
    -146
    -147        mean_res = lmout.best_values['center']
    -148        std_res = lmout.best_values['sigma']
    -149        fwhm_res = std_res*np.sqrt(8*np.log(2))
    -150
    -151        ndevs = self.ndeviations/2
    -152        rps_thresh = [mean_res-(fwhm_res*ndevs),mean_res+(fwhm_res*ndevs)]
    -153        return rps_thresh
    -154    
    -155    def create_index_list_to_remove(self, tmpdf_ms, rps_thresh : list):
    -156        """ Creates an index list of peaks to remove based on the calculated thresholds.
    -157
    -158        Parameters
    -159        ---------
    -160        tmpdf_ms : Pandas DataFrame
    -161            A DataFrame containing the extracted peaks with normalized resolving powers.
    -162        rps_thresh : list
    -163            A list of the calculated thresholds for filtering.
    -164
    -165        Returns
    -166        ----------
    -167        index_to_keep :list 
    -168            A list of indices of peaks to keep.
    -169        """
    -170        #Subset the list of mspeaks to only the ones to keep, return an index list which can be passed back to the main 
    -171        
    -172        tmpdf_ms = tmpdf_ms[(tmpdf_ms['crp']<min(rps_thresh))|(tmpdf_ms['crp']>max(rps_thresh))]
    -173        index_to_keep  = list(tmpdf_ms.index)
    -174        return index_to_keep
    -175    
    -176    def main(self):
    -177        """ Executes the main filtering process and returns the index list of peaks to remove.
    -178
    -179        Returns
    -180        --------
    -181        index_to_remove : list
    -182            A list of indices of peaks to remove.
    -183        """
    -184        tmpdf_ms = self.extract_peaks()
    -185        tmpdf_ms  = self.normalise_rps(tmpdf_ms)
    -186        rps_thresh = self.calculate_distribution(tmpdf_ms)
    -187        index_to_remove = self.create_index_list_to_remove(tmpdf_ms,rps_thresh)
    -188        return index_to_remove
    +            
     27class MeanResolvingPowerFilter:
    + 28    """Class for for mean resolving power filtration.
    + 29
    + 30    This module implements a mean resolving power filter based on the work described [1]
    + 31
    + 32    The MeanResolvingPowerFilter class provides methods to calculate the m/z normalized resolving power, fit a Gaussian distribution to it, and filter out peaks that are outside of the user-defined number of standard deviations.
    + 33
    + 34    Attributes
    + 35    -------
    + 36    mass_spectrum (object): The mass spectrum object.
    + 37    ndeviations (int): The number of standard deviations used for filtering.
    + 38    plot (bool): Flag indicating whether to plot the results.
    + 39    guess_pars (bool): Flag indicating whether to guess the parameters for the Gaussian model.
    + 40
    + 41    Methods
    + 42    ------
    + 43    * extract_peaks(): Extracts the peaks from the mass spectrum.
    + 44    * normalise_rps(tmpdf_ms): Normalizes the resolving powers to be independent of m/z.
    + 45    * calculate_distribution(tmpdf_ms): Calculates the distribution of the resolving powers.
    + 46    * create_index_list_to_remove(tmpdf_ms, rps_thresh): Creates an index list of peaks to remove based on the calculated thresholds.
    + 47    * main(): Executes the main filtering process and returns the index list of peaks to remove.
    + 48
    + 49    References
    + 50    ----------
    + 51    1.  Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P.
    + 52        Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry.
    + 53        Rapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940
    + 54    """
    + 55
    + 56    def __init__(
    + 57        self,
    + 58        mass_spectrum,
    + 59        ndeviations: float = 3,
    + 60        plot: bool = False,
    + 61        guess_pars: bool = False,
    + 62    ):
    + 63        # we dont want the assignments made in this exploratory class to copy to the original object, so we make a copy of it.
    + 64        # Possible future task - make mass spectrum base class copyable...
    + 65        # TODO see if there is redundancy in the AutoRecalibration function we can minimise here?
    + 66        # self.mass_spectrum
    + 67        self.mass_spectrum = mass_spectrum
    + 68        self.plot = plot
    + 69        self.ndeviations = ndeviations
    + 70        self.guess_pars = guess_pars
    + 71
    + 72    def extract_peaks(self):
    + 73        """Extracts the peaks from the mass spectrum.
    + 74
    + 75        Returns
    + 76        ----------
    + 77        tmpdf_ms : Pandas DataFrame
    + 78            A DataFrame containing the extracted peaks.
    + 79        """
    + 80        ids = []
    + 81        mzs = []
    + 82        rps = []
    + 83        for mspeak in self.mass_spectrum.mspeaks:
    + 84            ids.append(mspeak.index)
    + 85            mzs.append(mspeak.mz_exp)
    + 86            rps.append(mspeak.resolving_power)
    + 87        mzs = np.array(mzs)
    + 88        rps = np.array(rps)
    + 89
    + 90        tmpdf_ms = pd.DataFrame(index=ids, columns=["mz", "rp", "crp"])
    + 91        tmpdf_ms["mz"] = mzs
    + 92        tmpdf_ms["rp"] = rps
    + 93        return tmpdf_ms
    + 94
    + 95    def normalise_rps(self, tmpdf_ms):
    + 96        """Normalizes the resolving powers to be independent of m/z.
    + 97
    + 98        Parameters
    + 99        ------
    +100        tmpdf_ms : Pandas DataFrame
    +101            A DataFrame containing the extracted peaks.
    +102
    +103        Returns
    +104        --------
    +105        tmpdf_ms : Pandas DataFrame
    +106            A DataFrame with the resolving powers normalized.
    +107        """
    +108
    +109        if self.mass_spectrum.analyzer == "ICR":
    +110            tmpdf_ms["crp"] = tmpdf_ms["rp"] * np.sqrt(tmpdf_ms["mz"] ** 2)
    +111        else:
    +112            warnings.warn(
    +113                f"Analyzer type {self.mass_spectrum.analyzer} not yet supported.",
    +114                UserWarning,
    +115            )
    +116        return tmpdf_ms
    +117
    +118    def calculate_distribution(self, tmpdf_ms):
    +119        """Calculates the distribution of the resolving powers.
    +120
    +121        Parameters
    +122        --------
    +123        tmpdf_ms : Pandas DataFrame
    +124            A DataFrame containing the extracted peaks with normalized resolving powers.
    +125
    +126        Returns
    +127        --------
    +128        rps_thresh : list
    +129            A list of the calculated thresholds for filtering.
    +130        """
    +131
    +132        # Use Seaborn to create a KDE of the normalised resolving powers
    +133        rps = sns.kdeplot(tmpdf_ms["crp"])
    +134        rps_data = rps.get_lines()[0].get_data()
    +135        tmpdf = pd.Series(index=rps_data[0], data=rps_data[1])
    +136        rps_apex_ppm = tmpdf.idxmax()
    +137        rps_apex_val = tmpdf.max()
    +138        plt.close(rps.figure)
    +139        plt.close("all")
    +140
    +141        # Use LMFIT to create a gaussian model of the distribution
    +142        lmmodel = GaussianModel()
    +143        lmpars = lmmodel.guess(rps_data[1], x=rps_data[0])
    +144        if self.guess_pars:
    +145            lmpars["sigma"].value = rps_data[0][-1] * 0.01
    +146            lmpars["center"].value = rps_apex_ppm
    +147            lmpars["amplitude"].value = rps_apex_val
    +148        lmout = lmmodel.fit(rps_data[1], lmpars, x=rps_data[0])
    +149
    +150        if self.plot:
    +151            fig, ax = plt.subplots(figsize=(8, 4))
    +152            lmout.plot_fit(
    +153                ax=ax, data_kws={"color": "tab:blue"}, fit_kws={"color": "tab:red"}
    +154            )
    +155            ax.set_xlabel("Normalised Resolving Power")
    +156            ax.set_ylabel("Density")
    +157            plt.legend(facecolor="white", framealpha=0)
    +158
    +159        mean_res = lmout.best_values["center"]
    +160        std_res = lmout.best_values["sigma"]
    +161        fwhm_res = std_res * np.sqrt(8 * np.log(2))
    +162
    +163        ndevs = self.ndeviations / 2
    +164        rps_thresh = [mean_res - (fwhm_res * ndevs), mean_res + (fwhm_res * ndevs)]
    +165        return rps_thresh
    +166
    +167    def create_index_list_to_remove(self, tmpdf_ms, rps_thresh: list):
    +168        """Creates an index list of peaks to remove based on the calculated thresholds.
    +169
    +170        Parameters
    +171        ---------
    +172        tmpdf_ms : Pandas DataFrame
    +173            A DataFrame containing the extracted peaks with normalized resolving powers.
    +174        rps_thresh : list
    +175            A list of the calculated thresholds for filtering.
    +176
    +177        Returns
    +178        ----------
    +179        index_to_keep :list
    +180            A list of indices of peaks to keep.
    +181        """
    +182        # Subset the list of mspeaks to only the ones to keep, return an index list which can be passed back to the main
    +183
    +184        tmpdf_ms = tmpdf_ms[
    +185            (tmpdf_ms["crp"] < min(rps_thresh)) | (tmpdf_ms["crp"] > max(rps_thresh))
    +186        ]
    +187        index_to_keep = list(tmpdf_ms.index)
    +188        return index_to_keep
    +189
    +190    def main(self):
    +191        """Executes the main filtering process and returns the index list of peaks to remove.
    +192
    +193        Returns
    +194        --------
    +195        index_to_remove : list
    +196            A list of indices of peaks to remove.
    +197        """
    +198        tmpdf_ms = self.extract_peaks()
    +199        tmpdf_ms = self.normalise_rps(tmpdf_ms)
    +200        rps_thresh = self.calculate_distribution(tmpdf_ms)
    +201        index_to_remove = self.create_index_list_to_remove(tmpdf_ms, rps_thresh)
    +202        return index_to_remove
     
    @@ -486,18 +513,18 @@
    Attributes
    Methods
      -
    • extract_peaks(): Extracts the peaks from the mass spectrum.
    • -
    • normalise_rps(tmpdf_ms): Normalizes the resolving powers to be independent of m/z.
    • -
    • calculate_distribution(tmpdf_ms): Calculates the distribution of the resolving powers.
    • -
    • create_index_list_to_remove(tmpdf_ms, rps_thresh): Creates an index list of peaks to remove based on the calculated thresholds.
    • -
    • main(): Executes the main filtering process and returns the index list of peaks to remove.
    • +
    • extract_peaks(): Extracts the peaks from the mass spectrum.
    • +
    • normalise_rps(tmpdf_ms): Normalizes the resolving powers to be independent of m/z.
    • +
    • calculate_distribution(tmpdf_ms): Calculates the distribution of the resolving powers.
    • +
    • create_index_list_to_remove(tmpdf_ms, rps_thresh): Creates an index list of peaks to remove based on the calculated thresholds.
    • +
    • main(): Executes the main filtering process and returns the index list of peaks to remove.
    References
      -
    1. Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. -Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. +
    2. Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. +Fourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. Rapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940
    @@ -513,15 +540,21 @@
    References
    -
    55    def __init__(self, mass_spectrum, ndeviations : float=3, plot : bool=False, guess_pars : bool=False):
    -56        # we dont want the assignments made in this exploratory class to copy to the original object, so we make a copy of it.  
    -57        # Possible future task - make mass spectrum base class copyable...
    -58        #TODO see if there is redundancy in the AutoRecalibration function we can minimise here? 
    -59        #self.mass_spectrum
    -60        self.mass_spectrum = mass_spectrum
    -61        self.plot = plot
    -62        self.ndeviations = ndeviations
    -63        self.guess_pars = guess_pars
    +            
    56    def __init__(
    +57        self,
    +58        mass_spectrum,
    +59        ndeviations: float = 3,
    +60        plot: bool = False,
    +61        guess_pars: bool = False,
    +62    ):
    +63        # we dont want the assignments made in this exploratory class to copy to the original object, so we make a copy of it.
    +64        # Possible future task - make mass spectrum base class copyable...
    +65        # TODO see if there is redundancy in the AutoRecalibration function we can minimise here?
    +66        # self.mass_spectrum
    +67        self.mass_spectrum = mass_spectrum
    +68        self.plot = plot
    +69        self.ndeviations = ndeviations
    +70        self.guess_pars = guess_pars
     
    @@ -583,28 +616,28 @@
    References
    -
    65    def extract_peaks(self):
    -66        """ Extracts the peaks from the mass spectrum.
    -67
    -68        Returns
    -69        ----------
    -70        tmpdf_ms : Pandas DataFrame
    -71            A DataFrame containing the extracted peaks.
    -72        """
    -73        ids = []
    -74        mzs = []
    -75        rps = []
    -76        for mspeak in self.mass_spectrum.mspeaks:
    -77            ids.append(mspeak.index)
    -78            mzs.append(mspeak.mz_exp)
    -79            rps.append(mspeak.resolving_power)
    -80        mzs = np.array(mzs)
    -81        rps = np.array(rps)  
    -82
    -83        tmpdf_ms = pd.DataFrame(index=ids,columns=['mz','rp','crp'])
    -84        tmpdf_ms['mz'] = mzs
    -85        tmpdf_ms['rp'] = rps
    -86        return tmpdf_ms
    +            
    72    def extract_peaks(self):
    +73        """Extracts the peaks from the mass spectrum.
    +74
    +75        Returns
    +76        ----------
    +77        tmpdf_ms : Pandas DataFrame
    +78            A DataFrame containing the extracted peaks.
    +79        """
    +80        ids = []
    +81        mzs = []
    +82        rps = []
    +83        for mspeak in self.mass_spectrum.mspeaks:
    +84            ids.append(mspeak.index)
    +85            mzs.append(mspeak.mz_exp)
    +86            rps.append(mspeak.resolving_power)
    +87        mzs = np.array(mzs)
    +88        rps = np.array(rps)
    +89
    +90        tmpdf_ms = pd.DataFrame(index=ids, columns=["mz", "rp", "crp"])
    +91        tmpdf_ms["mz"] = mzs
    +92        tmpdf_ms["rp"] = rps
    +93        return tmpdf_ms
     
    @@ -631,25 +664,28 @@
    Returns
    -
     88    def normalise_rps(self, tmpdf_ms):
    - 89        """ Normalizes the resolving powers to be independent of m/z.
    - 90
    - 91        Parameters
    - 92        ------
    - 93        tmpdf_ms : Pandas DataFrame
    - 94            A DataFrame containing the extracted peaks.
    - 95
    - 96        Returns
    - 97        --------
    - 98        tmpdf_ms : Pandas DataFrame
    - 99            A DataFrame with the resolving powers normalized.
    -100        """
    -101        
    -102        if self.mass_spectrum.analyzer == 'ICR':
    -103            tmpdf_ms['crp'] = tmpdf_ms['rp'] * np.sqrt(tmpdf_ms['mz']**2)
    -104        else:
    -105            warnings.warn(f'Analyzer type {self.mass_spectrum.analyzer} not yet supported.', UserWarning)
    -106        return tmpdf_ms
    +            
     95    def normalise_rps(self, tmpdf_ms):
    + 96        """Normalizes the resolving powers to be independent of m/z.
    + 97
    + 98        Parameters
    + 99        ------
    +100        tmpdf_ms : Pandas DataFrame
    +101            A DataFrame containing the extracted peaks.
    +102
    +103        Returns
    +104        --------
    +105        tmpdf_ms : Pandas DataFrame
    +106            A DataFrame with the resolving powers normalized.
    +107        """
    +108
    +109        if self.mass_spectrum.analyzer == "ICR":
    +110            tmpdf_ms["crp"] = tmpdf_ms["rp"] * np.sqrt(tmpdf_ms["mz"] ** 2)
    +111        else:
    +112            warnings.warn(
    +113                f"Analyzer type {self.mass_spectrum.analyzer} not yet supported.",
    +114                UserWarning,
    +115            )
    +116        return tmpdf_ms
     
    @@ -683,52 +719,54 @@
    Returns
    -
    108    def calculate_distribution(self, tmpdf_ms):
    -109        """ Calculates the distribution of the resolving powers.
    -110
    -111        Parameters
    -112        --------
    -113        tmpdf_ms : Pandas DataFrame
    -114            A DataFrame containing the extracted peaks with normalized resolving powers.
    -115
    -116        Returns
    -117        --------
    -118        rps_thresh : list 
    -119            A list of the calculated thresholds for filtering.
    -120        """
    -121        
    -122        # Use Seaborn to create a KDE of the normalised resolving powers
    -123        rps = sns.kdeplot(tmpdf_ms['crp']) 
    -124        rps_data = rps.get_lines()[0].get_data()
    -125        tmpdf = pd.Series(index=rps_data[0],data=rps_data[1])
    -126        rps_apex_ppm = tmpdf.idxmax()
    -127        rps_apex_val = tmpdf.max()
    -128        plt.close(rps.figure)
    -129        plt.close('all')
    -130
    -131        # Use LMFIT to create a gaussian model of the distribution
    -132        lmmodel = GaussianModel()
    -133        lmpars = lmmodel.guess(rps_data[1], x=rps_data[0])
    -134        if self.guess_pars:
    -135            lmpars['sigma'].value = rps_data[0][-1]*0.01
    -136            lmpars['center'].value = rps_apex_ppm
    -137            lmpars['amplitude'].value = rps_apex_val
    -138        lmout = lmmodel.fit(rps_data[1], lmpars, x=rps_data[0])
    -139
    -140        if self.plot:
    -141            fig,ax = plt.subplots(figsize=(8,4))
    -142            lmout.plot_fit(ax=ax,data_kws ={'color':'tab:blue'},fit_kws ={'color':'tab:red'})
    -143            ax.set_xlabel('Normalised Resolving Power')
    -144            ax.set_ylabel('Density')
    -145            plt.legend(facecolor='white', framealpha=0)
    -146
    -147        mean_res = lmout.best_values['center']
    -148        std_res = lmout.best_values['sigma']
    -149        fwhm_res = std_res*np.sqrt(8*np.log(2))
    -150
    -151        ndevs = self.ndeviations/2
    -152        rps_thresh = [mean_res-(fwhm_res*ndevs),mean_res+(fwhm_res*ndevs)]
    -153        return rps_thresh
    +            
    118    def calculate_distribution(self, tmpdf_ms):
    +119        """Calculates the distribution of the resolving powers.
    +120
    +121        Parameters
    +122        --------
    +123        tmpdf_ms : Pandas DataFrame
    +124            A DataFrame containing the extracted peaks with normalized resolving powers.
    +125
    +126        Returns
    +127        --------
    +128        rps_thresh : list
    +129            A list of the calculated thresholds for filtering.
    +130        """
    +131
    +132        # Use Seaborn to create a KDE of the normalised resolving powers
    +133        rps = sns.kdeplot(tmpdf_ms["crp"])
    +134        rps_data = rps.get_lines()[0].get_data()
    +135        tmpdf = pd.Series(index=rps_data[0], data=rps_data[1])
    +136        rps_apex_ppm = tmpdf.idxmax()
    +137        rps_apex_val = tmpdf.max()
    +138        plt.close(rps.figure)
    +139        plt.close("all")
    +140
    +141        # Use LMFIT to create a gaussian model of the distribution
    +142        lmmodel = GaussianModel()
    +143        lmpars = lmmodel.guess(rps_data[1], x=rps_data[0])
    +144        if self.guess_pars:
    +145            lmpars["sigma"].value = rps_data[0][-1] * 0.01
    +146            lmpars["center"].value = rps_apex_ppm
    +147            lmpars["amplitude"].value = rps_apex_val
    +148        lmout = lmmodel.fit(rps_data[1], lmpars, x=rps_data[0])
    +149
    +150        if self.plot:
    +151            fig, ax = plt.subplots(figsize=(8, 4))
    +152            lmout.plot_fit(
    +153                ax=ax, data_kws={"color": "tab:blue"}, fit_kws={"color": "tab:red"}
    +154            )
    +155            ax.set_xlabel("Normalised Resolving Power")
    +156            ax.set_ylabel("Density")
    +157            plt.legend(facecolor="white", framealpha=0)
    +158
    +159        mean_res = lmout.best_values["center"]
    +160        std_res = lmout.best_values["sigma"]
    +161        fwhm_res = std_res * np.sqrt(8 * np.log(2))
    +162
    +163        ndevs = self.ndeviations / 2
    +164        rps_thresh = [mean_res - (fwhm_res * ndevs), mean_res + (fwhm_res * ndevs)]
    +165        return rps_thresh
     
    @@ -762,26 +800,28 @@
    Returns
    -
    155    def create_index_list_to_remove(self, tmpdf_ms, rps_thresh : list):
    -156        """ Creates an index list of peaks to remove based on the calculated thresholds.
    -157
    -158        Parameters
    -159        ---------
    -160        tmpdf_ms : Pandas DataFrame
    -161            A DataFrame containing the extracted peaks with normalized resolving powers.
    -162        rps_thresh : list
    -163            A list of the calculated thresholds for filtering.
    -164
    -165        Returns
    -166        ----------
    -167        index_to_keep :list 
    -168            A list of indices of peaks to keep.
    -169        """
    -170        #Subset the list of mspeaks to only the ones to keep, return an index list which can be passed back to the main 
    -171        
    -172        tmpdf_ms = tmpdf_ms[(tmpdf_ms['crp']<min(rps_thresh))|(tmpdf_ms['crp']>max(rps_thresh))]
    -173        index_to_keep  = list(tmpdf_ms.index)
    -174        return index_to_keep
    +            
    167    def create_index_list_to_remove(self, tmpdf_ms, rps_thresh: list):
    +168        """Creates an index list of peaks to remove based on the calculated thresholds.
    +169
    +170        Parameters
    +171        ---------
    +172        tmpdf_ms : Pandas DataFrame
    +173            A DataFrame containing the extracted peaks with normalized resolving powers.
    +174        rps_thresh : list
    +175            A list of the calculated thresholds for filtering.
    +176
    +177        Returns
    +178        ----------
    +179        index_to_keep :list
    +180            A list of indices of peaks to keep.
    +181        """
    +182        # Subset the list of mspeaks to only the ones to keep, return an index list which can be passed back to the main
    +183
    +184        tmpdf_ms = tmpdf_ms[
    +185            (tmpdf_ms["crp"] < min(rps_thresh)) | (tmpdf_ms["crp"] > max(rps_thresh))
    +186        ]
    +187        index_to_keep = list(tmpdf_ms.index)
    +188        return index_to_keep
     
    @@ -817,19 +857,19 @@
    Returns
    -
    176    def main(self):
    -177        """ Executes the main filtering process and returns the index list of peaks to remove.
    -178
    -179        Returns
    -180        --------
    -181        index_to_remove : list
    -182            A list of indices of peaks to remove.
    -183        """
    -184        tmpdf_ms = self.extract_peaks()
    -185        tmpdf_ms  = self.normalise_rps(tmpdf_ms)
    -186        rps_thresh = self.calculate_distribution(tmpdf_ms)
    -187        index_to_remove = self.create_index_list_to_remove(tmpdf_ms,rps_thresh)
    -188        return index_to_remove
    +            
    190    def main(self):
    +191        """Executes the main filtering process and returns the index list of peaks to remove.
    +192
    +193        Returns
    +194        --------
    +195        index_to_remove : list
    +196            A list of indices of peaks to remove.
    +197        """
    +198        tmpdf_ms = self.extract_peaks()
    +199        tmpdf_ms = self.normalise_rps(tmpdf_ms)
    +200        rps_thresh = self.calculate_distribution(tmpdf_ms)
    +201        index_to_remove = self.create_index_list_to_remove(tmpdf_ms, rps_thresh)
    +202        return index_to_remove
     
    diff --git a/docs/corems/mass_spectrum/calc/NoiseCalc.html b/docs/corems/mass_spectrum/calc/NoiseCalc.html index ad123da4..9aa3aac8 100644 --- a/docs/corems/mass_spectrum/calc/NoiseCalc.html +++ b/docs/corems/mass_spectrum/calc/NoiseCalc.html @@ -75,358 +75,371 @@

    -
      1import time
    +                        
      1import warnings
       2from typing import Tuple
       3
    -  4from numpy import where, average, std, isnan, inf, hstack, median, argmax, percentile, log10, histogram, nan
    -  5#from scipy.signal import argrelmax
    +  4from numpy import average, histogram, hstack, inf, isnan, log10, median, nan, std, where
    +  5
       6from corems import chunks
    -  7import warnings
    -  8
    -  9#from matplotlib import pyplot
    - 10__author__ = "Yuri E. Corilo"
    - 11__date__ = "Jun 27, 2019"
    +  7
    +  8# from matplotlib import pyplot
    +  9__author__ = "Yuri E. Corilo"
    + 10__date__ = "Jun 27, 2019"
    + 11
      12
    - 13
    - 14class NoiseThresholdCalc:
    - 15    """Class for noise threshold calculation.
    - 16
    - 17    Parameters
    - 18    ----------
    - 19    mass_spectrum : MassSpectrum
    - 20        The mass spectrum object.
    - 21    settings : MSParameters
    - 22        The mass spectrum parameters object.
    - 23    is_centroid : bool
    - 24        Flag indicating whether the mass spectrum is centroid or profile.
    - 25    baseline_noise : float
    - 26        The baseline noise.
    - 27    baseline_noise_std : float
    - 28        The baseline noise standard deviation.
    - 29    max_signal_to_noise : float
    - 30        The maximum signal to noise.
    - 31    max_abundance : float
    - 32        The maximum abundance.
    - 33    abundance : np.array
    - 34        The abundance array.
    - 35    abundance_profile : np.array
    - 36        The abundance profile array.
    - 37    mz_exp : np.array
    - 38        The experimental m/z array.
    - 39    mz_exp_profile : np.array
    - 40        The experimental m/z profile array.
    - 41
    - 42    Attributes
    - 43    ----------
    - 44    None
    - 45
    - 46    Methods
    - 47    -------
    - 48    * get_noise_threshold(). Get the noise threshold.    
    - 49    * cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.  
    - 50    * get_noise_average(ymincentroid). 
    - 51        Get the average noise and standard deviation.   
    - 52    * get_abundance_minima_centroid(abun_cut)
    - 53        Get the abundance minima for centroid data.   
    - 54    * run_log_noise_threshold_calc(). 
    - 55        Run the log noise threshold calculation.  
    - 56    * run_noise_threshold_calc(). 
    - 57        Run the noise threshold calculation.  
    - 58    """
    - 59
    - 60
    - 61    def get_noise_threshold(self) -> Tuple[Tuple[float, float], Tuple[float,float ]]:
    - 62        """ Get the noise threshold.
    - 63
    - 64        Returns
    - 65        -------
    - 66        Tuple[Tuple[float, float], Tuple[float, float]]
    - 67            A tuple containing the m/z and abundance noise thresholds.
    - 68            (min_mz, max_mz), (noise_threshold, noise_threshold)
    - 69        """
    - 70       
    - 71        if self.is_centroid:
    - 72
    - 73            x = min(self.mz_exp), max((self.mz_exp))
    - 74            
    - 75            if self.settings.noise_threshold_method == 'minima':
    - 76                
    - 77                abundance_threshold = self.baseline_noise + (self.settings.noise_threshold_min_std * self.baseline_noise_std)
    - 78                y = (abundance_threshold, abundance_threshold)
    - 79
    - 80            elif self.settings.noise_threshold_method == 'signal_noise':
    - 81
    - 82                normalized_threshold = (self.max_abundance * self.settings.noise_threshold_min_s2n )/self.max_signal_to_noise
    - 83                y = (normalized_threshold, normalized_threshold)
    - 84            
    - 85            elif self.settings.noise_threshold_method == "relative_abundance":
    - 86
    - 87                normalized_threshold = (max(self.abundance)/100)*self.settings.noise_threshold_min_relative_abundance
    - 88                y = (normalized_threshold, normalized_threshold)    
    + 13class NoiseThresholdCalc:
    + 14    """Class for noise threshold calculation.
    + 15
    + 16    Parameters
    + 17    ----------
    + 18    mass_spectrum : MassSpectrum
    + 19        The mass spectrum object.
    + 20    settings : MSParameters
    + 21        The mass spectrum parameters object.
    + 22    is_centroid : bool
    + 23        Flag indicating whether the mass spectrum is centroid or profile.
    + 24    baseline_noise : float
    + 25        The baseline noise.
    + 26    baseline_noise_std : float
    + 27        The baseline noise standard deviation.
    + 28    max_signal_to_noise : float
    + 29        The maximum signal to noise.
    + 30    max_abundance : float
    + 31        The maximum abundance.
    + 32    abundance : np.array
    + 33        The abundance array.
    + 34    abundance_profile : np.array
    + 35        The abundance profile array.
    + 36    mz_exp : np.array
    + 37        The experimental m/z array.
    + 38    mz_exp_profile : np.array
    + 39        The experimental m/z profile array.
    + 40
    + 41    Attributes
    + 42    ----------
    + 43    None
    + 44
    + 45    Methods
    + 46    -------
    + 47    * get_noise_threshold(). Get the noise threshold.
    + 48    * cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    + 49    * get_noise_average(ymincentroid).
    + 50        Get the average noise and standard deviation.
    + 51    * get_abundance_minima_centroid(abun_cut)
    + 52        Get the abundance minima for centroid data.
    + 53    * run_log_noise_threshold_calc().
    + 54        Run the log noise threshold calculation.
    + 55    * run_noise_threshold_calc().
    + 56        Run the noise threshold calculation.
    + 57    """
    + 58
    + 59    def get_noise_threshold(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
    + 60        """Get the noise threshold.
    + 61
    + 62        Returns
    + 63        -------
    + 64        Tuple[Tuple[float, float], Tuple[float, float]]
    + 65            A tuple containing the m/z and abundance noise thresholds.
    + 66            (min_mz, max_mz), (noise_threshold, noise_threshold)
    + 67        """
    + 68
    + 69        if self.is_centroid:
    + 70            x = min(self.mz_exp), max((self.mz_exp))
    + 71
    + 72            if self.settings.noise_threshold_method == "minima":
    + 73                abundance_threshold = self.baseline_noise + (
    + 74                    self.settings.noise_threshold_min_std * self.baseline_noise_std
    + 75                )
    + 76                y = (abundance_threshold, abundance_threshold)
    + 77
    + 78            elif self.settings.noise_threshold_method == "signal_noise":
    + 79                normalized_threshold = (
    + 80                    self.max_abundance * self.settings.noise_threshold_min_s2n
    + 81                ) / self.max_signal_to_noise
    + 82                y = (normalized_threshold, normalized_threshold)
    + 83
    + 84            elif self.settings.noise_threshold_method == "relative_abundance":
    + 85                normalized_threshold = (
    + 86                    max(self.abundance) / 100
    + 87                ) * self.settings.noise_threshold_min_relative_abundance
    + 88                y = (normalized_threshold, normalized_threshold)
      89
      90            elif self.settings.noise_threshold_method == "absolute_abundance":
    - 91
    - 92                normalized_threshold = self.abundance*self.settings.noise_threshold_absolute_abundance
    - 93                y = (normalized_threshold, normalized_threshold)
    - 94            #log noise method not tested for centroid data
    - 95            else:
    - 96                    raise  Exception("%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class" % self.settings.noise_threshold_method)
    - 97                
    - 98            return x, y    
    - 99
    -100        else:
    + 91                normalized_threshold = (
    + 92                    self.abundance * self.settings.noise_threshold_absolute_abundance
    + 93                )
    + 94                y = (normalized_threshold, normalized_threshold)
    + 95            # log noise method not tested for centroid data
    + 96            else:
    + 97                raise Exception(
    + 98                    "%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class"
    + 99                    % self.settings.noise_threshold_method
    +100                )
     101
    -102            if self.baseline_noise and self.baseline_noise_std:
    -103                
    -104                x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    -105                y = (self.baseline_noise_std, self.baseline_noise_std)
    -106                
    -107                if self.settings.noise_threshold_method == 'minima':
    -108                
    -109                    #print(self.settings.noise_threshold_min_std)
    -110                    abundance_threshold = self.baseline_noise + (self.settings.noise_threshold_min_std * self.baseline_noise_std)
    -111                    
    -112                    y = (abundance_threshold, abundance_threshold)
    -113
    -114                elif self.settings.noise_threshold_method == 'signal_noise':
    -115
    -116                    max_sn = self.abundance_profile.max()/self.baseline_noise_std
    -117
    -118                    normalized_threshold = (self.abundance_profile.max() * self.settings.noise_threshold_min_s2n )/max_sn
    -119                    y = (normalized_threshold, normalized_threshold)
    -120
    -121                elif self.settings.noise_threshold_method == "relative_abundance":
    -122
    -123                    normalized_threshold = (self.abundance_profile.max()/100)*self.settings.noise_threshold_min_relative_abundance
    +102            return x, y
    +103
    +104        else:
    +105            if self.baseline_noise and self.baseline_noise_std:
    +106                x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    +107                y = (self.baseline_noise_std, self.baseline_noise_std)
    +108
    +109                if self.settings.noise_threshold_method == "minima":
    +110                    # print(self.settings.noise_threshold_min_std)
    +111                    abundance_threshold = self.baseline_noise + (
    +112                        self.settings.noise_threshold_min_std * self.baseline_noise_std
    +113                    )
    +114
    +115                    y = (abundance_threshold, abundance_threshold)
    +116
    +117                elif self.settings.noise_threshold_method == "signal_noise":
    +118                    max_sn = self.abundance_profile.max() / self.baseline_noise_std
    +119
    +120                    normalized_threshold = (
    +121                        self.abundance_profile.max()
    +122                        * self.settings.noise_threshold_min_s2n
    +123                    ) / max_sn
     124                    y = (normalized_threshold, normalized_threshold)
     125
    -126                elif self.settings.noise_threshold_method == "absolute_abundance":
    -127
    -128                    normalized_threshold = self.settings.noise_threshold_absolute_abundance
    -129                    y = (normalized_threshold, normalized_threshold)
    -130
    -131                elif self.settings.noise_threshold_method == "log":
    -132                    normalized_threshold = self.settings.noise_threshold_log_nsigma * self.baseline_noise_std
    -133                    y = (normalized_threshold, normalized_threshold)
    -134
    -135                else:
    -136                    raise  Exception("%s method was not implemented, \
    -137                        please refer to corems.mass_spectrum.calc.NoiseCalc Class" % self.settings.noise_threshold_method)
    -138                
    -139                return x, y
    -140            
    -141            else:
    -142                
    -143                warnings.warn(
    -144                    "Noise Baseline and Noise std not specified,\
    -145                    defaulting to 0,0 run process_mass_spec() ?"
    -146                )    
    -147                return (0,0) , (0,0)
    -148
    -149    def cut_mz_domain_noise(self):
    -150        """Cut the m/z domain to the noise threshold regions.
    +126                elif self.settings.noise_threshold_method == "relative_abundance":
    +127                    normalized_threshold = (
    +128                        self.abundance_profile.max() / 100
    +129                    ) * self.settings.noise_threshold_min_relative_abundance
    +130                    y = (normalized_threshold, normalized_threshold)
    +131
    +132                elif self.settings.noise_threshold_method == "absolute_abundance":
    +133                    normalized_threshold = (
    +134                        self.settings.noise_threshold_absolute_abundance
    +135                    )
    +136                    y = (normalized_threshold, normalized_threshold)
    +137
    +138                elif self.settings.noise_threshold_method == "log":
    +139                    normalized_threshold = (
    +140                        self.settings.noise_threshold_log_nsigma
    +141                        * self.baseline_noise_std
    +142                    )
    +143                    y = (normalized_threshold, normalized_threshold)
    +144
    +145                else:
    +146                    raise Exception(
    +147                        "%s method was not implemented, \
    +148                        please refer to corems.mass_spectrum.calc.NoiseCalc Class"
    +149                        % self.settings.noise_threshold_method
    +150                    )
     151
    -152        Returns
    -153        -------
    -154        Tuple[np.array, np.array]
    -155            A tuple containing the m/z and abundance arrays of the truncated spectrum region.
    -156        """
    -157        min_mz_whole_ms = self.mz_exp_profile.min()
    -158        max_mz_whole_ms = self.mz_exp_profile.max()
    -159
    -160        if self.settings.noise_threshold_method == 'minima':
    -161            
    -162            # this calculation is taking too long (about 2 seconds)
    -163            number_average_molecular_weight = self.weight_average_molecular_weight(
    -164                profile=True)
    -165           
    -166            # +-200 is a guess for testing only, it needs adjustment for each type of analysis
    -167            # need to check min mz here or it will break
    -168            min_mz_noise = number_average_molecular_weight - 100
    -169            # need to check max mz here or it will break
    -170            max_mz_noise = number_average_molecular_weight + 100
    +152                return x, y
    +153
    +154            else:
    +155                warnings.warn(
    +156                    "Noise Baseline and Noise std not specified,\
    +157                    defaulting to 0,0 run process_mass_spec() ?"
    +158                )
    +159                return (0, 0), (0, 0)
    +160
    +161    def cut_mz_domain_noise(self):
    +162        """Cut the m/z domain to the noise threshold regions.
    +163
    +164        Returns
    +165        -------
    +166        Tuple[np.array, np.array]
    +167            A tuple containing the m/z and abundance arrays of the truncated spectrum region.
    +168        """
    +169        min_mz_whole_ms = self.mz_exp_profile.min()
    +170        max_mz_whole_ms = self.mz_exp_profile.max()
     171
    -172        else:
    -173
    -174            min_mz_noise = self.settings.noise_min_mz
    -175            max_mz_noise = self.settings.noise_max_mz
    -176
    -177        if min_mz_noise < min_mz_whole_ms:
    -178            min_mz_noise = min_mz_whole_ms
    -179
    -180        if max_mz_noise > max_mz_whole_ms:
    -181            max_mz_noise = max_mz_whole_ms
    -182
    -183        #print(min_mz_noise, max_mz_noise)
    -184        low_mz_index = (where(self.mz_exp_profile >= min_mz_noise)[0][0])
    -185        #print(self.mz_exp_profile[low_mz_index])
    -186        # low_mz_index = (argmax(self.mz_exp_profile <= min_mz_noise))
    -187        
    -188        high_mz_index = (where(self.mz_exp_profile <= max_mz_noise)[-1][-1])
    -189        
    -190        #high_mz_index = (argmax(self.mz_exp_profile <= max_mz_noise))
    -191        
    -192        if high_mz_index > low_mz_index:
    -193            # pyplot.plot(self.mz_exp_profile[low_mz_index:high_mz_index], self.abundance_profile[low_mz_index:high_mz_index])
    -194            # pyplot.show()
    -195            return self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[low_mz_index:high_mz_index]
    -196        else:
    -197            # pyplot.plot(self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index])
    -198            # pyplot.show()
    -199            return self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index]
    -200      
    -201
    -202    def get_noise_average(self, ymincentroid):
    -203        """ Get the average noise and standard deviation.
    -204
    -205        Parameters
    -206        ----------
    -207        ymincentroid : np.array
    -208            The ymincentroid array.
    -209        
    -210        Returns
    -211        -------
    -212        Tuple[float, float]
    -213            A tuple containing the average noise and standard deviation.
    -214            
    -215        """
    -216        # assumes noise to be gaussian and estimate noise level by 
    -217        # calculating the valley. 
    -218        
    -219        auto = True if self.settings.noise_threshold_method == 'minima' else False
    -220
    -221        average_noise = median((ymincentroid))*2 if auto else median(ymincentroid)
    -222        
    -223        s_deviation = ymincentroid.std()*3 if auto else ymincentroid.std()
    -224            
    -225        return average_noise, s_deviation
    -226
    -227    def get_abundance_minima_centroid(self, abun_cut):
    -228        """Get the abundance minima for centroid data.
    -229
    -230        Parameters
    -231        ----------
    -232        abun_cut : np.array
    -233            The abundance cut array.
    +172        if self.settings.noise_threshold_method == "minima":
    +173            # this calculation is taking too long (about 2 seconds)
    +174            number_average_molecular_weight = self.weight_average_molecular_weight(
    +175                profile=True
    +176            )
    +177
    +178            # +-200 is a guess for testing only, it needs adjustment for each type of analysis
    +179            # need to check min mz here or it will break
    +180            min_mz_noise = number_average_molecular_weight - 100
    +181            # need to check max mz here or it will break
    +182            max_mz_noise = number_average_molecular_weight + 100
    +183
    +184        else:
    +185            min_mz_noise = self.settings.noise_min_mz
    +186            max_mz_noise = self.settings.noise_max_mz
    +187
    +188        if min_mz_noise < min_mz_whole_ms:
    +189            min_mz_noise = min_mz_whole_ms
    +190
    +191        if max_mz_noise > max_mz_whole_ms:
    +192            max_mz_noise = max_mz_whole_ms
    +193
    +194        # print(min_mz_noise, max_mz_noise)
    +195        low_mz_index = where(self.mz_exp_profile >= min_mz_noise)[0][0]
    +196        # print(self.mz_exp_profile[low_mz_index])
    +197        # low_mz_index = (argmax(self.mz_exp_profile <= min_mz_noise))
    +198
    +199        high_mz_index = where(self.mz_exp_profile <= max_mz_noise)[-1][-1]
    +200
    +201        # high_mz_index = (argmax(self.mz_exp_profile <= max_mz_noise))
    +202
    +203        if high_mz_index > low_mz_index:
    +204            # pyplot.plot(self.mz_exp_profile[low_mz_index:high_mz_index], self.abundance_profile[low_mz_index:high_mz_index])
    +205            # pyplot.show()
    +206            return self.mz_exp_profile[
    +207                high_mz_index:low_mz_index
    +208            ], self.abundance_profile[low_mz_index:high_mz_index]
    +209        else:
    +210            # pyplot.plot(self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index])
    +211            # pyplot.show()
    +212            return self.mz_exp_profile[
    +213                high_mz_index:low_mz_index
    +214            ], self.abundance_profile[high_mz_index:low_mz_index]
    +215
    +216    def get_noise_average(self, ymincentroid):
    +217        """Get the average noise and standard deviation.
    +218
    +219        Parameters
    +220        ----------
    +221        ymincentroid : np.array
    +222            The ymincentroid array.
    +223
    +224        Returns
    +225        -------
    +226        Tuple[float, float]
    +227            A tuple containing the average noise and standard deviation.
    +228
    +229        """
    +230        # assumes noise to be gaussian and estimate noise level by
    +231        # calculating the valley.
    +232
    +233        auto = True if self.settings.noise_threshold_method == "minima" else False
     234
    -235        Returns
    -236        -------
    -237        np.array
    -238            The abundance minima array.
    -239        """ 
    -240        maximum = self.abundance_profile.max()
    -241        threshold_min = (maximum * 1.00)
    -242
    -243        y = -abun_cut
    -244
    -245        dy = y[1:] - y[:-1]
    -246        '''replaces NaN for Infinity'''
    -247        indices_nan = where(isnan(y))[0]
    -248        
    -249        if indices_nan.size:
    -250
    -251            y[indices_nan] = inf
    -252            dy[where(isnan(dy))[0]] = inf
    -253
    -254        
    -255        indices = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    -256        
    -257        if indices.size and threshold_min is not None:
    -258            indices = indices[abun_cut[indices] <= threshold_min]
    -259  
    -260        return abun_cut[indices]
    -261
    -262    def run_log_noise_threshold_calc(self):
    -263        """ Run the log noise threshold calculation.
    -264
    -265
    -266        Returns
    -267        -------
    -268        Tuple[float, float]
    -269            A tuple containing the average noise and standard deviation.
    -270            
    -271        Notes 
    -272        --------
    -273        Method for estimating the noise based on decimal log of all the data point
    -274
    -275        Idea is that you calculate a histogram of of the log10(abundance) values. 
    -276        The maximum of the histogram == the standard deviation of the noise. 
    +235        average_noise = median((ymincentroid)) * 2 if auto else median(ymincentroid)
    +236
    +237        s_deviation = ymincentroid.std() * 3 if auto else ymincentroid.std()
    +238
    +239        return average_noise, s_deviation
    +240
    +241    def get_abundance_minima_centroid(self, abun_cut):
    +242        """Get the abundance minima for centroid data.
    +243
    +244        Parameters
    +245        ----------
    +246        abun_cut : np.array
    +247            The abundance cut array.
    +248
    +249        Returns
    +250        -------
    +251        np.array
    +252            The abundance minima array.
    +253        """
    +254        maximum = self.abundance_profile.max()
    +255        threshold_min = maximum * 1.00
    +256
    +257        y = -abun_cut
    +258
    +259        dy = y[1:] - y[:-1]
    +260        """replaces NaN for Infinity"""
    +261        indices_nan = where(isnan(y))[0]
    +262
    +263        if indices_nan.size:
    +264            y[indices_nan] = inf
    +265            dy[where(isnan(dy))[0]] = inf
    +266
    +267        indices = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +268
    +269        if indices.size and threshold_min is not None:
    +270            indices = indices[abun_cut[indices] <= threshold_min]
    +271
    +272        return abun_cut[indices]
    +273
    +274    def run_log_noise_threshold_calc(self):
    +275        """Run the log noise threshold calculation.
    +276
     277
    -278
    -279        For aFT data it is a gaussian distribution of noise - not implemented here!
    -280        For mFT data it is a Rayleigh distribution, and the value is actually 10^(abu_max)*0.463.
    -281
    +278        Returns
    +279        -------
    +280        Tuple[float, float]
    +281            A tuple containing the average noise and standard deviation.
     282
    -283        See the publication cited above for the derivation of this. 
    -284
    -285        References
    -286        --------
    -287        1. dx.doi.org/10.1021/ac403278t | Anal. Chem. 2014, 86, 3308−3316
    -288
    -289        """
    +283        Notes
    +284        --------
    +285        Method for estimating the noise based on decimal log of all the data point
    +286
    +287        Idea is that you calculate a histogram of of the log10(abundance) values.
    +288        The maximum of the histogram == the standard deviation of the noise.
    +289
     290
    -291        if self.is_centroid:
    -292            raise  Exception("log noise Not tested for centroid data")
    -293        else:
    -294            # cut the spectrum to ROI
    -295            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    -296            # If there are 0 values, the log will fail
    -297            # But we may have negative values for aFT data, so we check if 0 exists
    -298            # Need to make a copy of the abundance cut values so we dont overwrite it....
    -299            tmp_abundance = abundance_cut.copy()
    -300            if 0 in tmp_abundance:
    -301                tmp_abundance[tmp_abundance==0] = nan
    -302                tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
    -303                # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined. 
    -304                # Hard to generalise - needs more investigation.
    -305
    -306            # calculate a histogram of the log10 of the abundance data
    -307            hist_values = histogram(log10(tmp_abundance),bins=self.settings.noise_threshold_log_nsigma_bins) 
    -308            #find the apex of this histogram
    -309            maxvalidx = where(hist_values[0] == max(hist_values[0]))
    -310            # get the value of this apex (note - still in log10 units)
    -311            log_sigma = hist_values[1][maxvalidx]
    -312            # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
    -313            if len(log_sigma)>1:
    -314                log_sigma = average(log_sigma)
    -315            ## To do : check if aFT or mFT and adjust method
    -316            noise_mid = 10**log_sigma
    -317            noise_1std = noise_mid*self.settings.noise_threshold_log_nsigma_corr_factor #for mFT 0.463
    -318            return float(noise_mid), float(noise_1std)
    -319
    -320    def run_noise_threshold_calc(self):
    -321        """ Runs noise threshold calculation (not log based method)
    -322        
    -323        Returns
    -324        -------
    -325        Tuple[float, float]
    -326            A tuple containing the average noise and standard deviation.
    -327
    -328        """
    -329        if self.is_centroid:
    -330            # calculates noise_baseline and noise_std
    -331            # needed to run auto noise threshold mode
    -332            # it is not used for signal to noise nor 
    -333            # relative abudance methods
    -334            abundances_chunks = chunks(self.abundance, 50)
    -335            each_min_abund = [min(x) for x in abundances_chunks]
    -336
    -337            return average(each_min_abund), std(each_min_abund)
    -338        
    -339        else:
    -340
    -341            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    -342            
    -343            if self.settings.noise_threshold_method == 'minima':
    -344
    -345                yminima = self.get_abundance_minima_centroid(abundance_cut)
    -346                
    -347                return self.get_noise_average(yminima)
    -348
    -349            else:
    -350                
    -351                # pyplot.show()
    -352                return self.get_noise_average(abundance_cut)
    +291        For aFT data it is a gaussian distribution of noise - not implemented here!
    +292        For mFT data it is a Rayleigh distribution, and the value is actually 10^(abu_max)*0.463.
    +293
    +294
    +295        See the publication cited above for the derivation of this.
    +296
    +297        References
    +298        --------
    +299        1. dx.doi.org/10.1021/ac403278t | Anal. Chem. 2014, 86, 3308−3316
    +300
    +301        """
    +302
    +303        if self.is_centroid:
    +304            raise Exception("log noise Not tested for centroid data")
    +305        else:
    +306            # cut the spectrum to ROI
    +307            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    +308            # If there are 0 values, the log will fail
    +309            # But we may have negative values for aFT data, so we check if 0 exists
    +310            # Need to make a copy of the abundance cut values so we dont overwrite it....
    +311            tmp_abundance = abundance_cut.copy()
    +312            if 0 in tmp_abundance:
    +313                tmp_abundance[tmp_abundance == 0] = nan
    +314                tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
    +315                # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined.
    +316                # Hard to generalise - needs more investigation.
    +317
    +318            # calculate a histogram of the log10 of the abundance data
    +319            hist_values = histogram(
    +320                log10(tmp_abundance), bins=self.settings.noise_threshold_log_nsigma_bins
    +321            )
    +322            # find the apex of this histogram
    +323            maxvalidx = where(hist_values[0] == max(hist_values[0]))
    +324            # get the value of this apex (note - still in log10 units)
    +325            log_sigma = hist_values[1][maxvalidx]
    +326            # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
    +327            if len(log_sigma) > 1:
    +328                log_sigma = average(log_sigma)
    +329            ## To do : check if aFT or mFT and adjust method
    +330            noise_mid = 10**log_sigma
    +331            noise_1std = (
    +332                noise_mid * self.settings.noise_threshold_log_nsigma_corr_factor
    +333            )  # for mFT 0.463
    +334            return float(noise_mid), float(noise_1std)
    +335
    +336    def run_noise_threshold_calc(self):
    +337        """Runs noise threshold calculation (not log based method)
    +338
    +339        Returns
    +340        -------
    +341        Tuple[float, float]
    +342            A tuple containing the average noise and standard deviation.
    +343
    +344        """
    +345        if self.is_centroid:
    +346            # calculates noise_baseline and noise_std
    +347            # needed to run auto noise threshold mode
    +348            # it is not used for signal to noise nor
    +349            # relative abudance methods
    +350            abundances_chunks = chunks(self.abundance, 50)
    +351            each_min_abund = [min(x) for x in abundances_chunks]
    +352
    +353            return average(each_min_abund), std(each_min_abund)
    +354
    +355        else:
    +356            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    +357
    +358            if self.settings.noise_threshold_method == "minima":
    +359                yminima = self.get_abundance_minima_centroid(abundance_cut)
    +360
    +361                return self.get_noise_average(yminima)
    +362
    +363            else:
    +364                # pyplot.show()
    +365                return self.get_noise_average(abundance_cut)
     
    @@ -442,345 +455,359 @@

    -
     15class NoiseThresholdCalc:
    - 16    """Class for noise threshold calculation.
    - 17
    - 18    Parameters
    - 19    ----------
    - 20    mass_spectrum : MassSpectrum
    - 21        The mass spectrum object.
    - 22    settings : MSParameters
    - 23        The mass spectrum parameters object.
    - 24    is_centroid : bool
    - 25        Flag indicating whether the mass spectrum is centroid or profile.
    - 26    baseline_noise : float
    - 27        The baseline noise.
    - 28    baseline_noise_std : float
    - 29        The baseline noise standard deviation.
    - 30    max_signal_to_noise : float
    - 31        The maximum signal to noise.
    - 32    max_abundance : float
    - 33        The maximum abundance.
    - 34    abundance : np.array
    - 35        The abundance array.
    - 36    abundance_profile : np.array
    - 37        The abundance profile array.
    - 38    mz_exp : np.array
    - 39        The experimental m/z array.
    - 40    mz_exp_profile : np.array
    - 41        The experimental m/z profile array.
    - 42
    - 43    Attributes
    - 44    ----------
    - 45    None
    - 46
    - 47    Methods
    - 48    -------
    - 49    * get_noise_threshold(). Get the noise threshold.    
    - 50    * cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.  
    - 51    * get_noise_average(ymincentroid). 
    - 52        Get the average noise and standard deviation.   
    - 53    * get_abundance_minima_centroid(abun_cut)
    - 54        Get the abundance minima for centroid data.   
    - 55    * run_log_noise_threshold_calc(). 
    - 56        Run the log noise threshold calculation.  
    - 57    * run_noise_threshold_calc(). 
    - 58        Run the noise threshold calculation.  
    - 59    """
    - 60
    - 61
    - 62    def get_noise_threshold(self) -> Tuple[Tuple[float, float], Tuple[float,float ]]:
    - 63        """ Get the noise threshold.
    - 64
    - 65        Returns
    - 66        -------
    - 67        Tuple[Tuple[float, float], Tuple[float, float]]
    - 68            A tuple containing the m/z and abundance noise thresholds.
    - 69            (min_mz, max_mz), (noise_threshold, noise_threshold)
    - 70        """
    - 71       
    - 72        if self.is_centroid:
    - 73
    - 74            x = min(self.mz_exp), max((self.mz_exp))
    - 75            
    - 76            if self.settings.noise_threshold_method == 'minima':
    - 77                
    - 78                abundance_threshold = self.baseline_noise + (self.settings.noise_threshold_min_std * self.baseline_noise_std)
    - 79                y = (abundance_threshold, abundance_threshold)
    - 80
    - 81            elif self.settings.noise_threshold_method == 'signal_noise':
    - 82
    - 83                normalized_threshold = (self.max_abundance * self.settings.noise_threshold_min_s2n )/self.max_signal_to_noise
    - 84                y = (normalized_threshold, normalized_threshold)
    - 85            
    - 86            elif self.settings.noise_threshold_method == "relative_abundance":
    - 87
    - 88                normalized_threshold = (max(self.abundance)/100)*self.settings.noise_threshold_min_relative_abundance
    - 89                y = (normalized_threshold, normalized_threshold)    
    +            
     14class NoiseThresholdCalc:
    + 15    """Class for noise threshold calculation.
    + 16
    + 17    Parameters
    + 18    ----------
    + 19    mass_spectrum : MassSpectrum
    + 20        The mass spectrum object.
    + 21    settings : MSParameters
    + 22        The mass spectrum parameters object.
    + 23    is_centroid : bool
    + 24        Flag indicating whether the mass spectrum is centroid or profile.
    + 25    baseline_noise : float
    + 26        The baseline noise.
    + 27    baseline_noise_std : float
    + 28        The baseline noise standard deviation.
    + 29    max_signal_to_noise : float
    + 30        The maximum signal to noise.
    + 31    max_abundance : float
    + 32        The maximum abundance.
    + 33    abundance : np.array
    + 34        The abundance array.
    + 35    abundance_profile : np.array
    + 36        The abundance profile array.
    + 37    mz_exp : np.array
    + 38        The experimental m/z array.
    + 39    mz_exp_profile : np.array
    + 40        The experimental m/z profile array.
    + 41
    + 42    Attributes
    + 43    ----------
    + 44    None
    + 45
    + 46    Methods
    + 47    -------
    + 48    * get_noise_threshold(). Get the noise threshold.
    + 49    * cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    + 50    * get_noise_average(ymincentroid).
    + 51        Get the average noise and standard deviation.
    + 52    * get_abundance_minima_centroid(abun_cut)
    + 53        Get the abundance minima for centroid data.
    + 54    * run_log_noise_threshold_calc().
    + 55        Run the log noise threshold calculation.
    + 56    * run_noise_threshold_calc().
    + 57        Run the noise threshold calculation.
    + 58    """
    + 59
    + 60    def get_noise_threshold(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
    + 61        """Get the noise threshold.
    + 62
    + 63        Returns
    + 64        -------
    + 65        Tuple[Tuple[float, float], Tuple[float, float]]
    + 66            A tuple containing the m/z and abundance noise thresholds.
    + 67            (min_mz, max_mz), (noise_threshold, noise_threshold)
    + 68        """
    + 69
    + 70        if self.is_centroid:
    + 71            x = min(self.mz_exp), max((self.mz_exp))
    + 72
    + 73            if self.settings.noise_threshold_method == "minima":
    + 74                abundance_threshold = self.baseline_noise + (
    + 75                    self.settings.noise_threshold_min_std * self.baseline_noise_std
    + 76                )
    + 77                y = (abundance_threshold, abundance_threshold)
    + 78
    + 79            elif self.settings.noise_threshold_method == "signal_noise":
    + 80                normalized_threshold = (
    + 81                    self.max_abundance * self.settings.noise_threshold_min_s2n
    + 82                ) / self.max_signal_to_noise
    + 83                y = (normalized_threshold, normalized_threshold)
    + 84
    + 85            elif self.settings.noise_threshold_method == "relative_abundance":
    + 86                normalized_threshold = (
    + 87                    max(self.abundance) / 100
    + 88                ) * self.settings.noise_threshold_min_relative_abundance
    + 89                y = (normalized_threshold, normalized_threshold)
      90
      91            elif self.settings.noise_threshold_method == "absolute_abundance":
    - 92
    - 93                normalized_threshold = self.abundance*self.settings.noise_threshold_absolute_abundance
    - 94                y = (normalized_threshold, normalized_threshold)
    - 95            #log noise method not tested for centroid data
    - 96            else:
    - 97                    raise  Exception("%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class" % self.settings.noise_threshold_method)
    - 98                
    - 99            return x, y    
    -100
    -101        else:
    + 92                normalized_threshold = (
    + 93                    self.abundance * self.settings.noise_threshold_absolute_abundance
    + 94                )
    + 95                y = (normalized_threshold, normalized_threshold)
    + 96            # log noise method not tested for centroid data
    + 97            else:
    + 98                raise Exception(
    + 99                    "%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class"
    +100                    % self.settings.noise_threshold_method
    +101                )
     102
    -103            if self.baseline_noise and self.baseline_noise_std:
    -104                
    -105                x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    -106                y = (self.baseline_noise_std, self.baseline_noise_std)
    -107                
    -108                if self.settings.noise_threshold_method == 'minima':
    -109                
    -110                    #print(self.settings.noise_threshold_min_std)
    -111                    abundance_threshold = self.baseline_noise + (self.settings.noise_threshold_min_std * self.baseline_noise_std)
    -112                    
    -113                    y = (abundance_threshold, abundance_threshold)
    -114
    -115                elif self.settings.noise_threshold_method == 'signal_noise':
    -116
    -117                    max_sn = self.abundance_profile.max()/self.baseline_noise_std
    -118
    -119                    normalized_threshold = (self.abundance_profile.max() * self.settings.noise_threshold_min_s2n )/max_sn
    -120                    y = (normalized_threshold, normalized_threshold)
    -121
    -122                elif self.settings.noise_threshold_method == "relative_abundance":
    -123
    -124                    normalized_threshold = (self.abundance_profile.max()/100)*self.settings.noise_threshold_min_relative_abundance
    +103            return x, y
    +104
    +105        else:
    +106            if self.baseline_noise and self.baseline_noise_std:
    +107                x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    +108                y = (self.baseline_noise_std, self.baseline_noise_std)
    +109
    +110                if self.settings.noise_threshold_method == "minima":
    +111                    # print(self.settings.noise_threshold_min_std)
    +112                    abundance_threshold = self.baseline_noise + (
    +113                        self.settings.noise_threshold_min_std * self.baseline_noise_std
    +114                    )
    +115
    +116                    y = (abundance_threshold, abundance_threshold)
    +117
    +118                elif self.settings.noise_threshold_method == "signal_noise":
    +119                    max_sn = self.abundance_profile.max() / self.baseline_noise_std
    +120
    +121                    normalized_threshold = (
    +122                        self.abundance_profile.max()
    +123                        * self.settings.noise_threshold_min_s2n
    +124                    ) / max_sn
     125                    y = (normalized_threshold, normalized_threshold)
     126
    -127                elif self.settings.noise_threshold_method == "absolute_abundance":
    -128
    -129                    normalized_threshold = self.settings.noise_threshold_absolute_abundance
    -130                    y = (normalized_threshold, normalized_threshold)
    -131
    -132                elif self.settings.noise_threshold_method == "log":
    -133                    normalized_threshold = self.settings.noise_threshold_log_nsigma * self.baseline_noise_std
    -134                    y = (normalized_threshold, normalized_threshold)
    -135
    -136                else:
    -137                    raise  Exception("%s method was not implemented, \
    -138                        please refer to corems.mass_spectrum.calc.NoiseCalc Class" % self.settings.noise_threshold_method)
    -139                
    -140                return x, y
    -141            
    -142            else:
    -143                
    -144                warnings.warn(
    -145                    "Noise Baseline and Noise std not specified,\
    -146                    defaulting to 0,0 run process_mass_spec() ?"
    -147                )    
    -148                return (0,0) , (0,0)
    -149
    -150    def cut_mz_domain_noise(self):
    -151        """Cut the m/z domain to the noise threshold regions.
    +127                elif self.settings.noise_threshold_method == "relative_abundance":
    +128                    normalized_threshold = (
    +129                        self.abundance_profile.max() / 100
    +130                    ) * self.settings.noise_threshold_min_relative_abundance
    +131                    y = (normalized_threshold, normalized_threshold)
    +132
    +133                elif self.settings.noise_threshold_method == "absolute_abundance":
    +134                    normalized_threshold = (
    +135                        self.settings.noise_threshold_absolute_abundance
    +136                    )
    +137                    y = (normalized_threshold, normalized_threshold)
    +138
    +139                elif self.settings.noise_threshold_method == "log":
    +140                    normalized_threshold = (
    +141                        self.settings.noise_threshold_log_nsigma
    +142                        * self.baseline_noise_std
    +143                    )
    +144                    y = (normalized_threshold, normalized_threshold)
    +145
    +146                else:
    +147                    raise Exception(
    +148                        "%s method was not implemented, \
    +149                        please refer to corems.mass_spectrum.calc.NoiseCalc Class"
    +150                        % self.settings.noise_threshold_method
    +151                    )
     152
    -153        Returns
    -154        -------
    -155        Tuple[np.array, np.array]
    -156            A tuple containing the m/z and abundance arrays of the truncated spectrum region.
    -157        """
    -158        min_mz_whole_ms = self.mz_exp_profile.min()
    -159        max_mz_whole_ms = self.mz_exp_profile.max()
    -160
    -161        if self.settings.noise_threshold_method == 'minima':
    -162            
    -163            # this calculation is taking too long (about 2 seconds)
    -164            number_average_molecular_weight = self.weight_average_molecular_weight(
    -165                profile=True)
    -166           
    -167            # +-200 is a guess for testing only, it needs adjustment for each type of analysis
    -168            # need to check min mz here or it will break
    -169            min_mz_noise = number_average_molecular_weight - 100
    -170            # need to check max mz here or it will break
    -171            max_mz_noise = number_average_molecular_weight + 100
    +153                return x, y
    +154
    +155            else:
    +156                warnings.warn(
    +157                    "Noise Baseline and Noise std not specified,\
    +158                    defaulting to 0,0 run process_mass_spec() ?"
    +159                )
    +160                return (0, 0), (0, 0)
    +161
    +162    def cut_mz_domain_noise(self):
    +163        """Cut the m/z domain to the noise threshold regions.
    +164
    +165        Returns
    +166        -------
    +167        Tuple[np.array, np.array]
    +168            A tuple containing the m/z and abundance arrays of the truncated spectrum region.
    +169        """
    +170        min_mz_whole_ms = self.mz_exp_profile.min()
    +171        max_mz_whole_ms = self.mz_exp_profile.max()
     172
    -173        else:
    -174
    -175            min_mz_noise = self.settings.noise_min_mz
    -176            max_mz_noise = self.settings.noise_max_mz
    -177
    -178        if min_mz_noise < min_mz_whole_ms:
    -179            min_mz_noise = min_mz_whole_ms
    -180
    -181        if max_mz_noise > max_mz_whole_ms:
    -182            max_mz_noise = max_mz_whole_ms
    -183
    -184        #print(min_mz_noise, max_mz_noise)
    -185        low_mz_index = (where(self.mz_exp_profile >= min_mz_noise)[0][0])
    -186        #print(self.mz_exp_profile[low_mz_index])
    -187        # low_mz_index = (argmax(self.mz_exp_profile <= min_mz_noise))
    -188        
    -189        high_mz_index = (where(self.mz_exp_profile <= max_mz_noise)[-1][-1])
    -190        
    -191        #high_mz_index = (argmax(self.mz_exp_profile <= max_mz_noise))
    -192        
    -193        if high_mz_index > low_mz_index:
    -194            # pyplot.plot(self.mz_exp_profile[low_mz_index:high_mz_index], self.abundance_profile[low_mz_index:high_mz_index])
    -195            # pyplot.show()
    -196            return self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[low_mz_index:high_mz_index]
    -197        else:
    -198            # pyplot.plot(self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index])
    -199            # pyplot.show()
    -200            return self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index]
    -201      
    -202
    -203    def get_noise_average(self, ymincentroid):
    -204        """ Get the average noise and standard deviation.
    -205
    -206        Parameters
    -207        ----------
    -208        ymincentroid : np.array
    -209            The ymincentroid array.
    -210        
    -211        Returns
    -212        -------
    -213        Tuple[float, float]
    -214            A tuple containing the average noise and standard deviation.
    -215            
    -216        """
    -217        # assumes noise to be gaussian and estimate noise level by 
    -218        # calculating the valley. 
    -219        
    -220        auto = True if self.settings.noise_threshold_method == 'minima' else False
    -221
    -222        average_noise = median((ymincentroid))*2 if auto else median(ymincentroid)
    -223        
    -224        s_deviation = ymincentroid.std()*3 if auto else ymincentroid.std()
    -225            
    -226        return average_noise, s_deviation
    -227
    -228    def get_abundance_minima_centroid(self, abun_cut):
    -229        """Get the abundance minima for centroid data.
    -230
    -231        Parameters
    -232        ----------
    -233        abun_cut : np.array
    -234            The abundance cut array.
    +173        if self.settings.noise_threshold_method == "minima":
    +174            # this calculation is taking too long (about 2 seconds)
    +175            number_average_molecular_weight = self.weight_average_molecular_weight(
    +176                profile=True
    +177            )
    +178
    +179            # +-200 is a guess for testing only, it needs adjustment for each type of analysis
    +180            # need to check min mz here or it will break
    +181            min_mz_noise = number_average_molecular_weight - 100
    +182            # need to check max mz here or it will break
    +183            max_mz_noise = number_average_molecular_weight + 100
    +184
    +185        else:
    +186            min_mz_noise = self.settings.noise_min_mz
    +187            max_mz_noise = self.settings.noise_max_mz
    +188
    +189        if min_mz_noise < min_mz_whole_ms:
    +190            min_mz_noise = min_mz_whole_ms
    +191
    +192        if max_mz_noise > max_mz_whole_ms:
    +193            max_mz_noise = max_mz_whole_ms
    +194
    +195        # print(min_mz_noise, max_mz_noise)
    +196        low_mz_index = where(self.mz_exp_profile >= min_mz_noise)[0][0]
    +197        # print(self.mz_exp_profile[low_mz_index])
    +198        # low_mz_index = (argmax(self.mz_exp_profile <= min_mz_noise))
    +199
    +200        high_mz_index = where(self.mz_exp_profile <= max_mz_noise)[-1][-1]
    +201
    +202        # high_mz_index = (argmax(self.mz_exp_profile <= max_mz_noise))
    +203
    +204        if high_mz_index > low_mz_index:
    +205            # pyplot.plot(self.mz_exp_profile[low_mz_index:high_mz_index], self.abundance_profile[low_mz_index:high_mz_index])
    +206            # pyplot.show()
    +207            return self.mz_exp_profile[
    +208                high_mz_index:low_mz_index
    +209            ], self.abundance_profile[low_mz_index:high_mz_index]
    +210        else:
    +211            # pyplot.plot(self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index])
    +212            # pyplot.show()
    +213            return self.mz_exp_profile[
    +214                high_mz_index:low_mz_index
    +215            ], self.abundance_profile[high_mz_index:low_mz_index]
    +216
    +217    def get_noise_average(self, ymincentroid):
    +218        """Get the average noise and standard deviation.
    +219
    +220        Parameters
    +221        ----------
    +222        ymincentroid : np.array
    +223            The ymincentroid array.
    +224
    +225        Returns
    +226        -------
    +227        Tuple[float, float]
    +228            A tuple containing the average noise and standard deviation.
    +229
    +230        """
    +231        # assumes noise to be gaussian and estimate noise level by
    +232        # calculating the valley.
    +233
    +234        auto = True if self.settings.noise_threshold_method == "minima" else False
     235
    -236        Returns
    -237        -------
    -238        np.array
    -239            The abundance minima array.
    -240        """ 
    -241        maximum = self.abundance_profile.max()
    -242        threshold_min = (maximum * 1.00)
    -243
    -244        y = -abun_cut
    -245
    -246        dy = y[1:] - y[:-1]
    -247        '''replaces NaN for Infinity'''
    -248        indices_nan = where(isnan(y))[0]
    -249        
    -250        if indices_nan.size:
    -251
    -252            y[indices_nan] = inf
    -253            dy[where(isnan(dy))[0]] = inf
    -254
    -255        
    -256        indices = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    -257        
    -258        if indices.size and threshold_min is not None:
    -259            indices = indices[abun_cut[indices] <= threshold_min]
    -260  
    -261        return abun_cut[indices]
    -262
    -263    def run_log_noise_threshold_calc(self):
    -264        """ Run the log noise threshold calculation.
    -265
    -266
    -267        Returns
    -268        -------
    -269        Tuple[float, float]
    -270            A tuple containing the average noise and standard deviation.
    -271            
    -272        Notes 
    -273        --------
    -274        Method for estimating the noise based on decimal log of all the data point
    -275
    -276        Idea is that you calculate a histogram of of the log10(abundance) values. 
    -277        The maximum of the histogram == the standard deviation of the noise. 
    +236        average_noise = median((ymincentroid)) * 2 if auto else median(ymincentroid)
    +237
    +238        s_deviation = ymincentroid.std() * 3 if auto else ymincentroid.std()
    +239
    +240        return average_noise, s_deviation
    +241
    +242    def get_abundance_minima_centroid(self, abun_cut):
    +243        """Get the abundance minima for centroid data.
    +244
    +245        Parameters
    +246        ----------
    +247        abun_cut : np.array
    +248            The abundance cut array.
    +249
    +250        Returns
    +251        -------
    +252        np.array
    +253            The abundance minima array.
    +254        """
    +255        maximum = self.abundance_profile.max()
    +256        threshold_min = maximum * 1.00
    +257
    +258        y = -abun_cut
    +259
    +260        dy = y[1:] - y[:-1]
    +261        """replaces NaN for Infinity"""
    +262        indices_nan = where(isnan(y))[0]
    +263
    +264        if indices_nan.size:
    +265            y[indices_nan] = inf
    +266            dy[where(isnan(dy))[0]] = inf
    +267
    +268        indices = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +269
    +270        if indices.size and threshold_min is not None:
    +271            indices = indices[abun_cut[indices] <= threshold_min]
    +272
    +273        return abun_cut[indices]
    +274
    +275    def run_log_noise_threshold_calc(self):
    +276        """Run the log noise threshold calculation.
    +277
     278
    -279
    -280        For aFT data it is a gaussian distribution of noise - not implemented here!
    -281        For mFT data it is a Rayleigh distribution, and the value is actually 10^(abu_max)*0.463.
    -282
    +279        Returns
    +280        -------
    +281        Tuple[float, float]
    +282            A tuple containing the average noise and standard deviation.
     283
    -284        See the publication cited above for the derivation of this. 
    -285
    -286        References
    -287        --------
    -288        1. dx.doi.org/10.1021/ac403278t | Anal. Chem. 2014, 86, 3308−3316
    -289
    -290        """
    +284        Notes
    +285        --------
    +286        Method for estimating the noise based on decimal log of all the data point
    +287
    +288        Idea is that you calculate a histogram of of the log10(abundance) values.
    +289        The maximum of the histogram == the standard deviation of the noise.
    +290
     291
    -292        if self.is_centroid:
    -293            raise  Exception("log noise Not tested for centroid data")
    -294        else:
    -295            # cut the spectrum to ROI
    -296            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    -297            # If there are 0 values, the log will fail
    -298            # But we may have negative values for aFT data, so we check if 0 exists
    -299            # Need to make a copy of the abundance cut values so we dont overwrite it....
    -300            tmp_abundance = abundance_cut.copy()
    -301            if 0 in tmp_abundance:
    -302                tmp_abundance[tmp_abundance==0] = nan
    -303                tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
    -304                # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined. 
    -305                # Hard to generalise - needs more investigation.
    -306
    -307            # calculate a histogram of the log10 of the abundance data
    -308            hist_values = histogram(log10(tmp_abundance),bins=self.settings.noise_threshold_log_nsigma_bins) 
    -309            #find the apex of this histogram
    -310            maxvalidx = where(hist_values[0] == max(hist_values[0]))
    -311            # get the value of this apex (note - still in log10 units)
    -312            log_sigma = hist_values[1][maxvalidx]
    -313            # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
    -314            if len(log_sigma)>1:
    -315                log_sigma = average(log_sigma)
    -316            ## To do : check if aFT or mFT and adjust method
    -317            noise_mid = 10**log_sigma
    -318            noise_1std = noise_mid*self.settings.noise_threshold_log_nsigma_corr_factor #for mFT 0.463
    -319            return float(noise_mid), float(noise_1std)
    -320
    -321    def run_noise_threshold_calc(self):
    -322        """ Runs noise threshold calculation (not log based method)
    -323        
    -324        Returns
    -325        -------
    -326        Tuple[float, float]
    -327            A tuple containing the average noise and standard deviation.
    -328
    -329        """
    -330        if self.is_centroid:
    -331            # calculates noise_baseline and noise_std
    -332            # needed to run auto noise threshold mode
    -333            # it is not used for signal to noise nor 
    -334            # relative abudance methods
    -335            abundances_chunks = chunks(self.abundance, 50)
    -336            each_min_abund = [min(x) for x in abundances_chunks]
    -337
    -338            return average(each_min_abund), std(each_min_abund)
    -339        
    -340        else:
    -341
    -342            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    -343            
    -344            if self.settings.noise_threshold_method == 'minima':
    -345
    -346                yminima = self.get_abundance_minima_centroid(abundance_cut)
    -347                
    -348                return self.get_noise_average(yminima)
    -349
    -350            else:
    -351                
    -352                # pyplot.show()
    -353                return self.get_noise_average(abundance_cut)
    +292        For aFT data it is a gaussian distribution of noise - not implemented here!
    +293        For mFT data it is a Rayleigh distribution, and the value is actually 10^(abu_max)*0.463.
    +294
    +295
    +296        See the publication cited above for the derivation of this.
    +297
    +298        References
    +299        --------
    +300        1. dx.doi.org/10.1021/ac403278t | Anal. Chem. 2014, 86, 3308−3316
    +301
    +302        """
    +303
    +304        if self.is_centroid:
    +305            raise Exception("log noise Not tested for centroid data")
    +306        else:
    +307            # cut the spectrum to ROI
    +308            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    +309            # If there are 0 values, the log will fail
    +310            # But we may have negative values for aFT data, so we check if 0 exists
    +311            # Need to make a copy of the abundance cut values so we dont overwrite it....
    +312            tmp_abundance = abundance_cut.copy()
    +313            if 0 in tmp_abundance:
    +314                tmp_abundance[tmp_abundance == 0] = nan
    +315                tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
    +316                # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined.
    +317                # Hard to generalise - needs more investigation.
    +318
    +319            # calculate a histogram of the log10 of the abundance data
    +320            hist_values = histogram(
    +321                log10(tmp_abundance), bins=self.settings.noise_threshold_log_nsigma_bins
    +322            )
    +323            # find the apex of this histogram
    +324            maxvalidx = where(hist_values[0] == max(hist_values[0]))
    +325            # get the value of this apex (note - still in log10 units)
    +326            log_sigma = hist_values[1][maxvalidx]
    +327            # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
    +328            if len(log_sigma) > 1:
    +329                log_sigma = average(log_sigma)
    +330            ## To do : check if aFT or mFT and adjust method
    +331            noise_mid = 10**log_sigma
    +332            noise_1std = (
    +333                noise_mid * self.settings.noise_threshold_log_nsigma_corr_factor
    +334            )  # for mFT 0.463
    +335            return float(noise_mid), float(noise_1std)
    +336
    +337    def run_noise_threshold_calc(self):
    +338        """Runs noise threshold calculation (not log based method)
    +339
    +340        Returns
    +341        -------
    +342        Tuple[float, float]
    +343            A tuple containing the average noise and standard deviation.
    +344
    +345        """
    +346        if self.is_centroid:
    +347            # calculates noise_baseline and noise_std
    +348            # needed to run auto noise threshold mode
    +349            # it is not used for signal to noise nor
    +350            # relative abudance methods
    +351            abundances_chunks = chunks(self.abundance, 50)
    +352            each_min_abund = [min(x) for x in abundances_chunks]
    +353
    +354            return average(each_min_abund), std(each_min_abund)
    +355
    +356        else:
    +357            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    +358
    +359            if self.settings.noise_threshold_method == "minima":
    +360                yminima = self.get_abundance_minima_centroid(abundance_cut)
    +361
    +362                return self.get_noise_average(yminima)
    +363
    +364            else:
    +365                # pyplot.show()
    +366                return self.get_noise_average(abundance_cut)
     
    @@ -822,15 +849,15 @@
    Attributes
    Methods
      -
    • get_noise_threshold(). Get the noise threshold.
    • -
    • cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    • -
    • get_noise_average(ymincentroid). -Get the average noise and standard deviation.
    • +
    • get_noise_threshold(). Get the noise threshold.
    • +
    • cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    • +
    • get_noise_average(ymincentroid). +Get the average noise and standard deviation.
    • get_abundance_minima_centroid(abun_cut) -Get the abundance minima for centroid data.
    • -
    • run_log_noise_threshold_calc(). -Run the log noise threshold calculation.
    • -
    • run_noise_threshold_calc(). +Get the abundance minima for centroid data.
    • +
    • run_log_noise_threshold_calc(). +Run the log noise threshold calculation.
    • +
    • run_noise_threshold_calc(). Run the noise threshold calculation.
    @@ -847,93 +874,107 @@

    Methods
    -
     62    def get_noise_threshold(self) -> Tuple[Tuple[float, float], Tuple[float,float ]]:
    - 63        """ Get the noise threshold.
    - 64
    - 65        Returns
    - 66        -------
    - 67        Tuple[Tuple[float, float], Tuple[float, float]]
    - 68            A tuple containing the m/z and abundance noise thresholds.
    - 69            (min_mz, max_mz), (noise_threshold, noise_threshold)
    - 70        """
    - 71       
    - 72        if self.is_centroid:
    - 73
    - 74            x = min(self.mz_exp), max((self.mz_exp))
    - 75            
    - 76            if self.settings.noise_threshold_method == 'minima':
    - 77                
    - 78                abundance_threshold = self.baseline_noise + (self.settings.noise_threshold_min_std * self.baseline_noise_std)
    - 79                y = (abundance_threshold, abundance_threshold)
    - 80
    - 81            elif self.settings.noise_threshold_method == 'signal_noise':
    - 82
    - 83                normalized_threshold = (self.max_abundance * self.settings.noise_threshold_min_s2n )/self.max_signal_to_noise
    - 84                y = (normalized_threshold, normalized_threshold)
    - 85            
    - 86            elif self.settings.noise_threshold_method == "relative_abundance":
    - 87
    - 88                normalized_threshold = (max(self.abundance)/100)*self.settings.noise_threshold_min_relative_abundance
    - 89                y = (normalized_threshold, normalized_threshold)    
    +            
     60    def get_noise_threshold(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:
    + 61        """Get the noise threshold.
    + 62
    + 63        Returns
    + 64        -------
    + 65        Tuple[Tuple[float, float], Tuple[float, float]]
    + 66            A tuple containing the m/z and abundance noise thresholds.
    + 67            (min_mz, max_mz), (noise_threshold, noise_threshold)
    + 68        """
    + 69
    + 70        if self.is_centroid:
    + 71            x = min(self.mz_exp), max((self.mz_exp))
    + 72
    + 73            if self.settings.noise_threshold_method == "minima":
    + 74                abundance_threshold = self.baseline_noise + (
    + 75                    self.settings.noise_threshold_min_std * self.baseline_noise_std
    + 76                )
    + 77                y = (abundance_threshold, abundance_threshold)
    + 78
    + 79            elif self.settings.noise_threshold_method == "signal_noise":
    + 80                normalized_threshold = (
    + 81                    self.max_abundance * self.settings.noise_threshold_min_s2n
    + 82                ) / self.max_signal_to_noise
    + 83                y = (normalized_threshold, normalized_threshold)
    + 84
    + 85            elif self.settings.noise_threshold_method == "relative_abundance":
    + 86                normalized_threshold = (
    + 87                    max(self.abundance) / 100
    + 88                ) * self.settings.noise_threshold_min_relative_abundance
    + 89                y = (normalized_threshold, normalized_threshold)
      90
      91            elif self.settings.noise_threshold_method == "absolute_abundance":
    - 92
    - 93                normalized_threshold = self.abundance*self.settings.noise_threshold_absolute_abundance
    - 94                y = (normalized_threshold, normalized_threshold)
    - 95            #log noise method not tested for centroid data
    - 96            else:
    - 97                    raise  Exception("%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class" % self.settings.noise_threshold_method)
    - 98                
    - 99            return x, y    
    -100
    -101        else:
    + 92                normalized_threshold = (
    + 93                    self.abundance * self.settings.noise_threshold_absolute_abundance
    + 94                )
    + 95                y = (normalized_threshold, normalized_threshold)
    + 96            # log noise method not tested for centroid data
    + 97            else:
    + 98                raise Exception(
    + 99                    "%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class"
    +100                    % self.settings.noise_threshold_method
    +101                )
     102
    -103            if self.baseline_noise and self.baseline_noise_std:
    -104                
    -105                x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    -106                y = (self.baseline_noise_std, self.baseline_noise_std)
    -107                
    -108                if self.settings.noise_threshold_method == 'minima':
    -109                
    -110                    #print(self.settings.noise_threshold_min_std)
    -111                    abundance_threshold = self.baseline_noise + (self.settings.noise_threshold_min_std * self.baseline_noise_std)
    -112                    
    -113                    y = (abundance_threshold, abundance_threshold)
    -114
    -115                elif self.settings.noise_threshold_method == 'signal_noise':
    -116
    -117                    max_sn = self.abundance_profile.max()/self.baseline_noise_std
    -118
    -119                    normalized_threshold = (self.abundance_profile.max() * self.settings.noise_threshold_min_s2n )/max_sn
    -120                    y = (normalized_threshold, normalized_threshold)
    -121
    -122                elif self.settings.noise_threshold_method == "relative_abundance":
    -123
    -124                    normalized_threshold = (self.abundance_profile.max()/100)*self.settings.noise_threshold_min_relative_abundance
    +103            return x, y
    +104
    +105        else:
    +106            if self.baseline_noise and self.baseline_noise_std:
    +107                x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    +108                y = (self.baseline_noise_std, self.baseline_noise_std)
    +109
    +110                if self.settings.noise_threshold_method == "minima":
    +111                    # print(self.settings.noise_threshold_min_std)
    +112                    abundance_threshold = self.baseline_noise + (
    +113                        self.settings.noise_threshold_min_std * self.baseline_noise_std
    +114                    )
    +115
    +116                    y = (abundance_threshold, abundance_threshold)
    +117
    +118                elif self.settings.noise_threshold_method == "signal_noise":
    +119                    max_sn = self.abundance_profile.max() / self.baseline_noise_std
    +120
    +121                    normalized_threshold = (
    +122                        self.abundance_profile.max()
    +123                        * self.settings.noise_threshold_min_s2n
    +124                    ) / max_sn
     125                    y = (normalized_threshold, normalized_threshold)
     126
    -127                elif self.settings.noise_threshold_method == "absolute_abundance":
    -128
    -129                    normalized_threshold = self.settings.noise_threshold_absolute_abundance
    -130                    y = (normalized_threshold, normalized_threshold)
    -131
    -132                elif self.settings.noise_threshold_method == "log":
    -133                    normalized_threshold = self.settings.noise_threshold_log_nsigma * self.baseline_noise_std
    -134                    y = (normalized_threshold, normalized_threshold)
    -135
    -136                else:
    -137                    raise  Exception("%s method was not implemented, \
    -138                        please refer to corems.mass_spectrum.calc.NoiseCalc Class" % self.settings.noise_threshold_method)
    -139                
    -140                return x, y
    -141            
    -142            else:
    -143                
    -144                warnings.warn(
    -145                    "Noise Baseline and Noise std not specified,\
    -146                    defaulting to 0,0 run process_mass_spec() ?"
    -147                )    
    -148                return (0,0) , (0,0)
    +127                elif self.settings.noise_threshold_method == "relative_abundance":
    +128                    normalized_threshold = (
    +129                        self.abundance_profile.max() / 100
    +130                    ) * self.settings.noise_threshold_min_relative_abundance
    +131                    y = (normalized_threshold, normalized_threshold)
    +132
    +133                elif self.settings.noise_threshold_method == "absolute_abundance":
    +134                    normalized_threshold = (
    +135                        self.settings.noise_threshold_absolute_abundance
    +136                    )
    +137                    y = (normalized_threshold, normalized_threshold)
    +138
    +139                elif self.settings.noise_threshold_method == "log":
    +140                    normalized_threshold = (
    +141                        self.settings.noise_threshold_log_nsigma
    +142                        * self.baseline_noise_std
    +143                    )
    +144                    y = (normalized_threshold, normalized_threshold)
    +145
    +146                else:
    +147                    raise Exception(
    +148                        "%s method was not implemented, \
    +149                        please refer to corems.mass_spectrum.calc.NoiseCalc Class"
    +150                        % self.settings.noise_threshold_method
    +151                    )
    +152
    +153                return x, y
    +154
    +155            else:
    +156                warnings.warn(
    +157                    "Noise Baseline and Noise std not specified,\
    +158                    defaulting to 0,0 run process_mass_spec() ?"
    +159                )
    +160                return (0, 0), (0, 0)
     
    @@ -960,57 +1001,60 @@
    Returns
    -
    150    def cut_mz_domain_noise(self):
    -151        """Cut the m/z domain to the noise threshold regions.
    -152
    -153        Returns
    -154        -------
    -155        Tuple[np.array, np.array]
    -156            A tuple containing the m/z and abundance arrays of the truncated spectrum region.
    -157        """
    -158        min_mz_whole_ms = self.mz_exp_profile.min()
    -159        max_mz_whole_ms = self.mz_exp_profile.max()
    -160
    -161        if self.settings.noise_threshold_method == 'minima':
    -162            
    -163            # this calculation is taking too long (about 2 seconds)
    -164            number_average_molecular_weight = self.weight_average_molecular_weight(
    -165                profile=True)
    -166           
    -167            # +-200 is a guess for testing only, it needs adjustment for each type of analysis
    -168            # need to check min mz here or it will break
    -169            min_mz_noise = number_average_molecular_weight - 100
    -170            # need to check max mz here or it will break
    -171            max_mz_noise = number_average_molecular_weight + 100
    +            
    162    def cut_mz_domain_noise(self):
    +163        """Cut the m/z domain to the noise threshold regions.
    +164
    +165        Returns
    +166        -------
    +167        Tuple[np.array, np.array]
    +168            A tuple containing the m/z and abundance arrays of the truncated spectrum region.
    +169        """
    +170        min_mz_whole_ms = self.mz_exp_profile.min()
    +171        max_mz_whole_ms = self.mz_exp_profile.max()
     172
    -173        else:
    -174
    -175            min_mz_noise = self.settings.noise_min_mz
    -176            max_mz_noise = self.settings.noise_max_mz
    -177
    -178        if min_mz_noise < min_mz_whole_ms:
    -179            min_mz_noise = min_mz_whole_ms
    -180
    -181        if max_mz_noise > max_mz_whole_ms:
    -182            max_mz_noise = max_mz_whole_ms
    -183
    -184        #print(min_mz_noise, max_mz_noise)
    -185        low_mz_index = (where(self.mz_exp_profile >= min_mz_noise)[0][0])
    -186        #print(self.mz_exp_profile[low_mz_index])
    -187        # low_mz_index = (argmax(self.mz_exp_profile <= min_mz_noise))
    -188        
    -189        high_mz_index = (where(self.mz_exp_profile <= max_mz_noise)[-1][-1])
    -190        
    -191        #high_mz_index = (argmax(self.mz_exp_profile <= max_mz_noise))
    -192        
    -193        if high_mz_index > low_mz_index:
    -194            # pyplot.plot(self.mz_exp_profile[low_mz_index:high_mz_index], self.abundance_profile[low_mz_index:high_mz_index])
    -195            # pyplot.show()
    -196            return self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[low_mz_index:high_mz_index]
    -197        else:
    -198            # pyplot.plot(self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index])
    -199            # pyplot.show()
    -200            return self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index]
    +173        if self.settings.noise_threshold_method == "minima":
    +174            # this calculation is taking too long (about 2 seconds)
    +175            number_average_molecular_weight = self.weight_average_molecular_weight(
    +176                profile=True
    +177            )
    +178
    +179            # +-200 is a guess for testing only, it needs adjustment for each type of analysis
    +180            # need to check min mz here or it will break
    +181            min_mz_noise = number_average_molecular_weight - 100
    +182            # need to check max mz here or it will break
    +183            max_mz_noise = number_average_molecular_weight + 100
    +184
    +185        else:
    +186            min_mz_noise = self.settings.noise_min_mz
    +187            max_mz_noise = self.settings.noise_max_mz
    +188
    +189        if min_mz_noise < min_mz_whole_ms:
    +190            min_mz_noise = min_mz_whole_ms
    +191
    +192        if max_mz_noise > max_mz_whole_ms:
    +193            max_mz_noise = max_mz_whole_ms
    +194
    +195        # print(min_mz_noise, max_mz_noise)
    +196        low_mz_index = where(self.mz_exp_profile >= min_mz_noise)[0][0]
    +197        # print(self.mz_exp_profile[low_mz_index])
    +198        # low_mz_index = (argmax(self.mz_exp_profile <= min_mz_noise))
    +199
    +200        high_mz_index = where(self.mz_exp_profile <= max_mz_noise)[-1][-1]
    +201
    +202        # high_mz_index = (argmax(self.mz_exp_profile <= max_mz_noise))
    +203
    +204        if high_mz_index > low_mz_index:
    +205            # pyplot.plot(self.mz_exp_profile[low_mz_index:high_mz_index], self.abundance_profile[low_mz_index:high_mz_index])
    +206            # pyplot.show()
    +207            return self.mz_exp_profile[
    +208                high_mz_index:low_mz_index
    +209            ], self.abundance_profile[low_mz_index:high_mz_index]
    +210        else:
    +211            # pyplot.plot(self.mz_exp_profile[high_mz_index:low_mz_index], self.abundance_profile[high_mz_index:low_mz_index])
    +212            # pyplot.show()
    +213            return self.mz_exp_profile[
    +214                high_mz_index:low_mz_index
    +215            ], self.abundance_profile[high_mz_index:low_mz_index]
     
    @@ -1036,30 +1080,30 @@
    Returns
    -
    203    def get_noise_average(self, ymincentroid):
    -204        """ Get the average noise and standard deviation.
    -205
    -206        Parameters
    -207        ----------
    -208        ymincentroid : np.array
    -209            The ymincentroid array.
    -210        
    -211        Returns
    -212        -------
    -213        Tuple[float, float]
    -214            A tuple containing the average noise and standard deviation.
    -215            
    -216        """
    -217        # assumes noise to be gaussian and estimate noise level by 
    -218        # calculating the valley. 
    -219        
    -220        auto = True if self.settings.noise_threshold_method == 'minima' else False
    -221
    -222        average_noise = median((ymincentroid))*2 if auto else median(ymincentroid)
    -223        
    -224        s_deviation = ymincentroid.std()*3 if auto else ymincentroid.std()
    -225            
    -226        return average_noise, s_deviation
    +            
    217    def get_noise_average(self, ymincentroid):
    +218        """Get the average noise and standard deviation.
    +219
    +220        Parameters
    +221        ----------
    +222        ymincentroid : np.array
    +223            The ymincentroid array.
    +224
    +225        Returns
    +226        -------
    +227        Tuple[float, float]
    +228            A tuple containing the average noise and standard deviation.
    +229
    +230        """
    +231        # assumes noise to be gaussian and estimate noise level by
    +232        # calculating the valley.
    +233
    +234        auto = True if self.settings.noise_threshold_method == "minima" else False
    +235
    +236        average_noise = median((ymincentroid)) * 2 if auto else median(ymincentroid)
    +237
    +238        s_deviation = ymincentroid.std() * 3 if auto else ymincentroid.std()
    +239
    +240        return average_noise, s_deviation
     
    @@ -1092,40 +1136,38 @@
    Returns
    -
    228    def get_abundance_minima_centroid(self, abun_cut):
    -229        """Get the abundance minima for centroid data.
    -230
    -231        Parameters
    -232        ----------
    -233        abun_cut : np.array
    -234            The abundance cut array.
    -235
    -236        Returns
    -237        -------
    -238        np.array
    -239            The abundance minima array.
    -240        """ 
    -241        maximum = self.abundance_profile.max()
    -242        threshold_min = (maximum * 1.00)
    -243
    -244        y = -abun_cut
    -245
    -246        dy = y[1:] - y[:-1]
    -247        '''replaces NaN for Infinity'''
    -248        indices_nan = where(isnan(y))[0]
    -249        
    -250        if indices_nan.size:
    -251
    -252            y[indices_nan] = inf
    -253            dy[where(isnan(dy))[0]] = inf
    -254
    -255        
    -256        indices = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    -257        
    -258        if indices.size and threshold_min is not None:
    -259            indices = indices[abun_cut[indices] <= threshold_min]
    -260  
    -261        return abun_cut[indices]
    +            
    242    def get_abundance_minima_centroid(self, abun_cut):
    +243        """Get the abundance minima for centroid data.
    +244
    +245        Parameters
    +246        ----------
    +247        abun_cut : np.array
    +248            The abundance cut array.
    +249
    +250        Returns
    +251        -------
    +252        np.array
    +253            The abundance minima array.
    +254        """
    +255        maximum = self.abundance_profile.max()
    +256        threshold_min = maximum * 1.00
    +257
    +258        y = -abun_cut
    +259
    +260        dy = y[1:] - y[:-1]
    +261        """replaces NaN for Infinity"""
    +262        indices_nan = where(isnan(y))[0]
    +263
    +264        if indices_nan.size:
    +265            y[indices_nan] = inf
    +266            dy[where(isnan(dy))[0]] = inf
    +267
    +268        indices = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +269
    +270        if indices.size and threshold_min is not None:
    +271            indices = indices[abun_cut[indices] <= threshold_min]
    +272
    +273        return abun_cut[indices]
     
    @@ -1158,63 +1200,67 @@
    Returns
    -
    263    def run_log_noise_threshold_calc(self):
    -264        """ Run the log noise threshold calculation.
    -265
    -266
    -267        Returns
    -268        -------
    -269        Tuple[float, float]
    -270            A tuple containing the average noise and standard deviation.
    -271            
    -272        Notes 
    -273        --------
    -274        Method for estimating the noise based on decimal log of all the data point
    -275
    -276        Idea is that you calculate a histogram of of the log10(abundance) values. 
    -277        The maximum of the histogram == the standard deviation of the noise. 
    +            
    275    def run_log_noise_threshold_calc(self):
    +276        """Run the log noise threshold calculation.
    +277
     278
    -279
    -280        For aFT data it is a gaussian distribution of noise - not implemented here!
    -281        For mFT data it is a Rayleigh distribution, and the value is actually 10^(abu_max)*0.463.
    -282
    +279        Returns
    +280        -------
    +281        Tuple[float, float]
    +282            A tuple containing the average noise and standard deviation.
     283
    -284        See the publication cited above for the derivation of this. 
    -285
    -286        References
    -287        --------
    -288        1. dx.doi.org/10.1021/ac403278t | Anal. Chem. 2014, 86, 3308−3316
    -289
    -290        """
    +284        Notes
    +285        --------
    +286        Method for estimating the noise based on decimal log of all the data point
    +287
    +288        Idea is that you calculate a histogram of of the log10(abundance) values.
    +289        The maximum of the histogram == the standard deviation of the noise.
    +290
     291
    -292        if self.is_centroid:
    -293            raise  Exception("log noise Not tested for centroid data")
    -294        else:
    -295            # cut the spectrum to ROI
    -296            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    -297            # If there are 0 values, the log will fail
    -298            # But we may have negative values for aFT data, so we check if 0 exists
    -299            # Need to make a copy of the abundance cut values so we dont overwrite it....
    -300            tmp_abundance = abundance_cut.copy()
    -301            if 0 in tmp_abundance:
    -302                tmp_abundance[tmp_abundance==0] = nan
    -303                tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
    -304                # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined. 
    -305                # Hard to generalise - needs more investigation.
    -306
    -307            # calculate a histogram of the log10 of the abundance data
    -308            hist_values = histogram(log10(tmp_abundance),bins=self.settings.noise_threshold_log_nsigma_bins) 
    -309            #find the apex of this histogram
    -310            maxvalidx = where(hist_values[0] == max(hist_values[0]))
    -311            # get the value of this apex (note - still in log10 units)
    -312            log_sigma = hist_values[1][maxvalidx]
    -313            # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
    -314            if len(log_sigma)>1:
    -315                log_sigma = average(log_sigma)
    -316            ## To do : check if aFT or mFT and adjust method
    -317            noise_mid = 10**log_sigma
    -318            noise_1std = noise_mid*self.settings.noise_threshold_log_nsigma_corr_factor #for mFT 0.463
    -319            return float(noise_mid), float(noise_1std)
    +292        For aFT data it is a gaussian distribution of noise - not implemented here!
    +293        For mFT data it is a Rayleigh distribution, and the value is actually 10^(abu_max)*0.463.
    +294
    +295
    +296        See the publication cited above for the derivation of this.
    +297
    +298        References
    +299        --------
    +300        1. dx.doi.org/10.1021/ac403278t | Anal. Chem. 2014, 86, 3308−3316
    +301
    +302        """
    +303
    +304        if self.is_centroid:
    +305            raise Exception("log noise Not tested for centroid data")
    +306        else:
    +307            # cut the spectrum to ROI
    +308            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    +309            # If there are 0 values, the log will fail
    +310            # But we may have negative values for aFT data, so we check if 0 exists
    +311            # Need to make a copy of the abundance cut values so we dont overwrite it....
    +312            tmp_abundance = abundance_cut.copy()
    +313            if 0 in tmp_abundance:
    +314                tmp_abundance[tmp_abundance == 0] = nan
    +315                tmp_abundance = tmp_abundance[~isnan(tmp_abundance)]
    +316                # It seems there are edge cases of sparse but high S/N data where the wrong values may be determined.
    +317                # Hard to generalise - needs more investigation.
    +318
    +319            # calculate a histogram of the log10 of the abundance data
    +320            hist_values = histogram(
    +321                log10(tmp_abundance), bins=self.settings.noise_threshold_log_nsigma_bins
    +322            )
    +323            # find the apex of this histogram
    +324            maxvalidx = where(hist_values[0] == max(hist_values[0]))
    +325            # get the value of this apex (note - still in log10 units)
    +326            log_sigma = hist_values[1][maxvalidx]
    +327            # If the histogram had more than one maximum frequency bin, we need to reduce that to one entry
    +328            if len(log_sigma) > 1:
    +329                log_sigma = average(log_sigma)
    +330            ## To do : check if aFT or mFT and adjust method
    +331            noise_mid = 10**log_sigma
    +332            noise_1std = (
    +333                noise_mid * self.settings.noise_threshold_log_nsigma_corr_factor
    +334            )  # for mFT 0.463
    +335            return float(noise_mid), float(noise_1std)
     
    @@ -1230,13 +1276,13 @@
    Notes

    Method for estimating the noise based on decimal log of all the data point

    -

    Idea is that you calculate a histogram of of the log10(abundance) values. -The maximum of the histogram == the standard deviation of the noise.

    +

    Idea is that you calculate a histogram of of the log10(abundance) values. +The maximum of the histogram == the standard deviation of the noise.

    For aFT data it is a gaussian distribution of noise - not implemented here! For mFT data it is a Rayleigh distribution, and the value is actually 10^(abu_max)*0.463.

    -

    See the publication cited above for the derivation of this.

    +

    See the publication cited above for the derivation of this.

    References
    @@ -1258,39 +1304,36 @@
    References
    -
    321    def run_noise_threshold_calc(self):
    -322        """ Runs noise threshold calculation (not log based method)
    -323        
    -324        Returns
    -325        -------
    -326        Tuple[float, float]
    -327            A tuple containing the average noise and standard deviation.
    -328
    -329        """
    -330        if self.is_centroid:
    -331            # calculates noise_baseline and noise_std
    -332            # needed to run auto noise threshold mode
    -333            # it is not used for signal to noise nor 
    -334            # relative abudance methods
    -335            abundances_chunks = chunks(self.abundance, 50)
    -336            each_min_abund = [min(x) for x in abundances_chunks]
    -337
    -338            return average(each_min_abund), std(each_min_abund)
    -339        
    -340        else:
    -341
    -342            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    -343            
    -344            if self.settings.noise_threshold_method == 'minima':
    -345
    -346                yminima = self.get_abundance_minima_centroid(abundance_cut)
    -347                
    -348                return self.get_noise_average(yminima)
    -349
    -350            else:
    -351                
    -352                # pyplot.show()
    -353                return self.get_noise_average(abundance_cut)
    +            
    337    def run_noise_threshold_calc(self):
    +338        """Runs noise threshold calculation (not log based method)
    +339
    +340        Returns
    +341        -------
    +342        Tuple[float, float]
    +343            A tuple containing the average noise and standard deviation.
    +344
    +345        """
    +346        if self.is_centroid:
    +347            # calculates noise_baseline and noise_std
    +348            # needed to run auto noise threshold mode
    +349            # it is not used for signal to noise nor
    +350            # relative abudance methods
    +351            abundances_chunks = chunks(self.abundance, 50)
    +352            each_min_abund = [min(x) for x in abundances_chunks]
    +353
    +354            return average(each_min_abund), std(each_min_abund)
    +355
    +356        else:
    +357            mz_cut, abundance_cut = self.cut_mz_domain_noise()
    +358
    +359            if self.settings.noise_threshold_method == "minima":
    +360                yminima = self.get_abundance_minima_centroid(abundance_cut)
    +361
    +362                return self.get_noise_average(yminima)
    +363
    +364            else:
    +365                # pyplot.show()
    +366                return self.get_noise_average(abundance_cut)
     
    diff --git a/docs/corems/mass_spectrum/calc/NoiseCalc_Bayes.html b/docs/corems/mass_spectrum/calc/NoiseCalc_Bayes.html index 052f9330..cf3efa1a 100644 --- a/docs/corems/mass_spectrum/calc/NoiseCalc_Bayes.html +++ b/docs/corems/mass_spectrum/calc/NoiseCalc_Bayes.html @@ -61,9 +61,9 @@

    API Documentation

    corems.mass_spectrum.calc.NoiseCalc_Bayes

    -

    This code is for Bayesian estimation of the noise levels. -It is it not implemented or used in the current code base. -The packages it uses are not part of the requirements. +

    This code is for Bayesian estimation of the noise levels. +It is it not implemented or used in the current code base. +The packages it uses are not part of the requirements. If you want to use it, you will need to install them manually.

    @@ -72,96 +72,98 @@

     1"""
    - 2This code is for Bayesian estimation of the noise levels. 
    - 3It is it not implemented or used in the current code base. 
    - 4The packages it uses are not part of the requirements. 
    + 2This code is for Bayesian estimation of the noise levels.
    + 3It is it not implemented or used in the current code base.
    + 4The packages it uses are not part of the requirements.
      5If you want to use it, you will need to install them manually.
      6"""
    - 7from corems.mass_spectrum.calc.NoiseCalc import NoiseThresholdCalc
    - 8
    - 9class BayesNoiseCalc(NoiseThresholdCalc):
    -10      
    -11    def from_posterior(self, param, samples):
    -12        """
    -13        # Legacy code for Bayesian efforts - not used. 
    -14        pymc3 is not installed by default, 
    -15            if have plans to use it manual installation of pymc3 
    -16            package before using this method is needed
    -17        """
    -18
    -19        import pymc3 as pm
    -20        import numpy as np
    -21        import theano.tensor as tt
    -22        from theano import as_op
    -23        from scipy.stats import gaussian_kde
    -24        
    -25        smin, smax = np.min(samples), np.max(samples)
    -26        width = smax - smin
    -27        x = np.linspace(smin, smax, 100)
    -28        y = gaussian_kde(samples)(x)
    -29        
    -30        # what was never sampled should have a small probability but not 0,
    -31        # so we'll extend the domain and use linear approximation of density on it
    -32        x = np.concatenate([[x[0] - 3 * width], x, [x[-1] + 3 * width]])
    -33        y = np.concatenate([[0], y, [0]])
    -34        
    -35        return pm.distributions.Interpolated(param, x, y)
    -36
    -37    def error_model_from_trace(self, trace, ymincentroid):
    -38
    + 7
    + 8from corems.mass_spectrum.calc.NoiseCalc import NoiseThresholdCalc
    + 9
    +10
    +11class BayesNoiseCalc(NoiseThresholdCalc):
    +12    def from_posterior(self, param, samples):
    +13        """
    +14        # Legacy code for Bayesian efforts - not used.
    +15        pymc3 is not installed by default,
    +16            if have plans to use it manual installation of pymc3
    +17            package before using this method is needed
    +18        """
    +19
    +20        import pymc3 as pm
    +21        import numpy as np
    +22        import theano.tensor as tt
    +23        from theano import as_op
    +24        from scipy.stats import gaussian_kde
    +25
    +26        smin, smax = np.min(samples), np.max(samples)
    +27        width = smax - smin
    +28        x = np.linspace(smin, smax, 100)
    +29        y = gaussian_kde(samples)(x)
    +30
    +31        # what was never sampled should have a small probability but not 0,
    +32        # so we'll extend the domain and use linear approximation of density on it
    +33        x = np.concatenate([[x[0] - 3 * width], x, [x[-1] + 3 * width]])
    +34        y = np.concatenate([[0], y, [0]])
    +35
    +36        return pm.distributions.Interpolated(param, x, y)
    +37
    +38    def error_model_from_trace(self, trace, ymincentroid):
     39        """
    -40        # Legacy code for Bayesian efforts - not used. 
    -41        pymc3 is not installed by default, 
    -42            if have plans to use it manual installation of pymc3 
    +40        # Legacy code for Bayesian efforts - not used.
    +41        pymc3 is not installed by default,
    +42            if have plans to use it manual installation of pymc3
     43            package before using this method is needed
     44        """
     45        import pymc3 as pm
    -46        #from pymc3 import traceplot, plot_posterior
    -47        
    +46        # from pymc3 import traceplot, plot_posterior
    +47
     48        with pm.Model() as model2:
    -49            
    -50            sd = self.from_posterior('sd', trace['sd'])
    -51            y = pm.HalfNormal('y', sd=sd, observed=ymincentroid)
    -52            start = pm.find_MAP()
    -53            step = pm.NUTS() # Hamiltonian MCMC with No U-Turn Sampler
    -54            trace = pm.sample(1000, step, start, random_seed=123, progressbar=True, tune=1000)
    -55            pm.summary(trace)
    -56            #plot_posterior(trace)
    -57            #traceplot(trace)    
    -58            return pm.summary(trace)['mean'].values[0] 
    -59
    -60    def simple_model_error_dist(self,  ymincentroid):
    -61        """
    -62        # Legacy code for Bayesian efforts - not used. 
    -63        pymc3 is not installed by default, 
    -64            if have plans to use it manual installation of pymc3 
    -65            package before using this method is needed
    -66        """
    -67        import pymc3 as pm
    -68        # from pymc3 import traceplot, plot_posterior
    -69        #import seaborn as sns
    -70        #f, ax = pyplot.subplots(figsize=(6, 6))
    -71        #sns.distplot(ymincentroid)
    -72        #sns.kdeplot(ymincentroid, ax=ax, shade=True, color="g")
    -73        #sns.rugplot(ymincentroid, color="black", ax=ax)
    -74        #ax.set(xlabel= "Peak Minima Magnitude", ylabel= "Density")
    -75        #pyplot.show()
    -76
    -77        with pm.Model() as model:
    -78            
    -79            #mu = pm.Uniform('mu', lower=-1, upper=1)
    +49            sd = self.from_posterior("sd", trace["sd"])
    +50            y = pm.HalfNormal("y", sd=sd, observed=ymincentroid)
    +51            start = pm.find_MAP()
    +52            step = pm.NUTS()  # Hamiltonian MCMC with No U-Turn Sampler
    +53            trace = pm.sample(
    +54                1000, step, start, random_seed=123, progressbar=True, tune=1000
    +55            )
    +56            pm.summary(trace)
    +57            # plot_posterior(trace)
    +58            # traceplot(trace)
    +59            return pm.summary(trace)["mean"].values[0]
    +60
    +61    def simple_model_error_dist(self, ymincentroid):
    +62        """
    +63        # Legacy code for Bayesian efforts - not used.
    +64        pymc3 is not installed by default,
    +65            if have plans to use it manual installation of pymc3
    +66            package before using this method is needed
    +67        """
    +68        import pymc3 as pm
    +69        # from pymc3 import traceplot, plot_posterior
    +70        # import seaborn as sns
    +71        # f, ax = pyplot.subplots(figsize=(6, 6))
    +72        # sns.distplot(ymincentroid)
    +73        # sns.kdeplot(ymincentroid, ax=ax, shade=True, color="g")
    +74        # sns.rugplot(ymincentroid, color="black", ax=ax)
    +75        # ax.set(xlabel= "Peak Minima Magnitude", ylabel= "Density")
    +76        # pyplot.show()
    +77
    +78        with pm.Model() as model:
    +79            # mu = pm.Uniform('mu', lower=-1, upper=1)
     80            lower = ymincentroid.min()
     81            upper = ymincentroid.max()
    -82            
    -83            sd = pm.Uniform('sd', lower=lower , upper=upper)
    -84            
    -85            y = pm.HalfNormal('y', sd=sd, observed=ymincentroid)
    -86            
    +82
    +83            sd = pm.Uniform("sd", lower=lower, upper=upper)
    +84
    +85            y = pm.HalfNormal("y", sd=sd, observed=ymincentroid)
    +86
     87            start = pm.find_MAP()
    -88            step = pm.NUTS() # Hamiltonian MCMC with No U-Turn Sampler
    -89            trace = pm.sample(1000, step, start, random_seed=123, progressbar=True, tune=1000)
    -90            
    -91            return pm.summary(trace)['mean'].values[0] 
    +88            step = pm.NUTS()  # Hamiltonian MCMC with No U-Turn Sampler
    +89            trace = pm.sample(
    +90                1000, step, start, random_seed=123, progressbar=True, tune=1000
    +91            )
    +92
    +93            return pm.summary(trace)["mean"].values[0]
     
    @@ -177,89 +179,89 @@

    -
    10class BayesNoiseCalc(NoiseThresholdCalc):
    -11      
    -12    def from_posterior(self, param, samples):
    -13        """
    -14        # Legacy code for Bayesian efforts - not used. 
    -15        pymc3 is not installed by default, 
    -16            if have plans to use it manual installation of pymc3 
    -17            package before using this method is needed
    -18        """
    -19
    -20        import pymc3 as pm
    -21        import numpy as np
    -22        import theano.tensor as tt
    -23        from theano import as_op
    -24        from scipy.stats import gaussian_kde
    -25        
    -26        smin, smax = np.min(samples), np.max(samples)
    -27        width = smax - smin
    -28        x = np.linspace(smin, smax, 100)
    -29        y = gaussian_kde(samples)(x)
    -30        
    -31        # what was never sampled should have a small probability but not 0,
    -32        # so we'll extend the domain and use linear approximation of density on it
    -33        x = np.concatenate([[x[0] - 3 * width], x, [x[-1] + 3 * width]])
    -34        y = np.concatenate([[0], y, [0]])
    -35        
    -36        return pm.distributions.Interpolated(param, x, y)
    -37
    -38    def error_model_from_trace(self, trace, ymincentroid):
    -39
    +            
    12class BayesNoiseCalc(NoiseThresholdCalc):
    +13    def from_posterior(self, param, samples):
    +14        """
    +15        # Legacy code for Bayesian efforts - not used.
    +16        pymc3 is not installed by default,
    +17            if have plans to use it manual installation of pymc3
    +18            package before using this method is needed
    +19        """
    +20
    +21        import pymc3 as pm
    +22        import numpy as np
    +23        import theano.tensor as tt
    +24        from theano import as_op
    +25        from scipy.stats import gaussian_kde
    +26
    +27        smin, smax = np.min(samples), np.max(samples)
    +28        width = smax - smin
    +29        x = np.linspace(smin, smax, 100)
    +30        y = gaussian_kde(samples)(x)
    +31
    +32        # what was never sampled should have a small probability but not 0,
    +33        # so we'll extend the domain and use linear approximation of density on it
    +34        x = np.concatenate([[x[0] - 3 * width], x, [x[-1] + 3 * width]])
    +35        y = np.concatenate([[0], y, [0]])
    +36
    +37        return pm.distributions.Interpolated(param, x, y)
    +38
    +39    def error_model_from_trace(self, trace, ymincentroid):
     40        """
    -41        # Legacy code for Bayesian efforts - not used. 
    -42        pymc3 is not installed by default, 
    -43            if have plans to use it manual installation of pymc3 
    +41        # Legacy code for Bayesian efforts - not used.
    +42        pymc3 is not installed by default,
    +43            if have plans to use it manual installation of pymc3
     44            package before using this method is needed
     45        """
     46        import pymc3 as pm
    -47        #from pymc3 import traceplot, plot_posterior
    -48        
    +47        # from pymc3 import traceplot, plot_posterior
    +48
     49        with pm.Model() as model2:
    -50            
    -51            sd = self.from_posterior('sd', trace['sd'])
    -52            y = pm.HalfNormal('y', sd=sd, observed=ymincentroid)
    -53            start = pm.find_MAP()
    -54            step = pm.NUTS() # Hamiltonian MCMC with No U-Turn Sampler
    -55            trace = pm.sample(1000, step, start, random_seed=123, progressbar=True, tune=1000)
    -56            pm.summary(trace)
    -57            #plot_posterior(trace)
    -58            #traceplot(trace)    
    -59            return pm.summary(trace)['mean'].values[0] 
    -60
    -61    def simple_model_error_dist(self,  ymincentroid):
    -62        """
    -63        # Legacy code for Bayesian efforts - not used. 
    -64        pymc3 is not installed by default, 
    -65            if have plans to use it manual installation of pymc3 
    -66            package before using this method is needed
    -67        """
    -68        import pymc3 as pm
    -69        # from pymc3 import traceplot, plot_posterior
    -70        #import seaborn as sns
    -71        #f, ax = pyplot.subplots(figsize=(6, 6))
    -72        #sns.distplot(ymincentroid)
    -73        #sns.kdeplot(ymincentroid, ax=ax, shade=True, color="g")
    -74        #sns.rugplot(ymincentroid, color="black", ax=ax)
    -75        #ax.set(xlabel= "Peak Minima Magnitude", ylabel= "Density")
    -76        #pyplot.show()
    -77
    -78        with pm.Model() as model:
    -79            
    -80            #mu = pm.Uniform('mu', lower=-1, upper=1)
    +50            sd = self.from_posterior("sd", trace["sd"])
    +51            y = pm.HalfNormal("y", sd=sd, observed=ymincentroid)
    +52            start = pm.find_MAP()
    +53            step = pm.NUTS()  # Hamiltonian MCMC with No U-Turn Sampler
    +54            trace = pm.sample(
    +55                1000, step, start, random_seed=123, progressbar=True, tune=1000
    +56            )
    +57            pm.summary(trace)
    +58            # plot_posterior(trace)
    +59            # traceplot(trace)
    +60            return pm.summary(trace)["mean"].values[0]
    +61
    +62    def simple_model_error_dist(self, ymincentroid):
    +63        """
    +64        # Legacy code for Bayesian efforts - not used.
    +65        pymc3 is not installed by default,
    +66            if have plans to use it manual installation of pymc3
    +67            package before using this method is needed
    +68        """
    +69        import pymc3 as pm
    +70        # from pymc3 import traceplot, plot_posterior
    +71        # import seaborn as sns
    +72        # f, ax = pyplot.subplots(figsize=(6, 6))
    +73        # sns.distplot(ymincentroid)
    +74        # sns.kdeplot(ymincentroid, ax=ax, shade=True, color="g")
    +75        # sns.rugplot(ymincentroid, color="black", ax=ax)
    +76        # ax.set(xlabel= "Peak Minima Magnitude", ylabel= "Density")
    +77        # pyplot.show()
    +78
    +79        with pm.Model() as model:
    +80            # mu = pm.Uniform('mu', lower=-1, upper=1)
     81            lower = ymincentroid.min()
     82            upper = ymincentroid.max()
    -83            
    -84            sd = pm.Uniform('sd', lower=lower , upper=upper)
    -85            
    -86            y = pm.HalfNormal('y', sd=sd, observed=ymincentroid)
    -87            
    +83
    +84            sd = pm.Uniform("sd", lower=lower, upper=upper)
    +85
    +86            y = pm.HalfNormal("y", sd=sd, observed=ymincentroid)
    +87
     88            start = pm.find_MAP()
    -89            step = pm.NUTS() # Hamiltonian MCMC with No U-Turn Sampler
    -90            trace = pm.sample(1000, step, start, random_seed=123, progressbar=True, tune=1000)
    -91            
    -92            return pm.summary(trace)['mean'].values[0] 
    +89            step = pm.NUTS()  # Hamiltonian MCMC with No U-Turn Sampler
    +90            trace = pm.sample(
    +91                1000, step, start, random_seed=123, progressbar=True, tune=1000
    +92            )
    +93
    +94            return pm.summary(trace)["mean"].values[0]
     
    @@ -301,15 +303,15 @@
    Attributes
    Methods
      -
    • get_noise_threshold(). Get the noise threshold.
    • -
    • cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    • -
    • get_noise_average(ymincentroid). -Get the average noise and standard deviation.
    • +
    • get_noise_threshold(). Get the noise threshold.
    • +
    • cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    • +
    • get_noise_average(ymincentroid). +Get the average noise and standard deviation.
    • get_abundance_minima_centroid(abun_cut) -Get the abundance minima for centroid data.
    • -
    • run_log_noise_threshold_calc(). -Run the log noise threshold calculation.
    • -
    • run_noise_threshold_calc(). +Get the abundance minima for centroid data.
    • +
    • run_log_noise_threshold_calc(). +Run the log noise threshold calculation.
    • +
    • run_noise_threshold_calc(). Run the noise threshold calculation.
    @@ -326,38 +328,38 @@
    Methods
    -
    12    def from_posterior(self, param, samples):
    -13        """
    -14        # Legacy code for Bayesian efforts - not used. 
    -15        pymc3 is not installed by default, 
    -16            if have plans to use it manual installation of pymc3 
    -17            package before using this method is needed
    -18        """
    -19
    -20        import pymc3 as pm
    -21        import numpy as np
    -22        import theano.tensor as tt
    -23        from theano import as_op
    -24        from scipy.stats import gaussian_kde
    -25        
    -26        smin, smax = np.min(samples), np.max(samples)
    -27        width = smax - smin
    -28        x = np.linspace(smin, smax, 100)
    -29        y = gaussian_kde(samples)(x)
    -30        
    -31        # what was never sampled should have a small probability but not 0,
    -32        # so we'll extend the domain and use linear approximation of density on it
    -33        x = np.concatenate([[x[0] - 3 * width], x, [x[-1] + 3 * width]])
    -34        y = np.concatenate([[0], y, [0]])
    -35        
    -36        return pm.distributions.Interpolated(param, x, y)
    +            
    13    def from_posterior(self, param, samples):
    +14        """
    +15        # Legacy code for Bayesian efforts - not used.
    +16        pymc3 is not installed by default,
    +17            if have plans to use it manual installation of pymc3
    +18            package before using this method is needed
    +19        """
    +20
    +21        import pymc3 as pm
    +22        import numpy as np
    +23        import theano.tensor as tt
    +24        from theano import as_op
    +25        from scipy.stats import gaussian_kde
    +26
    +27        smin, smax = np.min(samples), np.max(samples)
    +28        width = smax - smin
    +29        x = np.linspace(smin, smax, 100)
    +30        y = gaussian_kde(samples)(x)
    +31
    +32        # what was never sampled should have a small probability but not 0,
    +33        # so we'll extend the domain and use linear approximation of density on it
    +34        x = np.concatenate([[x[0] - 3 * width], x, [x[-1] + 3 * width]])
    +35        y = np.concatenate([[0], y, [0]])
    +36
    +37        return pm.distributions.Interpolated(param, x, y)
     

    Legacy code for Bayesian efforts - not used.

    -

    pymc3 is not installed by default, - if have plans to use it manual installation of pymc3 +

    pymc3 is not installed by default, + if have plans to use it manual installation of pymc3 package before using this method is needed

    @@ -374,35 +376,35 @@
    Methods
    -
    38    def error_model_from_trace(self, trace, ymincentroid):
    -39
    +            
    39    def error_model_from_trace(self, trace, ymincentroid):
     40        """
    -41        # Legacy code for Bayesian efforts - not used. 
    -42        pymc3 is not installed by default, 
    -43            if have plans to use it manual installation of pymc3 
    +41        # Legacy code for Bayesian efforts - not used.
    +42        pymc3 is not installed by default,
    +43            if have plans to use it manual installation of pymc3
     44            package before using this method is needed
     45        """
     46        import pymc3 as pm
    -47        #from pymc3 import traceplot, plot_posterior
    -48        
    +47        # from pymc3 import traceplot, plot_posterior
    +48
     49        with pm.Model() as model2:
    -50            
    -51            sd = self.from_posterior('sd', trace['sd'])
    -52            y = pm.HalfNormal('y', sd=sd, observed=ymincentroid)
    -53            start = pm.find_MAP()
    -54            step = pm.NUTS() # Hamiltonian MCMC with No U-Turn Sampler
    -55            trace = pm.sample(1000, step, start, random_seed=123, progressbar=True, tune=1000)
    -56            pm.summary(trace)
    -57            #plot_posterior(trace)
    -58            #traceplot(trace)    
    -59            return pm.summary(trace)['mean'].values[0] 
    +50            sd = self.from_posterior("sd", trace["sd"])
    +51            y = pm.HalfNormal("y", sd=sd, observed=ymincentroid)
    +52            start = pm.find_MAP()
    +53            step = pm.NUTS()  # Hamiltonian MCMC with No U-Turn Sampler
    +54            trace = pm.sample(
    +55                1000, step, start, random_seed=123, progressbar=True, tune=1000
    +56            )
    +57            pm.summary(trace)
    +58            # plot_posterior(trace)
    +59            # traceplot(trace)
    +60            return pm.summary(trace)["mean"].values[0]
     

    Legacy code for Bayesian efforts - not used.

    -

    pymc3 is not installed by default, - if have plans to use it manual installation of pymc3 +

    pymc3 is not installed by default, + if have plans to use it manual installation of pymc3 package before using this method is needed

    @@ -419,45 +421,46 @@
    Methods
    -
    61    def simple_model_error_dist(self,  ymincentroid):
    -62        """
    -63        # Legacy code for Bayesian efforts - not used. 
    -64        pymc3 is not installed by default, 
    -65            if have plans to use it manual installation of pymc3 
    -66            package before using this method is needed
    -67        """
    -68        import pymc3 as pm
    -69        # from pymc3 import traceplot, plot_posterior
    -70        #import seaborn as sns
    -71        #f, ax = pyplot.subplots(figsize=(6, 6))
    -72        #sns.distplot(ymincentroid)
    -73        #sns.kdeplot(ymincentroid, ax=ax, shade=True, color="g")
    -74        #sns.rugplot(ymincentroid, color="black", ax=ax)
    -75        #ax.set(xlabel= "Peak Minima Magnitude", ylabel= "Density")
    -76        #pyplot.show()
    -77
    -78        with pm.Model() as model:
    -79            
    -80            #mu = pm.Uniform('mu', lower=-1, upper=1)
    +            
    62    def simple_model_error_dist(self, ymincentroid):
    +63        """
    +64        # Legacy code for Bayesian efforts - not used.
    +65        pymc3 is not installed by default,
    +66            if have plans to use it manual installation of pymc3
    +67            package before using this method is needed
    +68        """
    +69        import pymc3 as pm
    +70        # from pymc3 import traceplot, plot_posterior
    +71        # import seaborn as sns
    +72        # f, ax = pyplot.subplots(figsize=(6, 6))
    +73        # sns.distplot(ymincentroid)
    +74        # sns.kdeplot(ymincentroid, ax=ax, shade=True, color="g")
    +75        # sns.rugplot(ymincentroid, color="black", ax=ax)
    +76        # ax.set(xlabel= "Peak Minima Magnitude", ylabel= "Density")
    +77        # pyplot.show()
    +78
    +79        with pm.Model() as model:
    +80            # mu = pm.Uniform('mu', lower=-1, upper=1)
     81            lower = ymincentroid.min()
     82            upper = ymincentroid.max()
    -83            
    -84            sd = pm.Uniform('sd', lower=lower , upper=upper)
    -85            
    -86            y = pm.HalfNormal('y', sd=sd, observed=ymincentroid)
    -87            
    +83
    +84            sd = pm.Uniform("sd", lower=lower, upper=upper)
    +85
    +86            y = pm.HalfNormal("y", sd=sd, observed=ymincentroid)
    +87
     88            start = pm.find_MAP()
    -89            step = pm.NUTS() # Hamiltonian MCMC with No U-Turn Sampler
    -90            trace = pm.sample(1000, step, start, random_seed=123, progressbar=True, tune=1000)
    -91            
    -92            return pm.summary(trace)['mean'].values[0] 
    +89            step = pm.NUTS()  # Hamiltonian MCMC with No U-Turn Sampler
    +90            trace = pm.sample(
    +91                1000, step, start, random_seed=123, progressbar=True, tune=1000
    +92            )
    +93
    +94            return pm.summary(trace)["mean"].values[0]
     

    Legacy code for Bayesian efforts - not used.

    -

    pymc3 is not installed by default, - if have plans to use it manual installation of pymc3 +

    pymc3 is not installed by default, + if have plans to use it manual installation of pymc3 package before using this method is needed

    diff --git a/docs/corems/mass_spectrum/calc/PeakPicking.html b/docs/corems/mass_spectrum/calc/PeakPicking.html index 47eae72d..b7f0bde2 100644 --- a/docs/corems/mass_spectrum/calc/PeakPicking.html +++ b/docs/corems/mass_spectrum/calc/PeakPicking.html @@ -111,887 +111,1000 @@

    -
      1'''
    +                        
      1"""
       2@author: Yuri E. Corilo
       3@date: Jun 27, 2019
    -  4'''
    +  4"""
       5
    -  6from logging import warn
    -  7import warnings
    -  8from numpy import hstack, inf, isnan, where, array, polyfit, nan, pad, arange, zeros, searchsorted
    -  9from corems.encapsulation.constant import Labels
    - 10from corems.mass_spectra.calc import SignalProcessing as sp
    - 11
    - 12class PeakPicking:
    - 13    """ Class for peak picking.
    - 14
    - 15    Parameters
    - 16    ----------
    - 17    None
    - 18
    - 19    Attributes
    - 20    ----------
    - 21    None
    +  6import warnings
    +  7from numpy import (
    +  8    hstack,
    +  9    inf,
    + 10    isnan,
    + 11    where,
    + 12    array,
    + 13    polyfit,
    + 14    nan,
    + 15    pad,
    + 16    zeros,
    + 17    searchsorted,
    + 18)
    + 19from corems.encapsulation.constant import Labels
    + 20from corems.mass_spectra.calc import SignalProcessing as sp
    + 21
      22
    - 23    Methods
    - 24    -------
    - 25    * prepare_peak_picking_data().
    - 26        Prepare the mz, abundance, and frequence data for peak picking.
    - 27    * cut_mz_domain_peak_picking().
    - 28        Cut the m/z domain for peak picking.
    - 29    * extrapolate_axes_for_pp(mz=None, abund=None, freq=None).
    - 30        Extrapolate the m/z axis and fill the abundance axis with 0s.
    - 31    * do_peak_picking().
    - 32        Perform peak picking.
    - 33    * find_minima(apex_index, abundance, len_abundance, right=True).
    - 34        Find the minima of a peak.
    - 35    * linear_fit_calc(intes, massa, index_term, index_sign).
    - 36        Algebraic solution to a linear fit.
    - 37    * calculate_resolving_power(intes, massa, current_index).
    - 38        Calculate the resolving power of a peak.
    - 39    * cal_minima(mass, abun).
    - 40        Calculate the minima of a peak.
    - 41    * calc_centroid(mass, abund, freq).
    - 42        Calculate the centroid of a peak.
    - 43    * get_threshold(intes).
    - 44        Get the intensity threshold for peak picking.
    - 45    * algebraic_quadratic(list_mass, list_y).
    - 46        Find the apex of a peak - algebraically.
    - 47    * find_apex_fit_quadratic(mass, abund, freq, current_index).
    - 48        Find the apex of a peak.
    - 49    * check_prominence(abun, current_index, len_abundance, peak_height_diff).
    - 50        Check the prominence of a peak.
    - 51    * use_the_max(mass, abund, current_index, len_abundance, peak_height_diff).
    - 52        Use the max peak height as the centroid.
    - 53    * calc_centroid_legacy(mass, abund, freq).
    - 54        Legacy centroid calculation. Deprecated - for deletion.       
    - 55
    - 56    """
    - 57    def prepare_peak_picking_data(self):
    - 58        """ Prepare the data for peak picking.
    - 59
    - 60        This function will prepare the m/z, abundance, and frequency data for peak picking according to the settings.
    - 61
    - 62        Returns
    - 63        -------
    - 64        mz : ndarray
    - 65            The m/z axis.
    - 66        abundance : ndarray
    - 67            The abundance axis.
    - 68        freq : ndarray or None
    - 69            The frequency axis, if available.
    - 70        """
    - 71        # First apply cut_mz_domain_peak_picking
    - 72        mz, abundance, freq = self.cut_mz_domain_peak_picking()
    + 23class PeakPicking:
    + 24    """Class for peak picking.
    + 25
    + 26    Parameters
    + 27    ----------
    + 28    None
    + 29
    + 30    Attributes
    + 31    ----------
    + 32    None
    + 33
    + 34    Methods
    + 35    -------
    + 36    * prepare_peak_picking_data().
    + 37        Prepare the mz, abundance, and frequence data for peak picking.
    + 38    * cut_mz_domain_peak_picking().
    + 39        Cut the m/z domain for peak picking.
    + 40    * extrapolate_axes_for_pp(mz=None, abund=None, freq=None).
    + 41        Extrapolate the m/z axis and fill the abundance axis with 0s.
    + 42    * do_peak_picking().
    + 43        Perform peak picking.
    + 44    * find_minima(apex_index, abundance, len_abundance, right=True).
    + 45        Find the minima of a peak.
    + 46    * linear_fit_calc(intes, massa, index_term, index_sign).
    + 47        Algebraic solution to a linear fit.
    + 48    * calculate_resolving_power(intes, massa, current_index).
    + 49        Calculate the resolving power of a peak.
    + 50    * cal_minima(mass, abun).
    + 51        Calculate the minima of a peak.
    + 52    * calc_centroid(mass, abund, freq).
    + 53        Calculate the centroid of a peak.
    + 54    * get_threshold(intes).
    + 55        Get the intensity threshold for peak picking.
    + 56    * algebraic_quadratic(list_mass, list_y).
    + 57        Find the apex of a peak - algebraically.
    + 58    * find_apex_fit_quadratic(mass, abund, freq, current_index).
    + 59        Find the apex of a peak.
    + 60    * check_prominence(abun, current_index, len_abundance, peak_height_diff).
    + 61        Check the prominence of a peak.
    + 62    * use_the_max(mass, abund, current_index, len_abundance, peak_height_diff).
    + 63        Use the max peak height as the centroid.
    + 64    * calc_centroid_legacy(mass, abund, freq).
    + 65        Legacy centroid calculation. Deprecated - for deletion.
    + 66
    + 67    """
    + 68
    + 69    def prepare_peak_picking_data(self):
    + 70        """Prepare the data for peak picking.
    + 71
    + 72        This function will prepare the m/z, abundance, and frequency data for peak picking according to the settings.
      73
    - 74        # Then extrapolate the axes for peak picking
    - 75        if self.settings.picking_point_extrapolate > 0:
    - 76            mz, abundance, freq = self.extrapolate_axes_for_pp(mz, abundance, freq)
    - 77        return mz, abundance, freq
    - 78    
    - 79    def cut_mz_domain_peak_picking(self):
    - 80        """
    - 81        Cut the m/z domain for peak picking.
    - 82
    - 83        Simplified function
    - 84        
    - 85        Returns
    - 86        -------
    - 87        mz_domain_X_low_cutoff : ndarray
    - 88            The m/z values within the specified range.
    - 89        mz_domain_low_Y_cutoff : ndarray
    - 90            The abundance values within the specified range.
    - 91        freq_domain_low_Y_cutoff : ndarray or None
    - 92            The frequency values within the specified range, if available.
    - 93
    - 94        """
    - 95        max_picking_mz = self.settings.max_picking_mz
    - 96        min_picking_mz = self.settings.min_picking_mz
    - 97        
    - 98        #min_start =  where(self.mz_exp_profile  > min_picking_mz)[0][0]
    - 99        #max_final =  where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    -100        min_start =  searchsorted(a = self.mz_exp_profile, v = min_picking_mz)
    -101        max_final =  searchsorted(a = self.mz_exp_profile, v = max_picking_mz)
    -102
    -103        if self.has_frequency:
    -104
    -105            if self.freq_exp_profile.any():
    -106
    -107                return self.mz_exp_profile[min_start:max_final], self.abundance_profile[min_start:max_final], self.freq_exp_profile[min_start:max_final]
    -108
    -109        else:
    -110
    -111            return self.mz_exp_profile[min_start:max_final], self.abundance_profile[min_start:max_final], None
    -112        
    -113        
    -114    def legacy_cut_mz_domain_peak_picking(self):
    -115        """
    -116        Cut the m/z domain for peak picking.
    -117        DEPRECATED
    -118        Returns
    -119        -------
    -120        mz_domain_X_low_cutoff : ndarray
    -121            The m/z values within the specified range.
    -122        mz_domain_low_Y_cutoff : ndarray
    -123            The abundance values within the specified range.
    -124        freq_domain_low_Y_cutoff : ndarray or None
    -125            The frequency values within the specified range, if available.
    -126
    -127        """
    -128        max_picking_mz = self.settings.max_picking_mz
    -129        min_picking_mz = self.settings.min_picking_mz
    -130        
    -131        min_final =  where(self.mz_exp_profile  > min_picking_mz)[-1][-1]
    -132        min_start =  where(self.mz_exp_profile  > min_picking_mz)[0][0]
    -133
    -134        mz_domain_X_low_cutoff, mz_domain_low_Y_cutoff,  = self.mz_exp_profile [min_start:min_final], self.abundance_profile[min_start:min_final]
    -135
    -136        max_final =  where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    -137        max_start =  where(self.mz_exp_profile < max_picking_mz)[0][0]
    -138
    -139        if self.has_frequency:
    -140
    -141            if self.freq_exp_profile.any():
    + 74        Returns
    + 75        -------
    + 76        mz : ndarray
    + 77            The m/z axis.
    + 78        abundance : ndarray
    + 79            The abundance axis.
    + 80        freq : ndarray or None
    + 81            The frequency axis, if available.
    + 82        """
    + 83        # First apply cut_mz_domain_peak_picking
    + 84        mz, abundance, freq = self.cut_mz_domain_peak_picking()
    + 85
    + 86        # Then extrapolate the axes for peak picking
    + 87        if self.settings.picking_point_extrapolate > 0:
    + 88            mz, abundance, freq = self.extrapolate_axes_for_pp(mz, abundance, freq)
    + 89        return mz, abundance, freq
    + 90
    + 91    def cut_mz_domain_peak_picking(self):
    + 92        """
    + 93        Cut the m/z domain for peak picking.
    + 94
    + 95        Simplified function
    + 96
    + 97        Returns
    + 98        -------
    + 99        mz_domain_X_low_cutoff : ndarray
    +100            The m/z values within the specified range.
    +101        mz_domain_low_Y_cutoff : ndarray
    +102            The abundance values within the specified range.
    +103        freq_domain_low_Y_cutoff : ndarray or None
    +104            The frequency values within the specified range, if available.
    +105
    +106        """
    +107        max_picking_mz = self.settings.max_picking_mz
    +108        min_picking_mz = self.settings.min_picking_mz
    +109
    +110        # min_start =  where(self.mz_exp_profile  > min_picking_mz)[0][0]
    +111        # max_final =  where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    +112        min_start = searchsorted(a=self.mz_exp_profile, v=min_picking_mz)
    +113        max_final = searchsorted(a=self.mz_exp_profile, v=max_picking_mz)
    +114
    +115        if self.has_frequency:
    +116            if self.freq_exp_profile.any():
    +117                return (
    +118                    self.mz_exp_profile[min_start:max_final],
    +119                    self.abundance_profile[min_start:max_final],
    +120                    self.freq_exp_profile[min_start:max_final],
    +121                )
    +122
    +123        else:
    +124            return (
    +125                self.mz_exp_profile[min_start:max_final],
    +126                self.abundance_profile[min_start:max_final],
    +127                None,
    +128            )
    +129
    +130    def legacy_cut_mz_domain_peak_picking(self):
    +131        """
    +132        Cut the m/z domain for peak picking.
    +133        DEPRECATED
    +134        Returns
    +135        -------
    +136        mz_domain_X_low_cutoff : ndarray
    +137            The m/z values within the specified range.
    +138        mz_domain_low_Y_cutoff : ndarray
    +139            The abundance values within the specified range.
    +140        freq_domain_low_Y_cutoff : ndarray or None
    +141            The frequency values within the specified range, if available.
     142
    -143                freq_domain_low_Y_cutoff = self.freq_exp_profile[min_start:min_final]
    -144
    -145
    -146                return mz_domain_X_low_cutoff[max_start:max_final], mz_domain_low_Y_cutoff[max_start:max_final], freq_domain_low_Y_cutoff[max_start:max_final]
    -147
    -148        else:
    +143        """
    +144        max_picking_mz = self.settings.max_picking_mz
    +145        min_picking_mz = self.settings.min_picking_mz
    +146
    +147        min_final = where(self.mz_exp_profile > min_picking_mz)[-1][-1]
    +148        min_start = where(self.mz_exp_profile > min_picking_mz)[0][0]
     149
    -150            return mz_domain_X_low_cutoff[max_start:max_final], mz_domain_low_Y_cutoff[max_start:max_final], None
    -151
    -152    @staticmethod 
    -153    def extrapolate_axis(initial_array, pts):
    -154        """
    -155        This function will extrapolate an input array in both directions by N pts.
    -156
    -157        Parameters
    -158        ----------
    -159        initial_array : ndarray
    -160            The input array.
    -161        pts : int
    -162            The number of points to extrapolate.
    -163
    -164        Returns
    -165        -------
    -166        ndarray
    -167            The extrapolated array.
    -168
    -169        Notes
    -170        --------
    -171        This is a static method.        
    -172        """
    -173        initial_array_len = len(initial_array)
    -174        right_delta = initial_array[-1] - initial_array[-2]  
    -175        left_delta = initial_array[1] - initial_array[0]  
    -176        
    -177        # Create an array with extra space for extrapolation
    -178        pad_array = zeros(initial_array_len + 2 * pts)
    -179        
    -180        # Copy original array into the middle of the padded array
    -181        pad_array[pts:pts + initial_array_len] = initial_array
    -182        
    -183        # Extrapolate the right side
    -184        for pt in range(pts):
    -185            final_value = initial_array[-1]
    -186            value_to_add = right_delta * (pt + 1)
    -187            new_value = final_value + value_to_add
    -188            pad_array[initial_array_len + pts + pt] = new_value
    -189        
    -190        # Extrapolate the left side
    -191        for pt in range(pts):
    -192            first_value = initial_array[0]
    -193            value_to_subtract = left_delta * (pt + 1)
    -194            new_value = first_value - value_to_subtract
    -195            pad_array[pts - pt - 1] = new_value
    -196        
    -197        return pad_array
    -198    
    -199    def extrapolate_axes_for_pp(self, mz=None, abund=None, freq=None):
    -200        """ Extrapolate the m/z axis and fill the abundance axis with 0s.
    -201
    -202        Parameters
    -203        ----------
    -204        mz : ndarray or None
    -205            The m/z axis, if available. If None, the experimental m/z axis is used.
    -206        abund : ndarray or None
    -207            The abundance axis, if available. If None, the experimental abundance axis is used.
    -208        freq : ndarray or None
    -209            The frequency axis, if available. If None, the experimental frequency axis is used.
    -210
    -211        Returns
    -212        -------
    -213        mz : ndarray
    -214            The extrapolated m/z axis.
    -215        abund : ndarray
    -216            The abundance axis with 0s filled.
    -217        freq : ndarray or None
    -218            The extrapolated frequency axis, if available.
    -219
    -220        Notes
    -221        --------
    -222        This function will extrapolate the mz axis by the number of datapoints specified in the settings,
    -223        and fill the abundance axis with 0s. 
    -224        This should prevent peak picking issues at the spectrum edge.
    -225
    -226        """ 
    -227        # Check if the input arrays are provided
    -228        if mz is None or abund is None:
    -229            mz, abund = self.mz_exp_profile, self.abundance_profile
    -230            if self.has_frequency:
    -231                freq = self.freq_exp_profile
    -232            else: 
    -233                freq = None
    -234        pts = self.settings.picking_point_extrapolate
    -235        if pts == 0:
    -236            return mz, abund, freq
    -237        
    -238        mz = self.extrapolate_axis(mz, pts)
    -239        abund = pad(abund, (pts, pts), mode = 'constant', constant_values=(0,0))
    -240        if freq is not None:
    -241            freq = self.extrapolate_axis(freq, pts)
    -242        return mz, abund, freq
    -243
    -244    def do_peak_picking(self):
    -245        """ Perform peak picking.
    -246
    -247        """
    -248        mz, abundance, freq = self.prepare_peak_picking_data()
    -249        
    -250        if self.label == Labels.bruker_frequency or self.label == Labels.midas_frequency:
    -251            self.calc_centroid(mz, abundance, freq)
    -252
    -253        elif self.label == Labels.thermo_profile:
    -254            self.calc_centroid(mz, abundance, freq)
    -255
    -256        elif self.label == Labels.bruker_profile:
    -257            self.calc_centroid(mz, abundance, freq)
    -258
    -259        elif self.label == Labels.booster_profile:
    -260            self.calc_centroid(mz, abundance, freq)
    -261
    -262        elif self.label == Labels.simulated_profile:
    -263            self.calc_centroid(mz, abundance, freq)
    -264
    -265        else: 
    -266            raise Exception("Unknow mass spectrum type", self.label)
    -267
    -268    def find_minima(self, apex_index, abundance, len_abundance, right=True):
    -269        """ Find the minima of a peak.
    -270
    -271        Parameters
    -272        ----------
    -273        apex_index : int
    -274            The index of the peak apex.
    -275        abundance : ndarray
    -276            The abundance values.
    -277        len_abundance : int
    -278            The length of the abundance array.
    -279        right : bool, optional
    -280            Flag indicating whether to search for minima to the right of the apex (default is True).
    -281
    -282        Returns
    -283        -------
    -284        int
    -285            The index of the minima.
    -286
    -287        """
    -288        j = apex_index
    -289        
    -290        if right: minima = abundance[j] > abundance[j+1]
    -291        else: minima = abundance[j] > abundance[j-1]
    -292
    -293        while minima:
    -294            
    -295            if j == 1 or j == len_abundance -2:
    -296                break
    -297            
    -298            if right: 
    -299                j += 1
    -300
    -301                minima = abundance[j] >= abundance[j+1]
    -302
    -303            else: 
    -304                j -= 1
    -305                minima = abundance[j] >= abundance[j-1]
    -306        
    -307        if right: return j
    -308        else: return j
    -309
    -310    @staticmethod
    -311    def linear_fit_calc(intes, massa, index_term, index_sign):
    -312        """
    -313        Algebraic solution to a linear fit - roughly 25-50x faster than numpy polyfit when passing only two vals and doing a 1st order fit
    -314
    -315        Parameters
    -316        ----------
    -317        intes : ndarray
    -318            The intensity values.
    -319        massa : ndarray
    -320            The mass values.
    -321        index_term : int
    -322            The index of the current term.
    -323        index_sign : str
    -324            The index sign
    -325        
    -326        Returns
    -327        -------
    -328        ndarray
    -329            The coefficients of the linear fit.
    -330        
    -331        Notes
    -332        --------
    -333        This is a static method.
    -334        """
    -335        if index_sign == '+':
    -336            x1, x2 = massa[index_term], massa[index_term + 1]
    -337            y1, y2 = intes[index_term], intes[index_term + 1]
    -338        elif index_sign =='-':
    -339            x1, x2 = massa[index_term], massa[index_term - 1]
    -340            y1, y2 = intes[index_term], intes[index_term - 1]
    -341        else:
    -342            warnings.warn('error in linear fit calc, unknown index sign')
    -343        
    -344        # Calculate the slope (m)
    -345        slope = (y2 - y1) / (x2 - x1)
    -346        
    -347        # Calculate the intercept (b)
    -348        intercept = y1 - slope * x1
    -349        
    -350        # The coefficients array would be [slope, intercept]
    -351        coefficients = array([slope, intercept])
    -352        return coefficients
    -353
    -354    def calculate_resolving_power(self, intes, massa, current_index):
    -355        """ Calculate the resolving power of a peak.
    -356
    -357        Parameters
    -358        ----------
    -359        intes : ndarray
    -360            The intensity values.
    -361        massa : ndarray
    -362            The mass values.
    -363        current_index : int
    -364            The index of the current peak.
    -365
    -366        Returns
    -367        -------
    -368        float
    -369            The resolving power of the peak.
    -370
    -371        Notes
    -372        --------
    -373        This is a conservative calculation of resolving power,
    -374        the peak need to be resolved at least at the half-maximum magnitude,
    -375        otherwise, the combined full width at half maximum is used to calculate resolving power.
    +150        (
    +151            mz_domain_X_low_cutoff,
    +152            mz_domain_low_Y_cutoff,
    +153        ) = (
    +154            self.mz_exp_profile[min_start:min_final],
    +155            self.abundance_profile[min_start:min_final],
    +156        )
    +157
    +158        max_final = where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    +159        max_start = where(self.mz_exp_profile < max_picking_mz)[0][0]
    +160
    +161        if self.has_frequency:
    +162            if self.freq_exp_profile.any():
    +163                freq_domain_low_Y_cutoff = self.freq_exp_profile[min_start:min_final]
    +164
    +165                return (
    +166                    mz_domain_X_low_cutoff[max_start:max_final],
    +167                    mz_domain_low_Y_cutoff[max_start:max_final],
    +168                    freq_domain_low_Y_cutoff[max_start:max_final],
    +169                )
    +170
    +171        else:
    +172            return (
    +173                mz_domain_X_low_cutoff[max_start:max_final],
    +174                mz_domain_low_Y_cutoff[max_start:max_final],
    +175                None,
    +176            )
    +177
    +178    @staticmethod
    +179    def extrapolate_axis(initial_array, pts):
    +180        """
    +181        This function will extrapolate an input array in both directions by N pts.
    +182
    +183        Parameters
    +184        ----------
    +185        initial_array : ndarray
    +186            The input array.
    +187        pts : int
    +188            The number of points to extrapolate.
    +189
    +190        Returns
    +191        -------
    +192        ndarray
    +193            The extrapolated array.
    +194
    +195        Notes
    +196        --------
    +197        This is a static method.
    +198        """
    +199        initial_array_len = len(initial_array)
    +200        right_delta = initial_array[-1] - initial_array[-2]
    +201        left_delta = initial_array[1] - initial_array[0]
    +202
    +203        # Create an array with extra space for extrapolation
    +204        pad_array = zeros(initial_array_len + 2 * pts)
    +205
    +206        # Copy original array into the middle of the padded array
    +207        pad_array[pts : pts + initial_array_len] = initial_array
    +208
    +209        # Extrapolate the right side
    +210        for pt in range(pts):
    +211            final_value = initial_array[-1]
    +212            value_to_add = right_delta * (pt + 1)
    +213            new_value = final_value + value_to_add
    +214            pad_array[initial_array_len + pts + pt] = new_value
    +215
    +216        # Extrapolate the left side
    +217        for pt in range(pts):
    +218            first_value = initial_array[0]
    +219            value_to_subtract = left_delta * (pt + 1)
    +220            new_value = first_value - value_to_subtract
    +221            pad_array[pts - pt - 1] = new_value
    +222
    +223        return pad_array
    +224
    +225    def extrapolate_axes_for_pp(self, mz=None, abund=None, freq=None):
    +226        """Extrapolate the m/z axis and fill the abundance axis with 0s.
    +227
    +228        Parameters
    +229        ----------
    +230        mz : ndarray or None
    +231            The m/z axis, if available. If None, the experimental m/z axis is used.
    +232        abund : ndarray or None
    +233            The abundance axis, if available. If None, the experimental abundance axis is used.
    +234        freq : ndarray or None
    +235            The frequency axis, if available. If None, the experimental frequency axis is used.
    +236
    +237        Returns
    +238        -------
    +239        mz : ndarray
    +240            The extrapolated m/z axis.
    +241        abund : ndarray
    +242            The abundance axis with 0s filled.
    +243        freq : ndarray or None
    +244            The extrapolated frequency axis, if available.
    +245
    +246        Notes
    +247        --------
    +248        This function will extrapolate the mz axis by the number of datapoints specified in the settings,
    +249        and fill the abundance axis with 0s.
    +250        This should prevent peak picking issues at the spectrum edge.
    +251
    +252        """
    +253        # Check if the input arrays are provided
    +254        if mz is None or abund is None:
    +255            mz, abund = self.mz_exp_profile, self.abundance_profile
    +256            if self.has_frequency:
    +257                freq = self.freq_exp_profile
    +258            else:
    +259                freq = None
    +260        pts = self.settings.picking_point_extrapolate
    +261        if pts == 0:
    +262            return mz, abund, freq
    +263
    +264        mz = self.extrapolate_axis(mz, pts)
    +265        abund = pad(abund, (pts, pts), mode="constant", constant_values=(0, 0))
    +266        if freq is not None:
    +267            freq = self.extrapolate_axis(freq, pts)
    +268        return mz, abund, freq
    +269
    +270    def do_peak_picking(self):
    +271        """Perform peak picking."""
    +272        mz, abundance, freq = self.prepare_peak_picking_data()
    +273
    +274        if (
    +275            self.label == Labels.bruker_frequency
    +276            or self.label == Labels.midas_frequency
    +277        ):
    +278            self.calc_centroid(mz, abundance, freq)
    +279
    +280        elif self.label == Labels.thermo_profile:
    +281            self.calc_centroid(mz, abundance, freq)
    +282
    +283        elif self.label == Labels.bruker_profile:
    +284            self.calc_centroid(mz, abundance, freq)
    +285
    +286        elif self.label == Labels.booster_profile:
    +287            self.calc_centroid(mz, abundance, freq)
    +288
    +289        elif self.label == Labels.simulated_profile:
    +290            self.calc_centroid(mz, abundance, freq)
    +291
    +292        else:
    +293            raise Exception("Unknow mass spectrum type", self.label)
    +294
    +295    def find_minima(self, apex_index, abundance, len_abundance, right=True):
    +296        """Find the minima of a peak.
    +297
    +298        Parameters
    +299        ----------
    +300        apex_index : int
    +301            The index of the peak apex.
    +302        abundance : ndarray
    +303            The abundance values.
    +304        len_abundance : int
    +305            The length of the abundance array.
    +306        right : bool, optional
    +307            Flag indicating whether to search for minima to the right of the apex (default is True).
    +308
    +309        Returns
    +310        -------
    +311        int
    +312            The index of the minima.
    +313
    +314        """
    +315        j = apex_index
    +316
    +317        if right:
    +318            minima = abundance[j] > abundance[j + 1]
    +319        else:
    +320            minima = abundance[j] > abundance[j - 1]
    +321
    +322        while minima:
    +323            if j == 1 or j == len_abundance - 2:
    +324                break
    +325
    +326            if right:
    +327                j += 1
    +328
    +329                minima = abundance[j] >= abundance[j + 1]
    +330
    +331            else:
    +332                j -= 1
    +333                minima = abundance[j] >= abundance[j - 1]
    +334
    +335        if right:
    +336            return j
    +337        else:
    +338            return j
    +339
    +340    @staticmethod
    +341    def linear_fit_calc(intes, massa, index_term, index_sign):
    +342        """
    +343        Algebraic solution to a linear fit - roughly 25-50x faster than numpy polyfit when passing only two vals and doing a 1st order fit
    +344
    +345        Parameters
    +346        ----------
    +347        intes : ndarray
    +348            The intensity values.
    +349        massa : ndarray
    +350            The mass values.
    +351        index_term : int
    +352            The index of the current term.
    +353        index_sign : str
    +354            Which neighbor to pair with index_term: "+" uses index_term + 1, "-" uses index_term - 1.
    +355
    +356        Returns
    +357        -------
    +358        ndarray
    +359            The coefficients of the linear fit.
    +360
    +361        Notes
    +362        --------
    +363        This is a static method.
    +364        """
    +365        if index_sign == "+":
    +366            x1, x2 = massa[index_term], massa[index_term + 1]
    +367            y1, y2 = intes[index_term], intes[index_term + 1]
    +368        elif index_sign == "-":
    +369            x1, x2 = massa[index_term], massa[index_term - 1]
    +370            y1, y2 = intes[index_term], intes[index_term - 1]
    +371        else:
    +372            warnings.warn("error in linear fit calc, unknown index sign")
    +373
    +374        # Calculate the slope (m)
    +375        slope = (y2 - y1) / (x2 - x1)
     376
    -377        """
    -378
    -379        peak_height = intes[current_index]
    -380        target_peak_height = peak_height/2
    -381
    -382        peak_height_minus = peak_height
    -383        peak_height_plus = peak_height
    -384        
    -385        # There are issues when a peak is at the high or low limit of a spectrum in finding its local minima and maxima
    -386        # This solution will return nan for resolving power when a peak is possibly too close to an edge to avoid the issue
    -387        
    -388        if current_index <5:
    -389            warnings.warn("peak at low spectrum edge, returning no resolving power")
    -390            return nan
    -391        elif abs(current_index-len(intes))<5:
    -392            warnings.warn("peak at high spectrum edge, returning no resolving power")
    -393            return nan
    -394        else:
    -395            pass
    -396
    -397        index_minus = current_index
    -398        while peak_height_minus  >= target_peak_height:
    -399
    -400            index_minus = index_minus -1
    -401            if index_minus < 0:
    -402                warnings.warn('Res. calc. warning - peak index minus adjacent to spectrum edge \n \
    -403                        Zeroing the first 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported \n \
    -404                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)')
    -405                # Pad the first 5 data points with zeros and restart the loop
    -406                intes[:5] = 0
    -407                peak_height_minus = target_peak_height
    -408                index_minus = current_index            
    -409            else:
    -410                peak_height_minus = intes[index_minus]
    +377        # Calculate the intercept (b)
    +378        intercept = y1 - slope * x1
    +379
    +380        # The coefficients array would be [slope, intercept]
    +381        coefficients = array([slope, intercept])
    +382        return coefficients
    +383
    +384    def calculate_resolving_power(self, intes, massa, current_index):
    +385        """Calculate the resolving power of a peak.
    +386
    +387        Parameters
    +388        ----------
    +389        intes : ndarray
    +390            The intensity values.
    +391        massa : ndarray
    +392            The mass values.
    +393        current_index : int
    +394            The index of the current peak.
    +395
    +396        Returns
    +397        -------
    +398        float
    +399            The resolving power of the peak.
    +400
    +401        Notes
    +402        --------
    +403        This is a conservative calculation of resolving power:
    +404        the peak needs to be resolved at least at the half-maximum magnitude;
    +405        otherwise, the combined full width at half maximum is used to calculate resolving power.
    +406
    +407        """
    +408
    +409        peak_height = intes[current_index]
    +410        target_peak_height = peak_height / 2
     411
    -412        if self.mspeaks_settings.legacy_centroid_polyfit:
    -413            x = [ massa[index_minus],  massa[index_minus+1]]
    -414            y = [ intes[index_minus],  intes[index_minus+1]]
    -415            coefficients = polyfit(x, y, 1)
    -416        else:
    -417            coefficients = self.linear_fit_calc(intes, massa, index_minus,index_sign='+')
    -418
    -419        a = coefficients[0]
    -420        b = coefficients[1]
    -421        if self.mspeaks_settings.legacy_resolving_power:
    -422            y_intercept =  intes[index_minus] + ((intes[index_minus+1] - intes[index_minus])/2)
    -423        else:
    -424            y_intercept =  target_peak_height
    -425        massa1 = (y_intercept -b)/a
    +412        peak_height_minus = peak_height
    +413        peak_height_plus = peak_height
    +414
    +415        # There are issues when a peak is at the high or low limit of a spectrum in finding its local minima and maxima
    +416        # This solution will return nan for resolving power when a peak is possibly too close to an edge to avoid the issue
    +417
    +418        if current_index < 5:
    +419            warnings.warn("peak at low spectrum edge, returning no resolving power")
    +420            return nan
    +421        elif abs(current_index - len(intes)) < 5:
    +422            warnings.warn("peak at high spectrum edge, returning no resolving power")
    +423            return nan
    +424        else:
    +425            pass
     426
    -427        index_plus = current_index
    -428        while peak_height_plus  >= target_peak_height:
    -429
    -430            index_plus = index_plus + 1
    -431               
    -432            try: 
    -433                peak_height_plus = intes[index_plus]
    -434            except IndexError:
    -435                warnings.warn('Res. calc. warning - peak index plus adjacent to spectrum edge \n \
    -436                        Zeroing the last 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported\
    -437                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)')
    -438                # Pad the first 5 data points with zeros and restart the loop
    -439                intes[-5:] = 0
    -440                peak_height_plus = target_peak_height
    -441                index_plus = current_index 
    +427        index_minus = current_index
    +428        while peak_height_minus >= target_peak_height:
    +429            index_minus = index_minus - 1
    +430            if index_minus < 0:
    +431                warnings.warn(
    +432                    "Res. calc. warning - peak index minus adjacent to spectrum edge \n \
    +433                        Zeroing the first 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported \n \
    +434                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)"
    +435                )
    +436                # Pad the first 5 data points with zeros and restart the loop
    +437                intes[:5] = 0
    +438                peak_height_minus = target_peak_height
    +439                index_minus = current_index
    +440            else:
    +441                peak_height_minus = intes[index_minus]
     442
     443        if self.mspeaks_settings.legacy_centroid_polyfit:
    -444            x = [massa[index_plus],  massa[index_plus - 1]]
    -445            y = [intes[index_plus],  intes[index_plus - 1]]
    +444            x = [massa[index_minus], massa[index_minus + 1]]
    +445            y = [intes[index_minus], intes[index_minus + 1]]
     446            coefficients = polyfit(x, y, 1)
     447        else:
    -448            coefficients = self.linear_fit_calc(intes, massa, index_plus,index_sign='-')
    -449
    -450        a = coefficients[0]
    -451        b = coefficients[1]
    -452
    -453        if self.mspeaks_settings.legacy_resolving_power:
    -454            y_intercept =  intes[index_plus - 1] + ((intes[index_plus] - intes[index_plus - 1])/2)
    -455        else:
    -456            y_intercept =  target_peak_height
    -457
    -458        massa2 = (y_intercept -b)/a
    -459
    -460        if massa1 > massa2:
    +448            coefficients = self.linear_fit_calc(
    +449                intes, massa, index_minus, index_sign="+"
    +450            )
    +451
    +452        a = coefficients[0]
    +453        b = coefficients[1]
    +454        if self.mspeaks_settings.legacy_resolving_power:
    +455            y_intercept = intes[index_minus] + (
    +456                (intes[index_minus + 1] - intes[index_minus]) / 2
    +457            )
    +458        else:
    +459            y_intercept = target_peak_height
    +460        massa1 = (y_intercept - b) / a
     461
    -462            resolvingpower =  massa[current_index]/(massa1-massa2)
    -463
    -464        else:
    +462        index_plus = current_index
    +463        while peak_height_plus >= target_peak_height:
    +464            index_plus = index_plus + 1
     465
    -466            resolvingpower =  massa[current_index]/(massa2-massa1)
    -467
    -468        return resolvingpower
    -469
    -470    def cal_minima(self, mass, abun):
    -471        """ Calculate the minima of a peak.
    -472
    -473        Parameters
    -474        ----------
    -475        mass : ndarray
    -476            The mass values.
    -477        abun : ndarray
    -478            The abundance values.
    -479
    -480        Returns
    -481        -------
    -482        ndarray or None
    -483            The mass values at the minima, if found.
    -484
    -485        """
    -486        abun = -abun
    +466            try:
    +467                peak_height_plus = intes[index_plus]
    +468            except IndexError:
    +469                warnings.warn(
    +470                    "Res. calc. warning - peak index plus adjacent to spectrum edge \n \
    +471                        Zeroing the last 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported\
    +472                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)"
    +473                )
    +474                # Pad the last 5 data points with zeros and restart the loop
    +475                intes[-5:] = 0
    +476                peak_height_plus = target_peak_height
    +477                index_plus = current_index
    +478
    +479        if self.mspeaks_settings.legacy_centroid_polyfit:
    +480            x = [massa[index_plus], massa[index_plus - 1]]
    +481            y = [intes[index_plus], intes[index_plus - 1]]
    +482            coefficients = polyfit(x, y, 1)
    +483        else:
    +484            coefficients = self.linear_fit_calc(
    +485                intes, massa, index_plus, index_sign="-"
    +486            )
     487
    -488        dy = abun[1:] - abun[:-1]
    -489        
    -490        # replaces nan for infinity
    -491        indices_nan = where(isnan(abun))[0]
    -492        
    -493        if indices_nan.size:
    -494            
    -495            abun[indices_nan] = inf
    -496            dy[where(isnan(dy))[0]] = inf
    -497        
    -498        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +488        a = coefficients[0]
    +489        b = coefficients[1]
    +490
    +491        if self.mspeaks_settings.legacy_resolving_power:
    +492            y_intercept = intes[index_plus - 1] + (
    +493                (intes[index_plus] - intes[index_plus - 1]) / 2
    +494            )
    +495        else:
    +496            y_intercept = target_peak_height
    +497
    +498        massa2 = (y_intercept - b) / a
     499
    -500        if indexes.size:
    -501            
    -502            return mass[indexes], abun[indexes]
    -503    
    -504    def calc_centroid(self, mass, abund, freq):
    -505        """ Calculate the centroid of a peak.
    -506
    -507        Parameters
    -508        ----------
    -509        mass : ndarray
    -510            The mass values.
    -511        abund : ndarray
    -512            The abundance values.
    -513        freq : ndarray or None
    -514            The frequency values, if available.
    -515
    -516        Returns
    -517        -------
    -518        None
    -519
    -520        """
    -521        
    -522        max_height = self.mspeaks_settings.peak_height_max_percent
    -523        max_prominence = self.mspeaks_settings.peak_max_prominence_percent
    -524        min_peak_datapoints = self.mspeaks_settings.min_peak_datapoints
    -525        peak_derivative_threshold = self.mspeaks_settings.peak_derivative_threshold
    -526        max_abun = max(abund)
    -527        peak_height_diff = lambda hi, li : ((abund[hi] - abund[li]) / max_abun ) * 100
    -528                    
    -529        domain = mass
    -530        signal = abund
    -531        len_signal = len(signal)
    -532        
    -533        signal_threshold, factor = self.get_threshold(abund)
    -534        max_signal = factor
    -535
    -536        correct_baseline = False
    -537
    -538        include_indexes = sp.peak_picking_first_derivative(domain, signal, max_height, max_prominence, max_signal, 
    -539                                                           min_peak_datapoints,
    -540                                                           peak_derivative_threshold,
    -541                                                           signal_threshold=signal_threshold, 
    -542                                                           correct_baseline=correct_baseline, 
    -543                                                           abun_norm=1,
    -544                                                           plot_res=False)
    -545
    -546        for indexes_tuple in include_indexes:
    -547            
    -548            apex_index = indexes_tuple[1]
    -549
    -550            peak_indexes = self.check_prominence(abund, apex_index, len_signal, peak_height_diff )
    +500        if massa1 > massa2:
    +501            resolvingpower = massa[current_index] / (massa1 - massa2)
    +502
    +503        else:
    +504            resolvingpower = massa[current_index] / (massa2 - massa1)
    +505
    +506        return resolvingpower
    +507
    +508    def cal_minima(self, mass, abun):
    +509        """Calculate the minima of a peak.
    +510
    +511        Parameters
    +512        ----------
    +513        mass : ndarray
    +514            The mass values.
    +515        abun : ndarray
    +516            The abundance values.
    +517
    +518        Returns
    +519        -------
    +520        tuple of ndarray or None
    +521            The mass values and the negated abundance values at the minima, if found; otherwise None.
    +522
    +523        """
    +524        abun = -abun
    +525
    +526        dy = abun[1:] - abun[:-1]
    +527
    +528        # replaces nan for infinity
    +529        indices_nan = where(isnan(abun))[0]
    +530
    +531        if indices_nan.size:
    +532            abun[indices_nan] = inf
    +533            dy[where(isnan(dy))[0]] = inf
    +534
    +535        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +536
    +537        if indexes.size:
    +538            return mass[indexes], abun[indexes]
    +539
    +540    def calc_centroid(self, mass, abund, freq):
    +541        """Calculate the centroid of a peak.
    +542
    +543        Parameters
    +544        ----------
    +545        mass : ndarray
    +546            The mass values.
    +547        abund : ndarray
    +548            The abundance values.
    +549        freq : ndarray or None
    +550            The frequency values, if available.
     551
    -552            if peak_indexes:
    -553                
    -554                mz_exp_centroid, freq_centr, intes_centr = self.find_apex_fit_quadratic(mass, abund, freq, apex_index)
    +552        Returns
    +553        -------
    +554        None
     555
    -556                if mz_exp_centroid:
    -557                                   
    -558                    peak_resolving_power = self.calculate_resolving_power( abund, mass, apex_index)
    -559                    s2n = intes_centr/self.baseline_noise_std
    -560                    self.add_mspeak(self.polarity, mz_exp_centroid, abund[apex_index] , peak_resolving_power, s2n, indexes_tuple, exp_freq=freq_centr, ms_parent=self)
    -561                #pyplot.plot(domain[start_index: final_index + 1], signal[start_index:final_index + 1], c='black')
    -562                #pyplot.show()
    -563                
    -564    def get_threshold(self, intes):
    -565        """ Get the intensity threshold for peak picking.
    -566
    -567        Parameters
    -568        ----------
    -569        intes : ndarray
    -570            The intensity values.
    +556        """
    +557
    +558        max_height = self.mspeaks_settings.peak_height_max_percent
    +559        max_prominence = self.mspeaks_settings.peak_max_prominence_percent
    +560        min_peak_datapoints = self.mspeaks_settings.min_peak_datapoints
    +561        peak_derivative_threshold = self.mspeaks_settings.peak_derivative_threshold
    +562        max_abun = max(abund)
    +563        peak_height_diff = lambda hi, li: ((abund[hi] - abund[li]) / max_abun) * 100
    +564
    +565        domain = mass
    +566        signal = abund
    +567        len_signal = len(signal)
    +568
    +569        signal_threshold, factor = self.get_threshold(abund)
    +570        max_signal = factor
     571
    -572        Returns
    -573        -------
    -574        float
    -575            The intensity threshold.
    -576        float
    -577            The factor to multiply the intensity threshold by.
    -578        """
    -579                
    -580        intes = array(intes).astype(float)
    -581       
    -582        noise_threshold_method = self.settings.noise_threshold_method
    -583
    -584        if noise_threshold_method == 'minima':
    -585            
    -586            if self.is_centroid:
    -587                warn("Auto threshould is disabled for centroid data, returning 0")
    -588                factor = 1
    -589                abundance_threshold = 1e-20
    -590            #print(self.settings.noise_threshold_min_std)
    -591            else:
    -592                abundance_threshold = self.baseline_noise + (self.settings.noise_threshold_min_std * self.baseline_noise_std)
    -593                factor = 1
    +572        correct_baseline = False
    +573
    +574        include_indexes = sp.peak_picking_first_derivative(
    +575            domain,
    +576            signal,
    +577            max_height,
    +578            max_prominence,
    +579            max_signal,
    +580            min_peak_datapoints,
    +581            peak_derivative_threshold,
    +582            signal_threshold=signal_threshold,
    +583            correct_baseline=correct_baseline,
    +584            abun_norm=1,
    +585            plot_res=False,
    +586        )
    +587
    +588        for indexes_tuple in include_indexes:
    +589            apex_index = indexes_tuple[1]
    +590
    +591            peak_indexes = self.check_prominence(
    +592                abund, apex_index, len_signal, peak_height_diff
    +593            )
     594
    -595        elif noise_threshold_method == 'signal_noise':
    -596
    -597            abundance_threshold = self.settings.noise_threshold_min_s2n
    -598            if self.is_centroid:
    -599                factor = 1
    -600            else:
    -601                factor = self.baseline_noise_std
    -602
    -603        elif noise_threshold_method == "relative_abundance":
    -604
    -605            abundance_threshold = self.settings.noise_threshold_min_relative_abundance
    -606            factor = intes.max()/100
    -607
    -608        elif noise_threshold_method == "absolute_abundance":
    -609
    -610            abundance_threshold = self.settings.noise_threshold_absolute_abundance
    -611            factor = 1
    -612
    -613        elif noise_threshold_method == 'log':
    -614            if self.is_centroid:
    -615                raise  Exception("log noise Not tested for centroid data")
    -616            abundance_threshold = self.settings.noise_threshold_log_nsigma
    -617            factor = self.baseline_noise_std
    -618
    -619        else:
    -620            raise  Exception("%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class" % noise_threshold_method)
    -621        
    -622        return abundance_threshold, factor
    -623    
    -624    @staticmethod
    -625    def algebraic_quadratic(list_mass, list_y):
    -626        """
    -627        Find the apex of a peak - algebraically. 
    -628        Faster than using numpy polyfit by ~28x per fit.
    -629
    -630        Parameters
    -631        ----------
    -632        list_mass : ndarray
    -633            list of m/z values (3 points)
    -634        list_y : ndarray
    -635            list of abundance values (3 points)
    -636
    -637        Returns
    -638        -------
    -639        a, b, c: float
    -640            coefficients of the quadratic equation.
    -641
    -642        Notes
    -643        --------
    -644        This is a static method. 
    -645        """
    -646        x_1, x_2, x_3 = list_mass
    -647        y_1, y_2, y_3 = list_y 
    -648
    -649        a = y_1/((x_1-x_2)*(x_1-x_3)) + y_2/((x_2-x_1)*(x_2-x_3)) + y_3/((x_3-x_1)*(x_3-x_2))
    -650
    -651        b = (-y_1*(x_2+x_3)/((x_1-x_2)*(x_1-x_3))
    -652            -y_2*(x_1+x_3)/((x_2-x_1)*(x_2-x_3))
    -653            -y_3*(x_1+x_2)/((x_3-x_1)*(x_3-x_2)))
    -654        
    -655        c = (y_1*x_2*x_3/((x_1-x_2)*(x_1-x_3))
    -656            +y_2*x_1*x_3/((x_2-x_1)*(x_2-x_3))
    -657            +y_3*x_1*x_2/((x_3-x_1)*(x_3-x_2)))
    -658        return a, b, c
    -659
    -660    def find_apex_fit_quadratic(self, mass, abund, freq, current_index):
    -661        """ 
    -662        Find the apex of a peak.
    -663        
    -664        Parameters
    -665        ----------
    -666        mass : ndarray
    -667            The mass values.
    -668        abund : ndarray
    -669            The abundance values.
    -670        freq : ndarray or None  
    -671            The frequency values, if available.
    -672        current_index : int
    -673            The index of the current peak.
    -674        
    -675
    -676        Returns
    -677        -------
    -678        float
    -679            The m/z value of the peak apex.
    -680        float
    -681            The frequency value of the peak apex, if available.
    -682        float
    -683            The abundance value of the peak apex.
    -684        
    -685        """
    -686        # calc prominence
    -687        #peak_indexes = self.check_prominence(abund, current_index, len_abundance, peak_height_diff )
    -688        
    -689        #if not peak_indexes:        
    -690            
    -691        #    return None, None, None, None           
    -692        
    -693        #else:    
    -694            
    -695        # fit parabola to three most abundant datapoints
    -696        list_mass = [mass[current_index - 1], mass[current_index], mass[current_index +1]]
    -697        list_y = [abund[current_index - 1],abund[current_index], abund[current_index +1]]
    -698        
    -699        if self.mspeaks_settings.legacy_centroid_polyfit:
    -700            z = polyfit(list_mass, list_y, 2)
    -701            a = z[0]
    -702            b = z[1]
    -703        else:
    -704            a, b, c = self.algebraic_quadratic(list_mass, list_y)
    +595            if peak_indexes:
    +596                mz_exp_centroid, freq_centr, intes_centr = self.find_apex_fit_quadratic(
    +597                    mass, abund, freq, apex_index
    +598                )
    +599
    +600                if mz_exp_centroid:
    +601                    peak_resolving_power = self.calculate_resolving_power(
    +602                        abund, mass, apex_index
    +603                    )
    +604                    s2n = intes_centr / self.baseline_noise_std
    +605                    self.add_mspeak(
    +606                        self.polarity,
    +607                        mz_exp_centroid,
    +608                        abund[apex_index],
    +609                        peak_resolving_power,
    +610                        s2n,
    +611                        indexes_tuple,
    +612                        exp_freq=freq_centr,
    +613                        ms_parent=self,
    +614                    )
    +615                # pyplot.plot(domain[start_index: final_index + 1], signal[start_index:final_index + 1], c='black')
    +616                # pyplot.show()
    +617
    +618    def get_threshold(self, intes):
    +619        """Get the intensity threshold for peak picking.
    +620
    +621        Parameters
    +622        ----------
    +623        intes : ndarray
    +624            The intensity values.
    +625
    +626        Returns
    +627        -------
    +628        float
    +629            The intensity threshold.
    +630        float
    +631            The factor to multiply the intensity threshold by.
    +632        """
    +633
    +634        intes = array(intes).astype(float)
    +635
    +636        noise_threshold_method = self.settings.noise_threshold_method
    +637
    +638        if noise_threshold_method == "minima":
    +639            if self.is_centroid:
    +640                warnings.warn(
    +641                    "Auto threshould is disabled for centroid data, returning 0"
    +642                )
    +643                factor = 1
    +644                abundance_threshold = 1e-20
    +645            # print(self.settings.noise_threshold_min_std)
    +646            else:
    +647                abundance_threshold = self.baseline_noise + (
    +648                    self.settings.noise_threshold_min_std * self.baseline_noise_std
    +649                )
    +650                factor = 1
    +651
    +652        elif noise_threshold_method == "signal_noise":
    +653            abundance_threshold = self.settings.noise_threshold_min_s2n
    +654            if self.is_centroid:
    +655                factor = 1
    +656            else:
    +657                factor = self.baseline_noise_std
    +658
    +659        elif noise_threshold_method == "relative_abundance":
    +660            abundance_threshold = self.settings.noise_threshold_min_relative_abundance
    +661            factor = intes.max() / 100
    +662
    +663        elif noise_threshold_method == "absolute_abundance":
    +664            abundance_threshold = self.settings.noise_threshold_absolute_abundance
    +665            factor = 1
    +666
    +667        elif noise_threshold_method == "log":
    +668            if self.is_centroid:
    +669                raise Exception("log noise Not tested for centroid data")
    +670            abundance_threshold = self.settings.noise_threshold_log_nsigma
    +671            factor = self.baseline_noise_std
    +672
    +673        else:
    +674            raise Exception(
    +675                "%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class"
    +676                % noise_threshold_method
    +677            )
    +678
    +679        return abundance_threshold, factor
    +680
    +681    @staticmethod
    +682    def algebraic_quadratic(list_mass, list_y):
    +683        """
    +684        Find the apex of a peak - algebraically.
    +685        Faster than using numpy polyfit by ~28x per fit.
    +686
    +687        Parameters
    +688        ----------
    +689        list_mass : ndarray
    +690            list of m/z values (3 points)
    +691        list_y : ndarray
    +692            list of abundance values (3 points)
    +693
    +694        Returns
    +695        -------
    +696        a, b, c: float
    +697            coefficients of the quadratic equation.
    +698
    +699        Notes
    +700        --------
    +701        This is a static method.
    +702        """
    +703        x_1, x_2, x_3 = list_mass
    +704        y_1, y_2, y_3 = list_y
     705
    -706
    -707        calculated = -b/(2*a)
    -708        
    -709        if calculated < 1 or int(calculated) != int(list_mass[1]):
    -710
    -711            mz_exp_centroid = list_mass[1]
    -712        
    -713        else:
    -714            
    -715            mz_exp_centroid = calculated 
    -716        
    -717        if self.label == Labels.bruker_frequency or self.label == Labels.midas_frequency:
    -718            
    -719            # fit parabola to three most abundant frequency datapoints
    -720            list_freq = [freq[current_index - 1], freq[current_index], freq[current_index +1]]
    -721            if self.mspeaks_settings.legacy_centroid_polyfit:
    -722                z = polyfit(list_mass, list_y, 2)
    -723                a = z[0]
    -724                b = z[1]
    -725            else:
    -726                a, b, c = self.algebraic_quadratic(list_mass, list_y)
    -727            
    -728            calculated_freq = -b/(2*a)
    -729
    -730            if calculated_freq < 1 or int(calculated_freq) != freq[current_index]:
    -731                freq_centr = list_freq[1]
    -732
    -733            else:
    -734                freq_centr = calculated_freq
    -735        
    -736        else:
    -737                freq_centr = None
    -738
    -739        if self.mspeaks_settings.legacy_centroid_polyfit:
    -740            abundance_centroid = abund[current_index]
    -741        else: 
    -742            abundance_centroid = a*mz_exp_centroid**2 + b*mz_exp_centroid + c
    -743
    -744        return mz_exp_centroid, freq_centr, abundance_centroid 
    -745    
    -746    def check_prominence(self, abun, current_index, len_abundance, peak_height_diff ) -> tuple or False:
    -747        """ Check the prominence of a peak.
    -748        
    -749        Parameters
    -750        ----------
    -751        abun : ndarray
    -752            The abundance values.
    -753        current_index : int
    -754            The index of the current peak.
    -755        len_abundance : int
    -756            The length of the abundance array.
    -757        peak_height_diff : function
    -758            The function to calculate the peak height difference.
    -759        
    -760        Returns
    -761        -------
    -762        tuple or False
    -763            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    -764            Otherwise, False.
    -765        
    -766        """
    -767
    -768        final_index = self.find_minima(current_index, abun, len_abundance, right=True)
    -769            
    -770        start_index = self.find_minima(current_index, abun, len_abundance, right=False)
    -771            
    -772        peak_indexes = (current_index-1, current_index, current_index+1)
    -773
    -774        if min( peak_height_diff(current_index,start_index), peak_height_diff(current_index,final_index) ) >  self.mspeaks_settings.peak_min_prominence_percent :   
    -775            
    -776            return peak_indexes
    -777        
    -778        else:
    -779            
    -780            return False
    -781
    -782    def use_the_max(self, mass, abund, current_index, len_abundance, peak_height_diff):
    -783        """ Use the max peak height as the centroid
    -784        
    -785        Parameters
    -786        ----------
    -787        mass : ndarray
    -788            The mass values.
    -789        abund : ndarray
    -790            The abundance values.
    -791        current_index : int
    -792            The index of the current peak.
    -793        len_abundance : int
    -794            The length of the abundance array.
    -795        peak_height_diff : function
    -796            The function to calculate the peak height difference.
    -797        
    -798        Returns
    -799        -------
    -800        float
    -801            The m/z value of the peak apex.
    -802        float
    -803            The abundance value of the peak apex.
    -804        tuple or None
    -805            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    -806            Otherwise, None.
    -807        """
    +706        a = (
    +707            y_1 / ((x_1 - x_2) * (x_1 - x_3))
    +708            + y_2 / ((x_2 - x_1) * (x_2 - x_3))
    +709            + y_3 / ((x_3 - x_1) * (x_3 - x_2))
    +710        )
    +711
    +712        b = (
    +713            -y_1 * (x_2 + x_3) / ((x_1 - x_2) * (x_1 - x_3))
    +714            - y_2 * (x_1 + x_3) / ((x_2 - x_1) * (x_2 - x_3))
    +715            - y_3 * (x_1 + x_2) / ((x_3 - x_1) * (x_3 - x_2))
    +716        )
    +717
    +718        c = (
    +719            y_1 * x_2 * x_3 / ((x_1 - x_2) * (x_1 - x_3))
    +720            + y_2 * x_1 * x_3 / ((x_2 - x_1) * (x_2 - x_3))
    +721            + y_3 * x_1 * x_2 / ((x_3 - x_1) * (x_3 - x_2))
    +722        )
    +723        return a, b, c
    +724
    +725    def find_apex_fit_quadratic(self, mass, abund, freq, current_index):
    +726        """
    +727        Find the apex of a peak.
    +728
    +729        Parameters
    +730        ----------
    +731        mass : ndarray
    +732            The mass values.
    +733        abund : ndarray
    +734            The abundance values.
    +735        freq : ndarray or None
    +736            The frequency values, if available.
    +737        current_index : int
    +738            The index of the current peak.
    +739
    +740
    +741        Returns
    +742        -------
    +743        float
    +744            The m/z value of the peak apex.
    +745        float
    +746            The frequency value of the peak apex, if available.
    +747        float
    +748            The abundance value of the peak apex.
    +749
    +750        """
    +751        # calc prominence
    +752        # peak_indexes = self.check_prominence(abund, current_index, len_abundance, peak_height_diff )
    +753
    +754        # if not peak_indexes:
    +755
    +756        #    return None, None, None, None
    +757
    +758        # else:
    +759
    +760        # fit parabola to three most abundant datapoints
    +761        list_mass = [
    +762            mass[current_index - 1],
    +763            mass[current_index],
    +764            mass[current_index + 1],
    +765        ]
    +766        list_y = [
    +767            abund[current_index - 1],
    +768            abund[current_index],
    +769            abund[current_index + 1],
    +770        ]
    +771
    +772        if self.mspeaks_settings.legacy_centroid_polyfit:
    +773            z = polyfit(list_mass, list_y, 2)
    +774            a = z[0]
    +775            b = z[1]
    +776        else:
    +777            a, b, c = self.algebraic_quadratic(list_mass, list_y)
    +778
    +779        calculated = -b / (2 * a)
    +780
    +781        if calculated < 1 or int(calculated) != int(list_mass[1]):
    +782            mz_exp_centroid = list_mass[1]
    +783
    +784        else:
    +785            mz_exp_centroid = calculated
    +786
    +787        if (
    +788            self.label == Labels.bruker_frequency
    +789            or self.label == Labels.midas_frequency
    +790        ):
    +791            # fit parabola to three most abundant frequency datapoints
    +792            list_freq = [
    +793                freq[current_index - 1],
    +794                freq[current_index],
    +795                freq[current_index + 1],
    +796            ]
    +797            if self.mspeaks_settings.legacy_centroid_polyfit:
    +798                z = polyfit(list_mass, list_y, 2)
    +799                a = z[0]
    +800                b = z[1]
    +801            else:
    +802                a, b, c = self.algebraic_quadratic(list_mass, list_y)
    +803
    +804            calculated_freq = -b / (2 * a)
    +805
    +806            if calculated_freq < 1 or int(calculated_freq) != freq[current_index]:
    +807                freq_centr = list_freq[1]
     808
    -809        peak_indexes = self.check_prominence(abund, current_index, len_abundance, peak_height_diff )
    -810        
    -811        if not peak_indexes:        
    -812
    -813            return None, None, None
    -814        
    -815        else:    
    -816            
    -817            return mass[current_index], abund[current_index], peak_indexes
    -818
    -819    def calc_centroid_legacy(self, mass, abund, freq):
    -820        """ Legacy centroid calculation
    -821        Deprecated - for deletion.
    -822        
    -823        """
    -824        warnings.warn("Legacy centroid calculation is deprecated. Please use the new centroid calculation method.")
    -825        pass
    -826        if False:
    -827            len_abundance = len(abund)
    -828            
    -829            max_abundance = max(abund)
    -830            
    -831            peak_height_diff = lambda hi, li : ((abund[hi] - abund[li]) / max_abundance )*100
    -832
    -833            abundance_threshold, factor = self.get_threshold(abund)
    -834            #print(abundance_threshold, factor)
    -835            # find indices of all peaks
    -836            dy = abund[1:] - abund[:-1]
    -837            
    -838            #replaces nan for infi nity
    -839            indices_nan = where(isnan(abund))[0]
    -840            
    -841            if indices_nan.size:
    -842                
    -843                abund[indices_nan] = inf
    -844                dy[where(isnan(dy))[0]] = inf
    -845            
    -846            indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    -847            
    -848            # noise threshold
    -849            if indexes.size and abundance_threshold is not None:
    -850                indexes = indexes[abund[indexes]/factor >= abundance_threshold]
    -851            # filter out 'peaks' within 3 points of the spectrum limits
    -852            #remove entries within 3 points of upper limit
    -853            indexes = [x for x in indexes if (len_abundance-x)>3]
    -854            #remove entries within 3 points of zero
    -855            indexes = [x for x in indexes if x>3]
    -856        
    -857            for current_index in indexes: 
    -858                
    -859                if self.label == Labels.simulated_profile: 
    +809            else:
    +810                freq_centr = calculated_freq
    +811
    +812        else:
    +813            freq_centr = None
    +814
    +815        if self.mspeaks_settings.legacy_centroid_polyfit:
    +816            abundance_centroid = abund[current_index]
    +817        else:
    +818            abundance_centroid = a * mz_exp_centroid**2 + b * mz_exp_centroid + c
    +819
    +820        return mz_exp_centroid, freq_centr, abundance_centroid
    +821
    +822    def check_prominence(
    +823        self, abun, current_index, len_abundance, peak_height_diff
    +824    ) -> tuple or False:
    +825        """Check the prominence of a peak.
    +826
    +827        Parameters
    +828        ----------
    +829        abun : ndarray
    +830            The abundance values.
    +831        current_index : int
    +832            The index of the current peak.
    +833        len_abundance : int
    +834            The length of the abundance array.
    +835        peak_height_diff : function
    +836            The function to calculate the peak height difference.
    +837
    +838        Returns
    +839        -------
    +840        tuple or False
    +841            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    +842            Otherwise, False.
    +843
    +844        """
    +845
    +846        final_index = self.find_minima(current_index, abun, len_abundance, right=True)
    +847
    +848        start_index = self.find_minima(current_index, abun, len_abundance, right=False)
    +849
    +850        peak_indexes = (current_index - 1, current_index, current_index + 1)
    +851
    +852        if (
    +853            min(
    +854                peak_height_diff(current_index, start_index),
    +855                peak_height_diff(current_index, final_index),
    +856            )
    +857            > self.mspeaks_settings.peak_min_prominence_percent
    +858        ):
    +859            return peak_indexes
     860
    -861                    mz_exp_centroid, intes_centr, peak_indexes = self.use_the_max(mass, abund, current_index, len_abundance, peak_height_diff)
    -862                    if mz_exp_centroid:
    -863                        
    -864                        peak_resolving_power = self.calculate_resolving_power( abund, mass, current_index)
    -865                        s2n = intes_centr/self.baseline_noise_std
    -866                        freq_centr = None
    -867                        self.add_mspeak(self.polarity, mz_exp_centroid, abund[current_index] , peak_resolving_power, s2n, peak_indexes, exp_freq=freq_centr, ms_parent=self)
    -868                
    -869                else:
    -870                
    -871                    mz_exp_centroid, freq_centr, intes_centr, peak_indexes = self.find_apex_fit_quadratic(mass, abund, freq, current_index, len_abundance, peak_height_diff)
    -872                    if mz_exp_centroid:
    -873                        try:
    -874                            peak_resolving_power = self.calculate_resolving_power( abund, mass, current_index)
    -875                        except IndexError: 
    -876                            print('index error, skipping peak')
    -877                            continue
    -878                        
    -879                        s2n = intes_centr/self.baseline_noise_std
    -880                        self.add_mspeak(self.polarity, mz_exp_centroid, abund[current_index] , peak_resolving_power, s2n, peak_indexes, exp_freq=freq_centr, ms_parent=self)
    -881                        
    +861        else:
    +862            return False
    +863
    +864    def use_the_max(self, mass, abund, current_index, len_abundance, peak_height_diff):
    +865        """Use the max peak height as the centroid
    +866
    +867        Parameters
    +868        ----------
    +869        mass : ndarray
    +870            The mass values.
    +871        abund : ndarray
    +872            The abundance values.
    +873        current_index : int
    +874            The index of the current peak.
    +875        len_abundance : int
    +876            The length of the abundance array.
    +877        peak_height_diff : function
    +878            The function to calculate the peak height difference.
    +879
    +880        Returns
    +881        -------
    +882        float
    +883            The m/z value of the peak apex.
    +884        float
    +885            The abundance value of the peak apex.
    +886        tuple or None
    +887            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    +888            Otherwise, None.
    +889        """
    +890
    +891        peak_indexes = self.check_prominence(
    +892            abund, current_index, len_abundance, peak_height_diff
    +893        )
    +894
    +895        if not peak_indexes:
    +896            return None, None, None
    +897
    +898        else:
    +899            return mass[current_index], abund[current_index], peak_indexes
    +900
    +901    def calc_centroid_legacy(self, mass, abund, freq):
    +902        """Legacy centroid calculation
    +903        Deprecated - for deletion.
    +904
    +905        """
    +906        warnings.warn(
    +907            "Legacy centroid calculation is deprecated. Please use the new centroid calculation method."
    +908        )
    +909        pass
    +910        if False:
    +911            len_abundance = len(abund)
    +912
    +913            max_abundance = max(abund)
    +914
    +915            peak_height_diff = (
    +916                lambda hi, li: ((abund[hi] - abund[li]) / max_abundance) * 100
    +917            )
    +918
    +919            abundance_threshold, factor = self.get_threshold(abund)
    +920            # print(abundance_threshold, factor)
    +921            # find indices of all peaks
    +922            dy = abund[1:] - abund[:-1]
    +923
    +924            # replace NaN values with infinity
    +925            indices_nan = where(isnan(abund))[0]
    +926
    +927            if indices_nan.size:
    +928                abund[indices_nan] = inf
    +929                dy[where(isnan(dy))[0]] = inf
    +930
    +931            indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +932
    +933            # noise threshold
    +934            if indexes.size and abundance_threshold is not None:
    +935                indexes = indexes[abund[indexes] / factor >= abundance_threshold]
    +936            # filter out 'peaks' within 3 points of the spectrum limits
    +937            # remove entries within 3 points of upper limit
    +938            indexes = [x for x in indexes if (len_abundance - x) > 3]
    +939            # remove entries within 3 points of zero
    +940            indexes = [x for x in indexes if x > 3]
    +941
    +942            for current_index in indexes:
    +943                if self.label == Labels.simulated_profile:
    +944                    mz_exp_centroid, intes_centr, peak_indexes = self.use_the_max(
    +945                        mass, abund, current_index, len_abundance, peak_height_diff
    +946                    )
    +947                    if mz_exp_centroid:
    +948                        peak_resolving_power = self.calculate_resolving_power(
    +949                            abund, mass, current_index
    +950                        )
    +951                        s2n = intes_centr / self.baseline_noise_std
    +952                        freq_centr = None
    +953                        self.add_mspeak(
    +954                            self.polarity,
    +955                            mz_exp_centroid,
    +956                            abund[current_index],
    +957                            peak_resolving_power,
    +958                            s2n,
    +959                            peak_indexes,
    +960                            exp_freq=freq_centr,
    +961                            ms_parent=self,
    +962                        )
    +963
    +964                else:
    +965                    mz_exp_centroid, freq_centr, intes_centr, peak_indexes = (
    +966                        self.find_apex_fit_quadratic(
    +967                            mass,
    +968                            abund,
    +969                            freq,
    +970                            current_index,
    +971                            len_abundance,
    +972                            peak_height_diff,
    +973                        )
    +974                    )
    +975                    if mz_exp_centroid:
    +976                        try:
    +977                            peak_resolving_power = self.calculate_resolving_power(
    +978                                abund, mass, current_index
    +979                            )
    +980                        except IndexError:
    +981                            print("index error, skipping peak")
    +982                            continue
    +983
    +984                        s2n = intes_centr / self.baseline_noise_std
    +985                        self.add_mspeak(
    +986                            self.polarity,
    +987                            mz_exp_centroid,
    +988                            abund[current_index],
    +989                            peak_resolving_power,
    +990                            s2n,
    +991                            peak_indexes,
    +992                            exp_freq=freq_centr,
    +993                            ms_parent=self,
    +994                        )
     
    @@ -1007,875 +1120,978 @@

    -
     13class PeakPicking:
    - 14    """ Class for peak picking.
    - 15
    - 16    Parameters
    - 17    ----------
    - 18    None
    - 19
    - 20    Attributes
    - 21    ----------
    - 22    None
    - 23
    - 24    Methods
    - 25    -------
    - 26    * prepare_peak_picking_data().
    - 27        Prepare the mz, abundance, and frequence data for peak picking.
    - 28    * cut_mz_domain_peak_picking().
    - 29        Cut the m/z domain for peak picking.
    - 30    * extrapolate_axes_for_pp(mz=None, abund=None, freq=None).
    - 31        Extrapolate the m/z axis and fill the abundance axis with 0s.
    - 32    * do_peak_picking().
    - 33        Perform peak picking.
    - 34    * find_minima(apex_index, abundance, len_abundance, right=True).
    - 35        Find the minima of a peak.
    - 36    * linear_fit_calc(intes, massa, index_term, index_sign).
    - 37        Algebraic solution to a linear fit.
    - 38    * calculate_resolving_power(intes, massa, current_index).
    - 39        Calculate the resolving power of a peak.
    - 40    * cal_minima(mass, abun).
    - 41        Calculate the minima of a peak.
    - 42    * calc_centroid(mass, abund, freq).
    - 43        Calculate the centroid of a peak.
    - 44    * get_threshold(intes).
    - 45        Get the intensity threshold for peak picking.
    - 46    * algebraic_quadratic(list_mass, list_y).
    - 47        Find the apex of a peak - algebraically.
    - 48    * find_apex_fit_quadratic(mass, abund, freq, current_index).
    - 49        Find the apex of a peak.
    - 50    * check_prominence(abun, current_index, len_abundance, peak_height_diff).
    - 51        Check the prominence of a peak.
    - 52    * use_the_max(mass, abund, current_index, len_abundance, peak_height_diff).
    - 53        Use the max peak height as the centroid.
    - 54    * calc_centroid_legacy(mass, abund, freq).
    - 55        Legacy centroid calculation. Deprecated - for deletion.       
    - 56
    - 57    """
    - 58    def prepare_peak_picking_data(self):
    - 59        """ Prepare the data for peak picking.
    - 60
    - 61        This function will prepare the m/z, abundance, and frequency data for peak picking according to the settings.
    - 62
    - 63        Returns
    - 64        -------
    - 65        mz : ndarray
    - 66            The m/z axis.
    - 67        abundance : ndarray
    - 68            The abundance axis.
    - 69        freq : ndarray or None
    - 70            The frequency axis, if available.
    - 71        """
    - 72        # First apply cut_mz_domain_peak_picking
    - 73        mz, abundance, freq = self.cut_mz_domain_peak_picking()
    +            
     24class PeakPicking:
    + 25    """Class for peak picking.
    + 26
    + 27    Parameters
    + 28    ----------
    + 29    None
    + 30
    + 31    Attributes
    + 32    ----------
    + 33    None
    + 34
    + 35    Methods
    + 36    -------
    + 37    * prepare_peak_picking_data().
    + 38        Prepare the mz, abundance, and frequency data for peak picking.
    + 39    * cut_mz_domain_peak_picking().
    + 40        Cut the m/z domain for peak picking.
    + 41    * extrapolate_axes_for_pp(mz=None, abund=None, freq=None).
    + 42        Extrapolate the m/z axis and fill the abundance axis with 0s.
    + 43    * do_peak_picking().
    + 44        Perform peak picking.
    + 45    * find_minima(apex_index, abundance, len_abundance, right=True).
    + 46        Find the minima of a peak.
    + 47    * linear_fit_calc(intes, massa, index_term, index_sign).
    + 48        Algebraic solution to a linear fit.
    + 49    * calculate_resolving_power(intes, massa, current_index).
    + 50        Calculate the resolving power of a peak.
    + 51    * cal_minima(mass, abun).
    + 52        Calculate the minima of a peak.
    + 53    * calc_centroid(mass, abund, freq).
    + 54        Calculate the centroid of a peak.
    + 55    * get_threshold(intes).
    + 56        Get the intensity threshold for peak picking.
    + 57    * algebraic_quadratic(list_mass, list_y).
    + 58        Find the apex of a peak - algebraically.
    + 59    * find_apex_fit_quadratic(mass, abund, freq, current_index).
    + 60        Find the apex of a peak.
    + 61    * check_prominence(abun, current_index, len_abundance, peak_height_diff).
    + 62        Check the prominence of a peak.
    + 63    * use_the_max(mass, abund, current_index, len_abundance, peak_height_diff).
    + 64        Use the max peak height as the centroid.
    + 65    * calc_centroid_legacy(mass, abund, freq).
    + 66        Legacy centroid calculation. Deprecated - for deletion.
    + 67
    + 68    """
    + 69
    + 70    def prepare_peak_picking_data(self):
    + 71        """Prepare the data for peak picking.
    + 72
    + 73        This function will prepare the m/z, abundance, and frequency data for peak picking according to the settings.
      74
    - 75        # Then extrapolate the axes for peak picking
    - 76        if self.settings.picking_point_extrapolate > 0:
    - 77            mz, abundance, freq = self.extrapolate_axes_for_pp(mz, abundance, freq)
    - 78        return mz, abundance, freq
    - 79    
    - 80    def cut_mz_domain_peak_picking(self):
    - 81        """
    - 82        Cut the m/z domain for peak picking.
    - 83
    - 84        Simplified function
    - 85        
    - 86        Returns
    - 87        -------
    - 88        mz_domain_X_low_cutoff : ndarray
    - 89            The m/z values within the specified range.
    - 90        mz_domain_low_Y_cutoff : ndarray
    - 91            The abundance values within the specified range.
    - 92        freq_domain_low_Y_cutoff : ndarray or None
    - 93            The frequency values within the specified range, if available.
    - 94
    - 95        """
    - 96        max_picking_mz = self.settings.max_picking_mz
    - 97        min_picking_mz = self.settings.min_picking_mz
    - 98        
    - 99        #min_start =  where(self.mz_exp_profile  > min_picking_mz)[0][0]
    -100        #max_final =  where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    -101        min_start =  searchsorted(a = self.mz_exp_profile, v = min_picking_mz)
    -102        max_final =  searchsorted(a = self.mz_exp_profile, v = max_picking_mz)
    -103
    -104        if self.has_frequency:
    -105
    -106            if self.freq_exp_profile.any():
    -107
    -108                return self.mz_exp_profile[min_start:max_final], self.abundance_profile[min_start:max_final], self.freq_exp_profile[min_start:max_final]
    -109
    -110        else:
    -111
    -112            return self.mz_exp_profile[min_start:max_final], self.abundance_profile[min_start:max_final], None
    -113        
    -114        
    -115    def legacy_cut_mz_domain_peak_picking(self):
    -116        """
    -117        Cut the m/z domain for peak picking.
    -118        DEPRECATED
    -119        Returns
    -120        -------
    -121        mz_domain_X_low_cutoff : ndarray
    -122            The m/z values within the specified range.
    -123        mz_domain_low_Y_cutoff : ndarray
    -124            The abundance values within the specified range.
    -125        freq_domain_low_Y_cutoff : ndarray or None
    -126            The frequency values within the specified range, if available.
    -127
    -128        """
    -129        max_picking_mz = self.settings.max_picking_mz
    -130        min_picking_mz = self.settings.min_picking_mz
    -131        
    -132        min_final =  where(self.mz_exp_profile  > min_picking_mz)[-1][-1]
    -133        min_start =  where(self.mz_exp_profile  > min_picking_mz)[0][0]
    -134
    -135        mz_domain_X_low_cutoff, mz_domain_low_Y_cutoff,  = self.mz_exp_profile [min_start:min_final], self.abundance_profile[min_start:min_final]
    -136
    -137        max_final =  where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    -138        max_start =  where(self.mz_exp_profile < max_picking_mz)[0][0]
    -139
    -140        if self.has_frequency:
    -141
    -142            if self.freq_exp_profile.any():
    + 75        Returns
    + 76        -------
    + 77        mz : ndarray
    + 78            The m/z axis.
    + 79        abundance : ndarray
    + 80            The abundance axis.
    + 81        freq : ndarray or None
    + 82            The frequency axis, if available.
    + 83        """
    + 84        # First apply cut_mz_domain_peak_picking
    + 85        mz, abundance, freq = self.cut_mz_domain_peak_picking()
    + 86
    + 87        # Then extrapolate the axes for peak picking
    + 88        if self.settings.picking_point_extrapolate > 0:
    + 89            mz, abundance, freq = self.extrapolate_axes_for_pp(mz, abundance, freq)
    + 90        return mz, abundance, freq
    + 91
    + 92    def cut_mz_domain_peak_picking(self):
    + 93        """
    + 94        Cut the m/z domain for peak picking.
    + 95
    + 96        Simplified function
    + 97
    + 98        Returns
    + 99        -------
    +100        mz_domain_X_low_cutoff : ndarray
    +101            The m/z values within the specified range.
    +102        mz_domain_low_Y_cutoff : ndarray
    +103            The abundance values within the specified range.
    +104        freq_domain_low_Y_cutoff : ndarray or None
    +105            The frequency values within the specified range, if available.
    +106
    +107        """
    +108        max_picking_mz = self.settings.max_picking_mz
    +109        min_picking_mz = self.settings.min_picking_mz
    +110
    +111        # min_start =  where(self.mz_exp_profile  > min_picking_mz)[0][0]
    +112        # max_final =  where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    +113        min_start = searchsorted(a=self.mz_exp_profile, v=min_picking_mz)
    +114        max_final = searchsorted(a=self.mz_exp_profile, v=max_picking_mz)
    +115
    +116        if self.has_frequency:
    +117            if self.freq_exp_profile.any():
    +118                return (
    +119                    self.mz_exp_profile[min_start:max_final],
    +120                    self.abundance_profile[min_start:max_final],
    +121                    self.freq_exp_profile[min_start:max_final],
    +122                )
    +123
    +124        else:
    +125            return (
    +126                self.mz_exp_profile[min_start:max_final],
    +127                self.abundance_profile[min_start:max_final],
    +128                None,
    +129            )
    +130
    +131    def legacy_cut_mz_domain_peak_picking(self):
    +132        """
    +133        Cut the m/z domain for peak picking.
    +134        DEPRECATED
    +135        Returns
    +136        -------
    +137        mz_domain_X_low_cutoff : ndarray
    +138            The m/z values within the specified range.
    +139        mz_domain_low_Y_cutoff : ndarray
    +140            The abundance values within the specified range.
    +141        freq_domain_low_Y_cutoff : ndarray or None
    +142            The frequency values within the specified range, if available.
     143
    -144                freq_domain_low_Y_cutoff = self.freq_exp_profile[min_start:min_final]
    -145
    -146
    -147                return mz_domain_X_low_cutoff[max_start:max_final], mz_domain_low_Y_cutoff[max_start:max_final], freq_domain_low_Y_cutoff[max_start:max_final]
    -148
    -149        else:
    +144        """
    +145        max_picking_mz = self.settings.max_picking_mz
    +146        min_picking_mz = self.settings.min_picking_mz
    +147
    +148        min_final = where(self.mz_exp_profile > min_picking_mz)[-1][-1]
    +149        min_start = where(self.mz_exp_profile > min_picking_mz)[0][0]
     150
    -151            return mz_domain_X_low_cutoff[max_start:max_final], mz_domain_low_Y_cutoff[max_start:max_final], None
    -152
    -153    @staticmethod 
    -154    def extrapolate_axis(initial_array, pts):
    -155        """
    -156        This function will extrapolate an input array in both directions by N pts.
    -157
    -158        Parameters
    -159        ----------
    -160        initial_array : ndarray
    -161            The input array.
    -162        pts : int
    -163            The number of points to extrapolate.
    -164
    -165        Returns
    -166        -------
    -167        ndarray
    -168            The extrapolated array.
    -169
    -170        Notes
    -171        --------
    -172        This is a static method.        
    -173        """
    -174        initial_array_len = len(initial_array)
    -175        right_delta = initial_array[-1] - initial_array[-2]  
    -176        left_delta = initial_array[1] - initial_array[0]  
    -177        
    -178        # Create an array with extra space for extrapolation
    -179        pad_array = zeros(initial_array_len + 2 * pts)
    -180        
    -181        # Copy original array into the middle of the padded array
    -182        pad_array[pts:pts + initial_array_len] = initial_array
    -183        
    -184        # Extrapolate the right side
    -185        for pt in range(pts):
    -186            final_value = initial_array[-1]
    -187            value_to_add = right_delta * (pt + 1)
    -188            new_value = final_value + value_to_add
    -189            pad_array[initial_array_len + pts + pt] = new_value
    -190        
    -191        # Extrapolate the left side
    -192        for pt in range(pts):
    -193            first_value = initial_array[0]
    -194            value_to_subtract = left_delta * (pt + 1)
    -195            new_value = first_value - value_to_subtract
    -196            pad_array[pts - pt - 1] = new_value
    -197        
    -198        return pad_array
    -199    
    -200    def extrapolate_axes_for_pp(self, mz=None, abund=None, freq=None):
    -201        """ Extrapolate the m/z axis and fill the abundance axis with 0s.
    -202
    -203        Parameters
    -204        ----------
    -205        mz : ndarray or None
    -206            The m/z axis, if available. If None, the experimental m/z axis is used.
    -207        abund : ndarray or None
    -208            The abundance axis, if available. If None, the experimental abundance axis is used.
    -209        freq : ndarray or None
    -210            The frequency axis, if available. If None, the experimental frequency axis is used.
    -211
    -212        Returns
    -213        -------
    -214        mz : ndarray
    -215            The extrapolated m/z axis.
    -216        abund : ndarray
    -217            The abundance axis with 0s filled.
    -218        freq : ndarray or None
    -219            The extrapolated frequency axis, if available.
    -220
    -221        Notes
    -222        --------
    -223        This function will extrapolate the mz axis by the number of datapoints specified in the settings,
    -224        and fill the abundance axis with 0s. 
    -225        This should prevent peak picking issues at the spectrum edge.
    -226
    -227        """ 
    -228        # Check if the input arrays are provided
    -229        if mz is None or abund is None:
    -230            mz, abund = self.mz_exp_profile, self.abundance_profile
    -231            if self.has_frequency:
    -232                freq = self.freq_exp_profile
    -233            else: 
    -234                freq = None
    -235        pts = self.settings.picking_point_extrapolate
    -236        if pts == 0:
    -237            return mz, abund, freq
    -238        
    -239        mz = self.extrapolate_axis(mz, pts)
    -240        abund = pad(abund, (pts, pts), mode = 'constant', constant_values=(0,0))
    -241        if freq is not None:
    -242            freq = self.extrapolate_axis(freq, pts)
    -243        return mz, abund, freq
    -244
    -245    def do_peak_picking(self):
    -246        """ Perform peak picking.
    -247
    -248        """
    -249        mz, abundance, freq = self.prepare_peak_picking_data()
    -250        
    -251        if self.label == Labels.bruker_frequency or self.label == Labels.midas_frequency:
    -252            self.calc_centroid(mz, abundance, freq)
    -253
    -254        elif self.label == Labels.thermo_profile:
    -255            self.calc_centroid(mz, abundance, freq)
    -256
    -257        elif self.label == Labels.bruker_profile:
    -258            self.calc_centroid(mz, abundance, freq)
    -259
    -260        elif self.label == Labels.booster_profile:
    -261            self.calc_centroid(mz, abundance, freq)
    -262
    -263        elif self.label == Labels.simulated_profile:
    -264            self.calc_centroid(mz, abundance, freq)
    -265
    -266        else: 
    -267            raise Exception("Unknow mass spectrum type", self.label)
    -268
    -269    def find_minima(self, apex_index, abundance, len_abundance, right=True):
    -270        """ Find the minima of a peak.
    -271
    -272        Parameters
    -273        ----------
    -274        apex_index : int
    -275            The index of the peak apex.
    -276        abundance : ndarray
    -277            The abundance values.
    -278        len_abundance : int
    -279            The length of the abundance array.
    -280        right : bool, optional
    -281            Flag indicating whether to search for minima to the right of the apex (default is True).
    -282
    -283        Returns
    -284        -------
    -285        int
    -286            The index of the minima.
    -287
    -288        """
    -289        j = apex_index
    -290        
    -291        if right: minima = abundance[j] > abundance[j+1]
    -292        else: minima = abundance[j] > abundance[j-1]
    -293
    -294        while minima:
    -295            
    -296            if j == 1 or j == len_abundance -2:
    -297                break
    -298            
    -299            if right: 
    -300                j += 1
    -301
    -302                minima = abundance[j] >= abundance[j+1]
    -303
    -304            else: 
    -305                j -= 1
    -306                minima = abundance[j] >= abundance[j-1]
    -307        
    -308        if right: return j
    -309        else: return j
    -310
    -311    @staticmethod
    -312    def linear_fit_calc(intes, massa, index_term, index_sign):
    -313        """
    -314        Algebraic solution to a linear fit - roughly 25-50x faster than numpy polyfit when passing only two vals and doing a 1st order fit
    -315
    -316        Parameters
    -317        ----------
    -318        intes : ndarray
    -319            The intensity values.
    -320        massa : ndarray
    -321            The mass values.
    -322        index_term : int
    -323            The index of the current term.
    -324        index_sign : str
    -325            The index sign
    -326        
    -327        Returns
    -328        -------
    -329        ndarray
    -330            The coefficients of the linear fit.
    -331        
    -332        Notes
    -333        --------
    -334        This is a static method.
    -335        """
    -336        if index_sign == '+':
    -337            x1, x2 = massa[index_term], massa[index_term + 1]
    -338            y1, y2 = intes[index_term], intes[index_term + 1]
    -339        elif index_sign =='-':
    -340            x1, x2 = massa[index_term], massa[index_term - 1]
    -341            y1, y2 = intes[index_term], intes[index_term - 1]
    -342        else:
    -343            warnings.warn('error in linear fit calc, unknown index sign')
    -344        
    -345        # Calculate the slope (m)
    -346        slope = (y2 - y1) / (x2 - x1)
    -347        
    -348        # Calculate the intercept (b)
    -349        intercept = y1 - slope * x1
    -350        
    -351        # The coefficients array would be [slope, intercept]
    -352        coefficients = array([slope, intercept])
    -353        return coefficients
    -354
    -355    def calculate_resolving_power(self, intes, massa, current_index):
    -356        """ Calculate the resolving power of a peak.
    -357
    -358        Parameters
    -359        ----------
    -360        intes : ndarray
    -361            The intensity values.
    -362        massa : ndarray
    -363            The mass values.
    -364        current_index : int
    -365            The index of the current peak.
    -366
    -367        Returns
    -368        -------
    -369        float
    -370            The resolving power of the peak.
    -371
    -372        Notes
    -373        --------
    -374        This is a conservative calculation of resolving power,
    -375        the peak need to be resolved at least at the half-maximum magnitude,
    -376        otherwise, the combined full width at half maximum is used to calculate resolving power.
    +151        (
    +152            mz_domain_X_low_cutoff,
    +153            mz_domain_low_Y_cutoff,
    +154        ) = (
    +155            self.mz_exp_profile[min_start:min_final],
    +156            self.abundance_profile[min_start:min_final],
    +157        )
    +158
    +159        max_final = where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    +160        max_start = where(self.mz_exp_profile < max_picking_mz)[0][0]
    +161
    +162        if self.has_frequency:
    +163            if self.freq_exp_profile.any():
    +164                freq_domain_low_Y_cutoff = self.freq_exp_profile[min_start:min_final]
    +165
    +166                return (
    +167                    mz_domain_X_low_cutoff[max_start:max_final],
    +168                    mz_domain_low_Y_cutoff[max_start:max_final],
    +169                    freq_domain_low_Y_cutoff[max_start:max_final],
    +170                )
    +171
    +172        else:
    +173            return (
    +174                mz_domain_X_low_cutoff[max_start:max_final],
    +175                mz_domain_low_Y_cutoff[max_start:max_final],
    +176                None,
    +177            )
    +178
    +179    @staticmethod
    +180    def extrapolate_axis(initial_array, pts):
    +181        """
    +182        This function will extrapolate an input array in both directions by N pts.
    +183
    +184        Parameters
    +185        ----------
    +186        initial_array : ndarray
    +187            The input array.
    +188        pts : int
    +189            The number of points to extrapolate.
    +190
    +191        Returns
    +192        -------
    +193        ndarray
    +194            The extrapolated array.
    +195
    +196        Notes
    +197        --------
    +198        This is a static method.
    +199        """
    +200        initial_array_len = len(initial_array)
    +201        right_delta = initial_array[-1] - initial_array[-2]
    +202        left_delta = initial_array[1] - initial_array[0]
    +203
    +204        # Create an array with extra space for extrapolation
    +205        pad_array = zeros(initial_array_len + 2 * pts)
    +206
    +207        # Copy original array into the middle of the padded array
    +208        pad_array[pts : pts + initial_array_len] = initial_array
    +209
    +210        # Extrapolate the right side
    +211        for pt in range(pts):
    +212            final_value = initial_array[-1]
    +213            value_to_add = right_delta * (pt + 1)
    +214            new_value = final_value + value_to_add
    +215            pad_array[initial_array_len + pts + pt] = new_value
    +216
    +217        # Extrapolate the left side
    +218        for pt in range(pts):
    +219            first_value = initial_array[0]
    +220            value_to_subtract = left_delta * (pt + 1)
    +221            new_value = first_value - value_to_subtract
    +222            pad_array[pts - pt - 1] = new_value
    +223
    +224        return pad_array
    +225
    +226    def extrapolate_axes_for_pp(self, mz=None, abund=None, freq=None):
    +227        """Extrapolate the m/z axis and fill the abundance axis with 0s.
    +228
    +229        Parameters
    +230        ----------
    +231        mz : ndarray or None
    +232            The m/z axis, if available. If None, the experimental m/z axis is used.
    +233        abund : ndarray or None
    +234            The abundance axis, if available. If None, the experimental abundance axis is used.
    +235        freq : ndarray or None
    +236            The frequency axis, if available. If None, the experimental frequency axis is used.
    +237
    +238        Returns
    +239        -------
    +240        mz : ndarray
    +241            The extrapolated m/z axis.
    +242        abund : ndarray
    +243            The abundance axis with 0s filled.
    +244        freq : ndarray or None
    +245            The extrapolated frequency axis, if available.
    +246
    +247        Notes
    +248        --------
    +249        This function will extrapolate the mz axis by the number of datapoints specified in the settings,
    +250        and fill the abundance axis with 0s.
    +251        This should prevent peak picking issues at the spectrum edge.
    +252
    +253        """
    +254        # Check if the input arrays are provided
    +255        if mz is None or abund is None:
    +256            mz, abund = self.mz_exp_profile, self.abundance_profile
    +257            if self.has_frequency:
    +258                freq = self.freq_exp_profile
    +259            else:
    +260                freq = None
    +261        pts = self.settings.picking_point_extrapolate
    +262        if pts == 0:
    +263            return mz, abund, freq
    +264
    +265        mz = self.extrapolate_axis(mz, pts)
    +266        abund = pad(abund, (pts, pts), mode="constant", constant_values=(0, 0))
    +267        if freq is not None:
    +268            freq = self.extrapolate_axis(freq, pts)
    +269        return mz, abund, freq
    +270
    +271    def do_peak_picking(self):
    +272        """Perform peak picking."""
    +273        mz, abundance, freq = self.prepare_peak_picking_data()
    +274
    +275        if (
    +276            self.label == Labels.bruker_frequency
    +277            or self.label == Labels.midas_frequency
    +278        ):
    +279            self.calc_centroid(mz, abundance, freq)
    +280
    +281        elif self.label == Labels.thermo_profile:
    +282            self.calc_centroid(mz, abundance, freq)
    +283
    +284        elif self.label == Labels.bruker_profile:
    +285            self.calc_centroid(mz, abundance, freq)
    +286
    +287        elif self.label == Labels.booster_profile:
    +288            self.calc_centroid(mz, abundance, freq)
    +289
    +290        elif self.label == Labels.simulated_profile:
    +291            self.calc_centroid(mz, abundance, freq)
    +292
    +293        else:
    +294            raise Exception("Unknow mass spectrum type", self.label)
    +295
    +296    def find_minima(self, apex_index, abundance, len_abundance, right=True):
    +297        """Find the minima of a peak.
    +298
    +299        Parameters
    +300        ----------
    +301        apex_index : int
    +302            The index of the peak apex.
    +303        abundance : ndarray
    +304            The abundance values.
    +305        len_abundance : int
    +306            The length of the abundance array.
    +307        right : bool, optional
    +308            Flag indicating whether to search for minima to the right of the apex (default is True).
    +309
    +310        Returns
    +311        -------
    +312        int
    +313            The index of the minima.
    +314
    +315        """
    +316        j = apex_index
    +317
    +318        if right:
    +319            minima = abundance[j] > abundance[j + 1]
    +320        else:
    +321            minima = abundance[j] > abundance[j - 1]
    +322
    +323        while minima:
    +324            if j == 1 or j == len_abundance - 2:
    +325                break
    +326
    +327            if right:
    +328                j += 1
    +329
    +330                minima = abundance[j] >= abundance[j + 1]
    +331
    +332            else:
    +333                j -= 1
    +334                minima = abundance[j] >= abundance[j - 1]
    +335
    +336        if right:
    +337            return j
    +338        else:
    +339            return j
    +340
    +341    @staticmethod
    +342    def linear_fit_calc(intes, massa, index_term, index_sign):
    +343        """
    +344        Algebraic solution to a linear fit - roughly 25-50x faster than numpy polyfit when passing only two vals and doing a 1st order fit
    +345
    +346        Parameters
    +347        ----------
    +348        intes : ndarray
    +349            The intensity values.
    +350        massa : ndarray
    +351            The mass values.
    +352        index_term : int
    +353            The index of the current term.
    +354        index_sign : str
    +355            The index sign
    +356
    +357        Returns
    +358        -------
    +359        ndarray
    +360            The coefficients of the linear fit.
    +361
    +362        Notes
    +363        --------
    +364        This is a static method.
    +365        """
    +366        if index_sign == "+":
    +367            x1, x2 = massa[index_term], massa[index_term + 1]
    +368            y1, y2 = intes[index_term], intes[index_term + 1]
    +369        elif index_sign == "-":
    +370            x1, x2 = massa[index_term], massa[index_term - 1]
    +371            y1, y2 = intes[index_term], intes[index_term - 1]
    +372        else:
    +373            warnings.warn("error in linear fit calc, unknown index sign")
    +374
    +375        # Calculate the slope (m)
    +376        slope = (y2 - y1) / (x2 - x1)
     377
    -378        """
    -379
    -380        peak_height = intes[current_index]
    -381        target_peak_height = peak_height/2
    -382
    -383        peak_height_minus = peak_height
    -384        peak_height_plus = peak_height
    -385        
    -386        # There are issues when a peak is at the high or low limit of a spectrum in finding its local minima and maxima
    -387        # This solution will return nan for resolving power when a peak is possibly too close to an edge to avoid the issue
    -388        
    -389        if current_index <5:
    -390            warnings.warn("peak at low spectrum edge, returning no resolving power")
    -391            return nan
    -392        elif abs(current_index-len(intes))<5:
    -393            warnings.warn("peak at high spectrum edge, returning no resolving power")
    -394            return nan
    -395        else:
    -396            pass
    -397
    -398        index_minus = current_index
    -399        while peak_height_minus  >= target_peak_height:
    -400
    -401            index_minus = index_minus -1
    -402            if index_minus < 0:
    -403                warnings.warn('Res. calc. warning - peak index minus adjacent to spectrum edge \n \
    -404                        Zeroing the first 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported \n \
    -405                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)')
    -406                # Pad the first 5 data points with zeros and restart the loop
    -407                intes[:5] = 0
    -408                peak_height_minus = target_peak_height
    -409                index_minus = current_index            
    -410            else:
    -411                peak_height_minus = intes[index_minus]
    +378        # Calculate the intercept (b)
    +379        intercept = y1 - slope * x1
    +380
    +381        # The coefficients array would be [slope, intercept]
    +382        coefficients = array([slope, intercept])
    +383        return coefficients
    +384
    +385    def calculate_resolving_power(self, intes, massa, current_index):
    +386        """Calculate the resolving power of a peak.
    +387
    +388        Parameters
    +389        ----------
    +390        intes : ndarray
    +391            The intensity values.
    +392        massa : ndarray
    +393            The mass values.
    +394        current_index : int
    +395            The index of the current peak.
    +396
    +397        Returns
    +398        -------
    +399        float
    +400            The resolving power of the peak.
    +401
    +402        Notes
    +403        --------
    +404        This is a conservative calculation of resolving power,
    +405        the peak need to be resolved at least at the half-maximum magnitude,
    +406        otherwise, the combined full width at half maximum is used to calculate resolving power.
    +407
    +408        """
    +409
    +410        peak_height = intes[current_index]
    +411        target_peak_height = peak_height / 2
     412
    -413        if self.mspeaks_settings.legacy_centroid_polyfit:
    -414            x = [ massa[index_minus],  massa[index_minus+1]]
    -415            y = [ intes[index_minus],  intes[index_minus+1]]
    -416            coefficients = polyfit(x, y, 1)
    -417        else:
    -418            coefficients = self.linear_fit_calc(intes, massa, index_minus,index_sign='+')
    -419
    -420        a = coefficients[0]
    -421        b = coefficients[1]
    -422        if self.mspeaks_settings.legacy_resolving_power:
    -423            y_intercept =  intes[index_minus] + ((intes[index_minus+1] - intes[index_minus])/2)
    -424        else:
    -425            y_intercept =  target_peak_height
    -426        massa1 = (y_intercept -b)/a
    +413        peak_height_minus = peak_height
    +414        peak_height_plus = peak_height
    +415
    +416        # There are issues when a peak is at the high or low limit of a spectrum in finding its local minima and maxima
    +417        # This solution will return nan for resolving power when a peak is possibly too close to an edge to avoid the issue
    +418
    +419        if current_index < 5:
    +420            warnings.warn("peak at low spectrum edge, returning no resolving power")
    +421            return nan
    +422        elif abs(current_index - len(intes)) < 5:
    +423            warnings.warn("peak at high spectrum edge, returning no resolving power")
    +424            return nan
    +425        else:
    +426            pass
     427
    -428        index_plus = current_index
    -429        while peak_height_plus  >= target_peak_height:
    -430
    -431            index_plus = index_plus + 1
    -432               
    -433            try: 
    -434                peak_height_plus = intes[index_plus]
    -435            except IndexError:
    -436                warnings.warn('Res. calc. warning - peak index plus adjacent to spectrum edge \n \
    -437                        Zeroing the last 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported\
    -438                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)')
    -439                # Pad the first 5 data points with zeros and restart the loop
    -440                intes[-5:] = 0
    -441                peak_height_plus = target_peak_height
    -442                index_plus = current_index 
    +428        index_minus = current_index
    +429        while peak_height_minus >= target_peak_height:
    +430            index_minus = index_minus - 1
    +431            if index_minus < 0:
    +432                warnings.warn(
    +433                    "Res. calc. warning - peak index minus adjacent to spectrum edge \n \
    +434                        Zeroing the first 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported \n \
    +435                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)"
    +436                )
    +437                # Pad the first 5 data points with zeros and restart the loop
    +438                intes[:5] = 0
    +439                peak_height_minus = target_peak_height
    +440                index_minus = current_index
    +441            else:
    +442                peak_height_minus = intes[index_minus]
     443
     444        if self.mspeaks_settings.legacy_centroid_polyfit:
    -445            x = [massa[index_plus],  massa[index_plus - 1]]
    -446            y = [intes[index_plus],  intes[index_plus - 1]]
    +445            x = [massa[index_minus], massa[index_minus + 1]]
    +446            y = [intes[index_minus], intes[index_minus + 1]]
     447            coefficients = polyfit(x, y, 1)
     448        else:
    -449            coefficients = self.linear_fit_calc(intes, massa, index_plus,index_sign='-')
    -450
    -451        a = coefficients[0]
    -452        b = coefficients[1]
    -453
    -454        if self.mspeaks_settings.legacy_resolving_power:
    -455            y_intercept =  intes[index_plus - 1] + ((intes[index_plus] - intes[index_plus - 1])/2)
    -456        else:
    -457            y_intercept =  target_peak_height
    -458
    -459        massa2 = (y_intercept -b)/a
    -460
    -461        if massa1 > massa2:
    +449            coefficients = self.linear_fit_calc(
    +450                intes, massa, index_minus, index_sign="+"
    +451            )
    +452
    +453        a = coefficients[0]
    +454        b = coefficients[1]
    +455        if self.mspeaks_settings.legacy_resolving_power:
    +456            y_intercept = intes[index_minus] + (
    +457                (intes[index_minus + 1] - intes[index_minus]) / 2
    +458            )
    +459        else:
    +460            y_intercept = target_peak_height
    +461        massa1 = (y_intercept - b) / a
     462
    -463            resolvingpower =  massa[current_index]/(massa1-massa2)
    -464
    -465        else:
    +463        index_plus = current_index
    +464        while peak_height_plus >= target_peak_height:
    +465            index_plus = index_plus + 1
     466
    -467            resolvingpower =  massa[current_index]/(massa2-massa1)
    -468
    -469        return resolvingpower
    -470
    -471    def cal_minima(self, mass, abun):
    -472        """ Calculate the minima of a peak.
    -473
    -474        Parameters
    -475        ----------
    -476        mass : ndarray
    -477            The mass values.
    -478        abun : ndarray
    -479            The abundance values.
    -480
    -481        Returns
    -482        -------
    -483        ndarray or None
    -484            The mass values at the minima, if found.
    -485
    -486        """
    -487        abun = -abun
    +467            try:
    +468                peak_height_plus = intes[index_plus]
    +469            except IndexError:
    +470                warnings.warn(
    +471                    "Res. calc. warning - peak index plus adjacent to spectrum edge \n \
    +472                        Zeroing the last 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported\
    +473                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)"
    +474                )
    +475                # Pad the first 5 data points with zeros and restart the loop
    +476                intes[-5:] = 0
    +477                peak_height_plus = target_peak_height
    +478                index_plus = current_index
    +479
    +480        if self.mspeaks_settings.legacy_centroid_polyfit:
    +481            x = [massa[index_plus], massa[index_plus - 1]]
    +482            y = [intes[index_plus], intes[index_plus - 1]]
    +483            coefficients = polyfit(x, y, 1)
    +484        else:
    +485            coefficients = self.linear_fit_calc(
    +486                intes, massa, index_plus, index_sign="-"
    +487            )
     488
    -489        dy = abun[1:] - abun[:-1]
    -490        
    -491        # replaces nan for infinity
    -492        indices_nan = where(isnan(abun))[0]
    -493        
    -494        if indices_nan.size:
    -495            
    -496            abun[indices_nan] = inf
    -497            dy[where(isnan(dy))[0]] = inf
    -498        
    -499        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +489        a = coefficients[0]
    +490        b = coefficients[1]
    +491
    +492        if self.mspeaks_settings.legacy_resolving_power:
    +493            y_intercept = intes[index_plus - 1] + (
    +494                (intes[index_plus] - intes[index_plus - 1]) / 2
    +495            )
    +496        else:
    +497            y_intercept = target_peak_height
    +498
    +499        massa2 = (y_intercept - b) / a
     500
    -501        if indexes.size:
    -502            
    -503            return mass[indexes], abun[indexes]
    -504    
    -505    def calc_centroid(self, mass, abund, freq):
    -506        """ Calculate the centroid of a peak.
    -507
    -508        Parameters
    -509        ----------
    -510        mass : ndarray
    -511            The mass values.
    -512        abund : ndarray
    -513            The abundance values.
    -514        freq : ndarray or None
    -515            The frequency values, if available.
    -516
    -517        Returns
    -518        -------
    -519        None
    -520
    -521        """
    -522        
    -523        max_height = self.mspeaks_settings.peak_height_max_percent
    -524        max_prominence = self.mspeaks_settings.peak_max_prominence_percent
    -525        min_peak_datapoints = self.mspeaks_settings.min_peak_datapoints
    -526        peak_derivative_threshold = self.mspeaks_settings.peak_derivative_threshold
    -527        max_abun = max(abund)
    -528        peak_height_diff = lambda hi, li : ((abund[hi] - abund[li]) / max_abun ) * 100
    -529                    
    -530        domain = mass
    -531        signal = abund
    -532        len_signal = len(signal)
    -533        
    -534        signal_threshold, factor = self.get_threshold(abund)
    -535        max_signal = factor
    -536
    -537        correct_baseline = False
    -538
    -539        include_indexes = sp.peak_picking_first_derivative(domain, signal, max_height, max_prominence, max_signal, 
    -540                                                           min_peak_datapoints,
    -541                                                           peak_derivative_threshold,
    -542                                                           signal_threshold=signal_threshold, 
    -543                                                           correct_baseline=correct_baseline, 
    -544                                                           abun_norm=1,
    -545                                                           plot_res=False)
    -546
    -547        for indexes_tuple in include_indexes:
    -548            
    -549            apex_index = indexes_tuple[1]
    -550
    -551            peak_indexes = self.check_prominence(abund, apex_index, len_signal, peak_height_diff )
    +501        if massa1 > massa2:
    +502            resolvingpower = massa[current_index] / (massa1 - massa2)
    +503
    +504        else:
    +505            resolvingpower = massa[current_index] / (massa2 - massa1)
    +506
    +507        return resolvingpower
    +508
    +509    def cal_minima(self, mass, abun):
    +510        """Calculate the minima of a peak.
    +511
    +512        Parameters
    +513        ----------
    +514        mass : ndarray
    +515            The mass values.
    +516        abun : ndarray
    +517            The abundance values.
    +518
    +519        Returns
    +520        -------
    +521        ndarray or None
    +522            The mass values at the minima, if found.
    +523
    +524        """
    +525        abun = -abun
    +526
    +527        dy = abun[1:] - abun[:-1]
    +528
    +529        # replaces nan for infinity
    +530        indices_nan = where(isnan(abun))[0]
    +531
    +532        if indices_nan.size:
    +533            abun[indices_nan] = inf
    +534            dy[where(isnan(dy))[0]] = inf
    +535
    +536        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +537
    +538        if indexes.size:
    +539            return mass[indexes], abun[indexes]
    +540
    +541    def calc_centroid(self, mass, abund, freq):
    +542        """Calculate the centroid of a peak.
    +543
    +544        Parameters
    +545        ----------
    +546        mass : ndarray
    +547            The mass values.
    +548        abund : ndarray
    +549            The abundance values.
    +550        freq : ndarray or None
    +551            The frequency values, if available.
     552
    -553            if peak_indexes:
    -554                
    -555                mz_exp_centroid, freq_centr, intes_centr = self.find_apex_fit_quadratic(mass, abund, freq, apex_index)
    +553        Returns
    +554        -------
    +555        None
     556
    -557                if mz_exp_centroid:
    -558                                   
    -559                    peak_resolving_power = self.calculate_resolving_power( abund, mass, apex_index)
    -560                    s2n = intes_centr/self.baseline_noise_std
    -561                    self.add_mspeak(self.polarity, mz_exp_centroid, abund[apex_index] , peak_resolving_power, s2n, indexes_tuple, exp_freq=freq_centr, ms_parent=self)
    -562                #pyplot.plot(domain[start_index: final_index + 1], signal[start_index:final_index + 1], c='black')
    -563                #pyplot.show()
    -564                
    -565    def get_threshold(self, intes):
    -566        """ Get the intensity threshold for peak picking.
    -567
    -568        Parameters
    -569        ----------
    -570        intes : ndarray
    -571            The intensity values.
    +557        """
    +558
    +559        max_height = self.mspeaks_settings.peak_height_max_percent
    +560        max_prominence = self.mspeaks_settings.peak_max_prominence_percent
    +561        min_peak_datapoints = self.mspeaks_settings.min_peak_datapoints
    +562        peak_derivative_threshold = self.mspeaks_settings.peak_derivative_threshold
    +563        max_abun = max(abund)
    +564        peak_height_diff = lambda hi, li: ((abund[hi] - abund[li]) / max_abun) * 100
    +565
    +566        domain = mass
    +567        signal = abund
    +568        len_signal = len(signal)
    +569
    +570        signal_threshold, factor = self.get_threshold(abund)
    +571        max_signal = factor
     572
    -573        Returns
    -574        -------
    -575        float
    -576            The intensity threshold.
    -577        float
    -578            The factor to multiply the intensity threshold by.
    -579        """
    -580                
    -581        intes = array(intes).astype(float)
    -582       
    -583        noise_threshold_method = self.settings.noise_threshold_method
    -584
    -585        if noise_threshold_method == 'minima':
    -586            
    -587            if self.is_centroid:
    -588                warn("Auto threshould is disabled for centroid data, returning 0")
    -589                factor = 1
    -590                abundance_threshold = 1e-20
    -591            #print(self.settings.noise_threshold_min_std)
    -592            else:
    -593                abundance_threshold = self.baseline_noise + (self.settings.noise_threshold_min_std * self.baseline_noise_std)
    -594                factor = 1
    +573        correct_baseline = False
    +574
    +575        include_indexes = sp.peak_picking_first_derivative(
    +576            domain,
    +577            signal,
    +578            max_height,
    +579            max_prominence,
    +580            max_signal,
    +581            min_peak_datapoints,
    +582            peak_derivative_threshold,
    +583            signal_threshold=signal_threshold,
    +584            correct_baseline=correct_baseline,
    +585            abun_norm=1,
    +586            plot_res=False,
    +587        )
    +588
    +589        for indexes_tuple in include_indexes:
    +590            apex_index = indexes_tuple[1]
    +591
    +592            peak_indexes = self.check_prominence(
    +593                abund, apex_index, len_signal, peak_height_diff
    +594            )
     595
    -596        elif noise_threshold_method == 'signal_noise':
    -597
    -598            abundance_threshold = self.settings.noise_threshold_min_s2n
    -599            if self.is_centroid:
    -600                factor = 1
    -601            else:
    -602                factor = self.baseline_noise_std
    -603
    -604        elif noise_threshold_method == "relative_abundance":
    -605
    -606            abundance_threshold = self.settings.noise_threshold_min_relative_abundance
    -607            factor = intes.max()/100
    -608
    -609        elif noise_threshold_method == "absolute_abundance":
    -610
    -611            abundance_threshold = self.settings.noise_threshold_absolute_abundance
    -612            factor = 1
    -613
    -614        elif noise_threshold_method == 'log':
    -615            if self.is_centroid:
    -616                raise  Exception("log noise Not tested for centroid data")
    -617            abundance_threshold = self.settings.noise_threshold_log_nsigma
    -618            factor = self.baseline_noise_std
    -619
    -620        else:
    -621            raise  Exception("%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class" % noise_threshold_method)
    -622        
    -623        return abundance_threshold, factor
    -624    
    -625    @staticmethod
    -626    def algebraic_quadratic(list_mass, list_y):
    -627        """
    -628        Find the apex of a peak - algebraically. 
    -629        Faster than using numpy polyfit by ~28x per fit.
    -630
    -631        Parameters
    -632        ----------
    -633        list_mass : ndarray
    -634            list of m/z values (3 points)
    -635        list_y : ndarray
    -636            list of abundance values (3 points)
    -637
    -638        Returns
    -639        -------
    -640        a, b, c: float
    -641            coefficients of the quadratic equation.
    -642
    -643        Notes
    -644        --------
    -645        This is a static method. 
    -646        """
    -647        x_1, x_2, x_3 = list_mass
    -648        y_1, y_2, y_3 = list_y 
    -649
    -650        a = y_1/((x_1-x_2)*(x_1-x_3)) + y_2/((x_2-x_1)*(x_2-x_3)) + y_3/((x_3-x_1)*(x_3-x_2))
    -651
    -652        b = (-y_1*(x_2+x_3)/((x_1-x_2)*(x_1-x_3))
    -653            -y_2*(x_1+x_3)/((x_2-x_1)*(x_2-x_3))
    -654            -y_3*(x_1+x_2)/((x_3-x_1)*(x_3-x_2)))
    -655        
    -656        c = (y_1*x_2*x_3/((x_1-x_2)*(x_1-x_3))
    -657            +y_2*x_1*x_3/((x_2-x_1)*(x_2-x_3))
    -658            +y_3*x_1*x_2/((x_3-x_1)*(x_3-x_2)))
    -659        return a, b, c
    -660
    -661    def find_apex_fit_quadratic(self, mass, abund, freq, current_index):
    -662        """ 
    -663        Find the apex of a peak.
    -664        
    -665        Parameters
    -666        ----------
    -667        mass : ndarray
    -668            The mass values.
    -669        abund : ndarray
    -670            The abundance values.
    -671        freq : ndarray or None  
    -672            The frequency values, if available.
    -673        current_index : int
    -674            The index of the current peak.
    -675        
    -676
    -677        Returns
    -678        -------
    -679        float
    -680            The m/z value of the peak apex.
    -681        float
    -682            The frequency value of the peak apex, if available.
    -683        float
    -684            The abundance value of the peak apex.
    -685        
    -686        """
    -687        # calc prominence
    -688        #peak_indexes = self.check_prominence(abund, current_index, len_abundance, peak_height_diff )
    -689        
    -690        #if not peak_indexes:        
    -691            
    -692        #    return None, None, None, None           
    -693        
    -694        #else:    
    -695            
    -696        # fit parabola to three most abundant datapoints
    -697        list_mass = [mass[current_index - 1], mass[current_index], mass[current_index +1]]
    -698        list_y = [abund[current_index - 1],abund[current_index], abund[current_index +1]]
    -699        
    -700        if self.mspeaks_settings.legacy_centroid_polyfit:
    -701            z = polyfit(list_mass, list_y, 2)
    -702            a = z[0]
    -703            b = z[1]
    -704        else:
    -705            a, b, c = self.algebraic_quadratic(list_mass, list_y)
    +596            if peak_indexes:
    +597                mz_exp_centroid, freq_centr, intes_centr = self.find_apex_fit_quadratic(
    +598                    mass, abund, freq, apex_index
    +599                )
    +600
    +601                if mz_exp_centroid:
    +602                    peak_resolving_power = self.calculate_resolving_power(
    +603                        abund, mass, apex_index
    +604                    )
    +605                    s2n = intes_centr / self.baseline_noise_std
    +606                    self.add_mspeak(
    +607                        self.polarity,
    +608                        mz_exp_centroid,
    +609                        abund[apex_index],
    +610                        peak_resolving_power,
    +611                        s2n,
    +612                        indexes_tuple,
    +613                        exp_freq=freq_centr,
    +614                        ms_parent=self,
    +615                    )
    +616                # pyplot.plot(domain[start_index: final_index + 1], signal[start_index:final_index + 1], c='black')
    +617                # pyplot.show()
    +618
    +619    def get_threshold(self, intes):
    +620        """Get the intensity threshold for peak picking.
    +621
    +622        Parameters
    +623        ----------
    +624        intes : ndarray
    +625            The intensity values.
    +626
    +627        Returns
    +628        -------
    +629        float
    +630            The intensity threshold.
    +631        float
    +632            The factor to multiply the intensity threshold by.
    +633        """
    +634
    +635        intes = array(intes).astype(float)
    +636
    +637        noise_threshold_method = self.settings.noise_threshold_method
    +638
    +639        if noise_threshold_method == "minima":
    +640            if self.is_centroid:
    +641                warnings.warn(
    +642                    "Auto threshould is disabled for centroid data, returning 0"
    +643                )
    +644                factor = 1
    +645                abundance_threshold = 1e-20
    +646            # print(self.settings.noise_threshold_min_std)
    +647            else:
    +648                abundance_threshold = self.baseline_noise + (
    +649                    self.settings.noise_threshold_min_std * self.baseline_noise_std
    +650                )
    +651                factor = 1
    +652
    +653        elif noise_threshold_method == "signal_noise":
    +654            abundance_threshold = self.settings.noise_threshold_min_s2n
    +655            if self.is_centroid:
    +656                factor = 1
    +657            else:
    +658                factor = self.baseline_noise_std
    +659
    +660        elif noise_threshold_method == "relative_abundance":
    +661            abundance_threshold = self.settings.noise_threshold_min_relative_abundance
    +662            factor = intes.max() / 100
    +663
    +664        elif noise_threshold_method == "absolute_abundance":
    +665            abundance_threshold = self.settings.noise_threshold_absolute_abundance
    +666            factor = 1
    +667
    +668        elif noise_threshold_method == "log":
    +669            if self.is_centroid:
    +670                raise Exception("log noise Not tested for centroid data")
    +671            abundance_threshold = self.settings.noise_threshold_log_nsigma
    +672            factor = self.baseline_noise_std
    +673
    +674        else:
    +675            raise Exception(
    +676                "%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class"
    +677                % noise_threshold_method
    +678            )
    +679
    +680        return abundance_threshold, factor
    +681
    +682    @staticmethod
    +683    def algebraic_quadratic(list_mass, list_y):
    +684        """
    +685        Find the apex of a peak - algebraically.
    +686        Faster than using numpy polyfit by ~28x per fit.
    +687
    +688        Parameters
    +689        ----------
    +690        list_mass : ndarray
    +691            list of m/z values (3 points)
    +692        list_y : ndarray
    +693            list of abundance values (3 points)
    +694
    +695        Returns
    +696        -------
    +697        a, b, c: float
    +698            coefficients of the quadratic equation.
    +699
    +700        Notes
    +701        --------
    +702        This is a static method.
    +703        """
    +704        x_1, x_2, x_3 = list_mass
    +705        y_1, y_2, y_3 = list_y
     706
    -707
    -708        calculated = -b/(2*a)
    -709        
    -710        if calculated < 1 or int(calculated) != int(list_mass[1]):
    -711
    -712            mz_exp_centroid = list_mass[1]
    -713        
    -714        else:
    -715            
    -716            mz_exp_centroid = calculated 
    -717        
    -718        if self.label == Labels.bruker_frequency or self.label == Labels.midas_frequency:
    -719            
    -720            # fit parabola to three most abundant frequency datapoints
    -721            list_freq = [freq[current_index - 1], freq[current_index], freq[current_index +1]]
    -722            if self.mspeaks_settings.legacy_centroid_polyfit:
    -723                z = polyfit(list_mass, list_y, 2)
    -724                a = z[0]
    -725                b = z[1]
    -726            else:
    -727                a, b, c = self.algebraic_quadratic(list_mass, list_y)
    -728            
    -729            calculated_freq = -b/(2*a)
    -730
    -731            if calculated_freq < 1 or int(calculated_freq) != freq[current_index]:
    -732                freq_centr = list_freq[1]
    -733
    -734            else:
    -735                freq_centr = calculated_freq
    -736        
    -737        else:
    -738                freq_centr = None
    -739
    -740        if self.mspeaks_settings.legacy_centroid_polyfit:
    -741            abundance_centroid = abund[current_index]
    -742        else: 
    -743            abundance_centroid = a*mz_exp_centroid**2 + b*mz_exp_centroid + c
    -744
    -745        return mz_exp_centroid, freq_centr, abundance_centroid 
    -746    
    -747    def check_prominence(self, abun, current_index, len_abundance, peak_height_diff ) -> tuple or False:
    -748        """ Check the prominence of a peak.
    -749        
    -750        Parameters
    -751        ----------
    -752        abun : ndarray
    -753            The abundance values.
    -754        current_index : int
    -755            The index of the current peak.
    -756        len_abundance : int
    -757            The length of the abundance array.
    -758        peak_height_diff : function
    -759            The function to calculate the peak height difference.
    -760        
    -761        Returns
    -762        -------
    -763        tuple or False
    -764            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    -765            Otherwise, False.
    -766        
    -767        """
    -768
    -769        final_index = self.find_minima(current_index, abun, len_abundance, right=True)
    -770            
    -771        start_index = self.find_minima(current_index, abun, len_abundance, right=False)
    -772            
    -773        peak_indexes = (current_index-1, current_index, current_index+1)
    -774
    -775        if min( peak_height_diff(current_index,start_index), peak_height_diff(current_index,final_index) ) >  self.mspeaks_settings.peak_min_prominence_percent :   
    -776            
    -777            return peak_indexes
    -778        
    -779        else:
    -780            
    -781            return False
    -782
    -783    def use_the_max(self, mass, abund, current_index, len_abundance, peak_height_diff):
    -784        """ Use the max peak height as the centroid
    -785        
    -786        Parameters
    -787        ----------
    -788        mass : ndarray
    -789            The mass values.
    -790        abund : ndarray
    -791            The abundance values.
    -792        current_index : int
    -793            The index of the current peak.
    -794        len_abundance : int
    -795            The length of the abundance array.
    -796        peak_height_diff : function
    -797            The function to calculate the peak height difference.
    -798        
    -799        Returns
    -800        -------
    -801        float
    -802            The m/z value of the peak apex.
    -803        float
    -804            The abundance value of the peak apex.
    -805        tuple or None
    -806            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    -807            Otherwise, None.
    -808        """
    +707        a = (
    +708            y_1 / ((x_1 - x_2) * (x_1 - x_3))
    +709            + y_2 / ((x_2 - x_1) * (x_2 - x_3))
    +710            + y_3 / ((x_3 - x_1) * (x_3 - x_2))
    +711        )
    +712
    +713        b = (
    +714            -y_1 * (x_2 + x_3) / ((x_1 - x_2) * (x_1 - x_3))
    +715            - y_2 * (x_1 + x_3) / ((x_2 - x_1) * (x_2 - x_3))
    +716            - y_3 * (x_1 + x_2) / ((x_3 - x_1) * (x_3 - x_2))
    +717        )
    +718
    +719        c = (
    +720            y_1 * x_2 * x_3 / ((x_1 - x_2) * (x_1 - x_3))
    +721            + y_2 * x_1 * x_3 / ((x_2 - x_1) * (x_2 - x_3))
    +722            + y_3 * x_1 * x_2 / ((x_3 - x_1) * (x_3 - x_2))
    +723        )
    +724        return a, b, c
    +725
    +726    def find_apex_fit_quadratic(self, mass, abund, freq, current_index):
    +727        """
    +728        Find the apex of a peak.
    +729
    +730        Parameters
    +731        ----------
    +732        mass : ndarray
    +733            The mass values.
    +734        abund : ndarray
    +735            The abundance values.
    +736        freq : ndarray or None
    +737            The frequency values, if available.
    +738        current_index : int
    +739            The index of the current peak.
    +740
    +741
    +742        Returns
    +743        -------
    +744        float
    +745            The m/z value of the peak apex.
    +746        float
    +747            The frequency value of the peak apex, if available.
    +748        float
    +749            The abundance value of the peak apex.
    +750
    +751        """
    +752        # calc prominence
    +753        # peak_indexes = self.check_prominence(abund, current_index, len_abundance, peak_height_diff )
    +754
    +755        # if not peak_indexes:
    +756
    +757        #    return None, None, None, None
    +758
    +759        # else:
    +760
    +761        # fit parabola to three most abundant datapoints
    +762        list_mass = [
    +763            mass[current_index - 1],
    +764            mass[current_index],
    +765            mass[current_index + 1],
    +766        ]
    +767        list_y = [
    +768            abund[current_index - 1],
    +769            abund[current_index],
    +770            abund[current_index + 1],
    +771        ]
    +772
    +773        if self.mspeaks_settings.legacy_centroid_polyfit:
    +774            z = polyfit(list_mass, list_y, 2)
    +775            a = z[0]
    +776            b = z[1]
    +777        else:
    +778            a, b, c = self.algebraic_quadratic(list_mass, list_y)
    +779
    +780        calculated = -b / (2 * a)
    +781
    +782        if calculated < 1 or int(calculated) != int(list_mass[1]):
    +783            mz_exp_centroid = list_mass[1]
    +784
    +785        else:
    +786            mz_exp_centroid = calculated
    +787
    +788        if (
    +789            self.label == Labels.bruker_frequency
    +790            or self.label == Labels.midas_frequency
    +791        ):
    +792            # fit parabola to three most abundant frequency datapoints
    +793            list_freq = [
    +794                freq[current_index - 1],
    +795                freq[current_index],
    +796                freq[current_index + 1],
    +797            ]
    +798            if self.mspeaks_settings.legacy_centroid_polyfit:
    +799                z = polyfit(list_mass, list_y, 2)
    +800                a = z[0]
    +801                b = z[1]
    +802            else:
    +803                a, b, c = self.algebraic_quadratic(list_mass, list_y)
    +804
    +805            calculated_freq = -b / (2 * a)
    +806
    +807            if calculated_freq < 1 or int(calculated_freq) != freq[current_index]:
    +808                freq_centr = list_freq[1]
     809
    -810        peak_indexes = self.check_prominence(abund, current_index, len_abundance, peak_height_diff )
    -811        
    -812        if not peak_indexes:        
    -813
    -814            return None, None, None
    -815        
    -816        else:    
    -817            
    -818            return mass[current_index], abund[current_index], peak_indexes
    -819
    -820    def calc_centroid_legacy(self, mass, abund, freq):
    -821        """ Legacy centroid calculation
    -822        Deprecated - for deletion.
    -823        
    -824        """
    -825        warnings.warn("Legacy centroid calculation is deprecated. Please use the new centroid calculation method.")
    -826        pass
    -827        if False:
    -828            len_abundance = len(abund)
    -829            
    -830            max_abundance = max(abund)
    -831            
    -832            peak_height_diff = lambda hi, li : ((abund[hi] - abund[li]) / max_abundance )*100
    -833
    -834            abundance_threshold, factor = self.get_threshold(abund)
    -835            #print(abundance_threshold, factor)
    -836            # find indices of all peaks
    -837            dy = abund[1:] - abund[:-1]
    -838            
    -839            #replaces nan for infi nity
    -840            indices_nan = where(isnan(abund))[0]
    -841            
    -842            if indices_nan.size:
    -843                
    -844                abund[indices_nan] = inf
    -845                dy[where(isnan(dy))[0]] = inf
    -846            
    -847            indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    -848            
    -849            # noise threshold
    -850            if indexes.size and abundance_threshold is not None:
    -851                indexes = indexes[abund[indexes]/factor >= abundance_threshold]
    -852            # filter out 'peaks' within 3 points of the spectrum limits
    -853            #remove entries within 3 points of upper limit
    -854            indexes = [x for x in indexes if (len_abundance-x)>3]
    -855            #remove entries within 3 points of zero
    -856            indexes = [x for x in indexes if x>3]
    -857        
    -858            for current_index in indexes: 
    -859                
    -860                if self.label == Labels.simulated_profile: 
    +810            else:
    +811                freq_centr = calculated_freq
    +812
    +813        else:
    +814            freq_centr = None
    +815
    +816        if self.mspeaks_settings.legacy_centroid_polyfit:
    +817            abundance_centroid = abund[current_index]
    +818        else:
    +819            abundance_centroid = a * mz_exp_centroid**2 + b * mz_exp_centroid + c
    +820
    +821        return mz_exp_centroid, freq_centr, abundance_centroid
    +822
    +823    def check_prominence(
    +824        self, abun, current_index, len_abundance, peak_height_diff
    +825    ) -> tuple or False:
    +826        """Check the prominence of a peak.
    +827
    +828        Parameters
    +829        ----------
    +830        abun : ndarray
    +831            The abundance values.
    +832        current_index : int
    +833            The index of the current peak.
    +834        len_abundance : int
    +835            The length of the abundance array.
    +836        peak_height_diff : function
    +837            The function to calculate the peak height difference.
    +838
    +839        Returns
    +840        -------
    +841        tuple or False
    +842            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    +843            Otherwise, False.
    +844
    +845        """
    +846
    +847        final_index = self.find_minima(current_index, abun, len_abundance, right=True)
    +848
    +849        start_index = self.find_minima(current_index, abun, len_abundance, right=False)
    +850
    +851        peak_indexes = (current_index - 1, current_index, current_index + 1)
    +852
    +853        if (
    +854            min(
    +855                peak_height_diff(current_index, start_index),
    +856                peak_height_diff(current_index, final_index),
    +857            )
    +858            > self.mspeaks_settings.peak_min_prominence_percent
    +859        ):
    +860            return peak_indexes
     861
    -862                    mz_exp_centroid, intes_centr, peak_indexes = self.use_the_max(mass, abund, current_index, len_abundance, peak_height_diff)
    -863                    if mz_exp_centroid:
    -864                        
    -865                        peak_resolving_power = self.calculate_resolving_power( abund, mass, current_index)
    -866                        s2n = intes_centr/self.baseline_noise_std
    -867                        freq_centr = None
    -868                        self.add_mspeak(self.polarity, mz_exp_centroid, abund[current_index] , peak_resolving_power, s2n, peak_indexes, exp_freq=freq_centr, ms_parent=self)
    -869                
    -870                else:
    -871                
    -872                    mz_exp_centroid, freq_centr, intes_centr, peak_indexes = self.find_apex_fit_quadratic(mass, abund, freq, current_index, len_abundance, peak_height_diff)
    -873                    if mz_exp_centroid:
    -874                        try:
    -875                            peak_resolving_power = self.calculate_resolving_power( abund, mass, current_index)
    -876                        except IndexError: 
    -877                            print('index error, skipping peak')
    -878                            continue
    -879                        
    -880                        s2n = intes_centr/self.baseline_noise_std
    -881                        self.add_mspeak(self.polarity, mz_exp_centroid, abund[current_index] , peak_resolving_power, s2n, peak_indexes, exp_freq=freq_centr, ms_parent=self)
    +862        else:
    +863            return False
    +864
    +865    def use_the_max(self, mass, abund, current_index, len_abundance, peak_height_diff):
    +866        """Use the max peak height as the centroid
    +867
    +868        Parameters
    +869        ----------
    +870        mass : ndarray
    +871            The mass values.
    +872        abund : ndarray
    +873            The abundance values.
    +874        current_index : int
    +875            The index of the current peak.
    +876        len_abundance : int
    +877            The length of the abundance array.
    +878        peak_height_diff : function
    +879            The function to calculate the peak height difference.
    +880
    +881        Returns
    +882        -------
    +883        float
    +884            The m/z value of the peak apex.
    +885        float
    +886            The abundance value of the peak apex.
    +887        tuple or None
    +888            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    +889            Otherwise, None.
    +890        """
    +891
    +892        peak_indexes = self.check_prominence(
    +893            abund, current_index, len_abundance, peak_height_diff
    +894        )
    +895
    +896        if not peak_indexes:
    +897            return None, None, None
    +898
    +899        else:
    +900            return mass[current_index], abund[current_index], peak_indexes
    +901
    +902    def calc_centroid_legacy(self, mass, abund, freq):
    +903        """Legacy centroid calculation
    +904        Deprecated - for deletion.
    +905
    +906        """
    +907        warnings.warn(
    +908            "Legacy centroid calculation is deprecated. Please use the new centroid calculation method."
    +909        )
    +910        pass
    +911        if False:
    +912            len_abundance = len(abund)
    +913
    +914            max_abundance = max(abund)
    +915
    +916            peak_height_diff = (
    +917                lambda hi, li: ((abund[hi] - abund[li]) / max_abundance) * 100
    +918            )
    +919
    +920            abundance_threshold, factor = self.get_threshold(abund)
    +921            # print(abundance_threshold, factor)
    +922            # find indices of all peaks
    +923            dy = abund[1:] - abund[:-1]
    +924
    +925            # replaces nan for infi nity
    +926            indices_nan = where(isnan(abund))[0]
    +927
    +928            if indices_nan.size:
    +929                abund[indices_nan] = inf
    +930                dy[where(isnan(dy))[0]] = inf
    +931
    +932            indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +933
    +934            # noise threshold
    +935            if indexes.size and abundance_threshold is not None:
    +936                indexes = indexes[abund[indexes] / factor >= abundance_threshold]
    +937            # filter out 'peaks' within 3 points of the spectrum limits
    +938            # remove entries within 3 points of upper limit
    +939            indexes = [x for x in indexes if (len_abundance - x) > 3]
    +940            # remove entries within 3 points of zero
    +941            indexes = [x for x in indexes if x > 3]
    +942
    +943            for current_index in indexes:
    +944                if self.label == Labels.simulated_profile:
    +945                    mz_exp_centroid, intes_centr, peak_indexes = self.use_the_max(
    +946                        mass, abund, current_index, len_abundance, peak_height_diff
    +947                    )
    +948                    if mz_exp_centroid:
    +949                        peak_resolving_power = self.calculate_resolving_power(
    +950                            abund, mass, current_index
    +951                        )
    +952                        s2n = intes_centr / self.baseline_noise_std
    +953                        freq_centr = None
    +954                        self.add_mspeak(
    +955                            self.polarity,
    +956                            mz_exp_centroid,
    +957                            abund[current_index],
    +958                            peak_resolving_power,
    +959                            s2n,
    +960                            peak_indexes,
    +961                            exp_freq=freq_centr,
    +962                            ms_parent=self,
    +963                        )
    +964
    +965                else:
    +966                    mz_exp_centroid, freq_centr, intes_centr, peak_indexes = (
    +967                        self.find_apex_fit_quadratic(
    +968                            mass,
    +969                            abund,
    +970                            freq,
    +971                            current_index,
    +972                            len_abundance,
    +973                            peak_height_diff,
    +974                        )
    +975                    )
    +976                    if mz_exp_centroid:
    +977                        try:
    +978                            peak_resolving_power = self.calculate_resolving_power(
    +979                                abund, mass, current_index
    +980                            )
    +981                        except IndexError:
    +982                            print("index error, skipping peak")
    +983                            continue
    +984
    +985                        s2n = intes_centr / self.baseline_noise_std
    +986                        self.add_mspeak(
    +987                            self.polarity,
    +988                            mz_exp_centroid,
    +989                            abund[current_index],
    +990                            peak_resolving_power,
    +991                            s2n,
    +992                            peak_indexes,
    +993                            exp_freq=freq_centr,
    +994                            ms_parent=self,
    +995                        )
     
    @@ -1941,27 +2157,27 @@
    Methods
    -
    58    def prepare_peak_picking_data(self):
    -59        """ Prepare the data for peak picking.
    -60
    -61        This function will prepare the m/z, abundance, and frequency data for peak picking according to the settings.
    -62
    -63        Returns
    -64        -------
    -65        mz : ndarray
    -66            The m/z axis.
    -67        abundance : ndarray
    -68            The abundance axis.
    -69        freq : ndarray or None
    -70            The frequency axis, if available.
    -71        """
    -72        # First apply cut_mz_domain_peak_picking
    -73        mz, abundance, freq = self.cut_mz_domain_peak_picking()
    +            
    70    def prepare_peak_picking_data(self):
    +71        """Prepare the data for peak picking.
    +72
    +73        This function will prepare the m/z, abundance, and frequency data for peak picking according to the settings.
     74
    -75        # Then extrapolate the axes for peak picking
    -76        if self.settings.picking_point_extrapolate > 0:
    -77            mz, abundance, freq = self.extrapolate_axes_for_pp(mz, abundance, freq)
    -78        return mz, abundance, freq
    +75        Returns
    +76        -------
    +77        mz : ndarray
    +78            The m/z axis.
    +79        abundance : ndarray
    +80            The abundance axis.
    +81        freq : ndarray or None
    +82            The frequency axis, if available.
    +83        """
    +84        # First apply cut_mz_domain_peak_picking
    +85        mz, abundance, freq = self.cut_mz_domain_peak_picking()
    +86
    +87        # Then extrapolate the axes for peak picking
    +88        if self.settings.picking_point_extrapolate > 0:
    +89            mz, abundance, freq = self.extrapolate_axes_for_pp(mz, abundance, freq)
    +90        return mz, abundance, freq
     
    @@ -1994,39 +2210,44 @@
    Returns
    -
     80    def cut_mz_domain_peak_picking(self):
    - 81        """
    - 82        Cut the m/z domain for peak picking.
    - 83
    - 84        Simplified function
    - 85        
    - 86        Returns
    - 87        -------
    - 88        mz_domain_X_low_cutoff : ndarray
    - 89            The m/z values within the specified range.
    - 90        mz_domain_low_Y_cutoff : ndarray
    - 91            The abundance values within the specified range.
    - 92        freq_domain_low_Y_cutoff : ndarray or None
    - 93            The frequency values within the specified range, if available.
    - 94
    - 95        """
    - 96        max_picking_mz = self.settings.max_picking_mz
    - 97        min_picking_mz = self.settings.min_picking_mz
    - 98        
    - 99        #min_start =  where(self.mz_exp_profile  > min_picking_mz)[0][0]
    -100        #max_final =  where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    -101        min_start =  searchsorted(a = self.mz_exp_profile, v = min_picking_mz)
    -102        max_final =  searchsorted(a = self.mz_exp_profile, v = max_picking_mz)
    -103
    -104        if self.has_frequency:
    -105
    -106            if self.freq_exp_profile.any():
    -107
    -108                return self.mz_exp_profile[min_start:max_final], self.abundance_profile[min_start:max_final], self.freq_exp_profile[min_start:max_final]
    -109
    -110        else:
    -111
    -112            return self.mz_exp_profile[min_start:max_final], self.abundance_profile[min_start:max_final], None
    +            
     92    def cut_mz_domain_peak_picking(self):
    + 93        """
    + 94        Cut the m/z domain for peak picking.
    + 95
    + 96        Simplified function
    + 97
    + 98        Returns
    + 99        -------
    +100        mz_domain_X_low_cutoff : ndarray
    +101            The m/z values within the specified range.
    +102        mz_domain_low_Y_cutoff : ndarray
    +103            The abundance values within the specified range.
    +104        freq_domain_low_Y_cutoff : ndarray or None
    +105            The frequency values within the specified range, if available.
    +106
    +107        """
    +108        max_picking_mz = self.settings.max_picking_mz
    +109        min_picking_mz = self.settings.min_picking_mz
    +110
    +111        # min_start =  where(self.mz_exp_profile  > min_picking_mz)[0][0]
    +112        # max_final =  where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    +113        min_start = searchsorted(a=self.mz_exp_profile, v=min_picking_mz)
    +114        max_final = searchsorted(a=self.mz_exp_profile, v=max_picking_mz)
    +115
    +116        if self.has_frequency:
    +117            if self.freq_exp_profile.any():
    +118                return (
    +119                    self.mz_exp_profile[min_start:max_final],
    +120                    self.abundance_profile[min_start:max_final],
    +121                    self.freq_exp_profile[min_start:max_final],
    +122                )
    +123
    +124        else:
    +125            return (
    +126                self.mz_exp_profile[min_start:max_final],
    +127                self.abundance_profile[min_start:max_final],
    +128                None,
    +129            )
     
    @@ -2059,43 +2280,53 @@
    Returns
    -
    115    def legacy_cut_mz_domain_peak_picking(self):
    -116        """
    -117        Cut the m/z domain for peak picking.
    -118        DEPRECATED
    -119        Returns
    -120        -------
    -121        mz_domain_X_low_cutoff : ndarray
    -122            The m/z values within the specified range.
    -123        mz_domain_low_Y_cutoff : ndarray
    -124            The abundance values within the specified range.
    -125        freq_domain_low_Y_cutoff : ndarray or None
    -126            The frequency values within the specified range, if available.
    -127
    -128        """
    -129        max_picking_mz = self.settings.max_picking_mz
    -130        min_picking_mz = self.settings.min_picking_mz
    -131        
    -132        min_final =  where(self.mz_exp_profile  > min_picking_mz)[-1][-1]
    -133        min_start =  where(self.mz_exp_profile  > min_picking_mz)[0][0]
    -134
    -135        mz_domain_X_low_cutoff, mz_domain_low_Y_cutoff,  = self.mz_exp_profile [min_start:min_final], self.abundance_profile[min_start:min_final]
    -136
    -137        max_final =  where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    -138        max_start =  where(self.mz_exp_profile < max_picking_mz)[0][0]
    -139
    -140        if self.has_frequency:
    -141
    -142            if self.freq_exp_profile.any():
    +            
    131    def legacy_cut_mz_domain_peak_picking(self):
    +132        """
    +133        Cut the m/z domain for peak picking.
    +134        DEPRECATED
    +135        Returns
    +136        -------
    +137        mz_domain_X_low_cutoff : ndarray
    +138            The m/z values within the specified range.
    +139        mz_domain_low_Y_cutoff : ndarray
    +140            The abundance values within the specified range.
    +141        freq_domain_low_Y_cutoff : ndarray or None
    +142            The frequency values within the specified range, if available.
     143
    -144                freq_domain_low_Y_cutoff = self.freq_exp_profile[min_start:min_final]
    -145
    -146
    -147                return mz_domain_X_low_cutoff[max_start:max_final], mz_domain_low_Y_cutoff[max_start:max_final], freq_domain_low_Y_cutoff[max_start:max_final]
    -148
    -149        else:
    +144        """
    +145        max_picking_mz = self.settings.max_picking_mz
    +146        min_picking_mz = self.settings.min_picking_mz
    +147
    +148        min_final = where(self.mz_exp_profile > min_picking_mz)[-1][-1]
    +149        min_start = where(self.mz_exp_profile > min_picking_mz)[0][0]
     150
    -151            return mz_domain_X_low_cutoff[max_start:max_final], mz_domain_low_Y_cutoff[max_start:max_final], None
    +151        (
    +152            mz_domain_X_low_cutoff,
    +153            mz_domain_low_Y_cutoff,
    +154        ) = (
    +155            self.mz_exp_profile[min_start:min_final],
    +156            self.abundance_profile[min_start:min_final],
    +157        )
    +158
    +159        max_final = where(self.mz_exp_profile < max_picking_mz)[-1][-1]
    +160        max_start = where(self.mz_exp_profile < max_picking_mz)[0][0]
    +161
    +162        if self.has_frequency:
    +163            if self.freq_exp_profile.any():
    +164                freq_domain_low_Y_cutoff = self.freq_exp_profile[min_start:min_final]
    +165
    +166                return (
    +167                    mz_domain_X_low_cutoff[max_start:max_final],
    +168                    mz_domain_low_Y_cutoff[max_start:max_final],
    +169                    freq_domain_low_Y_cutoff[max_start:max_final],
    +170                )
    +171
    +172        else:
    +173            return (
    +174                mz_domain_X_low_cutoff[max_start:max_final],
    +175                mz_domain_low_Y_cutoff[max_start:max_final],
    +176                None,
    +177            )
     
    @@ -2128,52 +2359,52 @@
    Returns
    -
    153    @staticmethod 
    -154    def extrapolate_axis(initial_array, pts):
    -155        """
    -156        This function will extrapolate an input array in both directions by N pts.
    -157
    -158        Parameters
    -159        ----------
    -160        initial_array : ndarray
    -161            The input array.
    -162        pts : int
    -163            The number of points to extrapolate.
    -164
    -165        Returns
    -166        -------
    -167        ndarray
    -168            The extrapolated array.
    -169
    -170        Notes
    -171        --------
    -172        This is a static method.        
    -173        """
    -174        initial_array_len = len(initial_array)
    -175        right_delta = initial_array[-1] - initial_array[-2]  
    -176        left_delta = initial_array[1] - initial_array[0]  
    -177        
    -178        # Create an array with extra space for extrapolation
    -179        pad_array = zeros(initial_array_len + 2 * pts)
    -180        
    -181        # Copy original array into the middle of the padded array
    -182        pad_array[pts:pts + initial_array_len] = initial_array
    -183        
    -184        # Extrapolate the right side
    -185        for pt in range(pts):
    -186            final_value = initial_array[-1]
    -187            value_to_add = right_delta * (pt + 1)
    -188            new_value = final_value + value_to_add
    -189            pad_array[initial_array_len + pts + pt] = new_value
    -190        
    -191        # Extrapolate the left side
    -192        for pt in range(pts):
    -193            first_value = initial_array[0]
    -194            value_to_subtract = left_delta * (pt + 1)
    -195            new_value = first_value - value_to_subtract
    -196            pad_array[pts - pt - 1] = new_value
    -197        
    -198        return pad_array
    +            
    179    @staticmethod
    +180    def extrapolate_axis(initial_array, pts):
    +181        """
    +182        This function will extrapolate an input array in both directions by N pts.
    +183
    +184        Parameters
    +185        ----------
    +186        initial_array : ndarray
    +187            The input array.
    +188        pts : int
    +189            The number of points to extrapolate.
    +190
    +191        Returns
    +192        -------
    +193        ndarray
    +194            The extrapolated array.
    +195
    +196        Notes
    +197        --------
    +198        This is a static method.
    +199        """
    +200        initial_array_len = len(initial_array)
    +201        right_delta = initial_array[-1] - initial_array[-2]
    +202        left_delta = initial_array[1] - initial_array[0]
    +203
    +204        # Create an array with extra space for extrapolation
    +205        pad_array = zeros(initial_array_len + 2 * pts)
    +206
    +207        # Copy original array into the middle of the padded array
    +208        pad_array[pts : pts + initial_array_len] = initial_array
    +209
    +210        # Extrapolate the right side
    +211        for pt in range(pts):
    +212            final_value = initial_array[-1]
    +213            value_to_add = right_delta * (pt + 1)
    +214            new_value = final_value + value_to_add
    +215            pad_array[initial_array_len + pts + pt] = new_value
    +216
    +217        # Extrapolate the left side
    +218        for pt in range(pts):
    +219            first_value = initial_array[0]
    +220            value_to_subtract = left_delta * (pt + 1)
    +221            new_value = first_value - value_to_subtract
    +222            pad_array[pts - pt - 1] = new_value
    +223
    +224        return pad_array
     
    @@ -2212,50 +2443,50 @@
    Notes
    -
    200    def extrapolate_axes_for_pp(self, mz=None, abund=None, freq=None):
    -201        """ Extrapolate the m/z axis and fill the abundance axis with 0s.
    -202
    -203        Parameters
    -204        ----------
    -205        mz : ndarray or None
    -206            The m/z axis, if available. If None, the experimental m/z axis is used.
    -207        abund : ndarray or None
    -208            The abundance axis, if available. If None, the experimental abundance axis is used.
    -209        freq : ndarray or None
    -210            The frequency axis, if available. If None, the experimental frequency axis is used.
    -211
    -212        Returns
    -213        -------
    -214        mz : ndarray
    -215            The extrapolated m/z axis.
    -216        abund : ndarray
    -217            The abundance axis with 0s filled.
    -218        freq : ndarray or None
    -219            The extrapolated frequency axis, if available.
    -220
    -221        Notes
    -222        --------
    -223        This function will extrapolate the mz axis by the number of datapoints specified in the settings,
    -224        and fill the abundance axis with 0s. 
    -225        This should prevent peak picking issues at the spectrum edge.
    -226
    -227        """ 
    -228        # Check if the input arrays are provided
    -229        if mz is None or abund is None:
    -230            mz, abund = self.mz_exp_profile, self.abundance_profile
    -231            if self.has_frequency:
    -232                freq = self.freq_exp_profile
    -233            else: 
    -234                freq = None
    -235        pts = self.settings.picking_point_extrapolate
    -236        if pts == 0:
    -237            return mz, abund, freq
    -238        
    -239        mz = self.extrapolate_axis(mz, pts)
    -240        abund = pad(abund, (pts, pts), mode = 'constant', constant_values=(0,0))
    -241        if freq is not None:
    -242            freq = self.extrapolate_axis(freq, pts)
    -243        return mz, abund, freq
    +            
    226    def extrapolate_axes_for_pp(self, mz=None, abund=None, freq=None):
    +227        """Extrapolate the m/z axis and fill the abundance axis with 0s.
    +228
    +229        Parameters
    +230        ----------
    +231        mz : ndarray or None
    +232            The m/z axis, if available. If None, the experimental m/z axis is used.
    +233        abund : ndarray or None
    +234            The abundance axis, if available. If None, the experimental abundance axis is used.
    +235        freq : ndarray or None
    +236            The frequency axis, if available. If None, the experimental frequency axis is used.
    +237
    +238        Returns
    +239        -------
    +240        mz : ndarray
    +241            The extrapolated m/z axis.
    +242        abund : ndarray
    +243            The abundance axis with 0s filled.
    +244        freq : ndarray or None
    +245            The extrapolated frequency axis, if available.
    +246
    +247        Notes
    +248        --------
    +249        This function will extrapolate the mz axis by the number of datapoints specified in the settings,
    +250        and fill the abundance axis with 0s.
    +251        This should prevent peak picking issues at the spectrum edge.
    +252
    +253        """
    +254        # Check if the input arrays are provided
    +255        if mz is None or abund is None:
    +256            mz, abund = self.mz_exp_profile, self.abundance_profile
    +257            if self.has_frequency:
    +258                freq = self.freq_exp_profile
    +259            else:
    +260                freq = None
    +261        pts = self.settings.picking_point_extrapolate
    +262        if pts == 0:
    +263            return mz, abund, freq
    +264
    +265        mz = self.extrapolate_axis(mz, pts)
    +266        abund = pad(abund, (pts, pts), mode="constant", constant_values=(0, 0))
    +267        if freq is not None:
    +268            freq = self.extrapolate_axis(freq, pts)
    +269        return mz, abund, freq
     
    @@ -2286,7 +2517,7 @@
    Returns
    Notes

    This function will extrapolate the mz axis by the number of datapoints specified in the settings, -and fill the abundance axis with 0s. +and fill the abundance axis with 0s. This should prevent peak picking issues at the spectrum edge.

    @@ -2303,29 +2534,30 @@

    Notes
    -
    245    def do_peak_picking(self):
    -246        """ Perform peak picking.
    -247
    -248        """
    -249        mz, abundance, freq = self.prepare_peak_picking_data()
    -250        
    -251        if self.label == Labels.bruker_frequency or self.label == Labels.midas_frequency:
    -252            self.calc_centroid(mz, abundance, freq)
    -253
    -254        elif self.label == Labels.thermo_profile:
    -255            self.calc_centroid(mz, abundance, freq)
    -256
    -257        elif self.label == Labels.bruker_profile:
    -258            self.calc_centroid(mz, abundance, freq)
    -259
    -260        elif self.label == Labels.booster_profile:
    -261            self.calc_centroid(mz, abundance, freq)
    -262
    -263        elif self.label == Labels.simulated_profile:
    -264            self.calc_centroid(mz, abundance, freq)
    -265
    -266        else: 
    -267            raise Exception("Unknow mass spectrum type", self.label)
    +            
    271    def do_peak_picking(self):
    +272        """Perform peak picking."""
    +273        mz, abundance, freq = self.prepare_peak_picking_data()
    +274
    +275        if (
    +276            self.label == Labels.bruker_frequency
    +277            or self.label == Labels.midas_frequency
    +278        ):
    +279            self.calc_centroid(mz, abundance, freq)
    +280
    +281        elif self.label == Labels.thermo_profile:
    +282            self.calc_centroid(mz, abundance, freq)
    +283
    +284        elif self.label == Labels.bruker_profile:
    +285            self.calc_centroid(mz, abundance, freq)
    +286
    +287        elif self.label == Labels.booster_profile:
    +288            self.calc_centroid(mz, abundance, freq)
    +289
    +290        elif self.label == Labels.simulated_profile:
    +291            self.calc_centroid(mz, abundance, freq)
    +292
    +293        else:
    +294            raise Exception("Unknow mass spectrum type", self.label)
     
    @@ -2345,47 +2577,50 @@
    Notes
    -
    269    def find_minima(self, apex_index, abundance, len_abundance, right=True):
    -270        """ Find the minima of a peak.
    -271
    -272        Parameters
    -273        ----------
    -274        apex_index : int
    -275            The index of the peak apex.
    -276        abundance : ndarray
    -277            The abundance values.
    -278        len_abundance : int
    -279            The length of the abundance array.
    -280        right : bool, optional
    -281            Flag indicating whether to search for minima to the right of the apex (default is True).
    -282
    -283        Returns
    -284        -------
    -285        int
    -286            The index of the minima.
    -287
    -288        """
    -289        j = apex_index
    -290        
    -291        if right: minima = abundance[j] > abundance[j+1]
    -292        else: minima = abundance[j] > abundance[j-1]
    -293
    -294        while minima:
    -295            
    -296            if j == 1 or j == len_abundance -2:
    -297                break
    -298            
    -299            if right: 
    -300                j += 1
    -301
    -302                minima = abundance[j] >= abundance[j+1]
    -303
    -304            else: 
    -305                j -= 1
    -306                minima = abundance[j] >= abundance[j-1]
    -307        
    -308        if right: return j
    -309        else: return j
    +            
    296    def find_minima(self, apex_index, abundance, len_abundance, right=True):
    +297        """Find the minima of a peak.
    +298
    +299        Parameters
    +300        ----------
    +301        apex_index : int
    +302            The index of the peak apex.
    +303        abundance : ndarray
    +304            The abundance values.
    +305        len_abundance : int
    +306            The length of the abundance array.
    +307        right : bool, optional
    +308            Flag indicating whether to search for minima to the right of the apex (default is True).
    +309
    +310        Returns
    +311        -------
    +312        int
    +313            The index of the minima.
    +314
    +315        """
    +316        j = apex_index
    +317
    +318        if right:
    +319            minima = abundance[j] > abundance[j + 1]
    +320        else:
    +321            minima = abundance[j] > abundance[j - 1]
    +322
    +323        while minima:
    +324            if j == 1 or j == len_abundance - 2:
    +325                break
    +326
    +327            if right:
    +328                j += 1
    +329
    +330                minima = abundance[j] >= abundance[j + 1]
    +331
    +332            else:
    +333                j -= 1
    +334                minima = abundance[j] >= abundance[j - 1]
    +335
    +336        if right:
    +337            return j
    +338        else:
    +339            return j
     
    @@ -2425,49 +2660,49 @@
    Returns
    -
    311    @staticmethod
    -312    def linear_fit_calc(intes, massa, index_term, index_sign):
    -313        """
    -314        Algebraic solution to a linear fit - roughly 25-50x faster than numpy polyfit when passing only two vals and doing a 1st order fit
    -315
    -316        Parameters
    -317        ----------
    -318        intes : ndarray
    -319            The intensity values.
    -320        massa : ndarray
    -321            The mass values.
    -322        index_term : int
    -323            The index of the current term.
    -324        index_sign : str
    -325            The index sign
    -326        
    -327        Returns
    -328        -------
    -329        ndarray
    -330            The coefficients of the linear fit.
    -331        
    -332        Notes
    -333        --------
    -334        This is a static method.
    -335        """
    -336        if index_sign == '+':
    -337            x1, x2 = massa[index_term], massa[index_term + 1]
    -338            y1, y2 = intes[index_term], intes[index_term + 1]
    -339        elif index_sign =='-':
    -340            x1, x2 = massa[index_term], massa[index_term - 1]
    -341            y1, y2 = intes[index_term], intes[index_term - 1]
    -342        else:
    -343            warnings.warn('error in linear fit calc, unknown index sign')
    -344        
    -345        # Calculate the slope (m)
    -346        slope = (y2 - y1) / (x2 - x1)
    -347        
    -348        # Calculate the intercept (b)
    -349        intercept = y1 - slope * x1
    -350        
    -351        # The coefficients array would be [slope, intercept]
    -352        coefficients = array([slope, intercept])
    -353        return coefficients
    +            
    341    @staticmethod
    +342    def linear_fit_calc(intes, massa, index_term, index_sign):
    +343        """
    +344        Algebraic solution to a linear fit - roughly 25-50x faster than numpy polyfit when passing only two vals and doing a 1st order fit
    +345
    +346        Parameters
    +347        ----------
    +348        intes : ndarray
    +349            The intensity values.
    +350        massa : ndarray
    +351            The mass values.
    +352        index_term : int
    +353            The index of the current term.
    +354        index_sign : str
    +355            The index sign
    +356
    +357        Returns
    +358        -------
    +359        ndarray
    +360            The coefficients of the linear fit.
    +361
    +362        Notes
    +363        --------
    +364        This is a static method.
    +365        """
    +366        if index_sign == "+":
    +367            x1, x2 = massa[index_term], massa[index_term + 1]
    +368            y1, y2 = intes[index_term], intes[index_term + 1]
    +369        elif index_sign == "-":
    +370            x1, x2 = massa[index_term], massa[index_term - 1]
    +371            y1, y2 = intes[index_term], intes[index_term - 1]
    +372        else:
    +373            warnings.warn("error in linear fit calc, unknown index sign")
    +374
    +375        # Calculate the slope (m)
    +376        slope = (y2 - y1) / (x2 - x1)
    +377
    +378        # Calculate the intercept (b)
    +379        intercept = y1 - slope * x1
    +380
    +381        # The coefficients array would be [slope, intercept]
    +382        coefficients = array([slope, intercept])
    +383        return coefficients
     
    @@ -2510,121 +2745,129 @@
    Notes
    -
    355    def calculate_resolving_power(self, intes, massa, current_index):
    -356        """ Calculate the resolving power of a peak.
    -357
    -358        Parameters
    -359        ----------
    -360        intes : ndarray
    -361            The intensity values.
    -362        massa : ndarray
    -363            The mass values.
    -364        current_index : int
    -365            The index of the current peak.
    -366
    -367        Returns
    -368        -------
    -369        float
    -370            The resolving power of the peak.
    -371
    -372        Notes
    -373        --------
    -374        This is a conservative calculation of resolving power,
    -375        the peak need to be resolved at least at the half-maximum magnitude,
    -376        otherwise, the combined full width at half maximum is used to calculate resolving power.
    -377
    -378        """
    -379
    -380        peak_height = intes[current_index]
    -381        target_peak_height = peak_height/2
    -382
    -383        peak_height_minus = peak_height
    -384        peak_height_plus = peak_height
    -385        
    -386        # There are issues when a peak is at the high or low limit of a spectrum in finding its local minima and maxima
    -387        # This solution will return nan for resolving power when a peak is possibly too close to an edge to avoid the issue
    -388        
    -389        if current_index <5:
    -390            warnings.warn("peak at low spectrum edge, returning no resolving power")
    -391            return nan
    -392        elif abs(current_index-len(intes))<5:
    -393            warnings.warn("peak at high spectrum edge, returning no resolving power")
    -394            return nan
    -395        else:
    -396            pass
    -397
    -398        index_minus = current_index
    -399        while peak_height_minus  >= target_peak_height:
    -400
    -401            index_minus = index_minus -1
    -402            if index_minus < 0:
    -403                warnings.warn('Res. calc. warning - peak index minus adjacent to spectrum edge \n \
    -404                        Zeroing the first 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported \n \
    -405                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)')
    -406                # Pad the first 5 data points with zeros and restart the loop
    -407                intes[:5] = 0
    -408                peak_height_minus = target_peak_height
    -409                index_minus = current_index            
    -410            else:
    -411                peak_height_minus = intes[index_minus]
    +            
    385    def calculate_resolving_power(self, intes, massa, current_index):
    +386        """Calculate the resolving power of a peak.
    +387
    +388        Parameters
    +389        ----------
    +390        intes : ndarray
    +391            The intensity values.
    +392        massa : ndarray
    +393            The mass values.
    +394        current_index : int
    +395            The index of the current peak.
    +396
    +397        Returns
    +398        -------
    +399        float
    +400            The resolving power of the peak.
    +401
    +402        Notes
    +403        --------
    +404        This is a conservative calculation of resolving power,
    +405        the peak need to be resolved at least at the half-maximum magnitude,
    +406        otherwise, the combined full width at half maximum is used to calculate resolving power.
    +407
    +408        """
    +409
    +410        peak_height = intes[current_index]
    +411        target_peak_height = peak_height / 2
     412
    -413        if self.mspeaks_settings.legacy_centroid_polyfit:
    -414            x = [ massa[index_minus],  massa[index_minus+1]]
    -415            y = [ intes[index_minus],  intes[index_minus+1]]
    -416            coefficients = polyfit(x, y, 1)
    -417        else:
    -418            coefficients = self.linear_fit_calc(intes, massa, index_minus,index_sign='+')
    -419
    -420        a = coefficients[0]
    -421        b = coefficients[1]
    -422        if self.mspeaks_settings.legacy_resolving_power:
    -423            y_intercept =  intes[index_minus] + ((intes[index_minus+1] - intes[index_minus])/2)
    -424        else:
    -425            y_intercept =  target_peak_height
    -426        massa1 = (y_intercept -b)/a
    +413        peak_height_minus = peak_height
    +414        peak_height_plus = peak_height
    +415
    +416        # There are issues when a peak is at the high or low limit of a spectrum in finding its local minima and maxima
    +417        # This solution will return nan for resolving power when a peak is possibly too close to an edge to avoid the issue
    +418
    +419        if current_index < 5:
    +420            warnings.warn("peak at low spectrum edge, returning no resolving power")
    +421            return nan
    +422        elif abs(current_index - len(intes)) < 5:
    +423            warnings.warn("peak at high spectrum edge, returning no resolving power")
    +424            return nan
    +425        else:
    +426            pass
     427
    -428        index_plus = current_index
    -429        while peak_height_plus  >= target_peak_height:
    -430
    -431            index_plus = index_plus + 1
    -432               
    -433            try: 
    -434                peak_height_plus = intes[index_plus]
    -435            except IndexError:
    -436                warnings.warn('Res. calc. warning - peak index plus adjacent to spectrum edge \n \
    -437                        Zeroing the last 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported\
    -438                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)')
    -439                # Pad the first 5 data points with zeros and restart the loop
    -440                intes[-5:] = 0
    -441                peak_height_plus = target_peak_height
    -442                index_plus = current_index 
    +428        index_minus = current_index
    +429        while peak_height_minus >= target_peak_height:
    +430            index_minus = index_minus - 1
    +431            if index_minus < 0:
    +432                warnings.warn(
    +433                    "Res. calc. warning - peak index minus adjacent to spectrum edge \n \
    +434                        Zeroing the first 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported \n \
    +435                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)"
    +436                )
    +437                # Pad the first 5 data points with zeros and restart the loop
    +438                intes[:5] = 0
    +439                peak_height_minus = target_peak_height
    +440                index_minus = current_index
    +441            else:
    +442                peak_height_minus = intes[index_minus]
     443
     444        if self.mspeaks_settings.legacy_centroid_polyfit:
    -445            x = [massa[index_plus],  massa[index_plus - 1]]
    -446            y = [intes[index_plus],  intes[index_plus - 1]]
    +445            x = [massa[index_minus], massa[index_minus + 1]]
    +446            y = [intes[index_minus], intes[index_minus + 1]]
     447            coefficients = polyfit(x, y, 1)
     448        else:
    -449            coefficients = self.linear_fit_calc(intes, massa, index_plus,index_sign='-')
    -450
    -451        a = coefficients[0]
    -452        b = coefficients[1]
    -453
    -454        if self.mspeaks_settings.legacy_resolving_power:
    -455            y_intercept =  intes[index_plus - 1] + ((intes[index_plus] - intes[index_plus - 1])/2)
    -456        else:
    -457            y_intercept =  target_peak_height
    -458
    -459        massa2 = (y_intercept -b)/a
    -460
    -461        if massa1 > massa2:
    +449            coefficients = self.linear_fit_calc(
    +450                intes, massa, index_minus, index_sign="+"
    +451            )
    +452
    +453        a = coefficients[0]
    +454        b = coefficients[1]
    +455        if self.mspeaks_settings.legacy_resolving_power:
    +456            y_intercept = intes[index_minus] + (
    +457                (intes[index_minus + 1] - intes[index_minus]) / 2
    +458            )
    +459        else:
    +460            y_intercept = target_peak_height
    +461        massa1 = (y_intercept - b) / a
     462
    -463            resolvingpower =  massa[current_index]/(massa1-massa2)
    -464
    -465        else:
    +463        index_plus = current_index
    +464        while peak_height_plus >= target_peak_height:
    +465            index_plus = index_plus + 1
     466
    -467            resolvingpower =  massa[current_index]/(massa2-massa1)
    -468
    -469        return resolvingpower
    +467            try:
    +468                peak_height_plus = intes[index_plus]
    +469            except IndexError:
    +470                warnings.warn(
    +471                    "Res. calc. warning - peak index plus adjacent to spectrum edge \n \
    +472                        Zeroing the last 5 data points of abundance. Peaks at spectrum edge may be incorrectly reported\
    +473                        Perhaps try to increase picking_point_extrapolate (e.g. to 3)"
    +474                )
    +475                # Pad the first 5 data points with zeros and restart the loop
    +476                intes[-5:] = 0
    +477                peak_height_plus = target_peak_height
    +478                index_plus = current_index
    +479
    +480        if self.mspeaks_settings.legacy_centroid_polyfit:
    +481            x = [massa[index_plus], massa[index_plus - 1]]
    +482            y = [intes[index_plus], intes[index_plus - 1]]
    +483            coefficients = polyfit(x, y, 1)
    +484        else:
    +485            coefficients = self.linear_fit_calc(
    +486                intes, massa, index_plus, index_sign="-"
    +487            )
    +488
    +489        a = coefficients[0]
    +490        b = coefficients[1]
    +491
    +492        if self.mspeaks_settings.legacy_resolving_power:
    +493            y_intercept = intes[index_plus - 1] + (
    +494                (intes[index_plus] - intes[index_plus - 1]) / 2
    +495            )
    +496        else:
    +497            y_intercept = target_peak_height
    +498
    +499        massa2 = (y_intercept - b) / a
    +500
    +501        if massa1 > massa2:
    +502            resolvingpower = massa[current_index] / (massa1 - massa2)
    +503
    +504        else:
    +505            resolvingpower = massa[current_index] / (massa2 - massa1)
    +506
    +507        return resolvingpower
     
    @@ -2667,39 +2910,37 @@
    Notes
    -
    471    def cal_minima(self, mass, abun):
    -472        """ Calculate the minima of a peak.
    -473
    -474        Parameters
    -475        ----------
    -476        mass : ndarray
    -477            The mass values.
    -478        abun : ndarray
    -479            The abundance values.
    -480
    -481        Returns
    -482        -------
    -483        ndarray or None
    -484            The mass values at the minima, if found.
    -485
    -486        """
    -487        abun = -abun
    -488
    -489        dy = abun[1:] - abun[:-1]
    -490        
    -491        # replaces nan for infinity
    -492        indices_nan = where(isnan(abun))[0]
    -493        
    -494        if indices_nan.size:
    -495            
    -496            abun[indices_nan] = inf
    -497            dy[where(isnan(dy))[0]] = inf
    -498        
    -499        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    -500
    -501        if indexes.size:
    -502            
    -503            return mass[indexes], abun[indexes]
    +            
    509    def cal_minima(self, mass, abun):
    +510        """Calculate the minima of a peak.
    +511
    +512        Parameters
    +513        ----------
    +514        mass : ndarray
    +515            The mass values.
    +516        abun : ndarray
    +517            The abundance values.
    +518
    +519        Returns
    +520        -------
    +521        ndarray or None
    +522            The mass values at the minima, if found.
    +523
    +524        """
    +525        abun = -abun
    +526
    +527        dy = abun[1:] - abun[:-1]
    +528
    +529        # replaces nan for infinity
    +530        indices_nan = where(isnan(abun))[0]
    +531
    +532        if indices_nan.size:
    +533            abun[indices_nan] = inf
    +534            dy[where(isnan(dy))[0]] = inf
    +535
    +536        indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +537
    +538        if indexes.size:
    +539            return mass[indexes], abun[indexes]
     
    @@ -2734,65 +2975,83 @@
    Returns
    -
    505    def calc_centroid(self, mass, abund, freq):
    -506        """ Calculate the centroid of a peak.
    -507
    -508        Parameters
    -509        ----------
    -510        mass : ndarray
    -511            The mass values.
    -512        abund : ndarray
    -513            The abundance values.
    -514        freq : ndarray or None
    -515            The frequency values, if available.
    -516
    -517        Returns
    -518        -------
    -519        None
    -520
    -521        """
    -522        
    -523        max_height = self.mspeaks_settings.peak_height_max_percent
    -524        max_prominence = self.mspeaks_settings.peak_max_prominence_percent
    -525        min_peak_datapoints = self.mspeaks_settings.min_peak_datapoints
    -526        peak_derivative_threshold = self.mspeaks_settings.peak_derivative_threshold
    -527        max_abun = max(abund)
    -528        peak_height_diff = lambda hi, li : ((abund[hi] - abund[li]) / max_abun ) * 100
    -529                    
    -530        domain = mass
    -531        signal = abund
    -532        len_signal = len(signal)
    -533        
    -534        signal_threshold, factor = self.get_threshold(abund)
    -535        max_signal = factor
    -536
    -537        correct_baseline = False
    -538
    -539        include_indexes = sp.peak_picking_first_derivative(domain, signal, max_height, max_prominence, max_signal, 
    -540                                                           min_peak_datapoints,
    -541                                                           peak_derivative_threshold,
    -542                                                           signal_threshold=signal_threshold, 
    -543                                                           correct_baseline=correct_baseline, 
    -544                                                           abun_norm=1,
    -545                                                           plot_res=False)
    -546
    -547        for indexes_tuple in include_indexes:
    -548            
    -549            apex_index = indexes_tuple[1]
    -550
    -551            peak_indexes = self.check_prominence(abund, apex_index, len_signal, peak_height_diff )
    +            
    541    def calc_centroid(self, mass, abund, freq):
    +542        """Calculate the centroid of a peak.
    +543
    +544        Parameters
    +545        ----------
    +546        mass : ndarray
    +547            The mass values.
    +548        abund : ndarray
    +549            The abundance values.
    +550        freq : ndarray or None
    +551            The frequency values, if available.
     552
    -553            if peak_indexes:
    -554                
    -555                mz_exp_centroid, freq_centr, intes_centr = self.find_apex_fit_quadratic(mass, abund, freq, apex_index)
    +553        Returns
    +554        -------
    +555        None
     556
    -557                if mz_exp_centroid:
    -558                                   
    -559                    peak_resolving_power = self.calculate_resolving_power( abund, mass, apex_index)
    -560                    s2n = intes_centr/self.baseline_noise_std
    -561                    self.add_mspeak(self.polarity, mz_exp_centroid, abund[apex_index] , peak_resolving_power, s2n, indexes_tuple, exp_freq=freq_centr, ms_parent=self)
    -562                #pyplot.plot(domain[start_index: final_index + 1], signal[start_index:final_index + 1], c='black')
    -563                #pyplot.show()
    +557        """
    +558
    +559        max_height = self.mspeaks_settings.peak_height_max_percent
    +560        max_prominence = self.mspeaks_settings.peak_max_prominence_percent
    +561        min_peak_datapoints = self.mspeaks_settings.min_peak_datapoints
    +562        peak_derivative_threshold = self.mspeaks_settings.peak_derivative_threshold
    +563        max_abun = max(abund)
    +564        peak_height_diff = lambda hi, li: ((abund[hi] - abund[li]) / max_abun) * 100
    +565
    +566        domain = mass
    +567        signal = abund
    +568        len_signal = len(signal)
    +569
    +570        signal_threshold, factor = self.get_threshold(abund)
    +571        max_signal = factor
    +572
    +573        correct_baseline = False
    +574
    +575        include_indexes = sp.peak_picking_first_derivative(
    +576            domain,
    +577            signal,
    +578            max_height,
    +579            max_prominence,
    +580            max_signal,
    +581            min_peak_datapoints,
    +582            peak_derivative_threshold,
    +583            signal_threshold=signal_threshold,
    +584            correct_baseline=correct_baseline,
    +585            abun_norm=1,
    +586            plot_res=False,
    +587        )
    +588
    +589        for indexes_tuple in include_indexes:
    +590            apex_index = indexes_tuple[1]
    +591
    +592            peak_indexes = self.check_prominence(
    +593                abund, apex_index, len_signal, peak_height_diff
    +594            )
    +595
    +596            if peak_indexes:
    +597                mz_exp_centroid, freq_centr, intes_centr = self.find_apex_fit_quadratic(
    +598                    mass, abund, freq, apex_index
    +599                )
    +600
    +601                if mz_exp_centroid:
    +602                    peak_resolving_power = self.calculate_resolving_power(
    +603                        abund, mass, apex_index
    +604                    )
    +605                    s2n = intes_centr / self.baseline_noise_std
    +606                    self.add_mspeak(
    +607                        self.polarity,
    +608                        mz_exp_centroid,
    +609                        abund[apex_index],
    +610                        peak_resolving_power,
    +611                        s2n,
    +612                        indexes_tuple,
    +613                        exp_freq=freq_centr,
    +614                        ms_parent=self,
    +615                    )
    +616                # pyplot.plot(domain[start_index: final_index + 1], signal[start_index:final_index + 1], c='black')
    +617                # pyplot.show()
     
    @@ -2829,65 +3088,68 @@
    Returns
    -
    565    def get_threshold(self, intes):
    -566        """ Get the intensity threshold for peak picking.
    -567
    -568        Parameters
    -569        ----------
    -570        intes : ndarray
    -571            The intensity values.
    -572
    -573        Returns
    -574        -------
    -575        float
    -576            The intensity threshold.
    -577        float
    -578            The factor to multiply the intensity threshold by.
    -579        """
    -580                
    -581        intes = array(intes).astype(float)
    -582       
    -583        noise_threshold_method = self.settings.noise_threshold_method
    -584
    -585        if noise_threshold_method == 'minima':
    -586            
    -587            if self.is_centroid:
    -588                warn("Auto threshould is disabled for centroid data, returning 0")
    -589                factor = 1
    -590                abundance_threshold = 1e-20
    -591            #print(self.settings.noise_threshold_min_std)
    -592            else:
    -593                abundance_threshold = self.baseline_noise + (self.settings.noise_threshold_min_std * self.baseline_noise_std)
    -594                factor = 1
    -595
    -596        elif noise_threshold_method == 'signal_noise':
    -597
    -598            abundance_threshold = self.settings.noise_threshold_min_s2n
    -599            if self.is_centroid:
    -600                factor = 1
    -601            else:
    -602                factor = self.baseline_noise_std
    -603
    -604        elif noise_threshold_method == "relative_abundance":
    -605
    -606            abundance_threshold = self.settings.noise_threshold_min_relative_abundance
    -607            factor = intes.max()/100
    -608
    -609        elif noise_threshold_method == "absolute_abundance":
    -610
    -611            abundance_threshold = self.settings.noise_threshold_absolute_abundance
    -612            factor = 1
    -613
    -614        elif noise_threshold_method == 'log':
    -615            if self.is_centroid:
    -616                raise  Exception("log noise Not tested for centroid data")
    -617            abundance_threshold = self.settings.noise_threshold_log_nsigma
    -618            factor = self.baseline_noise_std
    -619
    -620        else:
    -621            raise  Exception("%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class" % noise_threshold_method)
    -622        
    -623        return abundance_threshold, factor
    +            
    619    def get_threshold(self, intes):
    +620        """Get the intensity threshold for peak picking.
    +621
    +622        Parameters
    +623        ----------
    +624        intes : ndarray
    +625            The intensity values.
    +626
    +627        Returns
    +628        -------
    +629        float
    +630            The intensity threshold.
    +631        float
    +632            The factor to multiply the intensity threshold by.
    +633        """
    +634
    +635        intes = array(intes).astype(float)
    +636
    +637        noise_threshold_method = self.settings.noise_threshold_method
    +638
    +639        if noise_threshold_method == "minima":
    +640            if self.is_centroid:
    +641                warnings.warn(
    +642                    "Auto threshould is disabled for centroid data, returning 0"
    +643                )
    +644                factor = 1
    +645                abundance_threshold = 1e-20
    +646            # print(self.settings.noise_threshold_min_std)
    +647            else:
    +648                abundance_threshold = self.baseline_noise + (
    +649                    self.settings.noise_threshold_min_std * self.baseline_noise_std
    +650                )
    +651                factor = 1
    +652
    +653        elif noise_threshold_method == "signal_noise":
    +654            abundance_threshold = self.settings.noise_threshold_min_s2n
    +655            if self.is_centroid:
    +656                factor = 1
    +657            else:
    +658                factor = self.baseline_noise_std
    +659
    +660        elif noise_threshold_method == "relative_abundance":
    +661            abundance_threshold = self.settings.noise_threshold_min_relative_abundance
    +662            factor = intes.max() / 100
    +663
    +664        elif noise_threshold_method == "absolute_abundance":
    +665            abundance_threshold = self.settings.noise_threshold_absolute_abundance
    +666            factor = 1
    +667
    +668        elif noise_threshold_method == "log":
    +669            if self.is_centroid:
    +670                raise Exception("log noise Not tested for centroid data")
    +671            abundance_threshold = self.settings.noise_threshold_log_nsigma
    +672            factor = self.baseline_noise_std
    +673
    +674        else:
    +675            raise Exception(
    +676                "%s method was not implemented, please refer to corems.mass_spectrum.calc.NoiseCalc Class"
    +677                % noise_threshold_method
    +678            )
    +679
    +680        return abundance_threshold, factor
     
    @@ -2922,45 +3184,53 @@
    Returns
    -
    625    @staticmethod
    -626    def algebraic_quadratic(list_mass, list_y):
    -627        """
    -628        Find the apex of a peak - algebraically. 
    -629        Faster than using numpy polyfit by ~28x per fit.
    -630
    -631        Parameters
    -632        ----------
    -633        list_mass : ndarray
    -634            list of m/z values (3 points)
    -635        list_y : ndarray
    -636            list of abundance values (3 points)
    -637
    -638        Returns
    -639        -------
    -640        a, b, c: float
    -641            coefficients of the quadratic equation.
    -642
    -643        Notes
    -644        --------
    -645        This is a static method. 
    -646        """
    -647        x_1, x_2, x_3 = list_mass
    -648        y_1, y_2, y_3 = list_y 
    -649
    -650        a = y_1/((x_1-x_2)*(x_1-x_3)) + y_2/((x_2-x_1)*(x_2-x_3)) + y_3/((x_3-x_1)*(x_3-x_2))
    -651
    -652        b = (-y_1*(x_2+x_3)/((x_1-x_2)*(x_1-x_3))
    -653            -y_2*(x_1+x_3)/((x_2-x_1)*(x_2-x_3))
    -654            -y_3*(x_1+x_2)/((x_3-x_1)*(x_3-x_2)))
    -655        
    -656        c = (y_1*x_2*x_3/((x_1-x_2)*(x_1-x_3))
    -657            +y_2*x_1*x_3/((x_2-x_1)*(x_2-x_3))
    -658            +y_3*x_1*x_2/((x_3-x_1)*(x_3-x_2)))
    -659        return a, b, c
    +            
    682    @staticmethod
    +683    def algebraic_quadratic(list_mass, list_y):
    +684        """
    +685        Find the apex of a peak - algebraically.
    +686        Faster than using numpy polyfit by ~28x per fit.
    +687
    +688        Parameters
    +689        ----------
    +690        list_mass : ndarray
    +691            list of m/z values (3 points)
    +692        list_y : ndarray
    +693            list of abundance values (3 points)
    +694
    +695        Returns
    +696        -------
    +697        a, b, c: float
    +698            coefficients of the quadratic equation.
    +699
    +700        Notes
    +701        --------
    +702        This is a static method.
    +703        """
    +704        x_1, x_2, x_3 = list_mass
    +705        y_1, y_2, y_3 = list_y
    +706
    +707        a = (
    +708            y_1 / ((x_1 - x_2) * (x_1 - x_3))
    +709            + y_2 / ((x_2 - x_1) * (x_2 - x_3))
    +710            + y_3 / ((x_3 - x_1) * (x_3 - x_2))
    +711        )
    +712
    +713        b = (
    +714            -y_1 * (x_2 + x_3) / ((x_1 - x_2) * (x_1 - x_3))
    +715            - y_2 * (x_1 + x_3) / ((x_2 - x_1) * (x_2 - x_3))
    +716            - y_3 * (x_1 + x_2) / ((x_3 - x_1) * (x_3 - x_2))
    +717        )
    +718
    +719        c = (
    +720            y_1 * x_2 * x_3 / ((x_1 - x_2) * (x_1 - x_3))
    +721            + y_2 * x_1 * x_3 / ((x_2 - x_1) * (x_2 - x_3))
    +722            + y_3 * x_1 * x_2 / ((x_3 - x_1) * (x_3 - x_2))
    +723        )
    +724        return a, b, c
     
    -

    Find the apex of a peak - algebraically. +

    Find the apex of a peak - algebraically. Faster than using numpy polyfit by ~28x per fit.

    Parameters
    @@ -2997,91 +3267,102 @@
    Notes
    -
    661    def find_apex_fit_quadratic(self, mass, abund, freq, current_index):
    -662        """ 
    -663        Find the apex of a peak.
    -664        
    -665        Parameters
    -666        ----------
    -667        mass : ndarray
    -668            The mass values.
    -669        abund : ndarray
    -670            The abundance values.
    -671        freq : ndarray or None  
    -672            The frequency values, if available.
    -673        current_index : int
    -674            The index of the current peak.
    -675        
    -676
    -677        Returns
    -678        -------
    -679        float
    -680            The m/z value of the peak apex.
    -681        float
    -682            The frequency value of the peak apex, if available.
    -683        float
    -684            The abundance value of the peak apex.
    -685        
    -686        """
    -687        # calc prominence
    -688        #peak_indexes = self.check_prominence(abund, current_index, len_abundance, peak_height_diff )
    -689        
    -690        #if not peak_indexes:        
    -691            
    -692        #    return None, None, None, None           
    -693        
    -694        #else:    
    -695            
    -696        # fit parabola to three most abundant datapoints
    -697        list_mass = [mass[current_index - 1], mass[current_index], mass[current_index +1]]
    -698        list_y = [abund[current_index - 1],abund[current_index], abund[current_index +1]]
    -699        
    -700        if self.mspeaks_settings.legacy_centroid_polyfit:
    -701            z = polyfit(list_mass, list_y, 2)
    -702            a = z[0]
    -703            b = z[1]
    -704        else:
    -705            a, b, c = self.algebraic_quadratic(list_mass, list_y)
    -706
    -707
    -708        calculated = -b/(2*a)
    -709        
    -710        if calculated < 1 or int(calculated) != int(list_mass[1]):
    -711
    -712            mz_exp_centroid = list_mass[1]
    -713        
    -714        else:
    -715            
    -716            mz_exp_centroid = calculated 
    -717        
    -718        if self.label == Labels.bruker_frequency or self.label == Labels.midas_frequency:
    -719            
    -720            # fit parabola to three most abundant frequency datapoints
    -721            list_freq = [freq[current_index - 1], freq[current_index], freq[current_index +1]]
    -722            if self.mspeaks_settings.legacy_centroid_polyfit:
    -723                z = polyfit(list_mass, list_y, 2)
    -724                a = z[0]
    -725                b = z[1]
    -726            else:
    -727                a, b, c = self.algebraic_quadratic(list_mass, list_y)
    -728            
    -729            calculated_freq = -b/(2*a)
    -730
    -731            if calculated_freq < 1 or int(calculated_freq) != freq[current_index]:
    -732                freq_centr = list_freq[1]
    -733
    -734            else:
    -735                freq_centr = calculated_freq
    -736        
    -737        else:
    -738                freq_centr = None
    -739
    -740        if self.mspeaks_settings.legacy_centroid_polyfit:
    -741            abundance_centroid = abund[current_index]
    -742        else: 
    -743            abundance_centroid = a*mz_exp_centroid**2 + b*mz_exp_centroid + c
    -744
    -745        return mz_exp_centroid, freq_centr, abundance_centroid 
    +            
    726    def find_apex_fit_quadratic(self, mass, abund, freq, current_index):
    +727        """
    +728        Find the apex of a peak.
    +729
    +730        Parameters
    +731        ----------
    +732        mass : ndarray
    +733            The mass values.
    +734        abund : ndarray
    +735            The abundance values.
    +736        freq : ndarray or None
    +737            The frequency values, if available.
    +738        current_index : int
    +739            The index of the current peak.
    +740
    +741
    +742        Returns
    +743        -------
    +744        float
    +745            The m/z value of the peak apex.
    +746        float
    +747            The frequency value of the peak apex, if available.
    +748        float
    +749            The abundance value of the peak apex.
    +750
    +751        """
    +752        # calc prominence
    +753        # peak_indexes = self.check_prominence(abund, current_index, len_abundance, peak_height_diff )
    +754
    +755        # if not peak_indexes:
    +756
    +757        #    return None, None, None, None
    +758
    +759        # else:
    +760
    +761        # fit parabola to three most abundant datapoints
    +762        list_mass = [
    +763            mass[current_index - 1],
    +764            mass[current_index],
    +765            mass[current_index + 1],
    +766        ]
    +767        list_y = [
    +768            abund[current_index - 1],
    +769            abund[current_index],
    +770            abund[current_index + 1],
    +771        ]
    +772
    +773        if self.mspeaks_settings.legacy_centroid_polyfit:
    +774            z = polyfit(list_mass, list_y, 2)
    +775            a = z[0]
    +776            b = z[1]
    +777        else:
    +778            a, b, c = self.algebraic_quadratic(list_mass, list_y)
    +779
    +780        calculated = -b / (2 * a)
    +781
    +782        if calculated < 1 or int(calculated) != int(list_mass[1]):
    +783            mz_exp_centroid = list_mass[1]
    +784
    +785        else:
    +786            mz_exp_centroid = calculated
    +787
    +788        if (
    +789            self.label == Labels.bruker_frequency
    +790            or self.label == Labels.midas_frequency
    +791        ):
    +792            # fit parabola to three most abundant frequency datapoints
    +793            list_freq = [
    +794                freq[current_index - 1],
    +795                freq[current_index],
    +796                freq[current_index + 1],
    +797            ]
    +798            if self.mspeaks_settings.legacy_centroid_polyfit:
    +799                z = polyfit(list_mass, list_y, 2)
    +800                a = z[0]
    +801                b = z[1]
    +802            else:
    +803                a, b, c = self.algebraic_quadratic(list_mass, list_y)
    +804
    +805            calculated_freq = -b / (2 * a)
    +806
    +807            if calculated_freq < 1 or int(calculated_freq) != freq[current_index]:
    +808                freq_centr = list_freq[1]
    +809
    +810            else:
    +811                freq_centr = calculated_freq
    +812
    +813        else:
    +814            freq_centr = None
    +815
    +816        if self.mspeaks_settings.legacy_centroid_polyfit:
    +817            abundance_centroid = abund[current_index]
    +818        else:
    +819            abundance_centroid = a * mz_exp_centroid**2 + b * mz_exp_centroid + c
    +820
    +821        return mz_exp_centroid, freq_centr, abundance_centroid
     
    @@ -3122,41 +3403,47 @@
    Returns
    -
    747    def check_prominence(self, abun, current_index, len_abundance, peak_height_diff ) -> tuple or False:
    -748        """ Check the prominence of a peak.
    -749        
    -750        Parameters
    -751        ----------
    -752        abun : ndarray
    -753            The abundance values.
    -754        current_index : int
    -755            The index of the current peak.
    -756        len_abundance : int
    -757            The length of the abundance array.
    -758        peak_height_diff : function
    -759            The function to calculate the peak height difference.
    -760        
    -761        Returns
    -762        -------
    -763        tuple or False
    -764            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    -765            Otherwise, False.
    -766        
    -767        """
    -768
    -769        final_index = self.find_minima(current_index, abun, len_abundance, right=True)
    -770            
    -771        start_index = self.find_minima(current_index, abun, len_abundance, right=False)
    -772            
    -773        peak_indexes = (current_index-1, current_index, current_index+1)
    -774
    -775        if min( peak_height_diff(current_index,start_index), peak_height_diff(current_index,final_index) ) >  self.mspeaks_settings.peak_min_prominence_percent :   
    -776            
    -777            return peak_indexes
    -778        
    -779        else:
    -780            
    -781            return False
    +            
    823    def check_prominence(
    +824        self, abun, current_index, len_abundance, peak_height_diff
    +825    ) -> tuple or False:
    +826        """Check the prominence of a peak.
    +827
    +828        Parameters
    +829        ----------
    +830        abun : ndarray
    +831            The abundance values.
    +832        current_index : int
    +833            The index of the current peak.
    +834        len_abundance : int
    +835            The length of the abundance array.
    +836        peak_height_diff : function
    +837            The function to calculate the peak height difference.
    +838
    +839        Returns
    +840        -------
    +841        tuple or False
    +842            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    +843            Otherwise, False.
    +844
    +845        """
    +846
    +847        final_index = self.find_minima(current_index, abun, len_abundance, right=True)
    +848
    +849        start_index = self.find_minima(current_index, abun, len_abundance, right=False)
    +850
    +851        peak_indexes = (current_index - 1, current_index, current_index + 1)
    +852
    +853        if (
    +854            min(
    +855                peak_height_diff(current_index, start_index),
    +856                peak_height_diff(current_index, final_index),
    +857            )
    +858            > self.mspeaks_settings.peak_min_prominence_percent
    +859        ):
    +860            return peak_indexes
    +861
    +862        else:
    +863            return False
     
    @@ -3196,42 +3483,42 @@
    Returns
    -
    783    def use_the_max(self, mass, abund, current_index, len_abundance, peak_height_diff):
    -784        """ Use the max peak height as the centroid
    -785        
    -786        Parameters
    -787        ----------
    -788        mass : ndarray
    -789            The mass values.
    -790        abund : ndarray
    -791            The abundance values.
    -792        current_index : int
    -793            The index of the current peak.
    -794        len_abundance : int
    -795            The length of the abundance array.
    -796        peak_height_diff : function
    -797            The function to calculate the peak height difference.
    -798        
    -799        Returns
    -800        -------
    -801        float
    -802            The m/z value of the peak apex.
    -803        float
    -804            The abundance value of the peak apex.
    -805        tuple or None
    -806            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    -807            Otherwise, None.
    -808        """
    -809
    -810        peak_indexes = self.check_prominence(abund, current_index, len_abundance, peak_height_diff )
    -811        
    -812        if not peak_indexes:        
    -813
    -814            return None, None, None
    -815        
    -816        else:    
    -817            
    -818            return mass[current_index], abund[current_index], peak_indexes
    +            
    865    def use_the_max(self, mass, abund, current_index, len_abundance, peak_height_diff):
    +866        """Use the max peak height as the centroid
    +867
    +868        Parameters
    +869        ----------
    +870        mass : ndarray
    +871            The mass values.
    +872        abund : ndarray
    +873            The abundance values.
    +874        current_index : int
    +875            The index of the current peak.
    +876        len_abundance : int
    +877            The length of the abundance array.
    +878        peak_height_diff : function
    +879            The function to calculate the peak height difference.
    +880
    +881        Returns
    +882        -------
    +883        float
    +884            The m/z value of the peak apex.
    +885        float
    +886            The abundance value of the peak apex.
    +887        tuple or None
    +888            A tuple containing the indexes of the peak, if the prominence is above the threshold.
    +889            Otherwise, None.
    +890        """
    +891
    +892        peak_indexes = self.check_prominence(
    +893            abund, current_index, len_abundance, peak_height_diff
    +894        )
    +895
    +896        if not peak_indexes:
    +897            return None, None, None
    +898
    +899        else:
    +900            return mass[current_index], abund[current_index], peak_indexes
     
    @@ -3275,68 +3562,100 @@
    Returns
    -
    820    def calc_centroid_legacy(self, mass, abund, freq):
    -821        """ Legacy centroid calculation
    -822        Deprecated - for deletion.
    -823        
    -824        """
    -825        warnings.warn("Legacy centroid calculation is deprecated. Please use the new centroid calculation method.")
    -826        pass
    -827        if False:
    -828            len_abundance = len(abund)
    -829            
    -830            max_abundance = max(abund)
    -831            
    -832            peak_height_diff = lambda hi, li : ((abund[hi] - abund[li]) / max_abundance )*100
    -833
    -834            abundance_threshold, factor = self.get_threshold(abund)
    -835            #print(abundance_threshold, factor)
    -836            # find indices of all peaks
    -837            dy = abund[1:] - abund[:-1]
    -838            
    -839            #replaces nan for infi nity
    -840            indices_nan = where(isnan(abund))[0]
    -841            
    -842            if indices_nan.size:
    -843                
    -844                abund[indices_nan] = inf
    -845                dy[where(isnan(dy))[0]] = inf
    -846            
    -847            indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    -848            
    -849            # noise threshold
    -850            if indexes.size and abundance_threshold is not None:
    -851                indexes = indexes[abund[indexes]/factor >= abundance_threshold]
    -852            # filter out 'peaks' within 3 points of the spectrum limits
    -853            #remove entries within 3 points of upper limit
    -854            indexes = [x for x in indexes if (len_abundance-x)>3]
    -855            #remove entries within 3 points of zero
    -856            indexes = [x for x in indexes if x>3]
    -857        
    -858            for current_index in indexes: 
    -859                
    -860                if self.label == Labels.simulated_profile: 
    -861
    -862                    mz_exp_centroid, intes_centr, peak_indexes = self.use_the_max(mass, abund, current_index, len_abundance, peak_height_diff)
    -863                    if mz_exp_centroid:
    -864                        
    -865                        peak_resolving_power = self.calculate_resolving_power( abund, mass, current_index)
    -866                        s2n = intes_centr/self.baseline_noise_std
    -867                        freq_centr = None
    -868                        self.add_mspeak(self.polarity, mz_exp_centroid, abund[current_index] , peak_resolving_power, s2n, peak_indexes, exp_freq=freq_centr, ms_parent=self)
    -869                
    -870                else:
    -871                
    -872                    mz_exp_centroid, freq_centr, intes_centr, peak_indexes = self.find_apex_fit_quadratic(mass, abund, freq, current_index, len_abundance, peak_height_diff)
    -873                    if mz_exp_centroid:
    -874                        try:
    -875                            peak_resolving_power = self.calculate_resolving_power( abund, mass, current_index)
    -876                        except IndexError: 
    -877                            print('index error, skipping peak')
    -878                            continue
    -879                        
    -880                        s2n = intes_centr/self.baseline_noise_std
    -881                        self.add_mspeak(self.polarity, mz_exp_centroid, abund[current_index] , peak_resolving_power, s2n, peak_indexes, exp_freq=freq_centr, ms_parent=self)
    +            
    902    def calc_centroid_legacy(self, mass, abund, freq):
    +903        """Legacy centroid calculation
    +904        Deprecated - for deletion.
    +905
    +906        """
    +907        warnings.warn(
    +908            "Legacy centroid calculation is deprecated. Please use the new centroid calculation method."
    +909        )
    +910        pass
    +911        if False:
    +912            len_abundance = len(abund)
    +913
    +914            max_abundance = max(abund)
    +915
    +916            peak_height_diff = (
    +917                lambda hi, li: ((abund[hi] - abund[li]) / max_abundance) * 100
    +918            )
    +919
    +920            abundance_threshold, factor = self.get_threshold(abund)
    +921            # print(abundance_threshold, factor)
    +922            # find indices of all peaks
    +923            dy = abund[1:] - abund[:-1]
    +924
    +925            # replaces nan for infi nity
    +926            indices_nan = where(isnan(abund))[0]
    +927
    +928            if indices_nan.size:
    +929                abund[indices_nan] = inf
    +930                dy[where(isnan(dy))[0]] = inf
    +931
    +932            indexes = where((hstack((dy, 0)) < 0) & (hstack((0, dy)) > 0))[0]
    +933
    +934            # noise threshold
    +935            if indexes.size and abundance_threshold is not None:
    +936                indexes = indexes[abund[indexes] / factor >= abundance_threshold]
    +937            # filter out 'peaks' within 3 points of the spectrum limits
    +938            # remove entries within 3 points of upper limit
    +939            indexes = [x for x in indexes if (len_abundance - x) > 3]
    +940            # remove entries within 3 points of zero
    +941            indexes = [x for x in indexes if x > 3]
    +942
    +943            for current_index in indexes:
    +944                if self.label == Labels.simulated_profile:
    +945                    mz_exp_centroid, intes_centr, peak_indexes = self.use_the_max(
    +946                        mass, abund, current_index, len_abundance, peak_height_diff
    +947                    )
    +948                    if mz_exp_centroid:
    +949                        peak_resolving_power = self.calculate_resolving_power(
    +950                            abund, mass, current_index
    +951                        )
    +952                        s2n = intes_centr / self.baseline_noise_std
    +953                        freq_centr = None
    +954                        self.add_mspeak(
    +955                            self.polarity,
    +956                            mz_exp_centroid,
    +957                            abund[current_index],
    +958                            peak_resolving_power,
    +959                            s2n,
    +960                            peak_indexes,
    +961                            exp_freq=freq_centr,
    +962                            ms_parent=self,
    +963                        )
    +964
    +965                else:
    +966                    mz_exp_centroid, freq_centr, intes_centr, peak_indexes = (
    +967                        self.find_apex_fit_quadratic(
    +968                            mass,
    +969                            abund,
    +970                            freq,
    +971                            current_index,
    +972                            len_abundance,
    +973                            peak_height_diff,
    +974                        )
    +975                    )
    +976                    if mz_exp_centroid:
    +977                        try:
    +978                            peak_resolving_power = self.calculate_resolving_power(
    +979                                abund, mass, current_index
    +980                            )
    +981                        except IndexError:
    +982                            print("index error, skipping peak")
    +983                            continue
    +984
    +985                        s2n = intes_centr / self.baseline_noise_std
    +986                        self.add_mspeak(
    +987                            self.polarity,
    +988                            mz_exp_centroid,
    +989                            abund[current_index],
    +990                            peak_resolving_power,
    +991                            s2n,
    +992                            peak_indexes,
    +993                            exp_freq=freq_centr,
    +994                            ms_parent=self,
    +995                        )
     
    diff --git a/docs/corems/mass_spectrum/factory/MassSpectrumClasses.html b/docs/corems/mass_spectrum/factory/MassSpectrumClasses.html index 39c04dbf..d0f44ac9 100644 --- a/docs/corems/mass_spectrum/factory/MassSpectrumClasses.html +++ b/docs/corems/mass_spectrum/factory/MassSpectrumClasses.html @@ -412,1660 +412,1743 @@

       1from pathlib import Path
    -   2from copy import deepcopy
    +   2
        3import numpy as np
    -   4
    +   4from lmfit.models import GaussianModel
        5
    -   6#from matplotlib import rcParamsDefault, rcParams
    -   7from numpy import array, power, float64, where, histogram, trapz
    -   8
    -   9from pandas import DataFrame
    -  10from lmfit.models import GaussianModel
    -  11
    -  12from corems.mass_spectrum.calc.MassSpectrumCalc import MassSpecCalc
    -  13from corems.mass_spectrum.calc.KendrickGroup import KendrickGrouping
    -  14from corems.encapsulation.constant import Labels
    -  15from corems.ms_peak.factory.MSPeakClasses import ICRMassPeak as MSPeak
    -  16from corems.encapsulation.factory.parameters import MSParameters
    -  17from corems.encapsulation.input.parameter_from_json import load_and_set_parameters_ms, load_and_set_toml_parameters_ms
    +   6# from matplotlib import rcParamsDefault, rcParams
    +   7from numpy import array, float64, histogram, trapz, where
    +   8from pandas import DataFrame
    +   9
    +  10from corems.encapsulation.constant import Labels
    +  11from corems.encapsulation.factory.parameters import MSParameters
    +  12from corems.encapsulation.input.parameter_from_json import (
    +  13    load_and_set_parameters_ms,
    +  14    load_and_set_toml_parameters_ms,
    +  15)
    +  16from corems.mass_spectrum.calc.KendrickGroup import KendrickGrouping
    +  17from corems.mass_spectrum.calc.MassSpectrumCalc import MassSpecCalc
       18from corems.mass_spectrum.calc.MeanResolvingPowerFilter import MeanResolvingPowerFilter
    -  19
    -  20__author__ = "Yuri E. Corilo"
    -  21__date__ = "Jun 12, 2019"
    -  22
    -  23def overrides(interface_class):
    -  24    """Checks if the method overrides a method from an interface class."""
    -  25    def overrider(method):
    -  26        assert method.__name__ in dir(interface_class)
    -  27        return method
    -  28    return overrider
    -  29
    -  30class MassSpecBase(MassSpecCalc, KendrickGrouping):
    -  31    """A mass spectrum base class, stores the profile data and instrument settings.
    -  32
    -  33    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
    -  34    _mspeaks is populated under the hood by calling process_mass_spec method.
    -  35    Iteration is null if _mspeaks is empty.
    -  36
    -  37    Parameters
    -  38    ----------
    -  39    mz_exp : array_like
    -  40        The m/z values of the mass spectrum.
    -  41    abundance : array_like
    -  42        The abundance values of the mass spectrum.
    -  43    d_params : dict
    -  44        A dictionary of parameters for the mass spectrum.
    -  45    **kwargs
    -  46        Additional keyword arguments.
    -  47
    -  48    Attributes
    -  49    ----------
    -  50
    -  51    mspeaks : list
    -  52        A list of mass peaks.
    -  53    is_calibrated : bool
    -  54        Whether the mass spectrum is calibrated.
    -  55    is_centroid : bool
    -  56        Whether the mass spectrum is centroided.
    -  57    has_frequency : bool
    -  58        Whether the mass spectrum has a frequency domain.
    -  59    calibration_order : None or int
    -  60        The order of the mass spectrum's calibration.
    -  61    calibration_points : None or ndarray
    -  62        The calibration points of the mass spectrum.
    -  63    calibration_ref_mzs: None or ndarray
    -  64        The reference m/z values of the mass spectrum's calibration.
    -  65    calibration_meas_mzs : None or ndarray
    -  66        The measured m/z values of the mass spectrum's calibration.
    -  67    calibration_RMS : None or float
    -  68        The root mean square of the mass spectrum's calibration.
    -  69    calibration_segment : None or CalibrationSegment
    -  70        The calibration segment of the mass spectrum.
    -  71    _abundance : ndarray
    -  72        The abundance values of the mass spectrum.
    -  73    _mz_exp : ndarray
    -  74        The m/z values of the mass spectrum.
    -  75    _mspeaks : list
    -  76        A list of mass peaks.
    -  77    _dict_nominal_masses_indexes : dict
    -  78        A dictionary of nominal masses and their indexes.
    -  79    _baseline_noise : float
    -  80        The baseline noise of the mass spectrum.
    -  81    _baseline_noise_std : float
    -  82        The standard deviation of the baseline noise of the mass spectrum.
    -  83    _dynamic_range : float or None
    -  84        The dynamic range of the mass spectrum.
    -  85    _transient_settings : None or TransientSettings
    -  86        The transient settings of the mass spectrum.
    -  87    _frequency_domain : None or FrequencyDomain
    -  88        The frequency domain of the mass spectrum.
    -  89    _mz_cal_profile : None or MzCalibrationProfile
    -  90        The m/z calibration profile of the mass spectrum.
    -  91
    -  92    Methods
    -  93    -------
    -  94    * process_mass_spec(). Main function to process the mass spectrum, 
    -  95    including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
    +  19from corems.ms_peak.factory.MSPeakClasses import ICRMassPeak as MSPeak
    +  20
    +  21__author__ = "Yuri E. Corilo"
    +  22__date__ = "Jun 12, 2019"
    +  23
    +  24
    +  25def overrides(interface_class):
    +  26    """Checks if the method overrides a method from an interface class."""
    +  27
    +  28    def overrider(method):
    +  29        assert method.__name__ in dir(interface_class)
    +  30        return method
    +  31
    +  32    return overrider
    +  33
    +  34
    +  35class MassSpecBase(MassSpecCalc, KendrickGrouping):
    +  36    """A mass spectrum base class, stores the profile data and instrument settings.
    +  37
    +  38    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
    +  39    _mspeaks is populated under the hood by calling process_mass_spec method.
    +  40    Iteration is null if _mspeaks is empty.
    +  41
    +  42    Parameters
    +  43    ----------
    +  44    mz_exp : array_like
    +  45        The m/z values of the mass spectrum.
    +  46    abundance : array_like
    +  47        The abundance values of the mass spectrum.
    +  48    d_params : dict
    +  49        A dictionary of parameters for the mass spectrum.
    +  50    **kwargs
    +  51        Additional keyword arguments.
    +  52
    +  53    Attributes
    +  54    ----------
    +  55
    +  56    mspeaks : list
    +  57        A list of mass peaks.
    +  58    is_calibrated : bool
    +  59        Whether the mass spectrum is calibrated.
    +  60    is_centroid : bool
    +  61        Whether the mass spectrum is centroided.
    +  62    has_frequency : bool
    +  63        Whether the mass spectrum has a frequency domain.
    +  64    calibration_order : None or int
    +  65        The order of the mass spectrum's calibration.
    +  66    calibration_points : None or ndarray
    +  67        The calibration points of the mass spectrum.
    +  68    calibration_ref_mzs: None or ndarray
    +  69        The reference m/z values of the mass spectrum's calibration.
    +  70    calibration_meas_mzs : None or ndarray
    +  71        The measured m/z values of the mass spectrum's calibration.
    +  72    calibration_RMS : None or float
    +  73        The root mean square of the mass spectrum's calibration.
    +  74    calibration_segment : None or CalibrationSegment
    +  75        The calibration segment of the mass spectrum.
    +  76    _abundance : ndarray
    +  77        The abundance values of the mass spectrum.
    +  78    _mz_exp : ndarray
    +  79        The m/z values of the mass spectrum.
    +  80    _mspeaks : list
    +  81        A list of mass peaks.
    +  82    _dict_nominal_masses_indexes : dict
    +  83        A dictionary of nominal masses and their indexes.
    +  84    _baseline_noise : float
    +  85        The baseline noise of the mass spectrum.
    +  86    _baseline_noise_std : float
    +  87        The standard deviation of the baseline noise of the mass spectrum.
    +  88    _dynamic_range : float or None
    +  89        The dynamic range of the mass spectrum.
    +  90    _transient_settings : None or TransientSettings
    +  91        The transient settings of the mass spectrum.
    +  92    _frequency_domain : None or FrequencyDomain
    +  93        The frequency domain of the mass spectrum.
    +  94    _mz_cal_profile : None or MzCalibrationProfile
    +  95        The m/z calibration profile of the mass spectrum.
       96
    -  97    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
    -  98    """
    -  99    def __init__(self, mz_exp, abundance, d_params, **kwargs):
    - 100        
    - 101        self._abundance = array(abundance, dtype=float64)
    - 102        self._mz_exp = array(mz_exp, dtype=float64)
    - 103                    
    - 104        # objects created after process_mass_spec() function
    - 105        self._mspeaks = list()
    - 106        self.mspeaks = list()
    - 107        self._dict_nominal_masses_indexes = dict()
    - 108        self._baseline_noise = 0.001
    - 109        self._baseline_noise_std = 0.001
    - 110        self._dynamic_range = None
    - 111        # set to None: initialization occurs inside subclass MassSpecfromFreq
    - 112        self._transient_settings = None
    - 113        self._frequency_domain = None
    - 114        self._mz_cal_profile = None
    - 115        self.is_calibrated = False
    - 116
    - 117        self._set_parameters_objects(d_params)
    - 118        self._init_settings()
    - 119
    - 120        self.is_centroid = False
    - 121        self.has_frequency = False
    - 122
    - 123        self.calibration_order = None
    - 124        self.calibration_points = None
    - 125        self.calibration_ref_mzs = None
    - 126        self.calibration_meas_mzs = None
    - 127        self.calibration_RMS = None
    - 128        self.calibration_segment = None
    - 129        self.calibration_raw_error_median = None
    - 130        self.calibration_raw_error_stdev = None
    - 131
    - 132    def _init_settings(self):
    - 133        """Initializes the settings for the mass spectrum."""
    - 134        self._parameters = MSParameters()
    - 135
    - 136    def __len__(self):
    - 137
    - 138        return len(self.mspeaks)
    - 139
    - 140    def __getitem__(self, position) -> MSPeak:
    - 141
    - 142        return self.mspeaks[position]
    +  97    Methods
    +  98    -------
    +  99    * process_mass_spec(). Main function to process the mass spectrum,
    + 100    including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
    + 101
    + 102    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
    + 103    """
    + 104
    + 105    def __init__(self, mz_exp, abundance, d_params, **kwargs):
    + 106        self._abundance = array(abundance, dtype=float64)
    + 107        self._mz_exp = array(mz_exp, dtype=float64)
    + 108
    + 109        # objects created after process_mass_spec() function
    + 110        self._mspeaks = list()
    + 111        self.mspeaks = list()
    + 112        self._dict_nominal_masses_indexes = dict()
    + 113        self._baseline_noise = 0.001
    + 114        self._baseline_noise_std = 0.001
    + 115        self._dynamic_range = None
    + 116        # set to None: initialization occurs inside subclass MassSpecfromFreq
    + 117        self._transient_settings = None
    + 118        self._frequency_domain = None
    + 119        self._mz_cal_profile = None
    + 120        self.is_calibrated = False
    + 121
    + 122        self._set_parameters_objects(d_params)
    + 123        self._init_settings()
    + 124
    + 125        self.is_centroid = False
    + 126        self.has_frequency = False
    + 127
    + 128        self.calibration_order = None
    + 129        self.calibration_points = None
    + 130        self.calibration_ref_mzs = None
    + 131        self.calibration_meas_mzs = None
    + 132        self.calibration_RMS = None
    + 133        self.calibration_segment = None
    + 134        self.calibration_raw_error_median = None
    + 135        self.calibration_raw_error_stdev = None
    + 136
    + 137    def _init_settings(self):
    + 138        """Initializes the settings for the mass spectrum."""
    + 139        self._parameters = MSParameters()
    + 140
    + 141    def __len__(self):
    + 142        return len(self.mspeaks)
      143
    - 144    def set_indexes(self, list_indexes):
    - 145        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
    + 144    def __getitem__(self, position) -> MSPeak:
    + 145        return self.mspeaks[position]
      146
    - 147        Parameters
    - 148        ----------
    - 149        list_indexes : list of int
    - 150            A list of integers representing the indexes of the MSpeaks to iterate over.
    - 151
    - 152        """
    - 153        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
    + 147    def set_indexes(self, list_indexes):
    + 148        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
    + 149
    + 150        Parameters
    + 151        ----------
    + 152        list_indexes : list of int
    + 153            A list of integers representing the indexes of the MSpeaks to iterate over.
      154
    - 155        for i, mspeak in  enumerate(self.mspeaks): mspeak.index = i
    - 156
    - 157        self._set_nominal_masses_start_final_indexes()
    - 158
    - 159    def reset_indexes(self):
    - 160        """Reset the mass spectrum to iterate over all MSpeaks objects.
    - 161
    - 162        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
    - 163        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
    - 164
    - 165        """
    - 166        self.mspeaks = self._mspeaks
    - 167
    - 168        for i, mspeak in  enumerate(self.mspeaks): mspeak.index = i
    - 169
    - 170        self._set_nominal_masses_start_final_indexes()
    + 155        """
    + 156        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
    + 157
    + 158        for i, mspeak in enumerate(self.mspeaks):
    + 159            mspeak.index = i
    + 160
    + 161        self._set_nominal_masses_start_final_indexes()
    + 162
    + 163    def reset_indexes(self):
    + 164        """Reset the mass spectrum to iterate over all MSpeaks objects.
    + 165
    + 166        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
    + 167        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
    + 168
    + 169        """
    + 170        self.mspeaks = self._mspeaks
      171
    - 172    def add_mspeak(self, ion_charge, mz_exp,
    - 173                            abundance,
    - 174                            resolving_power,
    - 175                            signal_to_noise,
    - 176                            massspec_indexes,
    - 177                            exp_freq=None,
    - 178                            ms_parent=None
    - 179                        ):
    - 180        """Add a new MSPeak object to the MassSpectrum object.
    - 181
    - 182        Parameters
    - 183        ----------
    - 184        ion_charge : int
    - 185            The ion charge of the MSPeak.
    - 186        mz_exp : float
    - 187            The experimental m/z value of the MSPeak.
    - 188        abundance : float
    - 189            The abundance of the MSPeak.
    - 190        resolving_power : float
    - 191            The resolving power of the MSPeak.
    - 192        signal_to_noise : float
    - 193            The signal-to-noise ratio of the MSPeak.
    - 194        massspec_indexes : list
    - 195            A list of indexes of the MSPeak in the MassSpectrum object.
    - 196        exp_freq : float, optional
    - 197            The experimental frequency of the MSPeak. Defaults to None.
    - 198        ms_parent : MSParent, optional
    - 199            The MSParent object associated with the MSPeak. Defaults to None.
    - 200        """
    - 201        mspeak = MSPeak(
    - 202                ion_charge,
    - 203                mz_exp,
    - 204                abundance,
    - 205                resolving_power,
    - 206                signal_to_noise,
    - 207                massspec_indexes,
    - 208                len(self._mspeaks),
    - 209                exp_freq=exp_freq,
    - 210                ms_parent=ms_parent,
    - 211        )
    - 212
    - 213        self._mspeaks.append(mspeak)
    - 214
    - 215    def _set_parameters_objects(self, d_params):
    - 216        """Set the parameters of the MassSpectrum object.
    - 217
    - 218        Parameters
    - 219        ----------
    - 220        d_params : dict
    - 221            A dictionary containing the parameters to set.
    + 172        for i, mspeak in enumerate(self.mspeaks):
    + 173            mspeak.index = i
    + 174
    + 175        self._set_nominal_masses_start_final_indexes()
    + 176
    + 177    def add_mspeak(
    + 178        self,
    + 179        ion_charge,
    + 180        mz_exp,
    + 181        abundance,
    + 182        resolving_power,
    + 183        signal_to_noise,
    + 184        massspec_indexes,
    + 185        exp_freq=None,
    + 186        ms_parent=None,
    + 187    ):
    + 188        """Add a new MSPeak object to the MassSpectrum object.
    + 189
    + 190        Parameters
    + 191        ----------
    + 192        ion_charge : int
    + 193            The ion charge of the MSPeak.
    + 194        mz_exp : float
    + 195            The experimental m/z value of the MSPeak.
    + 196        abundance : float
    + 197            The abundance of the MSPeak.
    + 198        resolving_power : float
    + 199            The resolving power of the MSPeak.
    + 200        signal_to_noise : float
    + 201            The signal-to-noise ratio of the MSPeak.
    + 202        massspec_indexes : list
    + 203            A list of indexes of the MSPeak in the MassSpectrum object.
    + 204        exp_freq : float, optional
    + 205            The experimental frequency of the MSPeak. Defaults to None.
    + 206        ms_parent : MSParent, optional
    + 207            The MSParent object associated with the MSPeak. Defaults to None.
    + 208        """
    + 209        mspeak = MSPeak(
    + 210            ion_charge,
    + 211            mz_exp,
    + 212            abundance,
    + 213            resolving_power,
    + 214            signal_to_noise,
    + 215            massspec_indexes,
    + 216            len(self._mspeaks),
    + 217            exp_freq=exp_freq,
    + 218            ms_parent=ms_parent,
    + 219        )
    + 220
    + 221        self._mspeaks.append(mspeak)
      222
    - 223        Notes
    - 224        -----
    - 225        This method sets the following parameters of the MassSpectrum object:
    - 226        - _calibration_terms
    - 227        - label
    - 228        - analyzer
    - 229        - acquisition_time
    - 230        - instrument_label
    - 231        - polarity
    - 232        - scan_number
    - 233        - retention_time
    - 234        - mobility_rt
    - 235        - mobility_scan
    - 236        - _filename
    - 237        - _dir_location
    - 238        - _baseline_noise
    - 239        - _baseline_noise_std
    - 240        - sample_name
    - 241        """
    - 242        self._calibration_terms = (
    - 243            d_params.get("Aterm"),
    - 244            d_params.get("Bterm"),
    - 245            d_params.get("Cterm"),
    - 246        )
    - 247
    - 248        self.label = d_params.get(Labels.label)
    - 249
    - 250        self.analyzer = d_params.get('analyzer')
    - 251
    - 252        self.acquisition_time = d_params.get('acquisition_time')
    - 253
    - 254        self.instrument_label = d_params.get('instrument_label')
    + 223    def _set_parameters_objects(self, d_params):
    + 224        """Set the parameters of the MassSpectrum object.
    + 225
    + 226        Parameters
    + 227        ----------
    + 228        d_params : dict
    + 229            A dictionary containing the parameters to set.
    + 230
    + 231        Notes
    + 232        -----
    + 233        This method sets the following parameters of the MassSpectrum object:
    + 234        - _calibration_terms
    + 235        - label
    + 236        - analyzer
    + 237        - acquisition_time
    + 238        - instrument_label
    + 239        - polarity
    + 240        - scan_number
    + 241        - retention_time
    + 242        - mobility_rt
    + 243        - mobility_scan
    + 244        - _filename
    + 245        - _dir_location
    + 246        - _baseline_noise
    + 247        - _baseline_noise_std
    + 248        - sample_name
    + 249        """
    + 250        self._calibration_terms = (
    + 251            d_params.get("Aterm"),
    + 252            d_params.get("Bterm"),
    + 253            d_params.get("Cterm"),
    + 254        )
      255
    - 256        self.polarity = int(d_params.get("polarity"))
    + 256        self.label = d_params.get(Labels.label)
      257
    - 258        self.scan_number = d_params.get("scan_number")
    + 258        self.analyzer = d_params.get("analyzer")
      259
    - 260        self.retention_time = d_params.get("rt")
    + 260        self.acquisition_time = d_params.get("acquisition_time")
      261
    - 262        self.mobility_rt = d_params.get("mobility_rt")
    + 262        self.instrument_label = d_params.get("instrument_label")
      263
    - 264        self.mobility_scan = d_params.get("mobility_scan")
    + 264        self.polarity = int(d_params.get("polarity"))
      265
    - 266        self._filename = d_params.get("filename_path")
    + 266        self.scan_number = d_params.get("scan_number")
      267
    - 268        self._dir_location = d_params.get("dir_location")
    + 268        self.retention_time = d_params.get("rt")
      269
    - 270        self._baseline_noise = d_params.get("baseline_noise")
    + 270        self.mobility_rt = d_params.get("mobility_rt")
      271
    - 272        self._baseline_noise_std = d_params.get("baseline_noise_std")
    + 272        self.mobility_scan = d_params.get("mobility_scan")
      273
    - 274        if d_params.get('sample_name') != 'Unknown':
    + 274        self._filename = d_params.get("filename_path")
      275
    - 276            self.sample_name = d_params.get('sample_name')
    - 277            if not self.sample_name:
    - 278                self.sample_name = self.filename.stem
    - 279        else:
    - 280
    - 281            self.sample_name = self.filename.stem
    - 282
    - 283    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
    - 284        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
    - 285
    - 286        Parameters
    - 287        ----------
    - 288        Aterm : float
    - 289            The A-term calibration coefficient.
    - 290        Bterm : float
    - 291            The B-term calibration coefficient.
    - 292        C : float
    - 293            The C-term calibration coefficient.
    - 294        fas : float, optional
    - 295            The frequency amplitude scaling factor. Default is 0.
    - 296        """
    - 297        self._calibration_terms = (Aterm, Bterm, C)
    - 298
    - 299        self._mz_exp = self._f_to_mz()
    - 300        self._abundance = self._abundance
    - 301        self.find_peaks()
    - 302        self.reset_indexes()
    - 303
    - 304    def clear_molecular_formulas(self):
    - 305        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
    - 306
    - 307        Returns
    - 308        -------
    - 309        numpy.ndarray
    - 310            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
    - 311        """
    - 312        self.check_mspeaks()
    - 313        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
    - 314
    - 315    def process_mass_spec(self, keep_profile=True):
    - 316        """Process the mass spectrum.
    - 317
    - 318        Parameters
    - 319        ----------
    - 320        keep_profile : bool, optional
    - 321            Whether to keep the profile data after processing. Defaults to True.
    - 322
    - 323        Notes
    - 324        -----
    - 325        This method does the following:
    - 326        - calculates the noise threshold
    - 327        - does peak picking (creates mspeak_objs)
    - 328        - resets the mspeak_obj indexes
    - 329        """
    - 330        
    - 331        # if runned mannually make sure to rerun filter_by_noise_threshold     
    - 332        # calculates noise threshold 
    - 333        # do peak picking( create mspeak_objs) 
    - 334        # reset mspeak_obj the indexes
    - 335         
    - 336        self.cal_noise_threshold()
    - 337
    - 338        self.find_peaks()
    - 339        self.reset_indexes()
    - 340
    - 341        if self.mspeaks:
    - 342            self._dynamic_range = self.max_abundance / self.min_abundance
    - 343        else:
    - 344            self._dynamic_range = 0
    - 345        if not keep_profile:
    + 276        self._dir_location = d_params.get("dir_location")
    + 277
    + 278        self._baseline_noise = d_params.get("baseline_noise")
    + 279
    + 280        self._baseline_noise_std = d_params.get("baseline_noise_std")
    + 281
    + 282        if d_params.get("sample_name") != "Unknown":
    + 283            self.sample_name = d_params.get("sample_name")
    + 284            if not self.sample_name:
    + 285                self.sample_name = self.filename.stem
    + 286        else:
    + 287            self.sample_name = self.filename.stem
    + 288
    + 289    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
    + 290        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
    + 291
    + 292        Parameters
    + 293        ----------
    + 294        Aterm : float
    + 295            The A-term calibration coefficient.
    + 296        Bterm : float
    + 297            The B-term calibration coefficient.
    + 298        C : float
    + 299            The C-term calibration coefficient.
    + 300        fas : float, optional
    + 301            The frequency amplitude scaling factor. Default is 0.
    + 302        """
    + 303        self._calibration_terms = (Aterm, Bterm, C)
    + 304
    + 305        self._mz_exp = self._f_to_mz()
    + 306        self._abundance = self._abundance
    + 307        self.find_peaks()
    + 308        self.reset_indexes()
    + 309
    + 310    def clear_molecular_formulas(self):
    + 311        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
    + 312
    + 313        Returns
    + 314        -------
    + 315        numpy.ndarray
    + 316            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
    + 317        """
    + 318        self.check_mspeaks()
    + 319        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
    + 320
    + 321    def process_mass_spec(self, keep_profile=True):
    + 322        """Process the mass spectrum.
    + 323
    + 324        Parameters
    + 325        ----------
    + 326        keep_profile : bool, optional
    + 327            Whether to keep the profile data after processing. Defaults to True.
    + 328
    + 329        Notes
    + 330        -----
    + 331        This method does the following:
    + 332        - calculates the noise threshold
    + 333        - does peak picking (creates mspeak_objs)
    + 334        - resets the mspeak_obj indexes
    + 335        """
    + 336
    + 337        # if runned mannually make sure to rerun filter_by_noise_threshold
    + 338        # calculates noise threshold
    + 339        # do peak picking( create mspeak_objs)
    + 340        # reset mspeak_obj the indexes
    + 341
    + 342        self.cal_noise_threshold()
    + 343
    + 344        self.find_peaks()
    + 345        self.reset_indexes()
      346
    - 347            self._abundance *= 0
    - 348            self._mz_exp *= 0
    - 349            
    - 350
    - 351    def cal_noise_threshold(self):
    - 352        """Calculate the noise threshold of the mass spectrum.
    - 353
    - 354        """
    - 355
    - 356        if self.label == Labels.simulated_profile:
    + 347        if self.mspeaks:
    + 348            self._dynamic_range = self.max_abundance / self.min_abundance
    + 349        else:
    + 350            self._dynamic_range = 0
    + 351        if not keep_profile:
    + 352            self._abundance *= 0
    + 353            self._mz_exp *= 0
    + 354
    + 355    def cal_noise_threshold(self):
    + 356        """Calculate the noise threshold of the mass spectrum."""
      357
    - 358            self._baseline_noise, self._baseline_noise_std = 0.1, 1
    - 359
    - 360        if self.settings.noise_threshold_method == 'log':
    - 361
    - 362            self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
    - 363
    - 364        else:
    - 365            self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
    - 366
    - 367    @property
    - 368    def parameters(self):
    - 369        """Return the parameters of the mass spectrum."""
    - 370        return self._parameters
    - 371
    - 372    @parameters.setter
    - 373    def parameters(self, instance_MSParameters):
    - 374        self._parameters = instance_MSParameters
    + 358        if self.label == Labels.simulated_profile:
    + 359            self._baseline_noise, self._baseline_noise_std = 0.1, 1
    + 360
    + 361        if self.settings.noise_threshold_method == "log":
    + 362            self._baseline_noise, self._baseline_noise_std = (
    + 363                self.run_log_noise_threshold_calc()
    + 364            )
    + 365
    + 366        else:
    + 367            self._baseline_noise, self._baseline_noise_std = (
    + 368                self.run_noise_threshold_calc()
    + 369            )
    + 370
    + 371    @property
    + 372    def parameters(self):
    + 373        """Return the parameters of the mass spectrum."""
    + 374        return self._parameters
      375
    - 376    def set_parameter_from_json(self, parameters_path):
    - 377        """Set the parameters of the mass spectrum from a JSON file.
    - 378        
    - 379        Parameters
    - 380        ----------
    - 381        parameters_path : str
    - 382            The path to the JSON file containing the parameters.
    - 383        """
    - 384        load_and_set_parameters_ms(self, parameters_path=parameters_path)    
    - 385
    - 386    def set_parameter_from_toml(self, parameters_path):
    - 387        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)    
    - 388
    - 389    @property
    - 390    def mspeaks_settings(self): 
    - 391        """Return the MS peak settings of the mass spectrum."""
    - 392        return self.parameters.ms_peak
    - 393
    - 394    @mspeaks_settings.setter
    - 395    def mspeaks_settings(self, instance_MassSpecPeakSetting):
    - 396
    - 397            self.parameters.ms_peak = instance_MassSpecPeakSetting
    - 398
    - 399    @property
    - 400    def settings(self): 
    - 401        """Return the settings of the mass spectrum."""
    - 402        return self.parameters.mass_spectrum
    - 403
    - 404    @settings.setter
    - 405    def settings(self, instance_MassSpectrumSetting):
    + 376    @parameters.setter
    + 377    def parameters(self, instance_MSParameters):
    + 378        self._parameters = instance_MSParameters
    + 379
    + 380    def set_parameter_from_json(self, parameters_path):
    + 381        """Set the parameters of the mass spectrum from a JSON file.
    + 382
    + 383        Parameters
    + 384        ----------
    + 385        parameters_path : str
    + 386            The path to the JSON file containing the parameters.
    + 387        """
    + 388        load_and_set_parameters_ms(self, parameters_path=parameters_path)
    + 389
    + 390    def set_parameter_from_toml(self, parameters_path):
    + 391        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)
    + 392
    + 393    @property
    + 394    def mspeaks_settings(self):
    + 395        """Return the MS peak settings of the mass spectrum."""
    + 396        return self.parameters.ms_peak
    + 397
    + 398    @mspeaks_settings.setter
    + 399    def mspeaks_settings(self, instance_MassSpecPeakSetting):
    + 400        self.parameters.ms_peak = instance_MassSpecPeakSetting
    + 401
    + 402    @property
    + 403    def settings(self):
    + 404        """Return the settings of the mass spectrum."""
    + 405        return self.parameters.mass_spectrum
      406
    - 407        self.parameters.mass_spectrum =  instance_MassSpectrumSetting
    - 408
    - 409    @property
    - 410    def molecular_search_settings(self):  
    - 411        """Return the molecular search settings of the mass spectrum."""
    - 412        return self.parameters.molecular_search
    - 413
    - 414    @molecular_search_settings.setter
    - 415    def molecular_search_settings(self, instance_MolecularFormulaSearchSettings):
    - 416
    - 417        self.parameters.molecular_search =  instance_MolecularFormulaSearchSettings
    - 418
    - 419    @property
    - 420    def mz_cal_profile(self):
    - 421        """Return the calibrated m/z profile of the mass spectrum."""
    - 422        return self._mz_cal_profile
    - 423
    - 424    @mz_cal_profile.setter
    - 425    def mz_cal_profile(self, mz_cal_list):
    - 426        
    + 407    @settings.setter
    + 408    def settings(self, instance_MassSpectrumSetting):
    + 409        self.parameters.mass_spectrum = instance_MassSpectrumSetting
    + 410
    + 411    @property
    + 412    def molecular_search_settings(self):
    + 413        """Return the molecular search settings of the mass spectrum."""
    + 414        return self.parameters.molecular_search
    + 415
    + 416    @molecular_search_settings.setter
    + 417    def molecular_search_settings(self, instance_MolecularFormulaSearchSettings):
    + 418        self.parameters.molecular_search = instance_MolecularFormulaSearchSettings
    + 419
    + 420    @property
    + 421    def mz_cal_profile(self):
    + 422        """Return the calibrated m/z profile of the mass spectrum."""
    + 423        return self._mz_cal_profile
    + 424
    + 425    @mz_cal_profile.setter
    + 426    def mz_cal_profile(self, mz_cal_list):
      427        if len(mz_cal_list) == len(self._mz_exp):
      428            self._mz_cal_profile = mz_cal_list
      429        else:
    - 430            raise Exception( "calibrated array (%i) is not of the same size of the data (%i)" % (len(mz_cal_list),  len(self.mz_exp_profile)))    
    - 431
    - 432    @property
    - 433    def mz_cal(self):
    - 434        """Return the calibrated m/z values of the mass spectrum."""
    - 435        return array([mspeak.mz_cal for mspeak in self.mspeaks])
    - 436
    - 437    @mz_cal.setter
    - 438    def mz_cal(self, mz_cal_list):
    + 430            raise Exception(
    + 431                "calibrated array (%i) is not of the same size of the data (%i)"
    + 432                % (len(mz_cal_list), len(self.mz_exp_profile))
    + 433            )
    + 434
    + 435    @property
    + 436    def mz_cal(self):
    + 437        """Return the calibrated m/z values of the mass spectrum."""
    + 438        return array([mspeak.mz_cal for mspeak in self.mspeaks])
      439
    - 440            if  len(mz_cal_list) == len(self.mspeaks):
    - 441                self.is_calibrated = True
    - 442                for index, mz_cal in enumerate(mz_cal_list):
    - 443                    self.mspeaks[index].mz_cal = mz_cal
    - 444            else: 
    - 445                raise Exception( "calibrated array (%i) is not of the same size of the data (%i)" % (len(mz_cal_list),  len(self._mspeaks)))    
    - 446
    - 447    @property
    - 448    def mz_exp(self):
    - 449        """Return the experimental m/z values of the mass spectrum."""
    - 450        self.check_mspeaks()
    + 440    @mz_cal.setter
    + 441    def mz_cal(self, mz_cal_list):
    + 442        if len(mz_cal_list) == len(self.mspeaks):
    + 443            self.is_calibrated = True
    + 444            for index, mz_cal in enumerate(mz_cal_list):
    + 445                self.mspeaks[index].mz_cal = mz_cal
    + 446        else:
    + 447            raise Exception(
    + 448                "calibrated array (%i) is not of the same size of the data (%i)"
    + 449                % (len(mz_cal_list), len(self._mspeaks))
    + 450            )
      451
    - 452        if self.is_calibrated:
    - 453
    - 454            return array([mspeak.mz_cal for mspeak in self.mspeaks])
    - 455
    - 456        else:
    - 457
    - 458            return array([mspeak.mz_exp for mspeak in self.mspeaks])
    - 459 
    - 460    @property
    - 461    def freq_exp_profile(self):
    - 462        """Return the experimental frequency profile of the mass spectrum."""
    - 463        return self._frequency_domain
    - 464    
    - 465    @freq_exp_profile.setter
    - 466    def freq_exp_profile(self, new_data): self._frequency_domain = array(new_data)
    + 452    @property
    + 453    def mz_exp(self):
    + 454        """Return the experimental m/z values of the mass spectrum."""
    + 455        self.check_mspeaks()
    + 456
    + 457        if self.is_calibrated:
    + 458            return array([mspeak.mz_cal for mspeak in self.mspeaks])
    + 459
    + 460        else:
    + 461            return array([mspeak.mz_exp for mspeak in self.mspeaks])
    + 462
    + 463    @property
    + 464    def freq_exp_profile(self):
    + 465        """Return the experimental frequency profile of the mass spectrum."""
    + 466        return self._frequency_domain
      467
    - 468    @property
    - 469    def freq_exp_pp(self):
    - 470        """Return the experimental frequency values of the mass spectrum that are used for peak picking."""
    - 471        _, _, freq = self.prepare_peak_picking_data()
    - 472        return freq
    - 473
    - 474    @property
    - 475    def mz_exp_profile(self): 
    - 476        """Return the experimental m/z profile of the mass spectrum."""
    - 477        if self.is_calibrated: 
    - 478            return self.mz_cal_profile
    - 479        else:
    - 480            return self._mz_exp
    - 481
    - 482    @mz_exp_profile.setter
    - 483    def mz_exp_profile(self, new_data ): self._mz_exp = array(new_data)
    - 484
    - 485    @property
    - 486    def mz_exp_pp(self):
    - 487        """Return the experimental m/z values of the mass spectrum that are used for peak picking."""
    - 488        mz, _, _ = self.prepare_peak_picking_data()
    - 489        return mz
    - 490
    - 491    @property
    - 492    def abundance_profile(self): 
    - 493        """Return the abundance profile of the mass spectrum."""
    - 494        return self._abundance
    + 468    @freq_exp_profile.setter
    + 469    def freq_exp_profile(self, new_data):
    + 470        self._frequency_domain = array(new_data)
    + 471
    + 472    @property
    + 473    def freq_exp_pp(self):
    + 474        """Return the experimental frequency values of the mass spectrum that are used for peak picking."""
    + 475        _, _, freq = self.prepare_peak_picking_data()
    + 476        return freq
    + 477
    + 478    @property
    + 479    def mz_exp_profile(self):
    + 480        """Return the experimental m/z profile of the mass spectrum."""
    + 481        if self.is_calibrated:
    + 482            return self.mz_cal_profile
    + 483        else:
    + 484            return self._mz_exp
    + 485
    + 486    @mz_exp_profile.setter
    + 487    def mz_exp_profile(self, new_data):
    + 488        self._mz_exp = array(new_data)
    + 489
    + 490    @property
    + 491    def mz_exp_pp(self):
    + 492        """Return the experimental m/z values of the mass spectrum that are used for peak picking."""
    + 493        mz, _, _ = self.prepare_peak_picking_data()
    + 494        return mz
      495
    - 496    @abundance_profile.setter
    - 497    def abundance_profile(self, new_data): self._abundance = array(new_data)
    - 498
    - 499    @property
    - 500    def abundance_profile_pp(self):
    - 501        """Return the abundance profile of the mass spectrum that is used for peak picking."""
    - 502        _, abundance, _ = self.prepare_peak_picking_data()
    - 503        return abundance
    - 504    
    + 496    @property
    + 497    def abundance_profile(self):
    + 498        """Return the abundance profile of the mass spectrum."""
    + 499        return self._abundance
    + 500
    + 501    @abundance_profile.setter
    + 502    def abundance_profile(self, new_data):
    + 503        self._abundance = array(new_data)
    + 504
      505    @property
    - 506    def abundance(self):
    - 507        """Return the abundance values of the mass spectrum."""
    - 508        self.check_mspeaks()
    - 509        return array([mspeak.abundance for mspeak in self.mspeaks])
    + 506    def abundance_profile_pp(self):
    + 507        """Return the abundance profile of the mass spectrum that is used for peak picking."""
    + 508        _, abundance, _ = self.prepare_peak_picking_data()
    + 509        return abundance
      510
    - 511    def freq_exp(self):
    - 512        """Return the experimental frequency values of the mass spectrum."""
    - 513        self.check_mspeaks()
    - 514        return array([mspeak.freq_exp for mspeak in self.mspeaks])
    - 515
    - 516    @property
    - 517    def resolving_power(self):
    - 518        """Return the resolving power values of the mass spectrum."""
    + 511    @property
    + 512    def abundance(self):
    + 513        """Return the abundance values of the mass spectrum."""
    + 514        self.check_mspeaks()
    + 515        return array([mspeak.abundance for mspeak in self.mspeaks])
    + 516
    + 517    def freq_exp(self):
    + 518        """Return the experimental frequency values of the mass spectrum."""
      519        self.check_mspeaks()
    - 520        return array([mspeak.resolving_power for mspeak in self.mspeaks])
    + 520        return array([mspeak.freq_exp for mspeak in self.mspeaks])
      521
      522    @property
    - 523    def signal_to_noise(self):
    - 524        self.check_mspeaks()
    - 525        return array([mspeak.signal_to_noise for mspeak in self.mspeaks])
    - 526
    - 527    @property
    - 528    def nominal_mz(self):
    - 529        """Return the nominal m/z values of the mass spectrum."""
    - 530        if self._dict_nominal_masses_indexes:
    - 531            return sorted(list(self._dict_nominal_masses_indexes.keys()))
    - 532        else:
    - 533            raise ValueError("Nominal indexes not yet set")    
    - 534
    - 535    def get_mz_and_abundance_peaks_tuples(self):
    - 536        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
    - 537        self.check_mspeaks()
    - 538        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
    - 539
    - 540    @property
    - 541    def kmd(self):
    - 542        """Return the Kendrick mass defect values of the mass spectrum."""
    + 523    def resolving_power(self):
    + 524        """Return the resolving power values of the mass spectrum."""
    + 525        self.check_mspeaks()
    + 526        return array([mspeak.resolving_power for mspeak in self.mspeaks])
    + 527
    + 528    @property
    + 529    def signal_to_noise(self):
    + 530        self.check_mspeaks()
    + 531        return array([mspeak.signal_to_noise for mspeak in self.mspeaks])
    + 532
    + 533    @property
    + 534    def nominal_mz(self):
    + 535        """Return the nominal m/z values of the mass spectrum."""
    + 536        if self._dict_nominal_masses_indexes:
    + 537            return sorted(list(self._dict_nominal_masses_indexes.keys()))
    + 538        else:
    + 539            raise ValueError("Nominal indexes not yet set")
    + 540
    + 541    def get_mz_and_abundance_peaks_tuples(self):
    + 542        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
      543        self.check_mspeaks()
    - 544        return array([mspeak.kmd for mspeak in self.mspeaks])
    + 544        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
      545
      546    @property
    - 547    def kendrick_mass(self):
    - 548        """Return the Kendrick mass values of the mass spectrum."""
    + 547    def kmd(self):
    + 548        """Return the Kendrick mass defect values of the mass spectrum."""
      549        self.check_mspeaks()
    - 550        return array([mspeak.kendrick_mass for mspeak in self.mspeaks])
    + 550        return array([mspeak.kmd for mspeak in self.mspeaks])
      551
      552    @property
    - 553    def max_mz_exp(self):
    - 554        """Return the maximum experimental m/z value of the mass spectrum."""
    - 555        return max([mspeak.mz_exp for mspeak in self.mspeaks])
    - 556
    - 557    @property
    - 558    def min_mz_exp(self):
    - 559        """Return the minimum experimental m/z value of the mass spectrum."""
    - 560        return min([mspeak.mz_exp for mspeak in self.mspeaks])
    - 561
    - 562    @property
    - 563    def max_abundance(self):
    - 564        """Return the maximum abundance value of the mass spectrum."""        
    - 565        return max([mspeak.abundance for mspeak in self.mspeaks])
    - 566
    - 567    @property
    - 568    def max_signal_to_noise(self):
    - 569        """Return the maximum signal-to-noise ratio of the mass spectrum."""
    - 570        return max([mspeak.signal_to_noise for mspeak in self.mspeaks])
    - 571
    - 572    @property
    - 573    def most_abundant_mspeak(self):
    - 574        """Return the most abundant MSpeak object of the mass spectrum."""
    - 575        return max(self.mspeaks, key=lambda m: m.abundance)
    - 576
    - 577    @property
    - 578    def min_abundance(self):
    - 579        """Return the minimum abundance value of the mass spectrum."""
    - 580        return min([mspeak.abundance for mspeak in self.mspeaks])
    - 581
    - 582    # takes too much cpu time 
    + 553    def kendrick_mass(self):
    + 554        """Return the Kendrick mass values of the mass spectrum."""
    + 555        self.check_mspeaks()
    + 556        return array([mspeak.kendrick_mass for mspeak in self.mspeaks])
    + 557
    + 558    @property
    + 559    def max_mz_exp(self):
    + 560        """Return the maximum experimental m/z value of the mass spectrum."""
    + 561        return max([mspeak.mz_exp for mspeak in self.mspeaks])
    + 562
    + 563    @property
    + 564    def min_mz_exp(self):
    + 565        """Return the minimum experimental m/z value of the mass spectrum."""
    + 566        return min([mspeak.mz_exp for mspeak in self.mspeaks])
    + 567
    + 568    @property
    + 569    def max_abundance(self):
    + 570        """Return the maximum abundance value of the mass spectrum."""
    + 571        return max([mspeak.abundance for mspeak in self.mspeaks])
    + 572
    + 573    @property
    + 574    def max_signal_to_noise(self):
    + 575        """Return the maximum signal-to-noise ratio of the mass spectrum."""
    + 576        return max([mspeak.signal_to_noise for mspeak in self.mspeaks])
    + 577
    + 578    @property
    + 579    def most_abundant_mspeak(self):
    + 580        """Return the most abundant MSpeak object of the mass spectrum."""
    + 581        return max(self.mspeaks, key=lambda m: m.abundance)
    + 582
      583    @property
    - 584    def dynamic_range(self):
    - 585        """Return the dynamic range of the mass spectrum."""
    - 586        return self._dynamic_range
    + 584    def min_abundance(self):
    + 585        """Return the minimum abundance value of the mass spectrum."""
    + 586        return min([mspeak.abundance for mspeak in self.mspeaks])
      587
    - 588    @property
    - 589    def baseline_noise(self):
    - 590        """Return the baseline noise of the mass spectrum."""
    - 591        if self._baseline_noise:
    - 592            return self._baseline_noise
    - 593        else:     
    - 594            return None
    - 595
    - 596    @property
    - 597    def baseline_noise_std(self):
    - 598        """Return the standard deviation of the baseline noise of the mass spectrum."""
    - 599        if self._baseline_noise_std == 0:
    - 600            return self._baseline_noise_std
    - 601        if self._baseline_noise_std:
    - 602            return self._baseline_noise_std
    - 603        else:     
    - 604            return None
    - 605
    - 606    @property
    - 607    def Aterm(self):
    - 608        """Return the A-term calibration coefficient of the mass spectrum."""
    - 609        return self._calibration_terms[0]
    - 610
    - 611    @property
    - 612    def Bterm(self):
    - 613        """Return the B-term calibration coefficient of the mass spectrum."""
    - 614        return self._calibration_terms[1]
    - 615
    - 616    @property
    - 617    def Cterm(self):
    - 618        """Return the C-term calibration coefficient of the mass spectrum."""
    - 619        return self._calibration_terms[2]
    - 620
    - 621    @property
    - 622    def filename(self):
    - 623        """Return the filename of the mass spectrum."""
    - 624        return Path(self._filename)
    - 625
    - 626    @property
    - 627    def dir_location(self):
    - 628        """Return the directory location of the mass spectrum."""
    - 629        return self._dir_location
    - 630
    - 631    def sort_by_mz(self):
    - 632        """Sort the mass spectrum by m/z values."""
    - 633        return sorted(self, key=lambda m: m.mz_exp)
    - 634
    - 635    def sort_by_abundance(self, reverse=False):
    - 636        """Sort the mass spectrum by abundance values."""
    - 637        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
    - 638
    - 639    @property
    - 640    def tic(self):
    - 641        """Return the total ion current of the mass spectrum."""
    - 642        return trapz(self.abundance_profile, self.mz_exp_profile)
    - 643
    - 644    def check_mspeaks_warning(self):
    - 645        """Check if the mass spectrum has MSpeaks objects.
    - 646        
    - 647        Raises
    - 648        ------
    - 649        Warning
    - 650            If the mass spectrum has no MSpeaks objects.
    - 651        """
    - 652        import warnings
    - 653        if self.mspeaks:
    - 654            pass
    - 655        else:
    - 656            warnings.warn(
    - 657                "mspeaks list is empty, continuing without filtering data"
    - 658            )
    + 588    # takes too much cpu time
    + 589    @property
    + 590    def dynamic_range(self):
    + 591        """Return the dynamic range of the mass spectrum."""
    + 592        return self._dynamic_range
    + 593
    + 594    @property
    + 595    def baseline_noise(self):
    + 596        """Return the baseline noise of the mass spectrum."""
    + 597        if self._baseline_noise:
    + 598            return self._baseline_noise
    + 599        else:
    + 600            return None
    + 601
    + 602    @property
    + 603    def baseline_noise_std(self):
    + 604        """Return the standard deviation of the baseline noise of the mass spectrum."""
    + 605        if self._baseline_noise_std == 0:
    + 606            return self._baseline_noise_std
    + 607        if self._baseline_noise_std:
    + 608            return self._baseline_noise_std
    + 609        else:
    + 610            return None
    + 611
    + 612    @property
    + 613    def Aterm(self):
    + 614        """Return the A-term calibration coefficient of the mass spectrum."""
    + 615        return self._calibration_terms[0]
    + 616
    + 617    @property
    + 618    def Bterm(self):
    + 619        """Return the B-term calibration coefficient of the mass spectrum."""
    + 620        return self._calibration_terms[1]
    + 621
    + 622    @property
    + 623    def Cterm(self):
    + 624        """Return the C-term calibration coefficient of the mass spectrum."""
    + 625        return self._calibration_terms[2]
    + 626
    + 627    @property
    + 628    def filename(self):
    + 629        """Return the filename of the mass spectrum."""
    + 630        return Path(self._filename)
    + 631
    + 632    @property
    + 633    def dir_location(self):
    + 634        """Return the directory location of the mass spectrum."""
    + 635        return self._dir_location
    + 636
    + 637    def sort_by_mz(self):
    + 638        """Sort the mass spectrum by m/z values."""
    + 639        return sorted(self, key=lambda m: m.mz_exp)
    + 640
    + 641    def sort_by_abundance(self, reverse=False):
    + 642        """Sort the mass spectrum by abundance values."""
    + 643        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
    + 644
    + 645    @property
    + 646    def tic(self):
    + 647        """Return the total ion current of the mass spectrum."""
    + 648        return trapz(self.abundance_profile, self.mz_exp_profile)
    + 649
    + 650    def check_mspeaks_warning(self):
    + 651        """Check if the mass spectrum has MSpeaks objects.
    + 652
    + 653        Raises
    + 654        ------
    + 655        Warning
    + 656            If the mass spectrum has no MSpeaks objects.
    + 657        """
    + 658        import warnings
      659
    - 660    def check_mspeaks(self):
    - 661        """Check if the mass spectrum has MSpeaks objects.
    - 662
    - 663        Raises
    - 664        ------
    - 665        Exception
    - 666            If the mass spectrum has no MSpeaks objects.
    - 667        """
    - 668        if self.mspeaks:
    - 669            pass
    - 670        else:
    - 671            raise Exception(
    - 672                "mspeaks list is empty, please run process_mass_spec() first"
    - 673            )
    - 674
    - 675    def remove_assignment_by_index(self, indexes):
    - 676        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
    - 677
    - 678        Parameters
    - 679        ----------
    - 680        indexes : list of int
    - 681            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
    - 682        """
    - 683        for i in indexes: self.mspeaks[i].clear_molecular_formulas()
    - 684
    - 685    def filter_by_index(self, list_indexes):
    - 686        """Filter the mass spectrum by the specified indexes.
    - 687
    - 688        Parameters
    - 689        ----------
    - 690        list_indexes : list of int
    - 691            A list of indexes of the MSpeaks objects to drop.
    - 692
    - 693        """
    - 694
    - 695        self.mspeaks = [self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes]
    - 696
    - 697        for i, mspeak in  enumerate(self.mspeaks): mspeak.index = i
    + 660        if self.mspeaks:
    + 661            pass
    + 662        else:
    + 663            warnings.warn("mspeaks list is empty, continuing without filtering data")
    + 664
    + 665    def check_mspeaks(self):
    + 666        """Check if the mass spectrum has MSpeaks objects.
    + 667
    + 668        Raises
    + 669        ------
    + 670        Exception
    + 671            If the mass spectrum has no MSpeaks objects.
    + 672        """
    + 673        if self.mspeaks:
    + 674            pass
    + 675        else:
    + 676            raise Exception(
    + 677                "mspeaks list is empty, please run process_mass_spec() first"
    + 678            )
    + 679
    + 680    def remove_assignment_by_index(self, indexes):
    + 681        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
    + 682
    + 683        Parameters
    + 684        ----------
    + 685        indexes : list of int
    + 686            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
    + 687        """
    + 688        for i in indexes:
    + 689            self.mspeaks[i].clear_molecular_formulas()
    + 690
    + 691    def filter_by_index(self, list_indexes):
    + 692        """Filter the mass spectrum by the specified indexes.
    + 693
    + 694        Parameters
    + 695        ----------
    + 696        list_indexes : list of int
    + 697            A list of indexes of the MSpeaks objects to drop.
      698
    - 699        self._set_nominal_masses_start_final_indexes()
    + 699        """
      700
    - 701    def filter_by_mz(self, min_mz, max_mz):
    - 702        """Filter the mass spectrum by the specified m/z range.
    - 703
    - 704        Parameters
    - 705        ----------
    - 706        min_mz : float
    - 707            The minimum m/z value to keep.
    - 708        max_mz : float
    - 709            The maximum m/z value to keep.
    - 710
    - 711        """      
    - 712        self.check_mspeaks_warning()
    - 713        indexes = [index for index, mspeak in enumerate(self.mspeaks) if not min_mz <= mspeak.mz_exp <= max_mz]
    - 714        self.filter_by_index(indexes)
    - 715
    - 716    def filter_by_s2n(self, min_s2n, max_s2n=False):
    - 717        """Filter the mass spectrum by the specified signal-to-noise ratio range.
    - 718
    - 719        Parameters
    - 720        ----------
    - 721        min_s2n : float
    - 722            The minimum signal-to-noise ratio to keep.
    - 723        max_s2n : float, optional
    - 724            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
    - 725
    - 726        """
    - 727        self.check_mspeaks_warning()
    - 728        if max_s2n:
    - 729            indexes = [index for index, mspeak in enumerate(self.mspeaks) if not min_s2n <= mspeak.signal_to_noise <= max_s2n ]
    - 730        else:
    - 731            indexes = [index for index, mspeak in enumerate(self.mspeaks) if mspeak.signal_to_noise <= min_s2n ]
    - 732        self.filter_by_index(indexes)
    - 733
    - 734    def filter_by_abundance(self, min_abund, max_abund=False):
    - 735        """Filter the mass spectrum by the specified abundance range.
    - 736
    - 737        Parameters
    - 738        ----------
    - 739        min_abund : float
    - 740            The minimum abundance to keep.
    - 741        max_abund : float, optional
    - 742            The maximum abundance to keep. Defaults to False (no maximum).
    - 743
    - 744        """
    - 745        self.check_mspeaks_warning()
    - 746        if max_abund:
    - 747            indexes = [index for index, mspeak in enumerate(self.mspeaks) if not min_abund <= mspeak.abundance <= max_abund]
    - 748        else:
    - 749            indexes = [index for index, mspeak in enumerate(self.mspeaks) if mspeak.abundance <= min_abund]
    - 750        self.filter_by_index(indexes)
    - 751
    - 752    def filter_by_max_resolving_power(self, B, T):
    - 753        """Filter the mass spectrum by the specified maximum resolving power.
    - 754        
    - 755        Parameters
    - 756        ----------
    - 757        B : float
    - 758        T : float
    - 759        
    - 760        """
    - 761
    - 762        rpe = lambda m, z: (1.274e7 * z * B * T)/(m*z)
    - 763
    - 764        self.check_mspeaks_warning()
    - 765
    - 766        indexes_to_remove = [index for index, mspeak in enumerate(self.mspeaks) if  mspeak.resolving_power >= rpe(mspeak.mz_exp,mspeak.ion_charge)]
    - 767        self.filter_by_index(indexes_to_remove)
    - 768
    - 769    def filter_by_mean_resolving_power(self, ndeviations=3,plot=False,guess_pars=False):
    - 770        """Filter the mass spectrum by the specified mean resolving power.
    - 771
    - 772        Parameters
    - 773        ----------
    - 774        ndeviations : float, optional
    - 775            The number of standard deviations to use for filtering. Defaults to 3.
    - 776        plot : bool, optional
    - 777            Whether to plot the resolving power distribution. Defaults to False.
    - 778        guess_pars : bool, optional
    - 779            Whether to guess the parameters for the Gaussian model. Defaults to False.
    + 701        self.mspeaks = [
    + 702            self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes
    + 703        ]
    + 704
    + 705        for i, mspeak in enumerate(self.mspeaks):
    + 706            mspeak.index = i
    + 707
    + 708        self._set_nominal_masses_start_final_indexes()
    + 709
    + 710    def filter_by_mz(self, min_mz, max_mz):
    + 711        """Filter the mass spectrum by the specified m/z range.
    + 712
    + 713        Parameters
    + 714        ----------
    + 715        min_mz : float
    + 716            The minimum m/z value to keep.
    + 717        max_mz : float
    + 718            The maximum m/z value to keep.
    + 719
    + 720        """
    + 721        self.check_mspeaks_warning()
    + 722        indexes = [
    + 723            index
    + 724            for index, mspeak in enumerate(self.mspeaks)
    + 725            if not min_mz <= mspeak.mz_exp <= max_mz
    + 726        ]
    + 727        self.filter_by_index(indexes)
    + 728
    + 729    def filter_by_s2n(self, min_s2n, max_s2n=False):
    + 730        """Filter the mass spectrum by the specified signal-to-noise ratio range.
    + 731
    + 732        Parameters
    + 733        ----------
    + 734        min_s2n : float
    + 735            The minimum signal-to-noise ratio to keep.
    + 736        max_s2n : float, optional
    + 737            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
    + 738
    + 739        """
    + 740        self.check_mspeaks_warning()
    + 741        if max_s2n:
    + 742            indexes = [
    + 743                index
    + 744                for index, mspeak in enumerate(self.mspeaks)
    + 745                if not min_s2n <= mspeak.signal_to_noise <= max_s2n
    + 746            ]
    + 747        else:
    + 748            indexes = [
    + 749                index
    + 750                for index, mspeak in enumerate(self.mspeaks)
    + 751                if mspeak.signal_to_noise <= min_s2n
    + 752            ]
    + 753        self.filter_by_index(indexes)
    + 754
    + 755    def filter_by_abundance(self, min_abund, max_abund=False):
    + 756        """Filter the mass spectrum by the specified abundance range.
    + 757
    + 758        Parameters
    + 759        ----------
    + 760        min_abund : float
    + 761            The minimum abundance to keep.
    + 762        max_abund : float, optional
    + 763            The maximum abundance to keep. Defaults to False (no maximum).
    + 764
    + 765        """
    + 766        self.check_mspeaks_warning()
    + 767        if max_abund:
    + 768            indexes = [
    + 769                index
    + 770                for index, mspeak in enumerate(self.mspeaks)
    + 771                if not min_abund <= mspeak.abundance <= max_abund
    + 772            ]
    + 773        else:
    + 774            indexes = [
    + 775                index
    + 776                for index, mspeak in enumerate(self.mspeaks)
    + 777                if mspeak.abundance <= min_abund
    + 778            ]
    + 779        self.filter_by_index(indexes)
      780
    - 781        """
    - 782        self.check_mspeaks_warning()
    - 783        indexes_to_remove = MeanResolvingPowerFilter(self,ndeviations,plot,guess_pars).main()
    - 784        self.filter_by_index(indexes_to_remove)
    - 785
    - 786
    - 787    def filter_by_min_resolving_power(self, B, T):
    - 788        """Filter the mass spectrum by the specified minimum resolving power.
    - 789
    - 790        Parameters
    - 791        ----------
    - 792        B : float
    - 793        T : float
    + 781    def filter_by_max_resolving_power(self, B, T):
    + 782        """Filter the mass spectrum by the specified maximum resolving power.
    + 783
    + 784        Parameters
    + 785        ----------
    + 786        B : float
    + 787        T : float
    + 788
    + 789        """
    + 790
    + 791        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
    + 792
    + 793        self.check_mspeaks_warning()
      794
    - 795        """
    - 796        rpe = lambda m, z: (1.274e7 * z * B * T)/(m*z)
    - 797
    - 798        self.check_mspeaks_warning()
    - 799
    - 800        indexes_to_remove = [index for index, mspeak in enumerate(self.mspeaks) if  mspeak.resolving_power <= rpe(mspeak.mz_exp,mspeak.ion_charge)]
    - 801        self.filter_by_index(indexes_to_remove)
    - 802
    - 803    def filter_by_noise_threshold(self):
    - 804        """Filter the mass spectrum by the noise threshold."""
    - 805        
    - 806        threshold = self.get_noise_threshold()[1][0]
    - 807        
    - 808        self.check_mspeaks_warning()
    - 809        
    - 810        indexes_to_remove = [index for index, mspeak in enumerate(self.mspeaks) if  mspeak.abundance <= threshold]
    - 811        self.filter_by_index(indexes_to_remove)
    - 812
    - 813    
    - 814    def find_peaks(self):
    - 815        """Find the peaks of the mass spectrum."""
    - 816        #needs to clear previous results from peak_picking
    - 817        self._mspeaks = list()
    - 818
    - 819        #then do peak picking
    - 820        self.do_peak_picking()
    - 821        # print("A total of %i peaks were found" % len(self._mspeaks))
    + 795        indexes_to_remove = [
    + 796            index
    + 797            for index, mspeak in enumerate(self.mspeaks)
    + 798            if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge)
    + 799        ]
    + 800        self.filter_by_index(indexes_to_remove)
    + 801
    + 802    def filter_by_mean_resolving_power(
    + 803        self, ndeviations=3, plot=False, guess_pars=False
    + 804    ):
    + 805        """Filter the mass spectrum by the specified mean resolving power.
    + 806
    + 807        Parameters
    + 808        ----------
    + 809        ndeviations : float, optional
    + 810            The number of standard deviations to use for filtering. Defaults to 3.
    + 811        plot : bool, optional
    + 812            Whether to plot the resolving power distribution. Defaults to False.
    + 813        guess_pars : bool, optional
    + 814            Whether to guess the parameters for the Gaussian model. Defaults to False.
    + 815
    + 816        """
    + 817        self.check_mspeaks_warning()
    + 818        indexes_to_remove = MeanResolvingPowerFilter(
    + 819            self, ndeviations, plot, guess_pars
    + 820        ).main()
    + 821        self.filter_by_index(indexes_to_remove)
      822
    - 823    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
    - 824        """Change the Kendrick base of all MSpeaks objects.
    + 823    def filter_by_min_resolving_power(self, B, T):
    + 824        """Filter the mass spectrum by the specified minimum resolving power.
      825
      826        Parameters
      827        ----------
    - 828        kendrick_dict_base : dict
    - 829            A dictionary of the Kendrick base to change to.
    + 828        B : float
    + 829        T : float
      830
    - 831        Notes
    - 832        -----
    - 833        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
    - 834        """
    - 835        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
    - 836
    - 837        for mspeak in self.mspeaks:
    - 838
    - 839            mspeak.change_kendrick_base(kendrick_dict_base)
    - 840
    - 841    def get_nominal_mz_first_last_indexes(self, nominal_mass):
    - 842        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
    - 843
    - 844        Parameters
    - 845        ----------
    - 846        nominal_mass : int
    - 847            The nominal mass to get the indexes for.
    - 848
    - 849        Returns
    - 850        -------
    - 851        tuple
    - 852            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
    - 853        """
    - 854        if self._dict_nominal_masses_indexes:
    - 855
    - 856            if nominal_mass in self._dict_nominal_masses_indexes.keys():
    - 857
    - 858                return (self._dict_nominal_masses_indexes.get(nominal_mass)[0], self._dict_nominal_masses_indexes.get(nominal_mass)[1]+1)
    - 859
    - 860            else:
    - 861                # import warnings
    - 862                # uncomment warn to distribution
    - 863                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
    - 864                return (0, 0)
    - 865        else:
    - 866            raise Exception("run process_mass_spec() function before trying to access the data")
    - 867
    - 868    def get_masses_count_by_nominal_mass(self):
    - 869        """Return a dictionary of the nominal masses and their counts."""
    - 870
    - 871        dict_nominal_masses_count = {}
    - 872
    - 873        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    - 874
    - 875        for nominal_mass in all_nominal_masses:
    - 876            if nominal_mass not in dict_nominal_masses_count:
    - 877                dict_nominal_masses_count[nominal_mass] = len(list(self.get_nominal_mass_indexes(nominal_mass)))
    - 878
    - 879        return dict_nominal_masses_count
    - 880
    - 881    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
    - 882        """Return a dictionary of the nominal masses and their counts.
    - 883
    - 884        Parameters
    - 885        ----------
    - 886        mz_overlay : float, optional
    - 887            The m/z overlay to use for counting. Defaults to 0.1.
    - 888
    - 889        Returns
    - 890        -------
    - 891        dict
    - 892            A dictionary of the nominal masses and their counts.
    - 893        """
    - 894        dict_nominal_masses_count ={}
    - 895
    - 896        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    - 897
    - 898        for nominal_mass in all_nominal_masses:
    - 899
    - 900            if nominal_mass not in dict_nominal_masses_count:
    - 901
    - 902                min_mz = nominal_mass - mz_overlay
    - 903
    - 904                max_mz = nominal_mass + 1 + mz_overlay
    - 905
    - 906                indexes = indexes = where((self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)) 
    - 907
    - 908                dict_nominal_masses_count[nominal_mass] = indexes[0].size
    - 909
    - 910        return dict_nominal_masses_count
    - 911
    - 912    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
    - 913        """Return the indexes of the MSpeaks objects with the specified nominal mass.
    - 914
    - 915        Parameters
    - 916        ----------
    - 917        nominal_mass : int
    - 918            The nominal mass to get the indexes for.
    - 919        overlay : float, optional
    - 920            The m/z overlay to use for counting. Defaults to 0.1.
    - 921
    - 922        Returns
    - 923        -------
    - 924        generator
    - 925            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
    - 926        """       
    - 927        min_mz_to_look = nominal_mass - overlay
    - 928        max_mz_to_look = nominal_mass + 1 + overlay
    - 929
    - 930        return (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
    - 931
    - 932        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
    - 933        # return indexes
    - 934
    - 935    def _set_nominal_masses_start_final_indexes(self):
    - 936        """Set the start and final indexes of the MSpeaks objects for all nominal masses."""
    - 937        dict_nominal_masses_indexes ={}
    - 938
    - 939        all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks)
    - 940
    - 941        for nominal_mass in all_nominal_masses:
    + 831        """
    + 832        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
    + 833
    + 834        self.check_mspeaks_warning()
    + 835
    + 836        indexes_to_remove = [
    + 837            index
    + 838            for index, mspeak in enumerate(self.mspeaks)
    + 839            if mspeak.resolving_power <= rpe(mspeak.mz_exp, mspeak.ion_charge)
    + 840        ]
    + 841        self.filter_by_index(indexes_to_remove)
    + 842
    + 843    def filter_by_noise_threshold(self):
    + 844        """Filter the mass spectrum by the noise threshold."""
    + 845
    + 846        threshold = self.get_noise_threshold()[1][0]
    + 847
    + 848        self.check_mspeaks_warning()
    + 849
    + 850        indexes_to_remove = [
    + 851            index
    + 852            for index, mspeak in enumerate(self.mspeaks)
    + 853            if mspeak.abundance <= threshold
    + 854        ]
    + 855        self.filter_by_index(indexes_to_remove)
    + 856
    + 857    def find_peaks(self):
    + 858        """Find the peaks of the mass spectrum."""
    + 859        # needs to clear previous results from peak_picking
    + 860        self._mspeaks = list()
    + 861
    + 862        # then do peak picking
    + 863        self.do_peak_picking()
    + 864        # print("A total of %i peaks were found" % len(self._mspeaks))
    + 865
    + 866    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
    + 867        """Change the Kendrick base of all MSpeaks objects.
    + 868
    + 869        Parameters
    + 870        ----------
    + 871        kendrick_dict_base : dict
    + 872            A dictionary of the Kendrick base to change to.
    + 873
    + 874        Notes
    + 875        -----
    + 876        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
    + 877        """
    + 878        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
    + 879
    + 880        for mspeak in self.mspeaks:
    + 881            mspeak.change_kendrick_base(kendrick_dict_base)
    + 882
    + 883    def get_nominal_mz_first_last_indexes(self, nominal_mass):
    + 884        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
    + 885
    + 886        Parameters
    + 887        ----------
    + 888        nominal_mass : int
    + 889            The nominal mass to get the indexes for.
    + 890
    + 891        Returns
    + 892        -------
    + 893        tuple
    + 894            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
    + 895        """
    + 896        if self._dict_nominal_masses_indexes:
    + 897            if nominal_mass in self._dict_nominal_masses_indexes.keys():
    + 898                return (
    + 899                    self._dict_nominal_masses_indexes.get(nominal_mass)[0],
    + 900                    self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1,
    + 901                )
    + 902
    + 903            else:
    + 904                # import warnings
    + 905                # uncomment warn to distribution
    + 906                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
    + 907                return (0, 0)
    + 908        else:
    + 909            raise Exception(
    + 910                "run process_mass_spec() function before trying to access the data"
    + 911            )
    + 912
    + 913    def get_masses_count_by_nominal_mass(self):
    + 914        """Return a dictionary of the nominal masses and their counts."""
    + 915
    + 916        dict_nominal_masses_count = {}
    + 917
    + 918        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    + 919
    + 920        for nominal_mass in all_nominal_masses:
    + 921            if nominal_mass not in dict_nominal_masses_count:
    + 922                dict_nominal_masses_count[nominal_mass] = len(
    + 923                    list(self.get_nominal_mass_indexes(nominal_mass))
    + 924                )
    + 925
    + 926        return dict_nominal_masses_count
    + 927
    + 928    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
    + 929        """Return a dictionary of the nominal masses and their counts.
    + 930
    + 931        Parameters
    + 932        ----------
    + 933        mz_overlay : float, optional
    + 934            The m/z overlay to use for counting. Defaults to 0.1.
    + 935
    + 936        Returns
    + 937        -------
    + 938        dict
    + 939            A dictionary of the nominal masses and their counts.
    + 940        """
    + 941        dict_nominal_masses_count = {}
      942
    - 943            #indexes = self.get_nominal_mass_indexes(nominal_mass)
    - 944            # Convert the iterator to a list to avoid multiple calls
    - 945            indexes = list(self.get_nominal_mass_indexes(nominal_mass))
    - 946
    - 947            # If the list is not empty, find the first and last; otherwise, set None
    - 948            if indexes:
    - 949                first, last = indexes[0], indexes[-1]
    - 950            else:
    - 951                first = last = None
    - 952            #defaultvalue = None
    - 953            #first = last = next(indexes, defaultvalue)
    - 954            #for last in indexes:
    - 955            #    pass
    + 943        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    + 944
    + 945        for nominal_mass in all_nominal_masses:
    + 946            if nominal_mass not in dict_nominal_masses_count:
    + 947                min_mz = nominal_mass - mz_overlay
    + 948
    + 949                max_mz = nominal_mass + 1 + mz_overlay
    + 950
    + 951                indexes = indexes = where(
    + 952                    (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)
    + 953                )
    + 954
    + 955                dict_nominal_masses_count[nominal_mass] = indexes[0].size
      956
    - 957            dict_nominal_masses_indexes[nominal_mass] = (first, last)
    + 957        return dict_nominal_masses_count
      958
    - 959        self._dict_nominal_masses_indexes = dict_nominal_masses_indexes
    - 960
    - 961    def plot_centroid(self, ax=None, c='g'):
    - 962        """Plot the centroid data of the mass spectrum.
    - 963
    - 964        Parameters
    - 965        ----------
    - 966        ax : matplotlib.axes.Axes, optional
    - 967            The matplotlib axes to plot on. Defaults to None.
    - 968        c : str, optional
    - 969            The color to use for the plot. Defaults to 'g' (green).
    - 970
    - 971        Returns
    - 972        -------
    - 973        matplotlib.axes.Axes
    - 974            The matplotlib axes containing the plot.
    - 975
    - 976        Raises
    - 977        ------
    - 978        Exception
    - 979            If no centroid data is found.
    - 980        """
    - 981
    - 982        import matplotlib.pyplot as plt
    - 983        if self._mspeaks:
    - 984
    - 985            if ax is None:
    - 986                ax = plt.gca()
    - 987
    - 988            markerline_a, stemlines_a, baseline_a = ax.stem(self.mz_exp, self.abundance, linefmt='-', markerfmt=" ")
    + 959    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
    + 960        """Return the indexes of the MSpeaks objects with the specified nominal mass.
    + 961
    + 962        Parameters
    + 963        ----------
    + 964        nominal_mass : int
    + 965            The nominal mass to get the indexes for.
    + 966        overlay : float, optional
    + 967            The m/z margin added below nominal_mass and above nominal_mass + 1 when selecting peaks. Defaults to 0.1.
    + 968
    + 969        Returns
    + 970        -------
    + 971        generator
    + 972            A generator of the indexes of the MSpeaks objects whose mz_exp lies within the overlay-widened window of the nominal mass.
    + 973        """
    + 974        min_mz_to_look = nominal_mass - overlay
    + 975        max_mz_to_look = nominal_mass + 1 + overlay
    + 976
    + 977        return (
    + 978            i
    + 979            for i in range(len(self.mspeaks))
    + 980            if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look
    + 981        )
    + 982
    + 983        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
    + 984        # return indexes
    + 985
    + 986    def _set_nominal_masses_start_final_indexes(self):
    + 987        """Set the start and final indexes of the MSpeaks objects for all nominal masses."""
    + 988        dict_nominal_masses_indexes = {}
      989
    - 990            plt.setp(markerline_a, 'color', c, 'linewidth', 2)
    - 991            plt.setp(stemlines_a, 'color', c, 'linewidth', 2)
    - 992            plt.setp(baseline_a, 'color', c, 'linewidth', 2)
    - 993
    - 994            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    - 995            ax.set_ylabel('Abundance', fontsize=12)
    - 996            ax.tick_params(axis='both', which='major', labelsize=12)
    - 997
    - 998            ax.axes.spines['top'].set_visible(False)
    - 999            ax.axes.spines['right'].set_visible(False)
    -1000
    -1001            ax.get_yaxis().set_visible(False)
    -1002            ax.spines['left'].set_visible(False)
    -1003
    -1004        else:
    -1005
    -1006            raise Exception("No centroid data found, please run process_mass_spec")
    -1007
    -1008        return ax
    -1009
    -1010    def plot_profile_and_noise_threshold(self, ax=None,legend=False): 
    -1011        """Plot the profile data and noise threshold of the mass spectrum.
    -1012
    -1013        Parameters
    -1014        ----------
    -1015        ax : matplotlib.axes.Axes, optional
    -1016            The matplotlib axes to plot on. Defaults to None.
    -1017        legend : bool, optional
    -1018            Whether to show the legend. Defaults to False.
    -1019
    -1020        Returns
    -1021        -------
    -1022        matplotlib.axes.Axes
    -1023            The matplotlib axes containing the plot.
    -1024
    -1025        Raises
    -1026        ------
    -1027        Exception
    -1028            If no noise threshold is found.
    -1029        """
    -1030        import matplotlib.pyplot as plt
    -1031        if self.baseline_noise_std and self.baseline_noise_std:
    -1032
    -1033            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    -1034            baseline = (self.baseline_noise, self.baseline_noise)
    -1035
    -1036            # std = self.parameters.mass_spectrum.noise_threshold_min_std
    -1037            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
    -1038            x, y = self.get_noise_threshold()    
    -1039            
    -1040            if ax is None:
    -1041                ax = plt.gca()
    -1042            
    -1043            ax.plot(self.mz_exp_profile, self.abundance_profile, color="green",label="Spectrum")
    -1044            ax.plot(x, (baseline, baseline), color="yellow",label="Baseline Noise")
    -1045            ax.plot(x, y, color="red",label="Noise Threshold")
    -1046
    -1047            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    -1048            ax.set_ylabel('Abundance', fontsize=12)
    -1049            ax.tick_params(axis='both', which='major', labelsize=12)
    -1050
    -1051            ax.axes.spines['top'].set_visible(False)
    -1052            ax.axes.spines['right'].set_visible(False)
    -1053
    -1054            ax.get_yaxis().set_visible(False)
    -1055            ax.spines['left'].set_visible(False)
    -1056            if legend:
    -1057                ax.legend()
    + 990        all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks)
    + 991
    + 992        for nominal_mass in all_nominal_masses:
    + 993            # indexes = self.get_nominal_mass_indexes(nominal_mass)
    + 994            # Convert the iterator to a list to avoid multiple calls
    + 995            indexes = list(self.get_nominal_mass_indexes(nominal_mass))
    + 996
    + 997            # If the list is not empty, find the first and last; otherwise, set None
    + 998            if indexes:
    + 999                first, last = indexes[0], indexes[-1]
    +1000            else:
    +1001                first = last = None
    +1002            # defaultvalue = None
    +1003            # first = last = next(indexes, defaultvalue)
    +1004            # for last in indexes:
    +1005            #    pass
    +1006
    +1007            dict_nominal_masses_indexes[nominal_mass] = (first, last)
    +1008
    +1009        self._dict_nominal_masses_indexes = dict_nominal_masses_indexes
    +1010
    +1011    def plot_centroid(self, ax=None, c="g"):
    +1012        """Plot the centroid data of the mass spectrum.
    +1013
    +1014        Parameters
    +1015        ----------
    +1016        ax : matplotlib.axes.Axes, optional
    +1017            The matplotlib axes to plot on. Defaults to None.
    +1018        c : str, optional
    +1019            The color to use for the plot. Defaults to 'g' (green).
    +1020
    +1021        Returns
    +1022        -------
    +1023        matplotlib.axes.Axes
    +1024            The matplotlib axes containing the plot.
    +1025
    +1026        Raises
    +1027        ------
    +1028        Exception
    +1029            If no centroid data is found.
    +1030        """
    +1031
    +1032        import matplotlib.pyplot as plt
    +1033
    +1034        if self._mspeaks:
    +1035            if ax is None:
    +1036                ax = plt.gca()
    +1037
    +1038            markerline_a, stemlines_a, baseline_a = ax.stem(
    +1039                self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
    +1040            )
    +1041
    +1042            plt.setp(markerline_a, "color", c, "linewidth", 2)
    +1043            plt.setp(stemlines_a, "color", c, "linewidth", 2)
    +1044            plt.setp(baseline_a, "color", c, "linewidth", 2)
    +1045
    +1046            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +1047            ax.set_ylabel("Abundance", fontsize=12)
    +1048            ax.tick_params(axis="both", which="major", labelsize=12)
    +1049
    +1050            ax.axes.spines["top"].set_visible(False)
    +1051            ax.axes.spines["right"].set_visible(False)
    +1052
    +1053            ax.get_yaxis().set_visible(False)
    +1054            ax.spines["left"].set_visible(False)
    +1055
    +1056        else:
    +1057            raise Exception("No centroid data found, please run process_mass_spec")
     1058
    -1059        else:
    +1059        return ax
     1060
    -1061            raise Exception("Calculate noise threshold first")
    -1062
    -1063        return ax
    -1064
    -1065    def plot_mz_domain_profile(self, color='green', ax=None): 
    -1066        """Plot the m/z domain profile of the mass spectrum.
    -1067
    -1068        Parameters
    -1069        ----------
    -1070        color : str, optional
    -1071            The color to use for the plot. Defaults to 'green'.
    -1072        ax : matplotlib.axes.Axes, optional
    -1073            The matplotlib axes to plot on. Defaults to None.
    -1074
    -1075        Returns
    -1076        -------
    -1077        matplotlib.axes.Axes
    -1078            The matplotlib axes containing the plot.
    -1079        """       
    -1080
    +1061    def plot_profile_and_noise_threshold(self, ax=None, legend=False):
    +1062        """Plot the profile data and noise threshold of the mass spectrum.
    +1063
    +1064        Parameters
    +1065        ----------
    +1066        ax : matplotlib.axes.Axes, optional
    +1067            The matplotlib axes to plot on. Defaults to None.
    +1068        legend : bool, optional
    +1069            Whether to show the legend. Defaults to False.
    +1070
    +1071        Returns
    +1072        -------
    +1073        matplotlib.axes.Axes
    +1074            The matplotlib axes containing the plot.
    +1075
    +1076        Raises
    +1077        ------
    +1078        Exception
    +1079            If no noise threshold is found.
    +1080        """
     1081        import matplotlib.pyplot as plt
     1082
    -1083        if ax is None:
    -1084            ax = plt.gca()
    -1085        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
    -1086        ax.set(xlabel='m/z', ylabel='abundance')
    -1087
    -1088        return ax
    -1089
    -1090    def to_excel(self, out_file_path, write_metadata=True):
    -1091        """Export the mass spectrum to an Excel file.
    -1092
    -1093        Parameters
    -1094        ----------
    -1095        out_file_path : str
    -1096            The path to the Excel file to export to.
    -1097        write_metadata : bool, optional
    -1098            Whether to write the metadata to the Excel file. Defaults to True.
    -1099
    -1100        Returns
    -1101        -------
    -1102        None
    -1103        """
    -1104        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1105        exportMS = HighResMassSpecExport(out_file_path, self)
    -1106        exportMS.to_excel(write_metadata=write_metadata)
    -1107
    -1108    def to_hdf(self, out_file_path):
    -1109        """Export the mass spectrum to an HDF file.
    -1110
    -1111        Parameters
    -1112        ----------
    -1113        out_file_path : str
    -1114            The path to the HDF file to export to.
    -1115
    -1116        Returns
    -1117        -------
    -1118        None
    -1119        """
    -1120        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1121        exportMS = HighResMassSpecExport(out_file_path, self)
    -1122        exportMS.to_hdf()
    -1123
    -1124    def to_csv(self, out_file_path, write_metadata=True):
    -1125        """Export the mass spectrum to a CSV file.
    -1126        
    -1127        Parameters
    -1128        ----------
    -1129        out_file_path : str
    -1130            The path to the CSV file to export to.
    -1131        write_metadata : bool, optional
    -1132            Whether to write the metadata to the CSV file. Defaults to True.
    -1133        
    +1083        if self.baseline_noise_std and self.baseline_noise_std:
    +1084            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    +1085            baseline = (self.baseline_noise, self.baseline_noise)
    +1086
    +1087            # std = self.parameters.mass_spectrum.noise_threshold_min_std
    +1088            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
    +1089            x, y = self.get_noise_threshold()
    +1090
    +1091            if ax is None:
    +1092                ax = plt.gca()
    +1093
    +1094            ax.plot(
    +1095                self.mz_exp_profile,
    +1096                self.abundance_profile,
    +1097                color="green",
    +1098                label="Spectrum",
    +1099            )
    +1100            ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
    +1101            ax.plot(x, y, color="red", label="Noise Threshold")
    +1102
    +1103            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +1104            ax.set_ylabel("Abundance", fontsize=12)
    +1105            ax.tick_params(axis="both", which="major", labelsize=12)
    +1106
    +1107            ax.axes.spines["top"].set_visible(False)
    +1108            ax.axes.spines["right"].set_visible(False)
    +1109
    +1110            ax.get_yaxis().set_visible(False)
    +1111            ax.spines["left"].set_visible(False)
    +1112            if legend:
    +1113                ax.legend()
    +1114
    +1115        else:
    +1116            raise Exception("Calculate noise threshold first")
    +1117
    +1118        return ax
    +1119
    +1120    def plot_mz_domain_profile(self, color="green", ax=None):
    +1121        """Plot the m/z domain profile of the mass spectrum.
    +1122
    +1123        Parameters
    +1124        ----------
    +1125        color : str, optional
    +1126            The color to use for the plot. Defaults to 'green'.
    +1127        ax : matplotlib.axes.Axes, optional
    +1128            The matplotlib axes to plot on. Defaults to None.
    +1129
    +1130        Returns
    +1131        -------
    +1132        matplotlib.axes.Axes
    +1133            The matplotlib axes containing the plot.
     1134        """
    -1135        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1136        exportMS = HighResMassSpecExport(out_file_path, self)
    -1137        exportMS.to_csv(write_metadata=write_metadata)
    -1138
    -1139    def to_pandas(self, out_file_path, write_metadata=True):
    -1140        """Export the mass spectrum to a Pandas dataframe with pkl extension.
    -1141
    -1142        Parameters
    -1143        ----------
    -1144        out_file_path : str
    -1145            The path to the CSV file to export to.
    -1146        write_metadata : bool, optional
    -1147            Whether to write the metadata to the CSV file. Defaults to True.
    -1148
    -1149        """
    -1150        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1151        exportMS = HighResMassSpecExport(out_file_path, self)
    -1152        exportMS.to_pandas(write_metadata=write_metadata)
    -1153
    -1154    def to_dataframe(self, additional_columns=None):
    -1155        """Return the mass spectrum as a Pandas dataframe.
    -1156
    -1157        Parameters
    -1158        ----------
    -1159        additional_columns : list, optional
    -1160            A list of additional columns to include in the dataframe. Defaults to None.
    -1161            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
    -1162        
    -1163        Returns
    -1164        -------
    -1165        pandas.DataFrame
    -1166            The mass spectrum as a Pandas dataframe.
    -1167        """
    -1168        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1169        exportMS = HighResMassSpecExport(self.filename, self)
    -1170        return exportMS.get_pandas_df(additional_columns = additional_columns)
    +1135
    +1136        import matplotlib.pyplot as plt
    +1137
    +1138        if ax is None:
    +1139            ax = plt.gca()
    +1140        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
    +1141        ax.set(xlabel="m/z", ylabel="abundance")
    +1142
    +1143        return ax
    +1144
    +1145    def to_excel(self, out_file_path, write_metadata=True):
    +1146        """Export the mass spectrum to an Excel file.
    +1147
    +1148        Parameters
    +1149        ----------
    +1150        out_file_path : str
    +1151            The path to the Excel file to export to.
    +1152        write_metadata : bool, optional
    +1153            Whether to write the metadata to the Excel file. Defaults to True.
    +1154
    +1155        Returns
    +1156        -------
    +1157        None
    +1158        """
    +1159        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1160
    +1161        exportMS = HighResMassSpecExport(out_file_path, self)
    +1162        exportMS.to_excel(write_metadata=write_metadata)
    +1163
    +1164    def to_hdf(self, out_file_path):
    +1165        """Export the mass spectrum to an HDF file.
    +1166
    +1167        Parameters
    +1168        ----------
    +1169        out_file_path : str
    +1170            The path to the HDF file to export to.
     1171
    -1172    def to_json(self):
    -1173        """Return the mass spectrum as a JSON file."""
    -1174        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1175        exportMS = HighResMassSpecExport(self.filename, self)
    -1176        return exportMS.to_json()
    +1172        Returns
    +1173        -------
    +1174        None
    +1175        """
    +1176        from corems.mass_spectrum.output.export import HighResMassSpecExport
     1177
    -1178    def parameters_json(self):
    -1179        """Return the parameters of the mass spectrum as a JSON string."""
    -1180        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1181        exportMS = HighResMassSpecExport(self.filename, self)
    -1182        return exportMS.parameters_to_json()
    +1178        exportMS = HighResMassSpecExport(out_file_path, self)
    +1179        exportMS.to_hdf()
    +1180
    +1181    def to_csv(self, out_file_path, write_metadata=True):
    +1182        """Export the mass spectrum to a CSV file.
     1183
    -1184    def parameters_toml(self):
    -1185        """Return the parameters of the mass spectrum as a TOML string."""
    -1186        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1187        exportMS = HighResMassSpecExport(self.filename, self)
    -1188        return exportMS.parameters_to_toml()
    -1189
    -1190class MassSpecProfile(MassSpecBase):
    -1191    """A mass spectrum class when the entry point is on profile format
    -1192    
    -1193    Notes
    -1194    -----
    -1195    Stores the profile data and instrument settings. 
    -1196    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
    -1197    _mspeaks is populated under the hood by calling process_mass_spec method.
    -1198    Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().
    +1184        Parameters
    +1185        ----------
    +1186        out_file_path : str
    +1187            The path to the CSV file to export to.
    +1188        write_metadata : bool, optional
    +1189            Whether to write the metadata to the CSV file. Defaults to True.
    +1190
    +1191        """
    +1192        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1193
    +1194        exportMS = HighResMassSpecExport(out_file_path, self)
    +1195        exportMS.to_csv(write_metadata=write_metadata)
    +1196
    +1197    def to_pandas(self, out_file_path, write_metadata=True):
    +1198        """Export the mass spectrum to a Pandas dataframe with pkl extension.
     1199
    -1200    Parameters
    -1201    ----------
    -1202    data_dict : dict
    -1203        A dictionary containing the profile data.
    -1204    d_params : dict{'str': float, int or str}
    -1205        contains the instrument settings and processing settings
    -1206    auto_process : bool, optional
    -1207        Whether to automatically process the mass spectrum. Defaults to True.
    -1208
    +1200        Parameters
    +1201        ----------
    +1202        out_file_path : str
    +1203            The path to the pickle (.pkl) file to export to.
    +1204        write_metadata : bool, optional
    +1205            Whether to write the metadata. Defaults to True.
    +1206
    +1207        """
    +1208        from corems.mass_spectrum.output.export import HighResMassSpecExport
     1209
    -1210    Attributes 
    -1211    ----------
    -1212    _abundance : ndarray
    -1213        The abundance values of the mass spectrum.
    -1214    _mz_exp : ndarray
    -1215        The m/z values of the mass spectrum.
    -1216    _mspeaks : list
    -1217        A list of mass peaks.
    -1218
    -1219    Methods 
    -1220    ----------
    -1221    * process_mass_spec(). Process the mass spectrum.
    -1222
    -1223    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
    -1224    """
    -1225
    -1226    def __init__(self, data_dict, d_params, auto_process=True):
    -1227        # print(data_dict.keys())
    -1228        super().__init__(data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params)
    -1229       
    -1230        if auto_process:
    -1231            self.process_mass_spec()
    -1232
    -1233class MassSpecfromFreq(MassSpecBase):
    -1234    """ A mass spectrum class when data entry is on frequency domain
    +1210        exportMS = HighResMassSpecExport(out_file_path, self)
    +1211        exportMS.to_pandas(write_metadata=write_metadata)
    +1212
    +1213    def to_dataframe(self, additional_columns=None):
    +1214        """Return the mass spectrum as a Pandas dataframe.
    +1215
    +1216        Parameters
    +1217        ----------
    +1218        additional_columns : list, optional
    +1219            A list of additional columns to include in the dataframe. Defaults to None.
    +1220            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
    +1221
    +1222        Returns
    +1223        -------
    +1224        pandas.DataFrame
    +1225            The mass spectrum as a Pandas dataframe.
    +1226        """
    +1227        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1228
    +1229        exportMS = HighResMassSpecExport(self.filename, self)
    +1230        return exportMS.get_pandas_df(additional_columns=additional_columns)
    +1231
    +1232    def to_json(self):
    +1233        """Return the mass spectrum as a JSON file."""
    +1234        from corems.mass_spectrum.output.export import HighResMassSpecExport
     1235
    -1236    Notes
    -1237    -----
    -1238    - Transform to m/z based on the settings stored at d_params
    -1239    - Stores the profile data and instrument settings
    -1240    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    -1241    - _mspeaks is populated under the hood by calling process_mass_spec method
    -1242    - iteration is null if _mspeaks is empty
    -1243
    -1244    Parameters
    -1245    ----------
    -1246    frequency_domain : list(float)
    -1247        all datapoints in frequency domain in Hz
    -1248    magnitude :  frequency_domain : list(float)
    -1249        all datapoints in for magnitude of each frequency datapoint
    -1250    d_params : dict{'str': float, int or str}
    -1251        contains the instrument settings and processing settings
    -1252    auto_process : bool, optional
    -1253        Whether to automatically process the mass spectrum. Defaults to True.
    -1254    keep_profile : bool, optional
    -1255        Whether to keep the profile data. Defaults to True.
    -1256  
    -1257    Attributes
    -1258    ----------
    -1259    has_frequency : bool
    -1260        Whether the mass spectrum has frequency data.
    -1261    _frequency_domain : list(float)
    -1262        Frequency domain in Hz
    -1263    label : str
    -1264        store label (Bruker, Midas Transient, see Labels class ). It across distinct processing points
    -1265    _abundance : ndarray
    -1266        The abundance values of the mass spectrum.
    -1267    _mz_exp : ndarray
    -1268        The m/z values of the mass spectrum.
    -1269    _mspeaks : list
    -1270        A list of mass peaks.
    -1271    See Also: all the attributes of MassSpecBase class
    -1272     
    -1273    Methods
    -1274    ----------
    -1275    * _set_mz_domain().
    -1276        calculates the m_z based on the setting of d_params
    -1277    * process_mass_spec().  Process the mass spectrum.
    -1278    
    -1279    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
    -1280    """
    -1281
    -1282    def __init__(self, frequency_domain, magnitude, d_params, 
    -1283                auto_process=True, keep_profile=True):
    -1284
    -1285        super().__init__(None, magnitude, d_params)
    +1236        exportMS = HighResMassSpecExport(self.filename, self)
    +1237        return exportMS.to_json()
    +1238
    +1239    def parameters_json(self):
    +1240        """Return the parameters of the mass spectrum as a JSON string."""
    +1241        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1242
    +1243        exportMS = HighResMassSpecExport(self.filename, self)
    +1244        return exportMS.parameters_to_json()
    +1245
    +1246    def parameters_toml(self):
    +1247        """Return the parameters of the mass spectrum as a TOML string."""
    +1248        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1249
    +1250        exportMS = HighResMassSpecExport(self.filename, self)
    +1251        return exportMS.parameters_to_toml()
    +1252
    +1253
    +1254class MassSpecProfile(MassSpecBase):
    +1255    """A mass spectrum class for when the entry point is profile-format data
    +1256
    +1257    Notes
    +1258    -----
    +1259    Stores the profile data and instrument settings.
    +1260    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
    +1261    _mspeaks is populated under the hood by calling process_mass_spec method.
    +1262    Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().
    +1263
    +1264    Parameters
    +1265    ----------
    +1266    data_dict : dict
    +1267        A dictionary containing the profile data.
    +1268    d_params : dict{'str': float, int or str}
    +1269        contains the instrument settings and processing settings
    +1270    auto_process : bool, optional
    +1271        Whether to automatically process the mass spectrum. Defaults to True.
    +1272
    +1273
    +1274    Attributes
    +1275    ----------
    +1276    _abundance : ndarray
    +1277        The abundance values of the mass spectrum.
    +1278    _mz_exp : ndarray
    +1279        The m/z values of the mass spectrum.
    +1280    _mspeaks : list
    +1281        A list of mass peaks.
    +1282
    +1283    Methods
    +1284    ----------
    +1285    * process_mass_spec(). Process the mass spectrum.
     1286
    -1287        self._frequency_domain = frequency_domain
    -1288        self.has_frequency = True
    -1289        self._set_mz_domain()
    -1290        self._sort_mz_domain()
    -1291        
    -1292        self.magnetron_frequency = None
    -1293        self.magnetron_frequency_sigma = None
    -1294
    -1295        #use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
    -1296        
    -1297        if auto_process:
    -1298            self.process_mass_spec(keep_profile=keep_profile)
    +1287    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
    +1288    """
    +1289
    +1290    def __init__(self, data_dict, d_params, auto_process=True):
    +1291        # print(data_dict.keys())
    +1292        super().__init__(
    +1293            data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params
    +1294        )
    +1295
    +1296        if auto_process:
    +1297            self.process_mass_spec()
    +1298
     1299
    -1300    def _sort_mz_domain(self):
    -1301        """Sort the mass spectrum by m/z values."""
    +1300class MassSpecfromFreq(MassSpecBase):
    +1301    """A mass spectrum class for data entered in the frequency domain
     1302
    -1303        if self._mz_exp[0] > self._mz_exp[-1]:
    -1304            self._mz_exp = self._mz_exp[::-1]
    -1305            self._abundance = self._abundance[::-1]
    -1306            self._frequency_domain = self._frequency_domain[::-1]
    -1307
    -1308    def _set_mz_domain(self):
    -1309        """Set the m/z domain of the mass spectrum based on the settings of d_params."""
    -1310        if self.label == Labels.bruker_frequency:
    -1311            self._mz_exp = self._f_to_mz_bruker()
    -1312
    -1313        else:
    -1314
    -1315            self._mz_exp = self._f_to_mz()
    -1316
    -1317    @property
    -1318    def transient_settings(self): 
    -1319        """Return the transient settings of the mass spectrum."""
    -1320        return self.parameters.transient
    -1321
    -1322    @transient_settings.setter
    -1323    def transient_settings(self, instance_TransientSetting):
    -1324     
    -1325        self.parameters.transient = instance_TransientSetting  
    -1326
    -1327    def calc_magnetron_freq(self, max_magnetron_freq=50,magnetron_freq_bins=300):
    -1328        """Calculates the magnetron frequency of the mass spectrum.
    -1329
    -1330        Parameters
    -1331        ----------
    -1332        max_magnetron_freq : float, optional
    -1333            The maximum magnetron frequency. Defaults to 50.
    -1334        magnetron_freq_bins : int, optional
    -1335            The number of bins to use for the histogram. Defaults to 300.
    -1336
    -1337        Returns
    -1338        -------
    -1339        None
    -1340
    -1341        Notes
    -1342        -----
    -1343        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
    -1344        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
    -1345        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
    -1346        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
    -1347        """
    -1348        ms_df = DataFrame(self.freq_exp(),columns=['Freq'])
    -1349        ms_df['FreqDelta'] = ms_df['Freq'].diff()
    -1350
    -1351        freq_hist = histogram(ms_df[ms_df['FreqDelta']<max_magnetron_freq]['FreqDelta'],bins=magnetron_freq_bins)
    -1352    
    -1353        mod = GaussianModel()
    -1354        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
    -1355        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
    -1356        self.magnetron_frequency = out.best_values['center']
    -1357        self.magnetron_frequency_sigma = out.best_values['sigma']
    -1358            
    -1359
    -1360class MassSpecCentroid(MassSpecBase):
    -1361
    -1362    """A mass spectrum class when the entry point is on centroid format
    +1303    Notes
    +1304    -----
    +1305    - Transform to m/z based on the settings stored at d_params
    +1306    - Stores the profile data and instrument settings
    +1307    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    +1308    - _mspeaks is populated under the hood by calling process_mass_spec method
    +1309    - iteration is null if _mspeaks is empty
    +1310
    +1311    Parameters
    +1312    ----------
    +1313    frequency_domain : list(float)
    +1314        all datapoints in frequency domain in Hz
    +1315    magnitude :  frequency_domain : list(float)
    +1316        all datapoints in for magnitude of each frequency datapoint
    +1317    d_params : dict{'str': float, int or str}
    +1318        contains the instrument settings and processing settings
    +1319    auto_process : bool, optional
    +1320        Whether to automatically process the mass spectrum. Defaults to True.
    +1321    keep_profile : bool, optional
    +1322        Whether to keep the profile data. Defaults to True.
    +1323
    +1324    Attributes
    +1325    ----------
    +1326    has_frequency : bool
    +1327        Whether the mass spectrum has frequency data.
    +1328    _frequency_domain : list(float)
    +1329        Frequency domain in Hz
    +1330    label : str
    +1331        store label (Bruker, Midas Transient, see Labels class ). It across distinct processing points
    +1332    _abundance : ndarray
    +1333        The abundance values of the mass spectrum.
    +1334    _mz_exp : ndarray
    +1335        The m/z values of the mass spectrum.
    +1336    _mspeaks : list
    +1337        A list of mass peaks.
    +1338    See Also: all the attributes of MassSpecBase class
    +1339
    +1340    Methods
    +1341    ----------
    +1342    * _set_mz_domain().
    +1343        calculates the m_z based on the setting of d_params
    +1344    * process_mass_spec().  Process the mass spectrum.
    +1345
    +1346    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
    +1347    """
    +1348
    +1349    def __init__(
    +1350        self,
    +1351        frequency_domain,
    +1352        magnitude,
    +1353        d_params,
    +1354        auto_process=True,
    +1355        keep_profile=True,
    +1356    ):
    +1357        super().__init__(None, magnitude, d_params)
    +1358
    +1359        self._frequency_domain = frequency_domain
    +1360        self.has_frequency = True
    +1361        self._set_mz_domain()
    +1362        self._sort_mz_domain()
     1363
    -1364    Notes
    -1365    -----
    -1366    - Stores the centroid data and instrument settings
    -1367    - Simulate profile data based on Gaussian or Lorentzian peak shape
    -1368    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    -1369    - _mspeaks is populated under the hood by calling process_mass_spec method
    -1370    - iteration is null if _mspeaks is empty
    +1364        self.magnetron_frequency = None
    +1365        self.magnetron_frequency_sigma = None
    +1366
    +1367        # use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
    +1368
    +1369        if auto_process:
    +1370            self.process_mass_spec(keep_profile=keep_profile)
     1371
    -1372    Parameters
    -1373    ----------
    -1374    data_dict : dict {string: numpy array float64 )
    -1375        contains keys [m/z, Abundance, Resolving Power, S/N] 
    -1376    d_params : dict{'str': float, int or str}
    -1377        contains the instrument settings and processing settings
    -1378    auto_process : bool, optional
    -1379        Whether to automatically process the mass spectrum. Defaults to True.
    -1380        
    -1381    Attributes
    -1382    ----------
    -1383    label : str
    -1384        store label (Bruker, Midas Transient, see Labels class)
    -1385    _baseline_noise : float
    -1386        store baseline noise
    -1387    _baseline_noise_std : float
    -1388        store baseline noise std
    -1389    _abundance : ndarray
    -1390        The abundance values of the mass spectrum.
    -1391    _mz_exp : ndarray
    -1392        The m/z values of the mass spectrum.
    -1393    _mspeaks : list
    -1394        A list of mass peaks. 
    -1395
    -1396    
    -1397    Methods
    -1398    ----------
    -1399    * process_mass_spec().
    -1400        Process the mass spectrum. Overriden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
    -1401    * __simulate_profile__data__().
    -1402        Simulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.
    -1403
    -1404    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
    -1405    """
    +1372    def _sort_mz_domain(self):
    +1373        """Sort the mass spectrum by m/z values."""
    +1374
    +1375        if self._mz_exp[0] > self._mz_exp[-1]:
    +1376            self._mz_exp = self._mz_exp[::-1]
    +1377            self._abundance = self._abundance[::-1]
    +1378            self._frequency_domain = self._frequency_domain[::-1]
    +1379
    +1380    def _set_mz_domain(self):
    +1381        """Set the m/z domain of the mass spectrum based on the settings of d_params."""
    +1382        if self.label == Labels.bruker_frequency:
    +1383            self._mz_exp = self._f_to_mz_bruker()
    +1384
    +1385        else:
    +1386            self._mz_exp = self._f_to_mz()
    +1387
    +1388    @property
    +1389    def transient_settings(self):
    +1390        """Return the transient settings of the mass spectrum."""
    +1391        return self.parameters.transient
    +1392
    +1393    @transient_settings.setter
    +1394    def transient_settings(self, instance_TransientSetting):
    +1395        self.parameters.transient = instance_TransientSetting
    +1396
    +1397    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
    +1398        """Calculates the magnetron frequency of the mass spectrum.
    +1399
    +1400        Parameters
    +1401        ----------
    +1402        max_magnetron_freq : float, optional
    +1403            The maximum magnetron frequency. Defaults to 50.
    +1404        magnetron_freq_bins : int, optional
    +1405            The number of bins to use for the histogram. Defaults to 300.
     1406
    -1407    def __init__(self, data_dict, d_params, auto_process=True):
    -1408
    -1409        super().__init__([], [], d_params)
    +1407        Returns
    +1408        -------
    +1409        None
     1410
    -1411        self._set_parameters_objects(d_params)
    -1412        
    -1413        if self.label == Labels.thermo_centroid:
    -1414            self._baseline_noise = d_params.get("baseline_noise")
    -1415            self._baseline_noise_std = d_params.get("baseline_noise_std")
    -1416
    -1417        self.is_centroid = True
    -1418        self.data_dict = data_dict
    -1419        self._mz_exp = data_dict[Labels.mz]
    -1420        self._abundance = data_dict[Labels.abundance]
    -1421
    -1422        if auto_process:
    -1423            self.process_mass_spec()
    -1424            
    +1411        Notes
    +1412        -----
    +1413        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
    +1414        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
    +1415        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
    +1416        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
    +1417        """
    +1418        ms_df = DataFrame(self.freq_exp(), columns=["Freq"])
    +1419        ms_df["FreqDelta"] = ms_df["Freq"].diff()
    +1420
    +1421        freq_hist = histogram(
    +1422            ms_df[ms_df["FreqDelta"] < max_magnetron_freq]["FreqDelta"],
    +1423            bins=magnetron_freq_bins,
    +1424        )
     1425
    -1426    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
    -1427        """Simulate profile data based on Gaussian or Lorentzian peak shape
    -1428
    -1429        Notes
    -1430        -----
    -1431        Needs theoretical resolving power calculation and define peak shape.
    -1432        This is a quick fix to trick a line plot be able to plot as sticks for plotting and inspection purposes only.
    -1433        
    -1434        Parameters
    -1435        ----------
    -1436        exp_mz_centroid : list(float)
    -1437            list of m/z values
    -1438        magnitude_centroid : list(float)
    -1439            list of abundance values
    -1440            
    -1441            
    -1442        Returns
    -1443        -------
    -1444        x : list(float)
    -1445            list of m/z values
    -1446        y : list(float)
    -1447            list of abundance values
    -1448        """
    -1449
    -1450        x, y = [], []
    -1451        for i in range(len(exp_mz_centroid)):
    -1452            x.append(exp_mz_centroid[i] - 0.0000001)
    -1453            x.append(exp_mz_centroid[i])
    -1454            x.append(exp_mz_centroid[i] + 0.0000001)
    -1455            y.append(0)
    -1456            y.append(magnitude_centroid[i])
    -1457            y.append(0)
    -1458        return x, y
    -1459
    -1460    @property
    -1461    def mz_exp_profile(self):
    -1462        """Return the m/z profile of the mass spectrum."""
    -1463        mz_list = []
    -1464        for mz in self.mz_exp:
    -1465            mz_list.append(mz - 0.0000001)
    -1466            mz_list.append(mz)
    -1467            mz_list.append(mz + 0.0000001)
    -1468        return mz_list
    -1469    
    -1470    @mz_exp_profile.setter
    -1471    def mz_exp_profile(self, _mz_exp ): self._mz_exp = _mz_exp
    -1472
    -1473    @property
    -1474    def abundance_profile(self):
    -1475        """Return the abundance profile of the mass spectrum."""
    -1476        ab_list = []
    -1477        for ab in self.abundance:
    -1478            ab_list.append(0)
    -1479            ab_list.append(ab)
    -1480            ab_list.append(0)
    -1481        return ab_list
    -1482
    -1483    @abundance_profile.setter
    -1484    def abundance_profile(self, abundance ): self._abundance = abundance
    -1485
    -1486    @property
    -1487    def tic(self):
    -1488        """Return the total ion current of the mass spectrum."""
    -1489        return sum(self.abundance)
    -1490
    -1491    def process_mass_spec(self):
    -1492        """Process the mass spectrum.
    -1493       
    -1494        """
    -1495        import tqdm
    -1496        # overwrite process_mass_spec 
    -1497        # mspeak objs are usually added inside the PeaKPicking class 
    -1498        # for profile and freq based data
    -1499        data_dict = self.data_dict
    -1500        ion_charge = self.polarity
    -1501
    -1502        # Check if resolving power is present
    -1503        rp_present = True
    -1504        if not data_dict.get(Labels.rp):
    -1505            rp_present = False
    -1506        if rp_present and list(data_dict.get(Labels.rp)) == [None]*len(data_dict.get(Labels.rp)):
    -1507            rp_present = False
    -1508
    -1509        # Check if s2n is present
    -1510        s2n_present = True
    -1511        if not data_dict.get(Labels.s2n):
    -1512            s2n_present = False
    -1513        if s2n_present and list(data_dict.get(Labels.s2n)) == [None]*len(data_dict.get(Labels.s2n)):
    -1514            s2n_present = False
    -1515        
    -1516        # Warning if no s2n data but noise thresholding is set to signal_noise
    -1517        if not s2n_present and self.parameters.mass_spectrum.noise_threshold_method == 'signal_noise':
    -1518            raise Exception("Signal to Noise data is missing for noise thresholding")
    +1426        mod = GaussianModel()
    +1427        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
    +1428        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
    +1429        self.magnetron_frequency = out.best_values["center"]
    +1430        self.magnetron_frequency_sigma = out.best_values["sigma"]
    +1431
    +1432
    +1433class MassSpecCentroid(MassSpecBase):
    +1434    """A mass spectrum class when the entry point is on centroid format
    +1435
    +1436    Notes
    +1437    -----
    +1438    - Stores the centroid data and instrument settings
    +1439    - Simulate profile data based on Gaussian or Lorentzian peak shape
    +1440    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    +1441    - _mspeaks is populated under the hood by calling process_mass_spec method
    +1442    - iteration is null if _mspeaks is empty
    +1443
    +1444    Parameters
    +1445    ----------
    +1446    data_dict : dict {string: numpy array float64 )
    +1447        contains keys [m/z, Abundance, Resolving Power, S/N]
    +1448    d_params : dict{'str': float, int or str}
    +1449        contains the instrument settings and processing settings
    +1450    auto_process : bool, optional
    +1451        Whether to automatically process the mass spectrum. Defaults to True.
    +1452
    +1453    Attributes
    +1454    ----------
    +1455    label : str
    +1456        store label (Bruker, Midas Transient, see Labels class)
    +1457    _baseline_noise : float
    +1458        store baseline noise
    +1459    _baseline_noise_std : float
    +1460        store baseline noise std
    +1461    _abundance : ndarray
    +1462        The abundance values of the mass spectrum.
    +1463    _mz_exp : ndarray
    +1464        The m/z values of the mass spectrum.
    +1465    _mspeaks : list
    +1466        A list of mass peaks.
    +1467
    +1468
    +1469    Methods
    +1470    ----------
    +1471    * process_mass_spec().
    +1472        Process the mass spectrum. Overriden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
    +1473    * __simulate_profile__data__().
    +1474        Simulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.
    +1475
    +1476    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
    +1477    """
    +1478
    +1479    def __init__(self, data_dict, d_params, auto_process=True):
    +1480        super().__init__([], [], d_params)
    +1481
    +1482        self._set_parameters_objects(d_params)
    +1483
    +1484        if self.label == Labels.thermo_centroid:
    +1485            self._baseline_noise = d_params.get("baseline_noise")
    +1486            self._baseline_noise_std = d_params.get("baseline_noise_std")
    +1487
    +1488        self.is_centroid = True
    +1489        self.data_dict = data_dict
    +1490        self._mz_exp = data_dict[Labels.mz]
    +1491        self._abundance = data_dict[Labels.abundance]
    +1492
    +1493        if auto_process:
    +1494            self.process_mass_spec()
    +1495
    +1496    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
    +1497        """Simulate profile data based on Gaussian or Lorentzian peak shape
    +1498
    +1499        Notes
    +1500        -----
    +1501        Needs theoretical resolving power calculation and define peak shape.
    +1502        This is a quick fix to trick a line plot be able to plot as sticks for plotting and inspection purposes only.
    +1503
    +1504        Parameters
    +1505        ----------
    +1506        exp_mz_centroid : list(float)
    +1507            list of m/z values
    +1508        magnitude_centroid : list(float)
    +1509            list of abundance values
    +1510
    +1511
    +1512        Returns
    +1513        -------
    +1514        x : list(float)
    +1515            list of m/z values
    +1516        y : list(float)
    +1517            list of abundance values
    +1518        """
     1519
    -1520        # Pull out abundance data        
    -1521        abun = array(data_dict.get(Labels.abundance)).astype(float)
    -1522        
    -1523        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
    -1524        abundance_threshold, factor = self.get_threshold(abun)
    -1525        
    -1526        # Set rp_i and s2n_i to None which will be overwritten if present
    -1527        rp_i, s2n_i = np.nan, np.nan
    -1528        for index, mz in enumerate(data_dict.get(Labels.mz)):
    -1529            if rp_present:
    -1530                if not data_dict.get(Labels.rp)[index]:
    -1531                    rp_i = np.nan
    -1532                else:
    -1533                    rp_i = float(data_dict.get(Labels.rp)[index])
    -1534            if s2n_present:
    -1535                if not data_dict.get(Labels.s2n)[index]:
    -1536                    s2n_i = np.nan
    -1537                else:
    -1538                    s2n_i = float(data_dict.get(Labels.s2n)[index])
    +1520        x, y = [], []
    +1521        for i in range(len(exp_mz_centroid)):
    +1522            x.append(exp_mz_centroid[i] - 0.0000001)
    +1523            x.append(exp_mz_centroid[i])
    +1524            x.append(exp_mz_centroid[i] + 0.0000001)
    +1525            y.append(0)
    +1526            y.append(magnitude_centroid[i])
    +1527            y.append(0)
    +1528        return x, y
    +1529
    +1530    @property
    +1531    def mz_exp_profile(self):
    +1532        """Return the m/z profile of the mass spectrum."""
    +1533        mz_list = []
    +1534        for mz in self.mz_exp:
    +1535            mz_list.append(mz - 0.0000001)
    +1536            mz_list.append(mz)
    +1537            mz_list.append(mz + 0.0000001)
    +1538        return mz_list
     1539
    -1540            # centroid peak does not have start and end peak index pos
    -1541            massspec_indexes = (index, index, index)
    -1542
    -1543            # Add peaks based on the noise thresholding method
    -1544            if self.parameters.mass_spectrum.noise_threshold_method in ['minima', 'relative_abundance', 'absolute_abundance'] and abun[index]/factor >= abundance_threshold:             
    -1545                self.add_mspeak(
    -1546                    ion_charge,
    -1547                    mz,
    -1548                    abun[index],
    -1549                    rp_i,
    -1550                    s2n_i,
    -1551                    massspec_indexes,
    -1552                    ms_parent=self
    -1553                )
    -1554            if self.parameters.mass_spectrum.noise_threshold_method == 'signal_noise' and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n:
    -1555                self.add_mspeak(
    -1556                    ion_charge,
    -1557                    mz,
    -1558                    abun[index],
    -1559                    rp_i,
    -1560                    s2n_i,
    -1561                    massspec_indexes,
    -1562                    ms_parent=self
    -1563                )
    -1564
    -1565        self.mspeaks = self._mspeaks
    -1566        self._dynamic_range = self.max_abundance / self.min_abundance
    -1567        self._set_nominal_masses_start_final_indexes()
    -1568        
    -1569        if self.label != Labels.thermo_centroid:
    -1570            
    -1571            if self.settings.noise_threshold_method == 'log':
    -1572                
    -1573                raise  Exception("log noise Not tested for centroid data")
    -1574                #self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
    -1575            
    -1576            else:
    -1577                self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
    -1578        
    -1579        del self.data_dict
    -1580    
    -1581class MassSpecCentroidLowRes(MassSpecCentroid):
    -1582    """A mass spectrum class when the entry point is on low resolution centroid format
    -1583
    -1584    Notes
    -1585    -----
    -1586    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead
    -1587    
    -1588    Parameters
    -1589    ----------
    -1590    data_dict : dict {string: numpy array float64 )
    -1591        contains keys [m/z, Abundance, Resolving Power, S/N]
    -1592    d_params : dict{'str': float, int or str}
    -1593        contains the instrument settings and processing settings
    -1594
    -1595    Attributes
    -1596    ----------
    -1597    _processed_tic : float
    -1598        store processed total ion current
    -1599    _abundance : ndarray
    -1600        The abundance values of the mass spectrum.
    -1601    _mz_exp : ndarray
    -1602        The m/z values of the mass spectrum.
    -1603    """
    -1604    
    -1605    def __init__(self, data_dict, d_params):
    -1606    
    -1607        self._set_parameters_objects(d_params)
    -1608        self._mz_exp = array(data_dict.get(Labels.mz))
    -1609        self._abundance = array(data_dict.get(Labels.abundance))
    -1610        self._processed_tic = None
    -1611    
    -1612    def __len__(self):
    -1613        
    -1614        return len(self.mz_exp)
    -1615        
    -1616    def __getitem__(self, position):
    -1617        
    -1618        return (self.mz_exp[position], self.abundance[position])
    -1619
    -1620    @property
    -1621    def mz_exp(self):
    -1622        """Return the m/z values of the mass spectrum."""
    -1623        return self._mz_exp 
    -1624
    -1625    @property
    -1626    def abundance(self):
    -1627        """Return the abundance values of the mass spectrum."""
    -1628        return self._abundance
    -1629
    -1630    @property
    -1631    def processed_tic(self):
    -1632        """Return the processed total ion current of the mass spectrum."""
    -1633        return sum(self._processed_tic)
    -1634    
    -1635    @property
    -1636    def tic(self):
    -1637        """Return the total ion current of the mass spectrum."""
    -1638        if self._processed_tic:
    -1639            return self._processed_tic
    -1640        else:
    -1641            return sum(self.abundance)
    -1642    
    -1643    @property
    -1644    def mz_abun_tuples(self):
    -1645        """Return the m/z and abundance values of the mass spectrum as a list of tuples."""
    -1646        r = lambda x: ( int(round(x[0],0), int(round(x[1],0))) )
    -1647
    -1648        return [r(i) for i in self]
    -1649    
    -1650    @property
    -1651    def mz_abun_dict(self):
    -1652        """Return the m/z and abundance values of the mass spectrum as a dictionary."""
    -1653        r = lambda x: int(round(x,0))
    -1654            
    -1655        return { r(i[0]):r(i[1]) for i in self}
    +1540    @mz_exp_profile.setter
    +1541    def mz_exp_profile(self, _mz_exp):
    +1542        self._mz_exp = _mz_exp
    +1543
    +1544    @property
    +1545    def abundance_profile(self):
    +1546        """Return the abundance profile of the mass spectrum."""
    +1547        ab_list = []
    +1548        for ab in self.abundance:
    +1549            ab_list.append(0)
    +1550            ab_list.append(ab)
    +1551            ab_list.append(0)
    +1552        return ab_list
    +1553
    +1554    @abundance_profile.setter
    +1555    def abundance_profile(self, abundance):
    +1556        self._abundance = abundance
    +1557
    +1558    @property
    +1559    def tic(self):
    +1560        """Return the total ion current of the mass spectrum."""
    +1561        return sum(self.abundance)
    +1562
    +1563    def process_mass_spec(self):
    +1564        """Process the mass spectrum."""
    +1565        import tqdm
    +1566
    +1567        # overwrite process_mass_spec
    +1568        # mspeak objs are usually added inside the PeaKPicking class
    +1569        # for profile and freq based data
    +1570        data_dict = self.data_dict
    +1571        ion_charge = self.polarity
    +1572
    +1573        # Check if resolving power is present
    +1574        rp_present = True
    +1575        if not data_dict.get(Labels.rp):
    +1576            rp_present = False
    +1577        if rp_present and list(data_dict.get(Labels.rp)) == [None] * len(
    +1578            data_dict.get(Labels.rp)
    +1579        ):
    +1580            rp_present = False
    +1581
    +1582        # Check if s2n is present
    +1583        s2n_present = True
    +1584        if not data_dict.get(Labels.s2n):
    +1585            s2n_present = False
    +1586        if s2n_present and list(data_dict.get(Labels.s2n)) == [None] * len(
    +1587            data_dict.get(Labels.s2n)
    +1588        ):
    +1589            s2n_present = False
    +1590
    +1591        # Warning if no s2n data but noise thresholding is set to signal_noise
    +1592        if (
    +1593            not s2n_present
    +1594            and self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
    +1595        ):
    +1596            raise Exception("Signal to Noise data is missing for noise thresholding")
    +1597
    +1598        # Pull out abundance data
    +1599        abun = array(data_dict.get(Labels.abundance)).astype(float)
    +1600
    +1601        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
    +1602        abundance_threshold, factor = self.get_threshold(abun)
    +1603
    +1604        # Set rp_i and s2n_i to None which will be overwritten if present
    +1605        rp_i, s2n_i = np.nan, np.nan
    +1606        for index, mz in enumerate(data_dict.get(Labels.mz)):
    +1607            if rp_present:
    +1608                if not data_dict.get(Labels.rp)[index]:
    +1609                    rp_i = np.nan
    +1610                else:
    +1611                    rp_i = float(data_dict.get(Labels.rp)[index])
    +1612            if s2n_present:
    +1613                if not data_dict.get(Labels.s2n)[index]:
    +1614                    s2n_i = np.nan
    +1615                else:
    +1616                    s2n_i = float(data_dict.get(Labels.s2n)[index])
    +1617
    +1618            # centroid peak does not have start and end peak index pos
    +1619            massspec_indexes = (index, index, index)
    +1620
    +1621            # Add peaks based on the noise thresholding method
    +1622            if (
    +1623                self.parameters.mass_spectrum.noise_threshold_method
    +1624                in ["minima", "relative_abundance", "absolute_abundance"]
    +1625                and abun[index] / factor >= abundance_threshold
    +1626            ):
    +1627                self.add_mspeak(
    +1628                    ion_charge,
    +1629                    mz,
    +1630                    abun[index],
    +1631                    rp_i,
    +1632                    s2n_i,
    +1633                    massspec_indexes,
    +1634                    ms_parent=self,
    +1635                )
    +1636            if (
    +1637                self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
    +1638                and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
    +1639            ):
    +1640                self.add_mspeak(
    +1641                    ion_charge,
    +1642                    mz,
    +1643                    abun[index],
    +1644                    rp_i,
    +1645                    s2n_i,
    +1646                    massspec_indexes,
    +1647                    ms_parent=self,
    +1648                )
    +1649
    +1650        self.mspeaks = self._mspeaks
    +1651        self._dynamic_range = self.max_abundance / self.min_abundance
    +1652        self._set_nominal_masses_start_final_indexes()
    +1653
    +1654        if self.label != Labels.thermo_centroid:
    +1655            if self.settings.noise_threshold_method == "log":
    +1656                raise Exception("log noise Not tested for centroid data")
    +1657                # self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
    +1658
    +1659            else:
    +1660                self._baseline_noise, self._baseline_noise_std = (
    +1661                    self.run_noise_threshold_calc()
    +1662                )
    +1663
    +1664        del self.data_dict
    +1665
    +1666
    +1667class MassSpecCentroidLowRes(MassSpecCentroid):
    +1668    """A mass spectrum class when the entry point is on low resolution centroid format
    +1669
    +1670    Notes
    +1671    -----
    +1672    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead
    +1673
    +1674    Parameters
    +1675    ----------
    +1676    data_dict : dict {string: numpy array float64 )
    +1677        contains keys [m/z, Abundance, Resolving Power, S/N]
    +1678    d_params : dict{'str': float, int or str}
    +1679        contains the instrument settings and processing settings
    +1680
    +1681    Attributes
    +1682    ----------
    +1683    _processed_tic : float
    +1684        store processed total ion current
    +1685    _abundance : ndarray
    +1686        The abundance values of the mass spectrum.
    +1687    _mz_exp : ndarray
    +1688        The m/z values of the mass spectrum.
    +1689    """
    +1690
    +1691    def __init__(self, data_dict, d_params):
    +1692        self._set_parameters_objects(d_params)
    +1693        self._mz_exp = array(data_dict.get(Labels.mz))
    +1694        self._abundance = array(data_dict.get(Labels.abundance))
    +1695        self._processed_tic = None
    +1696
    +1697    def __len__(self):
    +1698        return len(self.mz_exp)
    +1699
    +1700    def __getitem__(self, position):
    +1701        return (self.mz_exp[position], self.abundance[position])
    +1702
    +1703    @property
    +1704    def mz_exp(self):
    +1705        """Return the m/z values of the mass spectrum."""
    +1706        return self._mz_exp
    +1707
    +1708    @property
    +1709    def abundance(self):
    +1710        """Return the abundance values of the mass spectrum."""
    +1711        return self._abundance
    +1712
    +1713    @property
    +1714    def processed_tic(self):
    +1715        """Return the processed total ion current of the mass spectrum."""
    +1716        return sum(self._processed_tic)
    +1717
    +1718    @property
    +1719    def tic(self):
    +1720        """Return the total ion current of the mass spectrum."""
    +1721        if self._processed_tic:
    +1722            return self._processed_tic
    +1723        else:
    +1724            return sum(self.abundance)
    +1725
    +1726    @property
    +1727    def mz_abun_tuples(self):
    +1728        """Return the m/z and abundance values of the mass spectrum as a list of tuples."""
    +1729        r = lambda x: (int(round(x[0], 0), int(round(x[1], 0))))
    +1730
    +1731        return [r(i) for i in self]
    +1732
    +1733    @property
    +1734    def mz_abun_dict(self):
    +1735        """Return the m/z and abundance values of the mass spectrum as a dictionary."""
    +1736        r = lambda x: int(round(x, 0))
    +1737
    +1738        return {r(i[0]): r(i[1]) for i in self}
     
    @@ -2081,12 +2164,14 @@

    -
    24def overrides(interface_class):
    -25    """Checks if the method overrides a method from an interface class."""
    -26    def overrider(method):
    -27        assert method.__name__ in dir(interface_class)
    -28        return method
    -29    return overrider
    +            
    26def overrides(interface_class):
    +27    """Checks if the method overrides a method from an interface class."""
    +28
    +29    def overrider(method):
    +30        assert method.__name__ in dir(interface_class)
    +31        return method
    +32
    +33    return overrider
     
    @@ -2106,1165 +2191,1223 @@

    -
      31class MassSpecBase(MassSpecCalc, KendrickGrouping):
    -  32    """A mass spectrum base class, stores the profile data and instrument settings.
    -  33
    -  34    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
    -  35    _mspeaks is populated under the hood by calling process_mass_spec method.
    -  36    Iteration is null if _mspeaks is empty.
    -  37
    -  38    Parameters
    -  39    ----------
    -  40    mz_exp : array_like
    -  41        The m/z values of the mass spectrum.
    -  42    abundance : array_like
    -  43        The abundance values of the mass spectrum.
    -  44    d_params : dict
    -  45        A dictionary of parameters for the mass spectrum.
    -  46    **kwargs
    -  47        Additional keyword arguments.
    -  48
    -  49    Attributes
    -  50    ----------
    -  51
    -  52    mspeaks : list
    -  53        A list of mass peaks.
    -  54    is_calibrated : bool
    -  55        Whether the mass spectrum is calibrated.
    -  56    is_centroid : bool
    -  57        Whether the mass spectrum is centroided.
    -  58    has_frequency : bool
    -  59        Whether the mass spectrum has a frequency domain.
    -  60    calibration_order : None or int
    -  61        The order of the mass spectrum's calibration.
    -  62    calibration_points : None or ndarray
    -  63        The calibration points of the mass spectrum.
    -  64    calibration_ref_mzs: None or ndarray
    -  65        The reference m/z values of the mass spectrum's calibration.
    -  66    calibration_meas_mzs : None or ndarray
    -  67        The measured m/z values of the mass spectrum's calibration.
    -  68    calibration_RMS : None or float
    -  69        The root mean square of the mass spectrum's calibration.
    -  70    calibration_segment : None or CalibrationSegment
    -  71        The calibration segment of the mass spectrum.
    -  72    _abundance : ndarray
    -  73        The abundance values of the mass spectrum.
    -  74    _mz_exp : ndarray
    -  75        The m/z values of the mass spectrum.
    -  76    _mspeaks : list
    -  77        A list of mass peaks.
    -  78    _dict_nominal_masses_indexes : dict
    -  79        A dictionary of nominal masses and their indexes.
    -  80    _baseline_noise : float
    -  81        The baseline noise of the mass spectrum.
    -  82    _baseline_noise_std : float
    -  83        The standard deviation of the baseline noise of the mass spectrum.
    -  84    _dynamic_range : float or None
    -  85        The dynamic range of the mass spectrum.
    -  86    _transient_settings : None or TransientSettings
    -  87        The transient settings of the mass spectrum.
    -  88    _frequency_domain : None or FrequencyDomain
    -  89        The frequency domain of the mass spectrum.
    -  90    _mz_cal_profile : None or MzCalibrationProfile
    -  91        The m/z calibration profile of the mass spectrum.
    -  92
    -  93    Methods
    -  94    -------
    -  95    * process_mass_spec(). Main function to process the mass spectrum, 
    -  96    including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
    +            
      36class MassSpecBase(MassSpecCalc, KendrickGrouping):
    +  37    """A mass spectrum base class, stores the profile data and instrument settings.
    +  38
    +  39    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
    +  40    _mspeaks is populated under the hood by calling process_mass_spec method.
    +  41    Iteration is null if _mspeaks is empty.
    +  42
    +  43    Parameters
    +  44    ----------
    +  45    mz_exp : array_like
    +  46        The m/z values of the mass spectrum.
    +  47    abundance : array_like
    +  48        The abundance values of the mass spectrum.
    +  49    d_params : dict
    +  50        A dictionary of parameters for the mass spectrum.
    +  51    **kwargs
    +  52        Additional keyword arguments.
    +  53
    +  54    Attributes
    +  55    ----------
    +  56
    +  57    mspeaks : list
    +  58        A list of mass peaks.
    +  59    is_calibrated : bool
    +  60        Whether the mass spectrum is calibrated.
    +  61    is_centroid : bool
    +  62        Whether the mass spectrum is centroided.
    +  63    has_frequency : bool
    +  64        Whether the mass spectrum has a frequency domain.
    +  65    calibration_order : None or int
    +  66        The order of the mass spectrum's calibration.
    +  67    calibration_points : None or ndarray
    +  68        The calibration points of the mass spectrum.
    +  69    calibration_ref_mzs: None or ndarray
    +  70        The reference m/z values of the mass spectrum's calibration.
    +  71    calibration_meas_mzs : None or ndarray
    +  72        The measured m/z values of the mass spectrum's calibration.
    +  73    calibration_RMS : None or float
    +  74        The root mean square of the mass spectrum's calibration.
    +  75    calibration_segment : None or CalibrationSegment
    +  76        The calibration segment of the mass spectrum.
    +  77    _abundance : ndarray
    +  78        The abundance values of the mass spectrum.
    +  79    _mz_exp : ndarray
    +  80        The m/z values of the mass spectrum.
    +  81    _mspeaks : list
    +  82        A list of mass peaks.
    +  83    _dict_nominal_masses_indexes : dict
    +  84        A dictionary of nominal masses and their indexes.
    +  85    _baseline_noise : float
    +  86        The baseline noise of the mass spectrum.
    +  87    _baseline_noise_std : float
    +  88        The standard deviation of the baseline noise of the mass spectrum.
    +  89    _dynamic_range : float or None
    +  90        The dynamic range of the mass spectrum.
    +  91    _transient_settings : None or TransientSettings
    +  92        The transient settings of the mass spectrum.
    +  93    _frequency_domain : None or FrequencyDomain
    +  94        The frequency domain of the mass spectrum.
    +  95    _mz_cal_profile : None or MzCalibrationProfile
    +  96        The m/z calibration profile of the mass spectrum.
       97
    -  98    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
    -  99    """
    - 100    def __init__(self, mz_exp, abundance, d_params, **kwargs):
    - 101        
    - 102        self._abundance = array(abundance, dtype=float64)
    - 103        self._mz_exp = array(mz_exp, dtype=float64)
    - 104                    
    - 105        # objects created after process_mass_spec() function
    - 106        self._mspeaks = list()
    - 107        self.mspeaks = list()
    - 108        self._dict_nominal_masses_indexes = dict()
    - 109        self._baseline_noise = 0.001
    - 110        self._baseline_noise_std = 0.001
    - 111        self._dynamic_range = None
    - 112        # set to None: initialization occurs inside subclass MassSpecfromFreq
    - 113        self._transient_settings = None
    - 114        self._frequency_domain = None
    - 115        self._mz_cal_profile = None
    - 116        self.is_calibrated = False
    - 117
    - 118        self._set_parameters_objects(d_params)
    - 119        self._init_settings()
    - 120
    - 121        self.is_centroid = False
    - 122        self.has_frequency = False
    - 123
    - 124        self.calibration_order = None
    - 125        self.calibration_points = None
    - 126        self.calibration_ref_mzs = None
    - 127        self.calibration_meas_mzs = None
    - 128        self.calibration_RMS = None
    - 129        self.calibration_segment = None
    - 130        self.calibration_raw_error_median = None
    - 131        self.calibration_raw_error_stdev = None
    - 132
    - 133    def _init_settings(self):
    - 134        """Initializes the settings for the mass spectrum."""
    - 135        self._parameters = MSParameters()
    - 136
    - 137    def __len__(self):
    - 138
    - 139        return len(self.mspeaks)
    - 140
    - 141    def __getitem__(self, position) -> MSPeak:
    - 142
    - 143        return self.mspeaks[position]
    +  98    Methods
    +  99    -------
    + 100    * process_mass_spec(). Main function to process the mass spectrum,
    + 101    including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
    + 102
    + 103    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()
    + 104    """
    + 105
    + 106    def __init__(self, mz_exp, abundance, d_params, **kwargs):
    + 107        self._abundance = array(abundance, dtype=float64)
    + 108        self._mz_exp = array(mz_exp, dtype=float64)
    + 109
    + 110        # objects created after process_mass_spec() function
    + 111        self._mspeaks = list()
    + 112        self.mspeaks = list()
    + 113        self._dict_nominal_masses_indexes = dict()
    + 114        self._baseline_noise = 0.001
    + 115        self._baseline_noise_std = 0.001
    + 116        self._dynamic_range = None
    + 117        # set to None: initialization occurs inside subclass MassSpecfromFreq
    + 118        self._transient_settings = None
    + 119        self._frequency_domain = None
    + 120        self._mz_cal_profile = None
    + 121        self.is_calibrated = False
    + 122
    + 123        self._set_parameters_objects(d_params)
    + 124        self._init_settings()
    + 125
    + 126        self.is_centroid = False
    + 127        self.has_frequency = False
    + 128
    + 129        self.calibration_order = None
    + 130        self.calibration_points = None
    + 131        self.calibration_ref_mzs = None
    + 132        self.calibration_meas_mzs = None
    + 133        self.calibration_RMS = None
    + 134        self.calibration_segment = None
    + 135        self.calibration_raw_error_median = None
    + 136        self.calibration_raw_error_stdev = None
    + 137
    + 138    def _init_settings(self):
    + 139        """Initializes the settings for the mass spectrum."""
    + 140        self._parameters = MSParameters()
    + 141
    + 142    def __len__(self):
    + 143        return len(self.mspeaks)
      144
    - 145    def set_indexes(self, list_indexes):
    - 146        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
    + 145    def __getitem__(self, position) -> MSPeak:
    + 146        return self.mspeaks[position]
      147
    - 148        Parameters
    - 149        ----------
    - 150        list_indexes : list of int
    - 151            A list of integers representing the indexes of the MSpeaks to iterate over.
    - 152
    - 153        """
    - 154        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
    + 148    def set_indexes(self, list_indexes):
    + 149        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
    + 150
    + 151        Parameters
    + 152        ----------
    + 153        list_indexes : list of int
    + 154            A list of integers representing the indexes of the MSpeaks to iterate over.
      155
    - 156        for i, mspeak in  enumerate(self.mspeaks): mspeak.index = i
    - 157
    - 158        self._set_nominal_masses_start_final_indexes()
    - 159
    - 160    def reset_indexes(self):
    - 161        """Reset the mass spectrum to iterate over all MSpeaks objects.
    - 162
    - 163        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
    - 164        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
    - 165
    - 166        """
    - 167        self.mspeaks = self._mspeaks
    - 168
    - 169        for i, mspeak in  enumerate(self.mspeaks): mspeak.index = i
    - 170
    - 171        self._set_nominal_masses_start_final_indexes()
    + 156        """
    + 157        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
    + 158
    + 159        for i, mspeak in enumerate(self.mspeaks):
    + 160            mspeak.index = i
    + 161
    + 162        self._set_nominal_masses_start_final_indexes()
    + 163
    + 164    def reset_indexes(self):
    + 165        """Reset the mass spectrum to iterate over all MSpeaks objects.
    + 166
    + 167        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
    + 168        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
    + 169
    + 170        """
    + 171        self.mspeaks = self._mspeaks
      172
    - 173    def add_mspeak(self, ion_charge, mz_exp,
    - 174                            abundance,
    - 175                            resolving_power,
    - 176                            signal_to_noise,
    - 177                            massspec_indexes,
    - 178                            exp_freq=None,
    - 179                            ms_parent=None
    - 180                        ):
    - 181        """Add a new MSPeak object to the MassSpectrum object.
    - 182
    - 183        Parameters
    - 184        ----------
    - 185        ion_charge : int
    - 186            The ion charge of the MSPeak.
    - 187        mz_exp : float
    - 188            The experimental m/z value of the MSPeak.
    - 189        abundance : float
    - 190            The abundance of the MSPeak.
    - 191        resolving_power : float
    - 192            The resolving power of the MSPeak.
    - 193        signal_to_noise : float
    - 194            The signal-to-noise ratio of the MSPeak.
    - 195        massspec_indexes : list
    - 196            A list of indexes of the MSPeak in the MassSpectrum object.
    - 197        exp_freq : float, optional
    - 198            The experimental frequency of the MSPeak. Defaults to None.
    - 199        ms_parent : MSParent, optional
    - 200            The MSParent object associated with the MSPeak. Defaults to None.
    - 201        """
    - 202        mspeak = MSPeak(
    - 203                ion_charge,
    - 204                mz_exp,
    - 205                abundance,
    - 206                resolving_power,
    - 207                signal_to_noise,
    - 208                massspec_indexes,
    - 209                len(self._mspeaks),
    - 210                exp_freq=exp_freq,
    - 211                ms_parent=ms_parent,
    - 212        )
    - 213
    - 214        self._mspeaks.append(mspeak)
    - 215
    - 216    def _set_parameters_objects(self, d_params):
    - 217        """Set the parameters of the MassSpectrum object.
    - 218
    - 219        Parameters
    - 220        ----------
    - 221        d_params : dict
    - 222            A dictionary containing the parameters to set.
    + 173        for i, mspeak in enumerate(self.mspeaks):
    + 174            mspeak.index = i
    + 175
    + 176        self._set_nominal_masses_start_final_indexes()
    + 177
    + 178    def add_mspeak(
    + 179        self,
    + 180        ion_charge,
    + 181        mz_exp,
    + 182        abundance,
    + 183        resolving_power,
    + 184        signal_to_noise,
    + 185        massspec_indexes,
    + 186        exp_freq=None,
    + 187        ms_parent=None,
    + 188    ):
    + 189        """Add a new MSPeak object to the MassSpectrum object.
    + 190
    + 191        Parameters
    + 192        ----------
    + 193        ion_charge : int
    + 194            The ion charge of the MSPeak.
    + 195        mz_exp : float
    + 196            The experimental m/z value of the MSPeak.
    + 197        abundance : float
    + 198            The abundance of the MSPeak.
    + 199        resolving_power : float
    + 200            The resolving power of the MSPeak.
    + 201        signal_to_noise : float
    + 202            The signal-to-noise ratio of the MSPeak.
    + 203        massspec_indexes : list
    + 204            A list of indexes of the MSPeak in the MassSpectrum object.
    + 205        exp_freq : float, optional
    + 206            The experimental frequency of the MSPeak. Defaults to None.
    + 207        ms_parent : MSParent, optional
    + 208            The MSParent object associated with the MSPeak. Defaults to None.
    + 209        """
    + 210        mspeak = MSPeak(
    + 211            ion_charge,
    + 212            mz_exp,
    + 213            abundance,
    + 214            resolving_power,
    + 215            signal_to_noise,
    + 216            massspec_indexes,
    + 217            len(self._mspeaks),
    + 218            exp_freq=exp_freq,
    + 219            ms_parent=ms_parent,
    + 220        )
    + 221
    + 222        self._mspeaks.append(mspeak)
      223
    - 224        Notes
    - 225        -----
    - 226        This method sets the following parameters of the MassSpectrum object:
    - 227        - _calibration_terms
    - 228        - label
    - 229        - analyzer
    - 230        - acquisition_time
    - 231        - instrument_label
    - 232        - polarity
    - 233        - scan_number
    - 234        - retention_time
    - 235        - mobility_rt
    - 236        - mobility_scan
    - 237        - _filename
    - 238        - _dir_location
    - 239        - _baseline_noise
    - 240        - _baseline_noise_std
    - 241        - sample_name
    - 242        """
    - 243        self._calibration_terms = (
    - 244            d_params.get("Aterm"),
    - 245            d_params.get("Bterm"),
    - 246            d_params.get("Cterm"),
    - 247        )
    - 248
    - 249        self.label = d_params.get(Labels.label)
    - 250
    - 251        self.analyzer = d_params.get('analyzer')
    - 252
    - 253        self.acquisition_time = d_params.get('acquisition_time')
    - 254
    - 255        self.instrument_label = d_params.get('instrument_label')
    + 224    def _set_parameters_objects(self, d_params):
    + 225        """Set the parameters of the MassSpectrum object.
    + 226
    + 227        Parameters
    + 228        ----------
    + 229        d_params : dict
    + 230            A dictionary containing the parameters to set.
    + 231
    + 232        Notes
    + 233        -----
    + 234        This method sets the following parameters of the MassSpectrum object:
    + 235        - _calibration_terms
    + 236        - label
    + 237        - analyzer
    + 238        - acquisition_time
    + 239        - instrument_label
    + 240        - polarity
    + 241        - scan_number
    + 242        - retention_time
    + 243        - mobility_rt
    + 244        - mobility_scan
    + 245        - _filename
    + 246        - _dir_location
    + 247        - _baseline_noise
    + 248        - _baseline_noise_std
    + 249        - sample_name
    + 250        """
    + 251        self._calibration_terms = (
    + 252            d_params.get("Aterm"),
    + 253            d_params.get("Bterm"),
    + 254            d_params.get("Cterm"),
    + 255        )
      256
    - 257        self.polarity = int(d_params.get("polarity"))
    + 257        self.label = d_params.get(Labels.label)
      258
    - 259        self.scan_number = d_params.get("scan_number")
    + 259        self.analyzer = d_params.get("analyzer")
      260
    - 261        self.retention_time = d_params.get("rt")
    + 261        self.acquisition_time = d_params.get("acquisition_time")
      262
    - 263        self.mobility_rt = d_params.get("mobility_rt")
    + 263        self.instrument_label = d_params.get("instrument_label")
      264
    - 265        self.mobility_scan = d_params.get("mobility_scan")
    + 265        self.polarity = int(d_params.get("polarity"))
      266
    - 267        self._filename = d_params.get("filename_path")
    + 267        self.scan_number = d_params.get("scan_number")
      268
    - 269        self._dir_location = d_params.get("dir_location")
    + 269        self.retention_time = d_params.get("rt")
      270
    - 271        self._baseline_noise = d_params.get("baseline_noise")
    + 271        self.mobility_rt = d_params.get("mobility_rt")
      272
    - 273        self._baseline_noise_std = d_params.get("baseline_noise_std")
    + 273        self.mobility_scan = d_params.get("mobility_scan")
      274
    - 275        if d_params.get('sample_name') != 'Unknown':
    + 275        self._filename = d_params.get("filename_path")
      276
    - 277            self.sample_name = d_params.get('sample_name')
    - 278            if not self.sample_name:
    - 279                self.sample_name = self.filename.stem
    - 280        else:
    - 281
    - 282            self.sample_name = self.filename.stem
    - 283
    - 284    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
    - 285        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
    - 286
    - 287        Parameters
    - 288        ----------
    - 289        Aterm : float
    - 290            The A-term calibration coefficient.
    - 291        Bterm : float
    - 292            The B-term calibration coefficient.
    - 293        C : float
    - 294            The C-term calibration coefficient.
    - 295        fas : float, optional
    - 296            The frequency amplitude scaling factor. Default is 0.
    - 297        """
    - 298        self._calibration_terms = (Aterm, Bterm, C)
    - 299
    - 300        self._mz_exp = self._f_to_mz()
    - 301        self._abundance = self._abundance
    - 302        self.find_peaks()
    - 303        self.reset_indexes()
    - 304
    - 305    def clear_molecular_formulas(self):
    - 306        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
    - 307
    - 308        Returns
    - 309        -------
    - 310        numpy.ndarray
    - 311            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
    - 312        """
    - 313        self.check_mspeaks()
    - 314        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
    - 315
    - 316    def process_mass_spec(self, keep_profile=True):
    - 317        """Process the mass spectrum.
    - 318
    - 319        Parameters
    - 320        ----------
    - 321        keep_profile : bool, optional
    - 322            Whether to keep the profile data after processing. Defaults to True.
    - 323
    - 324        Notes
    - 325        -----
    - 326        This method does the following:
    - 327        - calculates the noise threshold
    - 328        - does peak picking (creates mspeak_objs)
    - 329        - resets the mspeak_obj indexes
    - 330        """
    - 331        
    - 332        # if runned mannually make sure to rerun filter_by_noise_threshold     
    - 333        # calculates noise threshold 
    - 334        # do peak picking( create mspeak_objs) 
    - 335        # reset mspeak_obj the indexes
    - 336         
    - 337        self.cal_noise_threshold()
    - 338
    - 339        self.find_peaks()
    - 340        self.reset_indexes()
    - 341
    - 342        if self.mspeaks:
    - 343            self._dynamic_range = self.max_abundance / self.min_abundance
    - 344        else:
    - 345            self._dynamic_range = 0
    - 346        if not keep_profile:
    + 277        self._dir_location = d_params.get("dir_location")
    + 278
    + 279        self._baseline_noise = d_params.get("baseline_noise")
    + 280
    + 281        self._baseline_noise_std = d_params.get("baseline_noise_std")
    + 282
    + 283        if d_params.get("sample_name") != "Unknown":
    + 284            self.sample_name = d_params.get("sample_name")
    + 285            if not self.sample_name:
    + 286                self.sample_name = self.filename.stem
    + 287        else:
    + 288            self.sample_name = self.filename.stem
    + 289
    + 290    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
    + 291        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
    + 292
    + 293        Parameters
    + 294        ----------
    + 295        Aterm : float
    + 296            The A-term calibration coefficient.
    + 297        Bterm : float
    + 298            The B-term calibration coefficient.
    + 299        C : float
    + 300            The C-term calibration coefficient.
    + 301        fas : float, optional
    + 302            The frequency amplitude scaling factor. Default is 0.
    + 303        """
    + 304        self._calibration_terms = (Aterm, Bterm, C)
    + 305
    + 306        self._mz_exp = self._f_to_mz()
    + 307        self._abundance = self._abundance
    + 308        self.find_peaks()
    + 309        self.reset_indexes()
    + 310
    + 311    def clear_molecular_formulas(self):
    + 312        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
    + 313
    + 314        Returns
    + 315        -------
    + 316        numpy.ndarray
    + 317            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
    + 318        """
    + 319        self.check_mspeaks()
    + 320        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
    + 321
    + 322    def process_mass_spec(self, keep_profile=True):
    + 323        """Process the mass spectrum.
    + 324
    + 325        Parameters
    + 326        ----------
    + 327        keep_profile : bool, optional
    + 328            Whether to keep the profile data after processing. Defaults to True.
    + 329
    + 330        Notes
    + 331        -----
    + 332        This method does the following:
    + 333        - calculates the noise threshold
    + 334        - does peak picking (creates mspeak_objs)
    + 335        - resets the mspeak_obj indexes
    + 336        """
    + 337
    + 338        # if runned mannually make sure to rerun filter_by_noise_threshold
    + 339        # calculates noise threshold
    + 340        # do peak picking( create mspeak_objs)
    + 341        # reset mspeak_obj the indexes
    + 342
    + 343        self.cal_noise_threshold()
    + 344
    + 345        self.find_peaks()
    + 346        self.reset_indexes()
      347
    - 348            self._abundance *= 0
    - 349            self._mz_exp *= 0
    - 350            
    - 351
    - 352    def cal_noise_threshold(self):
    - 353        """Calculate the noise threshold of the mass spectrum.
    - 354
    - 355        """
    - 356
    - 357        if self.label == Labels.simulated_profile:
    + 348        if self.mspeaks:
    + 349            self._dynamic_range = self.max_abundance / self.min_abundance
    + 350        else:
    + 351            self._dynamic_range = 0
    + 352        if not keep_profile:
    + 353            self._abundance *= 0
    + 354            self._mz_exp *= 0
    + 355
    + 356    def cal_noise_threshold(self):
    + 357        """Calculate the noise threshold of the mass spectrum."""
      358
    - 359            self._baseline_noise, self._baseline_noise_std = 0.1, 1
    - 360
    - 361        if self.settings.noise_threshold_method == 'log':
    - 362
    - 363            self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
    - 364
    - 365        else:
    - 366            self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
    - 367
    - 368    @property
    - 369    def parameters(self):
    - 370        """Return the parameters of the mass spectrum."""
    - 371        return self._parameters
    - 372
    - 373    @parameters.setter
    - 374    def parameters(self, instance_MSParameters):
    - 375        self._parameters = instance_MSParameters
    + 359        if self.label == Labels.simulated_profile:
    + 360            self._baseline_noise, self._baseline_noise_std = 0.1, 1
    + 361
    + 362        if self.settings.noise_threshold_method == "log":
    + 363            self._baseline_noise, self._baseline_noise_std = (
    + 364                self.run_log_noise_threshold_calc()
    + 365            )
    + 366
    + 367        else:
    + 368            self._baseline_noise, self._baseline_noise_std = (
    + 369                self.run_noise_threshold_calc()
    + 370            )
    + 371
    + 372    @property
    + 373    def parameters(self):
    + 374        """Return the parameters of the mass spectrum."""
    + 375        return self._parameters
      376
    - 377    def set_parameter_from_json(self, parameters_path):
    - 378        """Set the parameters of the mass spectrum from a JSON file.
    - 379        
    - 380        Parameters
    - 381        ----------
    - 382        parameters_path : str
    - 383            The path to the JSON file containing the parameters.
    - 384        """
    - 385        load_and_set_parameters_ms(self, parameters_path=parameters_path)    
    - 386
    - 387    def set_parameter_from_toml(self, parameters_path):
    - 388        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)    
    - 389
    - 390    @property
    - 391    def mspeaks_settings(self): 
    - 392        """Return the MS peak settings of the mass spectrum."""
    - 393        return self.parameters.ms_peak
    - 394
    - 395    @mspeaks_settings.setter
    - 396    def mspeaks_settings(self, instance_MassSpecPeakSetting):
    - 397
    - 398            self.parameters.ms_peak = instance_MassSpecPeakSetting
    - 399
    - 400    @property
    - 401    def settings(self): 
    - 402        """Return the settings of the mass spectrum."""
    - 403        return self.parameters.mass_spectrum
    - 404
    - 405    @settings.setter
    - 406    def settings(self, instance_MassSpectrumSetting):
    + 377    @parameters.setter
    + 378    def parameters(self, instance_MSParameters):
    + 379        self._parameters = instance_MSParameters
    + 380
    + 381    def set_parameter_from_json(self, parameters_path):
    + 382        """Set the parameters of the mass spectrum from a JSON file.
    + 383
    + 384        Parameters
    + 385        ----------
    + 386        parameters_path : str
    + 387            The path to the JSON file containing the parameters.
    + 388        """
    + 389        load_and_set_parameters_ms(self, parameters_path=parameters_path)
    + 390
    + 391    def set_parameter_from_toml(self, parameters_path):
    + 392        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)
    + 393
    + 394    @property
    + 395    def mspeaks_settings(self):
    + 396        """Return the MS peak settings of the mass spectrum."""
    + 397        return self.parameters.ms_peak
    + 398
    + 399    @mspeaks_settings.setter
    + 400    def mspeaks_settings(self, instance_MassSpecPeakSetting):
    + 401        self.parameters.ms_peak = instance_MassSpecPeakSetting
    + 402
    + 403    @property
    + 404    def settings(self):
    + 405        """Return the settings of the mass spectrum."""
    + 406        return self.parameters.mass_spectrum
      407
    - 408        self.parameters.mass_spectrum =  instance_MassSpectrumSetting
    - 409
    - 410    @property
    - 411    def molecular_search_settings(self):  
    - 412        """Return the molecular search settings of the mass spectrum."""
    - 413        return self.parameters.molecular_search
    - 414
    - 415    @molecular_search_settings.setter
    - 416    def molecular_search_settings(self, instance_MolecularFormulaSearchSettings):
    - 417
    - 418        self.parameters.molecular_search =  instance_MolecularFormulaSearchSettings
    - 419
    - 420    @property
    - 421    def mz_cal_profile(self):
    - 422        """Return the calibrated m/z profile of the mass spectrum."""
    - 423        return self._mz_cal_profile
    - 424
    - 425    @mz_cal_profile.setter
    - 426    def mz_cal_profile(self, mz_cal_list):
    - 427        
    + 408    @settings.setter
    + 409    def settings(self, instance_MassSpectrumSetting):
    + 410        self.parameters.mass_spectrum = instance_MassSpectrumSetting
    + 411
    + 412    @property
    + 413    def molecular_search_settings(self):
    + 414        """Return the molecular search settings of the mass spectrum."""
    + 415        return self.parameters.molecular_search
    + 416
    + 417    @molecular_search_settings.setter
    + 418    def molecular_search_settings(self, instance_MolecularFormulaSearchSettings):
    + 419        self.parameters.molecular_search = instance_MolecularFormulaSearchSettings
    + 420
    + 421    @property
    + 422    def mz_cal_profile(self):
    + 423        """Return the calibrated m/z profile of the mass spectrum."""
    + 424        return self._mz_cal_profile
    + 425
    + 426    @mz_cal_profile.setter
    + 427    def mz_cal_profile(self, mz_cal_list):
      428        if len(mz_cal_list) == len(self._mz_exp):
      429            self._mz_cal_profile = mz_cal_list
      430        else:
    - 431            raise Exception( "calibrated array (%i) is not of the same size of the data (%i)" % (len(mz_cal_list),  len(self.mz_exp_profile)))    
    - 432
    - 433    @property
    - 434    def mz_cal(self):
    - 435        """Return the calibrated m/z values of the mass spectrum."""
    - 436        return array([mspeak.mz_cal for mspeak in self.mspeaks])
    - 437
    - 438    @mz_cal.setter
    - 439    def mz_cal(self, mz_cal_list):
    + 431            raise Exception(
    + 432                "calibrated array (%i) is not of the same size of the data (%i)"
    + 433                % (len(mz_cal_list), len(self.mz_exp_profile))
    + 434            )
    + 435
    + 436    @property
    + 437    def mz_cal(self):
    + 438        """Return the calibrated m/z values of the mass spectrum."""
    + 439        return array([mspeak.mz_cal for mspeak in self.mspeaks])
      440
    - 441            if  len(mz_cal_list) == len(self.mspeaks):
    - 442                self.is_calibrated = True
    - 443                for index, mz_cal in enumerate(mz_cal_list):
    - 444                    self.mspeaks[index].mz_cal = mz_cal
    - 445            else: 
    - 446                raise Exception( "calibrated array (%i) is not of the same size of the data (%i)" % (len(mz_cal_list),  len(self._mspeaks)))    
    - 447
    - 448    @property
    - 449    def mz_exp(self):
    - 450        """Return the experimental m/z values of the mass spectrum."""
    - 451        self.check_mspeaks()
    + 441    @mz_cal.setter
    + 442    def mz_cal(self, mz_cal_list):
    + 443        if len(mz_cal_list) == len(self.mspeaks):
    + 444            self.is_calibrated = True
    + 445            for index, mz_cal in enumerate(mz_cal_list):
    + 446                self.mspeaks[index].mz_cal = mz_cal
    + 447        else:
    + 448            raise Exception(
    + 449                "calibrated array (%i) is not of the same size of the data (%i)"
    + 450                % (len(mz_cal_list), len(self._mspeaks))
    + 451            )
      452
    - 453        if self.is_calibrated:
    - 454
    - 455            return array([mspeak.mz_cal for mspeak in self.mspeaks])
    - 456
    - 457        else:
    - 458
    - 459            return array([mspeak.mz_exp for mspeak in self.mspeaks])
    - 460 
    - 461    @property
    - 462    def freq_exp_profile(self):
    - 463        """Return the experimental frequency profile of the mass spectrum."""
    - 464        return self._frequency_domain
    - 465    
    - 466    @freq_exp_profile.setter
    - 467    def freq_exp_profile(self, new_data): self._frequency_domain = array(new_data)
    + 453    @property
    + 454    def mz_exp(self):
    + 455        """Return the experimental m/z values of the mass spectrum."""
    + 456        self.check_mspeaks()
    + 457
    + 458        if self.is_calibrated:
    + 459            return array([mspeak.mz_cal for mspeak in self.mspeaks])
    + 460
    + 461        else:
    + 462            return array([mspeak.mz_exp for mspeak in self.mspeaks])
    + 463
    + 464    @property
    + 465    def freq_exp_profile(self):
    + 466        """Return the experimental frequency profile of the mass spectrum."""
    + 467        return self._frequency_domain
      468
    - 469    @property
    - 470    def freq_exp_pp(self):
    - 471        """Return the experimental frequency values of the mass spectrum that are used for peak picking."""
    - 472        _, _, freq = self.prepare_peak_picking_data()
    - 473        return freq
    - 474
    - 475    @property
    - 476    def mz_exp_profile(self): 
    - 477        """Return the experimental m/z profile of the mass spectrum."""
    - 478        if self.is_calibrated: 
    - 479            return self.mz_cal_profile
    - 480        else:
    - 481            return self._mz_exp
    - 482
    - 483    @mz_exp_profile.setter
    - 484    def mz_exp_profile(self, new_data ): self._mz_exp = array(new_data)
    - 485
    - 486    @property
    - 487    def mz_exp_pp(self):
    - 488        """Return the experimental m/z values of the mass spectrum that are used for peak picking."""
    - 489        mz, _, _ = self.prepare_peak_picking_data()
    - 490        return mz
    - 491
    - 492    @property
    - 493    def abundance_profile(self): 
    - 494        """Return the abundance profile of the mass spectrum."""
    - 495        return self._abundance
    + 469    @freq_exp_profile.setter
    + 470    def freq_exp_profile(self, new_data):
    + 471        self._frequency_domain = array(new_data)
    + 472
    + 473    @property
    + 474    def freq_exp_pp(self):
    + 475        """Return the experimental frequency values of the mass spectrum that are used for peak picking."""
    + 476        _, _, freq = self.prepare_peak_picking_data()
    + 477        return freq
    + 478
    + 479    @property
    + 480    def mz_exp_profile(self):
    + 481        """Return the experimental m/z profile of the mass spectrum."""
    + 482        if self.is_calibrated:
    + 483            return self.mz_cal_profile
    + 484        else:
    + 485            return self._mz_exp
    + 486
    + 487    @mz_exp_profile.setter
    + 488    def mz_exp_profile(self, new_data):
    + 489        self._mz_exp = array(new_data)
    + 490
    + 491    @property
    + 492    def mz_exp_pp(self):
    + 493        """Return the experimental m/z values of the mass spectrum that are used for peak picking."""
    + 494        mz, _, _ = self.prepare_peak_picking_data()
    + 495        return mz
      496
    - 497    @abundance_profile.setter
    - 498    def abundance_profile(self, new_data): self._abundance = array(new_data)
    - 499
    - 500    @property
    - 501    def abundance_profile_pp(self):
    - 502        """Return the abundance profile of the mass spectrum that is used for peak picking."""
    - 503        _, abundance, _ = self.prepare_peak_picking_data()
    - 504        return abundance
    - 505    
    + 497    @property
    + 498    def abundance_profile(self):
    + 499        """Return the abundance profile of the mass spectrum."""
    + 500        return self._abundance
    + 501
    + 502    @abundance_profile.setter
    + 503    def abundance_profile(self, new_data):
    + 504        self._abundance = array(new_data)
    + 505
      506    @property
    - 507    def abundance(self):
    - 508        """Return the abundance values of the mass spectrum."""
    - 509        self.check_mspeaks()
    - 510        return array([mspeak.abundance for mspeak in self.mspeaks])
    + 507    def abundance_profile_pp(self):
    + 508        """Return the abundance profile of the mass spectrum that is used for peak picking."""
    + 509        _, abundance, _ = self.prepare_peak_picking_data()
    + 510        return abundance
      511
    - 512    def freq_exp(self):
    - 513        """Return the experimental frequency values of the mass spectrum."""
    - 514        self.check_mspeaks()
    - 515        return array([mspeak.freq_exp for mspeak in self.mspeaks])
    - 516
    - 517    @property
    - 518    def resolving_power(self):
    - 519        """Return the resolving power values of the mass spectrum."""
    + 512    @property
    + 513    def abundance(self):
    + 514        """Return the abundance values of the mass spectrum."""
    + 515        self.check_mspeaks()
    + 516        return array([mspeak.abundance for mspeak in self.mspeaks])
    + 517
    + 518    def freq_exp(self):
    + 519        """Return the experimental frequency values of the mass spectrum."""
      520        self.check_mspeaks()
    - 521        return array([mspeak.resolving_power for mspeak in self.mspeaks])
    + 521        return array([mspeak.freq_exp for mspeak in self.mspeaks])
      522
      523    @property
    - 524    def signal_to_noise(self):
    - 525        self.check_mspeaks()
    - 526        return array([mspeak.signal_to_noise for mspeak in self.mspeaks])
    - 527
    - 528    @property
    - 529    def nominal_mz(self):
    - 530        """Return the nominal m/z values of the mass spectrum."""
    - 531        if self._dict_nominal_masses_indexes:
    - 532            return sorted(list(self._dict_nominal_masses_indexes.keys()))
    - 533        else:
    - 534            raise ValueError("Nominal indexes not yet set")    
    - 535
    - 536    def get_mz_and_abundance_peaks_tuples(self):
    - 537        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
    - 538        self.check_mspeaks()
    - 539        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
    - 540
    - 541    @property
    - 542    def kmd(self):
    - 543        """Return the Kendrick mass defect values of the mass spectrum."""
    + 524    def resolving_power(self):
    + 525        """Return the resolving power values of the mass spectrum."""
    + 526        self.check_mspeaks()
    + 527        return array([mspeak.resolving_power for mspeak in self.mspeaks])
    + 528
    + 529    @property
    + 530    def signal_to_noise(self):
    + 531        self.check_mspeaks()
    + 532        return array([mspeak.signal_to_noise for mspeak in self.mspeaks])
    + 533
    + 534    @property
    + 535    def nominal_mz(self):
    + 536        """Return the nominal m/z values of the mass spectrum."""
    + 537        if self._dict_nominal_masses_indexes:
    + 538            return sorted(list(self._dict_nominal_masses_indexes.keys()))
    + 539        else:
    + 540            raise ValueError("Nominal indexes not yet set")
    + 541
    + 542    def get_mz_and_abundance_peaks_tuples(self):
    + 543        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
      544        self.check_mspeaks()
    - 545        return array([mspeak.kmd for mspeak in self.mspeaks])
    + 545        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
      546
      547    @property
    - 548    def kendrick_mass(self):
    - 549        """Return the Kendrick mass values of the mass spectrum."""
    + 548    def kmd(self):
    + 549        """Return the Kendrick mass defect values of the mass spectrum."""
      550        self.check_mspeaks()
    - 551        return array([mspeak.kendrick_mass for mspeak in self.mspeaks])
    + 551        return array([mspeak.kmd for mspeak in self.mspeaks])
      552
      553    @property
    - 554    def max_mz_exp(self):
    - 555        """Return the maximum experimental m/z value of the mass spectrum."""
    - 556        return max([mspeak.mz_exp for mspeak in self.mspeaks])
    - 557
    - 558    @property
    - 559    def min_mz_exp(self):
    - 560        """Return the minimum experimental m/z value of the mass spectrum."""
    - 561        return min([mspeak.mz_exp for mspeak in self.mspeaks])
    - 562
    - 563    @property
    - 564    def max_abundance(self):
    - 565        """Return the maximum abundance value of the mass spectrum."""        
    - 566        return max([mspeak.abundance for mspeak in self.mspeaks])
    - 567
    - 568    @property
    - 569    def max_signal_to_noise(self):
    - 570        """Return the maximum signal-to-noise ratio of the mass spectrum."""
    - 571        return max([mspeak.signal_to_noise for mspeak in self.mspeaks])
    - 572
    - 573    @property
    - 574    def most_abundant_mspeak(self):
    - 575        """Return the most abundant MSpeak object of the mass spectrum."""
    - 576        return max(self.mspeaks, key=lambda m: m.abundance)
    - 577
    - 578    @property
    - 579    def min_abundance(self):
    - 580        """Return the minimum abundance value of the mass spectrum."""
    - 581        return min([mspeak.abundance for mspeak in self.mspeaks])
    - 582
    - 583    # takes too much cpu time 
    + 554    def kendrick_mass(self):
    + 555        """Return the Kendrick mass values of the mass spectrum."""
    + 556        self.check_mspeaks()
    + 557        return array([mspeak.kendrick_mass for mspeak in self.mspeaks])
    + 558
    + 559    @property
    + 560    def max_mz_exp(self):
    + 561        """Return the maximum experimental m/z value of the mass spectrum."""
    + 562        return max([mspeak.mz_exp for mspeak in self.mspeaks])
    + 563
    + 564    @property
    + 565    def min_mz_exp(self):
    + 566        """Return the minimum experimental m/z value of the mass spectrum."""
    + 567        return min([mspeak.mz_exp for mspeak in self.mspeaks])
    + 568
    + 569    @property
    + 570    def max_abundance(self):
    + 571        """Return the maximum abundance value of the mass spectrum."""
    + 572        return max([mspeak.abundance for mspeak in self.mspeaks])
    + 573
    + 574    @property
    + 575    def max_signal_to_noise(self):
    + 576        """Return the maximum signal-to-noise ratio of the mass spectrum."""
    + 577        return max([mspeak.signal_to_noise for mspeak in self.mspeaks])
    + 578
    + 579    @property
    + 580    def most_abundant_mspeak(self):
    + 581        """Return the most abundant MSpeak object of the mass spectrum."""
    + 582        return max(self.mspeaks, key=lambda m: m.abundance)
    + 583
      584    @property
    - 585    def dynamic_range(self):
    - 586        """Return the dynamic range of the mass spectrum."""
    - 587        return self._dynamic_range
    + 585    def min_abundance(self):
    + 586        """Return the minimum abundance value of the mass spectrum."""
    + 587        return min([mspeak.abundance for mspeak in self.mspeaks])
      588
    - 589    @property
    - 590    def baseline_noise(self):
    - 591        """Return the baseline noise of the mass spectrum."""
    - 592        if self._baseline_noise:
    - 593            return self._baseline_noise
    - 594        else:     
    - 595            return None
    - 596
    - 597    @property
    - 598    def baseline_noise_std(self):
    - 599        """Return the standard deviation of the baseline noise of the mass spectrum."""
    - 600        if self._baseline_noise_std == 0:
    - 601            return self._baseline_noise_std
    - 602        if self._baseline_noise_std:
    - 603            return self._baseline_noise_std
    - 604        else:     
    - 605            return None
    - 606
    - 607    @property
    - 608    def Aterm(self):
    - 609        """Return the A-term calibration coefficient of the mass spectrum."""
    - 610        return self._calibration_terms[0]
    - 611
    - 612    @property
    - 613    def Bterm(self):
    - 614        """Return the B-term calibration coefficient of the mass spectrum."""
    - 615        return self._calibration_terms[1]
    - 616
    - 617    @property
    - 618    def Cterm(self):
    - 619        """Return the C-term calibration coefficient of the mass spectrum."""
    - 620        return self._calibration_terms[2]
    - 621
    - 622    @property
    - 623    def filename(self):
    - 624        """Return the filename of the mass spectrum."""
    - 625        return Path(self._filename)
    - 626
    - 627    @property
    - 628    def dir_location(self):
    - 629        """Return the directory location of the mass spectrum."""
    - 630        return self._dir_location
    - 631
    - 632    def sort_by_mz(self):
    - 633        """Sort the mass spectrum by m/z values."""
    - 634        return sorted(self, key=lambda m: m.mz_exp)
    - 635
    - 636    def sort_by_abundance(self, reverse=False):
    - 637        """Sort the mass spectrum by abundance values."""
    - 638        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
    - 639
    - 640    @property
    - 641    def tic(self):
    - 642        """Return the total ion current of the mass spectrum."""
    - 643        return trapz(self.abundance_profile, self.mz_exp_profile)
    - 644
    - 645    def check_mspeaks_warning(self):
    - 646        """Check if the mass spectrum has MSpeaks objects.
    - 647        
    - 648        Raises
    - 649        ------
    - 650        Warning
    - 651            If the mass spectrum has no MSpeaks objects.
    - 652        """
    - 653        import warnings
    - 654        if self.mspeaks:
    - 655            pass
    - 656        else:
    - 657            warnings.warn(
    - 658                "mspeaks list is empty, continuing without filtering data"
    - 659            )
    + 589    # takes too much cpu time
    + 590    @property
    + 591    def dynamic_range(self):
    + 592        """Return the dynamic range of the mass spectrum."""
    + 593        return self._dynamic_range
    + 594
    + 595    @property
    + 596    def baseline_noise(self):
    + 597        """Return the baseline noise of the mass spectrum."""
    + 598        if self._baseline_noise:
    + 599            return self._baseline_noise
    + 600        else:
    + 601            return None
    + 602
    + 603    @property
    + 604    def baseline_noise_std(self):
    + 605        """Return the standard deviation of the baseline noise of the mass spectrum."""
    + 606        if self._baseline_noise_std == 0:
    + 607            return self._baseline_noise_std
    + 608        if self._baseline_noise_std:
    + 609            return self._baseline_noise_std
    + 610        else:
    + 611            return None
    + 612
    + 613    @property
    + 614    def Aterm(self):
    + 615        """Return the A-term calibration coefficient of the mass spectrum."""
    + 616        return self._calibration_terms[0]
    + 617
    + 618    @property
    + 619    def Bterm(self):
    + 620        """Return the B-term calibration coefficient of the mass spectrum."""
    + 621        return self._calibration_terms[1]
    + 622
    + 623    @property
    + 624    def Cterm(self):
    + 625        """Return the C-term calibration coefficient of the mass spectrum."""
    + 626        return self._calibration_terms[2]
    + 627
    + 628    @property
    + 629    def filename(self):
    + 630        """Return the filename of the mass spectrum."""
    + 631        return Path(self._filename)
    + 632
    + 633    @property
    + 634    def dir_location(self):
    + 635        """Return the directory location of the mass spectrum."""
    + 636        return self._dir_location
    + 637
    + 638    def sort_by_mz(self):
    + 639        """Sort the mass spectrum by m/z values."""
    + 640        return sorted(self, key=lambda m: m.mz_exp)
    + 641
    + 642    def sort_by_abundance(self, reverse=False):
    + 643        """Sort the mass spectrum by abundance values."""
    + 644        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
    + 645
    + 646    @property
    + 647    def tic(self):
    + 648        """Return the total ion current of the mass spectrum."""
    + 649        return trapz(self.abundance_profile, self.mz_exp_profile)
    + 650
    + 651    def check_mspeaks_warning(self):
    + 652        """Check if the mass spectrum has MSpeaks objects.
    + 653
    + 654        Raises
    + 655        ------
    + 656        Warning
    + 657            If the mass spectrum has no MSpeaks objects.
    + 658        """
    + 659        import warnings
      660
    - 661    def check_mspeaks(self):
    - 662        """Check if the mass spectrum has MSpeaks objects.
    - 663
    - 664        Raises
    - 665        ------
    - 666        Exception
    - 667            If the mass spectrum has no MSpeaks objects.
    - 668        """
    - 669        if self.mspeaks:
    - 670            pass
    - 671        else:
    - 672            raise Exception(
    - 673                "mspeaks list is empty, please run process_mass_spec() first"
    - 674            )
    - 675
    - 676    def remove_assignment_by_index(self, indexes):
    - 677        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
    - 678
    - 679        Parameters
    - 680        ----------
    - 681        indexes : list of int
    - 682            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
    - 683        """
    - 684        for i in indexes: self.mspeaks[i].clear_molecular_formulas()
    - 685
    - 686    def filter_by_index(self, list_indexes):
    - 687        """Filter the mass spectrum by the specified indexes.
    - 688
    - 689        Parameters
    - 690        ----------
    - 691        list_indexes : list of int
    - 692            A list of indexes of the MSpeaks objects to drop.
    - 693
    - 694        """
    - 695
    - 696        self.mspeaks = [self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes]
    - 697
    - 698        for i, mspeak in  enumerate(self.mspeaks): mspeak.index = i
    + 661        if self.mspeaks:
    + 662            pass
    + 663        else:
    + 664            warnings.warn("mspeaks list is empty, continuing without filtering data")
    + 665
    + 666    def check_mspeaks(self):
    + 667        """Check if the mass spectrum has MSpeaks objects.
    + 668
    + 669        Raises
    + 670        ------
    + 671        Exception
    + 672            If the mass spectrum has no MSpeaks objects.
    + 673        """
    + 674        if self.mspeaks:
    + 675            pass
    + 676        else:
    + 677            raise Exception(
    + 678                "mspeaks list is empty, please run process_mass_spec() first"
    + 679            )
    + 680
    + 681    def remove_assignment_by_index(self, indexes):
    + 682        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
    + 683
    + 684        Parameters
    + 685        ----------
    + 686        indexes : list of int
    + 687            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
    + 688        """
    + 689        for i in indexes:
    + 690            self.mspeaks[i].clear_molecular_formulas()
    + 691
    + 692    def filter_by_index(self, list_indexes):
    + 693        """Filter the mass spectrum by the specified indexes.
    + 694
    + 695        Parameters
    + 696        ----------
    + 697        list_indexes : list of int
    + 698            A list of indexes of the MSpeaks objects to drop.
      699
    - 700        self._set_nominal_masses_start_final_indexes()
    + 700        """
      701
    - 702    def filter_by_mz(self, min_mz, max_mz):
    - 703        """Filter the mass spectrum by the specified m/z range.
    - 704
    - 705        Parameters
    - 706        ----------
    - 707        min_mz : float
    - 708            The minimum m/z value to keep.
    - 709        max_mz : float
    - 710            The maximum m/z value to keep.
    - 711
    - 712        """      
    - 713        self.check_mspeaks_warning()
    - 714        indexes = [index for index, mspeak in enumerate(self.mspeaks) if not min_mz <= mspeak.mz_exp <= max_mz]
    - 715        self.filter_by_index(indexes)
    - 716
    - 717    def filter_by_s2n(self, min_s2n, max_s2n=False):
    - 718        """Filter the mass spectrum by the specified signal-to-noise ratio range.
    - 719
    - 720        Parameters
    - 721        ----------
    - 722        min_s2n : float
    - 723            The minimum signal-to-noise ratio to keep.
    - 724        max_s2n : float, optional
    - 725            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
    - 726
    - 727        """
    - 728        self.check_mspeaks_warning()
    - 729        if max_s2n:
    - 730            indexes = [index for index, mspeak in enumerate(self.mspeaks) if not min_s2n <= mspeak.signal_to_noise <= max_s2n ]
    - 731        else:
    - 732            indexes = [index for index, mspeak in enumerate(self.mspeaks) if mspeak.signal_to_noise <= min_s2n ]
    - 733        self.filter_by_index(indexes)
    - 734
    - 735    def filter_by_abundance(self, min_abund, max_abund=False):
    - 736        """Filter the mass spectrum by the specified abundance range.
    - 737
    - 738        Parameters
    - 739        ----------
    - 740        min_abund : float
    - 741            The minimum abundance to keep.
    - 742        max_abund : float, optional
    - 743            The maximum abundance to keep. Defaults to False (no maximum).
    - 744
    - 745        """
    - 746        self.check_mspeaks_warning()
    - 747        if max_abund:
    - 748            indexes = [index for index, mspeak in enumerate(self.mspeaks) if not min_abund <= mspeak.abundance <= max_abund]
    - 749        else:
    - 750            indexes = [index for index, mspeak in enumerate(self.mspeaks) if mspeak.abundance <= min_abund]
    - 751        self.filter_by_index(indexes)
    - 752
    - 753    def filter_by_max_resolving_power(self, B, T):
    - 754        """Filter the mass spectrum by the specified maximum resolving power.
    - 755        
    - 756        Parameters
    - 757        ----------
    - 758        B : float
    - 759        T : float
    - 760        
    - 761        """
    - 762
    - 763        rpe = lambda m, z: (1.274e7 * z * B * T)/(m*z)
    - 764
    - 765        self.check_mspeaks_warning()
    - 766
    - 767        indexes_to_remove = [index for index, mspeak in enumerate(self.mspeaks) if  mspeak.resolving_power >= rpe(mspeak.mz_exp,mspeak.ion_charge)]
    - 768        self.filter_by_index(indexes_to_remove)
    - 769
    - 770    def filter_by_mean_resolving_power(self, ndeviations=3,plot=False,guess_pars=False):
    - 771        """Filter the mass spectrum by the specified mean resolving power.
    - 772
    - 773        Parameters
    - 774        ----------
    - 775        ndeviations : float, optional
    - 776            The number of standard deviations to use for filtering. Defaults to 3.
    - 777        plot : bool, optional
    - 778            Whether to plot the resolving power distribution. Defaults to False.
    - 779        guess_pars : bool, optional
    - 780            Whether to guess the parameters for the Gaussian model. Defaults to False.
    + 702        self.mspeaks = [
    + 703            self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes
    + 704        ]
    + 705
    + 706        for i, mspeak in enumerate(self.mspeaks):
    + 707            mspeak.index = i
    + 708
    + 709        self._set_nominal_masses_start_final_indexes()
    + 710
    + 711    def filter_by_mz(self, min_mz, max_mz):
    + 712        """Filter the mass spectrum by the specified m/z range.
    + 713
    + 714        Parameters
    + 715        ----------
    + 716        min_mz : float
    + 717            The minimum m/z value to keep.
    + 718        max_mz : float
    + 719            The maximum m/z value to keep.
    + 720
    + 721        """
    + 722        self.check_mspeaks_warning()
    + 723        indexes = [
    + 724            index
    + 725            for index, mspeak in enumerate(self.mspeaks)
    + 726            if not min_mz <= mspeak.mz_exp <= max_mz
    + 727        ]
    + 728        self.filter_by_index(indexes)
    + 729
    + 730    def filter_by_s2n(self, min_s2n, max_s2n=False):
    + 731        """Filter the mass spectrum by the specified signal-to-noise ratio range.
    + 732
    + 733        Parameters
    + 734        ----------
    + 735        min_s2n : float
    + 736            The minimum signal-to-noise ratio to keep.
    + 737        max_s2n : float, optional
    + 738            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
    + 739
    + 740        """
    + 741        self.check_mspeaks_warning()
    + 742        if max_s2n:
    + 743            indexes = [
    + 744                index
    + 745                for index, mspeak in enumerate(self.mspeaks)
    + 746                if not min_s2n <= mspeak.signal_to_noise <= max_s2n
    + 747            ]
    + 748        else:
    + 749            indexes = [
    + 750                index
    + 751                for index, mspeak in enumerate(self.mspeaks)
    + 752                if mspeak.signal_to_noise <= min_s2n
    + 753            ]
    + 754        self.filter_by_index(indexes)
    + 755
    + 756    def filter_by_abundance(self, min_abund, max_abund=False):
    + 757        """Filter the mass spectrum by the specified abundance range.
    + 758
    + 759        Parameters
    + 760        ----------
    + 761        min_abund : float
    + 762            The minimum abundance to keep.
    + 763        max_abund : float, optional
    + 764            The maximum abundance to keep. Defaults to False (no maximum).
    + 765
    + 766        """
    + 767        self.check_mspeaks_warning()
    + 768        if max_abund:
    + 769            indexes = [
    + 770                index
    + 771                for index, mspeak in enumerate(self.mspeaks)
    + 772                if not min_abund <= mspeak.abundance <= max_abund
    + 773            ]
    + 774        else:
    + 775            indexes = [
    + 776                index
    + 777                for index, mspeak in enumerate(self.mspeaks)
    + 778                if mspeak.abundance <= min_abund
    + 779            ]
    + 780        self.filter_by_index(indexes)
      781
    - 782        """
    - 783        self.check_mspeaks_warning()
    - 784        indexes_to_remove = MeanResolvingPowerFilter(self,ndeviations,plot,guess_pars).main()
    - 785        self.filter_by_index(indexes_to_remove)
    - 786
    - 787
    - 788    def filter_by_min_resolving_power(self, B, T):
    - 789        """Filter the mass spectrum by the specified minimum resolving power.
    - 790
    - 791        Parameters
    - 792        ----------
    - 793        B : float
    - 794        T : float
    + 782    def filter_by_max_resolving_power(self, B, T):
    + 783        """Filter the mass spectrum by the specified maximum resolving power.
    + 784
    + 785        Parameters
    + 786        ----------
    + 787        B : float
    + 788        T : float
    + 789
    + 790        """
    + 791
    + 792        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
    + 793
    + 794        self.check_mspeaks_warning()
      795
    - 796        """
    - 797        rpe = lambda m, z: (1.274e7 * z * B * T)/(m*z)
    - 798
    - 799        self.check_mspeaks_warning()
    - 800
    - 801        indexes_to_remove = [index for index, mspeak in enumerate(self.mspeaks) if  mspeak.resolving_power <= rpe(mspeak.mz_exp,mspeak.ion_charge)]
    - 802        self.filter_by_index(indexes_to_remove)
    - 803
    - 804    def filter_by_noise_threshold(self):
    - 805        """Filter the mass spectrum by the noise threshold."""
    - 806        
    - 807        threshold = self.get_noise_threshold()[1][0]
    - 808        
    - 809        self.check_mspeaks_warning()
    - 810        
    - 811        indexes_to_remove = [index for index, mspeak in enumerate(self.mspeaks) if  mspeak.abundance <= threshold]
    - 812        self.filter_by_index(indexes_to_remove)
    - 813
    - 814    
    - 815    def find_peaks(self):
    - 816        """Find the peaks of the mass spectrum."""
    - 817        #needs to clear previous results from peak_picking
    - 818        self._mspeaks = list()
    - 819
    - 820        #then do peak picking
    - 821        self.do_peak_picking()
    - 822        # print("A total of %i peaks were found" % len(self._mspeaks))
    + 796        indexes_to_remove = [
    + 797            index
    + 798            for index, mspeak in enumerate(self.mspeaks)
    + 799            if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge)
    + 800        ]
    + 801        self.filter_by_index(indexes_to_remove)
    + 802
    + 803    def filter_by_mean_resolving_power(
    + 804        self, ndeviations=3, plot=False, guess_pars=False
    + 805    ):
    + 806        """Filter the mass spectrum by the specified mean resolving power.
    + 807
    + 808        Parameters
    + 809        ----------
    + 810        ndeviations : float, optional
    + 811            The number of standard deviations to use for filtering. Defaults to 3.
    + 812        plot : bool, optional
    + 813            Whether to plot the resolving power distribution. Defaults to False.
    + 814        guess_pars : bool, optional
    + 815            Whether to guess the parameters for the Gaussian model. Defaults to False.
    + 816
    + 817        """
    + 818        self.check_mspeaks_warning()
    + 819        indexes_to_remove = MeanResolvingPowerFilter(
    + 820            self, ndeviations, plot, guess_pars
    + 821        ).main()
    + 822        self.filter_by_index(indexes_to_remove)
      823
    - 824    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
    - 825        """Change the Kendrick base of all MSpeaks objects.
    + 824    def filter_by_min_resolving_power(self, B, T):
    + 825        """Filter the mass spectrum by the specified minimum resolving power.
      826
      827        Parameters
      828        ----------
    - 829        kendrick_dict_base : dict
    - 830            A dictionary of the Kendrick base to change to.
    + 829        B : float
    + 830        T : float
      831
    - 832        Notes
    - 833        -----
    - 834        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
    - 835        """
    - 836        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
    - 837
    - 838        for mspeak in self.mspeaks:
    - 839
    - 840            mspeak.change_kendrick_base(kendrick_dict_base)
    - 841
    - 842    def get_nominal_mz_first_last_indexes(self, nominal_mass):
    - 843        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
    - 844
    - 845        Parameters
    - 846        ----------
    - 847        nominal_mass : int
    - 848            The nominal mass to get the indexes for.
    - 849
    - 850        Returns
    - 851        -------
    - 852        tuple
    - 853            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
    - 854        """
    - 855        if self._dict_nominal_masses_indexes:
    - 856
    - 857            if nominal_mass in self._dict_nominal_masses_indexes.keys():
    - 858
    - 859                return (self._dict_nominal_masses_indexes.get(nominal_mass)[0], self._dict_nominal_masses_indexes.get(nominal_mass)[1]+1)
    - 860
    - 861            else:
    - 862                # import warnings
    - 863                # uncomment warn to distribution
    - 864                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
    - 865                return (0, 0)
    - 866        else:
    - 867            raise Exception("run process_mass_spec() function before trying to access the data")
    - 868
    - 869    def get_masses_count_by_nominal_mass(self):
    - 870        """Return a dictionary of the nominal masses and their counts."""
    - 871
    - 872        dict_nominal_masses_count = {}
    - 873
    - 874        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    - 875
    - 876        for nominal_mass in all_nominal_masses:
    - 877            if nominal_mass not in dict_nominal_masses_count:
    - 878                dict_nominal_masses_count[nominal_mass] = len(list(self.get_nominal_mass_indexes(nominal_mass)))
    - 879
    - 880        return dict_nominal_masses_count
    - 881
    - 882    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
    - 883        """Return a dictionary of the nominal masses and their counts.
    - 884
    - 885        Parameters
    - 886        ----------
    - 887        mz_overlay : float, optional
    - 888            The m/z overlay to use for counting. Defaults to 0.1.
    - 889
    - 890        Returns
    - 891        -------
    - 892        dict
    - 893            A dictionary of the nominal masses and their counts.
    - 894        """
    - 895        dict_nominal_masses_count ={}
    - 896
    - 897        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    - 898
    - 899        for nominal_mass in all_nominal_masses:
    - 900
    - 901            if nominal_mass not in dict_nominal_masses_count:
    - 902
    - 903                min_mz = nominal_mass - mz_overlay
    - 904
    - 905                max_mz = nominal_mass + 1 + mz_overlay
    - 906
    - 907                indexes = indexes = where((self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)) 
    - 908
    - 909                dict_nominal_masses_count[nominal_mass] = indexes[0].size
    - 910
    - 911        return dict_nominal_masses_count
    - 912
    - 913    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
    - 914        """Return the indexes of the MSpeaks objects with the specified nominal mass.
    - 915
    - 916        Parameters
    - 917        ----------
    - 918        nominal_mass : int
    - 919            The nominal mass to get the indexes for.
    - 920        overlay : float, optional
    - 921            The m/z overlay to use for counting. Defaults to 0.1.
    - 922
    - 923        Returns
    - 924        -------
    - 925        generator
    - 926            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
    - 927        """       
    - 928        min_mz_to_look = nominal_mass - overlay
    - 929        max_mz_to_look = nominal_mass + 1 + overlay
    - 930
    - 931        return (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
    - 932
    - 933        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
    - 934        # return indexes
    - 935
    - 936    def _set_nominal_masses_start_final_indexes(self):
    - 937        """Set the start and final indexes of the MSpeaks objects for all nominal masses."""
    - 938        dict_nominal_masses_indexes ={}
    - 939
    - 940        all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks)
    - 941
    - 942        for nominal_mass in all_nominal_masses:
    + 832        """
    + 833        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
    + 834
    + 835        self.check_mspeaks_warning()
    + 836
    + 837        indexes_to_remove = [
    + 838            index
    + 839            for index, mspeak in enumerate(self.mspeaks)
    + 840            if mspeak.resolving_power <= rpe(mspeak.mz_exp, mspeak.ion_charge)
    + 841        ]
    + 842        self.filter_by_index(indexes_to_remove)
    + 843
    + 844    def filter_by_noise_threshold(self):
    + 845        """Filter the mass spectrum by the noise threshold."""
    + 846
    + 847        threshold = self.get_noise_threshold()[1][0]
    + 848
    + 849        self.check_mspeaks_warning()
    + 850
    + 851        indexes_to_remove = [
    + 852            index
    + 853            for index, mspeak in enumerate(self.mspeaks)
    + 854            if mspeak.abundance <= threshold
    + 855        ]
    + 856        self.filter_by_index(indexes_to_remove)
    + 857
    + 858    def find_peaks(self):
    + 859        """Find the peaks of the mass spectrum."""
    + 860        # needs to clear previous results from peak_picking
    + 861        self._mspeaks = list()
    + 862
    + 863        # then do peak picking
    + 864        self.do_peak_picking()
    + 865        # print("A total of %i peaks were found" % len(self._mspeaks))
    + 866
    + 867    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
    + 868        """Change the Kendrick base of all MSpeaks objects.
    + 869
    + 870        Parameters
    + 871        ----------
    + 872        kendrick_dict_base : dict
    + 873            A dictionary of the Kendrick base to change to.
    + 874
    + 875        Notes
    + 876        -----
    + 877        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
    + 878        """
    + 879        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
    + 880
    + 881        for mspeak in self.mspeaks:
    + 882            mspeak.change_kendrick_base(kendrick_dict_base)
    + 883
    + 884    def get_nominal_mz_first_last_indexes(self, nominal_mass):
    + 885        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
    + 886
    + 887        Parameters
    + 888        ----------
    + 889        nominal_mass : int
    + 890            The nominal mass to get the indexes for.
    + 891
    + 892        Returns
    + 893        -------
    + 894        tuple
    + 895            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
    + 896        """
    + 897        if self._dict_nominal_masses_indexes:
    + 898            if nominal_mass in self._dict_nominal_masses_indexes.keys():
    + 899                return (
    + 900                    self._dict_nominal_masses_indexes.get(nominal_mass)[0],
    + 901                    self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1,
    + 902                )
    + 903
    + 904            else:
    + 905                # import warnings
    + 906                # uncomment warn to distribution
    + 907                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
    + 908                return (0, 0)
    + 909        else:
    + 910            raise Exception(
    + 911                "run process_mass_spec() function before trying to access the data"
    + 912            )
    + 913
    + 914    def get_masses_count_by_nominal_mass(self):
    + 915        """Return a dictionary of the nominal masses and their counts."""
    + 916
    + 917        dict_nominal_masses_count = {}
    + 918
    + 919        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    + 920
    + 921        for nominal_mass in all_nominal_masses:
    + 922            if nominal_mass not in dict_nominal_masses_count:
    + 923                dict_nominal_masses_count[nominal_mass] = len(
    + 924                    list(self.get_nominal_mass_indexes(nominal_mass))
    + 925                )
    + 926
    + 927        return dict_nominal_masses_count
    + 928
    + 929    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
    + 930        """Return a dictionary of the nominal masses and their counts.
    + 931
    + 932        Parameters
    + 933        ----------
    + 934        mz_overlay : float, optional
    + 935            The m/z overlay to use for counting. Defaults to 0.1.
    + 936
    + 937        Returns
    + 938        -------
    + 939        dict
    + 940            A dictionary of the nominal masses and their counts.
    + 941        """
    + 942        dict_nominal_masses_count = {}
      943
    - 944            #indexes = self.get_nominal_mass_indexes(nominal_mass)
    - 945            # Convert the iterator to a list to avoid multiple calls
    - 946            indexes = list(self.get_nominal_mass_indexes(nominal_mass))
    - 947
    - 948            # If the list is not empty, find the first and last; otherwise, set None
    - 949            if indexes:
    - 950                first, last = indexes[0], indexes[-1]
    - 951            else:
    - 952                first = last = None
    - 953            #defaultvalue = None
    - 954            #first = last = next(indexes, defaultvalue)
    - 955            #for last in indexes:
    - 956            #    pass
    + 944        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    + 945
    + 946        for nominal_mass in all_nominal_masses:
    + 947            if nominal_mass not in dict_nominal_masses_count:
    + 948                min_mz = nominal_mass - mz_overlay
    + 949
    + 950                max_mz = nominal_mass + 1 + mz_overlay
    + 951
    + 952                indexes = indexes = where(
    + 953                    (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)
    + 954                )
    + 955
    + 956                dict_nominal_masses_count[nominal_mass] = indexes[0].size
      957
    - 958            dict_nominal_masses_indexes[nominal_mass] = (first, last)
    + 958        return dict_nominal_masses_count
      959
    - 960        self._dict_nominal_masses_indexes = dict_nominal_masses_indexes
    - 961
    - 962    def plot_centroid(self, ax=None, c='g'):
    - 963        """Plot the centroid data of the mass spectrum.
    - 964
    - 965        Parameters
    - 966        ----------
    - 967        ax : matplotlib.axes.Axes, optional
    - 968            The matplotlib axes to plot on. Defaults to None.
    - 969        c : str, optional
    - 970            The color to use for the plot. Defaults to 'g' (green).
    - 971
    - 972        Returns
    - 973        -------
    - 974        matplotlib.axes.Axes
    - 975            The matplotlib axes containing the plot.
    - 976
    - 977        Raises
    - 978        ------
    - 979        Exception
    - 980            If no centroid data is found.
    - 981        """
    - 982
    - 983        import matplotlib.pyplot as plt
    - 984        if self._mspeaks:
    - 985
    - 986            if ax is None:
    - 987                ax = plt.gca()
    - 988
    - 989            markerline_a, stemlines_a, baseline_a = ax.stem(self.mz_exp, self.abundance, linefmt='-', markerfmt=" ")
    + 960    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
    + 961        """Return the indexes of the MSpeaks objects with the specified nominal mass.
    + 962
    + 963        Parameters
    + 964        ----------
    + 965        nominal_mass : int
    + 966            The nominal mass to get the indexes for.
    + 967        overlay : float, optional
    + 968            The m/z overlay to use for counting. Defaults to 0.1.
    + 969
    + 970        Returns
    + 971        -------
    + 972        generator
    + 973            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
    + 974        """
    + 975        min_mz_to_look = nominal_mass - overlay
    + 976        max_mz_to_look = nominal_mass + 1 + overlay
    + 977
    + 978        return (
    + 979            i
    + 980            for i in range(len(self.mspeaks))
    + 981            if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look
    + 982        )
    + 983
    + 984        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
    + 985        # return indexes
    + 986
    + 987    def _set_nominal_masses_start_final_indexes(self):
    + 988        """Set the start and final indexes of the MSpeaks objects for all nominal masses."""
    + 989        dict_nominal_masses_indexes = {}
      990
    - 991            plt.setp(markerline_a, 'color', c, 'linewidth', 2)
    - 992            plt.setp(stemlines_a, 'color', c, 'linewidth', 2)
    - 993            plt.setp(baseline_a, 'color', c, 'linewidth', 2)
    - 994
    - 995            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    - 996            ax.set_ylabel('Abundance', fontsize=12)
    - 997            ax.tick_params(axis='both', which='major', labelsize=12)
    - 998
    - 999            ax.axes.spines['top'].set_visible(False)
    -1000            ax.axes.spines['right'].set_visible(False)
    -1001
    -1002            ax.get_yaxis().set_visible(False)
    -1003            ax.spines['left'].set_visible(False)
    -1004
    -1005        else:
    -1006
    -1007            raise Exception("No centroid data found, please run process_mass_spec")
    -1008
    -1009        return ax
    -1010
    -1011    def plot_profile_and_noise_threshold(self, ax=None,legend=False): 
    -1012        """Plot the profile data and noise threshold of the mass spectrum.
    -1013
    -1014        Parameters
    -1015        ----------
    -1016        ax : matplotlib.axes.Axes, optional
    -1017            The matplotlib axes to plot on. Defaults to None.
    -1018        legend : bool, optional
    -1019            Whether to show the legend. Defaults to False.
    -1020
    -1021        Returns
    -1022        -------
    -1023        matplotlib.axes.Axes
    -1024            The matplotlib axes containing the plot.
    -1025
    -1026        Raises
    -1027        ------
    -1028        Exception
    -1029            If no noise threshold is found.
    -1030        """
    -1031        import matplotlib.pyplot as plt
    -1032        if self.baseline_noise_std and self.baseline_noise_std:
    -1033
    -1034            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    -1035            baseline = (self.baseline_noise, self.baseline_noise)
    -1036
    -1037            # std = self.parameters.mass_spectrum.noise_threshold_min_std
    -1038            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
    -1039            x, y = self.get_noise_threshold()    
    -1040            
    -1041            if ax is None:
    -1042                ax = plt.gca()
    -1043            
    -1044            ax.plot(self.mz_exp_profile, self.abundance_profile, color="green",label="Spectrum")
    -1045            ax.plot(x, (baseline, baseline), color="yellow",label="Baseline Noise")
    -1046            ax.plot(x, y, color="red",label="Noise Threshold")
    -1047
    -1048            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    -1049            ax.set_ylabel('Abundance', fontsize=12)
    -1050            ax.tick_params(axis='both', which='major', labelsize=12)
    -1051
    -1052            ax.axes.spines['top'].set_visible(False)
    -1053            ax.axes.spines['right'].set_visible(False)
    -1054
    -1055            ax.get_yaxis().set_visible(False)
    -1056            ax.spines['left'].set_visible(False)
    -1057            if legend:
    -1058                ax.legend()
    + 991        all_nominal_masses = set(i.nominal_mz_exp for i in self.mspeaks)
    + 992
    + 993        for nominal_mass in all_nominal_masses:
    + 994            # indexes = self.get_nominal_mass_indexes(nominal_mass)
    + 995            # Convert the iterator to a list to avoid multiple calls
    + 996            indexes = list(self.get_nominal_mass_indexes(nominal_mass))
    + 997
    + 998            # If the list is not empty, find the first and last; otherwise, set None
    + 999            if indexes:
    +1000                first, last = indexes[0], indexes[-1]
    +1001            else:
    +1002                first = last = None
    +1003            # defaultvalue = None
    +1004            # first = last = next(indexes, defaultvalue)
    +1005            # for last in indexes:
    +1006            #    pass
    +1007
    +1008            dict_nominal_masses_indexes[nominal_mass] = (first, last)
    +1009
    +1010        self._dict_nominal_masses_indexes = dict_nominal_masses_indexes
    +1011
    +1012    def plot_centroid(self, ax=None, c="g"):
    +1013        """Plot the centroid data of the mass spectrum.
    +1014
    +1015        Parameters
    +1016        ----------
    +1017        ax : matplotlib.axes.Axes, optional
    +1018            The matplotlib axes to plot on. Defaults to None.
    +1019        c : str, optional
    +1020            The color to use for the plot. Defaults to 'g' (green).
    +1021
    +1022        Returns
    +1023        -------
    +1024        matplotlib.axes.Axes
    +1025            The matplotlib axes containing the plot.
    +1026
    +1027        Raises
    +1028        ------
    +1029        Exception
    +1030            If no centroid data is found.
    +1031        """
    +1032
    +1033        import matplotlib.pyplot as plt
    +1034
    +1035        if self._mspeaks:
    +1036            if ax is None:
    +1037                ax = plt.gca()
    +1038
    +1039            markerline_a, stemlines_a, baseline_a = ax.stem(
    +1040                self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
    +1041            )
    +1042
    +1043            plt.setp(markerline_a, "color", c, "linewidth", 2)
    +1044            plt.setp(stemlines_a, "color", c, "linewidth", 2)
    +1045            plt.setp(baseline_a, "color", c, "linewidth", 2)
    +1046
    +1047            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +1048            ax.set_ylabel("Abundance", fontsize=12)
    +1049            ax.tick_params(axis="both", which="major", labelsize=12)
    +1050
    +1051            ax.axes.spines["top"].set_visible(False)
    +1052            ax.axes.spines["right"].set_visible(False)
    +1053
    +1054            ax.get_yaxis().set_visible(False)
    +1055            ax.spines["left"].set_visible(False)
    +1056
    +1057        else:
    +1058            raise Exception("No centroid data found, please run process_mass_spec")
     1059
    -1060        else:
    +1060        return ax
     1061
    -1062            raise Exception("Calculate noise threshold first")
    -1063
    -1064        return ax
    -1065
    -1066    def plot_mz_domain_profile(self, color='green', ax=None): 
    -1067        """Plot the m/z domain profile of the mass spectrum.
    -1068
    -1069        Parameters
    -1070        ----------
    -1071        color : str, optional
    -1072            The color to use for the plot. Defaults to 'green'.
    -1073        ax : matplotlib.axes.Axes, optional
    -1074            The matplotlib axes to plot on. Defaults to None.
    -1075
    -1076        Returns
    -1077        -------
    -1078        matplotlib.axes.Axes
    -1079            The matplotlib axes containing the plot.
    -1080        """       
    -1081
    +1062    def plot_profile_and_noise_threshold(self, ax=None, legend=False):
    +1063        """Plot the profile data and noise threshold of the mass spectrum.
    +1064
    +1065        Parameters
    +1066        ----------
    +1067        ax : matplotlib.axes.Axes, optional
    +1068            The matplotlib axes to plot on. Defaults to None.
    +1069        legend : bool, optional
    +1070            Whether to show the legend. Defaults to False.
    +1071
    +1072        Returns
    +1073        -------
    +1074        matplotlib.axes.Axes
    +1075            The matplotlib axes containing the plot.
    +1076
    +1077        Raises
    +1078        ------
    +1079        Exception
    +1080            If no noise threshold is found.
    +1081        """
     1082        import matplotlib.pyplot as plt
     1083
    -1084        if ax is None:
    -1085            ax = plt.gca()
    -1086        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
    -1087        ax.set(xlabel='m/z', ylabel='abundance')
    -1088
    -1089        return ax
    -1090
    -1091    def to_excel(self, out_file_path, write_metadata=True):
    -1092        """Export the mass spectrum to an Excel file.
    -1093
    -1094        Parameters
    -1095        ----------
    -1096        out_file_path : str
    -1097            The path to the Excel file to export to.
    -1098        write_metadata : bool, optional
    -1099            Whether to write the metadata to the Excel file. Defaults to True.
    -1100
    -1101        Returns
    -1102        -------
    -1103        None
    -1104        """
    -1105        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1106        exportMS = HighResMassSpecExport(out_file_path, self)
    -1107        exportMS.to_excel(write_metadata=write_metadata)
    -1108
    -1109    def to_hdf(self, out_file_path):
    -1110        """Export the mass spectrum to an HDF file.
    -1111
    -1112        Parameters
    -1113        ----------
    -1114        out_file_path : str
    -1115            The path to the HDF file to export to.
    -1116
    -1117        Returns
    -1118        -------
    -1119        None
    -1120        """
    -1121        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1122        exportMS = HighResMassSpecExport(out_file_path, self)
    -1123        exportMS.to_hdf()
    -1124
    -1125    def to_csv(self, out_file_path, write_metadata=True):
    -1126        """Export the mass spectrum to a CSV file.
    -1127        
    -1128        Parameters
    -1129        ----------
    -1130        out_file_path : str
    -1131            The path to the CSV file to export to.
    -1132        write_metadata : bool, optional
    -1133            Whether to write the metadata to the CSV file. Defaults to True.
    -1134        
    +1084        if self.baseline_noise_std and self.baseline_noise_std:
    +1085            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    +1086            baseline = (self.baseline_noise, self.baseline_noise)
    +1087
    +1088            # std = self.parameters.mass_spectrum.noise_threshold_min_std
    +1089            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
    +1090            x, y = self.get_noise_threshold()
    +1091
    +1092            if ax is None:
    +1093                ax = plt.gca()
    +1094
    +1095            ax.plot(
    +1096                self.mz_exp_profile,
    +1097                self.abundance_profile,
    +1098                color="green",
    +1099                label="Spectrum",
    +1100            )
    +1101            ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
    +1102            ax.plot(x, y, color="red", label="Noise Threshold")
    +1103
    +1104            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +1105            ax.set_ylabel("Abundance", fontsize=12)
    +1106            ax.tick_params(axis="both", which="major", labelsize=12)
    +1107
    +1108            ax.axes.spines["top"].set_visible(False)
    +1109            ax.axes.spines["right"].set_visible(False)
    +1110
    +1111            ax.get_yaxis().set_visible(False)
    +1112            ax.spines["left"].set_visible(False)
    +1113            if legend:
    +1114                ax.legend()
    +1115
    +1116        else:
    +1117            raise Exception("Calculate noise threshold first")
    +1118
    +1119        return ax
    +1120
    +1121    def plot_mz_domain_profile(self, color="green", ax=None):
    +1122        """Plot the m/z domain profile of the mass spectrum.
    +1123
    +1124        Parameters
    +1125        ----------
    +1126        color : str, optional
    +1127            The color to use for the plot. Defaults to 'green'.
    +1128        ax : matplotlib.axes.Axes, optional
    +1129            The matplotlib axes to plot on. Defaults to None.
    +1130
    +1131        Returns
    +1132        -------
    +1133        matplotlib.axes.Axes
    +1134            The matplotlib axes containing the plot.
     1135        """
    -1136        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1137        exportMS = HighResMassSpecExport(out_file_path, self)
    -1138        exportMS.to_csv(write_metadata=write_metadata)
    -1139
    -1140    def to_pandas(self, out_file_path, write_metadata=True):
    -1141        """Export the mass spectrum to a Pandas dataframe with pkl extension.
    -1142
    -1143        Parameters
    -1144        ----------
    -1145        out_file_path : str
    -1146            The path to the CSV file to export to.
    -1147        write_metadata : bool, optional
    -1148            Whether to write the metadata to the CSV file. Defaults to True.
    -1149
    -1150        """
    -1151        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1152        exportMS = HighResMassSpecExport(out_file_path, self)
    -1153        exportMS.to_pandas(write_metadata=write_metadata)
    -1154
    -1155    def to_dataframe(self, additional_columns=None):
    -1156        """Return the mass spectrum as a Pandas dataframe.
    -1157
    -1158        Parameters
    -1159        ----------
    -1160        additional_columns : list, optional
    -1161            A list of additional columns to include in the dataframe. Defaults to None.
    -1162            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
    -1163        
    -1164        Returns
    -1165        -------
    -1166        pandas.DataFrame
    -1167            The mass spectrum as a Pandas dataframe.
    -1168        """
    -1169        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1170        exportMS = HighResMassSpecExport(self.filename, self)
    -1171        return exportMS.get_pandas_df(additional_columns = additional_columns)
    +1136
    +1137        import matplotlib.pyplot as plt
    +1138
    +1139        if ax is None:
    +1140            ax = plt.gca()
    +1141        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
    +1142        ax.set(xlabel="m/z", ylabel="abundance")
    +1143
    +1144        return ax
    +1145
    +1146    def to_excel(self, out_file_path, write_metadata=True):
    +1147        """Export the mass spectrum to an Excel file.
    +1148
    +1149        Parameters
    +1150        ----------
    +1151        out_file_path : str
    +1152            The path to the Excel file to export to.
    +1153        write_metadata : bool, optional
    +1154            Whether to write the metadata to the Excel file. Defaults to True.
    +1155
    +1156        Returns
    +1157        -------
    +1158        None
    +1159        """
    +1160        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1161
    +1162        exportMS = HighResMassSpecExport(out_file_path, self)
    +1163        exportMS.to_excel(write_metadata=write_metadata)
    +1164
    +1165    def to_hdf(self, out_file_path):
    +1166        """Export the mass spectrum to an HDF file.
    +1167
    +1168        Parameters
    +1169        ----------
    +1170        out_file_path : str
    +1171            The path to the HDF file to export to.
     1172
    -1173    def to_json(self):
    -1174        """Return the mass spectrum as a JSON file."""
    -1175        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1176        exportMS = HighResMassSpecExport(self.filename, self)
    -1177        return exportMS.to_json()
    +1173        Returns
    +1174        -------
    +1175        None
    +1176        """
    +1177        from corems.mass_spectrum.output.export import HighResMassSpecExport
     1178
    -1179    def parameters_json(self):
    -1180        """Return the parameters of the mass spectrum as a JSON string."""
    -1181        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1182        exportMS = HighResMassSpecExport(self.filename, self)
    -1183        return exportMS.parameters_to_json()
    +1179        exportMS = HighResMassSpecExport(out_file_path, self)
    +1180        exportMS.to_hdf()
    +1181
    +1182    def to_csv(self, out_file_path, write_metadata=True):
    +1183        """Export the mass spectrum to a CSV file.
     1184
    -1185    def parameters_toml(self):
    -1186        """Return the parameters of the mass spectrum as a TOML string."""
    -1187        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1188        exportMS = HighResMassSpecExport(self.filename, self)
    -1189        return exportMS.parameters_to_toml()
    +1185        Parameters
    +1186        ----------
    +1187        out_file_path : str
    +1188            The path to the CSV file to export to.
    +1189        write_metadata : bool, optional
    +1190            Whether to write the metadata to the CSV file. Defaults to True.
    +1191
    +1192        """
    +1193        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1194
    +1195        exportMS = HighResMassSpecExport(out_file_path, self)
    +1196        exportMS.to_csv(write_metadata=write_metadata)
    +1197
    +1198    def to_pandas(self, out_file_path, write_metadata=True):
    +1199        """Export the mass spectrum to a Pandas dataframe with pkl extension.
    +1200
    +1201        Parameters
    +1202        ----------
    +1203        out_file_path : str
    +1204            The path to the CSV file to export to.
    +1205        write_metadata : bool, optional
    +1206            Whether to write the metadata to the CSV file. Defaults to True.
    +1207
    +1208        """
    +1209        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1210
    +1211        exportMS = HighResMassSpecExport(out_file_path, self)
    +1212        exportMS.to_pandas(write_metadata=write_metadata)
    +1213
    +1214    def to_dataframe(self, additional_columns=None):
    +1215        """Return the mass spectrum as a Pandas dataframe.
    +1216
    +1217        Parameters
    +1218        ----------
    +1219        additional_columns : list, optional
    +1220            A list of additional columns to include in the dataframe. Defaults to None.
    +1221            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
    +1222
    +1223        Returns
    +1224        -------
    +1225        pandas.DataFrame
    +1226            The mass spectrum as a Pandas dataframe.
    +1227        """
    +1228        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1229
    +1230        exportMS = HighResMassSpecExport(self.filename, self)
    +1231        return exportMS.get_pandas_df(additional_columns=additional_columns)
    +1232
    +1233    def to_json(self):
    +1234        """Return the mass spectrum as a JSON file."""
    +1235        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1236
    +1237        exportMS = HighResMassSpecExport(self.filename, self)
    +1238        return exportMS.to_json()
    +1239
    +1240    def parameters_json(self):
    +1241        """Return the parameters of the mass spectrum as a JSON string."""
    +1242        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1243
    +1244        exportMS = HighResMassSpecExport(self.filename, self)
    +1245        return exportMS.parameters_to_json()
    +1246
    +1247    def parameters_toml(self):
    +1248        """Return the parameters of the mass spectrum as a TOML string."""
    +1249        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1250
    +1251        exportMS = HighResMassSpecExport(self.filename, self)
    +1252        return exportMS.parameters_to_toml()
     
    @@ -3334,7 +3477,7 @@
    Attributes
    Methods
      -
    • process_mass_spec(). Main function to process the mass spectrum, +
    • process_mass_spec(). Main function to process the mass spectrum, including calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
    @@ -3352,38 +3495,37 @@
    Methods
    -
    100    def __init__(self, mz_exp, abundance, d_params, **kwargs):
    -101        
    -102        self._abundance = array(abundance, dtype=float64)
    -103        self._mz_exp = array(mz_exp, dtype=float64)
    -104                    
    -105        # objects created after process_mass_spec() function
    -106        self._mspeaks = list()
    -107        self.mspeaks = list()
    -108        self._dict_nominal_masses_indexes = dict()
    -109        self._baseline_noise = 0.001
    -110        self._baseline_noise_std = 0.001
    -111        self._dynamic_range = None
    -112        # set to None: initialization occurs inside subclass MassSpecfromFreq
    -113        self._transient_settings = None
    -114        self._frequency_domain = None
    -115        self._mz_cal_profile = None
    -116        self.is_calibrated = False
    -117
    -118        self._set_parameters_objects(d_params)
    -119        self._init_settings()
    -120
    -121        self.is_centroid = False
    -122        self.has_frequency = False
    -123
    -124        self.calibration_order = None
    -125        self.calibration_points = None
    -126        self.calibration_ref_mzs = None
    -127        self.calibration_meas_mzs = None
    -128        self.calibration_RMS = None
    -129        self.calibration_segment = None
    -130        self.calibration_raw_error_median = None
    -131        self.calibration_raw_error_stdev = None
    +            
    106    def __init__(self, mz_exp, abundance, d_params, **kwargs):
    +107        self._abundance = array(abundance, dtype=float64)
    +108        self._mz_exp = array(mz_exp, dtype=float64)
    +109
    +110        # objects created after process_mass_spec() function
    +111        self._mspeaks = list()
    +112        self.mspeaks = list()
    +113        self._dict_nominal_masses_indexes = dict()
    +114        self._baseline_noise = 0.001
    +115        self._baseline_noise_std = 0.001
    +116        self._dynamic_range = None
    +117        # set to None: initialization occurs inside subclass MassSpecfromFreq
    +118        self._transient_settings = None
    +119        self._frequency_domain = None
    +120        self._mz_cal_profile = None
    +121        self.is_calibrated = False
    +122
    +123        self._set_parameters_objects(d_params)
    +124        self._init_settings()
    +125
    +126        self.is_centroid = False
    +127        self.has_frequency = False
    +128
    +129        self.calibration_order = None
    +130        self.calibration_points = None
    +131        self.calibration_ref_mzs = None
    +132        self.calibration_meas_mzs = None
    +133        self.calibration_RMS = None
    +134        self.calibration_segment = None
    +135        self.calibration_raw_error_median = None
    +136        self.calibration_raw_error_stdev = None
     
    @@ -3533,20 +3675,21 @@
    Methods
    -
    145    def set_indexes(self, list_indexes):
    -146        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
    -147
    -148        Parameters
    -149        ----------
    -150        list_indexes : list of int
    -151            A list of integers representing the indexes of the MSpeaks to iterate over.
    -152
    -153        """
    -154        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
    +            
    148    def set_indexes(self, list_indexes):
    +149        """Set the mass spectrum to iterate over only the selected MSpeaks indexes.
    +150
    +151        Parameters
    +152        ----------
    +153        list_indexes : list of int
    +154            A list of integers representing the indexes of the MSpeaks to iterate over.
     155
    -156        for i, mspeak in  enumerate(self.mspeaks): mspeak.index = i
    -157
    -158        self._set_nominal_masses_start_final_indexes()
    +156        """
    +157        self.mspeaks = [self._mspeaks[i] for i in list_indexes]
    +158
    +159        for i, mspeak in enumerate(self.mspeaks):
    +160            mspeak.index = i
    +161
    +162        self._set_nominal_masses_start_final_indexes()
     
    @@ -3573,18 +3716,19 @@
    Parameters
    -
    160    def reset_indexes(self):
    -161        """Reset the mass spectrum to iterate over all MSpeaks objects.
    -162
    -163        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
    -164        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
    -165
    -166        """
    -167        self.mspeaks = self._mspeaks
    -168
    -169        for i, mspeak in  enumerate(self.mspeaks): mspeak.index = i
    -170
    -171        self._set_nominal_masses_start_final_indexes()
    +            
    164    def reset_indexes(self):
    +165        """Reset the mass spectrum to iterate over all MSpeaks objects.
    +166
    +167        This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.
    +168        It also sets the index of each MSpeak object to its corresponding position in the mass spectrum.
    +169
    +170        """
    +171        self.mspeaks = self._mspeaks
    +172
    +173        for i, mspeak in enumerate(self.mspeaks):
    +174            mspeak.index = i
    +175
    +176        self._set_nominal_masses_start_final_indexes()
     
    @@ -3607,48 +3751,51 @@
    Parameters
    -
    173    def add_mspeak(self, ion_charge, mz_exp,
    -174                            abundance,
    -175                            resolving_power,
    -176                            signal_to_noise,
    -177                            massspec_indexes,
    -178                            exp_freq=None,
    -179                            ms_parent=None
    -180                        ):
    -181        """Add a new MSPeak object to the MassSpectrum object.
    -182
    -183        Parameters
    -184        ----------
    -185        ion_charge : int
    -186            The ion charge of the MSPeak.
    -187        mz_exp : float
    -188            The experimental m/z value of the MSPeak.
    -189        abundance : float
    -190            The abundance of the MSPeak.
    -191        resolving_power : float
    -192            The resolving power of the MSPeak.
    -193        signal_to_noise : float
    -194            The signal-to-noise ratio of the MSPeak.
    -195        massspec_indexes : list
    -196            A list of indexes of the MSPeak in the MassSpectrum object.
    -197        exp_freq : float, optional
    -198            The experimental frequency of the MSPeak. Defaults to None.
    -199        ms_parent : MSParent, optional
    -200            The MSParent object associated with the MSPeak. Defaults to None.
    -201        """
    -202        mspeak = MSPeak(
    -203                ion_charge,
    -204                mz_exp,
    -205                abundance,
    -206                resolving_power,
    -207                signal_to_noise,
    -208                massspec_indexes,
    -209                len(self._mspeaks),
    -210                exp_freq=exp_freq,
    -211                ms_parent=ms_parent,
    -212        )
    -213
    -214        self._mspeaks.append(mspeak)
    +            
    178    def add_mspeak(
    +179        self,
    +180        ion_charge,
    +181        mz_exp,
    +182        abundance,
    +183        resolving_power,
    +184        signal_to_noise,
    +185        massspec_indexes,
    +186        exp_freq=None,
    +187        ms_parent=None,
    +188    ):
    +189        """Add a new MSPeak object to the MassSpectrum object.
    +190
    +191        Parameters
    +192        ----------
    +193        ion_charge : int
    +194            The ion charge of the MSPeak.
    +195        mz_exp : float
    +196            The experimental m/z value of the MSPeak.
    +197        abundance : float
    +198            The abundance of the MSPeak.
    +199        resolving_power : float
    +200            The resolving power of the MSPeak.
    +201        signal_to_noise : float
    +202            The signal-to-noise ratio of the MSPeak.
    +203        massspec_indexes : list
    +204            A list of indexes of the MSPeak in the MassSpectrum object.
    +205        exp_freq : float, optional
    +206            The experimental frequency of the MSPeak. Defaults to None.
    +207        ms_parent : MSParent, optional
    +208            The MSParent object associated with the MSPeak. Defaults to None.
    +209        """
    +210        mspeak = MSPeak(
    +211            ion_charge,
    +212            mz_exp,
    +213            abundance,
    +214            resolving_power,
    +215            signal_to_noise,
    +216            massspec_indexes,
    +217            len(self._mspeaks),
    +218            exp_freq=exp_freq,
    +219            ms_parent=ms_parent,
    +220        )
    +221
    +222        self._mspeaks.append(mspeak)
     
    @@ -3689,26 +3836,26 @@
    Parameters
    -
    284    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
    -285        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
    -286
    -287        Parameters
    -288        ----------
    -289        Aterm : float
    -290            The A-term calibration coefficient.
    -291        Bterm : float
    -292            The B-term calibration coefficient.
    -293        C : float
    -294            The C-term calibration coefficient.
    -295        fas : float, optional
    -296            The frequency amplitude scaling factor. Default is 0.
    -297        """
    -298        self._calibration_terms = (Aterm, Bterm, C)
    -299
    -300        self._mz_exp = self._f_to_mz()
    -301        self._abundance = self._abundance
    -302        self.find_peaks()
    -303        self.reset_indexes()
    +            
    290    def reset_cal_therms(self, Aterm, Bterm, C, fas=0):
    +291        """Reset calibration terms and recalculate the mass-to-charge ratio and abundance.
    +292
    +293        Parameters
    +294        ----------
    +295        Aterm : float
    +296            The A-term calibration coefficient.
    +297        Bterm : float
    +298            The B-term calibration coefficient.
    +299        C : float
    +300            The C-term calibration coefficient.
    +301        fas : float, optional
    +302            The frequency amplitude scaling factor. Default is 0.
    +303        """
    +304        self._calibration_terms = (Aterm, Bterm, C)
    +305
    +306        self._mz_exp = self._f_to_mz()
    +307        self._abundance = self._abundance
    +308        self.find_peaks()
    +309        self.reset_indexes()
     
    @@ -3741,16 +3888,16 @@
    Parameters
    -
    305    def clear_molecular_formulas(self):
    -306        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
    -307
    -308        Returns
    -309        -------
    -310        numpy.ndarray
    -311            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
    -312        """
    -313        self.check_mspeaks()
    -314        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
    +            
    311    def clear_molecular_formulas(self):
    +312        """Clear the molecular formulas for all mspeaks in the MassSpectrum.
    +313
    +314        Returns
    +315        -------
    +316        numpy.ndarray
    +317            An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
    +318        """
    +319        self.check_mspeaks()
    +320        return array([mspeak.clear_molecular_formulas() for mspeak in self.mspeaks])
     
    @@ -3776,40 +3923,39 @@
    Returns
    -
    316    def process_mass_spec(self, keep_profile=True):
    -317        """Process the mass spectrum.
    -318
    -319        Parameters
    -320        ----------
    -321        keep_profile : bool, optional
    -322            Whether to keep the profile data after processing. Defaults to True.
    -323
    -324        Notes
    -325        -----
    -326        This method does the following:
    -327        - calculates the noise threshold
    -328        - does peak picking (creates mspeak_objs)
    -329        - resets the mspeak_obj indexes
    -330        """
    -331        
    -332        # if runned mannually make sure to rerun filter_by_noise_threshold     
    -333        # calculates noise threshold 
    -334        # do peak picking( create mspeak_objs) 
    -335        # reset mspeak_obj the indexes
    -336         
    -337        self.cal_noise_threshold()
    -338
    -339        self.find_peaks()
    -340        self.reset_indexes()
    -341
    -342        if self.mspeaks:
    -343            self._dynamic_range = self.max_abundance / self.min_abundance
    -344        else:
    -345            self._dynamic_range = 0
    -346        if not keep_profile:
    +            
    322    def process_mass_spec(self, keep_profile=True):
    +323        """Process the mass spectrum.
    +324
    +325        Parameters
    +326        ----------
    +327        keep_profile : bool, optional
    +328            Whether to keep the profile data after processing. Defaults to True.
    +329
    +330        Notes
    +331        -----
    +332        This method does the following:
    +333        - calculates the noise threshold
    +334        - does peak picking (creates mspeak_objs)
    +335        - resets the mspeak_obj indexes
    +336        """
    +337
    +338        # if runned mannually make sure to rerun filter_by_noise_threshold
    +339        # calculates noise threshold
    +340        # do peak picking( create mspeak_objs)
    +341        # reset mspeak_obj the indexes
    +342
    +343        self.cal_noise_threshold()
    +344
    +345        self.find_peaks()
    +346        self.reset_indexes()
     347
    -348            self._abundance *= 0
    -349            self._mz_exp *= 0
    +348        if self.mspeaks:
    +349            self._dynamic_range = self.max_abundance / self.min_abundance
    +350        else:
    +351            self._dynamic_range = 0
    +352        if not keep_profile:
    +353            self._abundance *= 0
    +354            self._mz_exp *= 0
     
    @@ -3846,21 +3992,21 @@
    Notes
    -
    352    def cal_noise_threshold(self):
    -353        """Calculate the noise threshold of the mass spectrum.
    -354
    -355        """
    -356
    -357        if self.label == Labels.simulated_profile:
    +            
    356    def cal_noise_threshold(self):
    +357        """Calculate the noise threshold of the mass spectrum."""
     358
    -359            self._baseline_noise, self._baseline_noise_std = 0.1, 1
    -360
    -361        if self.settings.noise_threshold_method == 'log':
    -362
    -363            self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
    -364
    -365        else:
    -366            self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
    +359        if self.label == Labels.simulated_profile:
    +360            self._baseline_noise, self._baseline_noise_std = 0.1, 1
    +361
    +362        if self.settings.noise_threshold_method == "log":
    +363            self._baseline_noise, self._baseline_noise_std = (
    +364                self.run_log_noise_threshold_calc()
    +365            )
    +366
    +367        else:
    +368            self._baseline_noise, self._baseline_noise_std = (
    +369                self.run_noise_threshold_calc()
    +370            )
     
    @@ -3893,15 +4039,15 @@
    Notes
    -
    377    def set_parameter_from_json(self, parameters_path):
    -378        """Set the parameters of the mass spectrum from a JSON file.
    -379        
    -380        Parameters
    -381        ----------
    -382        parameters_path : str
    -383            The path to the JSON file containing the parameters.
    -384        """
    -385        load_and_set_parameters_ms(self, parameters_path=parameters_path)    
    +            
    381    def set_parameter_from_json(self, parameters_path):
    +382        """Set the parameters of the mass spectrum from a JSON file.
    +383
    +384        Parameters
    +385        ----------
    +386        parameters_path : str
    +387            The path to the JSON file containing the parameters.
    +388        """
    +389        load_and_set_parameters_ms(self, parameters_path=parameters_path)
     
    @@ -3928,8 +4074,8 @@
    Parameters
    -
    387    def set_parameter_from_toml(self, parameters_path):
    -388        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)    
    +            
    391    def set_parameter_from_toml(self, parameters_path):
    +392        load_and_set_toml_parameters_ms(self, parameters_path=parameters_path)
     
    @@ -4116,10 +4262,10 @@
    Parameters
    -
    512    def freq_exp(self):
    -513        """Return the experimental frequency values of the mass spectrum."""
    -514        self.check_mspeaks()
    -515        return array([mspeak.freq_exp for mspeak in self.mspeaks])
    +            
    518    def freq_exp(self):
    +519        """Return the experimental frequency values of the mass spectrum."""
    +520        self.check_mspeaks()
    +521        return array([mspeak.freq_exp for mspeak in self.mspeaks])
     
    @@ -4176,10 +4322,10 @@
    Parameters
    -
    536    def get_mz_and_abundance_peaks_tuples(self):
    -537        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
    -538        self.check_mspeaks()
    -539        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
    +            
    542    def get_mz_and_abundance_peaks_tuples(self):
    +543        """Return a list of tuples containing the m/z and abundance values of the mass spectrum."""
    +544        self.check_mspeaks()
    +545        return [(mspeak.mz_exp, mspeak.abundance) for mspeak in self.mspeaks]
     
    @@ -4407,9 +4553,9 @@
    Parameters
    -
    632    def sort_by_mz(self):
    -633        """Sort the mass spectrum by m/z values."""
    -634        return sorted(self, key=lambda m: m.mz_exp)
    +            
    638    def sort_by_mz(self):
    +639        """Sort the mass spectrum by m/z values."""
    +640        return sorted(self, key=lambda m: m.mz_exp)
     
    @@ -4429,9 +4575,9 @@
    Parameters
    -
    636    def sort_by_abundance(self, reverse=False):
    -637        """Sort the mass spectrum by abundance values."""
    -638        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
    +            
    642    def sort_by_abundance(self, reverse=False):
    +643        """Sort the mass spectrum by abundance values."""
    +644        return sorted(self, key=lambda m: m.abundance, reverse=reverse)
     
    @@ -4464,21 +4610,20 @@
    Parameters
    -
    645    def check_mspeaks_warning(self):
    -646        """Check if the mass spectrum has MSpeaks objects.
    -647        
    -648        Raises
    -649        ------
    -650        Warning
    -651            If the mass spectrum has no MSpeaks objects.
    -652        """
    -653        import warnings
    -654        if self.mspeaks:
    -655            pass
    -656        else:
    -657            warnings.warn(
    -658                "mspeaks list is empty, continuing without filtering data"
    -659            )
    +            
    651    def check_mspeaks_warning(self):
    +652        """Check if the mass spectrum has MSpeaks objects.
    +653
    +654        Raises
    +655        ------
    +656        Warning
    +657            If the mass spectrum has no MSpeaks objects.
    +658        """
    +659        import warnings
    +660
    +661        if self.mspeaks:
    +662            pass
    +663        else:
    +664            warnings.warn("mspeaks list is empty, continuing without filtering data")
     
    @@ -4504,20 +4649,20 @@
    Raises
    -
    661    def check_mspeaks(self):
    -662        """Check if the mass spectrum has MSpeaks objects.
    -663
    -664        Raises
    -665        ------
    -666        Exception
    -667            If the mass spectrum has no MSpeaks objects.
    -668        """
    -669        if self.mspeaks:
    -670            pass
    -671        else:
    -672            raise Exception(
    -673                "mspeaks list is empty, please run process_mass_spec() first"
    -674            )
    +            
    666    def check_mspeaks(self):
    +667        """Check if the mass spectrum has MSpeaks objects.
    +668
    +669        Raises
    +670        ------
    +671        Exception
    +672            If the mass spectrum has no MSpeaks objects.
    +673        """
    +674        if self.mspeaks:
    +675            pass
    +676        else:
    +677            raise Exception(
    +678                "mspeaks list is empty, please run process_mass_spec() first"
    +679            )
     
    @@ -4543,15 +4688,16 @@
    Raises
    -
    676    def remove_assignment_by_index(self, indexes):
    -677        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
    -678
    -679        Parameters
    -680        ----------
    -681        indexes : list of int
    -682            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
    -683        """
    -684        for i in indexes: self.mspeaks[i].clear_molecular_formulas()
    +            
    681    def remove_assignment_by_index(self, indexes):
    +682        """Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.
    +683
    +684        Parameters
    +685        ----------
    +686        indexes : list of int
    +687            A list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
    +688        """
    +689        for i in indexes:
    +690            self.mspeaks[i].clear_molecular_formulas()
     
    @@ -4578,21 +4724,24 @@
    Parameters
    -
    686    def filter_by_index(self, list_indexes):
    -687        """Filter the mass spectrum by the specified indexes.
    -688
    -689        Parameters
    -690        ----------
    -691        list_indexes : list of int
    -692            A list of indexes of the MSpeaks objects to drop.
    -693
    -694        """
    -695
    -696        self.mspeaks = [self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes]
    -697
    -698        for i, mspeak in  enumerate(self.mspeaks): mspeak.index = i
    +            
    692    def filter_by_index(self, list_indexes):
    +693        """Filter the mass spectrum by the specified indexes.
    +694
    +695        Parameters
    +696        ----------
    +697        list_indexes : list of int
    +698            A list of indexes of the MSpeaks objects to drop.
     699
    -700        self._set_nominal_masses_start_final_indexes()
    +700        """
    +701
    +702        self.mspeaks = [
    +703            self.mspeaks[i] for i in range(len(self.mspeaks)) if i not in list_indexes
    +704        ]
    +705
    +706        for i, mspeak in enumerate(self.mspeaks):
    +707            mspeak.index = i
    +708
    +709        self._set_nominal_masses_start_final_indexes()
     
    @@ -4619,20 +4768,24 @@
    Parameters
    -
    702    def filter_by_mz(self, min_mz, max_mz):
    -703        """Filter the mass spectrum by the specified m/z range.
    -704
    -705        Parameters
    -706        ----------
    -707        min_mz : float
    -708            The minimum m/z value to keep.
    -709        max_mz : float
    -710            The maximum m/z value to keep.
    -711
    -712        """      
    -713        self.check_mspeaks_warning()
    -714        indexes = [index for index, mspeak in enumerate(self.mspeaks) if not min_mz <= mspeak.mz_exp <= max_mz]
    -715        self.filter_by_index(indexes)
    +            
    711    def filter_by_mz(self, min_mz, max_mz):
    +712        """Filter the mass spectrum by the specified m/z range.
    +713
    +714        Parameters
    +715        ----------
    +716        min_mz : float
    +717            The minimum m/z value to keep.
    +718        max_mz : float
    +719            The maximum m/z value to keep.
    +720
    +721        """
    +722        self.check_mspeaks_warning()
    +723        indexes = [
    +724            index
    +725            for index, mspeak in enumerate(self.mspeaks)
    +726            if not min_mz <= mspeak.mz_exp <= max_mz
    +727        ]
    +728        self.filter_by_index(indexes)
     
    @@ -4661,23 +4814,31 @@
    Parameters
    -
    717    def filter_by_s2n(self, min_s2n, max_s2n=False):
    -718        """Filter the mass spectrum by the specified signal-to-noise ratio range.
    -719
    -720        Parameters
    -721        ----------
    -722        min_s2n : float
    -723            The minimum signal-to-noise ratio to keep.
    -724        max_s2n : float, optional
    -725            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
    -726
    -727        """
    -728        self.check_mspeaks_warning()
    -729        if max_s2n:
    -730            indexes = [index for index, mspeak in enumerate(self.mspeaks) if not min_s2n <= mspeak.signal_to_noise <= max_s2n ]
    -731        else:
    -732            indexes = [index for index, mspeak in enumerate(self.mspeaks) if mspeak.signal_to_noise <= min_s2n ]
    -733        self.filter_by_index(indexes)
    +            
    730    def filter_by_s2n(self, min_s2n, max_s2n=False):
    +731        """Filter the mass spectrum by the specified signal-to-noise ratio range.
    +732
    +733        Parameters
    +734        ----------
    +735        min_s2n : float
    +736            The minimum signal-to-noise ratio to keep.
    +737        max_s2n : float, optional
    +738            The maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
    +739
    +740        """
    +741        self.check_mspeaks_warning()
    +742        if max_s2n:
    +743            indexes = [
    +744                index
    +745                for index, mspeak in enumerate(self.mspeaks)
    +746                if not min_s2n <= mspeak.signal_to_noise <= max_s2n
    +747            ]
    +748        else:
    +749            indexes = [
    +750                index
    +751                for index, mspeak in enumerate(self.mspeaks)
    +752                if mspeak.signal_to_noise <= min_s2n
    +753            ]
    +754        self.filter_by_index(indexes)
     
    @@ -4706,23 +4867,31 @@
    Parameters
    -
    735    def filter_by_abundance(self, min_abund, max_abund=False):
    -736        """Filter the mass spectrum by the specified abundance range.
    -737
    -738        Parameters
    -739        ----------
    -740        min_abund : float
    -741            The minimum abundance to keep.
    -742        max_abund : float, optional
    -743            The maximum abundance to keep. Defaults to False (no maximum).
    -744
    -745        """
    -746        self.check_mspeaks_warning()
    -747        if max_abund:
    -748            indexes = [index for index, mspeak in enumerate(self.mspeaks) if not min_abund <= mspeak.abundance <= max_abund]
    -749        else:
    -750            indexes = [index for index, mspeak in enumerate(self.mspeaks) if mspeak.abundance <= min_abund]
    -751        self.filter_by_index(indexes)
    +            
    756    def filter_by_abundance(self, min_abund, max_abund=False):
    +757        """Filter the mass spectrum by the specified abundance range.
    +758
    +759        Parameters
    +760        ----------
    +761        min_abund : float
    +762            The minimum abundance to keep.
    +763        max_abund : float, optional
    +764            The maximum abundance to keep. Defaults to False (no maximum).
    +765
    +766        """
    +767        self.check_mspeaks_warning()
    +768        if max_abund:
    +769            indexes = [
    +770                index
    +771                for index, mspeak in enumerate(self.mspeaks)
    +772                if not min_abund <= mspeak.abundance <= max_abund
    +773            ]
    +774        else:
    +775            indexes = [
    +776                index
    +777                for index, mspeak in enumerate(self.mspeaks)
    +778                if mspeak.abundance <= min_abund
    +779            ]
    +780        self.filter_by_index(indexes)
     
    @@ -4751,22 +4920,26 @@
    Parameters
    -
    753    def filter_by_max_resolving_power(self, B, T):
    -754        """Filter the mass spectrum by the specified maximum resolving power.
    -755        
    -756        Parameters
    -757        ----------
    -758        B : float
    -759        T : float
    -760        
    -761        """
    -762
    -763        rpe = lambda m, z: (1.274e7 * z * B * T)/(m*z)
    -764
    -765        self.check_mspeaks_warning()
    -766
    -767        indexes_to_remove = [index for index, mspeak in enumerate(self.mspeaks) if  mspeak.resolving_power >= rpe(mspeak.mz_exp,mspeak.ion_charge)]
    -768        self.filter_by_index(indexes_to_remove)
    +            
    782    def filter_by_max_resolving_power(self, B, T):
    +783        """Filter the mass spectrum by the specified maximum resolving power.
    +784
    +785        Parameters
    +786        ----------
    +787        B : float
    +788        T : float
    +789
    +790        """
    +791
    +792        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
    +793
    +794        self.check_mspeaks_warning()
    +795
    +796        indexes_to_remove = [
    +797            index
    +798            for index, mspeak in enumerate(self.mspeaks)
    +799            if mspeak.resolving_power >= rpe(mspeak.mz_exp, mspeak.ion_charge)
    +800        ]
    +801        self.filter_by_index(indexes_to_remove)
     
    @@ -4793,22 +4966,26 @@
    Parameters
    -
    770    def filter_by_mean_resolving_power(self, ndeviations=3,plot=False,guess_pars=False):
    -771        """Filter the mass spectrum by the specified mean resolving power.
    -772
    -773        Parameters
    -774        ----------
    -775        ndeviations : float, optional
    -776            The number of standard deviations to use for filtering. Defaults to 3.
    -777        plot : bool, optional
    -778            Whether to plot the resolving power distribution. Defaults to False.
    -779        guess_pars : bool, optional
    -780            Whether to guess the parameters for the Gaussian model. Defaults to False.
    -781
    -782        """
    -783        self.check_mspeaks_warning()
    -784        indexes_to_remove = MeanResolvingPowerFilter(self,ndeviations,plot,guess_pars).main()
    -785        self.filter_by_index(indexes_to_remove)
    +            
    803    def filter_by_mean_resolving_power(
    +804        self, ndeviations=3, plot=False, guess_pars=False
    +805    ):
    +806        """Filter the mass spectrum by the specified mean resolving power.
    +807
    +808        Parameters
    +809        ----------
    +810        ndeviations : float, optional
    +811            The number of standard deviations to use for filtering. Defaults to 3.
    +812        plot : bool, optional
    +813            Whether to plot the resolving power distribution. Defaults to False.
    +814        guess_pars : bool, optional
    +815            Whether to guess the parameters for the Gaussian model. Defaults to False.
    +816
    +817        """
    +818        self.check_mspeaks_warning()
    +819        indexes_to_remove = MeanResolvingPowerFilter(
    +820            self, ndeviations, plot, guess_pars
    +821        ).main()
    +822        self.filter_by_index(indexes_to_remove)
     
    @@ -4839,21 +5016,25 @@
    Parameters
    -
    788    def filter_by_min_resolving_power(self, B, T):
    -789        """Filter the mass spectrum by the specified minimum resolving power.
    -790
    -791        Parameters
    -792        ----------
    -793        B : float
    -794        T : float
    -795
    -796        """
    -797        rpe = lambda m, z: (1.274e7 * z * B * T)/(m*z)
    -798
    -799        self.check_mspeaks_warning()
    -800
    -801        indexes_to_remove = [index for index, mspeak in enumerate(self.mspeaks) if  mspeak.resolving_power <= rpe(mspeak.mz_exp,mspeak.ion_charge)]
    -802        self.filter_by_index(indexes_to_remove)
    +            
    824    def filter_by_min_resolving_power(self, B, T):
    +825        """Filter the mass spectrum by the specified minimum resolving power.
    +826
    +827        Parameters
    +828        ----------
    +829        B : float
    +830        T : float
    +831
    +832        """
    +833        rpe = lambda m, z: (1.274e7 * z * B * T) / (m * z)
    +834
    +835        self.check_mspeaks_warning()
    +836
    +837        indexes_to_remove = [
    +838            index
    +839            for index, mspeak in enumerate(self.mspeaks)
    +840            if mspeak.resolving_power <= rpe(mspeak.mz_exp, mspeak.ion_charge)
    +841        ]
    +842        self.filter_by_index(indexes_to_remove)
     
    @@ -4880,15 +5061,19 @@
    Parameters
    -
    804    def filter_by_noise_threshold(self):
    -805        """Filter the mass spectrum by the noise threshold."""
    -806        
    -807        threshold = self.get_noise_threshold()[1][0]
    -808        
    -809        self.check_mspeaks_warning()
    -810        
    -811        indexes_to_remove = [index for index, mspeak in enumerate(self.mspeaks) if  mspeak.abundance <= threshold]
    -812        self.filter_by_index(indexes_to_remove)
    +            
    844    def filter_by_noise_threshold(self):
    +845        """Filter the mass spectrum by the noise threshold."""
    +846
    +847        threshold = self.get_noise_threshold()[1][0]
    +848
    +849        self.check_mspeaks_warning()
    +850
    +851        indexes_to_remove = [
    +852            index
    +853            for index, mspeak in enumerate(self.mspeaks)
    +854            if mspeak.abundance <= threshold
    +855        ]
    +856        self.filter_by_index(indexes_to_remove)
     
    @@ -4908,14 +5093,14 @@
    Parameters
    -
    815    def find_peaks(self):
    -816        """Find the peaks of the mass spectrum."""
    -817        #needs to clear previous results from peak_picking
    -818        self._mspeaks = list()
    -819
    -820        #then do peak picking
    -821        self.do_peak_picking()
    -822        # print("A total of %i peaks were found" % len(self._mspeaks))
    +            
    858    def find_peaks(self):
    +859        """Find the peaks of the mass spectrum."""
    +860        # needs to clear previous results from peak_picking
    +861        self._mspeaks = list()
    +862
    +863        # then do peak picking
    +864        self.do_peak_picking()
    +865        # print("A total of %i peaks were found" % len(self._mspeaks))
     
    @@ -4935,23 +5120,22 @@
    Parameters
    -
    824    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
    -825        """Change the Kendrick base of all MSpeaks objects.
    -826
    -827        Parameters
    -828        ----------
    -829        kendrick_dict_base : dict
    -830            A dictionary of the Kendrick base to change to.
    -831
    -832        Notes
    -833        -----
    -834        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
    -835        """
    -836        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
    -837
    -838        for mspeak in self.mspeaks:
    -839
    -840            mspeak.change_kendrick_base(kendrick_dict_base)
    +            
    867    def change_kendrick_base_all_mspeaks(self, kendrick_dict_base):
    +868        """Change the Kendrick base of all MSpeaks objects.
    +869
    +870        Parameters
    +871        ----------
    +872        kendrick_dict_base : dict
    +873            A dictionary of the Kendrick base to change to.
    +874
    +875        Notes
    +876        -----
    +877        Example of kendrick_dict_base parameter: kendrick_dict_base = {"C": 1, "H": 2} or {"C": 1, "H": 1, "O":1} etc
    +878        """
    +879        self.parameters.ms_peak.kendrick_base = kendrick_dict_base
    +880
    +881        for mspeak in self.mspeaks:
    +882            mspeak.change_kendrick_base(kendrick_dict_base)
     
    @@ -4982,32 +5166,35 @@
    Notes
    -
    842    def get_nominal_mz_first_last_indexes(self, nominal_mass):
    -843        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
    -844
    -845        Parameters
    -846        ----------
    -847        nominal_mass : int
    -848            The nominal mass to get the indexes for.
    -849
    -850        Returns
    -851        -------
    -852        tuple
    -853            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
    -854        """
    -855        if self._dict_nominal_masses_indexes:
    -856
    -857            if nominal_mass in self._dict_nominal_masses_indexes.keys():
    -858
    -859                return (self._dict_nominal_masses_indexes.get(nominal_mass)[0], self._dict_nominal_masses_indexes.get(nominal_mass)[1]+1)
    -860
    -861            else:
    -862                # import warnings
    -863                # uncomment warn to distribution
    -864                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
    -865                return (0, 0)
    -866        else:
    -867            raise Exception("run process_mass_spec() function before trying to access the data")
    +            
    884    def get_nominal_mz_first_last_indexes(self, nominal_mass):
    +885        """Return the first and last indexes of the MSpeaks objects with the specified nominal mass.
    +886
    +887        Parameters
    +888        ----------
    +889        nominal_mass : int
    +890            The nominal mass to get the indexes for.
    +891
    +892        Returns
    +893        -------
    +894        tuple
    +895            A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
    +896        """
    +897        if self._dict_nominal_masses_indexes:
    +898            if nominal_mass in self._dict_nominal_masses_indexes.keys():
    +899                return (
    +900                    self._dict_nominal_masses_indexes.get(nominal_mass)[0],
    +901                    self._dict_nominal_masses_indexes.get(nominal_mass)[1] + 1,
    +902                )
    +903
    +904            else:
    +905                # import warnings
    +906                # uncomment warn to distribution
    +907                # warnings.warn("Nominal mass not found in _dict_nominal_masses_indexes, returning (0, 0) for nominal mass %i"%nominal_mass)
    +908                return (0, 0)
    +909        else:
    +910            raise Exception(
    +911                "run process_mass_spec() function before trying to access the data"
    +912            )
     
    @@ -5040,18 +5227,20 @@
    Returns
    -
    869    def get_masses_count_by_nominal_mass(self):
    -870        """Return a dictionary of the nominal masses and their counts."""
    -871
    -872        dict_nominal_masses_count = {}
    -873
    -874        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    -875
    -876        for nominal_mass in all_nominal_masses:
    -877            if nominal_mass not in dict_nominal_masses_count:
    -878                dict_nominal_masses_count[nominal_mass] = len(list(self.get_nominal_mass_indexes(nominal_mass)))
    -879
    -880        return dict_nominal_masses_count
    +            
    914    def get_masses_count_by_nominal_mass(self):
    +915        """Return a dictionary of the nominal masses and their counts."""
    +916
    +917        dict_nominal_masses_count = {}
    +918
    +919        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    +920
    +921        for nominal_mass in all_nominal_masses:
    +922            if nominal_mass not in dict_nominal_masses_count:
    +923                dict_nominal_masses_count[nominal_mass] = len(
    +924                    list(self.get_nominal_mass_indexes(nominal_mass))
    +925                )
    +926
    +927        return dict_nominal_masses_count
     
    @@ -5071,36 +5260,36 @@
    Returns
    -
    882    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
    -883        """Return a dictionary of the nominal masses and their counts.
    -884
    -885        Parameters
    -886        ----------
    -887        mz_overlay : float, optional
    -888            The m/z overlay to use for counting. Defaults to 0.1.
    -889
    -890        Returns
    -891        -------
    -892        dict
    -893            A dictionary of the nominal masses and their counts.
    -894        """
    -895        dict_nominal_masses_count ={}
    -896
    -897        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    -898
    -899        for nominal_mass in all_nominal_masses:
    -900
    -901            if nominal_mass not in dict_nominal_masses_count:
    -902
    -903                min_mz = nominal_mass - mz_overlay
    -904
    -905                max_mz = nominal_mass + 1 + mz_overlay
    -906
    -907                indexes = indexes = where((self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)) 
    -908
    -909                dict_nominal_masses_count[nominal_mass] = indexes[0].size
    -910
    -911        return dict_nominal_masses_count
    +            
    929    def datapoints_count_by_nominal_mz(self, mz_overlay=0.1):
    +930        """Return a dictionary of the nominal masses and their counts.
    +931
    +932        Parameters
    +933        ----------
    +934        mz_overlay : float, optional
    +935            The m/z overlay to use for counting. Defaults to 0.1.
    +936
    +937        Returns
    +938        -------
    +939        dict
    +940            A dictionary of the nominal masses and their counts.
    +941        """
    +942        dict_nominal_masses_count = {}
    +943
    +944        all_nominal_masses = list(set([i.nominal_mz_exp for i in self.mspeaks]))
    +945
    +946        for nominal_mass in all_nominal_masses:
    +947            if nominal_mass not in dict_nominal_masses_count:
    +948                min_mz = nominal_mass - mz_overlay
    +949
    +950                max_mz = nominal_mass + 1 + mz_overlay
    +951
    +952                indexes = indexes = where(
    +953                    (self.mz_exp_profile > min_mz) & (self.mz_exp_profile < max_mz)
    +954                )
    +955
    +956                dict_nominal_masses_count[nominal_mass] = indexes[0].size
    +957
    +958        return dict_nominal_masses_count
     
    @@ -5133,28 +5322,32 @@
    Returns
    -
    913    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
    -914        """Return the indexes of the MSpeaks objects with the specified nominal mass.
    -915
    -916        Parameters
    -917        ----------
    -918        nominal_mass : int
    -919            The nominal mass to get the indexes for.
    -920        overlay : float, optional
    -921            The m/z overlay to use for counting. Defaults to 0.1.
    -922
    -923        Returns
    -924        -------
    -925        generator
    -926            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
    -927        """       
    -928        min_mz_to_look = nominal_mass - overlay
    -929        max_mz_to_look = nominal_mass + 1 + overlay
    -930
    -931        return (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
    -932
    -933        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
    -934        # return indexes
    +            
    960    def get_nominal_mass_indexes(self, nominal_mass, overlay=0.1):
    +961        """Return the indexes of the MSpeaks objects with the specified nominal mass.
    +962
    +963        Parameters
    +964        ----------
    +965        nominal_mass : int
    +966            The nominal mass to get the indexes for.
    +967        overlay : float, optional
    +968            The m/z overlay to use for counting. Defaults to 0.1.
    +969
    +970        Returns
    +971        -------
    +972        generator
    +973            A generator of the indexes of the MSpeaks objects with the specified nominal mass.
    +974        """
    +975        min_mz_to_look = nominal_mass - overlay
    +976        max_mz_to_look = nominal_mass + 1 + overlay
    +977
    +978        return (
    +979            i
    +980            for i in range(len(self.mspeaks))
    +981            if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look
    +982        )
    +983
    +984        # indexes = (i for i in range(len(self.mspeaks)) if min_mz_to_look <= self.mspeaks[i].mz_exp <= max_mz_to_look)
    +985        # return indexes
     
    @@ -5189,54 +5382,55 @@
    Returns
    -
     962    def plot_centroid(self, ax=None, c='g'):
    - 963        """Plot the centroid data of the mass spectrum.
    - 964
    - 965        Parameters
    - 966        ----------
    - 967        ax : matplotlib.axes.Axes, optional
    - 968            The matplotlib axes to plot on. Defaults to None.
    - 969        c : str, optional
    - 970            The color to use for the plot. Defaults to 'g' (green).
    - 971
    - 972        Returns
    - 973        -------
    - 974        matplotlib.axes.Axes
    - 975            The matplotlib axes containing the plot.
    - 976
    - 977        Raises
    - 978        ------
    - 979        Exception
    - 980            If no centroid data is found.
    - 981        """
    - 982
    - 983        import matplotlib.pyplot as plt
    - 984        if self._mspeaks:
    - 985
    - 986            if ax is None:
    - 987                ax = plt.gca()
    - 988
    - 989            markerline_a, stemlines_a, baseline_a = ax.stem(self.mz_exp, self.abundance, linefmt='-', markerfmt=" ")
    - 990
    - 991            plt.setp(markerline_a, 'color', c, 'linewidth', 2)
    - 992            plt.setp(stemlines_a, 'color', c, 'linewidth', 2)
    - 993            plt.setp(baseline_a, 'color', c, 'linewidth', 2)
    - 994
    - 995            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    - 996            ax.set_ylabel('Abundance', fontsize=12)
    - 997            ax.tick_params(axis='both', which='major', labelsize=12)
    - 998
    - 999            ax.axes.spines['top'].set_visible(False)
    -1000            ax.axes.spines['right'].set_visible(False)
    -1001
    -1002            ax.get_yaxis().set_visible(False)
    -1003            ax.spines['left'].set_visible(False)
    -1004
    -1005        else:
    -1006
    -1007            raise Exception("No centroid data found, please run process_mass_spec")
    -1008
    -1009        return ax
    +            
    1012    def plot_centroid(self, ax=None, c="g"):
    +1013        """Plot the centroid data of the mass spectrum.
    +1014
    +1015        Parameters
    +1016        ----------
    +1017        ax : matplotlib.axes.Axes, optional
    +1018            The matplotlib axes to plot on. Defaults to None.
    +1019        c : str, optional
    +1020            The color to use for the plot. Defaults to 'g' (green).
    +1021
    +1022        Returns
    +1023        -------
    +1024        matplotlib.axes.Axes
    +1025            The matplotlib axes containing the plot.
    +1026
    +1027        Raises
    +1028        ------
    +1029        Exception
    +1030            If no centroid data is found.
    +1031        """
    +1032
    +1033        import matplotlib.pyplot as plt
    +1034
    +1035        if self._mspeaks:
    +1036            if ax is None:
    +1037                ax = plt.gca()
    +1038
    +1039            markerline_a, stemlines_a, baseline_a = ax.stem(
    +1040                self.mz_exp, self.abundance, linefmt="-", markerfmt=" "
    +1041            )
    +1042
    +1043            plt.setp(markerline_a, "color", c, "linewidth", 2)
    +1044            plt.setp(stemlines_a, "color", c, "linewidth", 2)
    +1045            plt.setp(baseline_a, "color", c, "linewidth", 2)
    +1046
    +1047            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +1048            ax.set_ylabel("Abundance", fontsize=12)
    +1049            ax.tick_params(axis="both", which="major", labelsize=12)
    +1050
    +1051            ax.axes.spines["top"].set_visible(False)
    +1052            ax.axes.spines["right"].set_visible(False)
    +1053
    +1054            ax.get_yaxis().set_visible(False)
    +1055            ax.spines["left"].set_visible(False)
    +1056
    +1057        else:
    +1058            raise Exception("No centroid data found, please run process_mass_spec")
    +1059
    +1060        return ax
     
    @@ -5277,60 +5471,64 @@
    Raises
    -
    1011    def plot_profile_and_noise_threshold(self, ax=None,legend=False): 
    -1012        """Plot the profile data and noise threshold of the mass spectrum.
    -1013
    -1014        Parameters
    -1015        ----------
    -1016        ax : matplotlib.axes.Axes, optional
    -1017            The matplotlib axes to plot on. Defaults to None.
    -1018        legend : bool, optional
    -1019            Whether to show the legend. Defaults to False.
    -1020
    -1021        Returns
    -1022        -------
    -1023        matplotlib.axes.Axes
    -1024            The matplotlib axes containing the plot.
    -1025
    -1026        Raises
    -1027        ------
    -1028        Exception
    -1029            If no noise threshold is found.
    -1030        """
    -1031        import matplotlib.pyplot as plt
    -1032        if self.baseline_noise_std and self.baseline_noise_std:
    -1033
    -1034            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    -1035            baseline = (self.baseline_noise, self.baseline_noise)
    -1036
    -1037            # std = self.parameters.mass_spectrum.noise_threshold_min_std
    -1038            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
    -1039            x, y = self.get_noise_threshold()    
    -1040            
    -1041            if ax is None:
    -1042                ax = plt.gca()
    -1043            
    -1044            ax.plot(self.mz_exp_profile, self.abundance_profile, color="green",label="Spectrum")
    -1045            ax.plot(x, (baseline, baseline), color="yellow",label="Baseline Noise")
    -1046            ax.plot(x, y, color="red",label="Noise Threshold")
    -1047
    -1048            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    -1049            ax.set_ylabel('Abundance', fontsize=12)
    -1050            ax.tick_params(axis='both', which='major', labelsize=12)
    -1051
    -1052            ax.axes.spines['top'].set_visible(False)
    -1053            ax.axes.spines['right'].set_visible(False)
    -1054
    -1055            ax.get_yaxis().set_visible(False)
    -1056            ax.spines['left'].set_visible(False)
    -1057            if legend:
    -1058                ax.legend()
    -1059
    -1060        else:
    -1061
    -1062            raise Exception("Calculate noise threshold first")
    -1063
    -1064        return ax
    +            
    1062    def plot_profile_and_noise_threshold(self, ax=None, legend=False):
    +1063        """Plot the profile data and noise threshold of the mass spectrum.
    +1064
    +1065        Parameters
    +1066        ----------
    +1067        ax : matplotlib.axes.Axes, optional
    +1068            The matplotlib axes to plot on. Defaults to None.
    +1069        legend : bool, optional
    +1070            Whether to show the legend. Defaults to False.
    +1071
    +1072        Returns
    +1073        -------
    +1074        matplotlib.axes.Axes
    +1075            The matplotlib axes containing the plot.
    +1076
    +1077        Raises
    +1078        ------
    +1079        Exception
    +1080            If no noise threshold is found.
    +1081        """
    +1082        import matplotlib.pyplot as plt
    +1083
    +1084        if self.baseline_noise_std and self.baseline_noise_std:
    +1085            # x = (self.mz_exp_profile.min(), self.mz_exp_profile.max())
    +1086            baseline = (self.baseline_noise, self.baseline_noise)
    +1087
    +1088            # std = self.parameters.mass_spectrum.noise_threshold_min_std
    +1089            # threshold = self.baseline_noise_std + (std * self.baseline_noise_std)
    +1090            x, y = self.get_noise_threshold()
    +1091
    +1092            if ax is None:
    +1093                ax = plt.gca()
    +1094
    +1095            ax.plot(
    +1096                self.mz_exp_profile,
    +1097                self.abundance_profile,
    +1098                color="green",
    +1099                label="Spectrum",
    +1100            )
    +1101            ax.plot(x, (baseline, baseline), color="yellow", label="Baseline Noise")
    +1102            ax.plot(x, y, color="red", label="Noise Threshold")
    +1103
    +1104            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +1105            ax.set_ylabel("Abundance", fontsize=12)
    +1106            ax.tick_params(axis="both", which="major", labelsize=12)
    +1107
    +1108            ax.axes.spines["top"].set_visible(False)
    +1109            ax.axes.spines["right"].set_visible(False)
    +1110
    +1111            ax.get_yaxis().set_visible(False)
    +1112            ax.spines["left"].set_visible(False)
    +1113            if legend:
    +1114                ax.legend()
    +1115
    +1116        else:
    +1117            raise Exception("Calculate noise threshold first")
    +1118
    +1119        return ax
     
    @@ -5371,30 +5569,30 @@
    Raises
    -
    1066    def plot_mz_domain_profile(self, color='green', ax=None): 
    -1067        """Plot the m/z domain profile of the mass spectrum.
    -1068
    -1069        Parameters
    -1070        ----------
    -1071        color : str, optional
    -1072            The color to use for the plot. Defaults to 'green'.
    -1073        ax : matplotlib.axes.Axes, optional
    -1074            The matplotlib axes to plot on. Defaults to None.
    -1075
    -1076        Returns
    -1077        -------
    -1078        matplotlib.axes.Axes
    -1079            The matplotlib axes containing the plot.
    -1080        """       
    -1081
    -1082        import matplotlib.pyplot as plt
    -1083
    -1084        if ax is None:
    -1085            ax = plt.gca()
    -1086        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
    -1087        ax.set(xlabel='m/z', ylabel='abundance')
    -1088
    -1089        return ax
    +            
    1121    def plot_mz_domain_profile(self, color="green", ax=None):
    +1122        """Plot the m/z domain profile of the mass spectrum.
    +1123
    +1124        Parameters
    +1125        ----------
    +1126        color : str, optional
    +1127            The color to use for the plot. Defaults to 'green'.
    +1128        ax : matplotlib.axes.Axes, optional
    +1129            The matplotlib axes to plot on. Defaults to None.
    +1130
    +1131        Returns
    +1132        -------
    +1133        matplotlib.axes.Axes
    +1134            The matplotlib axes containing the plot.
    +1135        """
    +1136
    +1137        import matplotlib.pyplot as plt
    +1138
    +1139        if ax is None:
    +1140            ax = plt.gca()
    +1141        ax.plot(self.mz_exp_profile, self.abundance_profile, color=color)
    +1142        ax.set(xlabel="m/z", ylabel="abundance")
    +1143
    +1144        return ax
     
    @@ -5429,23 +5627,24 @@
    Returns
    -
    1091    def to_excel(self, out_file_path, write_metadata=True):
    -1092        """Export the mass spectrum to an Excel file.
    -1093
    -1094        Parameters
    -1095        ----------
    -1096        out_file_path : str
    -1097            The path to the Excel file to export to.
    -1098        write_metadata : bool, optional
    -1099            Whether to write the metadata to the Excel file. Defaults to True.
    -1100
    -1101        Returns
    -1102        -------
    -1103        None
    -1104        """
    -1105        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1106        exportMS = HighResMassSpecExport(out_file_path, self)
    -1107        exportMS.to_excel(write_metadata=write_metadata)
    +            
    1146    def to_excel(self, out_file_path, write_metadata=True):
    +1147        """Export the mass spectrum to an Excel file.
    +1148
    +1149        Parameters
    +1150        ----------
    +1151        out_file_path : str
    +1152            The path to the Excel file to export to.
    +1153        write_metadata : bool, optional
    +1154            Whether to write the metadata to the Excel file. Defaults to True.
    +1155
    +1156        Returns
    +1157        -------
    +1158        None
    +1159        """
    +1160        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1161
    +1162        exportMS = HighResMassSpecExport(out_file_path, self)
    +1163        exportMS.to_excel(write_metadata=write_metadata)
     
    @@ -5480,21 +5679,22 @@
    Returns
    -
    1109    def to_hdf(self, out_file_path):
    -1110        """Export the mass spectrum to an HDF file.
    -1111
    -1112        Parameters
    -1113        ----------
    -1114        out_file_path : str
    -1115            The path to the HDF file to export to.
    -1116
    -1117        Returns
    -1118        -------
    -1119        None
    -1120        """
    -1121        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1122        exportMS = HighResMassSpecExport(out_file_path, self)
    -1123        exportMS.to_hdf()
    +            
    1165    def to_hdf(self, out_file_path):
    +1166        """Export the mass spectrum to an HDF file.
    +1167
    +1168        Parameters
    +1169        ----------
    +1170        out_file_path : str
    +1171            The path to the HDF file to export to.
    +1172
    +1173        Returns
    +1174        -------
    +1175        None
    +1176        """
    +1177        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1178
    +1179        exportMS = HighResMassSpecExport(out_file_path, self)
    +1180        exportMS.to_hdf()
     
    @@ -5527,20 +5727,21 @@
    Returns
    -
    1125    def to_csv(self, out_file_path, write_metadata=True):
    -1126        """Export the mass spectrum to a CSV file.
    -1127        
    -1128        Parameters
    -1129        ----------
    -1130        out_file_path : str
    -1131            The path to the CSV file to export to.
    -1132        write_metadata : bool, optional
    -1133            Whether to write the metadata to the CSV file. Defaults to True.
    -1134        
    -1135        """
    -1136        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1137        exportMS = HighResMassSpecExport(out_file_path, self)
    -1138        exportMS.to_csv(write_metadata=write_metadata)
    +            
    1182    def to_csv(self, out_file_path, write_metadata=True):
    +1183        """Export the mass spectrum to a CSV file.
    +1184
    +1185        Parameters
    +1186        ----------
    +1187        out_file_path : str
    +1188            The path to the CSV file to export to.
    +1189        write_metadata : bool, optional
    +1190            Whether to write the metadata to the CSV file. Defaults to True.
    +1191
    +1192        """
    +1193        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1194
    +1195        exportMS = HighResMassSpecExport(out_file_path, self)
    +1196        exportMS.to_csv(write_metadata=write_metadata)
     
    @@ -5569,20 +5770,21 @@
    Parameters
    -
    1140    def to_pandas(self, out_file_path, write_metadata=True):
    -1141        """Export the mass spectrum to a Pandas dataframe with pkl extension.
    -1142
    -1143        Parameters
    -1144        ----------
    -1145        out_file_path : str
    -1146            The path to the CSV file to export to.
    -1147        write_metadata : bool, optional
    -1148            Whether to write the metadata to the CSV file. Defaults to True.
    -1149
    -1150        """
    -1151        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1152        exportMS = HighResMassSpecExport(out_file_path, self)
    -1153        exportMS.to_pandas(write_metadata=write_metadata)
    +            
    1198    def to_pandas(self, out_file_path, write_metadata=True):
    +1199        """Export the mass spectrum to a Pandas dataframe with pkl extension.
    +1200
    +1201        Parameters
    +1202        ----------
    +1203        out_file_path : str
    +1204            The path to the CSV file to export to.
    +1205        write_metadata : bool, optional
    +1206            Whether to write the metadata to the CSV file. Defaults to True.
    +1207
    +1208        """
    +1209        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1210
    +1211        exportMS = HighResMassSpecExport(out_file_path, self)
    +1212        exportMS.to_pandas(write_metadata=write_metadata)
     
    @@ -5611,23 +5813,24 @@
    Parameters
    -
    1155    def to_dataframe(self, additional_columns=None):
    -1156        """Return the mass spectrum as a Pandas dataframe.
    -1157
    -1158        Parameters
    -1159        ----------
    -1160        additional_columns : list, optional
    -1161            A list of additional columns to include in the dataframe. Defaults to None.
    -1162            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
    -1163        
    -1164        Returns
    -1165        -------
    -1166        pandas.DataFrame
    -1167            The mass spectrum as a Pandas dataframe.
    -1168        """
    -1169        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1170        exportMS = HighResMassSpecExport(self.filename, self)
    -1171        return exportMS.get_pandas_df(additional_columns = additional_columns)
    +            
    1214    def to_dataframe(self, additional_columns=None):
    +1215        """Return the mass spectrum as a Pandas dataframe.
    +1216
    +1217        Parameters
    +1218        ----------
    +1219        additional_columns : list, optional
    +1220            A list of additional columns to include in the dataframe. Defaults to None.
    +1221            Suitable columns are: "Aromaticity Index", "Aromaticity Index (modified)", and "NOSC"
    +1222
    +1223        Returns
    +1224        -------
    +1225        pandas.DataFrame
    +1226            The mass spectrum as a Pandas dataframe.
    +1227        """
    +1228        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1229
    +1230        exportMS = HighResMassSpecExport(self.filename, self)
    +1231        return exportMS.get_pandas_df(additional_columns=additional_columns)
     
    @@ -5661,11 +5864,12 @@
    Returns
    -
    1173    def to_json(self):
    -1174        """Return the mass spectrum as a JSON file."""
    -1175        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1176        exportMS = HighResMassSpecExport(self.filename, self)
    -1177        return exportMS.to_json()
    +            
    1233    def to_json(self):
    +1234        """Return the mass spectrum as a JSON file."""
    +1235        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1236
    +1237        exportMS = HighResMassSpecExport(self.filename, self)
    +1238        return exportMS.to_json()
     
    @@ -5685,11 +5889,12 @@
    Returns
    -
    1179    def parameters_json(self):
    -1180        """Return the parameters of the mass spectrum as a JSON string."""
    -1181        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1182        exportMS = HighResMassSpecExport(self.filename, self)
    -1183        return exportMS.parameters_to_json()
    +            
    1240    def parameters_json(self):
    +1241        """Return the parameters of the mass spectrum as a JSON string."""
    +1242        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1243
    +1244        exportMS = HighResMassSpecExport(self.filename, self)
    +1245        return exportMS.parameters_to_json()
     
    @@ -5709,11 +5914,12 @@
    Returns
    -
    1185    def parameters_toml(self):
    -1186        """Return the parameters of the mass spectrum as a TOML string."""
    -1187        from corems.mass_spectrum.output.export import HighResMassSpecExport
    -1188        exportMS = HighResMassSpecExport(self.filename, self)
    -1189        return exportMS.parameters_to_toml()
    +            
    1247    def parameters_toml(self):
    +1248        """Return the parameters of the mass spectrum as a TOML string."""
    +1249        from corems.mass_spectrum.output.export import HighResMassSpecExport
    +1250
    +1251        exportMS = HighResMassSpecExport(self.filename, self)
    +1252        return exportMS.parameters_to_toml()
     
    @@ -5784,48 +5990,50 @@
    Inherited Members
    -
    1191class MassSpecProfile(MassSpecBase):
    -1192    """A mass spectrum class when the entry point is on profile format
    -1193    
    -1194    Notes
    -1195    -----
    -1196    Stores the profile data and instrument settings. 
    -1197    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
    -1198    _mspeaks is populated under the hood by calling process_mass_spec method.
    -1199    Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().
    -1200
    -1201    Parameters
    -1202    ----------
    -1203    data_dict : dict
    -1204        A dictionary containing the profile data.
    -1205    d_params : dict{'str': float, int or str}
    -1206        contains the instrument settings and processing settings
    -1207    auto_process : bool, optional
    -1208        Whether to automatically process the mass spectrum. Defaults to True.
    -1209
    -1210
    -1211    Attributes 
    -1212    ----------
    -1213    _abundance : ndarray
    -1214        The abundance values of the mass spectrum.
    -1215    _mz_exp : ndarray
    -1216        The m/z values of the mass spectrum.
    -1217    _mspeaks : list
    -1218        A list of mass peaks.
    -1219
    -1220    Methods 
    -1221    ----------
    -1222    * process_mass_spec(). Process the mass spectrum.
    -1223
    -1224    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
    -1225    """
    -1226
    -1227    def __init__(self, data_dict, d_params, auto_process=True):
    -1228        # print(data_dict.keys())
    -1229        super().__init__(data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params)
    -1230       
    -1231        if auto_process:
    -1232            self.process_mass_spec()
    +            
    1255class MassSpecProfile(MassSpecBase):
    +1256    """A mass spectrum class when the entry point is on profile format
    +1257
    +1258    Notes
    +1259    -----
    +1260    Stores the profile data and instrument settings.
    +1261    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.
    +1262    _mspeaks is populated under the hood by calling process_mass_spec method.
    +1263    Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().
    +1264
    +1265    Parameters
    +1266    ----------
    +1267    data_dict : dict
    +1268        A dictionary containing the profile data.
    +1269    d_params : dict{'str': float, int or str}
    +1270        contains the instrument settings and processing settings
    +1271    auto_process : bool, optional
    +1272        Whether to automatically process the mass spectrum. Defaults to True.
    +1273
    +1274
    +1275    Attributes
    +1276    ----------
    +1277    _abundance : ndarray
    +1278        The abundance values of the mass spectrum.
    +1279    _mz_exp : ndarray
    +1280        The m/z values of the mass spectrum.
    +1281    _mspeaks : list
    +1282        A list of mass peaks.
    +1283
    +1284    Methods
    +1285    ----------
    +1286    * process_mass_spec(). Process the mass spectrum.
    +1287
    +1288    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()
    +1289    """
    +1290
    +1291    def __init__(self, data_dict, d_params, auto_process=True):
    +1292        # print(data_dict.keys())
    +1293        super().__init__(
    +1294            data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params
    +1295        )
    +1296
    +1297        if auto_process:
    +1298            self.process_mass_spec()
     
    @@ -5833,7 +6041,7 @@
    Inherited Members
    Notes
    -

    Stores the profile data and instrument settings. +

    Stores the profile data and instrument settings. Iteration over a list of MSPeaks classes stored at the _mspeaks attributes. _mspeaks is populated under the hood by calling process_mass_spec method. Iteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().

    @@ -5851,12 +6059,14 @@
    Parameters
    Attributes
    -

    _abundance : ndarray - The abundance values of the mass spectrum. -_mz_exp : ndarray - The m/z values of the mass spectrum. -_mspeaks : list - A list of mass peaks.

    +
      +
    • _abundance (ndarray): +The abundance values of the mass spectrum.
    • +
    • _mz_exp (ndarray): +The m/z values of the mass spectrum.
    • +
    • _mspeaks (list): +A list of mass peaks.
    • +
    Methods
    @@ -5878,12 +6088,14 @@
    Methods
    -
    1227    def __init__(self, data_dict, d_params, auto_process=True):
    -1228        # print(data_dict.keys())
    -1229        super().__init__(data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params)
    -1230       
    -1231        if auto_process:
    -1232            self.process_mass_spec()
    +            
    1291    def __init__(self, data_dict, d_params, auto_process=True):
    +1292        # print(data_dict.keys())
    +1293        super().__init__(
    +1294            data_dict.get(Labels.mz), data_dict.get(Labels.abundance), d_params
    +1295        )
    +1296
    +1297        if auto_process:
    +1298            self.process_mass_spec()
     
    @@ -6042,131 +6254,137 @@
    Inherited Members
    -
    1234class MassSpecfromFreq(MassSpecBase):
    -1235    """ A mass spectrum class when data entry is on frequency domain
    -1236
    -1237    Notes
    -1238    -----
    -1239    - Transform to m/z based on the settings stored at d_params
    -1240    - Stores the profile data and instrument settings
    -1241    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    -1242    - _mspeaks is populated under the hood by calling process_mass_spec method
    -1243    - iteration is null if _mspeaks is empty
    -1244
    -1245    Parameters
    -1246    ----------
    -1247    frequency_domain : list(float)
    -1248        all datapoints in frequency domain in Hz
    -1249    magnitude :  frequency_domain : list(float)
    -1250        all datapoints in for magnitude of each frequency datapoint
    -1251    d_params : dict{'str': float, int or str}
    -1252        contains the instrument settings and processing settings
    -1253    auto_process : bool, optional
    -1254        Whether to automatically process the mass spectrum. Defaults to True.
    -1255    keep_profile : bool, optional
    -1256        Whether to keep the profile data. Defaults to True.
    -1257  
    -1258    Attributes
    -1259    ----------
    -1260    has_frequency : bool
    -1261        Whether the mass spectrum has frequency data.
    -1262    _frequency_domain : list(float)
    -1263        Frequency domain in Hz
    -1264    label : str
    -1265        store label (Bruker, Midas Transient, see Labels class ). It across distinct processing points
    -1266    _abundance : ndarray
    -1267        The abundance values of the mass spectrum.
    -1268    _mz_exp : ndarray
    -1269        The m/z values of the mass spectrum.
    -1270    _mspeaks : list
    -1271        A list of mass peaks.
    -1272    See Also: all the attributes of MassSpecBase class
    -1273     
    -1274    Methods
    -1275    ----------
    -1276    * _set_mz_domain().
    -1277        calculates the m_z based on the setting of d_params
    -1278    * process_mass_spec().  Process the mass spectrum.
    -1279    
    -1280    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
    -1281    """
    -1282
    -1283    def __init__(self, frequency_domain, magnitude, d_params, 
    -1284                auto_process=True, keep_profile=True):
    -1285
    -1286        super().__init__(None, magnitude, d_params)
    -1287
    -1288        self._frequency_domain = frequency_domain
    -1289        self.has_frequency = True
    -1290        self._set_mz_domain()
    -1291        self._sort_mz_domain()
    -1292        
    -1293        self.magnetron_frequency = None
    -1294        self.magnetron_frequency_sigma = None
    -1295
    -1296        #use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
    -1297        
    -1298        if auto_process:
    -1299            self.process_mass_spec(keep_profile=keep_profile)
    -1300
    -1301    def _sort_mz_domain(self):
    -1302        """Sort the mass spectrum by m/z values."""
    +            
    1301class MassSpecfromFreq(MassSpecBase):
    +1302    """A mass spectrum class when data entry is on frequency domain
     1303
    -1304        if self._mz_exp[0] > self._mz_exp[-1]:
    -1305            self._mz_exp = self._mz_exp[::-1]
    -1306            self._abundance = self._abundance[::-1]
    -1307            self._frequency_domain = self._frequency_domain[::-1]
    -1308
    -1309    def _set_mz_domain(self):
    -1310        """Set the m/z domain of the mass spectrum based on the settings of d_params."""
    -1311        if self.label == Labels.bruker_frequency:
    -1312            self._mz_exp = self._f_to_mz_bruker()
    -1313
    -1314        else:
    -1315
    -1316            self._mz_exp = self._f_to_mz()
    -1317
    -1318    @property
    -1319    def transient_settings(self): 
    -1320        """Return the transient settings of the mass spectrum."""
    -1321        return self.parameters.transient
    -1322
    -1323    @transient_settings.setter
    -1324    def transient_settings(self, instance_TransientSetting):
    -1325     
    -1326        self.parameters.transient = instance_TransientSetting  
    -1327
    -1328    def calc_magnetron_freq(self, max_magnetron_freq=50,magnetron_freq_bins=300):
    -1329        """Calculates the magnetron frequency of the mass spectrum.
    -1330
    -1331        Parameters
    -1332        ----------
    -1333        max_magnetron_freq : float, optional
    -1334            The maximum magnetron frequency. Defaults to 50.
    -1335        magnetron_freq_bins : int, optional
    -1336            The number of bins to use for the histogram. Defaults to 300.
    -1337
    -1338        Returns
    -1339        -------
    -1340        None
    -1341
    -1342        Notes
    -1343        -----
    -1344        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
    -1345        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
    -1346        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
    -1347        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
    -1348        """
    -1349        ms_df = DataFrame(self.freq_exp(),columns=['Freq'])
    -1350        ms_df['FreqDelta'] = ms_df['Freq'].diff()
    -1351
    -1352        freq_hist = histogram(ms_df[ms_df['FreqDelta']<max_magnetron_freq]['FreqDelta'],bins=magnetron_freq_bins)
    -1353    
    -1354        mod = GaussianModel()
    -1355        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
    -1356        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
    -1357        self.magnetron_frequency = out.best_values['center']
    -1358        self.magnetron_frequency_sigma = out.best_values['sigma']
    +1304    Notes
    +1305    -----
    +1306    - Transform to m/z based on the settings stored at d_params
    +1307    - Stores the profile data and instrument settings
    +1308    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    +1309    - _mspeaks is populated under the hood by calling process_mass_spec method
    +1310    - iteration is null if _mspeaks is empty
    +1311
    +1312    Parameters
    +1313    ----------
    +1314    frequency_domain : list(float)
    +1315        all datapoints in frequency domain in Hz
    +1316    magnitude :  frequency_domain : list(float)
    +1317        all datapoints in for magnitude of each frequency datapoint
    +1318    d_params : dict{'str': float, int or str}
    +1319        contains the instrument settings and processing settings
    +1320    auto_process : bool, optional
    +1321        Whether to automatically process the mass spectrum. Defaults to True.
    +1322    keep_profile : bool, optional
    +1323        Whether to keep the profile data. Defaults to True.
    +1324
    +1325    Attributes
    +1326    ----------
    +1327    has_frequency : bool
    +1328        Whether the mass spectrum has frequency data.
    +1329    _frequency_domain : list(float)
    +1330        Frequency domain in Hz
    +1331    label : str
    +1332        store label (Bruker, Midas Transient, see Labels class ). It across distinct processing points
    +1333    _abundance : ndarray
    +1334        The abundance values of the mass spectrum.
    +1335    _mz_exp : ndarray
    +1336        The m/z values of the mass spectrum.
    +1337    _mspeaks : list
    +1338        A list of mass peaks.
    +1339    See Also: all the attributes of MassSpecBase class
    +1340
    +1341    Methods
    +1342    ----------
    +1343    * _set_mz_domain().
    +1344        calculates the m_z based on the setting of d_params
    +1345    * process_mass_spec().  Process the mass spectrum.
    +1346
    +1347    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()
    +1348    """
    +1349
    +1350    def __init__(
    +1351        self,
    +1352        frequency_domain,
    +1353        magnitude,
    +1354        d_params,
    +1355        auto_process=True,
    +1356        keep_profile=True,
    +1357    ):
    +1358        super().__init__(None, magnitude, d_params)
    +1359
    +1360        self._frequency_domain = frequency_domain
    +1361        self.has_frequency = True
    +1362        self._set_mz_domain()
    +1363        self._sort_mz_domain()
    +1364
    +1365        self.magnetron_frequency = None
    +1366        self.magnetron_frequency_sigma = None
    +1367
    +1368        # use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
    +1369
    +1370        if auto_process:
    +1371            self.process_mass_spec(keep_profile=keep_profile)
    +1372
    +1373    def _sort_mz_domain(self):
    +1374        """Sort the mass spectrum by m/z values."""
    +1375
    +1376        if self._mz_exp[0] > self._mz_exp[-1]:
    +1377            self._mz_exp = self._mz_exp[::-1]
    +1378            self._abundance = self._abundance[::-1]
    +1379            self._frequency_domain = self._frequency_domain[::-1]
    +1380
    +1381    def _set_mz_domain(self):
    +1382        """Set the m/z domain of the mass spectrum based on the settings of d_params."""
    +1383        if self.label == Labels.bruker_frequency:
    +1384            self._mz_exp = self._f_to_mz_bruker()
    +1385
    +1386        else:
    +1387            self._mz_exp = self._f_to_mz()
    +1388
    +1389    @property
    +1390    def transient_settings(self):
    +1391        """Return the transient settings of the mass spectrum."""
    +1392        return self.parameters.transient
    +1393
    +1394    @transient_settings.setter
    +1395    def transient_settings(self, instance_TransientSetting):
    +1396        self.parameters.transient = instance_TransientSetting
    +1397
    +1398    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
    +1399        """Calculates the magnetron frequency of the mass spectrum.
    +1400
    +1401        Parameters
    +1402        ----------
    +1403        max_magnetron_freq : float, optional
    +1404            The maximum magnetron frequency. Defaults to 50.
    +1405        magnetron_freq_bins : int, optional
    +1406            The number of bins to use for the histogram. Defaults to 300.
    +1407
    +1408        Returns
    +1409        -------
    +1410        None
    +1411
    +1412        Notes
    +1413        -----
    +1414        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
    +1415        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
    +1416        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
    +1417        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
    +1418        """
    +1419        ms_df = DataFrame(self.freq_exp(), columns=["Freq"])
    +1420        ms_df["FreqDelta"] = ms_df["Freq"].diff()
    +1421
    +1422        freq_hist = histogram(
    +1423            ms_df[ms_df["FreqDelta"] < max_magnetron_freq]["FreqDelta"],
    +1424            bins=magnetron_freq_bins,
    +1425        )
    +1426
    +1427        mod = GaussianModel()
    +1428        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
    +1429        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
    +1430        self.magnetron_frequency = out.best_values["center"]
    +1431        self.magnetron_frequency_sigma = out.best_values["sigma"]
     
    @@ -6237,23 +6455,28 @@
    Methods
    -
    1283    def __init__(self, frequency_domain, magnitude, d_params, 
    -1284                auto_process=True, keep_profile=True):
    -1285
    -1286        super().__init__(None, magnitude, d_params)
    -1287
    -1288        self._frequency_domain = frequency_domain
    -1289        self.has_frequency = True
    -1290        self._set_mz_domain()
    -1291        self._sort_mz_domain()
    -1292        
    -1293        self.magnetron_frequency = None
    -1294        self.magnetron_frequency_sigma = None
    -1295
    -1296        #use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
    -1297        
    -1298        if auto_process:
    -1299            self.process_mass_spec(keep_profile=keep_profile)
    +            
    1350    def __init__(
    +1351        self,
    +1352        frequency_domain,
    +1353        magnitude,
    +1354        d_params,
    +1355        auto_process=True,
    +1356        keep_profile=True,
    +1357    ):
    +1358        super().__init__(None, magnitude, d_params)
    +1359
    +1360        self._frequency_domain = frequency_domain
    +1361        self.has_frequency = True
    +1362        self._set_mz_domain()
    +1363        self._sort_mz_domain()
    +1364
    +1365        self.magnetron_frequency = None
    +1366        self.magnetron_frequency_sigma = None
    +1367
    +1368        # use this call to automatically process data as the object is created, Setting need to be changed before initiating the class to be in effect
    +1369
    +1370        if auto_process:
    +1371            self.process_mass_spec(keep_profile=keep_profile)
     
    @@ -6317,37 +6540,40 @@
    Methods
    -
    1328    def calc_magnetron_freq(self, max_magnetron_freq=50,magnetron_freq_bins=300):
    -1329        """Calculates the magnetron frequency of the mass spectrum.
    -1330
    -1331        Parameters
    -1332        ----------
    -1333        max_magnetron_freq : float, optional
    -1334            The maximum magnetron frequency. Defaults to 50.
    -1335        magnetron_freq_bins : int, optional
    -1336            The number of bins to use for the histogram. Defaults to 300.
    -1337
    -1338        Returns
    -1339        -------
    -1340        None
    -1341
    -1342        Notes
    -1343        -----
    -1344        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
    -1345        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
    -1346        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
    -1347        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
    -1348        """
    -1349        ms_df = DataFrame(self.freq_exp(),columns=['Freq'])
    -1350        ms_df['FreqDelta'] = ms_df['Freq'].diff()
    -1351
    -1352        freq_hist = histogram(ms_df[ms_df['FreqDelta']<max_magnetron_freq]['FreqDelta'],bins=magnetron_freq_bins)
    -1353    
    -1354        mod = GaussianModel()
    -1355        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
    -1356        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
    -1357        self.magnetron_frequency = out.best_values['center']
    -1358        self.magnetron_frequency_sigma = out.best_values['sigma']
    +            
    1398    def calc_magnetron_freq(self, max_magnetron_freq=50, magnetron_freq_bins=300):
    +1399        """Calculates the magnetron frequency of the mass spectrum.
    +1400
    +1401        Parameters
    +1402        ----------
    +1403        max_magnetron_freq : float, optional
    +1404            The maximum magnetron frequency. Defaults to 50.
    +1405        magnetron_freq_bins : int, optional
    +1406            The number of bins to use for the histogram. Defaults to 300.
    +1407
    +1408        Returns
    +1409        -------
    +1410        None
    +1411
    +1412        Notes
    +1413        -----
    +1414        Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.
    +1415        A histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.
    +1416        A gaussian model is fit to this histogram - the center value of this (statistically probably) the magnetron frequency.
    +1417        This appears to work well or nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.
    +1418        """
    +1419        ms_df = DataFrame(self.freq_exp(), columns=["Freq"])
    +1420        ms_df["FreqDelta"] = ms_df["Freq"].diff()
    +1421
    +1422        freq_hist = histogram(
    +1423            ms_df[ms_df["FreqDelta"] < max_magnetron_freq]["FreqDelta"],
    +1424            bins=magnetron_freq_bins,
    +1425        )
    +1426
    +1427        mod = GaussianModel()
    +1428        pars = mod.guess(freq_hist[0], x=freq_hist[1][:-1])
    +1429        out = mod.fit(freq_hist[0], pars, x=freq_hist[1][:-1])
    +1430        self.magnetron_frequency = out.best_values["center"]
    +1431        self.magnetron_frequency_sigma = out.best_values["sigma"]
     
    @@ -6529,226 +6755,238 @@
    Inherited Members
    -
    1361class MassSpecCentroid(MassSpecBase):
    -1362
    -1363    """A mass spectrum class when the entry point is on centroid format
    -1364
    -1365    Notes
    -1366    -----
    -1367    - Stores the centroid data and instrument settings
    -1368    - Simulate profile data based on Gaussian or Lorentzian peak shape
    -1369    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    -1370    - _mspeaks is populated under the hood by calling process_mass_spec method
    -1371    - iteration is null if _mspeaks is empty
    -1372
    -1373    Parameters
    -1374    ----------
    -1375    data_dict : dict {string: numpy array float64 )
    -1376        contains keys [m/z, Abundance, Resolving Power, S/N] 
    -1377    d_params : dict{'str': float, int or str}
    -1378        contains the instrument settings and processing settings
    -1379    auto_process : bool, optional
    -1380        Whether to automatically process the mass spectrum. Defaults to True.
    -1381        
    -1382    Attributes
    -1383    ----------
    -1384    label : str
    -1385        store label (Bruker, Midas Transient, see Labels class)
    -1386    _baseline_noise : float
    -1387        store baseline noise
    -1388    _baseline_noise_std : float
    -1389        store baseline noise std
    -1390    _abundance : ndarray
    -1391        The abundance values of the mass spectrum.
    -1392    _mz_exp : ndarray
    -1393        The m/z values of the mass spectrum.
    -1394    _mspeaks : list
    -1395        A list of mass peaks. 
    -1396
    -1397    
    -1398    Methods
    -1399    ----------
    -1400    * process_mass_spec().
    -1401        Process the mass spectrum. Overriden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
    -1402    * __simulate_profile__data__().
    -1403        Simulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.
    -1404
    -1405    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
    -1406    """
    -1407
    -1408    def __init__(self, data_dict, d_params, auto_process=True):
    -1409
    -1410        super().__init__([], [], d_params)
    -1411
    -1412        self._set_parameters_objects(d_params)
    -1413        
    -1414        if self.label == Labels.thermo_centroid:
    -1415            self._baseline_noise = d_params.get("baseline_noise")
    -1416            self._baseline_noise_std = d_params.get("baseline_noise_std")
    -1417
    -1418        self.is_centroid = True
    -1419        self.data_dict = data_dict
    -1420        self._mz_exp = data_dict[Labels.mz]
    -1421        self._abundance = data_dict[Labels.abundance]
    -1422
    -1423        if auto_process:
    -1424            self.process_mass_spec()
    -1425            
    -1426
    -1427    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
    -1428        """Simulate profile data based on Gaussian or Lorentzian peak shape
    -1429
    -1430        Notes
    -1431        -----
    -1432        Needs theoretical resolving power calculation and define peak shape.
    -1433        This is a quick fix to trick a line plot be able to plot as sticks for plotting and inspection purposes only.
    -1434        
    -1435        Parameters
    -1436        ----------
    -1437        exp_mz_centroid : list(float)
    -1438            list of m/z values
    -1439        magnitude_centroid : list(float)
    -1440            list of abundance values
    -1441            
    -1442            
    -1443        Returns
    -1444        -------
    -1445        x : list(float)
    -1446            list of m/z values
    -1447        y : list(float)
    -1448            list of abundance values
    -1449        """
    -1450
    -1451        x, y = [], []
    -1452        for i in range(len(exp_mz_centroid)):
    -1453            x.append(exp_mz_centroid[i] - 0.0000001)
    -1454            x.append(exp_mz_centroid[i])
    -1455            x.append(exp_mz_centroid[i] + 0.0000001)
    -1456            y.append(0)
    -1457            y.append(magnitude_centroid[i])
    -1458            y.append(0)
    -1459        return x, y
    -1460
    -1461    @property
    -1462    def mz_exp_profile(self):
    -1463        """Return the m/z profile of the mass spectrum."""
    -1464        mz_list = []
    -1465        for mz in self.mz_exp:
    -1466            mz_list.append(mz - 0.0000001)
    -1467            mz_list.append(mz)
    -1468            mz_list.append(mz + 0.0000001)
    -1469        return mz_list
    -1470    
    -1471    @mz_exp_profile.setter
    -1472    def mz_exp_profile(self, _mz_exp ): self._mz_exp = _mz_exp
    -1473
    -1474    @property
    -1475    def abundance_profile(self):
    -1476        """Return the abundance profile of the mass spectrum."""
    -1477        ab_list = []
    -1478        for ab in self.abundance:
    -1479            ab_list.append(0)
    -1480            ab_list.append(ab)
    -1481            ab_list.append(0)
    -1482        return ab_list
    -1483
    -1484    @abundance_profile.setter
    -1485    def abundance_profile(self, abundance ): self._abundance = abundance
    -1486
    -1487    @property
    -1488    def tic(self):
    -1489        """Return the total ion current of the mass spectrum."""
    -1490        return sum(self.abundance)
    -1491
    -1492    def process_mass_spec(self):
    -1493        """Process the mass spectrum.
    -1494       
    -1495        """
    -1496        import tqdm
    -1497        # overwrite process_mass_spec 
    -1498        # mspeak objs are usually added inside the PeaKPicking class 
    -1499        # for profile and freq based data
    -1500        data_dict = self.data_dict
    -1501        ion_charge = self.polarity
    -1502
    -1503        # Check if resolving power is present
    -1504        rp_present = True
    -1505        if not data_dict.get(Labels.rp):
    -1506            rp_present = False
    -1507        if rp_present and list(data_dict.get(Labels.rp)) == [None]*len(data_dict.get(Labels.rp)):
    -1508            rp_present = False
    -1509
    -1510        # Check if s2n is present
    -1511        s2n_present = True
    -1512        if not data_dict.get(Labels.s2n):
    -1513            s2n_present = False
    -1514        if s2n_present and list(data_dict.get(Labels.s2n)) == [None]*len(data_dict.get(Labels.s2n)):
    -1515            s2n_present = False
    -1516        
    -1517        # Warning if no s2n data but noise thresholding is set to signal_noise
    -1518        if not s2n_present and self.parameters.mass_spectrum.noise_threshold_method == 'signal_noise':
    -1519            raise Exception("Signal to Noise data is missing for noise thresholding")
    +            
    1434class MassSpecCentroid(MassSpecBase):
    +1435    """A mass spectrum class when the entry point is on centroid format
    +1436
    +1437    Notes
    +1438    -----
    +1439    - Stores the centroid data and instrument settings
    +1440    - Simulate profile data based on Gaussian or Lorentzian peak shape
    +1441    - Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    +1442    - _mspeaks is populated under the hood by calling process_mass_spec method
    +1443    - iteration is null if _mspeaks is empty
    +1444
    +1445    Parameters
    +1446    ----------
    +1447    data_dict : dict {string: numpy array float64}
    +1448        contains keys [m/z, Abundance, Resolving Power, S/N]
    +1449    d_params : dict{'str': float, int or str}
    +1450        contains the instrument settings and processing settings
    +1451    auto_process : bool, optional
    +1452        Whether to automatically process the mass spectrum. Defaults to True.
    +1453
    +1454    Attributes
    +1455    ----------
    +1456    label : str
    +1457        store label (Bruker, Midas Transient, see Labels class)
    +1458    _baseline_noise : float
    +1459        store baseline noise
    +1460    _baseline_noise_std : float
    +1461        store baseline noise std
    +1462    _abundance : ndarray
    +1463        The abundance values of the mass spectrum.
    +1464    _mz_exp : ndarray
    +1465        The m/z values of the mass spectrum.
    +1466    _mspeaks : list
    +1467        A list of mass peaks.
    +1468
    +1469
    +1470    Methods
    +1471    ----------
    +1472    * process_mass_spec().
    +1473        Process the mass spectrum. Overridden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
    +1474    * __simulate_profile__data__().
    +1475        Simulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.
    +1476
    +1477    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()
    +1478    """
    +1479
    +1480    def __init__(self, data_dict, d_params, auto_process=True):
    +1481        super().__init__([], [], d_params)
    +1482
    +1483        self._set_parameters_objects(d_params)
    +1484
    +1485        if self.label == Labels.thermo_centroid:
    +1486            self._baseline_noise = d_params.get("baseline_noise")
    +1487            self._baseline_noise_std = d_params.get("baseline_noise_std")
    +1488
    +1489        self.is_centroid = True
    +1490        self.data_dict = data_dict
    +1491        self._mz_exp = data_dict[Labels.mz]
    +1492        self._abundance = data_dict[Labels.abundance]
    +1493
    +1494        if auto_process:
    +1495            self.process_mass_spec()
    +1496
    +1497    def __simulate_profile__data__(self, exp_mz_centroid, magnitude_centroid):
    +1498        """Simulate profile data based on Gaussian or Lorentzian peak shape
    +1499
    +1500        Notes
    +1501        -----
    +1502        Needs theoretical resolving power calculation and define peak shape.
    +1503        This is a quick fix that tricks a line plot into drawing sticks, for plotting and inspection purposes only.
    +1504
    +1505        Parameters
    +1506        ----------
    +1507        exp_mz_centroid : list(float)
    +1508            list of m/z values
    +1509        magnitude_centroid : list(float)
    +1510            list of abundance values
    +1511
    +1512
    +1513        Returns
    +1514        -------
    +1515        x : list(float)
    +1516            list of m/z values
    +1517        y : list(float)
    +1518            list of abundance values
    +1519        """
     1520
    -1521        # Pull out abundance data        
    -1522        abun = array(data_dict.get(Labels.abundance)).astype(float)
    -1523        
    -1524        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
    -1525        abundance_threshold, factor = self.get_threshold(abun)
    -1526        
    -1527        # Set rp_i and s2n_i to None which will be overwritten if present
    -1528        rp_i, s2n_i = np.nan, np.nan
    -1529        for index, mz in enumerate(data_dict.get(Labels.mz)):
    -1530            if rp_present:
    -1531                if not data_dict.get(Labels.rp)[index]:
    -1532                    rp_i = np.nan
    -1533                else:
    -1534                    rp_i = float(data_dict.get(Labels.rp)[index])
    -1535            if s2n_present:
    -1536                if not data_dict.get(Labels.s2n)[index]:
    -1537                    s2n_i = np.nan
    -1538                else:
    -1539                    s2n_i = float(data_dict.get(Labels.s2n)[index])
    +1521        x, y = [], []
    +1522        for i in range(len(exp_mz_centroid)):
    +1523            x.append(exp_mz_centroid[i] - 0.0000001)
    +1524            x.append(exp_mz_centroid[i])
    +1525            x.append(exp_mz_centroid[i] + 0.0000001)
    +1526            y.append(0)
    +1527            y.append(magnitude_centroid[i])
    +1528            y.append(0)
    +1529        return x, y
    +1530
    +1531    @property
    +1532    def mz_exp_profile(self):
    +1533        """Return the m/z profile of the mass spectrum."""
    +1534        mz_list = []
    +1535        for mz in self.mz_exp:
    +1536            mz_list.append(mz - 0.0000001)
    +1537            mz_list.append(mz)
    +1538            mz_list.append(mz + 0.0000001)
    +1539        return mz_list
     1540
    -1541            # centroid peak does not have start and end peak index pos
    -1542            massspec_indexes = (index, index, index)
    -1543
    -1544            # Add peaks based on the noise thresholding method
    -1545            if self.parameters.mass_spectrum.noise_threshold_method in ['minima', 'relative_abundance', 'absolute_abundance'] and abun[index]/factor >= abundance_threshold:             
    -1546                self.add_mspeak(
    -1547                    ion_charge,
    -1548                    mz,
    -1549                    abun[index],
    -1550                    rp_i,
    -1551                    s2n_i,
    -1552                    massspec_indexes,
    -1553                    ms_parent=self
    -1554                )
    -1555            if self.parameters.mass_spectrum.noise_threshold_method == 'signal_noise' and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n:
    -1556                self.add_mspeak(
    -1557                    ion_charge,
    -1558                    mz,
    -1559                    abun[index],
    -1560                    rp_i,
    -1561                    s2n_i,
    -1562                    massspec_indexes,
    -1563                    ms_parent=self
    -1564                )
    -1565
    -1566        self.mspeaks = self._mspeaks
    -1567        self._dynamic_range = self.max_abundance / self.min_abundance
    -1568        self._set_nominal_masses_start_final_indexes()
    -1569        
    -1570        if self.label != Labels.thermo_centroid:
    -1571            
    -1572            if self.settings.noise_threshold_method == 'log':
    -1573                
    -1574                raise  Exception("log noise Not tested for centroid data")
    -1575                #self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
    -1576            
    -1577            else:
    -1578                self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
    -1579        
    -1580        del self.data_dict
    +1541    @mz_exp_profile.setter
    +1542    def mz_exp_profile(self, _mz_exp):
    +1543        self._mz_exp = _mz_exp
    +1544
    +1545    @property
    +1546    def abundance_profile(self):
    +1547        """Return the abundance profile of the mass spectrum."""
    +1548        ab_list = []
    +1549        for ab in self.abundance:
    +1550            ab_list.append(0)
    +1551            ab_list.append(ab)
    +1552            ab_list.append(0)
    +1553        return ab_list
    +1554
    +1555    @abundance_profile.setter
    +1556    def abundance_profile(self, abundance):
    +1557        self._abundance = abundance
    +1558
    +1559    @property
    +1560    def tic(self):
    +1561        """Return the total ion current of the mass spectrum."""
    +1562        return sum(self.abundance)
    +1563
    +1564    def process_mass_spec(self):
    +1565        """Process the mass spectrum."""
    +1566        import tqdm
    +1567
    +1568        # overwrite process_mass_spec
    +1569        # mspeak objs are usually added inside the PeaKPicking class
    +1570        # for profile and freq based data
    +1571        data_dict = self.data_dict
    +1572        ion_charge = self.polarity
    +1573
    +1574        # Check if resolving power is present
    +1575        rp_present = True
    +1576        if not data_dict.get(Labels.rp):
    +1577            rp_present = False
    +1578        if rp_present and list(data_dict.get(Labels.rp)) == [None] * len(
    +1579            data_dict.get(Labels.rp)
    +1580        ):
    +1581            rp_present = False
    +1582
    +1583        # Check if s2n is present
    +1584        s2n_present = True
    +1585        if not data_dict.get(Labels.s2n):
    +1586            s2n_present = False
    +1587        if s2n_present and list(data_dict.get(Labels.s2n)) == [None] * len(
    +1588            data_dict.get(Labels.s2n)
    +1589        ):
    +1590            s2n_present = False
    +1591
    +1592        # Warning if no s2n data but noise thresholding is set to signal_noise
    +1593        if (
    +1594            not s2n_present
    +1595            and self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
    +1596        ):
    +1597            raise Exception("Signal to Noise data is missing for noise thresholding")
    +1598
    +1599        # Pull out abundance data
    +1600        abun = array(data_dict.get(Labels.abundance)).astype(float)
    +1601
    +1602        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
    +1603        abundance_threshold, factor = self.get_threshold(abun)
    +1604
    +1605        # Set rp_i and s2n_i to NaN; they are overwritten below if present
    +1606        rp_i, s2n_i = np.nan, np.nan
    +1607        for index, mz in enumerate(data_dict.get(Labels.mz)):
    +1608            if rp_present:
    +1609                if not data_dict.get(Labels.rp)[index]:
    +1610                    rp_i = np.nan
    +1611                else:
    +1612                    rp_i = float(data_dict.get(Labels.rp)[index])
    +1613            if s2n_present:
    +1614                if not data_dict.get(Labels.s2n)[index]:
    +1615                    s2n_i = np.nan
    +1616                else:
    +1617                    s2n_i = float(data_dict.get(Labels.s2n)[index])
    +1618
    +1619            # centroid peak does not have start and end peak index pos
    +1620            massspec_indexes = (index, index, index)
    +1621
    +1622            # Add peaks based on the noise thresholding method
    +1623            if (
    +1624                self.parameters.mass_spectrum.noise_threshold_method
    +1625                in ["minima", "relative_abundance", "absolute_abundance"]
    +1626                and abun[index] / factor >= abundance_threshold
    +1627            ):
    +1628                self.add_mspeak(
    +1629                    ion_charge,
    +1630                    mz,
    +1631                    abun[index],
    +1632                    rp_i,
    +1633                    s2n_i,
    +1634                    massspec_indexes,
    +1635                    ms_parent=self,
    +1636                )
    +1637            if (
    +1638                self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
    +1639                and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
    +1640            ):
    +1641                self.add_mspeak(
    +1642                    ion_charge,
    +1643                    mz,
    +1644                    abun[index],
    +1645                    rp_i,
    +1646                    s2n_i,
    +1647                    massspec_indexes,
    +1648                    ms_parent=self,
    +1649                )
    +1650
    +1651        self.mspeaks = self._mspeaks
    +1652        self._dynamic_range = self.max_abundance / self.min_abundance
    +1653        self._set_nominal_masses_start_final_indexes()
    +1654
    +1655        if self.label != Labels.thermo_centroid:
    +1656            if self.settings.noise_threshold_method == "log":
    +1657                raise Exception("log noise Not tested for centroid data")
    +1658                # self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
    +1659
    +1660            else:
    +1661                self._baseline_noise, self._baseline_noise_std = (
    +1662                    self.run_noise_threshold_calc()
    +1663                )
    +1664
    +1665        del self.data_dict
     
    @@ -6815,23 +7053,22 @@
    Methods
    -
    1408    def __init__(self, data_dict, d_params, auto_process=True):
    -1409
    -1410        super().__init__([], [], d_params)
    -1411
    -1412        self._set_parameters_objects(d_params)
    -1413        
    -1414        if self.label == Labels.thermo_centroid:
    -1415            self._baseline_noise = d_params.get("baseline_noise")
    -1416            self._baseline_noise_std = d_params.get("baseline_noise_std")
    -1417
    -1418        self.is_centroid = True
    -1419        self.data_dict = data_dict
    -1420        self._mz_exp = data_dict[Labels.mz]
    -1421        self._abundance = data_dict[Labels.abundance]
    -1422
    -1423        if auto_process:
    -1424            self.process_mass_spec()
    +            
    1480    def __init__(self, data_dict, d_params, auto_process=True):
    +1481        super().__init__([], [], d_params)
    +1482
    +1483        self._set_parameters_objects(d_params)
    +1484
    +1485        if self.label == Labels.thermo_centroid:
    +1486            self._baseline_noise = d_params.get("baseline_noise")
    +1487            self._baseline_noise_std = d_params.get("baseline_noise_std")
    +1488
    +1489        self.is_centroid = True
    +1490        self.data_dict = data_dict
    +1491        self._mz_exp = data_dict[Labels.mz]
    +1492        self._abundance = data_dict[Labels.abundance]
    +1493
    +1494        if auto_process:
    +1495            self.process_mass_spec()
     
    @@ -6910,95 +7147,108 @@
    Methods
    -
    1492    def process_mass_spec(self):
    -1493        """Process the mass spectrum.
    -1494       
    -1495        """
    -1496        import tqdm
    -1497        # overwrite process_mass_spec 
    -1498        # mspeak objs are usually added inside the PeaKPicking class 
    -1499        # for profile and freq based data
    -1500        data_dict = self.data_dict
    -1501        ion_charge = self.polarity
    -1502
    -1503        # Check if resolving power is present
    -1504        rp_present = True
    -1505        if not data_dict.get(Labels.rp):
    -1506            rp_present = False
    -1507        if rp_present and list(data_dict.get(Labels.rp)) == [None]*len(data_dict.get(Labels.rp)):
    -1508            rp_present = False
    -1509
    -1510        # Check if s2n is present
    -1511        s2n_present = True
    -1512        if not data_dict.get(Labels.s2n):
    -1513            s2n_present = False
    -1514        if s2n_present and list(data_dict.get(Labels.s2n)) == [None]*len(data_dict.get(Labels.s2n)):
    -1515            s2n_present = False
    -1516        
    -1517        # Warning if no s2n data but noise thresholding is set to signal_noise
    -1518        if not s2n_present and self.parameters.mass_spectrum.noise_threshold_method == 'signal_noise':
    -1519            raise Exception("Signal to Noise data is missing for noise thresholding")
    -1520
    -1521        # Pull out abundance data        
    -1522        abun = array(data_dict.get(Labels.abundance)).astype(float)
    -1523        
    -1524        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
    -1525        abundance_threshold, factor = self.get_threshold(abun)
    -1526        
    -1527        # Set rp_i and s2n_i to None which will be overwritten if present
    -1528        rp_i, s2n_i = np.nan, np.nan
    -1529        for index, mz in enumerate(data_dict.get(Labels.mz)):
    -1530            if rp_present:
    -1531                if not data_dict.get(Labels.rp)[index]:
    -1532                    rp_i = np.nan
    -1533                else:
    -1534                    rp_i = float(data_dict.get(Labels.rp)[index])
    -1535            if s2n_present:
    -1536                if not data_dict.get(Labels.s2n)[index]:
    -1537                    s2n_i = np.nan
    -1538                else:
    -1539                    s2n_i = float(data_dict.get(Labels.s2n)[index])
    -1540
    -1541            # centroid peak does not have start and end peak index pos
    -1542            massspec_indexes = (index, index, index)
    -1543
    -1544            # Add peaks based on the noise thresholding method
    -1545            if self.parameters.mass_spectrum.noise_threshold_method in ['minima', 'relative_abundance', 'absolute_abundance'] and abun[index]/factor >= abundance_threshold:             
    -1546                self.add_mspeak(
    -1547                    ion_charge,
    -1548                    mz,
    -1549                    abun[index],
    -1550                    rp_i,
    -1551                    s2n_i,
    -1552                    massspec_indexes,
    -1553                    ms_parent=self
    -1554                )
    -1555            if self.parameters.mass_spectrum.noise_threshold_method == 'signal_noise' and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n:
    -1556                self.add_mspeak(
    -1557                    ion_charge,
    -1558                    mz,
    -1559                    abun[index],
    -1560                    rp_i,
    -1561                    s2n_i,
    -1562                    massspec_indexes,
    -1563                    ms_parent=self
    -1564                )
    -1565
    -1566        self.mspeaks = self._mspeaks
    -1567        self._dynamic_range = self.max_abundance / self.min_abundance
    -1568        self._set_nominal_masses_start_final_indexes()
    -1569        
    -1570        if self.label != Labels.thermo_centroid:
    -1571            
    -1572            if self.settings.noise_threshold_method == 'log':
    -1573                
    -1574                raise  Exception("log noise Not tested for centroid data")
    -1575                #self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
    -1576            
    -1577            else:
    -1578                self._baseline_noise, self._baseline_noise_std = self.run_noise_threshold_calc()
    -1579        
    -1580        del self.data_dict
    +            
    1564    def process_mass_spec(self):
    +1565        """Process the mass spectrum."""
    +1566        import tqdm
    +1567
    +1568        # overwrite process_mass_spec
    +1569        # mspeak objs are usually added inside the PeaKPicking class
    +1570        # for profile and freq based data
    +1571        data_dict = self.data_dict
    +1572        ion_charge = self.polarity
    +1573
    +1574        # Check if resolving power is present
    +1575        rp_present = True
    +1576        if not data_dict.get(Labels.rp):
    +1577            rp_present = False
    +1578        if rp_present and list(data_dict.get(Labels.rp)) == [None] * len(
    +1579            data_dict.get(Labels.rp)
    +1580        ):
    +1581            rp_present = False
    +1582
    +1583        # Check if s2n is present
    +1584        s2n_present = True
    +1585        if not data_dict.get(Labels.s2n):
    +1586            s2n_present = False
    +1587        if s2n_present and list(data_dict.get(Labels.s2n)) == [None] * len(
    +1588            data_dict.get(Labels.s2n)
    +1589        ):
    +1590            s2n_present = False
    +1591
    +1592        # Warning if no s2n data but noise thresholding is set to signal_noise
    +1593        if (
    +1594            not s2n_present
    +1595            and self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
    +1596        ):
    +1597            raise Exception("Signal to Noise data is missing for noise thresholding")
    +1598
    +1599        # Pull out abundance data
    +1600        abun = array(data_dict.get(Labels.abundance)).astype(float)
    +1601
    +1602        # Get the threshold for filtering if using minima, relative, or absolute abundance thresholding
    +1603        abundance_threshold, factor = self.get_threshold(abun)
    +1604
    +1605        # Set rp_i and s2n_i to NaN; they are overwritten below if present
    +1606        rp_i, s2n_i = np.nan, np.nan
    +1607        for index, mz in enumerate(data_dict.get(Labels.mz)):
    +1608            if rp_present:
    +1609                if not data_dict.get(Labels.rp)[index]:
    +1610                    rp_i = np.nan
    +1611                else:
    +1612                    rp_i = float(data_dict.get(Labels.rp)[index])
    +1613            if s2n_present:
    +1614                if not data_dict.get(Labels.s2n)[index]:
    +1615                    s2n_i = np.nan
    +1616                else:
    +1617                    s2n_i = float(data_dict.get(Labels.s2n)[index])
    +1618
    +1619            # centroid peak does not have start and end peak index pos
    +1620            massspec_indexes = (index, index, index)
    +1621
    +1622            # Add peaks based on the noise thresholding method
    +1623            if (
    +1624                self.parameters.mass_spectrum.noise_threshold_method
    +1625                in ["minima", "relative_abundance", "absolute_abundance"]
    +1626                and abun[index] / factor >= abundance_threshold
    +1627            ):
    +1628                self.add_mspeak(
    +1629                    ion_charge,
    +1630                    mz,
    +1631                    abun[index],
    +1632                    rp_i,
    +1633                    s2n_i,
    +1634                    massspec_indexes,
    +1635                    ms_parent=self,
    +1636                )
    +1637            if (
    +1638                self.parameters.mass_spectrum.noise_threshold_method == "signal_noise"
    +1639                and s2n_i >= self.parameters.mass_spectrum.noise_threshold_min_s2n
    +1640            ):
    +1641                self.add_mspeak(
    +1642                    ion_charge,
    +1643                    mz,
    +1644                    abun[index],
    +1645                    rp_i,
    +1646                    s2n_i,
    +1647                    massspec_indexes,
    +1648                    ms_parent=self,
    +1649                )
    +1650
    +1651        self.mspeaks = self._mspeaks
    +1652        self._dynamic_range = self.max_abundance / self.min_abundance
    +1653        self._set_nominal_masses_start_final_indexes()
    +1654
    +1655        if self.label != Labels.thermo_centroid:
    +1656            if self.settings.noise_threshold_method == "log":
    +1657                raise Exception("log noise Not tested for centroid data")
    +1658                # self._baseline_noise, self._baseline_noise_std = self.run_log_noise_threshold_calc()
    +1659
    +1660            else:
    +1661                self._baseline_noise, self._baseline_noise_std = (
    +1662                    self.run_noise_threshold_calc()
    +1663                )
    +1664
    +1665        del self.data_dict
     
    @@ -7154,81 +7404,78 @@
    Inherited Members
    -
    1582class MassSpecCentroidLowRes(MassSpecCentroid):
    -1583    """A mass spectrum class when the entry point is on low resolution centroid format
    -1584
    -1585    Notes
    -1586    -----
    -1587    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead
    -1588    
    -1589    Parameters
    -1590    ----------
    -1591    data_dict : dict {string: numpy array float64 )
    -1592        contains keys [m/z, Abundance, Resolving Power, S/N]
    -1593    d_params : dict{'str': float, int or str}
    -1594        contains the instrument settings and processing settings
    -1595
    -1596    Attributes
    -1597    ----------
    -1598    _processed_tic : float
    -1599        store processed total ion current
    -1600    _abundance : ndarray
    -1601        The abundance values of the mass spectrum.
    -1602    _mz_exp : ndarray
    -1603        The m/z values of the mass spectrum.
    -1604    """
    -1605    
    -1606    def __init__(self, data_dict, d_params):
    -1607    
    -1608        self._set_parameters_objects(d_params)
    -1609        self._mz_exp = array(data_dict.get(Labels.mz))
    -1610        self._abundance = array(data_dict.get(Labels.abundance))
    -1611        self._processed_tic = None
    -1612    
    -1613    def __len__(self):
    -1614        
    -1615        return len(self.mz_exp)
    -1616        
    -1617    def __getitem__(self, position):
    -1618        
    -1619        return (self.mz_exp[position], self.abundance[position])
    -1620
    -1621    @property
    -1622    def mz_exp(self):
    -1623        """Return the m/z values of the mass spectrum."""
    -1624        return self._mz_exp 
    -1625
    -1626    @property
    -1627    def abundance(self):
    -1628        """Return the abundance values of the mass spectrum."""
    -1629        return self._abundance
    -1630
    -1631    @property
    -1632    def processed_tic(self):
    -1633        """Return the processed total ion current of the mass spectrum."""
    -1634        return sum(self._processed_tic)
    -1635    
    -1636    @property
    -1637    def tic(self):
    -1638        """Return the total ion current of the mass spectrum."""
    -1639        if self._processed_tic:
    -1640            return self._processed_tic
    -1641        else:
    -1642            return sum(self.abundance)
    -1643    
    -1644    @property
    -1645    def mz_abun_tuples(self):
    -1646        """Return the m/z and abundance values of the mass spectrum as a list of tuples."""
    -1647        r = lambda x: ( int(round(x[0],0), int(round(x[1],0))) )
    -1648
    -1649        return [r(i) for i in self]
    -1650    
    -1651    @property
    -1652    def mz_abun_dict(self):
    -1653        """Return the m/z and abundance values of the mass spectrum as a dictionary."""
    -1654        r = lambda x: int(round(x,0))
    -1655            
    -1656        return { r(i[0]):r(i[1]) for i in self}
    +            
    1668class MassSpecCentroidLowRes(MassSpecCentroid):
    +1669    """A mass spectrum class when the entry point is on low resolution centroid format
    +1670
    +1671    Notes
    +1672    -----
    +1673    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead
    +1674
    +1675    Parameters
    +1676    ----------
    +1677    data_dict : dict {string: numpy array float64 )
    +1678        contains keys [m/z, Abundance, Resolving Power, S/N]
    +1679    d_params : dict{'str': float, int or str}
    +1680        contains the instrument settings and processing settings
    +1681
    +1682    Attributes
    +1683    ----------
    +1684    _processed_tic : float
    +1685        store processed total ion current
    +1686    _abundance : ndarray
    +1687        The abundance values of the mass spectrum.
    +1688    _mz_exp : ndarray
    +1689        The m/z values of the mass spectrum.
    +1690    """
    +1691
    +1692    def __init__(self, data_dict, d_params):
    +1693        self._set_parameters_objects(d_params)
    +1694        self._mz_exp = array(data_dict.get(Labels.mz))
    +1695        self._abundance = array(data_dict.get(Labels.abundance))
    +1696        self._processed_tic = None
    +1697
    +1698    def __len__(self):
    +1699        return len(self.mz_exp)
    +1700
    +1701    def __getitem__(self, position):
    +1702        return (self.mz_exp[position], self.abundance[position])
    +1703
    +1704    @property
    +1705    def mz_exp(self):
    +1706        """Return the m/z values of the mass spectrum."""
    +1707        return self._mz_exp
    +1708
    +1709    @property
    +1710    def abundance(self):
    +1711        """Return the abundance values of the mass spectrum."""
    +1712        return self._abundance
    +1713
    +1714    @property
    +1715    def processed_tic(self):
    +1716        """Return the processed total ion current of the mass spectrum."""
    +1717        return sum(self._processed_tic)
    +1718
    +1719    @property
    +1720    def tic(self):
    +1721        """Return the total ion current of the mass spectrum."""
    +1722        if self._processed_tic:
    +1723            return self._processed_tic
    +1724        else:
    +1725            return sum(self.abundance)
    +1726
    +1727    @property
    +1728    def mz_abun_tuples(self):
    +1729        """Return the m/z and abundance values of the mass spectrum as a list of tuples."""
    +1730        r = lambda x: (int(round(x[0], 0), int(round(x[1], 0))))
    +1731
    +1732        return [r(i) for i in self]
    +1733
    +1734    @property
    +1735    def mz_abun_dict(self):
    +1736        """Return the m/z and abundance values of the mass spectrum as a dictionary."""
    +1737        r = lambda x: int(round(x, 0))
    +1738
    +1739        return {r(i[0]): r(i[1]) for i in self}
     
    @@ -7270,12 +7517,11 @@
    Attributes
    -
    1606    def __init__(self, data_dict, d_params):
    -1607    
    -1608        self._set_parameters_objects(d_params)
    -1609        self._mz_exp = array(data_dict.get(Labels.mz))
    -1610        self._abundance = array(data_dict.get(Labels.abundance))
    -1611        self._processed_tic = None
    +            
    1692    def __init__(self, data_dict, d_params):
    +1693        self._set_parameters_objects(d_params)
    +1694        self._mz_exp = array(data_dict.get(Labels.mz))
    +1695        self._abundance = array(data_dict.get(Labels.abundance))
    +1696        self._processed_tic = None
     
    diff --git a/docs/corems/mass_spectrum/input.html b/docs/corems/mass_spectrum/input.html index ee41a3a6..3bf30db1 100644 --- a/docs/corems/mass_spectrum/input.html +++ b/docs/corems/mass_spectrum/input.html @@ -34,7 +34,6 @@

    Submodules

  • coremsHDF5
  • massList
  • numpyArray
  • -
  • win_only
  • diff --git a/docs/corems/mass_spectrum/input/baseClass.html b/docs/corems/mass_spectrum/input/baseClass.html index bb5c75cd..14168c18 100644 --- a/docs/corems/mass_spectrum/input/baseClass.html +++ b/docs/corems/mass_spectrum/input/baseClass.html @@ -129,477 +129,527 @@

      1__author__ = "Yuri E. Corilo"
       2__date__ = "Nov 11, 2019"
       3
    -  4from io import StringIO, BytesIO
    -  5from pathlib import Path
    -  6from copy import deepcopy
    +  4from copy import deepcopy
    +  5from io import BytesIO
    +  6from pathlib import Path
       7
    -  8from pandas import read_csv, read_pickle, read_excel
    -  9import chardet
    - 10from pandas.core.frame import DataFrame
    - 11from s3path import S3Path
    - 12
    - 13from bs4 import BeautifulSoup
    - 14
    - 15from corems.encapsulation.factory.processingSetting import DataInputSetting
    - 16from corems.encapsulation.factory.parameters import default_parameters
    - 17from corems.encapsulation.constant import Labels
    - 18from corems.encapsulation.input.parameter_from_json import load_and_set_parameters_class, load_and_set_parameters_ms, load_and_set_toml_parameters_class
    - 19
    - 20
    - 21class MassListBaseClass:
    - 22    '''The MassListBaseClass object reads mass list data types and returns the mass spectrum obj
    +  8import chardet
    +  9from bs4 import BeautifulSoup
    + 10from pandas import read_csv, read_excel, read_pickle
    + 11from pandas.core.frame import DataFrame
    + 12from s3path import S3Path
    + 13
    + 14from corems.encapsulation.constant import Labels
    + 15from corems.encapsulation.factory.parameters import default_parameters
    + 16from corems.encapsulation.factory.processingSetting import DataInputSetting
    + 17from corems.encapsulation.input.parameter_from_json import (
    + 18    load_and_set_parameters_class,
    + 19    load_and_set_parameters_ms,
    + 20    load_and_set_toml_parameters_class,
    + 21)
    + 22
      23
    - 24    Parameters
    - 25    ----------
    - 26    file_location : Path or S3Path
    - 27        Full data path.
    - 28    isCentroid : bool, optional
    - 29        Determines the mass spectrum data structure. If set to True, it assumes centroid mode. If set to False, it assumes profile mode and attempts to peak pick. Default is True.
    - 30    analyzer : str, optional
    - 31        The analyzer used for the mass spectrum. Default is 'Unknown'.
    - 32    instrument_label : str, optional
    - 33        The label of the instrument used for the mass spectrum. Default is 'Unknown'.
    - 34    sample_name : str, optional
    - 35        The name of the sample. Default is None.
    - 36    header_lines : int, optional
    - 37        The number of lines to skip in the file, including the column labels line. Default is 0.
    - 38    isThermoProfile : bool, optional
    - 39        Determines the number of expected columns in the file. If set to True, only m/z and intensity columns are expected. Signal-to-noise ratio (S/N) and resolving power (RP) will be calculated based on the data. Default is False.
    - 40    headerless : bool, optional
    - 41        If True, assumes that there are no headers present in the file (e.g., a .xy file from Bruker) and assumes two columns: m/z and intensity. Default is False.
    - 42
    - 43    Attributes
    - 44    ----------
    - 45    parameters : DataInputSetting
    - 46        The data input settings for the mass spectrum.
    - 47    data_type : str
    - 48        The type of data in the file.
    - 49    delimiter : str
    - 50        The delimiter used to read text-based files.
    - 51    
    - 52    Methods
    - 53    -------
    - 54    * set_parameter_from_toml(parameters_path). Sets the data input settings from a TOML file.
    - 55    * set_parameter_from_json(parameters_path). Sets the data input settings from a JSON file.
    - 56    * get_dataframe(). Reads the file and returns the data as a pandas DataFrame.
    - 57    * load_settings(mass_spec_obj, output_parameters). Loads the settings for the mass spectrum.
    - 58    * get_output_parameters(polarity, scan_index=0). Returns the output parameters for the mass spectrum.
    - 59    * clean_data_frame(dataframe). Cleans the data frame by removing columns that are not in the expected columns set.
    - 60
    - 61    '''
    - 62    
    - 63    def __init__(self, file_location:Path|S3Path, isCentroid:bool=True, analyzer:str='Unknown', instrument_label:str='Unknown',
    - 64                 sample_name:str=None, header_lines:int=0, isThermoProfile:bool=False,headerless:bool=False):
    + 24class MassListBaseClass:
    + 25    """The MassListBaseClass object reads mass list data types and returns the mass spectrum obj
    + 26
    + 27    Parameters
    + 28    ----------
    + 29    file_location : Path or S3Path
    + 30        Full data path.
    + 31    isCentroid : bool, optional
    + 32        Determines the mass spectrum data structure. If set to True, it assumes centroid mode. If set to False, it assumes profile mode and attempts to peak pick. Default is True.
    + 33    analyzer : str, optional
    + 34        The analyzer used for the mass spectrum. Default is 'Unknown'.
    + 35    instrument_label : str, optional
    + 36        The label of the instrument used for the mass spectrum. Default is 'Unknown'.
    + 37    sample_name : str, optional
    + 38        The name of the sample. Default is None.
    + 39    header_lines : int, optional
    + 40        The number of lines to skip in the file, including the column labels line. Default is 0.
    + 41    isThermoProfile : bool, optional
    + 42        Determines the number of expected columns in the file. If set to True, only m/z and intensity columns are expected. Signal-to-noise ratio (S/N) and resolving power (RP) will be calculated based on the data. Default is False.
    + 43    headerless : bool, optional
    + 44        If True, assumes that there are no headers present in the file (e.g., a .xy file from Bruker) and assumes two columns: m/z and intensity. Default is False.
    + 45
    + 46    Attributes
    + 47    ----------
    + 48    parameters : DataInputSetting
    + 49        The data input settings for the mass spectrum.
    + 50    data_type : str
    + 51        The type of data in the file.
    + 52    delimiter : str
    + 53        The delimiter used to read text-based files.
    + 54
    + 55    Methods
    + 56    -------
    + 57    * set_parameter_from_toml(parameters_path). Sets the data input settings from a TOML file.
    + 58    * set_parameter_from_json(parameters_path). Sets the data input settings from a JSON file.
    + 59    * get_dataframe(). Reads the file and returns the data as a pandas DataFrame.
    + 60    * load_settings(mass_spec_obj, output_parameters). Loads the settings for the mass spectrum.
    + 61    * get_output_parameters(polarity, scan_index=0). Returns the output parameters for the mass spectrum.
    + 62    * clean_data_frame(dataframe). Cleans the data frame by removing columns that are not in the expected columns set.
    + 63
    + 64    """
      65
    - 66        self.file_location = Path(file_location) if isinstance(file_location, str) else file_location
    - 67        
    - 68        if not self.file_location.exists():
    - 69            raise FileExistsError("File does not exist: %s" % file_location)
    - 70
    - 71        # (newline="\n")
    - 72
    - 73        self.header_lines = header_lines
    - 74
    - 75        if isThermoProfile:
    - 76            
    - 77            self._expected_columns = {Labels.mz, Labels.abundance}
    - 78
    - 79        else:
    + 66    def __init__(
    + 67        self,
    + 68        file_location: Path | S3Path,
    + 69        isCentroid: bool = True,
    + 70        analyzer: str = "Unknown",
    + 71        instrument_label: str = "Unknown",
    + 72        sample_name: str = None,
    + 73        header_lines: int = 0,
    + 74        isThermoProfile: bool = False,
    + 75        headerless: bool = False,
    + 76    ):
    + 77        self.file_location = (
    + 78            Path(file_location) if isinstance(file_location, str) else file_location
    + 79        )
      80
    - 81            self._expected_columns = {Labels.mz, Labels.abundance, Labels.s2n, Labels.rp}
    - 82
    - 83        self._delimiter = None
    - 84
    - 85        self.isCentroid = isCentroid
    - 86
    - 87        self.isThermoProfile = isThermoProfile
    - 88
    - 89        self.headerless = headerless
    + 81        if not self.file_location.exists():
    + 82            raise FileExistsError("File does not exist: %s" % file_location)
    + 83
    + 84        # (newline="\n")
    + 85
    + 86        self.header_lines = header_lines
    + 87
    + 88        if isThermoProfile:
    + 89            self._expected_columns = {Labels.mz, Labels.abundance}
      90
    - 91        self._data_type = None
    - 92
    - 93        self.analyzer = analyzer
    - 94
    - 95        self.instrument_label = instrument_label
    - 96
    - 97        self.sample_name = sample_name
    + 91        else:
    + 92            self._expected_columns = {
    + 93                Labels.mz,
    + 94                Labels.abundance,
    + 95                Labels.s2n,
    + 96                Labels.rp,
    + 97            }
      98
    - 99        self._parameters = deepcopy(DataInputSetting())
    + 99        self._delimiter = None
     100
    -101    @property
    -102    def parameters(self):
    -103        return self._parameters
    +101        self.isCentroid = isCentroid
    +102
    +103        self.isThermoProfile = isThermoProfile
     104
    -105    @parameters.setter
    -106    def parameters(self, instance_DataInputSetting):
    -107        self._parameters = instance_DataInputSetting
    -108    
    -109    def set_parameter_from_toml(self, parameters_path):
    -110        self._parameters = load_and_set_toml_parameters_class(
    -111            'DataInput', self.parameters, parameters_path=parameters_path)
    +105        self.headerless = headerless
    +106
    +107        self._data_type = None
    +108
    +109        self.analyzer = analyzer
    +110
    +111        self.instrument_label = instrument_label
     112
    -113    def set_parameter_from_json(self, parameters_path):
    -114        self._parameters = load_and_set_parameters_class(
    -115            'DataInput', self.parameters, parameters_path=parameters_path)
    +113        self.sample_name = sample_name
    +114
    +115        self._parameters = deepcopy(DataInputSetting())
     116
     117    @property
    -118    def data_type(self):
    -119        return self._data_type
    +118    def parameters(self):
    +119        return self._parameters
     120
    -121    @data_type.setter
    -122    def data_type(self, data_type):
    -123        self._data_type = data_type
    +121    @parameters.setter
    +122    def parameters(self, instance_DataInputSetting):
    +123        self._parameters = instance_DataInputSetting
     124
    -125    @property
    -126    def delimiter(self):
    -127        return self._delimiter
    -128
    -129    @delimiter.setter
    -130    def delimiter(self, delimiter):
    -131        self._delimiter = delimiter
    -132
    -133    
    -134    def encoding_detector(self, file_location) -> str:
    -135        """
    -136        Detects the encoding of a file.
    -137
    -138        Parameters
    -139        --------
    -140        file_location : str
    -141            The location of the file to be analyzed.
    +125    def set_parameter_from_toml(self, parameters_path):
    +126        self._parameters = load_and_set_toml_parameters_class(
    +127            "DataInput", self.parameters, parameters_path=parameters_path
    +128        )
    +129
    +130    def set_parameter_from_json(self, parameters_path):
    +131        self._parameters = load_and_set_parameters_class(
    +132            "DataInput", self.parameters, parameters_path=parameters_path
    +133        )
    +134
    +135    @property
    +136    def data_type(self):
    +137        return self._data_type
    +138
    +139    @data_type.setter
    +140    def data_type(self, data_type):
    +141        self._data_type = data_type
     142
    -143        Returns
    -144        --------
    -145        str 
    -146            The detected encoding of the file.
    -147        """
    -148
    -149        with file_location.open('rb') as rawdata:
    -150            result = chardet.detect(rawdata.read(10000))
    -151        return result['encoding']
    -152
    -153    def set_data_type(self):
    -154        """
    -155        Set the data type and delimiter based on the file extension.
    -156
    -157        Raises
    -158        ------
    -159        TypeError
    -160            If the data type could not be automatically recognized.
    -161        """
    -162        if self.file_location.suffix == '.csv':
    -163            self.data_type = 'txt'
    -164            self.delimiter = ','
    -165        elif self.file_location.suffix == '.txt':
    -166            self.data_type = 'txt'
    -167            self.delimiter = '\t'
    -168        elif self.file_location.suffix == '.tsv':
    -169            self.data_type = 'txt'
    -170            self.delimiter = '\t'
    -171        elif self.file_location.suffix == '.xlsx':
    -172            self.data_type = 'excel'
    -173        elif self.file_location.suffix == '.ascii':
    -174            self.data_type = 'txt'
    -175            self.delimiter = '  '
    -176        elif self.file_location.suffix == '.pkl':
    -177            self.data_type = 'dataframe'
    -178        elif self.file_location.suffix == '.pks':
    -179            self.data_type = 'pks'
    -180            self.delimiter = '          '
    -181            self.header_lines = 9
    -182        elif self.file_location.suffix == '.xml':
    -183            self.data_type = 'xml'
    -184            # self.delimiter = None
    -185            # self.header_lines = None
    -186        elif self.file_location.suffix == '.xy':
    -187            self.data_type = 'txt'
    -188            self.delimiter = ' '
    -189            self.header_lines = None
    -190        else:
    -191            raise TypeError(
    -192                "Data type could not be automatically recognized for %s; please set data type and delimiter manually." % self.file_location.name)
    -193
    -194    def get_dataframe(self) -> DataFrame:
    -195            """
    -196            Get the data as a pandas DataFrame.
    -197
    -198            Returns
    -199            -------
    -200            pandas.DataFrame
    -201                The data as a pandas DataFrame.
    -202
    -203            Raises
    -204            ------
    -205            TypeError
    -206                If the data type is not supported.
    -207            """
    -208        
    -209            if not self.data_type or not self.delimiter:
    -210                self.set_data_type()
    -211
    -212            if isinstance(self.file_location, S3Path):
    -213                data = BytesIO(self.file_location.open('rb').read())
    -214            else:
    -215                data = self.file_location
    +143    @property
    +144    def delimiter(self):
    +145        return self._delimiter
    +146
    +147    @delimiter.setter
    +148    def delimiter(self, delimiter):
    +149        self._delimiter = delimiter
    +150
    +151    def encoding_detector(self, file_location) -> str:
    +152        """
    +153        Detects the encoding of a file.
    +154
    +155        Parameters
    +156        --------
    +157        file_location : str
    +158            The location of the file to be analyzed.
    +159
    +160        Returns
    +161        --------
    +162        str
    +163            The detected encoding of the file.
    +164        """
    +165
    +166        with file_location.open("rb") as rawdata:
    +167            result = chardet.detect(rawdata.read(10000))
    +168        return result["encoding"]
    +169
    +170    def set_data_type(self):
    +171        """
    +172        Set the data type and delimiter based on the file extension.
    +173
    +174        Raises
    +175        ------
    +176        TypeError
    +177            If the data type could not be automatically recognized.
    +178        """
    +179        if self.file_location.suffix == ".csv":
    +180            self.data_type = "txt"
    +181            self.delimiter = ","
    +182        elif self.file_location.suffix == ".txt":
    +183            self.data_type = "txt"
    +184            self.delimiter = "\t"
    +185        elif self.file_location.suffix == ".tsv":
    +186            self.data_type = "txt"
    +187            self.delimiter = "\t"
    +188        elif self.file_location.suffix == ".xlsx":
    +189            self.data_type = "excel"
    +190        elif self.file_location.suffix == ".ascii":
    +191            self.data_type = "txt"
    +192            self.delimiter = "  "
    +193        elif self.file_location.suffix == ".pkl":
    +194            self.data_type = "dataframe"
    +195        elif self.file_location.suffix == ".pks":
    +196            self.data_type = "pks"
    +197            self.delimiter = "          "
    +198            self.header_lines = 9
    +199        elif self.file_location.suffix == ".xml":
    +200            self.data_type = "xml"
    +201            # self.delimiter = None
    +202            # self.header_lines = None
    +203        elif self.file_location.suffix == ".xy":
    +204            self.data_type = "txt"
    +205            self.delimiter = " "
    +206            self.header_lines = None
    +207        else:
    +208            raise TypeError(
    +209                "Data type could not be automatically recognized for %s; please set data type and delimiter manually."
    +210                % self.file_location.name
    +211            )
    +212
    +213    def get_dataframe(self) -> DataFrame:
    +214        """
    +215        Get the data as a pandas DataFrame.
     216
    -217            if self.data_type == 'txt':
    -218                if self.headerless:
    -219                    dataframe = read_csv(data,  skiprows=self.header_lines, delimiter=self.delimiter, header=None, names=['m/z','I'],
    -220                                encoding=self.encoding_detector(self.file_location), engine='python')
    -221                else:
    -222                    dataframe = read_csv(data,  skiprows=self.header_lines, delimiter=self.delimiter,
    -223                                encoding=self.encoding_detector(self.file_location), engine='python')
    -224
    -225            elif self.data_type == 'pks':
    -226                names=["m/z", "I", "Scaled Peak Height", "Resolving Power", "Frequency", 'S/N']
    -227                clean_data = []
    -228                with self.file_location.open() as maglabfile:
    -229                    for i in  maglabfile.readlines()[8:-1]:
    -230                        clean_data.append(i.split())
    -231                dataframe = DataFrame(clean_data, columns=names)
    -232
    -233            elif self.data_type == 'dataframe':
    -234                dataframe = read_pickle(data)
    +217        Returns
    +218        -------
    +219        pandas.DataFrame
    +220            The data as a pandas DataFrame.
    +221
    +222        Raises
    +223        ------
    +224        TypeError
    +225            If the data type is not supported.
    +226        """
    +227
    +228        if not self.data_type or not self.delimiter:
    +229            self.set_data_type()
    +230
    +231        if isinstance(self.file_location, S3Path):
    +232            data = BytesIO(self.file_location.open("rb").read())
    +233        else:
    +234            data = self.file_location
     235
    -236            elif self.data_type == 'excel':
    -237                dataframe = read_excel(data)
    -238
    -239            elif self.data_type == 'xml':
    -240                dataframe = self.read_xml_peaks(data)
    -241
    -242            else:
    -243                raise TypeError('Data type %s is not supported' % self.data_type)
    -244
    -245            return dataframe
    -246
    -247    def load_settings(self, mass_spec_obj, output_parameters):
    -248        """
    -249        #TODO loading output parameters from json file is not functional
    -250        Load settings from a JSON file and apply them to the given mass_spec_obj.
    -251
    -252        Parameters
    -253        ----------
    -254        mass_spec_obj : MassSpec
    -255            The mass spectrum object to apply the settings to.
    -256
    -257        """
    -258        import json
    -259        import warnings
    -260
    -261        settings_file_path = self.file_location.with_suffix('.json')
    -262
    -263        if settings_file_path.exists():
    -264
    -265            self._parameters = load_and_set_parameters_class(
    -266                'DataInput', self._parameters, parameters_path=settings_file_path)
    -267
    -268            load_and_set_parameters_ms(
    -269                mass_spec_obj, parameters_path=settings_file_path)
    +236        if self.data_type == "txt":
    +237            if self.headerless:
    +238                dataframe = read_csv(
    +239                    data,
    +240                    skiprows=self.header_lines,
    +241                    delimiter=self.delimiter,
    +242                    header=None,
    +243                    names=["m/z", "I"],
    +244                    encoding=self.encoding_detector(self.file_location),
    +245                    engine="python",
    +246                )
    +247            else:
    +248                dataframe = read_csv(
    +249                    data,
    +250                    skiprows=self.header_lines,
    +251                    delimiter=self.delimiter,
    +252                    encoding=self.encoding_detector(self.file_location),
    +253                    engine="python",
    +254                )
    +255
    +256        elif self.data_type == "pks":
    +257            names = [
    +258                "m/z",
    +259                "I",
    +260                "Scaled Peak Height",
    +261                "Resolving Power",
    +262                "Frequency",
    +263                "S/N",
    +264            ]
    +265            clean_data = []
    +266            with self.file_location.open() as maglabfile:
    +267                for i in maglabfile.readlines()[8:-1]:
    +268                    clean_data.append(i.split())
    +269            dataframe = DataFrame(clean_data, columns=names)
     270
    -271        else:
    -272
    -273            warnings.warn(
    -274                "auto settings loading is enabled but could not locate the file:  %s. Please load the settings manually" % settings_file_path)
    -275
    -276        # TODO this will load the setting from SettingCoreMS.json
    -277        # coreMSHFD5 overrides this function to import the attrs stored in the h5 file
    -278        #loaded_settings = {}
    -279        #loaded_settings['MoleculaSearch'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MoleculaSearchSetting')
    -280        #loaded_settings['MassSpecPeak'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MassSpecPeakSetting')
    -281
    -282        #loaded_settings['MassSpectrum'] = self.get_scan_group_attr_data(scan_index, time_index, 'MassSpectrumSetting')
    -283        #loaded_settings['Transient'] = self.get_scan_group_attr_data(scan_index, time_index, 'TransientSetting')
    +271        elif self.data_type == "dataframe":
    +272            dataframe = read_pickle(data)
    +273
    +274        elif self.data_type == "excel":
    +275            dataframe = read_excel(data)
    +276
    +277        elif self.data_type == "xml":
    +278            dataframe = self.read_xml_peaks(data)
    +279
    +280        else:
    +281            raise TypeError("Data type %s is not supported" % self.data_type)
    +282
    +283        return dataframe
     284
    -285    def get_output_parameters(self, polarity:int, scan_index:int=0) -> dict:
    -286            """
    -287            Get the output parameters for the mass spectrum.
    -288
    -289            Parameters
    -290            ----------
    -291            polarity : int
    -292                The polarity of the mass spectrum +1 or -1.
    -293            scan_index : int, optional
    -294                The index of the scan. Default is 0.
    -295
    -296            Returns
    -297            -------
    -298            dict
    -299                A dictionary containing the output parameters.
    +285    def load_settings(self, mass_spec_obj, output_parameters):
    +286        """
    +287        #TODO loading output parameters from json file is not functional
    +288        Load settings from a JSON file and apply them to the given mass_spec_obj.
    +289
    +290        Parameters
    +291        ----------
    +292        mass_spec_obj : MassSpec
    +293            The mass spectrum object to apply the settings to.
    +294
    +295        """
    +296        import json
    +297        import warnings
    +298
    +299        settings_file_path = self.file_location.with_suffix(".json")
     300
    -301            """
    -302            from copy import deepcopy
    -303
    -304            output_parameters = default_parameters(self.file_location)
    +301        if settings_file_path.exists():
    +302            self._parameters = load_and_set_parameters_class(
    +303                "DataInput", self._parameters, parameters_path=settings_file_path
    +304            )
     305
    -306            if self.isCentroid:
    -307                output_parameters['label'] = Labels.corems_centroid
    -308            else:
    -309                output_parameters['label'] = Labels.bruker_profile
    -310
    -311            output_parameters['analyzer'] = self.analyzer
    -312
    -313            output_parameters['instrument_label'] = self.instrument_label
    -314
    -315            output_parameters['sample_name'] = self.sample_name
    -316
    -317            output_parameters["Aterm"] = None
    -318
    -319            output_parameters["Bterm"] = None
    -320
    -321            output_parameters["Cterm"] = None
    -322
    -323            output_parameters["polarity"] = polarity
    +306            load_and_set_parameters_ms(
    +307                mass_spec_obj, parameters_path=settings_file_path
    +308            )
    +309
    +310        else:
    +311            warnings.warn(
    +312                "auto settings loading is enabled but could not locate the file:  %s. Please load the settings manually"
    +313                % settings_file_path
    +314            )
    +315
    +316        # TODO this will load the setting from SettingCoreMS.json
    +317        # coreMSHFD5 overrides this function to import the attrs stored in the h5 file
    +318        # loaded_settings = {}
    +319        # loaded_settings['MoleculaSearch'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MoleculaSearchSetting')
    +320        # loaded_settings['MassSpecPeak'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MassSpecPeakSetting')
    +321
    +322        # loaded_settings['MassSpectrum'] = self.get_scan_group_attr_data(scan_index, time_index, 'MassSpectrumSetting')
    +323        # loaded_settings['Transient'] = self.get_scan_group_attr_data(scan_index, time_index, 'TransientSetting')
     324
    -325            #scan_number and rt will be need to lc ms====
    -326
    -327            output_parameters["mobility_scan"] = 0
    +325    def get_output_parameters(self, polarity: int, scan_index: int = 0) -> dict:
    +326        """
    +327        Get the output parameters for the mass spectrum.
     328
    -329            output_parameters["mobility_rt"] = 0
    -330
    -331            output_parameters["scan_number"] = scan_index
    -332
    -333            output_parameters["rt"] = 0
    -334
    -335            return output_parameters
    -336
    -337    def clean_data_frame(self, dataframe):
    -338            """
    -339            Clean the input dataframe by removing columns that are not expected.
    +329        Parameters
    +330        ----------
    +331        polarity : int
    +332            The polarity of the mass spectrum +1 or -1.
    +333        scan_index : int, optional
    +334            The index of the scan. Default is 0.
    +335
    +336        Returns
    +337        -------
    +338        dict
    +339            A dictionary containing the output parameters.
     340
    -341            Parameters
    -342            ----------
    -343            pandas.DataFrame
    -344                The input dataframe to be cleaned.
    +341        """
    +342        from copy import deepcopy
    +343
    +344        output_parameters = default_parameters(self.file_location)
     345
    -346            """
    -347            
    -348            for column_name in dataframe.columns:
    -349
    -350                expected_column_name = self.parameters.header_translate.get(
    -351                    column_name)
    -352                if expected_column_name not in self._expected_columns:
    -353
    -354                    del dataframe[column_name]
    -355
    -356    def check_columns(self, header_labels: list[str]):
    -357        """
    -358        Check if the given header labels match the expected columns.
    -359
    -360        Parameters
    -361        ----------
    -362        header_labels : list
    -363            The header labels to be checked.
    -364            
    -365        Raises
    -366        ------
    -367        Exception
    -368            If any expected column is not found in the header labels.
    -369        """
    -370        found_label = set()
    -371
    -372        for label in header_labels:
    -373            if not label in self._expected_columns:
    -374                user_column_name = self.parameters.header_translate.get(label)
    -375                if user_column_name in self._expected_columns:
    -376                    found_label.add(user_column_name)
    -377            else:
    -378                found_label.add(label)
    -379
    -380        not_found = self._expected_columns - found_label
    -381
    -382        if len(not_found) > 0:
    -383            raise Exception("Please make sure to include the columns %s" % ', '.join(not_found))
    -384
    -385    def read_xml_peaks(self, data:str) -> DataFrame:
    -386            '''
    -387            Read peaks from a Bruker .xml file and return a pandas DataFrame.
    -388
    -389            Parameters
    -390            ----------
    -391            data : str
    -392                The path to the .xml file.
    -393
    -394            Returns
    -395            -------
    -396            pandas.DataFrame
    -397                A DataFrame containing the peak data with columns: 'm/z', 'I', 'Resolving Power', 'Area', 'S/N', 'fwhm'.
    -398            '''
    -399            from numpy import nan
    -400            with open(data, "r") as file:
    -401                content = file.readlines()
    -402                content = "".join(content)
    -403                bs_content = BeautifulSoup(content, features='xml')
    -404            peaks_xml = bs_content.find_all("pk")   
    -405
    -406            # initialise lists of the peak variables
    -407            areas = []
    -408            fwhms = []
    -409            intensities = []
    -410            mzs = []
    -411            res = []
    -412            sn = []
    -413            #iterate through the peaks appending to each list
    -414            for peak in peaks_xml:
    -415                areas.append(float(peak.get('a', nan)))      # Use a default value if key 'a' is missing
    -416                fwhms.append(float(peak.get('fwhm', nan)))   # Use a default value if key 'fwhm' is missing
    -417                intensities.append(float(peak.get('i', nan))) # Use a default value if key 'i' is missing
    -418                mzs.append(float(peak.get('mz', nan)))       # Use a default value if key 'mz' is missing
    -419                res.append(float(peak.get('res', nan)))      # Use a default value if key 'res' is missing
    -420                sn.append(float(peak.get('sn', nan)))       # Use a default value if key 'sn' is missing
    -421
    -422            #Compile pandas dataframe of these values    
    -423            names=["m/z", "I", "Resolving Power", "Area", 'S/N','fwhm']    
    -424            df = DataFrame(columns = names,dtype=float)
    -425            df['m/z'] = mzs
    -426            df['I'] = intensities
    -427            df['Resolving Power'] = res
    -428            df['Area'] = areas
    -429            df['S/N'] = sn
    -430            df['fwhm'] = fwhms
    -431            return df
    +346        if self.isCentroid:
    +347            output_parameters["label"] = Labels.corems_centroid
    +348        else:
    +349            output_parameters["label"] = Labels.bruker_profile
    +350
    +351        output_parameters["analyzer"] = self.analyzer
    +352
    +353        output_parameters["instrument_label"] = self.instrument_label
    +354
    +355        output_parameters["sample_name"] = self.sample_name
    +356
    +357        output_parameters["Aterm"] = None
    +358
    +359        output_parameters["Bterm"] = None
    +360
    +361        output_parameters["Cterm"] = None
    +362
    +363        output_parameters["polarity"] = polarity
    +364
    +365        # scan_number and rt will be need to lc ms====
    +366
    +367        output_parameters["mobility_scan"] = 0
    +368
    +369        output_parameters["mobility_rt"] = 0
    +370
    +371        output_parameters["scan_number"] = scan_index
    +372
    +373        output_parameters["rt"] = 0
    +374
    +375        return output_parameters
    +376
    +377    def clean_data_frame(self, dataframe):
    +378        """
    +379        Clean the input dataframe by removing columns that are not expected.
    +380
    +381        Parameters
    +382        ----------
    +383        pandas.DataFrame
    +384            The input dataframe to be cleaned.
    +385
    +386        """
    +387
    +388        for column_name in dataframe.columns:
    +389            expected_column_name = self.parameters.header_translate.get(column_name)
    +390            if expected_column_name not in self._expected_columns:
    +391                del dataframe[column_name]
    +392
    +393    def check_columns(self, header_labels: list[str]):
    +394        """
    +395        Check if the given header labels match the expected columns.
    +396
    +397        Parameters
    +398        ----------
    +399        header_labels : list
    +400            The header labels to be checked.
    +401
    +402        Raises
    +403        ------
    +404        Exception
    +405            If any expected column is not found in the header labels.
    +406        """
    +407        found_label = set()
    +408
    +409        for label in header_labels:
    +410            if not label in self._expected_columns:
    +411                user_column_name = self.parameters.header_translate.get(label)
    +412                if user_column_name in self._expected_columns:
    +413                    found_label.add(user_column_name)
    +414            else:
    +415                found_label.add(label)
    +416
    +417        not_found = self._expected_columns - found_label
    +418
    +419        if len(not_found) > 0:
    +420            raise Exception(
    +421                "Please make sure to include the columns %s" % ", ".join(not_found)
    +422            )
    +423
    +424    def read_xml_peaks(self, data: str) -> DataFrame:
    +425        """
    +426        Read peaks from a Bruker .xml file and return a pandas DataFrame.
    +427
    +428        Parameters
    +429        ----------
    +430        data : str
    +431            The path to the .xml file.
     432
    -433    def get_xml_polarity(self):
    -434            """
    -435            Get the polarity from an XML peaklist.
    -436
    -437            Returns
    -438            -------
    -439            int
    -440                The polarity of the XML peaklist. Returns -1 for negative polarity, +1 for positive polarity.
    -441
    -442            Raises
    -443            ------
    -444            Exception
    -445                If the data type is not XML peaklist in Bruker format or if the polarity is unhandled.
    -446            """
    -447            
    -448            # Check its an actual xml
    -449            if not self.data_type or not self.delimiter:
    -450
    -451                self.set_data_type()
    -452
    -453            if isinstance(self.file_location, S3Path):
    -454                # data = self.file_location.open('rb').read()
    -455                data = BytesIO(self.file_location.open('rb').read())
    -456            
    -457            else:
    -458                data = self.file_location
    -459
    -460            if self.data_type != 'xml':
    -461                raise Exception ("This function is only for XML peaklists (Bruker format)")
    -462
    -463            with open(data, "r") as file:
    -464                content = file.readlines()
    -465                content = "".join(content)
    -466                bs_content = BeautifulSoup(content, features='xml')
    -467            polarity = bs_content.find_all("ms_spectrum")[0]['polarity']
    -468            if polarity == '-':
    -469                return -1
    -470            elif polarity == '+':
    -471                return +1
    -472            else:
    -473                raise Exception("Polarity %s unhandled" % polarity)
    -474            
    +433        Returns
    +434        -------
    +435        pandas.DataFrame
    +436            A DataFrame containing the peak data with columns: 'm/z', 'I', 'Resolving Power', 'Area', 'S/N', 'fwhm'.
    +437        """
    +438        from numpy import nan
    +439
    +440        with open(data, "r") as file:
    +441            content = file.readlines()
    +442            content = "".join(content)
    +443            bs_content = BeautifulSoup(content, features="xml")
    +444        peaks_xml = bs_content.find_all("pk")
    +445
    +446        # initialise lists of the peak variables
    +447        areas = []
    +448        fwhms = []
    +449        intensities = []
    +450        mzs = []
    +451        res = []
    +452        sn = []
    +453        # iterate through the peaks appending to each list
    +454        for peak in peaks_xml:
    +455            areas.append(
    +456                float(peak.get("a", nan))
    +457            )  # Use a default value if key 'a' is missing
    +458            fwhms.append(
    +459                float(peak.get("fwhm", nan))
    +460            )  # Use a default value if key 'fwhm' is missing
    +461            intensities.append(
    +462                float(peak.get("i", nan))
    +463            )  # Use a default value if key 'i' is missing
    +464            mzs.append(
    +465                float(peak.get("mz", nan))
    +466            )  # Use a default value if key 'mz' is missing
    +467            res.append(
    +468                float(peak.get("res", nan))
    +469            )  # Use a default value if key 'res' is missing
    +470            sn.append(
    +471                float(peak.get("sn", nan))
    +472            )  # Use a default value if key 'sn' is missing
    +473
    +474        # Compile pandas dataframe of these values
    +475        names = ["m/z", "I", "Resolving Power", "Area", "S/N", "fwhm"]
    +476        df = DataFrame(columns=names, dtype=float)
    +477        df["m/z"] = mzs
    +478        df["I"] = intensities
    +479        df["Resolving Power"] = res
    +480        df["Area"] = areas
    +481        df["S/N"] = sn
    +482        df["fwhm"] = fwhms
    +483        return df
    +484
    +485    def get_xml_polarity(self):
    +486        """
    +487        Get the polarity from an XML peaklist.
    +488
    +489        Returns
    +490        -------
    +491        int
    +492            The polarity of the XML peaklist. Returns -1 for negative polarity, +1 for positive polarity.
    +493
    +494        Raises
    +495        ------
    +496        Exception
    +497            If the data type is not XML peaklist in Bruker format or if the polarity is unhandled.
    +498        """
    +499
    +500        # Check its an actual xml
    +501        if not self.data_type or not self.delimiter:
    +502            self.set_data_type()
    +503
    +504        if isinstance(self.file_location, S3Path):
    +505            # data = self.file_location.open('rb').read()
    +506            data = BytesIO(self.file_location.open("rb").read())
    +507
    +508        else:
    +509            data = self.file_location
    +510
    +511        if self.data_type != "xml":
    +512            raise Exception("This function is only for XML peaklists (Bruker format)")
    +513
    +514        with open(data, "r") as file:
    +515            content = file.readlines()
    +516            content = "".join(content)
    +517            bs_content = BeautifulSoup(content, features="xml")
    +518        polarity = bs_content.find_all("ms_spectrum")[0]["polarity"]
    +519        if polarity == "-":
    +520            return -1
    +521        elif polarity == "+":
    +522            return +1
    +523        else:
    +524            raise Exception("Polarity %s unhandled" % polarity)
     
    @@ -615,459 +665,507 @@

    -
     23class MassListBaseClass:
    - 24    '''The MassListBaseClass object reads mass list data types and returns the mass spectrum obj
    - 25
    - 26    Parameters
    - 27    ----------
    - 28    file_location : Path or S3Path
    - 29        Full data path.
    - 30    isCentroid : bool, optional
    - 31        Determines the mass spectrum data structure. If set to True, it assumes centroid mode. If set to False, it assumes profile mode and attempts to peak pick. Default is True.
    - 32    analyzer : str, optional
    - 33        The analyzer used for the mass spectrum. Default is 'Unknown'.
    - 34    instrument_label : str, optional
    - 35        The label of the instrument used for the mass spectrum. Default is 'Unknown'.
    - 36    sample_name : str, optional
    - 37        The name of the sample. Default is None.
    - 38    header_lines : int, optional
    - 39        The number of lines to skip in the file, including the column labels line. Default is 0.
    - 40    isThermoProfile : bool, optional
    - 41        Determines the number of expected columns in the file. If set to True, only m/z and intensity columns are expected. Signal-to-noise ratio (S/N) and resolving power (RP) will be calculated based on the data. Default is False.
    - 42    headerless : bool, optional
    - 43        If True, assumes that there are no headers present in the file (e.g., a .xy file from Bruker) and assumes two columns: m/z and intensity. Default is False.
    - 44
    - 45    Attributes
    - 46    ----------
    - 47    parameters : DataInputSetting
    - 48        The data input settings for the mass spectrum.
    - 49    data_type : str
    - 50        The type of data in the file.
    - 51    delimiter : str
    - 52        The delimiter used to read text-based files.
    - 53    
    - 54    Methods
    - 55    -------
    - 56    * set_parameter_from_toml(parameters_path). Sets the data input settings from a TOML file.
    - 57    * set_parameter_from_json(parameters_path). Sets the data input settings from a JSON file.
    - 58    * get_dataframe(). Reads the file and returns the data as a pandas DataFrame.
    - 59    * load_settings(mass_spec_obj, output_parameters). Loads the settings for the mass spectrum.
    - 60    * get_output_parameters(polarity, scan_index=0). Returns the output parameters for the mass spectrum.
    - 61    * clean_data_frame(dataframe). Cleans the data frame by removing columns that are not in the expected columns set.
    - 62
    - 63    '''
    - 64    
    - 65    def __init__(self, file_location:Path|S3Path, isCentroid:bool=True, analyzer:str='Unknown', instrument_label:str='Unknown',
    - 66                 sample_name:str=None, header_lines:int=0, isThermoProfile:bool=False,headerless:bool=False):
    - 67
    - 68        self.file_location = Path(file_location) if isinstance(file_location, str) else file_location
    - 69        
    - 70        if not self.file_location.exists():
    - 71            raise FileExistsError("File does not exist: %s" % file_location)
    - 72
    - 73        # (newline="\n")
    - 74
    - 75        self.header_lines = header_lines
    - 76
    - 77        if isThermoProfile:
    - 78            
    - 79            self._expected_columns = {Labels.mz, Labels.abundance}
    - 80
    - 81        else:
    - 82
    - 83            self._expected_columns = {Labels.mz, Labels.abundance, Labels.s2n, Labels.rp}
    +            
     25class MassListBaseClass:
    + 26    """The MassListBaseClass object reads mass list data types and returns the mass spectrum obj
    + 27
    + 28    Parameters
    + 29    ----------
    + 30    file_location : Path or S3Path
    + 31        Full data path.
    + 32    isCentroid : bool, optional
    + 33        Determines the mass spectrum data structure. If set to True, it assumes centroid mode. If set to False, it assumes profile mode and attempts to peak pick. Default is True.
    + 34    analyzer : str, optional
    + 35        The analyzer used for the mass spectrum. Default is 'Unknown'.
    + 36    instrument_label : str, optional
    + 37        The label of the instrument used for the mass spectrum. Default is 'Unknown'.
    + 38    sample_name : str, optional
    + 39        The name of the sample. Default is None.
    + 40    header_lines : int, optional
    + 41        The number of lines to skip in the file, including the column labels line. Default is 0.
    + 42    isThermoProfile : bool, optional
    + 43        Determines the number of expected columns in the file. If set to True, only m/z and intensity columns are expected. Signal-to-noise ratio (S/N) and resolving power (RP) will be calculated based on the data. Default is False.
    + 44    headerless : bool, optional
    + 45        If True, assumes that there are no headers present in the file (e.g., a .xy file from Bruker) and assumes two columns: m/z and intensity. Default is False.
    + 46
    + 47    Attributes
    + 48    ----------
    + 49    parameters : DataInputSetting
    + 50        The data input settings for the mass spectrum.
    + 51    data_type : str
    + 52        The type of data in the file.
    + 53    delimiter : str
    + 54        The delimiter used to read text-based files.
    + 55
    + 56    Methods
    + 57    -------
    + 58    * set_parameter_from_toml(parameters_path). Sets the data input settings from a TOML file.
    + 59    * set_parameter_from_json(parameters_path). Sets the data input settings from a JSON file.
    + 60    * get_dataframe(). Reads the file and returns the data as a pandas DataFrame.
    + 61    * load_settings(mass_spec_obj, output_parameters). Loads the settings for the mass spectrum.
    + 62    * get_output_parameters(polarity, scan_index=0). Returns the output parameters for the mass spectrum.
    + 63    * clean_data_frame(dataframe). Cleans the data frame by removing columns that are not in the expected columns set.
    + 64
    + 65    """
    + 66
    + 67    def __init__(
    + 68        self,
    + 69        file_location: Path | S3Path,
    + 70        isCentroid: bool = True,
    + 71        analyzer: str = "Unknown",
    + 72        instrument_label: str = "Unknown",
    + 73        sample_name: str = None,
    + 74        header_lines: int = 0,
    + 75        isThermoProfile: bool = False,
    + 76        headerless: bool = False,
    + 77    ):
    + 78        self.file_location = (
    + 79            Path(file_location) if isinstance(file_location, str) else file_location
    + 80        )
    + 81
    + 82        if not self.file_location.exists():
    + 83            raise FileExistsError("File does not exist: %s" % file_location)
      84
    - 85        self._delimiter = None
    + 85        # (newline="\n")
      86
    - 87        self.isCentroid = isCentroid
    + 87        self.header_lines = header_lines
      88
    - 89        self.isThermoProfile = isThermoProfile
    - 90
    - 91        self.headerless = headerless
    - 92
    - 93        self._data_type = None
    - 94
    - 95        self.analyzer = analyzer
    - 96
    - 97        self.instrument_label = instrument_label
    - 98
    - 99        self.sample_name = sample_name
    -100
    -101        self._parameters = deepcopy(DataInputSetting())
    -102
    -103    @property
    -104    def parameters(self):
    -105        return self._parameters
    -106
    -107    @parameters.setter
    -108    def parameters(self, instance_DataInputSetting):
    -109        self._parameters = instance_DataInputSetting
    -110    
    -111    def set_parameter_from_toml(self, parameters_path):
    -112        self._parameters = load_and_set_toml_parameters_class(
    -113            'DataInput', self.parameters, parameters_path=parameters_path)
    -114
    -115    def set_parameter_from_json(self, parameters_path):
    -116        self._parameters = load_and_set_parameters_class(
    -117            'DataInput', self.parameters, parameters_path=parameters_path)
    -118
    -119    @property
    -120    def data_type(self):
    -121        return self._data_type
    -122
    -123    @data_type.setter
    -124    def data_type(self, data_type):
    -125        self._data_type = data_type
    -126
    -127    @property
    -128    def delimiter(self):
    -129        return self._delimiter
    + 89        if isThermoProfile:
    + 90            self._expected_columns = {Labels.mz, Labels.abundance}
    + 91
    + 92        else:
    + 93            self._expected_columns = {
    + 94                Labels.mz,
    + 95                Labels.abundance,
    + 96                Labels.s2n,
    + 97                Labels.rp,
    + 98            }
    + 99
    +100        self._delimiter = None
    +101
    +102        self.isCentroid = isCentroid
    +103
    +104        self.isThermoProfile = isThermoProfile
    +105
    +106        self.headerless = headerless
    +107
    +108        self._data_type = None
    +109
    +110        self.analyzer = analyzer
    +111
    +112        self.instrument_label = instrument_label
    +113
    +114        self.sample_name = sample_name
    +115
    +116        self._parameters = deepcopy(DataInputSetting())
    +117
    +118    @property
    +119    def parameters(self):
    +120        return self._parameters
    +121
    +122    @parameters.setter
    +123    def parameters(self, instance_DataInputSetting):
    +124        self._parameters = instance_DataInputSetting
    +125
    +126    def set_parameter_from_toml(self, parameters_path):
    +127        self._parameters = load_and_set_toml_parameters_class(
    +128            "DataInput", self.parameters, parameters_path=parameters_path
    +129        )
     130
    -131    @delimiter.setter
    -132    def delimiter(self, delimiter):
    -133        self._delimiter = delimiter
    -134
    -135    
    -136    def encoding_detector(self, file_location) -> str:
    -137        """
    -138        Detects the encoding of a file.
    +131    def set_parameter_from_json(self, parameters_path):
    +132        self._parameters = load_and_set_parameters_class(
    +133            "DataInput", self.parameters, parameters_path=parameters_path
    +134        )
    +135
    +136    @property
    +137    def data_type(self):
    +138        return self._data_type
     139
    -140        Parameters
    -141        --------
    -142        file_location : str
    -143            The location of the file to be analyzed.
    -144
    -145        Returns
    -146        --------
    -147        str 
    -148            The detected encoding of the file.
    -149        """
    -150
    -151        with file_location.open('rb') as rawdata:
    -152            result = chardet.detect(rawdata.read(10000))
    -153        return result['encoding']
    -154
    -155    def set_data_type(self):
    -156        """
    -157        Set the data type and delimiter based on the file extension.
    -158
    -159        Raises
    -160        ------
    -161        TypeError
    -162            If the data type could not be automatically recognized.
    -163        """
    -164        if self.file_location.suffix == '.csv':
    -165            self.data_type = 'txt'
    -166            self.delimiter = ','
    -167        elif self.file_location.suffix == '.txt':
    -168            self.data_type = 'txt'
    -169            self.delimiter = '\t'
    -170        elif self.file_location.suffix == '.tsv':
    -171            self.data_type = 'txt'
    -172            self.delimiter = '\t'
    -173        elif self.file_location.suffix == '.xlsx':
    -174            self.data_type = 'excel'
    -175        elif self.file_location.suffix == '.ascii':
    -176            self.data_type = 'txt'
    -177            self.delimiter = '  '
    -178        elif self.file_location.suffix == '.pkl':
    -179            self.data_type = 'dataframe'
    -180        elif self.file_location.suffix == '.pks':
    -181            self.data_type = 'pks'
    -182            self.delimiter = '          '
    -183            self.header_lines = 9
    -184        elif self.file_location.suffix == '.xml':
    -185            self.data_type = 'xml'
    -186            # self.delimiter = None
    -187            # self.header_lines = None
    -188        elif self.file_location.suffix == '.xy':
    -189            self.data_type = 'txt'
    -190            self.delimiter = ' '
    -191            self.header_lines = None
    -192        else:
    -193            raise TypeError(
    -194                "Data type could not be automatically recognized for %s; please set data type and delimiter manually." % self.file_location.name)
    -195
    -196    def get_dataframe(self) -> DataFrame:
    -197            """
    -198            Get the data as a pandas DataFrame.
    -199
    -200            Returns
    -201            -------
    -202            pandas.DataFrame
    -203                The data as a pandas DataFrame.
    -204
    -205            Raises
    -206            ------
    -207            TypeError
    -208                If the data type is not supported.
    -209            """
    -210        
    -211            if not self.data_type or not self.delimiter:
    -212                self.set_data_type()
    +140    @data_type.setter
    +141    def data_type(self, data_type):
    +142        self._data_type = data_type
    +143
    +144    @property
    +145    def delimiter(self):
    +146        return self._delimiter
    +147
    +148    @delimiter.setter
    +149    def delimiter(self, delimiter):
    +150        self._delimiter = delimiter
    +151
    +152    def encoding_detector(self, file_location) -> str:
    +153        """
    +154        Detects the encoding of a file.
    +155
    +156        Parameters
    +157        --------
    +158        file_location : str
    +159            The location of the file to be analyzed.
    +160
    +161        Returns
    +162        --------
    +163        str
    +164            The detected encoding of the file.
    +165        """
    +166
    +167        with file_location.open("rb") as rawdata:
    +168            result = chardet.detect(rawdata.read(10000))
    +169        return result["encoding"]
    +170
    +171    def set_data_type(self):
    +172        """
    +173        Set the data type and delimiter based on the file extension.
    +174
    +175        Raises
    +176        ------
    +177        TypeError
    +178            If the data type could not be automatically recognized.
    +179        """
    +180        if self.file_location.suffix == ".csv":
    +181            self.data_type = "txt"
    +182            self.delimiter = ","
    +183        elif self.file_location.suffix == ".txt":
    +184            self.data_type = "txt"
    +185            self.delimiter = "\t"
    +186        elif self.file_location.suffix == ".tsv":
    +187            self.data_type = "txt"
    +188            self.delimiter = "\t"
    +189        elif self.file_location.suffix == ".xlsx":
    +190            self.data_type = "excel"
    +191        elif self.file_location.suffix == ".ascii":
    +192            self.data_type = "txt"
    +193            self.delimiter = "  "
    +194        elif self.file_location.suffix == ".pkl":
    +195            self.data_type = "dataframe"
    +196        elif self.file_location.suffix == ".pks":
    +197            self.data_type = "pks"
    +198            self.delimiter = "          "
    +199            self.header_lines = 9
    +200        elif self.file_location.suffix == ".xml":
    +201            self.data_type = "xml"
    +202            # self.delimiter = None
    +203            # self.header_lines = None
    +204        elif self.file_location.suffix == ".xy":
    +205            self.data_type = "txt"
    +206            self.delimiter = " "
    +207            self.header_lines = None
    +208        else:
    +209            raise TypeError(
    +210                "Data type could not be automatically recognized for %s; please set data type and delimiter manually."
    +211                % self.file_location.name
    +212            )
     213
    -214            if isinstance(self.file_location, S3Path):
    -215                data = BytesIO(self.file_location.open('rb').read())
    -216            else:
    -217                data = self.file_location
    -218
    -219            if self.data_type == 'txt':
    -220                if self.headerless:
    -221                    dataframe = read_csv(data,  skiprows=self.header_lines, delimiter=self.delimiter, header=None, names=['m/z','I'],
    -222                                encoding=self.encoding_detector(self.file_location), engine='python')
    -223                else:
    -224                    dataframe = read_csv(data,  skiprows=self.header_lines, delimiter=self.delimiter,
    -225                                encoding=self.encoding_detector(self.file_location), engine='python')
    -226
    -227            elif self.data_type == 'pks':
    -228                names=["m/z", "I", "Scaled Peak Height", "Resolving Power", "Frequency", 'S/N']
    -229                clean_data = []
    -230                with self.file_location.open() as maglabfile:
    -231                    for i in  maglabfile.readlines()[8:-1]:
    -232                        clean_data.append(i.split())
    -233                dataframe = DataFrame(clean_data, columns=names)
    -234
    -235            elif self.data_type == 'dataframe':
    -236                dataframe = read_pickle(data)
    -237
    -238            elif self.data_type == 'excel':
    -239                dataframe = read_excel(data)
    -240
    -241            elif self.data_type == 'xml':
    -242                dataframe = self.read_xml_peaks(data)
    -243
    -244            else:
    -245                raise TypeError('Data type %s is not supported' % self.data_type)
    -246
    -247            return dataframe
    -248
    -249    def load_settings(self, mass_spec_obj, output_parameters):
    -250        """
    -251        #TODO loading output parameters from json file is not functional
    -252        Load settings from a JSON file and apply them to the given mass_spec_obj.
    -253
    -254        Parameters
    -255        ----------
    -256        mass_spec_obj : MassSpec
    -257            The mass spectrum object to apply the settings to.
    -258
    -259        """
    -260        import json
    -261        import warnings
    -262
    -263        settings_file_path = self.file_location.with_suffix('.json')
    -264
    -265        if settings_file_path.exists():
    -266
    -267            self._parameters = load_and_set_parameters_class(
    -268                'DataInput', self._parameters, parameters_path=settings_file_path)
    -269
    -270            load_and_set_parameters_ms(
    -271                mass_spec_obj, parameters_path=settings_file_path)
    -272
    -273        else:
    +214    def get_dataframe(self) -> DataFrame:
    +215        """
    +216        Get the data as a pandas DataFrame.
    +217
    +218        Returns
    +219        -------
    +220        pandas.DataFrame
    +221            The data as a pandas DataFrame.
    +222
    +223        Raises
    +224        ------
    +225        TypeError
    +226            If the data type is not supported.
    +227        """
    +228
    +229        if not self.data_type or not self.delimiter:
    +230            self.set_data_type()
    +231
    +232        if isinstance(self.file_location, S3Path):
    +233            data = BytesIO(self.file_location.open("rb").read())
    +234        else:
    +235            data = self.file_location
    +236
    +237        if self.data_type == "txt":
    +238            if self.headerless:
    +239                dataframe = read_csv(
    +240                    data,
    +241                    skiprows=self.header_lines,
    +242                    delimiter=self.delimiter,
    +243                    header=None,
    +244                    names=["m/z", "I"],
    +245                    encoding=self.encoding_detector(self.file_location),
    +246                    engine="python",
    +247                )
    +248            else:
    +249                dataframe = read_csv(
    +250                    data,
    +251                    skiprows=self.header_lines,
    +252                    delimiter=self.delimiter,
    +253                    encoding=self.encoding_detector(self.file_location),
    +254                    engine="python",
    +255                )
    +256
    +257        elif self.data_type == "pks":
    +258            names = [
    +259                "m/z",
    +260                "I",
    +261                "Scaled Peak Height",
    +262                "Resolving Power",
    +263                "Frequency",
    +264                "S/N",
    +265            ]
    +266            clean_data = []
    +267            with self.file_location.open() as maglabfile:
    +268                for i in maglabfile.readlines()[8:-1]:
    +269                    clean_data.append(i.split())
    +270            dataframe = DataFrame(clean_data, columns=names)
    +271
    +272        elif self.data_type == "dataframe":
    +273            dataframe = read_pickle(data)
     274
    -275            warnings.warn(
    -276                "auto settings loading is enabled but could not locate the file:  %s. Please load the settings manually" % settings_file_path)
    +275        elif self.data_type == "excel":
    +276            dataframe = read_excel(data)
     277
    -278        # TODO this will load the setting from SettingCoreMS.json
    -279        # coreMSHFD5 overrides this function to import the attrs stored in the h5 file
    -280        #loaded_settings = {}
    -281        #loaded_settings['MoleculaSearch'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MoleculaSearchSetting')
    -282        #loaded_settings['MassSpecPeak'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MassSpecPeakSetting')
    +278        elif self.data_type == "xml":
    +279            dataframe = self.read_xml_peaks(data)
    +280
    +281        else:
    +282            raise TypeError("Data type %s is not supported" % self.data_type)
     283
    -284        #loaded_settings['MassSpectrum'] = self.get_scan_group_attr_data(scan_index, time_index, 'MassSpectrumSetting')
    -285        #loaded_settings['Transient'] = self.get_scan_group_attr_data(scan_index, time_index, 'TransientSetting')
    -286
    -287    def get_output_parameters(self, polarity:int, scan_index:int=0) -> dict:
    -288            """
    -289            Get the output parameters for the mass spectrum.
    +284        return dataframe
    +285
    +286    def load_settings(self, mass_spec_obj, output_parameters):
    +287        """
    +288        #TODO loading output parameters from json file is not functional
    +289        Load settings from a JSON file and apply them to the given mass_spec_obj.
     290
    -291            Parameters
    -292            ----------
    -293            polarity : int
    -294                The polarity of the mass spectrum +1 or -1.
    -295            scan_index : int, optional
    -296                The index of the scan. Default is 0.
    -297
    -298            Returns
    -299            -------
    -300            dict
    -301                A dictionary containing the output parameters.
    -302
    -303            """
    -304            from copy import deepcopy
    -305
    -306            output_parameters = default_parameters(self.file_location)
    -307
    -308            if self.isCentroid:
    -309                output_parameters['label'] = Labels.corems_centroid
    -310            else:
    -311                output_parameters['label'] = Labels.bruker_profile
    -312
    -313            output_parameters['analyzer'] = self.analyzer
    -314
    -315            output_parameters['instrument_label'] = self.instrument_label
    +291        Parameters
    +292        ----------
    +293        mass_spec_obj : MassSpec
    +294            The mass spectrum object to apply the settings to.
    +295
    +296        """
    +297        import json
    +298        import warnings
    +299
    +300        settings_file_path = self.file_location.with_suffix(".json")
    +301
    +302        if settings_file_path.exists():
    +303            self._parameters = load_and_set_parameters_class(
    +304                "DataInput", self._parameters, parameters_path=settings_file_path
    +305            )
    +306
    +307            load_and_set_parameters_ms(
    +308                mass_spec_obj, parameters_path=settings_file_path
    +309            )
    +310
    +311        else:
    +312            warnings.warn(
    +313                "auto settings loading is enabled but could not locate the file:  %s. Please load the settings manually"
    +314                % settings_file_path
    +315            )
     316
    -317            output_parameters['sample_name'] = self.sample_name
    -318
    -319            output_parameters["Aterm"] = None
    -320
    -321            output_parameters["Bterm"] = None
    +317        # TODO this will load the setting from SettingCoreMS.json
    +318        # coreMSHFD5 overrides this function to import the attrs stored in the h5 file
    +319        # loaded_settings = {}
    +320        # loaded_settings['MoleculaSearch'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MoleculaSearchSetting')
    +321        # loaded_settings['MassSpecPeak'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MassSpecPeakSetting')
     322
    -323            output_parameters["Cterm"] = None
    -324
    -325            output_parameters["polarity"] = polarity
    -326
    -327            #scan_number and rt will be need to lc ms====
    -328
    -329            output_parameters["mobility_scan"] = 0
    -330
    -331            output_parameters["mobility_rt"] = 0
    -332
    -333            output_parameters["scan_number"] = scan_index
    -334
    -335            output_parameters["rt"] = 0
    +323        # loaded_settings['MassSpectrum'] = self.get_scan_group_attr_data(scan_index, time_index, 'MassSpectrumSetting')
    +324        # loaded_settings['Transient'] = self.get_scan_group_attr_data(scan_index, time_index, 'TransientSetting')
    +325
    +326    def get_output_parameters(self, polarity: int, scan_index: int = 0) -> dict:
    +327        """
    +328        Get the output parameters for the mass spectrum.
    +329
    +330        Parameters
    +331        ----------
    +332        polarity : int
    +333            The polarity of the mass spectrum +1 or -1.
    +334        scan_index : int, optional
    +335            The index of the scan. Default is 0.
     336
    -337            return output_parameters
    -338
    -339    def clean_data_frame(self, dataframe):
    -340            """
    -341            Clean the input dataframe by removing columns that are not expected.
    -342
    -343            Parameters
    -344            ----------
    -345            pandas.DataFrame
    -346                The input dataframe to be cleaned.
    -347
    -348            """
    -349            
    -350            for column_name in dataframe.columns:
    +337        Returns
    +338        -------
    +339        dict
    +340            A dictionary containing the output parameters.
    +341
    +342        """
    +343        from copy import deepcopy
    +344
    +345        output_parameters = default_parameters(self.file_location)
    +346
    +347        if self.isCentroid:
    +348            output_parameters["label"] = Labels.corems_centroid
    +349        else:
    +350            output_parameters["label"] = Labels.bruker_profile
     351
    -352                expected_column_name = self.parameters.header_translate.get(
    -353                    column_name)
    -354                if expected_column_name not in self._expected_columns:
    +352        output_parameters["analyzer"] = self.analyzer
    +353
    +354        output_parameters["instrument_label"] = self.instrument_label
     355
    -356                    del dataframe[column_name]
    +356        output_parameters["sample_name"] = self.sample_name
     357
    -358    def check_columns(self, header_labels: list[str]):
    -359        """
    -360        Check if the given header labels match the expected columns.
    +358        output_parameters["Aterm"] = None
    +359
    +360        output_parameters["Bterm"] = None
     361
    -362        Parameters
    -363        ----------
    -364        header_labels : list
    -365            The header labels to be checked.
    -366            
    -367        Raises
    -368        ------
    -369        Exception
    -370            If any expected column is not found in the header labels.
    -371        """
    -372        found_label = set()
    +362        output_parameters["Cterm"] = None
    +363
    +364        output_parameters["polarity"] = polarity
    +365
    +366        # scan_number and rt will be need to lc ms====
    +367
    +368        output_parameters["mobility_scan"] = 0
    +369
    +370        output_parameters["mobility_rt"] = 0
    +371
    +372        output_parameters["scan_number"] = scan_index
     373
    -374        for label in header_labels:
    -375            if not label in self._expected_columns:
    -376                user_column_name = self.parameters.header_translate.get(label)
    -377                if user_column_name in self._expected_columns:
    -378                    found_label.add(user_column_name)
    -379            else:
    -380                found_label.add(label)
    +374        output_parameters["rt"] = 0
    +375
    +376        return output_parameters
    +377
    +378    def clean_data_frame(self, dataframe):
    +379        """
    +380        Clean the input dataframe by removing columns that are not expected.
     381
    -382        not_found = self._expected_columns - found_label
    -383
    -384        if len(not_found) > 0:
    -385            raise Exception("Please make sure to include the columns %s" % ', '.join(not_found))
    +382        Parameters
    +383        ----------
    +384        pandas.DataFrame
    +385            The input dataframe to be cleaned.
     386
    -387    def read_xml_peaks(self, data:str) -> DataFrame:
    -388            '''
    -389            Read peaks from a Bruker .xml file and return a pandas DataFrame.
    -390
    -391            Parameters
    -392            ----------
    -393            data : str
    -394                The path to the .xml file.
    -395
    -396            Returns
    -397            -------
    -398            pandas.DataFrame
    -399                A DataFrame containing the peak data with columns: 'm/z', 'I', 'Resolving Power', 'Area', 'S/N', 'fwhm'.
    -400            '''
    -401            from numpy import nan
    -402            with open(data, "r") as file:
    -403                content = file.readlines()
    -404                content = "".join(content)
    -405                bs_content = BeautifulSoup(content, features='xml')
    -406            peaks_xml = bs_content.find_all("pk")   
    -407
    -408            # initialise lists of the peak variables
    -409            areas = []
    -410            fwhms = []
    -411            intensities = []
    -412            mzs = []
    -413            res = []
    -414            sn = []
    -415            #iterate through the peaks appending to each list
    -416            for peak in peaks_xml:
    -417                areas.append(float(peak.get('a', nan)))      # Use a default value if key 'a' is missing
    -418                fwhms.append(float(peak.get('fwhm', nan)))   # Use a default value if key 'fwhm' is missing
    -419                intensities.append(float(peak.get('i', nan))) # Use a default value if key 'i' is missing
    -420                mzs.append(float(peak.get('mz', nan)))       # Use a default value if key 'mz' is missing
    -421                res.append(float(peak.get('res', nan)))      # Use a default value if key 'res' is missing
    -422                sn.append(float(peak.get('sn', nan)))       # Use a default value if key 'sn' is missing
    -423
    -424            #Compile pandas dataframe of these values    
    -425            names=["m/z", "I", "Resolving Power", "Area", 'S/N','fwhm']    
    -426            df = DataFrame(columns = names,dtype=float)
    -427            df['m/z'] = mzs
    -428            df['I'] = intensities
    -429            df['Resolving Power'] = res
    -430            df['Area'] = areas
    -431            df['S/N'] = sn
    -432            df['fwhm'] = fwhms
    -433            return df
    -434
    -435    def get_xml_polarity(self):
    -436            """
    -437            Get the polarity from an XML peaklist.
    -438
    -439            Returns
    -440            -------
    -441            int
    -442                The polarity of the XML peaklist. Returns -1 for negative polarity, +1 for positive polarity.
    -443
    -444            Raises
    -445            ------
    -446            Exception
    -447                If the data type is not XML peaklist in Bruker format or if the polarity is unhandled.
    -448            """
    -449            
    -450            # Check its an actual xml
    -451            if not self.data_type or not self.delimiter:
    -452
    -453                self.set_data_type()
    -454
    -455            if isinstance(self.file_location, S3Path):
    -456                # data = self.file_location.open('rb').read()
    -457                data = BytesIO(self.file_location.open('rb').read())
    -458            
    -459            else:
    -460                data = self.file_location
    -461
    -462            if self.data_type != 'xml':
    -463                raise Exception ("This function is only for XML peaklists (Bruker format)")
    -464
    -465            with open(data, "r") as file:
    -466                content = file.readlines()
    -467                content = "".join(content)
    -468                bs_content = BeautifulSoup(content, features='xml')
    -469            polarity = bs_content.find_all("ms_spectrum")[0]['polarity']
    -470            if polarity == '-':
    -471                return -1
    -472            elif polarity == '+':
    -473                return +1
    -474            else:
    -475                raise Exception("Polarity %s unhandled" % polarity)
    +387        """
    +388
    +389        for column_name in dataframe.columns:
    +390            expected_column_name = self.parameters.header_translate.get(column_name)
    +391            if expected_column_name not in self._expected_columns:
    +392                del dataframe[column_name]
    +393
    +394    def check_columns(self, header_labels: list[str]):
    +395        """
    +396        Check if the given header labels match the expected columns.
    +397
    +398        Parameters
    +399        ----------
    +400        header_labels : list
    +401            The header labels to be checked.
    +402
    +403        Raises
    +404        ------
    +405        Exception
    +406            If any expected column is not found in the header labels.
    +407        """
    +408        found_label = set()
    +409
    +410        for label in header_labels:
    +411            if not label in self._expected_columns:
    +412                user_column_name = self.parameters.header_translate.get(label)
    +413                if user_column_name in self._expected_columns:
    +414                    found_label.add(user_column_name)
    +415            else:
    +416                found_label.add(label)
    +417
    +418        not_found = self._expected_columns - found_label
    +419
    +420        if len(not_found) > 0:
    +421            raise Exception(
    +422                "Please make sure to include the columns %s" % ", ".join(not_found)
    +423            )
    +424
    +425    def read_xml_peaks(self, data: str) -> DataFrame:
    +426        """
    +427        Read peaks from a Bruker .xml file and return a pandas DataFrame.
    +428
    +429        Parameters
    +430        ----------
    +431        data : str
    +432            The path to the .xml file.
    +433
    +434        Returns
    +435        -------
    +436        pandas.DataFrame
    +437            A DataFrame containing the peak data with columns: 'm/z', 'I', 'Resolving Power', 'Area', 'S/N', 'fwhm'.
    +438        """
    +439        from numpy import nan
    +440
    +441        with open(data, "r") as file:
    +442            content = file.readlines()
    +443            content = "".join(content)
    +444            bs_content = BeautifulSoup(content, features="xml")
    +445        peaks_xml = bs_content.find_all("pk")
    +446
    +447        # initialise lists of the peak variables
    +448        areas = []
    +449        fwhms = []
    +450        intensities = []
    +451        mzs = []
    +452        res = []
    +453        sn = []
    +454        # iterate through the peaks appending to each list
    +455        for peak in peaks_xml:
    +456            areas.append(
    +457                float(peak.get("a", nan))
    +458            )  # Use a default value if key 'a' is missing
    +459            fwhms.append(
    +460                float(peak.get("fwhm", nan))
    +461            )  # Use a default value if key 'fwhm' is missing
    +462            intensities.append(
    +463                float(peak.get("i", nan))
    +464            )  # Use a default value if key 'i' is missing
    +465            mzs.append(
    +466                float(peak.get("mz", nan))
    +467            )  # Use a default value if key 'mz' is missing
    +468            res.append(
    +469                float(peak.get("res", nan))
    +470            )  # Use a default value if key 'res' is missing
    +471            sn.append(
    +472                float(peak.get("sn", nan))
    +473            )  # Use a default value if key 'sn' is missing
    +474
    +475        # Compile pandas dataframe of these values
    +476        names = ["m/z", "I", "Resolving Power", "Area", "S/N", "fwhm"]
    +477        df = DataFrame(columns=names, dtype=float)
    +478        df["m/z"] = mzs
    +479        df["I"] = intensities
    +480        df["Resolving Power"] = res
    +481        df["Area"] = areas
    +482        df["S/N"] = sn
    +483        df["fwhm"] = fwhms
    +484        return df
    +485
    +486    def get_xml_polarity(self):
    +487        """
    +488        Get the polarity from an XML peaklist.
    +489
    +490        Returns
    +491        -------
    +492        int
    +493            The polarity of the XML peaklist. Returns -1 for negative polarity, +1 for positive polarity.
    +494
    +495        Raises
    +496        ------
    +497        Exception
    +498            If the data type is not XML peaklist in Bruker format or if the polarity is unhandled.
    +499        """
    +500
    +501        # Check its an actual xml
    +502        if not self.data_type or not self.delimiter:
    +503            self.set_data_type()
    +504
    +505        if isinstance(self.file_location, S3Path):
    +506            # data = self.file_location.open('rb').read()
    +507            data = BytesIO(self.file_location.open("rb").read())
    +508
    +509        else:
    +510            data = self.file_location
    +511
    +512        if self.data_type != "xml":
    +513            raise Exception("This function is only for XML peaklists (Bruker format)")
    +514
    +515        with open(data, "r") as file:
    +516            content = file.readlines()
    +517            content = "".join(content)
    +518            bs_content = BeautifulSoup(content, features="xml")
    +519        polarity = bs_content.find_all("ms_spectrum")[0]["polarity"]
    +520        if polarity == "-":
    +521            return -1
    +522        elif polarity == "+":
    +523            return +1
    +524        else:
    +525            raise Exception("Polarity %s unhandled" % polarity)
     
    @@ -1128,43 +1226,56 @@
    Methods
    -
     65    def __init__(self, file_location:Path|S3Path, isCentroid:bool=True, analyzer:str='Unknown', instrument_label:str='Unknown',
    - 66                 sample_name:str=None, header_lines:int=0, isThermoProfile:bool=False,headerless:bool=False):
    - 67
    - 68        self.file_location = Path(file_location) if isinstance(file_location, str) else file_location
    - 69        
    - 70        if not self.file_location.exists():
    - 71            raise FileExistsError("File does not exist: %s" % file_location)
    - 72
    - 73        # (newline="\n")
    - 74
    - 75        self.header_lines = header_lines
    - 76
    - 77        if isThermoProfile:
    - 78            
    - 79            self._expected_columns = {Labels.mz, Labels.abundance}
    - 80
    - 81        else:
    - 82
    - 83            self._expected_columns = {Labels.mz, Labels.abundance, Labels.s2n, Labels.rp}
    +            
     67    def __init__(
    + 68        self,
    + 69        file_location: Path | S3Path,
    + 70        isCentroid: bool = True,
    + 71        analyzer: str = "Unknown",
    + 72        instrument_label: str = "Unknown",
    + 73        sample_name: str = None,
    + 74        header_lines: int = 0,
    + 75        isThermoProfile: bool = False,
    + 76        headerless: bool = False,
    + 77    ):
    + 78        self.file_location = (
    + 79            Path(file_location) if isinstance(file_location, str) else file_location
    + 80        )
    + 81
    + 82        if not self.file_location.exists():
    + 83            raise FileExistsError("File does not exist: %s" % file_location)
      84
    - 85        self._delimiter = None
    + 85        # (newline="\n")
      86
    - 87        self.isCentroid = isCentroid
    + 87        self.header_lines = header_lines
      88
    - 89        self.isThermoProfile = isThermoProfile
    - 90
    - 91        self.headerless = headerless
    - 92
    - 93        self._data_type = None
    - 94
    - 95        self.analyzer = analyzer
    - 96
    - 97        self.instrument_label = instrument_label
    - 98
    - 99        self.sample_name = sample_name
    -100
    -101        self._parameters = deepcopy(DataInputSetting())
    + 89        if isThermoProfile:
    + 90            self._expected_columns = {Labels.mz, Labels.abundance}
    + 91
    + 92        else:
    + 93            self._expected_columns = {
    + 94                Labels.mz,
    + 95                Labels.abundance,
    + 96                Labels.s2n,
    + 97                Labels.rp,
    + 98            }
    + 99
    +100        self._delimiter = None
    +101
    +102        self.isCentroid = isCentroid
    +103
    +104        self.isThermoProfile = isThermoProfile
    +105
    +106        self.headerless = headerless
    +107
    +108        self._data_type = None
    +109
    +110        self.analyzer = analyzer
    +111
    +112        self.instrument_label = instrument_label
    +113
    +114        self.sample_name = sample_name
    +115
    +116        self._parameters = deepcopy(DataInputSetting())
     
    @@ -1281,9 +1392,10 @@
    Methods
    -
    111    def set_parameter_from_toml(self, parameters_path):
    -112        self._parameters = load_and_set_toml_parameters_class(
    -113            'DataInput', self.parameters, parameters_path=parameters_path)
    +            
    126    def set_parameter_from_toml(self, parameters_path):
    +127        self._parameters = load_and_set_toml_parameters_class(
    +128            "DataInput", self.parameters, parameters_path=parameters_path
    +129        )
     
    @@ -1301,9 +1413,10 @@
    Methods
    -
    115    def set_parameter_from_json(self, parameters_path):
    -116        self._parameters = load_and_set_parameters_class(
    -117            'DataInput', self.parameters, parameters_path=parameters_path)
    +            
    131    def set_parameter_from_json(self, parameters_path):
    +132        self._parameters = load_and_set_parameters_class(
    +133            "DataInput", self.parameters, parameters_path=parameters_path
    +134        )
     
    @@ -1343,24 +1456,24 @@
    Methods
    -
    136    def encoding_detector(self, file_location) -> str:
    -137        """
    -138        Detects the encoding of a file.
    -139
    -140        Parameters
    -141        --------
    -142        file_location : str
    -143            The location of the file to be analyzed.
    -144
    -145        Returns
    -146        --------
    -147        str 
    -148            The detected encoding of the file.
    -149        """
    -150
    -151        with file_location.open('rb') as rawdata:
    -152            result = chardet.detect(rawdata.read(10000))
    -153        return result['encoding']
    +            
    152    def encoding_detector(self, file_location) -> str:
    +153        """
    +154        Detects the encoding of a file.
    +155
    +156        Parameters
    +157        --------
    +158        file_location : str
    +159            The location of the file to be analyzed.
    +160
    +161        Returns
    +162        --------
    +163        str
    +164            The detected encoding of the file.
    +165        """
    +166
    +167        with file_location.open("rb") as rawdata:
    +168            result = chardet.detect(rawdata.read(10000))
    +169        return result["encoding"]
     
    @@ -1393,46 +1506,48 @@
    Returns
    -
    155    def set_data_type(self):
    -156        """
    -157        Set the data type and delimiter based on the file extension.
    -158
    -159        Raises
    -160        ------
    -161        TypeError
    -162            If the data type could not be automatically recognized.
    -163        """
    -164        if self.file_location.suffix == '.csv':
    -165            self.data_type = 'txt'
    -166            self.delimiter = ','
    -167        elif self.file_location.suffix == '.txt':
    -168            self.data_type = 'txt'
    -169            self.delimiter = '\t'
    -170        elif self.file_location.suffix == '.tsv':
    -171            self.data_type = 'txt'
    -172            self.delimiter = '\t'
    -173        elif self.file_location.suffix == '.xlsx':
    -174            self.data_type = 'excel'
    -175        elif self.file_location.suffix == '.ascii':
    -176            self.data_type = 'txt'
    -177            self.delimiter = '  '
    -178        elif self.file_location.suffix == '.pkl':
    -179            self.data_type = 'dataframe'
    -180        elif self.file_location.suffix == '.pks':
    -181            self.data_type = 'pks'
    -182            self.delimiter = '          '
    -183            self.header_lines = 9
    -184        elif self.file_location.suffix == '.xml':
    -185            self.data_type = 'xml'
    -186            # self.delimiter = None
    -187            # self.header_lines = None
    -188        elif self.file_location.suffix == '.xy':
    -189            self.data_type = 'txt'
    -190            self.delimiter = ' '
    -191            self.header_lines = None
    -192        else:
    -193            raise TypeError(
    -194                "Data type could not be automatically recognized for %s; please set data type and delimiter manually." % self.file_location.name)
    +            
    171    def set_data_type(self):
    +172        """
    +173        Set the data type and delimiter based on the file extension.
    +174
    +175        Raises
    +176        ------
    +177        TypeError
    +178            If the data type could not be automatically recognized.
    +179        """
    +180        if self.file_location.suffix == ".csv":
    +181            self.data_type = "txt"
    +182            self.delimiter = ","
    +183        elif self.file_location.suffix == ".txt":
    +184            self.data_type = "txt"
    +185            self.delimiter = "\t"
    +186        elif self.file_location.suffix == ".tsv":
    +187            self.data_type = "txt"
    +188            self.delimiter = "\t"
    +189        elif self.file_location.suffix == ".xlsx":
    +190            self.data_type = "excel"
    +191        elif self.file_location.suffix == ".ascii":
    +192            self.data_type = "txt"
    +193            self.delimiter = "  "
    +194        elif self.file_location.suffix == ".pkl":
    +195            self.data_type = "dataframe"
    +196        elif self.file_location.suffix == ".pks":
    +197            self.data_type = "pks"
    +198            self.delimiter = "          "
    +199            self.header_lines = 9
    +200        elif self.file_location.suffix == ".xml":
    +201            self.data_type = "xml"
    +202            # self.delimiter = None
    +203            # self.header_lines = None
    +204        elif self.file_location.suffix == ".xy":
    +205            self.data_type = "txt"
    +206            self.delimiter = " "
    +207            self.header_lines = None
    +208        else:
    +209            raise TypeError(
    +210                "Data type could not be automatically recognized for %s; please set data type and delimiter manually."
    +211                % self.file_location.name
    +212            )
     
    @@ -1458,58 +1573,77 @@
    Raises
    -
    196    def get_dataframe(self) -> DataFrame:
    -197            """
    -198            Get the data as a pandas DataFrame.
    -199
    -200            Returns
    -201            -------
    -202            pandas.DataFrame
    -203                The data as a pandas DataFrame.
    -204
    -205            Raises
    -206            ------
    -207            TypeError
    -208                If the data type is not supported.
    -209            """
    -210        
    -211            if not self.data_type or not self.delimiter:
    -212                self.set_data_type()
    -213
    -214            if isinstance(self.file_location, S3Path):
    -215                data = BytesIO(self.file_location.open('rb').read())
    -216            else:
    -217                data = self.file_location
    -218
    -219            if self.data_type == 'txt':
    -220                if self.headerless:
    -221                    dataframe = read_csv(data,  skiprows=self.header_lines, delimiter=self.delimiter, header=None, names=['m/z','I'],
    -222                                encoding=self.encoding_detector(self.file_location), engine='python')
    -223                else:
    -224                    dataframe = read_csv(data,  skiprows=self.header_lines, delimiter=self.delimiter,
    -225                                encoding=self.encoding_detector(self.file_location), engine='python')
    -226
    -227            elif self.data_type == 'pks':
    -228                names=["m/z", "I", "Scaled Peak Height", "Resolving Power", "Frequency", 'S/N']
    -229                clean_data = []
    -230                with self.file_location.open() as maglabfile:
    -231                    for i in  maglabfile.readlines()[8:-1]:
    -232                        clean_data.append(i.split())
    -233                dataframe = DataFrame(clean_data, columns=names)
    -234
    -235            elif self.data_type == 'dataframe':
    -236                dataframe = read_pickle(data)
    -237
    -238            elif self.data_type == 'excel':
    -239                dataframe = read_excel(data)
    -240
    -241            elif self.data_type == 'xml':
    -242                dataframe = self.read_xml_peaks(data)
    -243
    -244            else:
    -245                raise TypeError('Data type %s is not supported' % self.data_type)
    -246
    -247            return dataframe
    +            
    214    def get_dataframe(self) -> DataFrame:
    +215        """
    +216        Get the data as a pandas DataFrame.
    +217
    +218        Returns
    +219        -------
    +220        pandas.DataFrame
    +221            The data as a pandas DataFrame.
    +222
    +223        Raises
    +224        ------
    +225        TypeError
    +226            If the data type is not supported.
    +227        """
    +228
    +229        if not self.data_type or not self.delimiter:
    +230            self.set_data_type()
    +231
    +232        if isinstance(self.file_location, S3Path):
    +233            data = BytesIO(self.file_location.open("rb").read())
    +234        else:
    +235            data = self.file_location
    +236
    +237        if self.data_type == "txt":
    +238            if self.headerless:
    +239                dataframe = read_csv(
    +240                    data,
    +241                    skiprows=self.header_lines,
    +242                    delimiter=self.delimiter,
    +243                    header=None,
    +244                    names=["m/z", "I"],
    +245                    encoding=self.encoding_detector(self.file_location),
    +246                    engine="python",
    +247                )
    +248            else:
    +249                dataframe = read_csv(
    +250                    data,
    +251                    skiprows=self.header_lines,
    +252                    delimiter=self.delimiter,
    +253                    encoding=self.encoding_detector(self.file_location),
    +254                    engine="python",
    +255                )
    +256
    +257        elif self.data_type == "pks":
    +258            names = [
    +259                "m/z",
    +260                "I",
    +261                "Scaled Peak Height",
    +262                "Resolving Power",
    +263                "Frequency",
    +264                "S/N",
    +265            ]
    +266            clean_data = []
    +267            with self.file_location.open() as maglabfile:
    +268                for i in maglabfile.readlines()[8:-1]:
    +269                    clean_data.append(i.split())
    +270            dataframe = DataFrame(clean_data, columns=names)
    +271
    +272        elif self.data_type == "dataframe":
    +273            dataframe = read_pickle(data)
    +274
    +275        elif self.data_type == "excel":
    +276            dataframe = read_excel(data)
    +277
    +278        elif self.data_type == "xml":
    +279            dataframe = self.read_xml_peaks(data)
    +280
    +281        else:
    +282            raise TypeError("Data type %s is not supported" % self.data_type)
    +283
    +284        return dataframe
     
    @@ -1541,43 +1675,45 @@
    Raises
    -
    249    def load_settings(self, mass_spec_obj, output_parameters):
    -250        """
    -251        #TODO loading output parameters from json file is not functional
    -252        Load settings from a JSON file and apply them to the given mass_spec_obj.
    -253
    -254        Parameters
    -255        ----------
    -256        mass_spec_obj : MassSpec
    -257            The mass spectrum object to apply the settings to.
    -258
    -259        """
    -260        import json
    -261        import warnings
    -262
    -263        settings_file_path = self.file_location.with_suffix('.json')
    -264
    -265        if settings_file_path.exists():
    -266
    -267            self._parameters = load_and_set_parameters_class(
    -268                'DataInput', self._parameters, parameters_path=settings_file_path)
    -269
    -270            load_and_set_parameters_ms(
    -271                mass_spec_obj, parameters_path=settings_file_path)
    -272
    -273        else:
    -274
    -275            warnings.warn(
    -276                "auto settings loading is enabled but could not locate the file:  %s. Please load the settings manually" % settings_file_path)
    -277
    -278        # TODO this will load the setting from SettingCoreMS.json
    -279        # coreMSHFD5 overrides this function to import the attrs stored in the h5 file
    -280        #loaded_settings = {}
    -281        #loaded_settings['MoleculaSearch'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MoleculaSearchSetting')
    -282        #loaded_settings['MassSpecPeak'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MassSpecPeakSetting')
    -283
    -284        #loaded_settings['MassSpectrum'] = self.get_scan_group_attr_data(scan_index, time_index, 'MassSpectrumSetting')
    -285        #loaded_settings['Transient'] = self.get_scan_group_attr_data(scan_index, time_index, 'TransientSetting')
    +            
    286    def load_settings(self, mass_spec_obj, output_parameters):
    +287        """
    +288        #TODO loading output parameters from json file is not functional
    +289        Load settings from a JSON file and apply them to the given mass_spec_obj.
    +290
    +291        Parameters
    +292        ----------
    +293        mass_spec_obj : MassSpec
    +294            The mass spectrum object to apply the settings to.
    +295
    +296        """
    +297        import json
    +298        import warnings
    +299
    +300        settings_file_path = self.file_location.with_suffix(".json")
    +301
    +302        if settings_file_path.exists():
    +303            self._parameters = load_and_set_parameters_class(
    +304                "DataInput", self._parameters, parameters_path=settings_file_path
    +305            )
    +306
    +307            load_and_set_parameters_ms(
    +308                mass_spec_obj, parameters_path=settings_file_path
    +309            )
    +310
    +311        else:
    +312            warnings.warn(
    +313                "auto settings loading is enabled but could not locate the file:  %s. Please load the settings manually"
    +314                % settings_file_path
    +315            )
    +316
    +317        # TODO this will load the setting from SettingCoreMS.json
    +318        # coreMSHFD5 overrides this function to import the attrs stored in the h5 file
    +319        # loaded_settings = {}
    +320        # loaded_settings['MoleculaSearch'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MoleculaSearchSetting')
    +321        # loaded_settings['MassSpecPeak'] = self.get_scan_group_attr_data(scan_index,  time_index, 'MassSpecPeakSetting')
    +322
    +323        # loaded_settings['MassSpectrum'] = self.get_scan_group_attr_data(scan_index, time_index, 'MassSpectrumSetting')
    +324        # loaded_settings['Transient'] = self.get_scan_group_attr_data(scan_index, time_index, 'TransientSetting')
     
    @@ -1606,57 +1742,57 @@
    Parameters
    -
    287    def get_output_parameters(self, polarity:int, scan_index:int=0) -> dict:
    -288            """
    -289            Get the output parameters for the mass spectrum.
    -290
    -291            Parameters
    -292            ----------
    -293            polarity : int
    -294                The polarity of the mass spectrum +1 or -1.
    -295            scan_index : int, optional
    -296                The index of the scan. Default is 0.
    -297
    -298            Returns
    -299            -------
    -300            dict
    -301                A dictionary containing the output parameters.
    -302
    -303            """
    -304            from copy import deepcopy
    -305
    -306            output_parameters = default_parameters(self.file_location)
    -307
    -308            if self.isCentroid:
    -309                output_parameters['label'] = Labels.corems_centroid
    -310            else:
    -311                output_parameters['label'] = Labels.bruker_profile
    -312
    -313            output_parameters['analyzer'] = self.analyzer
    -314
    -315            output_parameters['instrument_label'] = self.instrument_label
    -316
    -317            output_parameters['sample_name'] = self.sample_name
    -318
    -319            output_parameters["Aterm"] = None
    -320
    -321            output_parameters["Bterm"] = None
    -322
    -323            output_parameters["Cterm"] = None
    -324
    -325            output_parameters["polarity"] = polarity
    -326
    -327            #scan_number and rt will be need to lc ms====
    -328
    -329            output_parameters["mobility_scan"] = 0
    -330
    -331            output_parameters["mobility_rt"] = 0
    -332
    -333            output_parameters["scan_number"] = scan_index
    -334
    -335            output_parameters["rt"] = 0
    +            
    326    def get_output_parameters(self, polarity: int, scan_index: int = 0) -> dict:
    +327        """
    +328        Get the output parameters for the mass spectrum.
    +329
    +330        Parameters
    +331        ----------
    +332        polarity : int
    +333            The polarity of the mass spectrum +1 or -1.
    +334        scan_index : int, optional
    +335            The index of the scan. Default is 0.
     336
    -337            return output_parameters
    +337        Returns
    +338        -------
    +339        dict
    +340            A dictionary containing the output parameters.
    +341
    +342        """
    +343        from copy import deepcopy
    +344
    +345        output_parameters = default_parameters(self.file_location)
    +346
    +347        if self.isCentroid:
    +348            output_parameters["label"] = Labels.corems_centroid
    +349        else:
    +350            output_parameters["label"] = Labels.bruker_profile
    +351
    +352        output_parameters["analyzer"] = self.analyzer
    +353
    +354        output_parameters["instrument_label"] = self.instrument_label
    +355
    +356        output_parameters["sample_name"] = self.sample_name
    +357
    +358        output_parameters["Aterm"] = None
    +359
    +360        output_parameters["Bterm"] = None
    +361
    +362        output_parameters["Cterm"] = None
    +363
    +364        output_parameters["polarity"] = polarity
    +365
    +366        # scan_number and rt will be need to lc ms====
    +367
    +368        output_parameters["mobility_scan"] = 0
    +369
    +370        output_parameters["mobility_rt"] = 0
    +371
    +372        output_parameters["scan_number"] = scan_index
    +373
    +374        output_parameters["rt"] = 0
    +375
    +376        return output_parameters
     
    @@ -1691,24 +1827,21 @@
    Returns
    -
    339    def clean_data_frame(self, dataframe):
    -340            """
    -341            Clean the input dataframe by removing columns that are not expected.
    -342
    -343            Parameters
    -344            ----------
    -345            pandas.DataFrame
    -346                The input dataframe to be cleaned.
    -347
    -348            """
    -349            
    -350            for column_name in dataframe.columns:
    -351
    -352                expected_column_name = self.parameters.header_translate.get(
    -353                    column_name)
    -354                if expected_column_name not in self._expected_columns:
    -355
    -356                    del dataframe[column_name]
    +            
    378    def clean_data_frame(self, dataframe):
    +379        """
    +380        Clean the input dataframe by removing columns that are not expected.
    +381
    +382        Parameters
    +383        ----------
    +384        pandas.DataFrame
    +385            The input dataframe to be cleaned.
    +386
    +387        """
    +388
    +389        for column_name in dataframe.columns:
    +390            expected_column_name = self.parameters.header_translate.get(column_name)
    +391            if expected_column_name not in self._expected_columns:
    +392                del dataframe[column_name]
     
    @@ -1734,34 +1867,36 @@
    Parameters
    -
    358    def check_columns(self, header_labels: list[str]):
    -359        """
    -360        Check if the given header labels match the expected columns.
    -361
    -362        Parameters
    -363        ----------
    -364        header_labels : list
    -365            The header labels to be checked.
    -366            
    -367        Raises
    -368        ------
    -369        Exception
    -370            If any expected column is not found in the header labels.
    -371        """
    -372        found_label = set()
    -373
    -374        for label in header_labels:
    -375            if not label in self._expected_columns:
    -376                user_column_name = self.parameters.header_translate.get(label)
    -377                if user_column_name in self._expected_columns:
    -378                    found_label.add(user_column_name)
    -379            else:
    -380                found_label.add(label)
    -381
    -382        not_found = self._expected_columns - found_label
    -383
    -384        if len(not_found) > 0:
    -385            raise Exception("Please make sure to include the columns %s" % ', '.join(not_found))
    +            
    394    def check_columns(self, header_labels: list[str]):
    +395        """
    +396        Check if the given header labels match the expected columns.
    +397
    +398        Parameters
    +399        ----------
    +400        header_labels : list
    +401            The header labels to be checked.
    +402
    +403        Raises
    +404        ------
    +405        Exception
    +406            If any expected column is not found in the header labels.
    +407        """
    +408        found_label = set()
    +409
    +410        for label in header_labels:
    +411            if not label in self._expected_columns:
    +412                user_column_name = self.parameters.header_translate.get(label)
    +413                if user_column_name in self._expected_columns:
    +414                    found_label.add(user_column_name)
    +415            else:
    +416                found_label.add(label)
    +417
    +418        not_found = self._expected_columns - found_label
    +419
    +420        if len(not_found) > 0:
    +421            raise Exception(
    +422                "Please make sure to include the columns %s" % ", ".join(not_found)
    +423            )
     
    @@ -1794,53 +1929,66 @@
    Raises
    -
    387    def read_xml_peaks(self, data:str) -> DataFrame:
    -388            '''
    -389            Read peaks from a Bruker .xml file and return a pandas DataFrame.
    -390
    -391            Parameters
    -392            ----------
    -393            data : str
    -394                The path to the .xml file.
    -395
    -396            Returns
    -397            -------
    -398            pandas.DataFrame
    -399                A DataFrame containing the peak data with columns: 'm/z', 'I', 'Resolving Power', 'Area', 'S/N', 'fwhm'.
    -400            '''
    -401            from numpy import nan
    -402            with open(data, "r") as file:
    -403                content = file.readlines()
    -404                content = "".join(content)
    -405                bs_content = BeautifulSoup(content, features='xml')
    -406            peaks_xml = bs_content.find_all("pk")   
    -407
    -408            # initialise lists of the peak variables
    -409            areas = []
    -410            fwhms = []
    -411            intensities = []
    -412            mzs = []
    -413            res = []
    -414            sn = []
    -415            #iterate through the peaks appending to each list
    -416            for peak in peaks_xml:
    -417                areas.append(float(peak.get('a', nan)))      # Use a default value if key 'a' is missing
    -418                fwhms.append(float(peak.get('fwhm', nan)))   # Use a default value if key 'fwhm' is missing
    -419                intensities.append(float(peak.get('i', nan))) # Use a default value if key 'i' is missing
    -420                mzs.append(float(peak.get('mz', nan)))       # Use a default value if key 'mz' is missing
    -421                res.append(float(peak.get('res', nan)))      # Use a default value if key 'res' is missing
    -422                sn.append(float(peak.get('sn', nan)))       # Use a default value if key 'sn' is missing
    -423
    -424            #Compile pandas dataframe of these values    
    -425            names=["m/z", "I", "Resolving Power", "Area", 'S/N','fwhm']    
    -426            df = DataFrame(columns = names,dtype=float)
    -427            df['m/z'] = mzs
    -428            df['I'] = intensities
    -429            df['Resolving Power'] = res
    -430            df['Area'] = areas
    -431            df['S/N'] = sn
    -432            df['fwhm'] = fwhms
    -433            return df
    +            
    425    def read_xml_peaks(self, data: str) -> DataFrame:
    +426        """
    +427        Read peaks from a Bruker .xml file and return a pandas DataFrame.
    +428
    +429        Parameters
    +430        ----------
    +431        data : str
    +432            The path to the .xml file.
    +433
    +434        Returns
    +435        -------
    +436        pandas.DataFrame
    +437            A DataFrame containing the peak data with columns: 'm/z', 'I', 'Resolving Power', 'Area', 'S/N', 'fwhm'.
    +438        """
    +439        from numpy import nan
    +440
    +441        with open(data, "r") as file:
    +442            content = file.readlines()
    +443            content = "".join(content)
    +444            bs_content = BeautifulSoup(content, features="xml")
    +445        peaks_xml = bs_content.find_all("pk")
    +446
    +447        # initialise lists of the peak variables
    +448        areas = []
    +449        fwhms = []
    +450        intensities = []
    +451        mzs = []
    +452        res = []
    +453        sn = []
    +454        # iterate through the peaks appending to each list
    +455        for peak in peaks_xml:
    +456            areas.append(
    +457                float(peak.get("a", nan))
    +458            )  # Use a default value if key 'a' is missing
    +459            fwhms.append(
    +460                float(peak.get("fwhm", nan))
    +461            )  # Use a default value if key 'fwhm' is missing
    +462            intensities.append(
    +463                float(peak.get("i", nan))
    +464            )  # Use a default value if key 'i' is missing
    +465            mzs.append(
    +466                float(peak.get("mz", nan))
    +467            )  # Use a default value if key 'mz' is missing
    +468            res.append(
    +469                float(peak.get("res", nan))
    +470            )  # Use a default value if key 'res' is missing
    +471            sn.append(
    +472                float(peak.get("sn", nan))
    +473            )  # Use a default value if key 'sn' is missing
    +474
    +475        # Compile pandas dataframe of these values
    +476        names = ["m/z", "I", "Resolving Power", "Area", "S/N", "fwhm"]
    +477        df = DataFrame(columns=names, dtype=float)
    +478        df["m/z"] = mzs
    +479        df["I"] = intensities
    +480        df["Resolving Power"] = res
    +481        df["Area"] = areas
    +482        df["S/N"] = sn
    +483        df["fwhm"] = fwhms
    +484        return df
     
    @@ -1873,47 +2021,46 @@
    Returns
    -
    435    def get_xml_polarity(self):
    -436            """
    -437            Get the polarity from an XML peaklist.
    -438
    -439            Returns
    -440            -------
    -441            int
    -442                The polarity of the XML peaklist. Returns -1 for negative polarity, +1 for positive polarity.
    -443
    -444            Raises
    -445            ------
    -446            Exception
    -447                If the data type is not XML peaklist in Bruker format or if the polarity is unhandled.
    -448            """
    -449            
    -450            # Check its an actual xml
    -451            if not self.data_type or not self.delimiter:
    -452
    -453                self.set_data_type()
    -454
    -455            if isinstance(self.file_location, S3Path):
    -456                # data = self.file_location.open('rb').read()
    -457                data = BytesIO(self.file_location.open('rb').read())
    -458            
    -459            else:
    -460                data = self.file_location
    -461
    -462            if self.data_type != 'xml':
    -463                raise Exception ("This function is only for XML peaklists (Bruker format)")
    -464
    -465            with open(data, "r") as file:
    -466                content = file.readlines()
    -467                content = "".join(content)
    -468                bs_content = BeautifulSoup(content, features='xml')
    -469            polarity = bs_content.find_all("ms_spectrum")[0]['polarity']
    -470            if polarity == '-':
    -471                return -1
    -472            elif polarity == '+':
    -473                return +1
    -474            else:
    -475                raise Exception("Polarity %s unhandled" % polarity)
    +            
    486    def get_xml_polarity(self):
    +487        """
    +488        Get the polarity from an XML peaklist.
    +489
    +490        Returns
    +491        -------
    +492        int
    +493            The polarity of the XML peaklist. Returns -1 for negative polarity, +1 for positive polarity.
    +494
    +495        Raises
    +496        ------
    +497        Exception
    +498            If the data type is not XML peaklist in Bruker format or if the polarity is unhandled.
    +499        """
    +500
    +501        # Check its an actual xml
    +502        if not self.data_type or not self.delimiter:
    +503            self.set_data_type()
    +504
    +505        if isinstance(self.file_location, S3Path):
    +506            # data = self.file_location.open('rb').read()
    +507            data = BytesIO(self.file_location.open("rb").read())
    +508
    +509        else:
    +510            data = self.file_location
    +511
    +512        if self.data_type != "xml":
    +513            raise Exception("This function is only for XML peaklists (Bruker format)")
    +514
    +515        with open(data, "r") as file:
    +516            content = file.readlines()
    +517            content = "".join(content)
    +518            bs_content = BeautifulSoup(content, features="xml")
    +519        polarity = bs_content.find_all("ms_spectrum")[0]["polarity"]
    +520        if polarity == "-":
    +521            return -1
    +522        elif polarity == "+":
    +523            return +1
    +524        else:
    +525            raise Exception("Polarity %s unhandled" % polarity)
     
    diff --git a/docs/corems/mass_spectrum/input/boosterHDF5.html b/docs/corems/mass_spectrum/input/boosterHDF5.html index 3263cd65..505ea089 100644 --- a/docs/corems/mass_spectrum/input/boosterHDF5.html +++ b/docs/corems/mass_spectrum/input/boosterHDF5.html @@ -82,166 +82,163 @@

    2 3import h5py 4from s3path import S3Path - 5from corems.mass_spectrum.input.baseClass import MassListBaseClass - 6from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecProfile - 7from corems.encapsulation.constant import Labels - 8from corems.encapsulation.factory.parameters import default_parameters - 9 - 10class ReadHDF_BoosterMassSpectrum(MassListBaseClass): - 11 """ The ReadHDF_BoosterMassSpectrum class parses the mass spectrum data from an HDF file and generate a mass spectrum object. - 12 - 13 Parameters - 14 ---------- - 15 file_location : str - 16 The path to the HDF file. - 17 isCentroid : bool, optional - 18 Specifies whether the mass spectrum is centroided or not. Default is False. - 19 - 20 Attributes - 21 ---------- - 22 polarity : int - 23 The polarity of the mass spectrum. - 24 h5pydata : h5py.File - 25 The HDF file object. - 26 scans : list - 27 The list of scan names in the HDF file. - 28 - 29 Methods - 30 ------- - 31 * get_data_profile(mz, abundance, auto_process). Returns a MassSpecProfile object from the given m/z and abundance arrays. - 32 * get_attr_data(scan, attr_srt). Returns the attribute value for the given scan and attribute name. - 33 * get_polarity(file_location). Returns the polarity of the mass spectrum. - 34 * get_mass_spectrum(auto_process). Returns the mass spectrum as a MassSpecProfile object. - 35 * get_output_parameters(). Returns the default output parameters for the mass spectrum. - 36 """ - 37 - 38 def __init__(self, file_location, isCentroid=False): - 39 self.polarity = self.get_polarity(file_location) - 40 super().__init__(file_location, isCentroid=False) - 41 - 42 def get_data_profile(self, mz, abundance, auto_process) -> MassSpecProfile: - 43 """ - 44 Returns a MassSpecProfile object from the given m/z and abundance arrays. - 45 - 46 Parameters - 47 ---------- - 48 mz : array_like - 49 The m/z values. - 50 abundance : array_like - 51 The abundance values. 
- 52 auto_process : bool - 53 Specifies whether to automatically process the mass spectrum. - 54 - 55 Returns - 56 ------- - 57 MassSpecProfile - 58 The MassSpecProfile object. - 59 - 60 """ - 61 data_dict = {Labels.mz: mz, Labels.abundance: abundance} - 62 output_parameters = self.get_output_parameters() - 63 return MassSpecProfile(data_dict, output_parameters, auto_process=auto_process) - 64 - 65 def get_attr_data(self, scan, attr_srt): - 66 """ - 67 Returns the attribute value for the given scan and attribute name. - 68 - 69 Parameters - 70 ---------- - 71 scan : int - 72 The scan index. - 73 attr_srt : str - 74 The attribute name. - 75 - 76 Returns - 77 ------- - 78 object - 79 The attribute value. - 80 - 81 """ - 82 return self.h5pydata[self.scans[scan]].attrs[attr_srt] - 83 - 84 def get_polarity(self, file_location:str | S3Path) -> int: - 85 """ - 86 Returns the polarity of the mass spectrum. - 87 - 88 Parameters - 89 ---------- - 90 file_location : str - 91 The path to the HDF file. - 92 - 93 Returns - 94 ------- - 95 int - 96 The polarity of the mass spectrum. - 97 - 98 """ - 99 if isinstance(file_location, S3Path): -100 data = BytesIO(file_location.open('rb').read()) -101 else: -102 data = file_location -103 -104 self.h5pydata = h5py.File(data, 'r') -105 self.scans = list(self.h5pydata.keys()) -106 -107 polarity = self.get_attr_data(0,'r_h_polarity') -108 -109 if polarity == 'negative scan': -110 return -1 -111 else: -112 return +1 -113 -114 def get_mass_spectrum(self, auto_process=True) -> MassSpecProfile: -115 """ -116 Returns the mass spectrum as a MassSpecProfile object. -117 -118 Parameters -119 ---------- -120 auto_process : bool, optional -121 Specifies whether to automatically process the mass spectrum. Default is True. -122 -123 Returns -124 ------- -125 MassSpecProfile -126 The MassSpecProfile object. 
-127 -128 """ -129 if len(self.scans) == 1: -130 booster_data = self.h5pydata[self.scans[0]] -131 -132 if self.isCentroid: -133 raise NotImplementedError -134 else: -135 mz = booster_data[0] -136 abun = booster_data[1] -137 return self.get_data_profile(mz, abun, auto_process) -138 -139 def get_output_parameters(self) -> dict: -140 """ -141 Returns the default output parameters for the mass spectrum. -142 -143 Returns -144 ------- -145 dict -146 The default output parameters. -147 -148 """ -149 d_params = default_parameters(self.file_location) -150 d_params["polarity"] = self.polarity -151 d_params["filename_path"] = self.file_location -152 d_params["mobility_scan"] = 0 -153 d_params["mobility_rt"] = 0 -154 d_params["scan_number"] = 0 -155 d_params["rt"] = self.get_attr_data(0, 'r_h_start_time') -156 d_params['label'] = Labels.booster_profile -157 d_params["Aterm"] = self.get_attr_data(0, 'r_cparams')[0] -158 d_params["Bterm"] = self.get_attr_data(0, 'r_cparams')[1] -159 return d_params -160 -161 -162 -163 -164 + 5 + 6from corems.encapsulation.constant import Labels + 7from corems.encapsulation.factory.parameters import default_parameters + 8from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecProfile + 9from corems.mass_spectrum.input.baseClass import MassListBaseClass + 10 + 11 + 12class ReadHDF_BoosterMassSpectrum(MassListBaseClass): + 13 """The ReadHDF_BoosterMassSpectrum class parses the mass spectrum data from an HDF file and generate a mass spectrum object. + 14 + 15 Parameters + 16 ---------- + 17 file_location : str + 18 The path to the HDF file. + 19 isCentroid : bool, optional + 20 Specifies whether the mass spectrum is centroided or not. Default is False. + 21 + 22 Attributes + 23 ---------- + 24 polarity : int + 25 The polarity of the mass spectrum. + 26 h5pydata : h5py.File + 27 The HDF file object. + 28 scans : list + 29 The list of scan names in the HDF file. 
+ 30 + 31 Methods + 32 ------- + 33 * get_data_profile(mz, abundance, auto_process). Returns a MassSpecProfile object from the given m/z and abundance arrays. + 34 * get_attr_data(scan, attr_srt). Returns the attribute value for the given scan and attribute name. + 35 * get_polarity(file_location). Returns the polarity of the mass spectrum. + 36 * get_mass_spectrum(auto_process). Returns the mass spectrum as a MassSpecProfile object. + 37 * get_output_parameters(). Returns the default output parameters for the mass spectrum. + 38 """ + 39 + 40 def __init__(self, file_location, isCentroid=False): + 41 self.polarity = self.get_polarity(file_location) + 42 super().__init__(file_location, isCentroid=False) + 43 + 44 def get_data_profile(self, mz, abundance, auto_process) -> MassSpecProfile: + 45 """ + 46 Returns a MassSpecProfile object from the given m/z and abundance arrays. + 47 + 48 Parameters + 49 ---------- + 50 mz : array_like + 51 The m/z values. + 52 abundance : array_like + 53 The abundance values. + 54 auto_process : bool + 55 Specifies whether to automatically process the mass spectrum. + 56 + 57 Returns + 58 ------- + 59 MassSpecProfile + 60 The MassSpecProfile object. + 61 + 62 """ + 63 data_dict = {Labels.mz: mz, Labels.abundance: abundance} + 64 output_parameters = self.get_output_parameters() + 65 return MassSpecProfile(data_dict, output_parameters, auto_process=auto_process) + 66 + 67 def get_attr_data(self, scan, attr_srt): + 68 """ + 69 Returns the attribute value for the given scan and attribute name. + 70 + 71 Parameters + 72 ---------- + 73 scan : int + 74 The scan index. + 75 attr_srt : str + 76 The attribute name. + 77 + 78 Returns + 79 ------- + 80 object + 81 The attribute value. + 82 + 83 """ + 84 return self.h5pydata[self.scans[scan]].attrs[attr_srt] + 85 + 86 def get_polarity(self, file_location: str | S3Path) -> int: + 87 """ + 88 Returns the polarity of the mass spectrum. 
+ 89 + 90 Parameters + 91 ---------- + 92 file_location : str + 93 The path to the HDF file. + 94 + 95 Returns + 96 ------- + 97 int + 98 The polarity of the mass spectrum. + 99 +100 """ +101 if isinstance(file_location, S3Path): +102 data = BytesIO(file_location.open("rb").read()) +103 else: +104 data = file_location +105 +106 self.h5pydata = h5py.File(data, "r") +107 self.scans = list(self.h5pydata.keys()) +108 +109 polarity = self.get_attr_data(0, "r_h_polarity") +110 +111 if polarity == "negative scan": +112 return -1 +113 else: +114 return +1 +115 +116 def get_mass_spectrum(self, auto_process=True) -> MassSpecProfile: +117 """ +118 Returns the mass spectrum as a MassSpecProfile object. +119 +120 Parameters +121 ---------- +122 auto_process : bool, optional +123 Specifies whether to automatically process the mass spectrum. Default is True. +124 +125 Returns +126 ------- +127 MassSpecProfile +128 The MassSpecProfile object. +129 +130 """ +131 if len(self.scans) == 1: +132 booster_data = self.h5pydata[self.scans[0]] +133 +134 if self.isCentroid: +135 raise NotImplementedError +136 else: +137 mz = booster_data[0] +138 abun = booster_data[1] +139 return self.get_data_profile(mz, abun, auto_process) +140 +141 def get_output_parameters(self) -> dict: +142 """ +143 Returns the default output parameters for the mass spectrum. +144 +145 Returns +146 ------- +147 dict +148 The default output parameters. +149 +150 """ +151 d_params = default_parameters(self.file_location) +152 d_params["polarity"] = self.polarity +153 d_params["filename_path"] = self.file_location +154 d_params["mobility_scan"] = 0 +155 d_params["mobility_rt"] = 0 +156 d_params["scan_number"] = 0 +157 d_params["rt"] = self.get_attr_data(0, "r_h_start_time") +158 d_params["label"] = Labels.booster_profile +159 d_params["Aterm"] = self.get_attr_data(0, "r_cparams")[0] +160 d_params["Bterm"] = self.get_attr_data(0, "r_cparams")[1] +161 return d_params

    @@ -257,156 +254,156 @@

    -
     12class ReadHDF_BoosterMassSpectrum(MassListBaseClass):
    - 13    """ The ReadHDF_BoosterMassSpectrum class parses the mass spectrum data from an HDF file and generate a mass spectrum object.
    - 14
    - 15    Parameters
    - 16    ----------
    - 17    file_location : str
    - 18        The path to the HDF file.
    - 19    isCentroid : bool, optional
    - 20        Specifies whether the mass spectrum is centroided or not. Default is False.
    - 21
    - 22    Attributes
    - 23    ----------
    - 24    polarity : int
    - 25        The polarity of the mass spectrum.
    - 26    h5pydata : h5py.File
    - 27        The HDF file object.
    - 28    scans : list
    - 29        The list of scan names in the HDF file.
    - 30
    - 31    Methods
    - 32    -------
    - 33    * get_data_profile(mz, abundance, auto_process). Returns a MassSpecProfile object from the given m/z and abundance arrays.
    - 34    * get_attr_data(scan, attr_srt). Returns the attribute value for the given scan and attribute name.
    - 35    * get_polarity(file_location). Returns the polarity of the mass spectrum.
    - 36    * get_mass_spectrum(auto_process). Returns the mass spectrum as a MassSpecProfile object.
    - 37    * get_output_parameters(). Returns the default output parameters for the mass spectrum.
    - 38    """
    - 39
    - 40    def __init__(self, file_location, isCentroid=False):
    - 41        self.polarity = self.get_polarity(file_location)
    - 42        super().__init__(file_location, isCentroid=False)
    - 43        
    - 44    def get_data_profile(self, mz, abundance, auto_process) -> MassSpecProfile:
    - 45        """
    - 46        Returns a MassSpecProfile object from the given m/z and abundance arrays.
    - 47
    - 48        Parameters
    - 49        ----------
    - 50        mz : array_like
    - 51            The m/z values.
    - 52        abundance : array_like
    - 53            The abundance values.
    - 54        auto_process : bool
    - 55            Specifies whether to automatically process the mass spectrum.
    - 56
    - 57        Returns
    - 58        -------
    - 59        MassSpecProfile
    - 60            The MassSpecProfile object.
    - 61
    - 62        """
    - 63        data_dict = {Labels.mz: mz, Labels.abundance: abundance}
    - 64        output_parameters = self.get_output_parameters()
    - 65        return MassSpecProfile(data_dict, output_parameters, auto_process=auto_process)
    - 66    
    - 67    def get_attr_data(self, scan, attr_srt):
    - 68        """
    - 69        Returns the attribute value for the given scan and attribute name.
    - 70
    - 71        Parameters
    - 72        ----------
    - 73        scan : int
    - 74            The scan index.
    - 75        attr_srt : str
    - 76            The attribute name.
    - 77
    - 78        Returns
    - 79        -------
    - 80        object
    - 81            The attribute value.
    - 82
    - 83        """
    - 84        return self.h5pydata[self.scans[scan]].attrs[attr_srt]
    - 85
    - 86    def get_polarity(self, file_location:str | S3Path) -> int:
    - 87        """
    - 88        Returns the polarity of the mass spectrum.
    - 89
    - 90        Parameters
    - 91        ----------
    - 92        file_location : str
    - 93            The path to the HDF file.
    - 94
    - 95        Returns
    - 96        -------
    - 97        int
    - 98            The polarity of the mass spectrum.
    - 99
    -100        """
    -101        if isinstance(file_location, S3Path):
    -102            data = BytesIO(file_location.open('rb').read())
    -103        else:
    -104            data = file_location
    -105        
    -106        self.h5pydata = h5py.File(data, 'r')
    -107        self.scans = list(self.h5pydata.keys())
    -108        
    -109        polarity = self.get_attr_data(0,'r_h_polarity')
    -110        
    -111        if polarity == 'negative scan':
    -112            return -1
    -113        else:
    -114            return +1    
    -115
    -116    def get_mass_spectrum(self, auto_process=True) -> MassSpecProfile:
    -117        """
    -118        Returns the mass spectrum as a MassSpecProfile object.
    -119
    -120        Parameters
    -121        ----------
    -122        auto_process : bool, optional
    -123            Specifies whether to automatically process the mass spectrum. Default is True.
    -124
    -125        Returns
    -126        -------
    -127        MassSpecProfile
    -128            The MassSpecProfile object.
    -129
    -130        """
    -131        if len(self.scans) == 1:
    -132            booster_data = self.h5pydata[self.scans[0]]
    -133            
    -134            if self.isCentroid:
    -135                raise NotImplementedError
    -136            else:
    -137                mz = booster_data[0]
    -138                abun = booster_data[1]
    -139                return self.get_data_profile(mz, abun, auto_process)
    -140
    -141    def get_output_parameters(self) -> dict:
    -142        """
    -143        Returns the default output parameters for the mass spectrum.
    -144
    -145        Returns
    -146        -------
    -147        dict
    -148            The default output parameters.
    -149
    -150        """
    -151        d_params = default_parameters(self.file_location)
    -152        d_params["polarity"] = self.polarity
    -153        d_params["filename_path"] = self.file_location
    -154        d_params["mobility_scan"] = 0
    -155        d_params["mobility_rt"] = 0
    -156        d_params["scan_number"] = 0
    -157        d_params["rt"] = self.get_attr_data(0, 'r_h_start_time')
    -158        d_params['label'] = Labels.booster_profile
    -159        d_params["Aterm"] = self.get_attr_data(0, 'r_cparams')[0]
    -160        d_params["Bterm"] = self.get_attr_data(0, 'r_cparams')[1]
    -161        return d_params
    +            
     13class ReadHDF_BoosterMassSpectrum(MassListBaseClass):
    + 14    """The ReadHDF_BoosterMassSpectrum class parses the mass spectrum data from an HDF file and generate a mass spectrum object.
    + 15
    + 16    Parameters
    + 17    ----------
    + 18    file_location : str
    + 19        The path to the HDF file.
    + 20    isCentroid : bool, optional
    + 21        Specifies whether the mass spectrum is centroided or not. Default is False.
    + 22
    + 23    Attributes
    + 24    ----------
    + 25    polarity : int
    + 26        The polarity of the mass spectrum.
    + 27    h5pydata : h5py.File
    + 28        The HDF file object.
    + 29    scans : list
    + 30        The list of scan names in the HDF file.
    + 31
    + 32    Methods
    + 33    -------
    + 34    * get_data_profile(mz, abundance, auto_process). Returns a MassSpecProfile object from the given m/z and abundance arrays.
    + 35    * get_attr_data(scan, attr_srt). Returns the attribute value for the given scan and attribute name.
    + 36    * get_polarity(file_location). Returns the polarity of the mass spectrum.
    + 37    * get_mass_spectrum(auto_process). Returns the mass spectrum as a MassSpecProfile object.
    + 38    * get_output_parameters(). Returns the default output parameters for the mass spectrum.
    + 39    """
    + 40
    + 41    def __init__(self, file_location, isCentroid=False):
    + 42        self.polarity = self.get_polarity(file_location)
    + 43        super().__init__(file_location, isCentroid=False)
    + 44
    + 45    def get_data_profile(self, mz, abundance, auto_process) -> MassSpecProfile:
    + 46        """
    + 47        Returns a MassSpecProfile object from the given m/z and abundance arrays.
    + 48
    + 49        Parameters
    + 50        ----------
    + 51        mz : array_like
    + 52            The m/z values.
    + 53        abundance : array_like
    + 54            The abundance values.
    + 55        auto_process : bool
    + 56            Specifies whether to automatically process the mass spectrum.
    + 57
    + 58        Returns
    + 59        -------
    + 60        MassSpecProfile
    + 61            The MassSpecProfile object.
    + 62
    + 63        """
    + 64        data_dict = {Labels.mz: mz, Labels.abundance: abundance}
    + 65        output_parameters = self.get_output_parameters()
    + 66        return MassSpecProfile(data_dict, output_parameters, auto_process=auto_process)
    + 67
    + 68    def get_attr_data(self, scan, attr_srt):
    + 69        """
    + 70        Returns the attribute value for the given scan and attribute name.
    + 71
    + 72        Parameters
    + 73        ----------
    + 74        scan : int
    + 75            The scan index.
    + 76        attr_srt : str
    + 77            The attribute name.
    + 78
    + 79        Returns
    + 80        -------
    + 81        object
    + 82            The attribute value.
    + 83
    + 84        """
    + 85        return self.h5pydata[self.scans[scan]].attrs[attr_srt]
    + 86
    + 87    def get_polarity(self, file_location: str | S3Path) -> int:
    + 88        """
    + 89        Returns the polarity of the mass spectrum.
    + 90
    + 91        Parameters
    + 92        ----------
    + 93        file_location : str
    + 94            The path to the HDF file.
    + 95
    + 96        Returns
    + 97        -------
    + 98        int
    + 99            The polarity of the mass spectrum.
    +100
    +101        """
    +102        if isinstance(file_location, S3Path):
    +103            data = BytesIO(file_location.open("rb").read())
    +104        else:
    +105            data = file_location
    +106
    +107        self.h5pydata = h5py.File(data, "r")
    +108        self.scans = list(self.h5pydata.keys())
    +109
    +110        polarity = self.get_attr_data(0, "r_h_polarity")
    +111
    +112        if polarity == "negative scan":
    +113            return -1
    +114        else:
    +115            return +1
    +116
    +117    def get_mass_spectrum(self, auto_process=True) -> MassSpecProfile:
    +118        """
    +119        Returns the mass spectrum as a MassSpecProfile object.
    +120
    +121        Parameters
    +122        ----------
    +123        auto_process : bool, optional
    +124            Specifies whether to automatically process the mass spectrum. Default is True.
    +125
    +126        Returns
    +127        -------
    +128        MassSpecProfile
    +129            The MassSpecProfile object.
    +130
    +131        """
    +132        if len(self.scans) == 1:
    +133            booster_data = self.h5pydata[self.scans[0]]
    +134
    +135            if self.isCentroid:
    +136                raise NotImplementedError
    +137            else:
    +138                mz = booster_data[0]
    +139                abun = booster_data[1]
    +140                return self.get_data_profile(mz, abun, auto_process)
    +141
    +142    def get_output_parameters(self) -> dict:
    +143        """
    +144        Returns the default output parameters for the mass spectrum.
    +145
    +146        Returns
    +147        -------
    +148        dict
    +149            The default output parameters.
    +150
    +151        """
    +152        d_params = default_parameters(self.file_location)
    +153        d_params["polarity"] = self.polarity
    +154        d_params["filename_path"] = self.file_location
    +155        d_params["mobility_scan"] = 0
    +156        d_params["mobility_rt"] = 0
    +157        d_params["scan_number"] = 0
    +158        d_params["rt"] = self.get_attr_data(0, "r_h_start_time")
    +159        d_params["label"] = Labels.booster_profile
    +160        d_params["Aterm"] = self.get_attr_data(0, "r_cparams")[0]
    +161        d_params["Bterm"] = self.get_attr_data(0, "r_cparams")[1]
    +162        return d_params
     
    @@ -454,9 +451,9 @@
    Methods
    -
    40    def __init__(self, file_location, isCentroid=False):
    -41        self.polarity = self.get_polarity(file_location)
    -42        super().__init__(file_location, isCentroid=False)
    +            
    41    def __init__(self, file_location, isCentroid=False):
    +42        self.polarity = self.get_polarity(file_location)
    +43        super().__init__(file_location, isCentroid=False)
     
    @@ -485,28 +482,28 @@
    Methods
    -
    44    def get_data_profile(self, mz, abundance, auto_process) -> MassSpecProfile:
    -45        """
    -46        Returns a MassSpecProfile object from the given m/z and abundance arrays.
    -47
    -48        Parameters
    -49        ----------
    -50        mz : array_like
    -51            The m/z values.
    -52        abundance : array_like
    -53            The abundance values.
    -54        auto_process : bool
    -55            Specifies whether to automatically process the mass spectrum.
    -56
    -57        Returns
    -58        -------
    -59        MassSpecProfile
    -60            The MassSpecProfile object.
    -61
    -62        """
    -63        data_dict = {Labels.mz: mz, Labels.abundance: abundance}
    -64        output_parameters = self.get_output_parameters()
    -65        return MassSpecProfile(data_dict, output_parameters, auto_process=auto_process)
    +            
    45    def get_data_profile(self, mz, abundance, auto_process) -> MassSpecProfile:
    +46        """
    +47        Returns a MassSpecProfile object from the given m/z and abundance arrays.
    +48
    +49        Parameters
    +50        ----------
    +51        mz : array_like
    +52            The m/z values.
    +53        abundance : array_like
    +54            The abundance values.
    +55        auto_process : bool
    +56            Specifies whether to automatically process the mass spectrum.
    +57
    +58        Returns
    +59        -------
    +60        MassSpecProfile
    +61            The MassSpecProfile object.
    +62
    +63        """
    +64        data_dict = {Labels.mz: mz, Labels.abundance: abundance}
    +65        output_parameters = self.get_output_parameters()
    +66        return MassSpecProfile(data_dict, output_parameters, auto_process=auto_process)
     
    @@ -543,24 +540,24 @@
    Returns
    -
    67    def get_attr_data(self, scan, attr_srt):
    -68        """
    -69        Returns the attribute value for the given scan and attribute name.
    -70
    -71        Parameters
    -72        ----------
    -73        scan : int
    -74            The scan index.
    -75        attr_srt : str
    -76            The attribute name.
    -77
    -78        Returns
    -79        -------
    -80        object
    -81            The attribute value.
    -82
    -83        """
    -84        return self.h5pydata[self.scans[scan]].attrs[attr_srt]
    +            
    68    def get_attr_data(self, scan, attr_srt):
    +69        """
    +70        Returns the attribute value for the given scan and attribute name.
    +71
    +72        Parameters
    +73        ----------
    +74        scan : int
    +75            The scan index.
    +76        attr_srt : str
    +77            The attribute name.
    +78
    +79        Returns
    +80        -------
    +81        object
    +82            The attribute value.
    +83
    +84        """
    +85        return self.h5pydata[self.scans[scan]].attrs[attr_srt]
     
    @@ -595,35 +592,35 @@
    Returns
    -
     86    def get_polarity(self, file_location:str | S3Path) -> int:
    - 87        """
    - 88        Returns the polarity of the mass spectrum.
    - 89
    - 90        Parameters
    - 91        ----------
    - 92        file_location : str
    - 93            The path to the HDF file.
    - 94
    - 95        Returns
    - 96        -------
    - 97        int
    - 98            The polarity of the mass spectrum.
    - 99
    -100        """
    -101        if isinstance(file_location, S3Path):
    -102            data = BytesIO(file_location.open('rb').read())
    -103        else:
    -104            data = file_location
    -105        
    -106        self.h5pydata = h5py.File(data, 'r')
    -107        self.scans = list(self.h5pydata.keys())
    -108        
    -109        polarity = self.get_attr_data(0,'r_h_polarity')
    -110        
    -111        if polarity == 'negative scan':
    -112            return -1
    -113        else:
    -114            return +1    
    +            
     87    def get_polarity(self, file_location: str | S3Path) -> int:
    + 88        """
    + 89        Returns the polarity of the mass spectrum.
    + 90
    + 91        Parameters
    + 92        ----------
    + 93        file_location : str
    + 94            The path to the HDF file.
    + 95
    + 96        Returns
    + 97        -------
    + 98        int
    + 99            The polarity of the mass spectrum.
    +100
    +101        """
    +102        if isinstance(file_location, S3Path):
    +103            data = BytesIO(file_location.open("rb").read())
    +104        else:
    +105            data = file_location
    +106
    +107        self.h5pydata = h5py.File(data, "r")
    +108        self.scans = list(self.h5pydata.keys())
    +109
    +110        polarity = self.get_attr_data(0, "r_h_polarity")
    +111
    +112        if polarity == "negative scan":
    +113            return -1
    +114        else:
    +115            return +1
     
    @@ -656,30 +653,30 @@
    Returns
    -
    116    def get_mass_spectrum(self, auto_process=True) -> MassSpecProfile:
    -117        """
    -118        Returns the mass spectrum as a MassSpecProfile object.
    -119
    -120        Parameters
    -121        ----------
    -122        auto_process : bool, optional
    -123            Specifies whether to automatically process the mass spectrum. Default is True.
    -124
    -125        Returns
    -126        -------
    -127        MassSpecProfile
    -128            The MassSpecProfile object.
    -129
    -130        """
    -131        if len(self.scans) == 1:
    -132            booster_data = self.h5pydata[self.scans[0]]
    -133            
    -134            if self.isCentroid:
    -135                raise NotImplementedError
    -136            else:
    -137                mz = booster_data[0]
    -138                abun = booster_data[1]
    -139                return self.get_data_profile(mz, abun, auto_process)
    +            
    117    def get_mass_spectrum(self, auto_process=True) -> MassSpecProfile:
    +118        """
    +119        Returns the mass spectrum as a MassSpecProfile object.
    +120
    +121        Parameters
    +122        ----------
    +123        auto_process : bool, optional
    +124            Specifies whether to automatically process the mass spectrum. Default is True.
    +125
    +126        Returns
    +127        -------
    +128        MassSpecProfile
    +129            The MassSpecProfile object.
    +130
    +131        """
    +132        if len(self.scans) == 1:
    +133            booster_data = self.h5pydata[self.scans[0]]
    +134
    +135            if self.isCentroid:
    +136                raise NotImplementedError
    +137            else:
    +138                mz = booster_data[0]
    +139                abun = booster_data[1]
    +140                return self.get_data_profile(mz, abun, auto_process)
     
    @@ -712,27 +709,27 @@
    Returns
    -
    141    def get_output_parameters(self) -> dict:
    -142        """
    -143        Returns the default output parameters for the mass spectrum.
    -144
    -145        Returns
    -146        -------
    -147        dict
    -148            The default output parameters.
    -149
    -150        """
    -151        d_params = default_parameters(self.file_location)
    -152        d_params["polarity"] = self.polarity
    -153        d_params["filename_path"] = self.file_location
    -154        d_params["mobility_scan"] = 0
    -155        d_params["mobility_rt"] = 0
    -156        d_params["scan_number"] = 0
    -157        d_params["rt"] = self.get_attr_data(0, 'r_h_start_time')
    -158        d_params['label'] = Labels.booster_profile
    -159        d_params["Aterm"] = self.get_attr_data(0, 'r_cparams')[0]
    -160        d_params["Bterm"] = self.get_attr_data(0, 'r_cparams')[1]
    -161        return d_params
    +            
    142    def get_output_parameters(self) -> dict:
    +143        """
    +144        Returns the default output parameters for the mass spectrum.
    +145
    +146        Returns
    +147        -------
    +148        dict
    +149            The default output parameters.
    +150
    +151        """
    +152        d_params = default_parameters(self.file_location)
    +153        d_params["polarity"] = self.polarity
    +154        d_params["filename_path"] = self.file_location
    +155        d_params["mobility_scan"] = 0
    +156        d_params["mobility_rt"] = 0
    +157        d_params["scan_number"] = 0
    +158        d_params["rt"] = self.get_attr_data(0, "r_h_start_time")
    +159        d_params["label"] = Labels.booster_profile
    +160        d_params["Aterm"] = self.get_attr_data(0, "r_cparams")[0]
    +161        d_params["Bterm"] = self.get_attr_data(0, "r_cparams")[1]
    +162        return d_params
     
    diff --git a/docs/corems/mass_spectrum/input/coremsHDF5.html b/docs/corems/mass_spectrum/input/coremsHDF5.html index f5f2d455..8d8bcfcb 100644 --- a/docs/corems/mass_spectrum/input/coremsHDF5.html +++ b/docs/corems/mass_spectrum/input/coremsHDF5.html @@ -103,435 +103,448 @@

    8from corems.encapsulation.input.parameter_from_json import _set_dict_data_ms 9from corems.mass_spectrum.input.massList import ReadCoremsMasslist 10from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecCentroid - 11from corems.encapsulation.constant import Labels - 12from corems.encapsulation.factory.parameters import default_parameters + 11from corems.encapsulation.factory.parameters import default_parameters + 12 13 - 14 - 15class ReadCoreMSHDF_MassSpectrum(ReadCoremsMasslist): - 16 """Class for reading mass spectrum data from a CoreMS HDF5 file. - 17 - 18 Attributes - 19 ---------- - 20 h5pydata : h5py.File - 21 The HDF5 file object. - 22 scans : list - 23 List of scan labels in the HDF5 file. - 24 - 25 Parameters - 26 ---------- - 27 file_location : str or S3Path - 28 The path to the CoreMS HDF5 file. - 29 - 30 Methods - 31 ------- - 32 * load_raw_data(mass_spectrum, scan_index=0) Load raw data into the mass spectrum object. - 33 * get_mass_spectrum(scan_number=0, time_index=-1, auto_process=True, load_settings=True, load_raw=True).Get a mass spectrum object. - 34 * load_settings(mass_spectrum, scan_index=0, time_index=-1). Load settings into the mass spectrum object. - 35 * get_dataframe(scan_index=0, time_index=-1). Get a pandas DataFrame representing the mass spectrum. - 36 * get_time_index_to_pull(scan_label, time_index). Get the time index to pull from the HDF5 file. - 37 * get_high_level_attr_data(attr_str). Get high-level attribute data from the HDF5 file. - 38 * get_scan_group_attr_data(scan_index, time_index, attr_group, attr_srt=None). Get scan group attribute data from the HDF5 file. - 39 * get_raw_data_attr_data(scan_index, attr_group, attr_str). Get raw data attribute data from the HDF5 file. - 40 * get_output_parameters(polarity, scan_index=0). Get the output parameters for the mass spectrum. 
- 41 """ - 42 - 43 def __init__(self, file_location): - 44 super().__init__(file_location) - 45 - 46 if isinstance(self.file_location, S3Path): - 47 data = BytesIO(self.file_location.open("rb").read()) - 48 else: - 49 data = self.file_location - 50 - 51 self.h5pydata = h5py.File(data, "r") - 52 - 53 self.scans = list(self.h5pydata.keys()) - 54 - 55 def load_raw_data(self, mass_spectrum, scan_index=0): - 56 """ - 57 Load raw data into the mass spectrum object. - 58 - 59 Parameters - 60 ---------- - 61 mass_spectrum : MassSpecCentroid - 62 The mass spectrum object to load the raw data into. - 63 scan_index : int, optional - 64 The index of the scan to load the raw data from. Default is 0. - 65 """ - 66 - 67 scan_label = self.scans[scan_index] - 68 - 69 # Check if the "raw_ms" group in the scan is empty - 70 if self.h5pydata[scan_label]["raw_ms"].shape is not None: + 14class ReadCoreMSHDF_MassSpectrum(ReadCoremsMasslist): + 15 """Class for reading mass spectrum data from a CoreMS HDF5 file. + 16 + 17 Attributes + 18 ---------- + 19 h5pydata : h5py.File + 20 The HDF5 file object. + 21 scans : list + 22 List of scan labels in the HDF5 file. + 23 + 24 Parameters + 25 ---------- + 26 file_location : str or S3Path + 27 The path to the CoreMS HDF5 file. + 28 + 29 Methods + 30 ------- + 31 * load_raw_data(mass_spectrum, scan_index=0) Load raw data into the mass spectrum object. + 32 * get_mass_spectrum(scan_number=0, time_index=-1, auto_process=True, load_settings=True, load_raw=True).Get a mass spectrum object. + 33 * load_settings(mass_spectrum, scan_index=0, time_index=-1). Load settings into the mass spectrum object. + 34 * get_dataframe(scan_index=0, time_index=-1). Get a pandas DataFrame representing the mass spectrum. + 35 * get_time_index_to_pull(scan_label, time_index). Get the time index to pull from the HDF5 file. + 36 * get_high_level_attr_data(attr_str). Get high-level attribute data from the HDF5 file. 
+ 37 * get_scan_group_attr_data(scan_index, time_index, attr_group, attr_srt=None). Get scan group attribute data from the HDF5 file. + 38 * get_raw_data_attr_data(scan_index, attr_group, attr_str). Get raw data attribute data from the HDF5 file. + 39 * get_output_parameters(polarity, scan_index=0). Get the output parameters for the mass spectrum. + 40 """ + 41 + 42 def __init__(self, file_location): + 43 super().__init__(file_location) + 44 + 45 if isinstance(self.file_location, S3Path): + 46 data = BytesIO(self.file_location.open("rb").read()) + 47 else: + 48 data = self.file_location + 49 + 50 self.h5pydata = h5py.File(data, "r") + 51 + 52 self.scans = list(self.h5pydata.keys()) + 53 + 54 def load_raw_data(self, mass_spectrum, scan_index=0): + 55 """ + 56 Load raw data into the mass spectrum object. + 57 + 58 Parameters + 59 ---------- + 60 mass_spectrum : MassSpecCentroid + 61 The mass spectrum object to load the raw data into. + 62 scan_index : int, optional + 63 The index of the scan to load the raw data from. Default is 0. + 64 """ + 65 + 66 scan_label = self.scans[scan_index] + 67 + 68 # Check if the "raw_ms" group in the scan is empty + 69 if self.h5pydata[scan_label]["raw_ms"].shape is not None: + 70 mz_profile = self.h5pydata[scan_label]["raw_ms"][0] 71 - 72 mz_profile = self.h5pydata[scan_label]["raw_ms"][0] + 72 abundance_profile = self.h5pydata[scan_label]["raw_ms"][1] 73 - 74 abundance_profile = self.h5pydata[scan_label]["raw_ms"][1] + 74 mass_spectrum.mz_exp_profile = mz_profile 75 - 76 mass_spectrum.mz_exp_profile = mz_profile + 76 mass_spectrum.abundance_profile = abundance_profile 77 - 78 mass_spectrum.abundance_profile = abundance_profile - 79 - 80 def get_mass_spectrum( - 81 self, - 82 scan_number=0, - 83 time_index=-1, - 84 auto_process=True, - 85 load_settings=True, - 86 load_raw=True, - 87 load_molecular_formula=True, - 88 ): - 89 """ - 90 Instantiate a mass spectrum object from the CoreMS HDF5 file. 
- 91 Note that this always returns a centroid mass spectrum object; functionality for profile and - 92 frequency mass spectra is not yet implemented. - 93 - 94 Parameters - 95 ---------- - 96 scan_number : int, optional - 97 The index of the scan to retrieve the mass spectrum from. Default is 0. - 98 time_index : int, optional - 99 The index of the time point to retrieve the mass spectrum from. Default is -1. -100 auto_process : bool, optional -101 Whether to automatically process the mass spectrum. Default is True. -102 load_settings : bool, optional -103 Whether to load the settings into the mass spectrum object. Default is True. -104 load_raw : bool, optional -105 Whether to load the raw data into the mass spectrum object. Default is True. -106 load_molecular_formula : bool, optional -107 Whether to load the molecular formula into the mass spectrum object. -108 Default is True. -109 -110 Returns -111 ------- -112 MassSpecCentroid -113 The mass spectrum object. -114 -115 Raises -116 ------ -117 ValueError -118 If the CoreMS file is not valid. -119 If the mass spectrum has not been processed and load_molecular_formula is True. 
-120 """ -121 if "mass_spectra" in self.scans[0]: -122 scan_index = self.scans.index("mass_spectra/" + str(scan_number)) -123 else: -124 scan_index = self.scans.index(str(scan_number)) -125 dataframe = self.get_dataframe(scan_index, time_index=time_index) -126 if dataframe["Molecular Formula"].any() and not dataframe["C"].any(): -127 cols = dataframe.columns.tolist() -128 cols = cols[cols.index("Molecular Formula") + 1 :] -129 for index, row in dataframe.iterrows(): -130 if row["Molecular Formula"] is not None: -131 og_formula = row["Molecular Formula"] -132 for col in cols: -133 if "col" in og_formula: -134 # get the digit after the element ("col") in the molecular formula and set it to the dataframe -135 row[col] = int(og_formula.split(col)[1].split(" ")[0]) -136 -137 if not set( -138 ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"] -139 ).issubset(dataframe.columns): -140 raise ValueError( -141 "%s it is not a valid CoreMS file" % str(self.file_location) -142 ) + 78 def get_mass_spectrum( + 79 self, + 80 scan_number=0, + 81 time_index=-1, + 82 auto_process=True, + 83 load_settings=True, + 84 load_raw=True, + 85 load_molecular_formula=True, + 86 ): + 87 """ + 88 Instantiate a mass spectrum object from the CoreMS HDF5 file. + 89 Note that this always returns a centroid mass spectrum object; functionality for profile and + 90 frequency mass spectra is not yet implemented. + 91 + 92 Parameters + 93 ---------- + 94 scan_number : int, optional + 95 The index of the scan to retrieve the mass spectrum from. Default is 0. + 96 time_index : int, optional + 97 The index of the time point to retrieve the mass spectrum from. Default is -1. + 98 auto_process : bool, optional + 99 Whether to automatically process the mass spectrum. Default is True. +100 load_settings : bool, optional +101 Whether to load the settings into the mass spectrum object. Default is True. +102 load_raw : bool, optional +103 Whether to load the raw data into the mass spectrum object. 
Default is True. +104 load_molecular_formula : bool, optional +105 Whether to load the molecular formula into the mass spectrum object. +106 Default is True. +107 +108 Returns +109 ------- +110 MassSpecCentroid +111 The mass spectrum object. +112 +113 Raises +114 ------ +115 ValueError +116 If the CoreMS file is not valid. +117 If the mass spectrum has not been processed and load_molecular_formula is True. +118 """ +119 if "mass_spectra" in self.scans[0]: +120 scan_index = self.scans.index("mass_spectra/" + str(scan_number)) +121 else: +122 scan_index = self.scans.index(str(scan_number)) +123 dataframe = self.get_dataframe(scan_index, time_index=time_index) +124 if dataframe["Molecular Formula"].any() and not dataframe["C"].any(): +125 cols = dataframe.columns.tolist() +126 cols = cols[cols.index("Molecular Formula") + 1 :] +127 for index, row in dataframe.iterrows(): +128 if row["Molecular Formula"] is not None: +129 og_formula = row["Molecular Formula"] +130 for col in cols: +131 if "col" in og_formula: +132 # get the digit after the element ("col") in the molecular formula and set it to the dataframe +133 row[col] = int(og_formula.split(col)[1].split(" ")[0]) +134 +135 if not set( +136 ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"] +137 ).issubset(dataframe.columns): +138 raise ValueError( +139 "%s it is not a valid CoreMS file" % str(self.file_location) +140 ) +141 +142 dataframe.rename(columns=self.parameters.header_translate, inplace=True) 143 -144 dataframe.rename(columns=self.parameters.header_translate, inplace=True) -145 -146 # Cast m/z, and 'Peak Height' to float -147 dataframe["m/z"] = dataframe["m/z"].astype(float) -148 dataframe["Peak Height"] = dataframe["Peak Height"].astype(float) +144 # Cast m/z, and 'Peak Height' to float +145 dataframe["m/z"] = dataframe["m/z"].astype(float) +146 dataframe["Peak Height"] = dataframe["Peak Height"].astype(float) +147 +148 polarity = dataframe["Ion Charge"].values[0] 149 -150 polarity = 
dataframe["Ion Charge"].values[0] +150 output_parameters = self.get_output_parameters(polarity, scan_index=scan_index) 151 -152 output_parameters = self.get_output_parameters(polarity, scan_index=scan_index) -153 -154 mass_spec_obj = MassSpecCentroid( -155 dataframe.to_dict(orient="list"), output_parameters, auto_process = False -156 ) -157 -158 if auto_process: -159 # Set the settings on the mass spectrum object to relative abuncance of 0 so all peaks get added -160 mass_spec_obj.settings.noise_threshold_method = "absolute_abundance" -161 mass_spec_obj.settings.noise_threshold_absolute_abundance = 0 -162 mass_spec_obj.process_mass_spec() -163 -164 if load_settings: -165 # Load settings into the mass spectrum object -166 self.load_settings( -167 mass_spec_obj, -168 scan_index=scan_index, -169 time_index=time_index -170 ) -171 -172 if load_raw: -173 self.load_raw_data( -174 mass_spec_obj, -175 scan_index=scan_index -176 ) -177 -178 if load_molecular_formula: -179 if not auto_process: -180 raise ValueError( -181 "Can only add molecular formula if the mass spectrum has been processed" -182 ) -183 else: -184 self.add_molecular_formula(mass_spec_obj, dataframe) -185 -186 return mass_spec_obj -187 -188 def load_settings(self, mass_spectrum, scan_index=0, time_index=-1): -189 """ -190 Load settings into the mass spectrum object. -191 -192 Parameters -193 ---------- -194 mass_spectrum : MassSpecCentroid -195 The mass spectrum object to load the settings into. -196 scan_index : int, optional -197 The index of the scan to load the settings from. Default is 0. -198 time_index : int, optional -199 The index of the time point to load the settings from. Default is -1. 
-200 """ -201 -202 loaded_settings = {} -203 loaded_settings["MoleculaSearch"] = self.get_scan_group_attr_data( -204 scan_index, time_index, "MoleculaSearchSetting" -205 ) -206 loaded_settings["MassSpecPeak"] = self.get_scan_group_attr_data( -207 scan_index, time_index, "MassSpecPeakSetting" -208 ) -209 loaded_settings["MassSpectrum"] = self.get_scan_group_attr_data( -210 scan_index, time_index, "MassSpectrumSetting" -211 ) -212 loaded_settings["Transient"] = self.get_scan_group_attr_data( -213 scan_index, time_index, "TransientSetting" -214 ) -215 -216 _set_dict_data_ms(loaded_settings, mass_spectrum) -217 -218 def get_dataframe(self, scan_index=0, time_index=-1): -219 """ -220 Get a pandas DataFrame representing the mass spectrum. +152 mass_spec_obj = MassSpecCentroid( +153 dataframe.to_dict(orient="list"), output_parameters, auto_process=False +154 ) +155 +156 if auto_process: +157 # Set the settings on the mass spectrum object to relative abuncance of 0 so all peaks get added +158 mass_spec_obj.settings.noise_threshold_method = "absolute_abundance" +159 mass_spec_obj.settings.noise_threshold_absolute_abundance = 0 +160 mass_spec_obj.process_mass_spec() +161 +162 if load_settings: +163 # Load settings into the mass spectrum object +164 self.load_settings( +165 mass_spec_obj, scan_index=scan_index, time_index=time_index +166 ) +167 +168 if load_raw: +169 self.load_raw_data(mass_spec_obj, scan_index=scan_index) +170 +171 if load_molecular_formula: +172 if not auto_process: +173 raise ValueError( +174 "Can only add molecular formula if the mass spectrum has been processed" +175 ) +176 else: +177 self.add_molecular_formula(mass_spec_obj, dataframe) +178 +179 return mass_spec_obj +180 +181 def load_settings(self, mass_spectrum, scan_index=0, time_index=-1): +182 """ +183 Load settings into the mass spectrum object. +184 +185 Parameters +186 ---------- +187 mass_spectrum : MassSpecCentroid +188 The mass spectrum object to load the settings into. 
+189 scan_index : int, optional +190 The index of the scan to load the settings from. Default is 0. +191 time_index : int, optional +192 The index of the time point to load the settings from. Default is -1. +193 """ +194 +195 loaded_settings = {} +196 loaded_settings["MoleculaSearch"] = self.get_scan_group_attr_data( +197 scan_index, time_index, "MoleculaSearchSetting" +198 ) +199 loaded_settings["MassSpecPeak"] = self.get_scan_group_attr_data( +200 scan_index, time_index, "MassSpecPeakSetting" +201 ) +202 loaded_settings["MassSpectrum"] = self.get_scan_group_attr_data( +203 scan_index, time_index, "MassSpectrumSetting" +204 ) +205 loaded_settings["Transient"] = self.get_scan_group_attr_data( +206 scan_index, time_index, "TransientSetting" +207 ) +208 +209 _set_dict_data_ms(loaded_settings, mass_spectrum) +210 +211 def get_dataframe(self, scan_index=0, time_index=-1): +212 """ +213 Get a pandas DataFrame representing the mass spectrum. +214 +215 Parameters +216 ---------- +217 scan_index : int, optional +218 The index of the scan to retrieve the DataFrame from. Default is 0. +219 time_index : int, optional +220 The index of the time point to retrieve the DataFrame from. Default is -1. 221 -222 Parameters -223 ---------- -224 scan_index : int, optional -225 The index of the scan to retrieve the DataFrame from. Default is 0. -226 time_index : int, optional -227 The index of the time point to retrieve the DataFrame from. Default is -1. -228 -229 Returns -230 ------- -231 DataFrame -232 The pandas DataFrame representing the mass spectrum. 
-233 """ -234 -235 columnsLabels = self.get_scan_group_attr_data( -236 scan_index, time_index, "ColumnsLabels" -237 ) -238 -239 scan_label = self.scans[scan_index] -240 -241 index_to_pull = self.get_time_index_to_pull(scan_label, time_index) -242 -243 corems_table_data = self.h5pydata[scan_label][index_to_pull] -244 -245 list_dict = [] -246 for row in corems_table_data: -247 data_dict = {} -248 for data_index, data in enumerate(row): -249 label = columnsLabels[data_index] -250 # if data starts with a b' it is a byte string, so decode it -251 if isinstance(data, bytes): -252 data = data.decode("utf-8") -253 if data == "nan": -254 data = None -255 data_dict[label] = data -256 -257 list_dict.append(data_dict) -258 -259 # Reorder the columns from low to high "Index" to match the order of the dataframe -260 df = DataFrame(list_dict) -261 # set the "Index" column to int so it sorts correctly -262 df["Index"] = df["Index"].astype(int) -263 df = df.sort_values(by="Index") -264 # Reset index to match the "Index" column -265 df = df.set_index("Index", drop=False) -266 -267 return df -268 -269 def get_time_index_to_pull(self, scan_label, time_index): -270 """ -271 Get the time index to pull from the HDF5 file. +222 Returns +223 ------- +224 DataFrame +225 The pandas DataFrame representing the mass spectrum. 
+226 """ +227 +228 columnsLabels = self.get_scan_group_attr_data( +229 scan_index, time_index, "ColumnsLabels" +230 ) +231 +232 scan_label = self.scans[scan_index] +233 +234 index_to_pull = self.get_time_index_to_pull(scan_label, time_index) +235 +236 corems_table_data = self.h5pydata[scan_label][index_to_pull] +237 +238 list_dict = [] +239 for row in corems_table_data: +240 data_dict = {} +241 for data_index, data in enumerate(row): +242 label = columnsLabels[data_index] +243 # if data starts with a b' it is a byte string, so decode it +244 if isinstance(data, bytes): +245 data = data.decode("utf-8") +246 if data == "nan": +247 data = None +248 data_dict[label] = data +249 +250 list_dict.append(data_dict) +251 +252 # Reorder the columns from low to high "Index" to match the order of the dataframe +253 df = DataFrame(list_dict) +254 # set the "Index" column to int so it sorts correctly +255 df["Index"] = df["Index"].astype(int) +256 df = df.sort_values(by="Index") +257 # Reset index to match the "Index" column +258 df = df.set_index("Index", drop=False) +259 +260 return df +261 +262 def get_time_index_to_pull(self, scan_label, time_index): +263 """ +264 Get the time index to pull from the HDF5 file. +265 +266 Parameters +267 ---------- +268 scan_label : str +269 The label of the scan. +270 time_index : int +271 The index of the time point. 272 -273 Parameters -274 ---------- -275 scan_label : str -276 The label of the scan. -277 time_index : int -278 The index of the time point. -279 -280 Returns -281 ------- -282 str -283 The time index to pull. -284 """ +273 Returns +274 ------- +275 str +276 The time index to pull. 
+277 """ +278 +279 time_data = sorted( +280 [(i, int(i)) for i in self.h5pydata[scan_label].keys() if i != "raw_ms"], +281 key=lambda m: m[1], +282 ) +283 +284 index_to_pull = time_data[time_index][0] 285 -286 time_data = sorted( -287 [(i, int(i)) for i in self.h5pydata[scan_label].keys() if i != "raw_ms"], -288 key=lambda m: m[1], -289 ) -290 -291 index_to_pull = time_data[time_index][0] -292 -293 return index_to_pull -294 -295 def get_high_level_attr_data(self, attr_str): -296 """ -297 Get high-level attribute data from the HDF5 file. -298 -299 Parameters -300 ---------- -301 attr_str : str -302 The attribute string. -303 -304 Returns -305 ------- -306 dict -307 The attribute data. -308 -309 Raises -310 ------ -311 KeyError -312 If the attribute string is not found in the HDF5 file. -313 """ -314 -315 return self.h5pydata.attrs[attr_str] -316 -317 def get_scan_group_attr_data( -318 self, scan_index, time_index, attr_group, attr_srt=None -319 ): -320 """ -321 Get scan group attribute data from the HDF5 file. -322 -323 Parameters -324 ---------- -325 scan_index : int -326 The index of the scan. -327 time_index : int -328 The index of the time point. -329 attr_group : str -330 The attribute group. -331 attr_srt : str, optional -332 The attribute string. Default is None. -333 -334 Returns -335 ------- -336 dict -337 The attribute data. -338 -339 Notes -340 ----- -341 This method retrieves attribute data from the HDF5 file for a specific scan and time point. -342 The attribute data is stored in the specified attribute group. -343 If an attribute string is provided, only the corresponding attribute value is returned. -344 If no attribute string is provided, all attribute data in the group is returned as a dictionary. -345 """ -346 # Get index of self.scans where scan_index_str is found -347 scan_label = self.scans[scan_index] +286 return index_to_pull +287 +288 def get_high_level_attr_data(self, attr_str): +289 """ +290 Get high-level attribute data from the HDF5 file. 
+291 +292 Parameters +293 ---------- +294 attr_str : str +295 The attribute string. +296 +297 Returns +298 ------- +299 dict +300 The attribute data. +301 +302 Raises +303 ------ +304 KeyError +305 If the attribute string is not found in the HDF5 file. +306 """ +307 +308 return self.h5pydata.attrs[attr_str] +309 +310 def get_scan_group_attr_data( +311 self, scan_index, time_index, attr_group, attr_srt=None +312 ): +313 """ +314 Get scan group attribute data from the HDF5 file. +315 +316 Parameters +317 ---------- +318 scan_index : int +319 The index of the scan. +320 time_index : int +321 The index of the time point. +322 attr_group : str +323 The attribute group. +324 attr_srt : str, optional +325 The attribute string. Default is None. +326 +327 Returns +328 ------- +329 dict +330 The attribute data. +331 +332 Notes +333 ----- +334 This method retrieves attribute data from the HDF5 file for a specific scan and time point. +335 The attribute data is stored in the specified attribute group. +336 If an attribute string is provided, only the corresponding attribute value is returned. +337 If no attribute string is provided, all attribute data in the group is returned as a dictionary. 
+338 """ +339 # Get index of self.scans where scan_index_str is found +340 scan_label = self.scans[scan_index] +341 +342 index_to_pull = self.get_time_index_to_pull(scan_label, time_index) +343 +344 if attr_srt: +345 return json.loads( +346 self.h5pydata[scan_label][index_to_pull].attrs[attr_group] +347 )[attr_srt] 348 -349 index_to_pull = self.get_time_index_to_pull(scan_label, time_index) -350 -351 if attr_srt: -352 return json.loads( -353 self.h5pydata[scan_label][index_to_pull].attrs[attr_group] -354 )[attr_srt] +349 else: +350 data = self.h5pydata[scan_label][index_to_pull].attrs.get(attr_group) +351 if data: +352 return json.loads(data) +353 else: +354 return {} 355 -356 else: -357 data = self.h5pydata[scan_label][index_to_pull].attrs.get(attr_group) -358 if data: -359 return json.loads(data) -360 else: -361 return {} -362 -363 def get_raw_data_attr_data(self, scan_index, attr_group, attr_str): -364 """ -365 Get raw data attribute data from the HDF5 file. -366 -367 Parameters -368 ---------- -369 scan_index : int -370 The index of the scan. -371 attr_group : str -372 The attribute group. -373 attr_str : str -374 The attribute string. -375 -376 Returns -377 ------- -378 dict -379 The attribute data. -380 -381 Raises -382 ------ -383 KeyError -384 If the attribute string is not found in the attribute group. -385 -386 Notes -387 ----- -388 This method retrieves the attribute data associated with a specific scan, attribute group, and attribute string -389 from the HDF5 file. It returns the attribute data as a dictionary. 
-390 -391 Example usage: -392 >>> data = get_raw_data_attr_data(0, "group1", "attribute1") -393 >>> print(data) -394 {'key1': 'value1', 'key2': 'value2'} -395 """ -396 scan_label = self.scans[scan_index] -397 try: -398 json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[attr_str] -399 except KeyError: -400 attr_str = attr_str.replace("baseline", "baselise") -401 return json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[attr_str] -402 -403 def get_output_parameters(self, polarity, scan_index=0): -404 """ -405 Get the output parameters for the mass spectrum. -406 -407 Parameters -408 ---------- -409 polarity : str -410 The polarity of the mass spectrum. -411 scan_index : int, optional -412 The index of the scan. Default is 0. -413 -414 Returns -415 ------- -416 dict -417 The output parameters. -418 """ -419 -420 d_params = default_parameters(self.file_location) -421 d_params["filename_path"] = self.file_location -422 d_params['polarity'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'polarity') -423 d_params['rt'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'rt') -424 -425 d_params['tic'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'tic') -426 -427 d_params['mobility_scan'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'mobility_scan') -428 d_params['mobility_rt'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'mobility_rt') -429 d_params['Aterm'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'Aterm') -430 d_params['Bterm'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'Bterm') -431 d_params['Cterm'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'Cterm') -432 d_params['baseline_noise'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'baseline_noise') -433 d_params['baseline_noise_std'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'baseline_noise_std') -434 -435 d_params['analyzer'] = 
self.get_high_level_attr_data('analyzer') -436 d_params['instrument_label'] = self.get_high_level_attr_data('instrument_label') -437 d_params['sample_name'] = self.get_high_level_attr_data('sample_name') -438 -439 return d_params +356 def get_raw_data_attr_data(self, scan_index, attr_group, attr_str): +357 """ +358 Get raw data attribute data from the HDF5 file. +359 +360 Parameters +361 ---------- +362 scan_index : int +363 The index of the scan. +364 attr_group : str +365 The attribute group. +366 attr_str : str +367 The attribute string. +368 +369 Returns +370 ------- +371 dict +372 The attribute data. +373 +374 Raises +375 ------ +376 KeyError +377 If the attribute string is not found in the attribute group. +378 +379 Notes +380 ----- +381 This method retrieves the attribute data associated with a specific scan, attribute group, and attribute string +382 from the HDF5 file. It returns the attribute data as a dictionary. +383 +384 Example usage: +385 >>> data = get_raw_data_attr_data(0, "group1", "attribute1") +386 >>> print(data) +387 {'key1': 'value1', 'key2': 'value2'} +388 """ +389 scan_label = self.scans[scan_index] +390 try: +391 json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[attr_str] +392 except KeyError: +393 attr_str = attr_str.replace("baseline", "baselise") +394 return json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[ +395 attr_str +396 ] +397 +398 def get_output_parameters(self, polarity, scan_index=0): +399 """ +400 Get the output parameters for the mass spectrum. +401 +402 Parameters +403 ---------- +404 polarity : str +405 The polarity of the mass spectrum. +406 scan_index : int, optional +407 The index of the scan. Default is 0. +408 +409 Returns +410 ------- +411 dict +412 The output parameters. 
+413 """ +414 +415 d_params = default_parameters(self.file_location) +416 d_params["filename_path"] = self.file_location +417 d_params["polarity"] = self.get_raw_data_attr_data( +418 scan_index, "MassSpecAttrs", "polarity" +419 ) +420 d_params["rt"] = self.get_raw_data_attr_data(scan_index, "MassSpecAttrs", "rt") +421 +422 d_params["tic"] = self.get_raw_data_attr_data( +423 scan_index, "MassSpecAttrs", "tic" +424 ) +425 +426 d_params["mobility_scan"] = self.get_raw_data_attr_data( +427 scan_index, "MassSpecAttrs", "mobility_scan" +428 ) +429 d_params["mobility_rt"] = self.get_raw_data_attr_data( +430 scan_index, "MassSpecAttrs", "mobility_rt" +431 ) +432 d_params["Aterm"] = self.get_raw_data_attr_data( +433 scan_index, "MassSpecAttrs", "Aterm" +434 ) +435 d_params["Bterm"] = self.get_raw_data_attr_data( +436 scan_index, "MassSpecAttrs", "Bterm" +437 ) +438 d_params["Cterm"] = self.get_raw_data_attr_data( +439 scan_index, "MassSpecAttrs", "Cterm" +440 ) +441 d_params["baseline_noise"] = self.get_raw_data_attr_data( +442 scan_index, "MassSpecAttrs", "baseline_noise" +443 ) +444 d_params["baseline_noise_std"] = self.get_raw_data_attr_data( +445 scan_index, "MassSpecAttrs", "baseline_noise_std" +446 ) +447 +448 d_params["analyzer"] = self.get_high_level_attr_data("analyzer") +449 d_params["instrument_label"] = self.get_high_level_attr_data("instrument_label") +450 d_params["sample_name"] = self.get_high_level_attr_data("sample_name") +451 +452 return d_params

    @@ -547,431 +560,445 @@

    -
     16class ReadCoreMSHDF_MassSpectrum(ReadCoremsMasslist):
    - 17    """Class for reading mass spectrum data from a CoreMS HDF5 file.
    - 18
    - 19    Attributes
    - 20    ----------
    - 21    h5pydata : h5py.File
    - 22        The HDF5 file object.
    - 23    scans : list
    - 24        List of scan labels in the HDF5 file.
    - 25
    - 26    Parameters
    - 27    ----------
    - 28    file_location : str or S3Path
    - 29        The path to the CoreMS HDF5 file.
    - 30
    - 31    Methods
    - 32    -------
    - 33    * load_raw_data(mass_spectrum, scan_index=0) Load raw data into the mass spectrum object.
    - 34    * get_mass_spectrum(scan_number=0, time_index=-1, auto_process=True, load_settings=True, load_raw=True).Get a mass spectrum object.
    - 35    * load_settings(mass_spectrum, scan_index=0, time_index=-1). Load settings into the mass spectrum object.
    - 36    * get_dataframe(scan_index=0, time_index=-1). Get a pandas DataFrame representing the mass spectrum.
    - 37    * get_time_index_to_pull(scan_label, time_index). Get the time index to pull from the HDF5 file.
    - 38    * get_high_level_attr_data(attr_str). Get high-level attribute data from the HDF5 file.
    - 39    * get_scan_group_attr_data(scan_index, time_index, attr_group, attr_srt=None). Get scan group attribute data from the HDF5 file.
    - 40    * get_raw_data_attr_data(scan_index, attr_group, attr_str). Get raw data attribute data from the HDF5 file.
    - 41    * get_output_parameters(polarity, scan_index=0). Get the output parameters for the mass spectrum.
    - 42    """
    - 43
    - 44    def __init__(self, file_location):
    - 45        super().__init__(file_location)
    - 46
    - 47        if isinstance(self.file_location, S3Path):
    - 48            data = BytesIO(self.file_location.open("rb").read())
    - 49        else:
    - 50            data = self.file_location
    - 51
    - 52        self.h5pydata = h5py.File(data, "r")
    - 53
    - 54        self.scans = list(self.h5pydata.keys())
    - 55
    - 56    def load_raw_data(self, mass_spectrum, scan_index=0):
    - 57        """
    - 58        Load raw data into the mass spectrum object.
    - 59
    - 60        Parameters
    - 61        ----------
    - 62        mass_spectrum : MassSpecCentroid
    - 63            The mass spectrum object to load the raw data into.
    - 64        scan_index : int, optional
    - 65            The index of the scan to load the raw data from. Default is 0.
    - 66        """
    - 67
    - 68        scan_label = self.scans[scan_index]
    - 69
    - 70        # Check if the "raw_ms" group in the scan is empty
    - 71        if self.h5pydata[scan_label]["raw_ms"].shape is not None:
    +            
     15class ReadCoreMSHDF_MassSpectrum(ReadCoremsMasslist):
    + 16    """Class for reading mass spectrum data from a CoreMS HDF5 file.
    + 17
    + 18    Attributes
    + 19    ----------
    + 20    h5pydata : h5py.File
    + 21        The HDF5 file object.
    + 22    scans : list
    + 23        List of scan labels in the HDF5 file.
    + 24
    + 25    Parameters
    + 26    ----------
    + 27    file_location : str or S3Path
    + 28        The path to the CoreMS HDF5 file.
    + 29
    + 30    Methods
    + 31    -------
    + 32    * load_raw_data(mass_spectrum, scan_index=0) Load raw data into the mass spectrum object.
    + 33    * get_mass_spectrum(scan_number=0, time_index=-1, auto_process=True, load_settings=True, load_raw=True).Get a mass spectrum object.
    + 34    * load_settings(mass_spectrum, scan_index=0, time_index=-1). Load settings into the mass spectrum object.
    + 35    * get_dataframe(scan_index=0, time_index=-1). Get a pandas DataFrame representing the mass spectrum.
    + 36    * get_time_index_to_pull(scan_label, time_index). Get the time index to pull from the HDF5 file.
    + 37    * get_high_level_attr_data(attr_str). Get high-level attribute data from the HDF5 file.
    + 38    * get_scan_group_attr_data(scan_index, time_index, attr_group, attr_srt=None). Get scan group attribute data from the HDF5 file.
    + 39    * get_raw_data_attr_data(scan_index, attr_group, attr_str). Get raw data attribute data from the HDF5 file.
    + 40    * get_output_parameters(polarity, scan_index=0). Get the output parameters for the mass spectrum.
    + 41    """
    + 42
    + 43    def __init__(self, file_location):
    + 44        super().__init__(file_location)
    + 45
    + 46        if isinstance(self.file_location, S3Path):
    + 47            data = BytesIO(self.file_location.open("rb").read())
    + 48        else:
    + 49            data = self.file_location
    + 50
    + 51        self.h5pydata = h5py.File(data, "r")
    + 52
    + 53        self.scans = list(self.h5pydata.keys())
    + 54
    + 55    def load_raw_data(self, mass_spectrum, scan_index=0):
    + 56        """
    + 57        Load raw data into the mass spectrum object.
    + 58
    + 59        Parameters
    + 60        ----------
    + 61        mass_spectrum : MassSpecCentroid
    + 62            The mass spectrum object to load the raw data into.
    + 63        scan_index : int, optional
    + 64            The index of the scan to load the raw data from. Default is 0.
    + 65        """
    + 66
    + 67        scan_label = self.scans[scan_index]
    + 68
    + 69        # Check if the "raw_ms" group in the scan is empty
    + 70        if self.h5pydata[scan_label]["raw_ms"].shape is not None:
    + 71            mz_profile = self.h5pydata[scan_label]["raw_ms"][0]
      72
    - 73            mz_profile = self.h5pydata[scan_label]["raw_ms"][0]
    + 73            abundance_profile = self.h5pydata[scan_label]["raw_ms"][1]
      74
    - 75            abundance_profile = self.h5pydata[scan_label]["raw_ms"][1]
    + 75            mass_spectrum.mz_exp_profile = mz_profile
      76
    - 77            mass_spectrum.mz_exp_profile = mz_profile
    + 77            mass_spectrum.abundance_profile = abundance_profile
      78
    - 79            mass_spectrum.abundance_profile = abundance_profile
    - 80
    - 81    def get_mass_spectrum(
    - 82        self,
    - 83        scan_number=0,
    - 84        time_index=-1,
    - 85        auto_process=True,
    - 86        load_settings=True,
    - 87        load_raw=True,
    - 88        load_molecular_formula=True,
    - 89    ):
    - 90        """
    - 91        Instantiate a mass spectrum object from the CoreMS HDF5 file. 
    - 92        Note that this always returns a centroid mass spectrum object; functionality for profile and
    - 93        frequency mass spectra is not yet implemented.
    - 94
    - 95        Parameters
    - 96        ----------
    - 97        scan_number : int, optional
    - 98            The index of the scan to retrieve the mass spectrum from. Default is 0.
    - 99        time_index : int, optional
    -100            The index of the time point to retrieve the mass spectrum from. Default is -1.
    -101        auto_process : bool, optional
    -102            Whether to automatically process the mass spectrum. Default is True.
    -103        load_settings : bool, optional
    -104            Whether to load the settings into the mass spectrum object. Default is True.
    -105        load_raw : bool, optional
    -106            Whether to load the raw data into the mass spectrum object. Default is True.
    -107        load_molecular_formula : bool, optional
    -108            Whether to load the molecular formula into the mass spectrum object.
    -109            Default is True.
    -110
    -111        Returns
    -112        -------
    -113        MassSpecCentroid
    -114            The mass spectrum object.
    -115        
    -116        Raises
    -117        ------
    -118        ValueError
    -119            If the CoreMS file is not valid.
    -120            If the mass spectrum has not been processed and load_molecular_formula is True.
    -121        """
    -122        if "mass_spectra" in self.scans[0]:
    -123            scan_index = self.scans.index("mass_spectra/" + str(scan_number))
    -124        else:
    -125            scan_index = self.scans.index(str(scan_number))
    -126        dataframe = self.get_dataframe(scan_index, time_index=time_index)
    -127        if dataframe["Molecular Formula"].any() and not dataframe["C"].any():
    -128            cols = dataframe.columns.tolist()
    -129            cols = cols[cols.index("Molecular Formula") + 1 :]
    -130            for index, row in dataframe.iterrows():
    -131                if row["Molecular Formula"] is not None:
    -132                    og_formula = row["Molecular Formula"]
    -133                    for col in cols:
    -134                        if "col" in og_formula:
    -135                            # get the digit after the element ("col") in the molecular formula and set it to the dataframe
    -136                            row[col] = int(og_formula.split(col)[1].split(" ")[0])
    -137
    -138        if not set(
    -139            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
    -140        ).issubset(dataframe.columns):
    -141            raise ValueError(
    -142                "%s it is not a valid CoreMS file" % str(self.file_location)
    -143            )
    + 79    def get_mass_spectrum(
    + 80        self,
    + 81        scan_number=0,
    + 82        time_index=-1,
    + 83        auto_process=True,
    + 84        load_settings=True,
    + 85        load_raw=True,
    + 86        load_molecular_formula=True,
    + 87    ):
    + 88        """
    + 89        Instantiate a mass spectrum object from the CoreMS HDF5 file.
    + 90        Note that this always returns a centroid mass spectrum object; functionality for profile and
    + 91        frequency mass spectra is not yet implemented.
    + 92
    + 93        Parameters
    + 94        ----------
    + 95        scan_number : int, optional
    + 96            The index of the scan to retrieve the mass spectrum from. Default is 0.
    + 97        time_index : int, optional
    + 98            The index of the time point to retrieve the mass spectrum from. Default is -1.
    + 99        auto_process : bool, optional
    +100            Whether to automatically process the mass spectrum. Default is True.
    +101        load_settings : bool, optional
    +102            Whether to load the settings into the mass spectrum object. Default is True.
    +103        load_raw : bool, optional
    +104            Whether to load the raw data into the mass spectrum object. Default is True.
    +105        load_molecular_formula : bool, optional
    +106            Whether to load the molecular formula into the mass spectrum object.
    +107            Default is True.
    +108
    +109        Returns
    +110        -------
    +111        MassSpecCentroid
    +112            The mass spectrum object.
    +113
    +114        Raises
    +115        ------
    +116        ValueError
    +117            If the CoreMS file is not valid.
    +118            If the mass spectrum has not been processed and load_molecular_formula is True.
    +119        """
    +120        if "mass_spectra" in self.scans[0]:
    +121            scan_index = self.scans.index("mass_spectra/" + str(scan_number))
    +122        else:
    +123            scan_index = self.scans.index(str(scan_number))
    +124        dataframe = self.get_dataframe(scan_index, time_index=time_index)
    +125        if dataframe["Molecular Formula"].any() and not dataframe["C"].any():
    +126            cols = dataframe.columns.tolist()
    +127            cols = cols[cols.index("Molecular Formula") + 1 :]
    +128            for index, row in dataframe.iterrows():
    +129                if row["Molecular Formula"] is not None:
    +130                    og_formula = row["Molecular Formula"]
    +131                    for col in cols:
    +132                        if "col" in og_formula:
    +133                            # get the digit after the element ("col") in the molecular formula and set it to the dataframe
    +134                            row[col] = int(og_formula.split(col)[1].split(" ")[0])
    +135
    +136        if not set(
    +137            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
    +138        ).issubset(dataframe.columns):
    +139            raise ValueError(
    +140                "%s it is not a valid CoreMS file" % str(self.file_location)
    +141            )
    +142
    +143        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
     144
    -145        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -146
    -147        # Cast m/z, and 'Peak Height' to float
    -148        dataframe["m/z"] = dataframe["m/z"].astype(float)
    -149        dataframe["Peak Height"] = dataframe["Peak Height"].astype(float)
    +145        # Cast m/z, and 'Peak Height' to float
    +146        dataframe["m/z"] = dataframe["m/z"].astype(float)
    +147        dataframe["Peak Height"] = dataframe["Peak Height"].astype(float)
    +148
    +149        polarity = dataframe["Ion Charge"].values[0]
     150
    -151        polarity = dataframe["Ion Charge"].values[0]
    +151        output_parameters = self.get_output_parameters(polarity, scan_index=scan_index)
     152
    -153        output_parameters = self.get_output_parameters(polarity, scan_index=scan_index)
    -154
    -155        mass_spec_obj = MassSpecCentroid(
    -156            dataframe.to_dict(orient="list"), output_parameters, auto_process = False
    -157        )
    -158
    -159        if auto_process:
    -160            # Set the settings on the mass spectrum object to relative abuncance of 0 so all peaks get added
    -161            mass_spec_obj.settings.noise_threshold_method = "absolute_abundance"
    -162            mass_spec_obj.settings.noise_threshold_absolute_abundance = 0
    -163            mass_spec_obj.process_mass_spec()
    -164
    -165        if load_settings:
    -166            # Load settings into the mass spectrum object
    -167            self.load_settings(
    -168                mass_spec_obj, 
    -169                scan_index=scan_index, 
    -170                time_index=time_index
    -171            )
    -172
    -173        if load_raw:
    -174            self.load_raw_data(
    -175                mass_spec_obj, 
    -176                scan_index=scan_index
    -177                )
    -178
    -179        if load_molecular_formula:
    -180            if not auto_process:
    -181                raise ValueError(
    -182                    "Can only add molecular formula if the mass spectrum has been processed"
    -183                )
    -184            else:
    -185                self.add_molecular_formula(mass_spec_obj, dataframe)
    -186
    -187        return mass_spec_obj
    -188
    -189    def load_settings(self, mass_spectrum, scan_index=0, time_index=-1):
    -190        """
    -191        Load settings into the mass spectrum object.
    -192
    -193        Parameters
    -194        ----------
    -195        mass_spectrum : MassSpecCentroid
    -196            The mass spectrum object to load the settings into.
    -197        scan_index : int, optional
    -198            The index of the scan to load the settings from. Default is 0.
    -199        time_index : int, optional
    -200            The index of the time point to load the settings from. Default is -1.
    -201        """
    -202
    -203        loaded_settings = {}
    -204        loaded_settings["MoleculaSearch"] = self.get_scan_group_attr_data(
    -205            scan_index, time_index, "MoleculaSearchSetting"
    -206        )
    -207        loaded_settings["MassSpecPeak"] = self.get_scan_group_attr_data(
    -208            scan_index, time_index, "MassSpecPeakSetting"
    -209        )
    -210        loaded_settings["MassSpectrum"] = self.get_scan_group_attr_data(
    -211            scan_index, time_index, "MassSpectrumSetting"
    -212        )
    -213        loaded_settings["Transient"] = self.get_scan_group_attr_data(
    -214            scan_index, time_index, "TransientSetting"
    -215        )
    -216
    -217        _set_dict_data_ms(loaded_settings, mass_spectrum)
    -218
    -219    def get_dataframe(self, scan_index=0, time_index=-1):
    -220        """
    -221        Get a pandas DataFrame representing the mass spectrum.
    +153        mass_spec_obj = MassSpecCentroid(
    +154            dataframe.to_dict(orient="list"), output_parameters, auto_process=False
    +155        )
    +156
    +157        if auto_process:
    +158            # Set the settings on the mass spectrum object to relative abuncance of 0 so all peaks get added
    +159            mass_spec_obj.settings.noise_threshold_method = "absolute_abundance"
    +160            mass_spec_obj.settings.noise_threshold_absolute_abundance = 0
    +161            mass_spec_obj.process_mass_spec()
    +162
    +163        if load_settings:
    +164            # Load settings into the mass spectrum object
    +165            self.load_settings(
    +166                mass_spec_obj, scan_index=scan_index, time_index=time_index
    +167            )
    +168
    +169        if load_raw:
    +170            self.load_raw_data(mass_spec_obj, scan_index=scan_index)
    +171
    +172        if load_molecular_formula:
    +173            if not auto_process:
    +174                raise ValueError(
    +175                    "Can only add molecular formula if the mass spectrum has been processed"
    +176                )
    +177            else:
    +178                self.add_molecular_formula(mass_spec_obj, dataframe)
    +179
    +180        return mass_spec_obj
    +181
    +182    def load_settings(self, mass_spectrum, scan_index=0, time_index=-1):
    +183        """
    +184        Load settings into the mass spectrum object.
    +185
    +186        Parameters
    +187        ----------
    +188        mass_spectrum : MassSpecCentroid
    +189            The mass spectrum object to load the settings into.
    +190        scan_index : int, optional
    +191            The index of the scan to load the settings from. Default is 0.
    +192        time_index : int, optional
    +193            The index of the time point to load the settings from. Default is -1.
    +194        """
    +195
    +196        loaded_settings = {}
    +197        loaded_settings["MoleculaSearch"] = self.get_scan_group_attr_data(
    +198            scan_index, time_index, "MoleculaSearchSetting"
    +199        )
    +200        loaded_settings["MassSpecPeak"] = self.get_scan_group_attr_data(
    +201            scan_index, time_index, "MassSpecPeakSetting"
    +202        )
    +203        loaded_settings["MassSpectrum"] = self.get_scan_group_attr_data(
    +204            scan_index, time_index, "MassSpectrumSetting"
    +205        )
    +206        loaded_settings["Transient"] = self.get_scan_group_attr_data(
    +207            scan_index, time_index, "TransientSetting"
    +208        )
    +209
    +210        _set_dict_data_ms(loaded_settings, mass_spectrum)
    +211
    +212    def get_dataframe(self, scan_index=0, time_index=-1):
    +213        """
    +214        Get a pandas DataFrame representing the mass spectrum.
    +215
    +216        Parameters
    +217        ----------
    +218        scan_index : int, optional
    +219            The index of the scan to retrieve the DataFrame from. Default is 0.
    +220        time_index : int, optional
    +221            The index of the time point to retrieve the DataFrame from. Default is -1.
     222
    -223        Parameters
    -224        ----------
    -225        scan_index : int, optional
    -226            The index of the scan to retrieve the DataFrame from. Default is 0.
    -227        time_index : int, optional
    -228            The index of the time point to retrieve the DataFrame from. Default is -1.
    -229
    -230        Returns
    -231        -------
    -232        DataFrame
    -233            The pandas DataFrame representing the mass spectrum.
    -234        """
    -235
    -236        columnsLabels = self.get_scan_group_attr_data(
    -237            scan_index, time_index, "ColumnsLabels"
    -238        )
    -239
    -240        scan_label = self.scans[scan_index]
    -241
    -242        index_to_pull = self.get_time_index_to_pull(scan_label, time_index)
    -243
    -244        corems_table_data = self.h5pydata[scan_label][index_to_pull]
    -245
    -246        list_dict = []
    -247        for row in corems_table_data:
    -248            data_dict = {}
    -249            for data_index, data in enumerate(row):
    -250                label = columnsLabels[data_index]
    -251                # if data starts with a b' it is a byte string, so decode it
    -252                if isinstance(data, bytes):
    -253                    data = data.decode("utf-8")
    -254                if data == "nan":
    -255                    data = None
    -256                data_dict[label] = data
    -257
    -258            list_dict.append(data_dict)
    -259
    -260        # Reorder the columns from low to high "Index" to match the order of the dataframe
    -261        df = DataFrame(list_dict)
    -262        # set the "Index" column to int so it sorts correctly
    -263        df["Index"] = df["Index"].astype(int)
    -264        df = df.sort_values(by="Index")
    -265        # Reset index to match the "Index" column
    -266        df = df.set_index("Index", drop=False)
    -267
    -268        return df
    -269
    -270    def get_time_index_to_pull(self, scan_label, time_index):
    -271        """
    -272        Get the time index to pull from the HDF5 file.
    +223        Returns
    +224        -------
    +225        DataFrame
    +226            The pandas DataFrame representing the mass spectrum.
    +227        """
    +228
    +229        columnsLabels = self.get_scan_group_attr_data(
    +230            scan_index, time_index, "ColumnsLabels"
    +231        )
    +232
    +233        scan_label = self.scans[scan_index]
    +234
    +235        index_to_pull = self.get_time_index_to_pull(scan_label, time_index)
    +236
    +237        corems_table_data = self.h5pydata[scan_label][index_to_pull]
    +238
    +239        list_dict = []
    +240        for row in corems_table_data:
    +241            data_dict = {}
    +242            for data_index, data in enumerate(row):
    +243                label = columnsLabels[data_index]
    +244                # if data starts with a b' it is a byte string, so decode it
    +245                if isinstance(data, bytes):
    +246                    data = data.decode("utf-8")
    +247                if data == "nan":
    +248                    data = None
    +249                data_dict[label] = data
    +250
    +251            list_dict.append(data_dict)
    +252
    +253        # Reorder the columns from low to high "Index" to match the order of the dataframe
    +254        df = DataFrame(list_dict)
    +255        # set the "Index" column to int so it sorts correctly
    +256        df["Index"] = df["Index"].astype(int)
    +257        df = df.sort_values(by="Index")
    +258        # Reset index to match the "Index" column
    +259        df = df.set_index("Index", drop=False)
    +260
    +261        return df
    +262
    +263    def get_time_index_to_pull(self, scan_label, time_index):
    +264        """
    +265        Get the time index to pull from the HDF5 file.
    +266
    +267        Parameters
    +268        ----------
    +269        scan_label : str
    +270            The label of the scan.
    +271        time_index : int
    +272            The index of the time point.
     273
    -274        Parameters
    -275        ----------
    -276        scan_label : str
    -277            The label of the scan.
    -278        time_index : int
    -279            The index of the time point.
    -280
    -281        Returns
    -282        -------
    -283        str
    -284            The time index to pull.
    -285        """
    +274        Returns
    +275        -------
    +276        str
    +277            The time index to pull.
    +278        """
    +279
    +280        time_data = sorted(
    +281            [(i, int(i)) for i in self.h5pydata[scan_label].keys() if i != "raw_ms"],
    +282            key=lambda m: m[1],
    +283        )
    +284
    +285        index_to_pull = time_data[time_index][0]
     286
    -287        time_data = sorted(
    -288            [(i, int(i)) for i in self.h5pydata[scan_label].keys() if i != "raw_ms"],
    -289            key=lambda m: m[1],
    -290        )
    -291
    -292        index_to_pull = time_data[time_index][0]
    -293
    -294        return index_to_pull
    -295
    -296    def get_high_level_attr_data(self, attr_str):
    -297        """
    -298        Get high-level attribute data from the HDF5 file.
    -299
    -300        Parameters
    -301        ----------
    -302        attr_str : str
    -303            The attribute string.
    -304
    -305        Returns
    -306        -------
    -307        dict
    -308            The attribute data.
    -309
    -310        Raises
    -311        ------
    -312        KeyError
    -313            If the attribute string is not found in the HDF5 file.
    -314        """
    -315
    -316        return self.h5pydata.attrs[attr_str]
    -317
    -318    def get_scan_group_attr_data(
    -319        self, scan_index, time_index, attr_group, attr_srt=None
    -320    ):
    -321        """
    -322        Get scan group attribute data from the HDF5 file.
    -323
    -324        Parameters
    -325        ----------
    -326        scan_index : int
    -327            The index of the scan.
    -328        time_index : int
    -329            The index of the time point.
    -330        attr_group : str
    -331            The attribute group.
    -332        attr_srt : str, optional
    -333            The attribute string. Default is None.
    -334
    -335        Returns
    -336        -------
    -337        dict
    -338            The attribute data.
    -339
    -340        Notes
    -341        -----
    -342        This method retrieves attribute data from the HDF5 file for a specific scan and time point.
    -343        The attribute data is stored in the specified attribute group.
    -344        If an attribute string is provided, only the corresponding attribute value is returned.
    -345        If no attribute string is provided, all attribute data in the group is returned as a dictionary.
    -346        """
    -347        # Get index of self.scans where scan_index_str is found
    -348        scan_label = self.scans[scan_index]
    +287        return index_to_pull
    +288
    +289    def get_high_level_attr_data(self, attr_str):
    +290        """
    +291        Get high-level attribute data from the HDF5 file.
    +292
    +293        Parameters
    +294        ----------
    +295        attr_str : str
    +296            The attribute string.
    +297
    +298        Returns
    +299        -------
    +300        dict
    +301            The attribute data.
    +302
    +303        Raises
    +304        ------
    +305        KeyError
    +306            If the attribute string is not found in the HDF5 file.
    +307        """
    +308
    +309        return self.h5pydata.attrs[attr_str]
    +310
    +311    def get_scan_group_attr_data(
    +312        self, scan_index, time_index, attr_group, attr_srt=None
    +313    ):
    +314        """
    +315        Get scan group attribute data from the HDF5 file.
    +316
    +317        Parameters
    +318        ----------
    +319        scan_index : int
    +320            The index of the scan.
    +321        time_index : int
    +322            The index of the time point.
    +323        attr_group : str
    +324            The attribute group.
    +325        attr_srt : str, optional
    +326            The attribute string. Default is None.
    +327
    +328        Returns
    +329        -------
    +330        dict
    +331            The attribute data.
    +332
    +333        Notes
    +334        -----
    +335        This method retrieves attribute data from the HDF5 file for a specific scan and time point.
    +336        The attribute data is stored in the specified attribute group.
    +337        If an attribute string is provided, only the corresponding attribute value is returned.
    +338        If no attribute string is provided, all attribute data in the group is returned as a dictionary.
    +339        """
    +340        # Get index of self.scans where scan_index_str is found
    +341        scan_label = self.scans[scan_index]
    +342
    +343        index_to_pull = self.get_time_index_to_pull(scan_label, time_index)
    +344
    +345        if attr_srt:
    +346            return json.loads(
    +347                self.h5pydata[scan_label][index_to_pull].attrs[attr_group]
    +348            )[attr_srt]
     349
    -350        index_to_pull = self.get_time_index_to_pull(scan_label, time_index)
    -351
    -352        if attr_srt:
    -353            return json.loads(
    -354                self.h5pydata[scan_label][index_to_pull].attrs[attr_group]
    -355            )[attr_srt]
    +350        else:
    +351            data = self.h5pydata[scan_label][index_to_pull].attrs.get(attr_group)
    +352            if data:
    +353                return json.loads(data)
    +354            else:
    +355                return {}
     356
    -357        else:
    -358            data = self.h5pydata[scan_label][index_to_pull].attrs.get(attr_group)
    -359            if data:
    -360                return json.loads(data)
    -361            else:
    -362                return {}
    -363
    -364    def get_raw_data_attr_data(self, scan_index, attr_group, attr_str):
    -365        """
    -366        Get raw data attribute data from the HDF5 file.
    -367
    -368        Parameters
    -369        ----------
    -370        scan_index : int
    -371            The index of the scan.
    -372        attr_group : str
    -373            The attribute group.
    -374        attr_str : str
    -375            The attribute string.
    -376
    -377        Returns
    -378        -------
    -379        dict
    -380            The attribute data.
    -381
    -382        Raises
    -383        ------
    -384        KeyError
    -385            If the attribute string is not found in the attribute group.
    -386
    -387        Notes
    -388        -----
    -389        This method retrieves the attribute data associated with a specific scan, attribute group, and attribute string
    -390        from the HDF5 file. It returns the attribute data as a dictionary.
    -391
    -392        Example usage:
    -393        >>> data = get_raw_data_attr_data(0, "group1", "attribute1")
    -394        >>> print(data)
    -395        {'key1': 'value1', 'key2': 'value2'}
    -396        """
    -397        scan_label = self.scans[scan_index]
    -398        try:
    -399            json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[attr_str]
    -400        except KeyError:
    -401            attr_str = attr_str.replace("baseline", "baselise")
    -402        return json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[attr_str]
    -403
    -404    def get_output_parameters(self, polarity, scan_index=0):
    -405        """
    -406        Get the output parameters for the mass spectrum.
    -407
    -408        Parameters
    -409        ----------
    -410        polarity : str
    -411            The polarity of the mass spectrum.
    -412        scan_index : int, optional
    -413            The index of the scan. Default is 0.
    -414
    -415        Returns
    -416        -------
    -417        dict
    -418            The output parameters.
    -419        """
    -420
    -421        d_params = default_parameters(self.file_location)
    -422        d_params["filename_path"] = self.file_location
    -423        d_params['polarity'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'polarity')
    -424        d_params['rt'] =     self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'rt')
    -425        
    -426        d_params['tic'] =  self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'tic')
    -427        
    -428        d_params['mobility_scan'] =    self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'mobility_scan')
    -429        d_params['mobility_rt'] =     self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'mobility_rt')
    -430        d_params['Aterm'] =  self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'Aterm')
    -431        d_params['Bterm'] =  self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'Bterm')
    -432        d_params['Cterm'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'Cterm')
    -433        d_params['baseline_noise'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'baseline_noise')
    -434        d_params['baseline_noise_std'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'baseline_noise_std')
    -435        
    -436        d_params['analyzer'] = self.get_high_level_attr_data('analyzer')
    -437        d_params['instrument_label'] = self.get_high_level_attr_data('instrument_label')
    -438        d_params['sample_name'] = self.get_high_level_attr_data('sample_name')
    -439
    -440        return d_params
    +357    def get_raw_data_attr_data(self, scan_index, attr_group, attr_str):
    +358        """
    +359        Get raw data attribute data from the HDF5 file.
    +360
    +361        Parameters
    +362        ----------
    +363        scan_index : int
    +364            The index of the scan.
    +365        attr_group : str
    +366            The attribute group.
    +367        attr_str : str
    +368            The attribute string.
    +369
    +370        Returns
    +371        -------
    +372        dict
    +373            The attribute data.
    +374
    +375        Raises
    +376        ------
    +377        KeyError
    +378            If the attribute string is not found in the attribute group.
    +379
    +380        Notes
    +381        -----
    +382        This method retrieves the attribute data associated with a specific scan, attribute group, and attribute string
    +383        from the HDF5 file. It returns the attribute data as a dictionary.
    +384
    +385        Example usage:
    +386        >>> data = get_raw_data_attr_data(0, "group1", "attribute1")
    +387        >>> print(data)
    +388        {'key1': 'value1', 'key2': 'value2'}
    +389        """
    +390        scan_label = self.scans[scan_index]
    +391        try:
    +392            json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[attr_str]
    +393        except KeyError:
    +394            attr_str = attr_str.replace("baseline", "baselise")
    +395        return json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[
    +396            attr_str
    +397        ]
    +398
    +399    def get_output_parameters(self, polarity, scan_index=0):
    +400        """
    +401        Get the output parameters for the mass spectrum.
    +402
    +403        Parameters
    +404        ----------
    +405        polarity : str
    +406            The polarity of the mass spectrum.
    +407        scan_index : int, optional
    +408            The index of the scan. Default is 0.
    +409
    +410        Returns
    +411        -------
    +412        dict
    +413            The output parameters.
    +414        """
    +415
    +416        d_params = default_parameters(self.file_location)
    +417        d_params["filename_path"] = self.file_location
    +418        d_params["polarity"] = self.get_raw_data_attr_data(
    +419            scan_index, "MassSpecAttrs", "polarity"
    +420        )
    +421        d_params["rt"] = self.get_raw_data_attr_data(scan_index, "MassSpecAttrs", "rt")
    +422
    +423        d_params["tic"] = self.get_raw_data_attr_data(
    +424            scan_index, "MassSpecAttrs", "tic"
    +425        )
    +426
    +427        d_params["mobility_scan"] = self.get_raw_data_attr_data(
    +428            scan_index, "MassSpecAttrs", "mobility_scan"
    +429        )
    +430        d_params["mobility_rt"] = self.get_raw_data_attr_data(
    +431            scan_index, "MassSpecAttrs", "mobility_rt"
    +432        )
    +433        d_params["Aterm"] = self.get_raw_data_attr_data(
    +434            scan_index, "MassSpecAttrs", "Aterm"
    +435        )
    +436        d_params["Bterm"] = self.get_raw_data_attr_data(
    +437            scan_index, "MassSpecAttrs", "Bterm"
    +438        )
    +439        d_params["Cterm"] = self.get_raw_data_attr_data(
    +440            scan_index, "MassSpecAttrs", "Cterm"
    +441        )
    +442        d_params["baseline_noise"] = self.get_raw_data_attr_data(
    +443            scan_index, "MassSpecAttrs", "baseline_noise"
    +444        )
    +445        d_params["baseline_noise_std"] = self.get_raw_data_attr_data(
    +446            scan_index, "MassSpecAttrs", "baseline_noise_std"
    +447        )
    +448
    +449        d_params["analyzer"] = self.get_high_level_attr_data("analyzer")
    +450        d_params["instrument_label"] = self.get_high_level_attr_data("instrument_label")
    +451        d_params["sample_name"] = self.get_high_level_attr_data("sample_name")
    +452
    +453        return d_params
     
    @@ -1019,17 +1046,17 @@
    Methods
    -
    44    def __init__(self, file_location):
    -45        super().__init__(file_location)
    -46
    -47        if isinstance(self.file_location, S3Path):
    -48            data = BytesIO(self.file_location.open("rb").read())
    -49        else:
    -50            data = self.file_location
    -51
    -52        self.h5pydata = h5py.File(data, "r")
    -53
    -54        self.scans = list(self.h5pydata.keys())
    +            
    43    def __init__(self, file_location):
    +44        super().__init__(file_location)
    +45
    +46        if isinstance(self.file_location, S3Path):
    +47            data = BytesIO(self.file_location.open("rb").read())
    +48        else:
    +49            data = self.file_location
    +50
    +51        self.h5pydata = h5py.File(data, "r")
    +52
    +53        self.scans = list(self.h5pydata.keys())
     
    @@ -1069,30 +1096,29 @@
    Methods
    -
    56    def load_raw_data(self, mass_spectrum, scan_index=0):
    -57        """
    -58        Load raw data into the mass spectrum object.
    -59
    -60        Parameters
    -61        ----------
    -62        mass_spectrum : MassSpecCentroid
    -63            The mass spectrum object to load the raw data into.
    -64        scan_index : int, optional
    -65            The index of the scan to load the raw data from. Default is 0.
    -66        """
    -67
    -68        scan_label = self.scans[scan_index]
    -69
    -70        # Check if the "raw_ms" group in the scan is empty
    -71        if self.h5pydata[scan_label]["raw_ms"].shape is not None:
    +            
    55    def load_raw_data(self, mass_spectrum, scan_index=0):
    +56        """
    +57        Load raw data into the mass spectrum object.
    +58
    +59        Parameters
    +60        ----------
    +61        mass_spectrum : MassSpecCentroid
    +62            The mass spectrum object to load the raw data into.
    +63        scan_index : int, optional
    +64            The index of the scan to load the raw data from. Default is 0.
    +65        """
    +66
    +67        scan_label = self.scans[scan_index]
    +68
    +69        # Check if the "raw_ms" group in the scan is empty
    +70        if self.h5pydata[scan_label]["raw_ms"].shape is not None:
    +71            mz_profile = self.h5pydata[scan_label]["raw_ms"][0]
     72
    -73            mz_profile = self.h5pydata[scan_label]["raw_ms"][0]
    +73            abundance_profile = self.h5pydata[scan_label]["raw_ms"][1]
     74
    -75            abundance_profile = self.h5pydata[scan_label]["raw_ms"][1]
    +75            mass_spectrum.mz_exp_profile = mz_profile
     76
    -77            mass_spectrum.mz_exp_profile = mz_profile
    -78
    -79            mass_spectrum.abundance_profile = abundance_profile
    +77            mass_spectrum.abundance_profile = abundance_profile
     
    @@ -1121,117 +1147,112 @@
    Parameters
    -
     81    def get_mass_spectrum(
    - 82        self,
    - 83        scan_number=0,
    - 84        time_index=-1,
    - 85        auto_process=True,
    - 86        load_settings=True,
    - 87        load_raw=True,
    - 88        load_molecular_formula=True,
    - 89    ):
    - 90        """
    - 91        Instantiate a mass spectrum object from the CoreMS HDF5 file. 
    - 92        Note that this always returns a centroid mass spectrum object; functionality for profile and
    - 93        frequency mass spectra is not yet implemented.
    - 94
    - 95        Parameters
    - 96        ----------
    - 97        scan_number : int, optional
    - 98            The index of the scan to retrieve the mass spectrum from. Default is 0.
    - 99        time_index : int, optional
    -100            The index of the time point to retrieve the mass spectrum from. Default is -1.
    -101        auto_process : bool, optional
    -102            Whether to automatically process the mass spectrum. Default is True.
    -103        load_settings : bool, optional
    -104            Whether to load the settings into the mass spectrum object. Default is True.
    -105        load_raw : bool, optional
    -106            Whether to load the raw data into the mass spectrum object. Default is True.
    -107        load_molecular_formula : bool, optional
    -108            Whether to load the molecular formula into the mass spectrum object.
    -109            Default is True.
    -110
    -111        Returns
    -112        -------
    -113        MassSpecCentroid
    -114            The mass spectrum object.
    -115        
    -116        Raises
    -117        ------
    -118        ValueError
    -119            If the CoreMS file is not valid.
    -120            If the mass spectrum has not been processed and load_molecular_formula is True.
    -121        """
    -122        if "mass_spectra" in self.scans[0]:
    -123            scan_index = self.scans.index("mass_spectra/" + str(scan_number))
    -124        else:
    -125            scan_index = self.scans.index(str(scan_number))
    -126        dataframe = self.get_dataframe(scan_index, time_index=time_index)
    -127        if dataframe["Molecular Formula"].any() and not dataframe["C"].any():
    -128            cols = dataframe.columns.tolist()
    -129            cols = cols[cols.index("Molecular Formula") + 1 :]
    -130            for index, row in dataframe.iterrows():
    -131                if row["Molecular Formula"] is not None:
    -132                    og_formula = row["Molecular Formula"]
    -133                    for col in cols:
    -134                        if "col" in og_formula:
    -135                            # get the digit after the element ("col") in the molecular formula and set it to the dataframe
    -136                            row[col] = int(og_formula.split(col)[1].split(" ")[0])
    -137
    -138        if not set(
    -139            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
    -140        ).issubset(dataframe.columns):
    -141            raise ValueError(
    -142                "%s it is not a valid CoreMS file" % str(self.file_location)
    -143            )
    +            
     79    def get_mass_spectrum(
    + 80        self,
    + 81        scan_number=0,
    + 82        time_index=-1,
    + 83        auto_process=True,
    + 84        load_settings=True,
    + 85        load_raw=True,
    + 86        load_molecular_formula=True,
    + 87    ):
    + 88        """
    + 89        Instantiate a mass spectrum object from the CoreMS HDF5 file.
    + 90        Note that this always returns a centroid mass spectrum object; functionality for profile and
    + 91        frequency mass spectra is not yet implemented.
    + 92
    + 93        Parameters
    + 94        ----------
    + 95        scan_number : int, optional
    + 96            The index of the scan to retrieve the mass spectrum from. Default is 0.
    + 97        time_index : int, optional
    + 98            The index of the time point to retrieve the mass spectrum from. Default is -1.
    + 99        auto_process : bool, optional
    +100            Whether to automatically process the mass spectrum. Default is True.
    +101        load_settings : bool, optional
    +102            Whether to load the settings into the mass spectrum object. Default is True.
    +103        load_raw : bool, optional
    +104            Whether to load the raw data into the mass spectrum object. Default is True.
    +105        load_molecular_formula : bool, optional
    +106            Whether to load the molecular formula into the mass spectrum object.
    +107            Default is True.
    +108
    +109        Returns
    +110        -------
    +111        MassSpecCentroid
    +112            The mass spectrum object.
    +113
    +114        Raises
    +115        ------
    +116        ValueError
    +117            If the CoreMS file is not valid.
    +118            If the mass spectrum has not been processed and load_molecular_formula is True.
    +119        """
    +120        if "mass_spectra" in self.scans[0]:
    +121            scan_index = self.scans.index("mass_spectra/" + str(scan_number))
    +122        else:
    +123            scan_index = self.scans.index(str(scan_number))
    +124        dataframe = self.get_dataframe(scan_index, time_index=time_index)
    +125        if dataframe["Molecular Formula"].any() and not dataframe["C"].any():
    +126            cols = dataframe.columns.tolist()
    +127            cols = cols[cols.index("Molecular Formula") + 1 :]
    +128            for index, row in dataframe.iterrows():
    +129                if row["Molecular Formula"] is not None:
    +130                    og_formula = row["Molecular Formula"]
    +131                    for col in cols:
    +132                        if "col" in og_formula:
    +133                            # get the digit after the element ("col") in the molecular formula and set it to the dataframe
    +134                            row[col] = int(og_formula.split(col)[1].split(" ")[0])
    +135
    +136        if not set(
    +137            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
    +138        ).issubset(dataframe.columns):
    +139            raise ValueError(
    +140                "%s it is not a valid CoreMS file" % str(self.file_location)
    +141            )
    +142
    +143        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
     144
    -145        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -146
    -147        # Cast m/z, and 'Peak Height' to float
    -148        dataframe["m/z"] = dataframe["m/z"].astype(float)
    -149        dataframe["Peak Height"] = dataframe["Peak Height"].astype(float)
    +145        # Cast m/z, and 'Peak Height' to float
    +146        dataframe["m/z"] = dataframe["m/z"].astype(float)
    +147        dataframe["Peak Height"] = dataframe["Peak Height"].astype(float)
    +148
    +149        polarity = dataframe["Ion Charge"].values[0]
     150
    -151        polarity = dataframe["Ion Charge"].values[0]
    +151        output_parameters = self.get_output_parameters(polarity, scan_index=scan_index)
     152
    -153        output_parameters = self.get_output_parameters(polarity, scan_index=scan_index)
    -154
    -155        mass_spec_obj = MassSpecCentroid(
    -156            dataframe.to_dict(orient="list"), output_parameters, auto_process = False
    -157        )
    -158
    -159        if auto_process:
    -160            # Set the settings on the mass spectrum object to relative abuncance of 0 so all peaks get added
    -161            mass_spec_obj.settings.noise_threshold_method = "absolute_abundance"
    -162            mass_spec_obj.settings.noise_threshold_absolute_abundance = 0
    -163            mass_spec_obj.process_mass_spec()
    -164
    -165        if load_settings:
    -166            # Load settings into the mass spectrum object
    -167            self.load_settings(
    -168                mass_spec_obj, 
    -169                scan_index=scan_index, 
    -170                time_index=time_index
    -171            )
    -172
    -173        if load_raw:
    -174            self.load_raw_data(
    -175                mass_spec_obj, 
    -176                scan_index=scan_index
    -177                )
    -178
    -179        if load_molecular_formula:
    -180            if not auto_process:
    -181                raise ValueError(
    -182                    "Can only add molecular formula if the mass spectrum has been processed"
    -183                )
    -184            else:
    -185                self.add_molecular_formula(mass_spec_obj, dataframe)
    -186
    -187        return mass_spec_obj
    +153        mass_spec_obj = MassSpecCentroid(
    +154            dataframe.to_dict(orient="list"), output_parameters, auto_process=False
    +155        )
    +156
    +157        if auto_process:
    +158            # Set the settings on the mass spectrum object to relative abuncance of 0 so all peaks get added
    +159            mass_spec_obj.settings.noise_threshold_method = "absolute_abundance"
    +160            mass_spec_obj.settings.noise_threshold_absolute_abundance = 0
    +161            mass_spec_obj.process_mass_spec()
    +162
    +163        if load_settings:
    +164            # Load settings into the mass spectrum object
    +165            self.load_settings(
    +166                mass_spec_obj, scan_index=scan_index, time_index=time_index
    +167            )
    +168
    +169        if load_raw:
    +170            self.load_raw_data(mass_spec_obj, scan_index=scan_index)
    +171
    +172        if load_molecular_formula:
    +173            if not auto_process:
    +174                raise ValueError(
    +175                    "Can only add molecular formula if the mass spectrum has been processed"
    +176                )
    +177            else:
    +178                self.add_molecular_formula(mass_spec_obj, dataframe)
    +179
    +180        return mass_spec_obj
     
    -

    Instantiate a mass spectrum object from the CoreMS HDF5 file. +

    Instantiate a mass spectrum object from the CoreMS HDF5 file. Note that this always returns a centroid mass spectrum object; functionality for profile and frequency mass spectra is not yet implemented.

    @@ -1280,35 +1301,35 @@
    Raises
    -
    189    def load_settings(self, mass_spectrum, scan_index=0, time_index=-1):
    -190        """
    -191        Load settings into the mass spectrum object.
    -192
    -193        Parameters
    -194        ----------
    -195        mass_spectrum : MassSpecCentroid
    -196            The mass spectrum object to load the settings into.
    -197        scan_index : int, optional
    -198            The index of the scan to load the settings from. Default is 0.
    -199        time_index : int, optional
    -200            The index of the time point to load the settings from. Default is -1.
    -201        """
    -202
    -203        loaded_settings = {}
    -204        loaded_settings["MoleculaSearch"] = self.get_scan_group_attr_data(
    -205            scan_index, time_index, "MoleculaSearchSetting"
    -206        )
    -207        loaded_settings["MassSpecPeak"] = self.get_scan_group_attr_data(
    -208            scan_index, time_index, "MassSpecPeakSetting"
    -209        )
    -210        loaded_settings["MassSpectrum"] = self.get_scan_group_attr_data(
    -211            scan_index, time_index, "MassSpectrumSetting"
    -212        )
    -213        loaded_settings["Transient"] = self.get_scan_group_attr_data(
    -214            scan_index, time_index, "TransientSetting"
    -215        )
    -216
    -217        _set_dict_data_ms(loaded_settings, mass_spectrum)
    +            
    182    def load_settings(self, mass_spectrum, scan_index=0, time_index=-1):
    +183        """
    +184        Load settings into the mass spectrum object.
    +185
    +186        Parameters
    +187        ----------
    +188        mass_spectrum : MassSpecCentroid
    +189            The mass spectrum object to load the settings into.
    +190        scan_index : int, optional
    +191            The index of the scan to load the settings from. Default is 0.
    +192        time_index : int, optional
    +193            The index of the time point to load the settings from. Default is -1.
    +194        """
    +195
    +196        loaded_settings = {}
    +197        loaded_settings["MoleculaSearch"] = self.get_scan_group_attr_data(
    +198            scan_index, time_index, "MoleculaSearchSetting"
    +199        )
    +200        loaded_settings["MassSpecPeak"] = self.get_scan_group_attr_data(
    +201            scan_index, time_index, "MassSpecPeakSetting"
    +202        )
    +203        loaded_settings["MassSpectrum"] = self.get_scan_group_attr_data(
    +204            scan_index, time_index, "MassSpectrumSetting"
    +205        )
    +206        loaded_settings["Transient"] = self.get_scan_group_attr_data(
    +207            scan_index, time_index, "TransientSetting"
    +208        )
    +209
    +210        _set_dict_data_ms(loaded_settings, mass_spectrum)
     
    @@ -1339,56 +1360,56 @@
    Parameters
    -
    219    def get_dataframe(self, scan_index=0, time_index=-1):
    -220        """
    -221        Get a pandas DataFrame representing the mass spectrum.
    +            
    212    def get_dataframe(self, scan_index=0, time_index=-1):
    +213        """
    +214        Get a pandas DataFrame representing the mass spectrum.
    +215
    +216        Parameters
    +217        ----------
    +218        scan_index : int, optional
    +219            The index of the scan to retrieve the DataFrame from. Default is 0.
    +220        time_index : int, optional
    +221            The index of the time point to retrieve the DataFrame from. Default is -1.
     222
    -223        Parameters
    -224        ----------
    -225        scan_index : int, optional
    -226            The index of the scan to retrieve the DataFrame from. Default is 0.
    -227        time_index : int, optional
    -228            The index of the time point to retrieve the DataFrame from. Default is -1.
    -229
    -230        Returns
    -231        -------
    -232        DataFrame
    -233            The pandas DataFrame representing the mass spectrum.
    -234        """
    -235
    -236        columnsLabels = self.get_scan_group_attr_data(
    -237            scan_index, time_index, "ColumnsLabels"
    -238        )
    -239
    -240        scan_label = self.scans[scan_index]
    -241
    -242        index_to_pull = self.get_time_index_to_pull(scan_label, time_index)
    -243
    -244        corems_table_data = self.h5pydata[scan_label][index_to_pull]
    -245
    -246        list_dict = []
    -247        for row in corems_table_data:
    -248            data_dict = {}
    -249            for data_index, data in enumerate(row):
    -250                label = columnsLabels[data_index]
    -251                # if data starts with a b' it is a byte string, so decode it
    -252                if isinstance(data, bytes):
    -253                    data = data.decode("utf-8")
    -254                if data == "nan":
    -255                    data = None
    -256                data_dict[label] = data
    -257
    -258            list_dict.append(data_dict)
    -259
    -260        # Reorder the columns from low to high "Index" to match the order of the dataframe
    -261        df = DataFrame(list_dict)
    -262        # set the "Index" column to int so it sorts correctly
    -263        df["Index"] = df["Index"].astype(int)
    -264        df = df.sort_values(by="Index")
    -265        # Reset index to match the "Index" column
    -266        df = df.set_index("Index", drop=False)
    -267
    -268        return df
    +223        Returns
    +224        -------
    +225        DataFrame
    +226            The pandas DataFrame representing the mass spectrum.
    +227        """
    +228
    +229        columnsLabels = self.get_scan_group_attr_data(
    +230            scan_index, time_index, "ColumnsLabels"
    +231        )
    +232
    +233        scan_label = self.scans[scan_index]
    +234
    +235        index_to_pull = self.get_time_index_to_pull(scan_label, time_index)
    +236
    +237        corems_table_data = self.h5pydata[scan_label][index_to_pull]
    +238
    +239        list_dict = []
    +240        for row in corems_table_data:
    +241            data_dict = {}
    +242            for data_index, data in enumerate(row):
    +243                label = columnsLabels[data_index]
    +244                # if data starts with a b' it is a byte string, so decode it
    +245                if isinstance(data, bytes):
    +246                    data = data.decode("utf-8")
    +247                if data == "nan":
    +248                    data = None
    +249                data_dict[label] = data
    +250
    +251            list_dict.append(data_dict)
    +252
    +253        # Reorder the columns from low to high "Index" to match the order of the dataframe
    +254        df = DataFrame(list_dict)
    +255        # set the "Index" column to int so it sorts correctly
    +256        df["Index"] = df["Index"].astype(int)
    +257        df = df.sort_values(by="Index")
    +258        # Reset index to match the "Index" column
    +259        df = df.set_index("Index", drop=False)
    +260
    +261        return df
     
    @@ -1423,31 +1444,31 @@
    Returns
    -
    270    def get_time_index_to_pull(self, scan_label, time_index):
    -271        """
    -272        Get the time index to pull from the HDF5 file.
    +            
    263    def get_time_index_to_pull(self, scan_label, time_index):
    +264        """
    +265        Get the time index to pull from the HDF5 file.
    +266
    +267        Parameters
    +268        ----------
    +269        scan_label : str
    +270            The label of the scan.
    +271        time_index : int
    +272            The index of the time point.
     273
    -274        Parameters
    -275        ----------
    -276        scan_label : str
    -277            The label of the scan.
    -278        time_index : int
    -279            The index of the time point.
    -280
    -281        Returns
    -282        -------
    -283        str
    -284            The time index to pull.
    -285        """
    +274        Returns
    +275        -------
    +276        str
    +277            The time index to pull.
    +278        """
    +279
    +280        time_data = sorted(
    +281            [(i, int(i)) for i in self.h5pydata[scan_label].keys() if i != "raw_ms"],
    +282            key=lambda m: m[1],
    +283        )
    +284
    +285        index_to_pull = time_data[time_index][0]
     286
    -287        time_data = sorted(
    -288            [(i, int(i)) for i in self.h5pydata[scan_label].keys() if i != "raw_ms"],
    -289            key=lambda m: m[1],
    -290        )
    -291
    -292        index_to_pull = time_data[time_index][0]
    -293
    -294        return index_to_pull
    +287        return index_to_pull
     
    @@ -1482,27 +1503,27 @@
    Returns
    -
    296    def get_high_level_attr_data(self, attr_str):
    -297        """
    -298        Get high-level attribute data from the HDF5 file.
    -299
    -300        Parameters
    -301        ----------
    -302        attr_str : str
    -303            The attribute string.
    -304
    -305        Returns
    -306        -------
    -307        dict
    -308            The attribute data.
    -309
    -310        Raises
    -311        ------
    -312        KeyError
    -313            If the attribute string is not found in the HDF5 file.
    -314        """
    -315
    -316        return self.h5pydata.attrs[attr_str]
    +            
    289    def get_high_level_attr_data(self, attr_str):
    +290        """
    +291        Get high-level attribute data from the HDF5 file.
    +292
    +293        Parameters
    +294        ----------
    +295        attr_str : str
    +296            The attribute string.
    +297
    +298        Returns
    +299        -------
    +300        dict
    +301            The attribute data.
    +302
    +303        Raises
    +304        ------
    +305        KeyError
    +306            If the attribute string is not found in the HDF5 file.
    +307        """
    +308
    +309        return self.h5pydata.attrs[attr_str]
     
    @@ -1541,51 +1562,51 @@
    Raises
    -
    318    def get_scan_group_attr_data(
    -319        self, scan_index, time_index, attr_group, attr_srt=None
    -320    ):
    -321        """
    -322        Get scan group attribute data from the HDF5 file.
    -323
    -324        Parameters
    -325        ----------
    -326        scan_index : int
    -327            The index of the scan.
    -328        time_index : int
    -329            The index of the time point.
    -330        attr_group : str
    -331            The attribute group.
    -332        attr_srt : str, optional
    -333            The attribute string. Default is None.
    -334
    -335        Returns
    -336        -------
    -337        dict
    -338            The attribute data.
    -339
    -340        Notes
    -341        -----
    -342        This method retrieves attribute data from the HDF5 file for a specific scan and time point.
    -343        The attribute data is stored in the specified attribute group.
    -344        If an attribute string is provided, only the corresponding attribute value is returned.
    -345        If no attribute string is provided, all attribute data in the group is returned as a dictionary.
    -346        """
    -347        # Get index of self.scans where scan_index_str is found
    -348        scan_label = self.scans[scan_index]
    +            
    311    def get_scan_group_attr_data(
    +312        self, scan_index, time_index, attr_group, attr_srt=None
    +313    ):
    +314        """
    +315        Get scan group attribute data from the HDF5 file.
    +316
    +317        Parameters
    +318        ----------
    +319        scan_index : int
    +320            The index of the scan.
    +321        time_index : int
    +322            The index of the time point.
    +323        attr_group : str
    +324            The attribute group.
    +325        attr_srt : str, optional
    +326            The attribute string. Default is None.
    +327
    +328        Returns
    +329        -------
    +330        dict
    +331            The attribute data.
    +332
    +333        Notes
    +334        -----
    +335        This method retrieves attribute data from the HDF5 file for a specific scan and time point.
    +336        The attribute data is stored in the specified attribute group.
    +337        If an attribute string is provided, only the corresponding attribute value is returned.
    +338        If no attribute string is provided, all attribute data in the group is returned as a dictionary.
    +339        """
    +340        # Get index of self.scans where scan_index_str is found
    +341        scan_label = self.scans[scan_index]
    +342
    +343        index_to_pull = self.get_time_index_to_pull(scan_label, time_index)
    +344
    +345        if attr_srt:
    +346            return json.loads(
    +347                self.h5pydata[scan_label][index_to_pull].attrs[attr_group]
    +348            )[attr_srt]
     349
    -350        index_to_pull = self.get_time_index_to_pull(scan_label, time_index)
    -351
    -352        if attr_srt:
    -353            return json.loads(
    -354                self.h5pydata[scan_label][index_to_pull].attrs[attr_group]
    -355            )[attr_srt]
    -356
    -357        else:
    -358            data = self.h5pydata[scan_label][index_to_pull].attrs.get(attr_group)
    -359            if data:
    -360                return json.loads(data)
    -361            else:
    -362                return {}
    +350        else:
    +351            data = self.h5pydata[scan_label][index_to_pull].attrs.get(attr_group)
    +352            if data:
    +353                return json.loads(data)
    +354            else:
    +355                return {}
     
    @@ -1631,45 +1652,47 @@
    Notes
    -
    364    def get_raw_data_attr_data(self, scan_index, attr_group, attr_str):
    -365        """
    -366        Get raw data attribute data from the HDF5 file.
    -367
    -368        Parameters
    -369        ----------
    -370        scan_index : int
    -371            The index of the scan.
    -372        attr_group : str
    -373            The attribute group.
    -374        attr_str : str
    -375            The attribute string.
    -376
    -377        Returns
    -378        -------
    -379        dict
    -380            The attribute data.
    -381
    -382        Raises
    -383        ------
    -384        KeyError
    -385            If the attribute string is not found in the attribute group.
    -386
    -387        Notes
    -388        -----
    -389        This method retrieves the attribute data associated with a specific scan, attribute group, and attribute string
    -390        from the HDF5 file. It returns the attribute data as a dictionary.
    -391
    -392        Example usage:
    -393        >>> data = get_raw_data_attr_data(0, "group1", "attribute1")
    -394        >>> print(data)
    -395        {'key1': 'value1', 'key2': 'value2'}
    -396        """
    -397        scan_label = self.scans[scan_index]
    -398        try:
    -399            json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[attr_str]
    -400        except KeyError:
    -401            attr_str = attr_str.replace("baseline", "baselise")
    -402        return json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[attr_str]
    +            
    357    def get_raw_data_attr_data(self, scan_index, attr_group, attr_str):
    +358        """
    +359        Get raw data attribute data from the HDF5 file.
    +360
    +361        Parameters
    +362        ----------
    +363        scan_index : int
    +364            The index of the scan.
    +365        attr_group : str
    +366            The attribute group.
    +367        attr_str : str
    +368            The attribute string.
    +369
    +370        Returns
    +371        -------
    +372        dict
    +373            The attribute data.
    +374
    +375        Raises
    +376        ------
    +377        KeyError
    +378            If the attribute string is not found in the attribute group.
    +379
    +380        Notes
    +381        -----
    +382        This method retrieves the attribute data associated with a specific scan, attribute group, and attribute string
    +383        from the HDF5 file. It returns the attribute data as a dictionary.
    +384
    +385        Example usage:
    +386        >>> data = get_raw_data_attr_data(0, "group1", "attribute1")
    +387        >>> print(data)
    +388        {'key1': 'value1', 'key2': 'value2'}
    +389        """
    +390        scan_label = self.scans[scan_index]
    +391        try:
    +392            json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[attr_str]
    +393        except KeyError:
    +394            attr_str = attr_str.replace("baseline", "baselise")
    +395        return json.loads(self.h5pydata[scan_label]["raw_ms"].attrs[attr_group])[
    +396            attr_str
    +397        ]
     
    @@ -1726,43 +1749,61 @@
    Notes
    -
    404    def get_output_parameters(self, polarity, scan_index=0):
    -405        """
    -406        Get the output parameters for the mass spectrum.
    -407
    -408        Parameters
    -409        ----------
    -410        polarity : str
    -411            The polarity of the mass spectrum.
    -412        scan_index : int, optional
    -413            The index of the scan. Default is 0.
    -414
    -415        Returns
    -416        -------
    -417        dict
    -418            The output parameters.
    -419        """
    -420
    -421        d_params = default_parameters(self.file_location)
    -422        d_params["filename_path"] = self.file_location
    -423        d_params['polarity'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'polarity')
    -424        d_params['rt'] =     self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'rt')
    -425        
    -426        d_params['tic'] =  self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'tic')
    -427        
    -428        d_params['mobility_scan'] =    self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'mobility_scan')
    -429        d_params['mobility_rt'] =     self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'mobility_rt')
    -430        d_params['Aterm'] =  self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'Aterm')
    -431        d_params['Bterm'] =  self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'Bterm')
    -432        d_params['Cterm'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'Cterm')
    -433        d_params['baseline_noise'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'baseline_noise')
    -434        d_params['baseline_noise_std'] = self.get_raw_data_attr_data( scan_index, 'MassSpecAttrs', 'baseline_noise_std')
    -435        
    -436        d_params['analyzer'] = self.get_high_level_attr_data('analyzer')
    -437        d_params['instrument_label'] = self.get_high_level_attr_data('instrument_label')
    -438        d_params['sample_name'] = self.get_high_level_attr_data('sample_name')
    -439
    -440        return d_params
    +            
    399    def get_output_parameters(self, polarity, scan_index=0):
    +400        """
    +401        Get the output parameters for the mass spectrum.
    +402
    +403        Parameters
    +404        ----------
    +405        polarity : str
    +406            The polarity of the mass spectrum.
    +407        scan_index : int, optional
    +408            The index of the scan. Default is 0.
    +409
    +410        Returns
    +411        -------
    +412        dict
    +413            The output parameters.
    +414        """
    +415
    +416        d_params = default_parameters(self.file_location)
    +417        d_params["filename_path"] = self.file_location
    +418        d_params["polarity"] = self.get_raw_data_attr_data(
    +419            scan_index, "MassSpecAttrs", "polarity"
    +420        )
    +421        d_params["rt"] = self.get_raw_data_attr_data(scan_index, "MassSpecAttrs", "rt")
    +422
    +423        d_params["tic"] = self.get_raw_data_attr_data(
    +424            scan_index, "MassSpecAttrs", "tic"
    +425        )
    +426
    +427        d_params["mobility_scan"] = self.get_raw_data_attr_data(
    +428            scan_index, "MassSpecAttrs", "mobility_scan"
    +429        )
    +430        d_params["mobility_rt"] = self.get_raw_data_attr_data(
    +431            scan_index, "MassSpecAttrs", "mobility_rt"
    +432        )
    +433        d_params["Aterm"] = self.get_raw_data_attr_data(
    +434            scan_index, "MassSpecAttrs", "Aterm"
    +435        )
    +436        d_params["Bterm"] = self.get_raw_data_attr_data(
    +437            scan_index, "MassSpecAttrs", "Bterm"
    +438        )
    +439        d_params["Cterm"] = self.get_raw_data_attr_data(
    +440            scan_index, "MassSpecAttrs", "Cterm"
    +441        )
    +442        d_params["baseline_noise"] = self.get_raw_data_attr_data(
    +443            scan_index, "MassSpecAttrs", "baseline_noise"
    +444        )
    +445        d_params["baseline_noise_std"] = self.get_raw_data_attr_data(
    +446            scan_index, "MassSpecAttrs", "baseline_noise_std"
    +447        )
    +448
    +449        d_params["analyzer"] = self.get_high_level_attr_data("analyzer")
    +450        d_params["instrument_label"] = self.get_high_level_attr_data("instrument_label")
    +451        d_params["sample_name"] = self.get_high_level_attr_data("sample_name")
    +452
    +453        return d_params
     
    diff --git a/docs/corems/mass_spectrum/input/massList.html b/docs/corems/mass_spectrum/input/massList.html index 9d2d5e59..855d0ebe 100644 --- a/docs/corems/mass_spectrum/input/massList.html +++ b/docs/corems/mass_spectrum/input/massList.html @@ -84,335 +84,387 @@

      1__author__ = "Yuri E. Corilo"
       2__date__ = "Jun 12, 2019"
       3
    -  4import numpy as np
    -  5import warnings
    -  6
    -  7from corems.encapsulation.constant import Atoms
    -  8from corems.mass_spectrum.input.baseClass import MassListBaseClass
    -  9from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecProfile, MassSpecCentroid
    - 10from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula, MolecularFormulaIsotopologue
    - 11from corems.encapsulation.constant import Labels, Atoms
    - 12from corems.encapsulation.factory.processingSetting  import DataInputSetting
    +  4import warnings
    +  5
    +  6from corems.encapsulation.constant import Atoms, Labels
    +  7from corems.mass_spectrum.factory.MassSpectrumClasses import (
    +  8    MassSpecCentroid,
    +  9    MassSpecProfile,
    + 10)
    + 11from corems.mass_spectrum.input.baseClass import MassListBaseClass
    + 12from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula
      13
    - 14class ReadCoremsMasslist(MassListBaseClass):
    - 15    """
    - 16    The ReadCoremsMasslist object reads processed mass list data types
    - 17    and returns the mass spectrum obj with the molecular formula obj
    - 18
    - 19    **Only available for centroid mass spectrum type:** it will ignore the parameter **isCentroid** 
    - 20    Please see MassListBaseClass for more details
    - 21
    - 22    """
    - 23
    - 24    def get_mass_spectrum(self, loadSettings:bool =True) -> MassSpecCentroid:
    - 25        """
    - 26        Get the mass spectrum object from the processed mass list data.
    - 27
    - 28        Parameters
    - 29        ----------
    - 30        loadSettings : bool, optional
    - 31            Whether to load the settings for the mass spectrum. Default is True.
    - 32
    - 33        Returns
    - 34        -------
    - 35        MassSpecCentroid
    - 36            The mass spectrum object.
    - 37
    - 38        Raises
    - 39        ------
    - 40        ValueError
    - 41            If the input file is not a valid CoreMS file.
    - 42        """
    - 43
    - 44        dataframe = self.get_dataframe()
    - 45
    - 46        if not set(['H/C', 'O/C', 'Heteroatom Class', 'Ion Type', 'Is Isotopologue']).issubset(dataframe.columns):
    - 47            raise ValueError("%s it is not a valid CoreMS file" % str(self.file_location))
    - 48
    - 49        self.check_columns(dataframe.columns)
    - 50
    - 51        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    - 52
    - 53        polarity = dataframe['Ion Charge'].values[0]
    - 54
    - 55        output_parameters = self.get_output_parameters(polarity)
    - 56
    - 57        mass_spec_obj = MassSpecCentroid(dataframe.to_dict(orient='list'), output_parameters)
    - 58
    - 59        if loadSettings is True:
    - 60            self.load_settings(mass_spec_obj, output_parameters)
    + 14
    + 15class ReadCoremsMasslist(MassListBaseClass):
    + 16    """
    + 17    The ReadCoremsMasslist object reads processed mass list data types
    + 18    and returns the mass spectrum obj with the molecular formula obj
    + 19
    + 20    **Only available for centroid mass spectrum type:** it will ignore the parameter **isCentroid**
    + 21    Please see MassListBaseClass for more details
    + 22
    + 23    """
    + 24
    + 25    def get_mass_spectrum(self, loadSettings: bool = True) -> MassSpecCentroid:
    + 26        """
    + 27        Get the mass spectrum object from the processed mass list data.
    + 28
    + 29        Parameters
    + 30        ----------
    + 31        loadSettings : bool, optional
    + 32            Whether to load the settings for the mass spectrum. Default is True.
    + 33
    + 34        Returns
    + 35        -------
    + 36        MassSpecCentroid
    + 37            The mass spectrum object.
    + 38
    + 39        Raises
    + 40        ------
    + 41        ValueError
    + 42            If the input file is not a valid CoreMS file.
    + 43        """
    + 44
    + 45        dataframe = self.get_dataframe()
    + 46
    + 47        if not set(
    + 48            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
    + 49        ).issubset(dataframe.columns):
    + 50            raise ValueError(
    + 51                "%s it is not a valid CoreMS file" % str(self.file_location)
    + 52            )
    + 53
    + 54        self.check_columns(dataframe.columns)
    + 55
    + 56        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    + 57
    + 58        polarity = dataframe["Ion Charge"].values[0]
    + 59
    + 60        output_parameters = self.get_output_parameters(polarity)
      61
    - 62        self.add_molecular_formula(mass_spec_obj, dataframe)
    - 63
    - 64        return mass_spec_obj
    + 62        mass_spec_obj = MassSpecCentroid(
    + 63            dataframe.to_dict(orient="list"), output_parameters
    + 64        )
      65
    - 66    def add_molecular_formula(self, mass_spec_obj, dataframe):
    - 67        """
    - 68        Add molecular formula information to the mass spectrum object.
    - 69
    - 70        Parameters
    - 71        ----------
    - 72        mass_spec_obj : MassSpecCentroid
    - 73            The mass spectrum object to add the molecular formula to.
    - 74        dataframe : pandas.DataFrame
    - 75            The processed mass list data.
    - 76            
    - 77        """
    - 78
    - 79        # check if is coreMS file
    - 80        if 'Is Isotopologue' in dataframe:
    - 81            # Reindex dataframe to row index to avoid issues with duplicated indexes (e.g. when multiple formula map to single mz_exp)
    - 82            dataframe = dataframe.reset_index(drop=True)
    + 66        if loadSettings is True:
    + 67            self.load_settings(mass_spec_obj, output_parameters)
    + 68
    + 69        self.add_molecular_formula(mass_spec_obj, dataframe)
    + 70
    + 71        return mass_spec_obj
    + 72
    + 73    def add_molecular_formula(self, mass_spec_obj, dataframe):
    + 74        """
    + 75        Add molecular formula information to the mass spectrum object.
    + 76
    + 77        Parameters
    + 78        ----------
    + 79        mass_spec_obj : MassSpecCentroid
    + 80            The mass spectrum object to add the molecular formula to.
    + 81        dataframe : pandas.DataFrame
    + 82            The processed mass list data.
      83
    - 84            mz_exp_df = dataframe[Labels.mz].astype(float)
    - 85            formula_df = dataframe[dataframe.columns.intersection(Atoms.atoms_order)].copy()
    - 86            formula_df.fillna(0, inplace=True)
    - 87            formula_df.replace(b'nan', 0, inplace=True)
    - 88
    - 89            ion_type_df = dataframe["Ion Type"]
    - 90            ion_charge_df = dataframe["Ion Charge"]
    - 91            is_isotopologue_df = dataframe['Is Isotopologue']
    - 92            if 'Adduct' in dataframe:
    - 93                adduct_df = dataframe['Adduct']
    - 94            else:
    - 95                adduct_df = None
    - 96
    - 97        mass_spec_mz_exp_list = mass_spec_obj.mz_exp
    - 98
    - 99        for df_index, mz_exp in enumerate(mz_exp_df):
    -100
    -101            counts = 0
    -102
    -103            ms_peak_index = list(mass_spec_mz_exp_list).index(float(mz_exp))
    -104
    -105            if 'Is Isotopologue' in dataframe:
    -106
    -107                atoms = list(formula_df.columns.astype(str))
    -108                counts = list(formula_df.iloc[df_index].astype(int))
    -109
    -110                formula_dict = dict(zip(atoms, counts))
    -111
    -112                # Drop any atoms with 0 counts
    -113                formula_dict = {atom: formula_dict[atom] for atom in formula_dict if formula_dict[atom] > 0}
    -114
    -115            if sum(counts) > 0:
    -116                ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
    -117                if adduct_df is not None:
    -118                    adduct_atom = str(adduct_df[df_index])
    -119                    if adduct_atom == 'None':
    -120                        adduct_atom = None
    -121                else:
    -122                    adduct_atom = None
    -123
    -124                # If not isotopologue, cast as MolecularFormula
    -125                if not bool(int(is_isotopologue_df[df_index])):
    -126                    mfobj = MolecularFormula(
    -127                        formula_dict, int(ion_charge_df[df_index]), 
    -128                        mspeak_parent=mass_spec_obj[ms_peak_index] , 
    -129                        ion_type=ion_type, adduct_atom=adduct_atom
    -130                        )
    -131                    
    -132                # if is isotopologue, recast as MolecularFormulaIsotopologue
    -133                if bool(int(is_isotopologue_df[df_index])):
    + 84        """
    + 85
    + 86        # check if is coreMS file
    + 87        if "Is Isotopologue" in dataframe:
    + 88            # Reindex dataframe to row index to avoid issues with duplicated indexes (e.g. when multiple formula map to single mz_exp)
    + 89            dataframe = dataframe.reset_index(drop=True)
    + 90
    + 91            mz_exp_df = dataframe[Labels.mz].astype(float)
    + 92            formula_df = dataframe[
    + 93                dataframe.columns.intersection(Atoms.atoms_order)
    + 94            ].copy()
    + 95            formula_df.fillna(0, inplace=True)
    + 96            formula_df.replace(b"nan", 0, inplace=True)
    + 97
    + 98            ion_type_df = dataframe["Ion Type"]
    + 99            ion_charge_df = dataframe["Ion Charge"]
    +100            is_isotopologue_df = dataframe["Is Isotopologue"]
    +101            if "Adduct" in dataframe:
    +102                adduct_df = dataframe["Adduct"]
    +103            else:
    +104                adduct_df = None
    +105
    +106        mass_spec_mz_exp_list = mass_spec_obj.mz_exp
    +107
    +108        for df_index, mz_exp in enumerate(mz_exp_df):
    +109            counts = 0
    +110
    +111            ms_peak_index = list(mass_spec_mz_exp_list).index(float(mz_exp))
    +112
    +113            if "Is Isotopologue" in dataframe:
    +114                atoms = list(formula_df.columns.astype(str))
    +115                counts = list(formula_df.iloc[df_index].astype(int))
    +116
    +117                formula_dict = dict(zip(atoms, counts))
    +118
    +119                # Drop any atoms with 0 counts
    +120                formula_dict = {
    +121                    atom: formula_dict[atom]
    +122                    for atom in formula_dict
    +123                    if formula_dict[atom] > 0
    +124                }
    +125
    +126            if sum(counts) > 0:
    +127                ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
    +128                if adduct_df is not None:
    +129                    adduct_atom = str(adduct_df[df_index])
    +130                    if adduct_atom == "None":
    +131                        adduct_atom = None
    +132                else:
    +133                    adduct_atom = None
     134
    -135                    # First make a MolecularFormula object for the parent so we can get probabilities etc
    -136                    formula_list_parent = {}
    -137                    for atom in formula_dict:
    -138                        if atom in Atoms.isotopes.keys():
    -139                            formula_list_parent[atom] = formula_dict[atom]
    -140                        else:
    -141                            # remove any numbers from the atom name to cast as a mono-isotopic atom
    -142                            atom_mono = atom.strip('0123456789')
    -143                            if atom_mono in Atoms.isotopes.keys() and atom_mono in formula_list_parent.keys():
    -144                                formula_list_parent[atom_mono] = formula_list_parent[atom_mono]+formula_dict[atom]
    -145                            elif atom_mono in Atoms.isotopes.keys():
    -146                                formula_list_parent[atom_mono] = formula_dict[atom]
    -147                            else:
    -148                                warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
    -149                    mono_index = int(dataframe.iloc[df_index]['Mono Isotopic Index'])
    -150                    mono_mfobj = MolecularFormula(
    -151                        formula_list_parent, 
    -152                        int(ion_charge_df[df_index]), 
    -153                        mspeak_parent=mass_spec_obj[mono_index], 
    -154                        ion_type=ion_type, 
    -155                        adduct_atom=adduct_atom
    -156                        )
    -157                    
    -158                    # Next, generate isotopologues from the parent
    -159                    isos = list(
    -160                        mono_mfobj.isotopologues(
    -161                        min_abundance = mass_spec_obj[df_index].abundance*0.001, 
    -162                        current_mono_abundance = mass_spec_obj[mono_index].abundance, 
    -163                        dynamic_range = mass_spec_obj.dynamic_range
    -164                         )
    -165                    )
    -166
    -167                    # Finally, find the isotopologue that matches the formula_dict
    -168                    matched_isos = []
    -169                    for iso in isos:
    -170                        # If match was already found, exit the loop
    -171                        if len(matched_isos) > 0:
    -172                            break
    -173                        else:
    -174                            # Check the atoms match
    -175                            if set(iso.atoms) == set(formula_dict.keys()):
    -176                                # Check the values of the atoms match
    -177                                if all([iso[atom] == formula_dict[atom] for atom in formula_dict]):
    -178                                    matched_isos.append(iso)
    -179
    -180                    if len(matched_isos) == 0:
    -181                        raise ValueError("No isotopologue matched the formula_dict")
    -182                    mfobj = matched_isos[0]        
    +135                # If not isotopologue, cast as MolecularFormula
    +136                if not bool(int(is_isotopologue_df[df_index])):
    +137                    mfobj = MolecularFormula(
    +138                        formula_dict,
    +139                        int(ion_charge_df[df_index]),
    +140                        mspeak_parent=mass_spec_obj[ms_peak_index],
    +141                        ion_type=ion_type,
    +142                        adduct_atom=adduct_atom,
    +143                    )
    +144
    +145                # if is isotopologue, recast as MolecularFormulaIsotopologue
    +146                if bool(int(is_isotopologue_df[df_index])):
    +147                    # First make a MolecularFormula object for the parent so we can get probabilities etc
    +148                    formula_list_parent = {}
    +149                    for atom in formula_dict:
    +150                        if atom in Atoms.isotopes.keys():
    +151                            formula_list_parent[atom] = formula_dict[atom]
    +152                        else:
    +153                            # remove any numbers from the atom name to cast as a mono-isotopic atom
    +154                            atom_mono = atom.strip("0123456789")
    +155                            if (
    +156                                atom_mono in Atoms.isotopes.keys()
    +157                                and atom_mono in formula_list_parent.keys()
    +158                            ):
    +159                                formula_list_parent[atom_mono] = (
    +160                                    formula_list_parent[atom_mono] + formula_dict[atom]
    +161                                )
    +162                            elif atom_mono in Atoms.isotopes.keys():
    +163                                formula_list_parent[atom_mono] = formula_dict[atom]
    +164                            else:
    +165                                warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
    +166                    mono_index = int(dataframe.iloc[df_index]["Mono Isotopic Index"])
    +167                    mono_mfobj = MolecularFormula(
    +168                        formula_list_parent,
    +169                        int(ion_charge_df[df_index]),
    +170                        mspeak_parent=mass_spec_obj[mono_index],
    +171                        ion_type=ion_type,
    +172                        adduct_atom=adduct_atom,
    +173                    )
    +174
    +175                    # Next, generate isotopologues from the parent
    +176                    isos = list(
    +177                        mono_mfobj.isotopologues(
    +178                            min_abundance=mass_spec_obj[df_index].abundance * 0.001,
    +179                            current_mono_abundance=mass_spec_obj[mono_index].abundance,
    +180                            dynamic_range=mass_spec_obj.dynamic_range,
    +181                        )
    +182                    )
     183
    -184                    # Add the mono isotopic index, confidence score and isotopologue similarity    
    -185                    mfobj.mspeak_index_mono_isotopic = int(dataframe.iloc[df_index]['Mono Isotopic Index'])
    -186                
    -187                # Add the confidence score and isotopologue similarity and average MZ error score
    -188                if 'm/z Error Score' in dataframe:
    -189                    mfobj._mass_error_average_score = float(dataframe.iloc[df_index]['m/z Error Score'])
    -190                if 'Confidence Score' in dataframe:
    -191                    mfobj._confidence_score = float(dataframe.iloc[df_index]['Confidence Score'])
    -192                if 'Isotopologue Similarity' in dataframe:
    -193                    mfobj._isotopologue_similarity = float(dataframe.iloc[df_index]['Isotopologue Similarity'])
    -194                mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)
    -195
    -196
    -197class ReadMassList(MassListBaseClass):
    -198    """
    -199    The ReadMassList object reads unprocessed mass list data types
    -200    and returns the mass spectrum object.
    +184                    # Finally, find the isotopologue that matches the formula_dict
    +185                    matched_isos = []
    +186                    for iso in isos:
    +187                        # If match was already found, exit the loop
    +188                        if len(matched_isos) > 0:
    +189                            break
    +190                        else:
    +191                            # Check the atoms match
    +192                            if set(iso.atoms) == set(formula_dict.keys()):
    +193                                # Check the values of the atoms match
    +194                                if all(
    +195                                    [
    +196                                        iso[atom] == formula_dict[atom]
    +197                                        for atom in formula_dict
    +198                                    ]
    +199                                ):
    +200                                    matched_isos.append(iso)
     201
    -202    Parameters
    -203    ----------
    -204    MassListBaseClass : class
    -205        The base class for reading mass list data types.
    -206
    -207    Methods
    -208    -------
    -209    * get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.
    +202                    if len(matched_isos) == 0:
    +203                        raise ValueError("No isotopologue matched the formula_dict")
    +204                    mfobj = matched_isos[0]
    +205
    +206                    # Add the mono isotopic index, confidence score and isotopologue similarity
    +207                    mfobj.mspeak_index_mono_isotopic = int(
    +208                        dataframe.iloc[df_index]["Mono Isotopic Index"]
    +209                    )
     210
    -211    """
    -212
    -213    def get_mass_spectrum(self, polarity:int, scan:int=0, auto_process:bool=True, loadSettings:bool=True):
    -214        """
    -215        Reads mass list data types and returns the mass spectrum object.
    -216
    -217        Parameters
    -218        ----------
    -219        polarity : int
    -220            The polarity of the mass spectrum (+1 or -1).
    -221        scan : int, optional
    -222            The scan number of the mass spectrum (default is 0).
    -223        auto_process : bool, optional
    -224            Flag indicating whether to automatically process the mass spectrum (default is True).
    -225        loadSettings : bool, optional
    -226            Flag indicating whether to load settings for the mass spectrum (default is True).
    -227
    -228        Returns
    -229        -------
    -230        mass_spec : MassSpecCentroid or MassSpecProfile
    -231            The mass spectrum object.
    -232
    -233        """
    -234
    -235        # delimiter = "  " or " " or  "," or "\t" etc
    +211                # Add the confidence score and isotopologue similarity and average MZ error score
    +212                if "m/z Error Score" in dataframe:
    +213                    mfobj._mass_error_average_score = float(
    +214                        dataframe.iloc[df_index]["m/z Error Score"]
    +215                    )
    +216                if "Confidence Score" in dataframe:
    +217                    mfobj._confidence_score = float(
    +218                        dataframe.iloc[df_index]["Confidence Score"]
    +219                    )
    +220                if "Isotopologue Similarity" in dataframe:
    +221                    mfobj._isotopologue_similarity = float(
    +222                        dataframe.iloc[df_index]["Isotopologue Similarity"]
    +223                    )
    +224                mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)
    +225
    +226
    +227class ReadMassList(MassListBaseClass):
    +228    """
    +229    The ReadMassList object reads unprocessed mass list data types
    +230    and returns the mass spectrum object.
    +231
    +232    Parameters
    +233    ----------
    +234    MassListBaseClass : class
    +235        The base class for reading mass list data types.
     236
    -237        if self.isCentroid:
    -238
    -239            dataframe = self.get_dataframe()
    +237    Methods
    +238    -------
    +239    * get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.
     240
    -241            self.check_columns(dataframe.columns)
    +241    """
     242
    -243            self.clean_data_frame(dataframe)
    -244
    -245            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -246
    -247            output_parameters = self.get_output_parameters(polarity)
    -248
    -249            mass_spec = MassSpecCentroid(dataframe.to_dict(orient='list'), output_parameters, auto_process=auto_process)
    -250
    -251            if loadSettings:
    -252                self.load_settings(mass_spec, output_parameters)
    -253
    -254            return mass_spec
    -255
    -256        else:
    -257
    -258            dataframe = self.get_dataframe()
    -259
    -260            self.check_columns(dataframe.columns)
    -261
    -262            output_parameters = self.get_output_parameters(polarity)
    +243    def get_mass_spectrum(
    +244        self,
    +245        polarity: int,
    +246        scan: int = 0,
    +247        auto_process: bool = True,
    +248        loadSettings: bool = True,
    +249    ):
    +250        """
    +251        Reads mass list data types and returns the mass spectrum object.
    +252
    +253        Parameters
    +254        ----------
    +255        polarity : int
    +256            The polarity of the mass spectrum (+1 or -1).
    +257        scan : int, optional
    +258            The scan number of the mass spectrum (default is 0).
    +259        auto_process : bool, optional
    +260            Flag indicating whether to automatically process the mass spectrum (default is True).
    +261        loadSettings : bool, optional
    +262            Flag indicating whether to load settings for the mass spectrum (default is True).
     263
    -264            self.clean_data_frame(dataframe)
    -265
    -266            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -267
    -268            mass_spec = MassSpecProfile(dataframe.to_dict(orient='list'), output_parameters, auto_process=auto_process)
    -269
    -270            if loadSettings:
    -271                self.load_settings(mass_spec, output_parameters)
    +264        Returns
    +265        -------
    +266        mass_spec : MassSpecCentroid or MassSpecProfile
    +267            The mass spectrum object.
    +268
    +269        """
    +270
    +271        # delimiter = "  " or " " or  "," or "\t" etc
     272
    -273            return mass_spec
    -274    
    +273        if self.isCentroid:
    +274            dataframe = self.get_dataframe()
     275
    -276class ReadBrukerXMLList(MassListBaseClass):
    -277    """
    -278    The ReadBrukerXMLList object reads Bruker XML objects
    -279    and returns the mass spectrum object.
    -280    See MassListBaseClass for details
    +276            self.check_columns(dataframe.columns)
    +277
    +278            self.clean_data_frame(dataframe)
    +279
    +280            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
     281
    -282    Parameters
    -283    ----------
    -284    MassListBaseClass : class
    -285        The base class for reading mass list data types and returning the mass spectrum object.
    -286
    -287    Methods
    -288    -------
    -289    * get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.
    -290
    -291    """
    +282            output_parameters = self.get_output_parameters(polarity)
    +283
    +284            mass_spec = MassSpecCentroid(
    +285                dataframe.to_dict(orient="list"),
    +286                output_parameters,
    +287                auto_process=auto_process,
    +288            )
    +289
    +290            if loadSettings:
    +291                self.load_settings(mass_spec, output_parameters)
     292
    -293    def get_mass_spectrum(self, polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True):
    -294        """
    -295        Reads mass list data types and returns the mass spectrum object.
    -296
    -297        Parameters
    -298        ----------
    -299        polarity : bool, optional
    -300            The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
    -301        scan : int, optional
    -302            The scan number of the mass spectrum. Default is 0.
    -303        auto_process : bool, optional
    -304            Whether to automatically process the mass spectrum. Default is True.
    -305        loadSettings : bool, optional
    -306            Whether to load the settings for the mass spectrum. Default is True.
    -307
    -308        Returns
    -309        -------
    -310        mass_spec : MassSpecCentroid
    -311            The mass spectrum object representing the centroided mass spectrum.
    -312        """
    -313        # delimiter = "  " or " " or  "," or "\t" etc
    +293            return mass_spec
    +294
    +295        else:
    +296            dataframe = self.get_dataframe()
    +297
    +298            self.check_columns(dataframe.columns)
    +299
    +300            output_parameters = self.get_output_parameters(polarity)
    +301
    +302            self.clean_data_frame(dataframe)
    +303
    +304            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    +305
    +306            mass_spec = MassSpecProfile(
    +307                dataframe.to_dict(orient="list"),
    +308                output_parameters,
    +309                auto_process=auto_process,
    +310            )
    +311
    +312            if loadSettings:
    +313                self.load_settings(mass_spec, output_parameters)
     314
    -315        if polarity == None:
    -316            polarity = self.get_xml_polarity()
    -317        dataframe = self.get_dataframe()
    -318
    -319        self.check_columns(dataframe.columns)
    -320
    -321        self.clean_data_frame(dataframe)
    -322
    -323        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -324
    -325        output_parameters = self.get_output_parameters(polarity)
    -326
    -327        mass_spec = MassSpecCentroid(dataframe.to_dict(orient='list'), output_parameters, auto_process=auto_process)
    +315            return mass_spec
    +316
    +317
    +318class ReadBrukerXMLList(MassListBaseClass):
    +319    """
    +320    The ReadBrukerXMLList object reads Bruker XML objects
    +321    and returns the mass spectrum object.
    +322    See MassListBaseClass for details
    +323
    +324    Parameters
    +325    ----------
    +326    MassListBaseClass : class
    +327        The base class for reading mass list data types and returning the mass spectrum object.
     328
    -329        if loadSettings:
    -330            self.load_settings(mass_spec, output_parameters)
    -331
    -332        return mass_spec
    +329    Methods
    +330    -------
    +331    * get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.
    +332
    +333    """
    +334
    +335    def get_mass_spectrum(
    +336        self,
    +337        polarity: bool = None,
    +338        scan: int = 0,
    +339        auto_process: bool = True,
    +340        loadSettings: bool = True,
    +341    ):
    +342        """
    +343        Reads mass list data types and returns the mass spectrum object.
    +344
    +345        Parameters
    +346        ----------
    +347        polarity : bool, optional
    +348            The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
    +349        scan : int, optional
    +350            The scan number of the mass spectrum. Default is 0.
    +351        auto_process : bool, optional
    +352            Whether to automatically process the mass spectrum. Default is True.
    +353        loadSettings : bool, optional
    +354            Whether to load the settings for the mass spectrum. Default is True.
    +355
    +356        Returns
    +357        -------
    +358        mass_spec : MassSpecCentroid
    +359            The mass spectrum object representing the centroided mass spectrum.
    +360        """
    +361        # delimiter = "  " or " " or  "," or "\t" etc
    +362
    +363        if polarity == None:
    +364            polarity = self.get_xml_polarity()
    +365        dataframe = self.get_dataframe()
    +366
    +367        self.check_columns(dataframe.columns)
    +368
    +369        self.clean_data_frame(dataframe)
    +370
    +371        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    +372
    +373        output_parameters = self.get_output_parameters(polarity)
    +374
    +375        mass_spec = MassSpecCentroid(
    +376            dataframe.to_dict(orient="list"),
    +377            output_parameters,
    +378            auto_process=auto_process,
    +379        )
    +380
    +381        if loadSettings:
    +382            self.load_settings(mass_spec, output_parameters)
    +383
    +384        return mass_spec
     
    @@ -428,194 +480,223 @@

    -
     15class ReadCoremsMasslist(MassListBaseClass):
    - 16    """
    - 17    The ReadCoremsMasslist object reads processed mass list data types
    - 18    and returns the mass spectrum obj with the molecular formula obj
    - 19
    - 20    **Only available for centroid mass spectrum type:** it will ignore the parameter **isCentroid** 
    - 21    Please see MassListBaseClass for more details
    - 22
    - 23    """
    - 24
    - 25    def get_mass_spectrum(self, loadSettings:bool =True) -> MassSpecCentroid:
    - 26        """
    - 27        Get the mass spectrum object from the processed mass list data.
    - 28
    - 29        Parameters
    - 30        ----------
    - 31        loadSettings : bool, optional
    - 32            Whether to load the settings for the mass spectrum. Default is True.
    - 33
    - 34        Returns
    - 35        -------
    - 36        MassSpecCentroid
    - 37            The mass spectrum object.
    - 38
    - 39        Raises
    - 40        ------
    - 41        ValueError
    - 42            If the input file is not a valid CoreMS file.
    - 43        """
    - 44
    - 45        dataframe = self.get_dataframe()
    - 46
    - 47        if not set(['H/C', 'O/C', 'Heteroatom Class', 'Ion Type', 'Is Isotopologue']).issubset(dataframe.columns):
    - 48            raise ValueError("%s it is not a valid CoreMS file" % str(self.file_location))
    - 49
    - 50        self.check_columns(dataframe.columns)
    - 51
    - 52        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    - 53
    - 54        polarity = dataframe['Ion Charge'].values[0]
    - 55
    - 56        output_parameters = self.get_output_parameters(polarity)
    - 57
    - 58        mass_spec_obj = MassSpecCentroid(dataframe.to_dict(orient='list'), output_parameters)
    - 59
    - 60        if loadSettings is True:
    - 61            self.load_settings(mass_spec_obj, output_parameters)
    +            
     16class ReadCoremsMasslist(MassListBaseClass):
    + 17    """
    + 18    The ReadCoremsMasslist object reads processed mass list data types
    + 19    and returns the mass spectrum obj with the molecular formula obj
    + 20
    + 21    **Only available for centroid mass spectrum type:** it will ignore the parameter **isCentroid**
    + 22    Please see MassListBaseClass for more details
    + 23
    + 24    """
    + 25
    + 26    def get_mass_spectrum(self, loadSettings: bool = True) -> MassSpecCentroid:
    + 27        """
    + 28        Get the mass spectrum object from the processed mass list data.
    + 29
    + 30        Parameters
    + 31        ----------
    + 32        loadSettings : bool, optional
    + 33            Whether to load the settings for the mass spectrum. Default is True.
    + 34
    + 35        Returns
    + 36        -------
    + 37        MassSpecCentroid
    + 38            The mass spectrum object.
    + 39
    + 40        Raises
    + 41        ------
    + 42        ValueError
    + 43            If the input file is not a valid CoreMS file.
    + 44        """
    + 45
    + 46        dataframe = self.get_dataframe()
    + 47
    + 48        if not set(
    + 49            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
    + 50        ).issubset(dataframe.columns):
    + 51            raise ValueError(
    + 52                "%s it is not a valid CoreMS file" % str(self.file_location)
    + 53            )
    + 54
    + 55        self.check_columns(dataframe.columns)
    + 56
    + 57        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    + 58
    + 59        polarity = dataframe["Ion Charge"].values[0]
    + 60
    + 61        output_parameters = self.get_output_parameters(polarity)
      62
    - 63        self.add_molecular_formula(mass_spec_obj, dataframe)
    - 64
    - 65        return mass_spec_obj
    + 63        mass_spec_obj = MassSpecCentroid(
    + 64            dataframe.to_dict(orient="list"), output_parameters
    + 65        )
      66
    - 67    def add_molecular_formula(self, mass_spec_obj, dataframe):
    - 68        """
    - 69        Add molecular formula information to the mass spectrum object.
    - 70
    - 71        Parameters
    - 72        ----------
    - 73        mass_spec_obj : MassSpecCentroid
    - 74            The mass spectrum object to add the molecular formula to.
    - 75        dataframe : pandas.DataFrame
    - 76            The processed mass list data.
    - 77            
    - 78        """
    - 79
    - 80        # check if is coreMS file
    - 81        if 'Is Isotopologue' in dataframe:
    - 82            # Reindex dataframe to row index to avoid issues with duplicated indexes (e.g. when multiple formula map to single mz_exp)
    - 83            dataframe = dataframe.reset_index(drop=True)
    + 67        if loadSettings is True:
    + 68            self.load_settings(mass_spec_obj, output_parameters)
    + 69
    + 70        self.add_molecular_formula(mass_spec_obj, dataframe)
    + 71
    + 72        return mass_spec_obj
    + 73
    + 74    def add_molecular_formula(self, mass_spec_obj, dataframe):
    + 75        """
    + 76        Add molecular formula information to the mass spectrum object.
    + 77
    + 78        Parameters
    + 79        ----------
    + 80        mass_spec_obj : MassSpecCentroid
    + 81            The mass spectrum object to add the molecular formula to.
    + 82        dataframe : pandas.DataFrame
    + 83            The processed mass list data.
      84
    - 85            mz_exp_df = dataframe[Labels.mz].astype(float)
    - 86            formula_df = dataframe[dataframe.columns.intersection(Atoms.atoms_order)].copy()
    - 87            formula_df.fillna(0, inplace=True)
    - 88            formula_df.replace(b'nan', 0, inplace=True)
    - 89
    - 90            ion_type_df = dataframe["Ion Type"]
    - 91            ion_charge_df = dataframe["Ion Charge"]
    - 92            is_isotopologue_df = dataframe['Is Isotopologue']
    - 93            if 'Adduct' in dataframe:
    - 94                adduct_df = dataframe['Adduct']
    - 95            else:
    - 96                adduct_df = None
    - 97
    - 98        mass_spec_mz_exp_list = mass_spec_obj.mz_exp
    - 99
    -100        for df_index, mz_exp in enumerate(mz_exp_df):
    -101
    -102            counts = 0
    -103
    -104            ms_peak_index = list(mass_spec_mz_exp_list).index(float(mz_exp))
    -105
    -106            if 'Is Isotopologue' in dataframe:
    -107
    -108                atoms = list(formula_df.columns.astype(str))
    -109                counts = list(formula_df.iloc[df_index].astype(int))
    -110
    -111                formula_dict = dict(zip(atoms, counts))
    -112
    -113                # Drop any atoms with 0 counts
    -114                formula_dict = {atom: formula_dict[atom] for atom in formula_dict if formula_dict[atom] > 0}
    -115
    -116            if sum(counts) > 0:
    -117                ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
    -118                if adduct_df is not None:
    -119                    adduct_atom = str(adduct_df[df_index])
    -120                    if adduct_atom == 'None':
    -121                        adduct_atom = None
    -122                else:
    -123                    adduct_atom = None
    -124
    -125                # If not isotopologue, cast as MolecularFormula
    -126                if not bool(int(is_isotopologue_df[df_index])):
    -127                    mfobj = MolecularFormula(
    -128                        formula_dict, int(ion_charge_df[df_index]), 
    -129                        mspeak_parent=mass_spec_obj[ms_peak_index] , 
    -130                        ion_type=ion_type, adduct_atom=adduct_atom
    -131                        )
    -132                    
    -133                # if is isotopologue, recast as MolecularFormulaIsotopologue
    -134                if bool(int(is_isotopologue_df[df_index])):
    + 85        """
    + 86
    + 87        # check if is coreMS file
    + 88        if "Is Isotopologue" in dataframe:
    + 89            # Reindex dataframe to row index to avoid issues with duplicated indexes (e.g. when multiple formula map to single mz_exp)
    + 90            dataframe = dataframe.reset_index(drop=True)
    + 91
    + 92            mz_exp_df = dataframe[Labels.mz].astype(float)
    + 93            formula_df = dataframe[
    + 94                dataframe.columns.intersection(Atoms.atoms_order)
    + 95            ].copy()
    + 96            formula_df.fillna(0, inplace=True)
    + 97            formula_df.replace(b"nan", 0, inplace=True)
    + 98
    + 99            ion_type_df = dataframe["Ion Type"]
    +100            ion_charge_df = dataframe["Ion Charge"]
    +101            is_isotopologue_df = dataframe["Is Isotopologue"]
    +102            if "Adduct" in dataframe:
    +103                adduct_df = dataframe["Adduct"]
    +104            else:
    +105                adduct_df = None
    +106
    +107        mass_spec_mz_exp_list = mass_spec_obj.mz_exp
    +108
    +109        for df_index, mz_exp in enumerate(mz_exp_df):
    +110            counts = 0
    +111
    +112            ms_peak_index = list(mass_spec_mz_exp_list).index(float(mz_exp))
    +113
    +114            if "Is Isotopologue" in dataframe:
    +115                atoms = list(formula_df.columns.astype(str))
    +116                counts = list(formula_df.iloc[df_index].astype(int))
    +117
    +118                formula_dict = dict(zip(atoms, counts))
    +119
    +120                # Drop any atoms with 0 counts
    +121                formula_dict = {
    +122                    atom: formula_dict[atom]
    +123                    for atom in formula_dict
    +124                    if formula_dict[atom] > 0
    +125                }
    +126
    +127            if sum(counts) > 0:
    +128                ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
    +129                if adduct_df is not None:
    +130                    adduct_atom = str(adduct_df[df_index])
    +131                    if adduct_atom == "None":
    +132                        adduct_atom = None
    +133                else:
    +134                    adduct_atom = None
     135
    -136                    # First make a MolecularFormula object for the parent so we can get probabilities etc
    -137                    formula_list_parent = {}
    -138                    for atom in formula_dict:
    -139                        if atom in Atoms.isotopes.keys():
    -140                            formula_list_parent[atom] = formula_dict[atom]
    -141                        else:
    -142                            # remove any numbers from the atom name to cast as a mono-isotopic atom
    -143                            atom_mono = atom.strip('0123456789')
    -144                            if atom_mono in Atoms.isotopes.keys() and atom_mono in formula_list_parent.keys():
    -145                                formula_list_parent[atom_mono] = formula_list_parent[atom_mono]+formula_dict[atom]
    -146                            elif atom_mono in Atoms.isotopes.keys():
    -147                                formula_list_parent[atom_mono] = formula_dict[atom]
    -148                            else:
    -149                                warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
    -150                    mono_index = int(dataframe.iloc[df_index]['Mono Isotopic Index'])
    -151                    mono_mfobj = MolecularFormula(
    -152                        formula_list_parent, 
    -153                        int(ion_charge_df[df_index]), 
    -154                        mspeak_parent=mass_spec_obj[mono_index], 
    -155                        ion_type=ion_type, 
    -156                        adduct_atom=adduct_atom
    -157                        )
    -158                    
    -159                    # Next, generate isotopologues from the parent
    -160                    isos = list(
    -161                        mono_mfobj.isotopologues(
    -162                        min_abundance = mass_spec_obj[df_index].abundance*0.001, 
    -163                        current_mono_abundance = mass_spec_obj[mono_index].abundance, 
    -164                        dynamic_range = mass_spec_obj.dynamic_range
    -165                         )
    -166                    )
    -167
    -168                    # Finally, find the isotopologue that matches the formula_dict
    -169                    matched_isos = []
    -170                    for iso in isos:
    -171                        # If match was already found, exit the loop
    -172                        if len(matched_isos) > 0:
    -173                            break
    -174                        else:
    -175                            # Check the atoms match
    -176                            if set(iso.atoms) == set(formula_dict.keys()):
    -177                                # Check the values of the atoms match
    -178                                if all([iso[atom] == formula_dict[atom] for atom in formula_dict]):
    -179                                    matched_isos.append(iso)
    -180
    -181                    if len(matched_isos) == 0:
    -182                        raise ValueError("No isotopologue matched the formula_dict")
    -183                    mfobj = matched_isos[0]        
    +136                # If not isotopologue, cast as MolecularFormula
    +137                if not bool(int(is_isotopologue_df[df_index])):
    +138                    mfobj = MolecularFormula(
    +139                        formula_dict,
    +140                        int(ion_charge_df[df_index]),
    +141                        mspeak_parent=mass_spec_obj[ms_peak_index],
    +142                        ion_type=ion_type,
    +143                        adduct_atom=adduct_atom,
    +144                    )
    +145
    +146                # if is isotopologue, recast as MolecularFormulaIsotopologue
    +147                if bool(int(is_isotopologue_df[df_index])):
    +148                    # First make a MolecularFormula object for the parent so we can get probabilities etc
    +149                    formula_list_parent = {}
    +150                    for atom in formula_dict:
    +151                        if atom in Atoms.isotopes.keys():
    +152                            formula_list_parent[atom] = formula_dict[atom]
    +153                        else:
    +154                            # remove any numbers from the atom name to cast as a mono-isotopic atom
    +155                            atom_mono = atom.strip("0123456789")
    +156                            if (
    +157                                atom_mono in Atoms.isotopes.keys()
    +158                                and atom_mono in formula_list_parent.keys()
    +159                            ):
    +160                                formula_list_parent[atom_mono] = (
    +161                                    formula_list_parent[atom_mono] + formula_dict[atom]
    +162                                )
    +163                            elif atom_mono in Atoms.isotopes.keys():
    +164                                formula_list_parent[atom_mono] = formula_dict[atom]
    +165                            else:
    +166                                warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
    +167                    mono_index = int(dataframe.iloc[df_index]["Mono Isotopic Index"])
    +168                    mono_mfobj = MolecularFormula(
    +169                        formula_list_parent,
    +170                        int(ion_charge_df[df_index]),
    +171                        mspeak_parent=mass_spec_obj[mono_index],
    +172                        ion_type=ion_type,
    +173                        adduct_atom=adduct_atom,
    +174                    )
    +175
    +176                    # Next, generate isotopologues from the parent
    +177                    isos = list(
    +178                        mono_mfobj.isotopologues(
    +179                            min_abundance=mass_spec_obj[df_index].abundance * 0.001,
    +180                            current_mono_abundance=mass_spec_obj[mono_index].abundance,
    +181                            dynamic_range=mass_spec_obj.dynamic_range,
    +182                        )
    +183                    )
     184
    -185                    # Add the mono isotopic index, confidence score and isotopologue similarity    
    -186                    mfobj.mspeak_index_mono_isotopic = int(dataframe.iloc[df_index]['Mono Isotopic Index'])
    -187                
    -188                # Add the confidence score and isotopologue similarity and average MZ error score
    -189                if 'm/z Error Score' in dataframe:
    -190                    mfobj._mass_error_average_score = float(dataframe.iloc[df_index]['m/z Error Score'])
    -191                if 'Confidence Score' in dataframe:
    -192                    mfobj._confidence_score = float(dataframe.iloc[df_index]['Confidence Score'])
    -193                if 'Isotopologue Similarity' in dataframe:
    -194                    mfobj._isotopologue_similarity = float(dataframe.iloc[df_index]['Isotopologue Similarity'])
    -195                mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)
    +185                    # Finally, find the isotopologue that matches the formula_dict
    +186                    matched_isos = []
    +187                    for iso in isos:
    +188                        # If match was already found, exit the loop
    +189                        if len(matched_isos) > 0:
    +190                            break
    +191                        else:
    +192                            # Check the atoms match
    +193                            if set(iso.atoms) == set(formula_dict.keys()):
    +194                                # Check the values of the atoms match
    +195                                if all(
    +196                                    [
    +197                                        iso[atom] == formula_dict[atom]
    +198                                        for atom in formula_dict
    +199                                    ]
    +200                                ):
    +201                                    matched_isos.append(iso)
    +202
    +203                    if len(matched_isos) == 0:
    +204                        raise ValueError("No isotopologue matched the formula_dict")
    +205                    mfobj = matched_isos[0]
    +206
    +207                    # Add the mono isotopic index, confidence score and isotopologue similarity
    +208                    mfobj.mspeak_index_mono_isotopic = int(
    +209                        dataframe.iloc[df_index]["Mono Isotopic Index"]
    +210                    )
    +211
    +212                # Add the confidence score and isotopologue similarity and average MZ error score
    +213                if "m/z Error Score" in dataframe:
    +214                    mfobj._mass_error_average_score = float(
    +215                        dataframe.iloc[df_index]["m/z Error Score"]
    +216                    )
    +217                if "Confidence Score" in dataframe:
    +218                    mfobj._confidence_score = float(
    +219                        dataframe.iloc[df_index]["Confidence Score"]
    +220                    )
    +221                if "Isotopologue Similarity" in dataframe:
    +222                    mfobj._isotopologue_similarity = float(
    +223                        dataframe.iloc[df_index]["Isotopologue Similarity"]
    +224                    )
    +225                mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)
     

    The ReadCoremsMasslist object reads processed mass list data types and returns the mass spectrum obj with the molecular formula obj

    -

    Only available for centroid mass spectrum type: it will ignore the parameter isCentroid +

    Only available for centroid mass spectrum type: it will ignore the parameter isCentroid Please see MassListBaseClass for more details

    @@ -631,47 +712,53 @@

    -
    25    def get_mass_spectrum(self, loadSettings:bool =True) -> MassSpecCentroid:
    -26        """
    -27        Get the mass spectrum object from the processed mass list data.
    -28
    -29        Parameters
    -30        ----------
    -31        loadSettings : bool, optional
    -32            Whether to load the settings for the mass spectrum. Default is True.
    -33
    -34        Returns
    -35        -------
    -36        MassSpecCentroid
    -37            The mass spectrum object.
    -38
    -39        Raises
    -40        ------
    -41        ValueError
    -42            If the input file is not a valid CoreMS file.
    -43        """
    -44
    -45        dataframe = self.get_dataframe()
    -46
    -47        if not set(['H/C', 'O/C', 'Heteroatom Class', 'Ion Type', 'Is Isotopologue']).issubset(dataframe.columns):
    -48            raise ValueError("%s it is not a valid CoreMS file" % str(self.file_location))
    -49
    -50        self.check_columns(dataframe.columns)
    -51
    -52        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -53
    -54        polarity = dataframe['Ion Charge'].values[0]
    -55
    -56        output_parameters = self.get_output_parameters(polarity)
    -57
    -58        mass_spec_obj = MassSpecCentroid(dataframe.to_dict(orient='list'), output_parameters)
    -59
    -60        if loadSettings is True:
    -61            self.load_settings(mass_spec_obj, output_parameters)
    +            
    26    def get_mass_spectrum(self, loadSettings: bool = True) -> MassSpecCentroid:
    +27        """
    +28        Get the mass spectrum object from the processed mass list data.
    +29
    +30        Parameters
    +31        ----------
    +32        loadSettings : bool, optional
    +33            Whether to load the settings for the mass spectrum. Default is True.
    +34
    +35        Returns
    +36        -------
    +37        MassSpecCentroid
    +38            The mass spectrum object.
    +39
    +40        Raises
    +41        ------
    +42        ValueError
    +43            If the input file is not a valid CoreMS file.
    +44        """
    +45
    +46        dataframe = self.get_dataframe()
    +47
    +48        if not set(
    +49            ["H/C", "O/C", "Heteroatom Class", "Ion Type", "Is Isotopologue"]
    +50        ).issubset(dataframe.columns):
    +51            raise ValueError(
    +52                "%s it is not a valid CoreMS file" % str(self.file_location)
    +53            )
    +54
    +55        self.check_columns(dataframe.columns)
    +56
    +57        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    +58
    +59        polarity = dataframe["Ion Charge"].values[0]
    +60
    +61        output_parameters = self.get_output_parameters(polarity)
     62
    -63        self.add_molecular_formula(mass_spec_obj, dataframe)
    -64
    -65        return mass_spec_obj
    +63        mass_spec_obj = MassSpecCentroid(
    +64            dataframe.to_dict(orient="list"), output_parameters
    +65        )
    +66
    +67        if loadSettings is True:
    +68            self.load_settings(mass_spec_obj, output_parameters)
    +69
    +70        self.add_molecular_formula(mass_spec_obj, dataframe)
    +71
    +72        return mass_spec_obj
     
    @@ -710,135 +797,158 @@
    Raises
    -
     67    def add_molecular_formula(self, mass_spec_obj, dataframe):
    - 68        """
    - 69        Add molecular formula information to the mass spectrum object.
    - 70
    - 71        Parameters
    - 72        ----------
    - 73        mass_spec_obj : MassSpecCentroid
    - 74            The mass spectrum object to add the molecular formula to.
    - 75        dataframe : pandas.DataFrame
    - 76            The processed mass list data.
    - 77            
    - 78        """
    - 79
    - 80        # check if is coreMS file
    - 81        if 'Is Isotopologue' in dataframe:
    - 82            # Reindex dataframe to row index to avoid issues with duplicated indexes (e.g. when multiple formula map to single mz_exp)
    - 83            dataframe = dataframe.reset_index(drop=True)
    +            
     74    def add_molecular_formula(self, mass_spec_obj, dataframe):
    + 75        """
    + 76        Add molecular formula information to the mass spectrum object.
    + 77
    + 78        Parameters
    + 79        ----------
    + 80        mass_spec_obj : MassSpecCentroid
    + 81            The mass spectrum object to add the molecular formula to.
    + 82        dataframe : pandas.DataFrame
    + 83            The processed mass list data.
      84
    - 85            mz_exp_df = dataframe[Labels.mz].astype(float)
    - 86            formula_df = dataframe[dataframe.columns.intersection(Atoms.atoms_order)].copy()
    - 87            formula_df.fillna(0, inplace=True)
    - 88            formula_df.replace(b'nan', 0, inplace=True)
    - 89
    - 90            ion_type_df = dataframe["Ion Type"]
    - 91            ion_charge_df = dataframe["Ion Charge"]
    - 92            is_isotopologue_df = dataframe['Is Isotopologue']
    - 93            if 'Adduct' in dataframe:
    - 94                adduct_df = dataframe['Adduct']
    - 95            else:
    - 96                adduct_df = None
    - 97
    - 98        mass_spec_mz_exp_list = mass_spec_obj.mz_exp
    - 99
    -100        for df_index, mz_exp in enumerate(mz_exp_df):
    -101
    -102            counts = 0
    -103
    -104            ms_peak_index = list(mass_spec_mz_exp_list).index(float(mz_exp))
    -105
    -106            if 'Is Isotopologue' in dataframe:
    -107
    -108                atoms = list(formula_df.columns.astype(str))
    -109                counts = list(formula_df.iloc[df_index].astype(int))
    -110
    -111                formula_dict = dict(zip(atoms, counts))
    -112
    -113                # Drop any atoms with 0 counts
    -114                formula_dict = {atom: formula_dict[atom] for atom in formula_dict if formula_dict[atom] > 0}
    -115
    -116            if sum(counts) > 0:
    -117                ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
    -118                if adduct_df is not None:
    -119                    adduct_atom = str(adduct_df[df_index])
    -120                    if adduct_atom == 'None':
    -121                        adduct_atom = None
    -122                else:
    -123                    adduct_atom = None
    -124
    -125                # If not isotopologue, cast as MolecularFormula
    -126                if not bool(int(is_isotopologue_df[df_index])):
    -127                    mfobj = MolecularFormula(
    -128                        formula_dict, int(ion_charge_df[df_index]), 
    -129                        mspeak_parent=mass_spec_obj[ms_peak_index] , 
    -130                        ion_type=ion_type, adduct_atom=adduct_atom
    -131                        )
    -132                    
    -133                # if is isotopologue, recast as MolecularFormulaIsotopologue
    -134                if bool(int(is_isotopologue_df[df_index])):
    + 85        """
    + 86
    + 87        # check if is coreMS file
    + 88        if "Is Isotopologue" in dataframe:
    + 89            # Reindex dataframe to row index to avoid issues with duplicated indexes (e.g. when multiple formula map to single mz_exp)
    + 90            dataframe = dataframe.reset_index(drop=True)
    + 91
    + 92            mz_exp_df = dataframe[Labels.mz].astype(float)
    + 93            formula_df = dataframe[
    + 94                dataframe.columns.intersection(Atoms.atoms_order)
    + 95            ].copy()
    + 96            formula_df.fillna(0, inplace=True)
    + 97            formula_df.replace(b"nan", 0, inplace=True)
    + 98
    + 99            ion_type_df = dataframe["Ion Type"]
    +100            ion_charge_df = dataframe["Ion Charge"]
    +101            is_isotopologue_df = dataframe["Is Isotopologue"]
    +102            if "Adduct" in dataframe:
    +103                adduct_df = dataframe["Adduct"]
    +104            else:
    +105                adduct_df = None
    +106
    +107        mass_spec_mz_exp_list = mass_spec_obj.mz_exp
    +108
    +109        for df_index, mz_exp in enumerate(mz_exp_df):
    +110            counts = 0
    +111
    +112            ms_peak_index = list(mass_spec_mz_exp_list).index(float(mz_exp))
    +113
    +114            if "Is Isotopologue" in dataframe:
    +115                atoms = list(formula_df.columns.astype(str))
    +116                counts = list(formula_df.iloc[df_index].astype(int))
    +117
    +118                formula_dict = dict(zip(atoms, counts))
    +119
    +120                # Drop any atoms with 0 counts
    +121                formula_dict = {
    +122                    atom: formula_dict[atom]
    +123                    for atom in formula_dict
    +124                    if formula_dict[atom] > 0
    +125                }
    +126
    +127            if sum(counts) > 0:
    +128                ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
    +129                if adduct_df is not None:
    +130                    adduct_atom = str(adduct_df[df_index])
    +131                    if adduct_atom == "None":
    +132                        adduct_atom = None
    +133                else:
    +134                    adduct_atom = None
     135
    -136                    # First make a MolecularFormula object for the parent so we can get probabilities etc
    -137                    formula_list_parent = {}
    -138                    for atom in formula_dict:
    -139                        if atom in Atoms.isotopes.keys():
    -140                            formula_list_parent[atom] = formula_dict[atom]
    -141                        else:
    -142                            # remove any numbers from the atom name to cast as a mono-isotopic atom
    -143                            atom_mono = atom.strip('0123456789')
    -144                            if atom_mono in Atoms.isotopes.keys() and atom_mono in formula_list_parent.keys():
    -145                                formula_list_parent[atom_mono] = formula_list_parent[atom_mono]+formula_dict[atom]
    -146                            elif atom_mono in Atoms.isotopes.keys():
    -147                                formula_list_parent[atom_mono] = formula_dict[atom]
    -148                            else:
    -149                                warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
    -150                    mono_index = int(dataframe.iloc[df_index]['Mono Isotopic Index'])
    -151                    mono_mfobj = MolecularFormula(
    -152                        formula_list_parent, 
    -153                        int(ion_charge_df[df_index]), 
    -154                        mspeak_parent=mass_spec_obj[mono_index], 
    -155                        ion_type=ion_type, 
    -156                        adduct_atom=adduct_atom
    -157                        )
    -158                    
    -159                    # Next, generate isotopologues from the parent
    -160                    isos = list(
    -161                        mono_mfobj.isotopologues(
    -162                        min_abundance = mass_spec_obj[df_index].abundance*0.001, 
    -163                        current_mono_abundance = mass_spec_obj[mono_index].abundance, 
    -164                        dynamic_range = mass_spec_obj.dynamic_range
    -165                         )
    -166                    )
    -167
    -168                    # Finally, find the isotopologue that matches the formula_dict
    -169                    matched_isos = []
    -170                    for iso in isos:
    -171                        # If match was already found, exit the loop
    -172                        if len(matched_isos) > 0:
    -173                            break
    -174                        else:
    -175                            # Check the atoms match
    -176                            if set(iso.atoms) == set(formula_dict.keys()):
    -177                                # Check the values of the atoms match
    -178                                if all([iso[atom] == formula_dict[atom] for atom in formula_dict]):
    -179                                    matched_isos.append(iso)
    -180
    -181                    if len(matched_isos) == 0:
    -182                        raise ValueError("No isotopologue matched the formula_dict")
    -183                    mfobj = matched_isos[0]        
    +136                # If not isotopologue, cast as MolecularFormula
    +137                if not bool(int(is_isotopologue_df[df_index])):
    +138                    mfobj = MolecularFormula(
    +139                        formula_dict,
    +140                        int(ion_charge_df[df_index]),
    +141                        mspeak_parent=mass_spec_obj[ms_peak_index],
    +142                        ion_type=ion_type,
    +143                        adduct_atom=adduct_atom,
    +144                    )
    +145
    +146                # if is isotopologue, recast as MolecularFormulaIsotopologue
    +147                if bool(int(is_isotopologue_df[df_index])):
    +148                    # First make a MolecularFormula object for the parent so we can get probabilities etc
    +149                    formula_list_parent = {}
    +150                    for atom in formula_dict:
    +151                        if atom in Atoms.isotopes.keys():
    +152                            formula_list_parent[atom] = formula_dict[atom]
    +153                        else:
    +154                            # remove any numbers from the atom name to cast as a mono-isotopic atom
    +155                            atom_mono = atom.strip("0123456789")
    +156                            if (
    +157                                atom_mono in Atoms.isotopes.keys()
    +158                                and atom_mono in formula_list_parent.keys()
    +159                            ):
    +160                                formula_list_parent[atom_mono] = (
    +161                                    formula_list_parent[atom_mono] + formula_dict[atom]
    +162                                )
    +163                            elif atom_mono in Atoms.isotopes.keys():
    +164                                formula_list_parent[atom_mono] = formula_dict[atom]
    +165                            else:
    +166                                warnings.warn(f"Atom {atom} not in Atoms.atoms_order")
    +167                    mono_index = int(dataframe.iloc[df_index]["Mono Isotopic Index"])
    +168                    mono_mfobj = MolecularFormula(
    +169                        formula_list_parent,
    +170                        int(ion_charge_df[df_index]),
    +171                        mspeak_parent=mass_spec_obj[mono_index],
    +172                        ion_type=ion_type,
    +173                        adduct_atom=adduct_atom,
    +174                    )
    +175
    +176                    # Next, generate isotopologues from the parent
    +177                    isos = list(
    +178                        mono_mfobj.isotopologues(
    +179                            min_abundance=mass_spec_obj[df_index].abundance * 0.001,
    +180                            current_mono_abundance=mass_spec_obj[mono_index].abundance,
    +181                            dynamic_range=mass_spec_obj.dynamic_range,
    +182                        )
    +183                    )
     184
    -185                    # Add the mono isotopic index, confidence score and isotopologue similarity    
    -186                    mfobj.mspeak_index_mono_isotopic = int(dataframe.iloc[df_index]['Mono Isotopic Index'])
    -187                
    -188                # Add the confidence score and isotopologue similarity and average MZ error score
    -189                if 'm/z Error Score' in dataframe:
    -190                    mfobj._mass_error_average_score = float(dataframe.iloc[df_index]['m/z Error Score'])
    -191                if 'Confidence Score' in dataframe:
    -192                    mfobj._confidence_score = float(dataframe.iloc[df_index]['Confidence Score'])
    -193                if 'Isotopologue Similarity' in dataframe:
    -194                    mfobj._isotopologue_similarity = float(dataframe.iloc[df_index]['Isotopologue Similarity'])
    -195                mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)
    +185                    # Finally, find the isotopologue that matches the formula_dict
    +186                    matched_isos = []
    +187                    for iso in isos:
    +188                        # If match was already found, exit the loop
    +189                        if len(matched_isos) > 0:
    +190                            break
    +191                        else:
    +192                            # Check the atoms match
    +193                            if set(iso.atoms) == set(formula_dict.keys()):
    +194                                # Check the values of the atoms match
    +195                                if all(
    +196                                    [
    +197                                        iso[atom] == formula_dict[atom]
    +198                                        for atom in formula_dict
    +199                                    ]
    +200                                ):
    +201                                    matched_isos.append(iso)
    +202
    +203                    if len(matched_isos) == 0:
    +204                        raise ValueError("No isotopologue matched the formula_dict")
    +205                    mfobj = matched_isos[0]
    +206
    +207                    # Add the mono isotopic index, confidence score and isotopologue similarity
    +208                    mfobj.mspeak_index_mono_isotopic = int(
    +209                        dataframe.iloc[df_index]["Mono Isotopic Index"]
    +210                    )
    +211
    +212                # Add the confidence score and isotopologue similarity and average MZ error score
    +213                if "m/z Error Score" in dataframe:
    +214                    mfobj._mass_error_average_score = float(
    +215                        dataframe.iloc[df_index]["m/z Error Score"]
    +216                    )
    +217                if "Confidence Score" in dataframe:
    +218                    mfobj._confidence_score = float(
    +219                        dataframe.iloc[df_index]["Confidence Score"]
    +220                    )
    +221                if "Isotopologue Similarity" in dataframe:
    +222                    mfobj._isotopologue_similarity = float(
    +223                        dataframe.iloc[df_index]["Isotopologue Similarity"]
    +224                    )
    +225                mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)
     
    @@ -899,83 +1009,95 @@
    Inherited Members
    -
    198class ReadMassList(MassListBaseClass):
    -199    """
    -200    The ReadMassList object reads unprocessed mass list data types
    -201    and returns the mass spectrum object.
    -202
    -203    Parameters
    -204    ----------
    -205    MassListBaseClass : class
    -206        The base class for reading mass list data types.
    -207
    -208    Methods
    -209    -------
    -210    * get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.
    -211
    -212    """
    -213
    -214    def get_mass_spectrum(self, polarity:int, scan:int=0, auto_process:bool=True, loadSettings:bool=True):
    -215        """
    -216        Reads mass list data types and returns the mass spectrum object.
    -217
    -218        Parameters
    -219        ----------
    -220        polarity : int
    -221            The polarity of the mass spectrum (+1 or -1).
    -222        scan : int, optional
    -223            The scan number of the mass spectrum (default is 0).
    -224        auto_process : bool, optional
    -225            Flag indicating whether to automatically process the mass spectrum (default is True).
    -226        loadSettings : bool, optional
    -227            Flag indicating whether to load settings for the mass spectrum (default is True).
    -228
    -229        Returns
    -230        -------
    -231        mass_spec : MassSpecCentroid or MassSpecProfile
    -232            The mass spectrum object.
    -233
    -234        """
    -235
    -236        # delimiter = "  " or " " or  "," or "\t" etc
    +            
    228class ReadMassList(MassListBaseClass):
    +229    """
    +230    The ReadMassList object reads unprocessed mass list data types
    +231    and returns the mass spectrum object.
    +232
    +233    Parameters
    +234    ----------
    +235    MassListBaseClass : class
    +236        The base class for reading mass list data types.
     237
    -238        if self.isCentroid:
    -239
    -240            dataframe = self.get_dataframe()
    +238    Methods
    +239    -------
    +240    * get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.
     241
    -242            self.check_columns(dataframe.columns)
    +242    """
     243
    -244            self.clean_data_frame(dataframe)
    -245
    -246            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -247
    -248            output_parameters = self.get_output_parameters(polarity)
    -249
    -250            mass_spec = MassSpecCentroid(dataframe.to_dict(orient='list'), output_parameters, auto_process=auto_process)
    -251
    -252            if loadSettings:
    -253                self.load_settings(mass_spec, output_parameters)
    -254
    -255            return mass_spec
    -256
    -257        else:
    -258
    -259            dataframe = self.get_dataframe()
    -260
    -261            self.check_columns(dataframe.columns)
    -262
    -263            output_parameters = self.get_output_parameters(polarity)
    +244    def get_mass_spectrum(
    +245        self,
    +246        polarity: int,
    +247        scan: int = 0,
    +248        auto_process: bool = True,
    +249        loadSettings: bool = True,
    +250    ):
    +251        """
    +252        Reads mass list data types and returns the mass spectrum object.
    +253
    +254        Parameters
    +255        ----------
    +256        polarity : int
    +257            The polarity of the mass spectrum (+1 or -1).
    +258        scan : int, optional
    +259            The scan number of the mass spectrum (default is 0).
    +260        auto_process : bool, optional
    +261            Flag indicating whether to automatically process the mass spectrum (default is True).
    +262        loadSettings : bool, optional
    +263            Flag indicating whether to load settings for the mass spectrum (default is True).
     264
    -265            self.clean_data_frame(dataframe)
    -266
    -267            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -268
    -269            mass_spec = MassSpecProfile(dataframe.to_dict(orient='list'), output_parameters, auto_process=auto_process)
    -270
    -271            if loadSettings:
    -272                self.load_settings(mass_spec, output_parameters)
    +265        Returns
    +266        -------
    +267        mass_spec : MassSpecCentroid or MassSpecProfile
    +268            The mass spectrum object.
    +269
    +270        """
    +271
    +272        # delimiter = "  " or " " or  "," or "\t" etc
     273
    -274            return mass_spec
    +274        if self.isCentroid:
    +275            dataframe = self.get_dataframe()
    +276
    +277            self.check_columns(dataframe.columns)
    +278
    +279            self.clean_data_frame(dataframe)
    +280
    +281            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    +282
    +283            output_parameters = self.get_output_parameters(polarity)
    +284
    +285            mass_spec = MassSpecCentroid(
    +286                dataframe.to_dict(orient="list"),
    +287                output_parameters,
    +288                auto_process=auto_process,
    +289            )
    +290
    +291            if loadSettings:
    +292                self.load_settings(mass_spec, output_parameters)
    +293
    +294            return mass_spec
    +295
    +296        else:
    +297            dataframe = self.get_dataframe()
    +298
    +299            self.check_columns(dataframe.columns)
    +300
    +301            output_parameters = self.get_output_parameters(polarity)
    +302
    +303            self.clean_data_frame(dataframe)
    +304
    +305            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    +306
    +307            mass_spec = MassSpecProfile(
    +308                dataframe.to_dict(orient="list"),
    +309                output_parameters,
    +310                auto_process=auto_process,
    +311            )
    +312
    +313            if loadSettings:
    +314                self.load_settings(mass_spec, output_parameters)
    +315
    +316            return mass_spec
     
    @@ -1008,67 +1130,79 @@
    Methods
    -
    214    def get_mass_spectrum(self, polarity:int, scan:int=0, auto_process:bool=True, loadSettings:bool=True):
    -215        """
    -216        Reads mass list data types and returns the mass spectrum object.
    -217
    -218        Parameters
    -219        ----------
    -220        polarity : int
    -221            The polarity of the mass spectrum (+1 or -1).
    -222        scan : int, optional
    -223            The scan number of the mass spectrum (default is 0).
    -224        auto_process : bool, optional
    -225            Flag indicating whether to automatically process the mass spectrum (default is True).
    -226        loadSettings : bool, optional
    -227            Flag indicating whether to load settings for the mass spectrum (default is True).
    -228
    -229        Returns
    -230        -------
    -231        mass_spec : MassSpecCentroid or MassSpecProfile
    -232            The mass spectrum object.
    -233
    -234        """
    -235
    -236        # delimiter = "  " or " " or  "," or "\t" etc
    -237
    -238        if self.isCentroid:
    -239
    -240            dataframe = self.get_dataframe()
    -241
    -242            self.check_columns(dataframe.columns)
    -243
    -244            self.clean_data_frame(dataframe)
    -245
    -246            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -247
    -248            output_parameters = self.get_output_parameters(polarity)
    -249
    -250            mass_spec = MassSpecCentroid(dataframe.to_dict(orient='list'), output_parameters, auto_process=auto_process)
    -251
    -252            if loadSettings:
    -253                self.load_settings(mass_spec, output_parameters)
    -254
    -255            return mass_spec
    -256
    -257        else:
    -258
    -259            dataframe = self.get_dataframe()
    -260
    -261            self.check_columns(dataframe.columns)
    -262
    -263            output_parameters = self.get_output_parameters(polarity)
    +            
    244    def get_mass_spectrum(
    +245        self,
    +246        polarity: int,
    +247        scan: int = 0,
    +248        auto_process: bool = True,
    +249        loadSettings: bool = True,
    +250    ):
    +251        """
    +252        Reads mass list data types and returns the mass spectrum object.
    +253
    +254        Parameters
    +255        ----------
    +256        polarity : int
    +257            The polarity of the mass spectrum (+1 or -1).
    +258        scan : int, optional
    +259            The scan number of the mass spectrum (default is 0).
    +260        auto_process : bool, optional
    +261            Flag indicating whether to automatically process the mass spectrum (default is True).
    +262        loadSettings : bool, optional
    +263            Flag indicating whether to load settings for the mass spectrum (default is True).
     264
    -265            self.clean_data_frame(dataframe)
    -266
    -267            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -268
    -269            mass_spec = MassSpecProfile(dataframe.to_dict(orient='list'), output_parameters, auto_process=auto_process)
    -270
    -271            if loadSettings:
    -272                self.load_settings(mass_spec, output_parameters)
    +265        Returns
    +266        -------
    +267        mass_spec : MassSpecCentroid or MassSpecProfile
    +268            The mass spectrum object.
    +269
    +270        """
    +271
    +272        # delimiter = "  " or " " or  "," or "\t" etc
     273
    -274            return mass_spec
    +274        if self.isCentroid:
    +275            dataframe = self.get_dataframe()
    +276
    +277            self.check_columns(dataframe.columns)
    +278
    +279            self.clean_data_frame(dataframe)
    +280
    +281            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    +282
    +283            output_parameters = self.get_output_parameters(polarity)
    +284
    +285            mass_spec = MassSpecCentroid(
    +286                dataframe.to_dict(orient="list"),
    +287                output_parameters,
    +288                auto_process=auto_process,
    +289            )
    +290
    +291            if loadSettings:
    +292                self.load_settings(mass_spec, output_parameters)
    +293
    +294            return mass_spec
    +295
    +296        else:
    +297            dataframe = self.get_dataframe()
    +298
    +299            self.check_columns(dataframe.columns)
    +300
    +301            output_parameters = self.get_output_parameters(polarity)
    +302
    +303            self.clean_data_frame(dataframe)
    +304
    +305            dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    +306
    +307            mass_spec = MassSpecProfile(
    +308                dataframe.to_dict(orient="list"),
    +309                output_parameters,
    +310                auto_process=auto_process,
    +311            )
    +312
    +313            if loadSettings:
    +314                self.load_settings(mass_spec, output_parameters)
    +315
    +316            return mass_spec
     
    @@ -1140,63 +1274,73 @@
    Inherited Members
    -
    277class ReadBrukerXMLList(MassListBaseClass):
    -278    """
    -279    The ReadBrukerXMLList object reads Bruker XML objects
    -280    and returns the mass spectrum object.
    -281    See MassListBaseClass for details
    -282
    -283    Parameters
    -284    ----------
    -285    MassListBaseClass : class
    -286        The base class for reading mass list data types and returning the mass spectrum object.
    -287
    -288    Methods
    -289    -------
    -290    * get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.
    -291
    -292    """
    -293
    -294    def get_mass_spectrum(self, polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True):
    -295        """
    -296        Reads mass list data types and returns the mass spectrum object.
    -297
    -298        Parameters
    -299        ----------
    -300        polarity : bool, optional
    -301            The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
    -302        scan : int, optional
    -303            The scan number of the mass spectrum. Default is 0.
    -304        auto_process : bool, optional
    -305            Whether to automatically process the mass spectrum. Default is True.
    -306        loadSettings : bool, optional
    -307            Whether to load the settings for the mass spectrum. Default is True.
    -308
    -309        Returns
    -310        -------
    -311        mass_spec : MassSpecCentroid
    -312            The mass spectrum object representing the centroided mass spectrum.
    -313        """
    -314        # delimiter = "  " or " " or  "," or "\t" etc
    -315
    -316        if polarity == None:
    -317            polarity = self.get_xml_polarity()
    -318        dataframe = self.get_dataframe()
    -319
    -320        self.check_columns(dataframe.columns)
    -321
    -322        self.clean_data_frame(dataframe)
    -323
    -324        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -325
    -326        output_parameters = self.get_output_parameters(polarity)
    -327
    -328        mass_spec = MassSpecCentroid(dataframe.to_dict(orient='list'), output_parameters, auto_process=auto_process)
    +            
    319class ReadBrukerXMLList(MassListBaseClass):
    +320    """
    +321    The ReadBrukerXMLList object reads Bruker XML objects
    +322    and returns the mass spectrum object.
    +323    See MassListBaseClass for details
    +324
    +325    Parameters
    +326    ----------
    +327    MassListBaseClass : class
    +328        The base class for reading mass list data types and returning the mass spectrum object.
     329
    -330        if loadSettings:
    -331            self.load_settings(mass_spec, output_parameters)
    -332
    -333        return mass_spec
    +330    Methods
    +331    -------
    +332    * get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.
    +333
    +334    """
    +335
    +336    def get_mass_spectrum(
    +337        self,
    +338        polarity: bool = None,
    +339        scan: int = 0,
    +340        auto_process: bool = True,
    +341        loadSettings: bool = True,
    +342    ):
    +343        """
    +344        Reads mass list data types and returns the mass spectrum object.
    +345
    +346        Parameters
    +347        ----------
    +348        polarity : bool, optional
    +349            The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
    +350        scan : int, optional
    +351            The scan number of the mass spectrum. Default is 0.
    +352        auto_process : bool, optional
    +353            Whether to automatically process the mass spectrum. Default is True.
    +354        loadSettings : bool, optional
    +355            Whether to load the settings for the mass spectrum. Default is True.
    +356
    +357        Returns
    +358        -------
    +359        mass_spec : MassSpecCentroid
    +360            The mass spectrum object representing the centroided mass spectrum.
    +361        """
    +362        # delimiter = "  " or " " or  "," or "\t" etc
    +363
    +364        if polarity == None:
    +365            polarity = self.get_xml_polarity()
    +366        dataframe = self.get_dataframe()
    +367
    +368        self.check_columns(dataframe.columns)
    +369
    +370        self.clean_data_frame(dataframe)
    +371
    +372        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    +373
    +374        output_parameters = self.get_output_parameters(polarity)
    +375
    +376        mass_spec = MassSpecCentroid(
    +377            dataframe.to_dict(orient="list"),
    +378            output_parameters,
    +379            auto_process=auto_process,
    +380        )
    +381
    +382        if loadSettings:
    +383            self.load_settings(mass_spec, output_parameters)
    +384
    +385        return mass_spec
     
    @@ -1230,46 +1374,56 @@
    Methods
    -
    294    def get_mass_spectrum(self, polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True):
    -295        """
    -296        Reads mass list data types and returns the mass spectrum object.
    -297
    -298        Parameters
    -299        ----------
    -300        polarity : bool, optional
    -301            The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
    -302        scan : int, optional
    -303            The scan number of the mass spectrum. Default is 0.
    -304        auto_process : bool, optional
    -305            Whether to automatically process the mass spectrum. Default is True.
    -306        loadSettings : bool, optional
    -307            Whether to load the settings for the mass spectrum. Default is True.
    -308
    -309        Returns
    -310        -------
    -311        mass_spec : MassSpecCentroid
    -312            The mass spectrum object representing the centroided mass spectrum.
    -313        """
    -314        # delimiter = "  " or " " or  "," or "\t" etc
    -315
    -316        if polarity == None:
    -317            polarity = self.get_xml_polarity()
    -318        dataframe = self.get_dataframe()
    -319
    -320        self.check_columns(dataframe.columns)
    -321
    -322        self.clean_data_frame(dataframe)
    -323
    -324        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    -325
    -326        output_parameters = self.get_output_parameters(polarity)
    -327
    -328        mass_spec = MassSpecCentroid(dataframe.to_dict(orient='list'), output_parameters, auto_process=auto_process)
    -329
    -330        if loadSettings:
    -331            self.load_settings(mass_spec, output_parameters)
    -332
    -333        return mass_spec
    +            
    336    def get_mass_spectrum(
    +337        self,
    +338        polarity: bool = None,
    +339        scan: int = 0,
    +340        auto_process: bool = True,
    +341        loadSettings: bool = True,
    +342    ):
    +343        """
    +344        Reads mass list data types and returns the mass spectrum object.
    +345
    +346        Parameters
    +347        ----------
    +348        polarity : bool, optional
    +349            The polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
    +350        scan : int, optional
    +351            The scan number of the mass spectrum. Default is 0.
    +352        auto_process : bool, optional
    +353            Whether to automatically process the mass spectrum. Default is True.
    +354        loadSettings : bool, optional
    +355            Whether to load the settings for the mass spectrum. Default is True.
    +356
    +357        Returns
    +358        -------
    +359        mass_spec : MassSpecCentroid
    +360            The mass spectrum object representing the centroided mass spectrum.
    +361        """
    +362        # delimiter = "  " or " " or  "," or "\t" etc
    +363
    +364        if polarity == None:
    +365            polarity = self.get_xml_polarity()
    +366        dataframe = self.get_dataframe()
    +367
    +368        self.check_columns(dataframe.columns)
    +369
    +370        self.clean_data_frame(dataframe)
    +371
    +372        dataframe.rename(columns=self.parameters.header_translate, inplace=True)
    +373
    +374        output_parameters = self.get_output_parameters(polarity)
    +375
    +376        mass_spec = MassSpecCentroid(
    +377            dataframe.to_dict(orient="list"),
    +378            output_parameters,
    +379            auto_process=auto_process,
    +380        )
    +381
    +382        if loadSettings:
    +383            self.load_settings(mass_spec, output_parameters)
    +384
    +385        return mass_spec
     
    diff --git a/docs/corems/mass_spectrum/input/numpyArray.html b/docs/corems/mass_spectrum/input/numpyArray.html index af8ebf3b..7eb418d7 100644 --- a/docs/corems/mass_spectrum/input/numpyArray.html +++ b/docs/corems/mass_spectrum/input/numpyArray.html @@ -63,110 +63,136 @@

      1__author__ = "Yuri E. Corilo"
       2__date__ = "Oct 23, 2019"
       3
    -  4from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecProfile, MassSpecCentroid
    +  4from corems.encapsulation.constant import Labels
       5from corems.encapsulation.factory.parameters import default_parameters
    -  6from corems.encapsulation.constant import Labels
    -  7
    -  8def ms_from_array_profile(mz, abundance, dataname:str, polarity:int = -1, auto_process:bool = True, data_type:str = Labels.simulated_profile):
    -  9    """Create a MassSpecProfile object from an array of m/z values and abundance values.
    +  6from corems.mass_spectrum.factory.MassSpectrumClasses import (
    +  7    MassSpecCentroid,
    +  8    MassSpecProfile,
    +  9)
      10
    - 11    Parameters
    - 12    ----------
    - 13    mz : numpy.ndarray
    - 14        Array of m/z values.
    - 15    abundance : numpy.ndarray
    - 16        Array of abundance values.
    - 17    dataname : str
    - 18        Name of the data.
    - 19    polarity : int, optional
    - 20        Polarity of the data. The default is -1.
    - 21    auto_process : bool, optional
    - 22        Flag to automatically process the data. The default is True.
    - 23    data_type : str, optional
    - 24        Type of the data. The default is Labels.simulated_profile.
    - 25
    - 26    Returns
    - 27    -------
    - 28    MassSpecProfile
    - 29        The created MassSpecProfile object.
    - 30    """
    - 31    data_dict = {Labels.mz: mz, Labels.abundance: abundance}
    - 32    
    - 33    output_parameters = get_output_parameters(polarity, dataname)
    - 34    
    - 35    output_parameters[Labels.label] = data_type
    + 11
    + 12def ms_from_array_profile(
    + 13    mz,
    + 14    abundance,
    + 15    dataname: str,
    + 16    polarity: int = -1,
    + 17    auto_process: bool = True,
    + 18    data_type: str = Labels.simulated_profile,
    + 19):
    + 20    """Create a MassSpecProfile object from an array of m/z values and abundance values.
    + 21
    + 22    Parameters
    + 23    ----------
    + 24    mz : numpy.ndarray
    + 25        Array of m/z values.
    + 26    abundance : numpy.ndarray
    + 27        Array of abundance values.
    + 28    dataname : str
    + 29        Name of the data.
    + 30    polarity : int, optional
    + 31        Polarity of the data. The default is -1.
    + 32    auto_process : bool, optional
    + 33        Flag to automatically process the data. The default is True.
    + 34    data_type : str, optional
    + 35        Type of the data. The default is Labels.simulated_profile.
      36
    - 37    ms = MassSpecProfile(data_dict, output_parameters, auto_process=auto_process)
    - 38    
    - 39    return ms
    - 40
    - 41def ms_from_array_centroid(mz, abundance, rp:list[float], s2n:list[float], dataname:str, polarity:int=-1, auto_process:bool=True):
    - 42    """Create a MassSpecCentroid object from an array of m/z values, abundance values, resolution power, and signal-to-noise ratio.
    + 37    Returns
    + 38    -------
    + 39    MassSpecProfile
    + 40        The created MassSpecProfile object.
    + 41    """
    + 42    data_dict = {Labels.mz: mz, Labels.abundance: abundance}
      43
    - 44    Parameters
    - 45    ----------
    - 46    mz : numpy.ndarray
    - 47        Array of m/z values.
    - 48    abundance : numpy.ndarray
    - 49        Array of abundance values.
    - 50    rp : list(float)
    - 51        List of resolving power values.
    - 52    s2n : list(float)
    - 53        List of signal-to-noise ratio values.
    - 54    dataname : str
    - 55        Name of the data.
    - 56    polarity : int, optional
    - 57        Polarity of the data. The default is -1.
    - 58    auto_process : bool, optional
    - 59
    - 60    Returns
    - 61    -------
    - 62    MassSpecCentroid
    - 63        The created MassSpecCentroid object.
    - 64    """
    - 65    data_dict = {Labels.mz: mz, Labels.abundance: abundance, Labels.s2n : s2n, Labels.rp: rp}
    - 66    
    - 67    output_parameters = get_output_parameters(polarity, dataname)
    - 68    output_parameters[Labels.label] = Labels.corems_centroid
    - 69    
    - 70    return MassSpecCentroid(data_dict, output_parameters, auto_process)
    - 71    
    - 72def get_output_parameters(polarity:int, file_location:str):
    - 73    """Generate the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
    - 74
    - 75    Parameters
    - 76    ----------
    - 77    polarity : int
    - 78        Polarity of the data.
    - 79    file_location : str
    - 80        File location.
    - 81
    - 82    Returns
    - 83    -------
    - 84    dict
    - 85        Output parameters.
    - 86    """
    - 87    d_params = default_parameters(file_location)
    - 88    
    - 89    d_params['analyzer'] = 'Generic Simulated'
    - 90
    - 91    d_params['instrument_label'] = 'Generic Simulated'
    - 92
    - 93    d_params["polarity"] = polarity
    - 94    
    - 95    d_params["filename_path"] = file_location
    - 96    
    - 97    d_params["mobility_scan"] = 0
    - 98    
    - 99    d_params["mobility_rt"] = 0
    -100    
    -101    d_params["scan_number"] = 0
    -102    
    -103    d_params["rt"] = 0
    -104
    -105    d_params[Labels.label] = Labels.simulated_profile
    -106    
    -107    return d_params
    + 44    output_parameters = get_output_parameters(polarity, dataname)
    + 45
    + 46    output_parameters[Labels.label] = data_type
    + 47
    + 48    ms = MassSpecProfile(data_dict, output_parameters, auto_process=auto_process)
    + 49
    + 50    return ms
    + 51
    + 52
    + 53def ms_from_array_centroid(
    + 54    mz,
    + 55    abundance,
    + 56    rp: list[float],
    + 57    s2n: list[float],
    + 58    dataname: str,
    + 59    polarity: int = -1,
    + 60    auto_process: bool = True,
    + 61):
    + 62    """Create a MassSpecCentroid object from an array of m/z values, abundance values, resolution power, and signal-to-noise ratio.
    + 63
    + 64    Parameters
    + 65    ----------
    + 66    mz : numpy.ndarray
    + 67        Array of m/z values.
    + 68    abundance : numpy.ndarray
    + 69        Array of abundance values.
    + 70    rp : list(float)
    + 71        List of resolving power values.
    + 72    s2n : list(float)
    + 73        List of signal-to-noise ratio values.
    + 74    dataname : str
    + 75        Name of the data.
    + 76    polarity : int, optional
    + 77        Polarity of the data. The default is -1.
    + 78    auto_process : bool, optional
    + 79
    + 80    Returns
    + 81    -------
    + 82    MassSpecCentroid
    + 83        The created MassSpecCentroid object.
    + 84    """
    + 85    data_dict = {
    + 86        Labels.mz: mz,
    + 87        Labels.abundance: abundance,
    + 88        Labels.s2n: s2n,
    + 89        Labels.rp: rp,
    + 90    }
    + 91
    + 92    output_parameters = get_output_parameters(polarity, dataname)
    + 93    output_parameters[Labels.label] = Labels.corems_centroid
    + 94
    + 95    return MassSpecCentroid(data_dict, output_parameters, auto_process)
    + 96
    + 97
    + 98def get_output_parameters(polarity: int, file_location: str):
    + 99    """Generate the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
    +100
    +101    Parameters
    +102    ----------
    +103    polarity : int
    +104        Polarity of the data.
    +105    file_location : str
    +106        File location.
    +107
    +108    Returns
    +109    -------
    +110    dict
    +111        Output parameters.
    +112    """
    +113    d_params = default_parameters(file_location)
    +114
    +115    d_params["analyzer"] = "Generic Simulated"
    +116
    +117    d_params["instrument_label"] = "Generic Simulated"
    +118
    +119    d_params["polarity"] = polarity
    +120
    +121    d_params["filename_path"] = file_location
    +122
    +123    d_params["mobility_scan"] = 0
    +124
    +125    d_params["mobility_rt"] = 0
    +126
    +127    d_params["scan_number"] = 0
    +128
    +129    d_params["rt"] = 0
    +130
    +131    d_params[Labels.label] = Labels.simulated_profile
    +132
    +133    return d_params
     
    @@ -182,38 +208,45 @@

    -
    10def ms_from_array_profile(mz, abundance, dataname:str, polarity:int = -1, auto_process:bool = True, data_type:str = Labels.simulated_profile):
    -11    """Create a MassSpecProfile object from an array of m/z values and abundance values.
    -12
    -13    Parameters
    -14    ----------
    -15    mz : numpy.ndarray
    -16        Array of m/z values.
    -17    abundance : numpy.ndarray
    -18        Array of abundance values.
    -19    dataname : str
    -20        Name of the data.
    -21    polarity : int, optional
    -22        Polarity of the data. The default is -1.
    -23    auto_process : bool, optional
    -24        Flag to automatically process the data. The default is True.
    -25    data_type : str, optional
    -26        Type of the data. The default is Labels.simulated_profile.
    -27
    -28    Returns
    -29    -------
    -30    MassSpecProfile
    -31        The created MassSpecProfile object.
    -32    """
    -33    data_dict = {Labels.mz: mz, Labels.abundance: abundance}
    -34    
    -35    output_parameters = get_output_parameters(polarity, dataname)
    -36    
    -37    output_parameters[Labels.label] = data_type
    -38
    -39    ms = MassSpecProfile(data_dict, output_parameters, auto_process=auto_process)
    -40    
    -41    return ms
    +            
    13def ms_from_array_profile(
    +14    mz,
    +15    abundance,
    +16    dataname: str,
    +17    polarity: int = -1,
    +18    auto_process: bool = True,
    +19    data_type: str = Labels.simulated_profile,
    +20):
    +21    """Create a MassSpecProfile object from an array of m/z values and abundance values.
    +22
    +23    Parameters
    +24    ----------
    +25    mz : numpy.ndarray
    +26        Array of m/z values.
    +27    abundance : numpy.ndarray
    +28        Array of abundance values.
    +29    dataname : str
    +30        Name of the data.
    +31    polarity : int, optional
    +32        Polarity of the data. The default is -1.
    +33    auto_process : bool, optional
    +34        Flag to automatically process the data. The default is True.
    +35    data_type : str, optional
    +36        Type of the data. The default is Labels.simulated_profile.
    +37
    +38    Returns
    +39    -------
    +40    MassSpecProfile
    +41        The created MassSpecProfile object.
    +42    """
    +43    data_dict = {Labels.mz: mz, Labels.abundance: abundance}
    +44
    +45    output_parameters = get_output_parameters(polarity, dataname)
    +46
    +47    output_parameters[Labels.label] = data_type
    +48
    +49    ms = MassSpecProfile(data_dict, output_parameters, auto_process=auto_process)
    +50
    +51    return ms
     
    @@ -256,36 +289,49 @@
    Returns
    -
    43def ms_from_array_centroid(mz, abundance, rp:list[float], s2n:list[float], dataname:str, polarity:int=-1, auto_process:bool=True):
    -44    """Create a MassSpecCentroid object from an array of m/z values, abundance values, resolution power, and signal-to-noise ratio.
    -45
    -46    Parameters
    -47    ----------
    -48    mz : numpy.ndarray
    -49        Array of m/z values.
    -50    abundance : numpy.ndarray
    -51        Array of abundance values.
    -52    rp : list(float)
    -53        List of resolving power values.
    -54    s2n : list(float)
    -55        List of signal-to-noise ratio values.
    -56    dataname : str
    -57        Name of the data.
    -58    polarity : int, optional
    -59        Polarity of the data. The default is -1.
    -60    auto_process : bool, optional
    -61
    -62    Returns
    -63    -------
    -64    MassSpecCentroid
    -65        The created MassSpecCentroid object.
    -66    """
    -67    data_dict = {Labels.mz: mz, Labels.abundance: abundance, Labels.s2n : s2n, Labels.rp: rp}
    -68    
    -69    output_parameters = get_output_parameters(polarity, dataname)
    -70    output_parameters[Labels.label] = Labels.corems_centroid
    -71    
    -72    return MassSpecCentroid(data_dict, output_parameters, auto_process)
    +            
    54def ms_from_array_centroid(
    +55    mz,
    +56    abundance,
    +57    rp: list[float],
    +58    s2n: list[float],
    +59    dataname: str,
    +60    polarity: int = -1,
    +61    auto_process: bool = True,
    +62):
    +63    """Create a MassSpecCentroid object from an array of m/z values, abundance values, resolution power, and signal-to-noise ratio.
    +64
    +65    Parameters
    +66    ----------
    +67    mz : numpy.ndarray
    +68        Array of m/z values.
    +69    abundance : numpy.ndarray
    +70        Array of abundance values.
    +71    rp : list(float)
    +72        List of resolving power values.
    +73    s2n : list(float)
    +74        List of signal-to-noise ratio values.
    +75    dataname : str
    +76        Name of the data.
    +77    polarity : int, optional
    +78        Polarity of the data. The default is -1.
    +79    auto_process : bool, optional
    +80
    +81    Returns
    +82    -------
    +83    MassSpecCentroid
    +84        The created MassSpecCentroid object.
    +85    """
    +86    data_dict = {
    +87        Labels.mz: mz,
    +88        Labels.abundance: abundance,
    +89        Labels.s2n: s2n,
    +90        Labels.rp: rp,
    +91    }
    +92
    +93    output_parameters = get_output_parameters(polarity, dataname)
    +94    output_parameters[Labels.label] = Labels.corems_centroid
    +95
    +96    return MassSpecCentroid(data_dict, output_parameters, auto_process)
     
    @@ -329,42 +375,42 @@
    Returns
    -
     74def get_output_parameters(polarity:int, file_location:str):
    - 75    """Generate the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
    - 76
    - 77    Parameters
    - 78    ----------
    - 79    polarity : int
    - 80        Polarity of the data.
    - 81    file_location : str
    - 82        File location.
    - 83
    - 84    Returns
    - 85    -------
    - 86    dict
    - 87        Output parameters.
    - 88    """
    - 89    d_params = default_parameters(file_location)
    - 90    
    - 91    d_params['analyzer'] = 'Generic Simulated'
    - 92
    - 93    d_params['instrument_label'] = 'Generic Simulated'
    - 94
    - 95    d_params["polarity"] = polarity
    - 96    
    - 97    d_params["filename_path"] = file_location
    - 98    
    - 99    d_params["mobility_scan"] = 0
    -100    
    -101    d_params["mobility_rt"] = 0
    -102    
    -103    d_params["scan_number"] = 0
    -104    
    -105    d_params["rt"] = 0
    -106
    -107    d_params[Labels.label] = Labels.simulated_profile
    -108    
    -109    return d_params
    +            
     99def get_output_parameters(polarity: int, file_location: str):
    +100    """Generate the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
    +101
    +102    Parameters
    +103    ----------
    +104    polarity : int
    +105        Polarity of the data.
    +106    file_location : str
    +107        File location.
    +108
    +109    Returns
    +110    -------
    +111    dict
    +112        Output parameters.
    +113    """
    +114    d_params = default_parameters(file_location)
    +115
    +116    d_params["analyzer"] = "Generic Simulated"
    +117
    +118    d_params["instrument_label"] = "Generic Simulated"
    +119
    +120    d_params["polarity"] = polarity
    +121
    +122    d_params["filename_path"] = file_location
    +123
    +124    d_params["mobility_scan"] = 0
    +125
    +126    d_params["mobility_rt"] = 0
    +127
    +128    d_params["scan_number"] = 0
    +129
    +130    d_params["rt"] = 0
    +131
    +132    d_params[Labels.label] = Labels.simulated_profile
    +133
    +134    return d_params
     
    diff --git a/docs/corems/mass_spectrum/output/export.html b/docs/corems/mass_spectrum/output/export.html index 2af544b5..1acfccc7 100644 --- a/docs/corems/mass_spectrum/output/export.html +++ b/docs/corems/mass_spectrum/output/export.html @@ -123,747 +123,826 @@

      1__author__ = "Yuri E. Corilo"
       2__date__ = "Nov 11, 2019"
       3
    -  4from threading import Thread
    -  5from pathlib import Path
    -  6
    -  7from numpy import string_, array, NaN, empty
    -  8from pandas import DataFrame
    -  9import json
    +  4import json
    +  5from datetime import datetime, timezone
    +  6from pathlib import Path
    +  7from threading import Thread
    +  8
    +  9import h5py
      10import toml
    - 11
    - 12from corems.encapsulation.constant import Atoms
    - 13from corems.encapsulation.constant import Labels
    - 14from corems.encapsulation.output import parameter_to_dict
    - 15from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecfromFreq
    - 16
    - 17import h5py
    - 18from datetime import datetime, timezone
    - 19
    - 20class HighResMassSpecExport(Thread):
    - 21    """A class for exporting high-resolution mass spectra.
    - 22
    - 23    Parameters
    - 24    ----------
    - 25    out_file_path : str
    - 26        The output file path.
    - 27    mass_spectrum : MassSpectrum
    - 28        The mass spectrum to export.
    - 29    output_type : str, optional
    - 30        The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
    - 31
    - 32    Attributes
    - 33    ----------
    - 34    output_file : Path
    - 35        The output file path.
    - 36    output_type : str
    - 37        The type of output file.
    - 38    mass_spectrum : MassSpectrum
    - 39        The mass spectrum to export.
    - 40    atoms_order_list : list
    - 41        The list of assigned atoms in the order specified by Atoms.atoms_order list.
    - 42    columns_label : list
    - 43        The column labels in order.
    - 44
    - 45    Methods
    - 46    -------
    - 47    * save().
    - 48        Save the mass spectrum data to the output file.
    - 49    * run().
    - 50        Run the export process.
    - 51    * get_pandas_df().
    - 52        Returns the mass spectrum data as a pandas DataFrame.
    - 53    * write_settings(output_path, mass_spectrum).
    - 54        Writes the settings of the mass spectrum to a JSON file.
    - 55    * to_pandas(write_metadata=True).
    - 56        Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    - 57    * to_excel(write_metadata=True).
    - 58        Exports the mass spectrum data to an Excel file.
    - 59    * to_csv(write_metadata=True).
    - 60        Exports the mass spectrum data to a CSV file.
    - 61    * to_json().
    - 62        Exports the mass spectrum data to a JSON string.
    - 63    * to_hdf().
    - 64        Exports the mass spectrum data to an HDF5 file.
    - 65    * parameters_to_toml().
    - 66        Converts the mass spectrum parameters to a TOML string.
    - 67    * parameters_to_json().
    - 68        Converts the mass spectrum parameters to a JSON string.
    - 69    * get_mass_spec_attrs(mass_spectrum).
    - 70        Returns the mass spectrum attributes as a dictionary.
    - 71    * get_all_used_atoms_in_order(mass_spectrum).
    - 72        Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    - 73    * list_dict_to_list(mass_spectrum, is_hdf5=False).
    - 74        Returns the mass spectrum data as a list of dictionaries.
    - 75    * get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False).
    - 76        Returns the mass spectrum data as a list of dictionaries.
    - 77        
    - 78    """  
    - 79
    - 80    def __init__(self, out_file_path, mass_spectrum, output_type='excel'):
    + 11from numpy import NaN, empty
    + 12from pandas import DataFrame
    + 13
    + 14from corems.encapsulation.constant import Atoms, Labels #Labels is accessed in the eval() function
    + 15from corems.encapsulation.output import parameter_to_dict
    + 16from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecfromFreq
    + 17
    + 18
    + 19class HighResMassSpecExport(Thread):
    + 20    """A class for exporting high-resolution mass spectra.
    + 21
    + 22    Parameters
    + 23    ----------
    + 24    out_file_path : str
    + 25        The output file path.
    + 26    mass_spectrum : MassSpectrum
    + 27        The mass spectrum to export.
    + 28    output_type : str, optional
    + 29        The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
    + 30
    + 31    Attributes
    + 32    ----------
    + 33    output_file : Path
    + 34        The output file path.
    + 35    output_type : str
    + 36        The type of output file.
    + 37    mass_spectrum : MassSpectrum
    + 38        The mass spectrum to export.
    + 39    atoms_order_list : list
    + 40        The list of assigned atoms in the order specified by Atoms.atoms_order list.
    + 41    columns_label : list
    + 42        The column labels in order.
    + 43
    + 44    Methods
    + 45    -------
    + 46    * save().
    + 47        Save the mass spectrum data to the output file.
    + 48    * run().
    + 49        Run the export process.
    + 50    * get_pandas_df().
    + 51        Returns the mass spectrum data as a pandas DataFrame.
    + 52    * write_settings(output_path, mass_spectrum).
    + 53        Writes the settings of the mass spectrum to a JSON file.
    + 54    * to_pandas(write_metadata=True).
    + 55        Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    + 56    * to_excel(write_metadata=True).
    + 57        Exports the mass spectrum data to an Excel file.
    + 58    * to_csv(write_metadata=True).
    + 59        Exports the mass spectrum data to a CSV file.
    + 60    * to_json().
    + 61        Exports the mass spectrum data to a JSON string.
    + 62    * to_hdf().
    + 63        Exports the mass spectrum data to an HDF5 file.
    + 64    * parameters_to_toml().
    + 65        Converts the mass spectrum parameters to a TOML string.
    + 66    * parameters_to_json().
    + 67        Converts the mass spectrum parameters to a JSON string.
    + 68    * get_mass_spec_attrs(mass_spectrum).
    + 69        Returns the mass spectrum attributes as a dictionary.
    + 70    * get_all_used_atoms_in_order(mass_spectrum).
    + 71        Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    + 72    * list_dict_to_list(mass_spectrum, is_hdf5=False).
    + 73        Returns the mass spectrum data as a list of dictionaries.
    + 74    * get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False).
    + 75        Returns the mass spectrum data as a list of dictionaries.
    + 76
    + 77    """
    + 78
    + 79    def __init__(self, out_file_path, mass_spectrum, output_type="excel"):
    + 80        Thread.__init__(self)
      81
    - 82        Thread.__init__(self)
    + 82        self.output_file = Path(out_file_path)
      83
    - 84        self.output_file = Path(out_file_path)
    - 85
    - 86        # 'excel', 'csv' or 'pandas'
    - 87        self.output_type = output_type
    + 84        # 'excel', 'csv' or 'pandas'
    + 85        self.output_type = output_type
    + 86
    + 87        self.mass_spectrum = mass_spectrum
      88
    - 89        self.mass_spectrum = mass_spectrum
    - 90
    - 91        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
    - 92        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)
    + 89        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
    + 90        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)
    + 91
    + 92        self._init_columns()
      93
    - 94        self._init_columns()
    - 95
    - 96    def _init_columns(self):
    - 97        """Initialize the columns for the mass spectrum output."""
    - 98        # column labels in order
    - 99        self.columns_label = ['Index',
    -100                              'm/z',
    -101                              'Calibrated m/z',
    -102                              'Calculated m/z',
    -103                              'Peak Height',
    -104                              'Peak Area',
    -105                              'Resolving Power',
    -106                              'S/N',
    -107                              'Ion Charge',
    -108                              'm/z Error (ppm)',
    -109                              'm/z Error Score',
    -110                              'Isotopologue Similarity',
    -111                              'Confidence Score',
    -112                              'DBE',
    -113                              'O/C',
    -114                              'H/C',
    -115                              'Heteroatom Class',
    -116                              'Ion Type',
    -117                              'Adduct',
    -118                              'Is Isotopologue',
    -119                              'Mono Isotopic Index',
    -120                              'Molecular Formula'
    -121                              ]
    -122
    -123    @property
    -124    def output_type(self):
    -125        """Returns the output type of the mass spectrum."""
    -126        return self._output_type
    -127
    -128    @output_type.setter
    -129    def output_type(self, output_type):
    -130        output_types = ['excel', 'csv', 'pandas', 'hdf5']
    -131        if output_type in output_types:
    -132            self._output_type = output_type
    -133        else:
    -134            raise TypeError(
    -135                'Supported types are "excel", "csv" or "pandas", %s entered' % output_type)
    -136
    -137    def save(self):
    -138        """Save the mass spectrum data to the output file.
    -139        
    -140        Raises
    -141        ------
    -142        ValueError
    -143            If the output type is not supported.
    -144        """
    -145
    -146        if self.output_type == 'excel':
    -147            self.to_excel()
    -148        elif self.output_type == 'csv':
    -149            self.to_csv()
    -150        elif self.output_type == 'pandas':
    -151            self.to_pandas()
    -152        elif self.output_type == 'hdf5':
    -153            self.to_hdf()
    -154        else:
    -155            raise ValueError(
    -156                "Unkown output type: %s; it can be 'excel', 'csv' or 'pandas'" % self.output_type)
    -157
    -158    def run(self):
    -159        """ Run the export process.
    -160        
    -161        This method is called when the thread starts.
    -162        It calls the save method to perform the export."""
    -163        self.save()
    -164
    -165    def get_pandas_df(self, additional_columns=None):
    -166        """Returns the mass spectrum data as a pandas DataFrame.
    -167        
    -168        Parameters
    -169        ----------
    -170        additional_columns : list, optional
    -171            Additional columns to include in the DataFrame. Defaults to None.
    -172            Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
    -173
    -174        Returns
    -175        -------
    -176        DataFrame
    -177            The mass spectrum data as a pandas DataFrame.
    -178        """
    -179        if additional_columns is  not None:
    -180            possible_additional_columns = ['Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)']
    -181            if additional_columns:
    -182                for column in additional_columns:
    -183                    if column not in possible_additional_columns:
    -184                        raise ValueError("Invalid additional column: %s" % column)
    -185            columns = self.columns_label + additional_columns + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -186        else:
    -187            columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -188        dict_data_list = self.get_list_dict_data(self.mass_spectrum, additional_columns=additional_columns)
    -189        df = DataFrame(dict_data_list, columns=columns)
    -190        df.name = self.output_file
    -191        return df
    -192
    -193    def write_settings(self, output_path, mass_spectrum):
    -194        """Writes the settings of the mass spectrum to a JSON file.
    -195        
    -196        Parameters
    -197        ----------
    -198        output_path : str
    -199            The output file path.
    -200        mass_spectrum : MassSpectrum
    -201            The mass spectrum to export.
    -202        """
    -203
    -204        import json
    -205
    -206        dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    + 94    def _init_columns(self):
    + 95        """Initialize the columns for the mass spectrum output."""
    + 96        # column labels in order
    + 97        self.columns_label = [
    + 98            "Index",
    + 99            "m/z",
    +100            "Calibrated m/z",
    +101            "Calculated m/z",
    +102            "Peak Height",
    +103            "Peak Area",
    +104            "Resolving Power",
    +105            "S/N",
    +106            "Ion Charge",
    +107            "m/z Error (ppm)",
    +108            "m/z Error Score",
    +109            "Isotopologue Similarity",
    +110            "Confidence Score",
    +111            "DBE",
    +112            "O/C",
    +113            "H/C",
    +114            "Heteroatom Class",
    +115            "Ion Type",
    +116            "Adduct",
    +117            "Is Isotopologue",
    +118            "Mono Isotopic Index",
    +119            "Molecular Formula",
    +120        ]
    +121
    +122    @property
    +123    def output_type(self):
    +124        """Returns the output type of the mass spectrum."""
    +125        return self._output_type
    +126
    +127    @output_type.setter
    +128    def output_type(self, output_type):
    +129        output_types = ["excel", "csv", "pandas", "hdf5"]
    +130        if output_type in output_types:
    +131            self._output_type = output_type
    +132        else:
    +133            raise TypeError(
    +134                'Supported types are "excel", "csv" or "pandas", %s entered'
    +135                % output_type
    +136            )
    +137
    +138    def save(self):
    +139        """Save the mass spectrum data to the output file.
    +140
    +141        Raises
    +142        ------
    +143        ValueError
    +144            If the output type is not supported.
    +145        """
    +146
    +147        if self.output_type == "excel":
    +148            self.to_excel()
    +149        elif self.output_type == "csv":
    +150            self.to_csv()
    +151        elif self.output_type == "pandas":
    +152            self.to_pandas()
    +153        elif self.output_type == "hdf5":
    +154            self.to_hdf()
    +155        else:
    +156            raise ValueError(
    +157                "Unkown output type: %s; it can be 'excel', 'csv' or 'pandas'"
    +158                % self.output_type
    +159            )
    +160
    +161    def run(self):
    +162        """Run the export process.
    +163
    +164        This method is called when the thread starts.
    +165        It calls the save method to perform the export."""
    +166        self.save()
    +167
    +168    def get_pandas_df(self, additional_columns=None):
    +169        """Returns the mass spectrum data as a pandas DataFrame.
    +170
    +171        Parameters
    +172        ----------
    +173        additional_columns : list, optional
    +174            Additional columns to include in the DataFrame. Defaults to None.
    +175            Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
    +176
    +177        Returns
    +178        -------
    +179        DataFrame
    +180            The mass spectrum data as a pandas DataFrame.
    +181        """
    +182        if additional_columns is not None:
    +183            possible_additional_columns = [
    +184                "Aromaticity Index",
    +185                "NOSC",
    +186                "Aromaticity Index (modified)",
    +187            ]
    +188            if additional_columns:
    +189                for column in additional_columns:
    +190                    if column not in possible_additional_columns:
    +191                        raise ValueError("Invalid additional column: %s" % column)
    +192            columns = (
    +193                self.columns_label
    +194                + additional_columns
    +195                + self.get_all_used_atoms_in_order(self.mass_spectrum)
    +196            )
    +197        else:
    +198            columns = self.columns_label + self.get_all_used_atoms_in_order(
    +199                self.mass_spectrum
    +200            )
    +201        dict_data_list = self.get_list_dict_data(
    +202            self.mass_spectrum, additional_columns=additional_columns
    +203        )
    +204        df = DataFrame(dict_data_list, columns=columns)
    +205        df.name = self.output_file
    +206        return df
     207
    -208        dict_setting['MassSpecAttrs'] = self.get_mass_spec_attrs(mass_spectrum)
    -209        dict_setting['analyzer'] = mass_spectrum.analyzer
    -210        dict_setting['instrument_label'] = mass_spectrum.instrument_label
    -211        dict_setting['sample_name'] = mass_spectrum.sample_name
    -212
    -213        with open(output_path.with_suffix('.json'), 'w', encoding='utf8', ) as outfile:
    -214
    -215            output = json.dumps(dict_setting, sort_keys=True, indent=4, separators=(',', ': '))
    -216            outfile.write(output)
    -217
    -218    def to_pandas(self, write_metadata=True):
    -219        """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    -220        
    -221        Parameters
    -222        ----------
    -223        write_metadata : bool, optional
    -224            Whether to write the metadata to a JSON file. Defaults to True.
    -225        """
    -226
    -227        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -228
    -229        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    -230
    -231        df = DataFrame(dict_data_list, columns=columns)
    -232
    -233        df.to_pickle(self.output_file.with_suffix('.pkl'))
    -234
    -235        if write_metadata:
    -236            self.write_settings(self.output_file, self.mass_spectrum)
    +208    def write_settings(self, output_path, mass_spectrum):
    +209        """Writes the settings of the mass spectrum to a JSON file.
    +210
    +211        Parameters
    +212        ----------
    +213        output_path : str
    +214            The output file path.
    +215        mass_spectrum : MassSpectrum
    +216            The mass spectrum to export.
    +217        """
    +218
    +219        import json
    +220
    +221        dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    +222
    +223        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(mass_spectrum)
    +224        dict_setting["analyzer"] = mass_spectrum.analyzer
    +225        dict_setting["instrument_label"] = mass_spectrum.instrument_label
    +226        dict_setting["sample_name"] = mass_spectrum.sample_name
    +227
    +228        with open(
    +229            output_path.with_suffix(".json"),
    +230            "w",
    +231            encoding="utf8",
    +232        ) as outfile:
    +233            output = json.dumps(
    +234                dict_setting, sort_keys=True, indent=4, separators=(",", ": ")
    +235            )
    +236            outfile.write(output)
     237
    -238    def to_excel(self, write_metadata=True):
    -239        """Exports the mass spectrum data to an Excel file.
    -240        
    +238    def to_pandas(self, write_metadata=True):
    +239        """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    +240
     241        Parameters
     242        ----------
     243        write_metadata : bool, optional
     244            Whether to write the metadata to a JSON file. Defaults to True.
     245        """
     246
    -247        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -248
    -249        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +247        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +248            self.mass_spectrum
    +249        )
     250
    -251        df = DataFrame(dict_data_list, columns=columns)
    +251        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
     252
    -253        df.to_excel(self.output_file.with_suffix('.xlsx'))
    +253        df = DataFrame(dict_data_list, columns=columns)
     254
    -255        if write_metadata:
    -256            self.write_settings(self.output_file, self.mass_spectrum)
    -257
    -258    def to_csv(self, write_metadata=True):
    -259        """Exports the mass spectrum data to a CSV file.
    -260
    -261        Parameters
    -262        ----------
    -263        write_metadata : bool, optional
    -264            Whether to write the metadata to a JSON file. Defaults to True.
    -265        """
    -266
    -267        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    +255        df.to_pickle(self.output_file.with_suffix(".pkl"))
    +256
    +257        if write_metadata:
    +258            self.write_settings(self.output_file, self.mass_spectrum)
    +259
    +260    def to_excel(self, write_metadata=True):
    +261        """Exports the mass spectrum data to an Excel file.
    +262
    +263        Parameters
    +264        ----------
    +265        write_metadata : bool, optional
    +266            Whether to write the metadata to a JSON file. Defaults to True.
    +267        """
     268
    -269        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    -270
    -271        import csv
    -272        try:
    -273            with open(self.output_file.with_suffix('.csv'), 'w', newline='') as csvfile:
    -274                writer = csv.DictWriter(csvfile, fieldnames=columns)
    -275                writer.writeheader()
    -276                for data in dict_data_list:
    -277                    writer.writerow(data)
    -278            if write_metadata:
    -279                self.write_settings(self.output_file, self.mass_spectrum)
    -280
    -281        except IOError as ioerror:
    -282            print(ioerror)
    -283
    -284    def to_json(self):
    -285        """Exports the mass spectrum data to a JSON string."""
    -286
    -287        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -288
    -289        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +269        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +270            self.mass_spectrum
    +271        )
    +272
    +273        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +274
    +275        df = DataFrame(dict_data_list, columns=columns)
    +276
    +277        df.to_excel(self.output_file.with_suffix(".xlsx"))
    +278
    +279        if write_metadata:
    +280            self.write_settings(self.output_file, self.mass_spectrum)
    +281
    +282    def to_csv(self, write_metadata=True):
    +283        """Exports the mass spectrum data to a CSV file.
    +284
    +285        Parameters
    +286        ----------
    +287        write_metadata : bool, optional
    +288            Whether to write the metadata to a JSON file. Defaults to True.
    +289        """
     290
    -291        df = DataFrame(dict_data_list, columns=columns)
    -292
    -293        # for key, values in dict_data.items():
    -294        #    if not values: dict_data[key] = NaN
    -295
    -296        # output = json.dumps(dict_data, sort_keys=True, indent=4, separators=(',', ': '))
    -297        return df.to_json(orient='records')
    +291        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +292            self.mass_spectrum
    +293        )
    +294
    +295        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +296
    +297        import csv
     298
    -299    def add_mass_spectrum_to_hdf5(self, hdf_handle, mass_spectrum, group_key, mass_spectra_group=None, export_raw=True):
    -300        """Adds the mass spectrum data to an HDF5 file.
    -301        
    -302        Parameters
    -303        ----------
    -304        hdf_handle : h5py.File
    -305            The HDF5 file handle.
    -306        mass_spectrum : MassSpectrum
    -307            The mass spectrum to add to the HDF5 file.
    -308        group_key : str
    -309            The group key (where to add the mass spectrum data within the HDF5 file).
    -310        mass_spectra_group : h5py.Group, optional
    -311            The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
    -312        export_raw : bool, optional
    -313            Whether to export the raw data. Defaults to True. 
    -314            If False, only the processed data (peaks) is exported (essentially centroided data).
    -315        """
    -316        if mass_spectra_group is None:
    +299        try:
    +300            with open(self.output_file.with_suffix(".csv"), "w", newline="") as csvfile:
    +301                writer = csv.DictWriter(csvfile, fieldnames=columns)
    +302                writer.writeheader()
    +303                for data in dict_data_list:
    +304                    writer.writerow(data)
    +305            if write_metadata:
    +306                self.write_settings(self.output_file, self.mass_spectrum)
    +307
    +308        except IOError as ioerror:
    +309            print(ioerror)
    +310
    +311    def to_json(self):
    +312        """Exports the mass spectrum data to a JSON string."""
    +313
    +314        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +315            self.mass_spectrum
    +316        )
     317
    -318            # Check if the file has the necessary attributes and add them if not
    -319            # This assumes that if there is a mass_spectra_group, these attributes were already added to the file
    -320            if not hdf_handle.attrs.get("date_utc"):
    -321                timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    -322                hdf_handle.attrs["date_utc"] = timenow
    -323                hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
    -324                hdf_handle.attrs["data_structure"] = "mass_spectrum"
    -325                hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
    -326                hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
    -327                hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name
    -328        
    -329        list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)
    -330
    -331        dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)
    -332
    -333        setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    -334
    -335        columns_labels = json.dumps(
    -336            self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum),
    -337            sort_keys=False,
    -338            indent=4,
    -339            separators=(",", ": "),
    -340        )
    -341
    -342        group_key = group_key
    -343
    -344        if mass_spectra_group is not None:
    -345            hdf_handle = mass_spectra_group
    -346
    -347        if group_key not in hdf_handle.keys():
    -348            scan_group = hdf_handle.create_group(group_key)
    -349
    -350            # If there is raw data (from profile data) save it
    -351            if not mass_spectrum.is_centroid and export_raw:
    -352                mz_abun_array = empty(
    -353                    shape=(2, len(mass_spectrum.abundance_profile))
    -354                )
    -355
    -356                mz_abun_array[0] = mass_spectrum.abundance_profile
    -357                mz_abun_array[1] = mass_spectrum.mz_exp_profile
    -358
    -359                raw_ms_dataset = scan_group.create_dataset(
    -360                    "raw_ms", data=mz_abun_array, dtype="f8"
    -361                )
    -362
    -363            else:
    -364                #  create empy dataset for missing raw data
    -365                raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")
    -366
    -367            raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)
    -368
    -369            if isinstance(mass_spectrum, MassSpecfromFreq):
    -370                raw_ms_dataset.attrs["TransientSetting"] = json.dumps(
    -371                    setting_dicts.get("TransientSetting"),
    -372                    sort_keys=False,
    -373                    indent=4,
    -374                    separators=(",", ": "),
    -375                )
    -376
    -377        else:
    -378            scan_group = hdf_handle.get(group_key)
    -379
    -380        # if there is not processed data len = 0, otherwise len() will return next index
    -381        index_processed_data = str(len(scan_group.keys()))
    -382
    -383        timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    -384
    -385        processed_dset = scan_group.create_dataset(
    -386            index_processed_data, data=list_results
    -387        )
    -388
    -389        processed_dset.attrs["date_utc"] = timenow
    +318        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +319
    +320        df = DataFrame(dict_data_list, columns=columns)
    +321
    +322        # for key, values in dict_data.items():
    +323        #    if not values: dict_data[key] = NaN
    +324
    +325        # output = json.dumps(dict_data, sort_keys=True, indent=4, separators=(',', ': '))
    +326        return df.to_json(orient="records")
    +327
    +328    def add_mass_spectrum_to_hdf5(
    +329        self,
    +330        hdf_handle,
    +331        mass_spectrum,
    +332        group_key,
    +333        mass_spectra_group=None,
    +334        export_raw=True,
    +335    ):
    +336        """Adds the mass spectrum data to an HDF5 file.
    +337
    +338        Parameters
    +339        ----------
    +340        hdf_handle : h5py.File
    +341            The HDF5 file handle.
    +342        mass_spectrum : MassSpectrum
    +343            The mass spectrum to add to the HDF5 file.
    +344        group_key : str
    +345            The group key (where to add the mass spectrum data within the HDF5 file).
    +346        mass_spectra_group : h5py.Group, optional
    +347            The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
    +348        export_raw : bool, optional
    +349            Whether to export the raw data. Defaults to True.
    +350            If False, only the processed data (peaks) is exported (essentially centroided data).
    +351        """
    +352        if mass_spectra_group is None:
    +353            # Check if the file has the necessary attributes and add them if not
    +354            # This assumes that if there is a mass_spectra_group, these attributes were already added to the file
    +355            if not hdf_handle.attrs.get("date_utc"):
    +356                timenow = str(
    +357                    datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")
    +358                )
    +359                hdf_handle.attrs["date_utc"] = timenow
    +360                hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
    +361                hdf_handle.attrs["data_structure"] = "mass_spectrum"
    +362                hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
    +363                hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
    +364                hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name
    +365
    +366        list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)
    +367
    +368        dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)
    +369
    +370        setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    +371
    +372        columns_labels = json.dumps(
    +373            self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum),
    +374            sort_keys=False,
    +375            indent=4,
    +376            separators=(",", ": "),
    +377        )
    +378
    +379        group_key = group_key
    +380
    +381        if mass_spectra_group is not None:
    +382            hdf_handle = mass_spectra_group
    +383
    +384        if group_key not in hdf_handle.keys():
    +385            scan_group = hdf_handle.create_group(group_key)
    +386
    +387            # If there is raw data (from profile data) save it
    +388            if not mass_spectrum.is_centroid and export_raw:
    +389                mz_abun_array = empty(shape=(2, len(mass_spectrum.abundance_profile)))
     390
    -391        processed_dset.attrs["ColumnsLabels"] = columns_labels
    -392
    -393        processed_dset.attrs["MoleculaSearchSetting"] = json.dumps(
    -394            setting_dicts.get("MoleculaSearch"),
    -395            sort_keys=False,
    -396            indent=4,
    -397            separators=(",", ": "),
    -398        )
    -399
    -400        processed_dset.attrs["MassSpecPeakSetting"] = json.dumps(
    -401            setting_dicts.get("MassSpecPeak"),
    -402            sort_keys=False,
    -403            indent=4,
    -404            separators=(",", ": "),
    -405        )
    -406
    -407        processed_dset.attrs["MassSpectrumSetting"] = json.dumps(
    -408            setting_dicts.get("MassSpectrum"),
    -409            sort_keys=False,
    -410            indent=4,
    -411            separators=(",", ": "),
    -412        )
    -413
    -414    def to_hdf(self):
    -415        """Exports the mass spectrum data to an HDF5 file."""
    -416
    -417        with h5py.File(self.output_file.with_suffix('.hdf5'), 'a') as hdf_handle:
    -418
    -419            self.add_mass_spectrum_to_hdf5(hdf_handle, self.mass_spectrum, str(self.mass_spectrum.scan_number))
    -420
    -421    def parameters_to_toml(self):
    -422        """Converts the mass spectrum parameters to a TOML string.
    -423        
    -424        Returns
    -425        -------
    -426        str
    -427            The TOML string of the mass spectrum parameters.
    -428        """
    -429        
    -430        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    -431
    -432        dict_setting['MassSpecAttrs'] = self.get_mass_spec_attrs(self.mass_spectrum)
    -433        dict_setting['analyzer'] = self.mass_spectrum.analyzer
    -434        dict_setting['instrument_label'] = self.mass_spectrum.instrument_label
    -435        dict_setting['sample_name'] = self.mass_spectrum.sample_name
    -436
    -437        output = toml.dumps(dict_setting)
    -438        
    -439        return output
    -440
    -441    def parameters_to_json(self):
    -442        """Converts the mass spectrum parameters to a JSON string.
    -443
    -444        Returns
    -445        -------
    -446        str
    -447            The JSON string of the mass spectrum parameters.
    -448        """
    -449
    -450        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    +391                mz_abun_array[0] = mass_spectrum.abundance_profile
    +392                mz_abun_array[1] = mass_spectrum.mz_exp_profile
    +393
    +394                raw_ms_dataset = scan_group.create_dataset(
    +395                    "raw_ms", data=mz_abun_array, dtype="f8"
    +396                )
    +397
    +398            else:
    +399                #  create empy dataset for missing raw data
    +400                raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")
    +401
    +402            raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)
    +403
    +404            if isinstance(mass_spectrum, MassSpecfromFreq):
    +405                raw_ms_dataset.attrs["TransientSetting"] = json.dumps(
    +406                    setting_dicts.get("TransientSetting"),
    +407                    sort_keys=False,
    +408                    indent=4,
    +409                    separators=(",", ": "),
    +410                )
    +411
    +412        else:
    +413            scan_group = hdf_handle.get(group_key)
    +414
    +415        # if there is not processed data len = 0, otherwise len() will return next index
    +416        index_processed_data = str(len(scan_group.keys()))
    +417
    +418        timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    +419
    +420        processed_dset = scan_group.create_dataset(
    +421            index_processed_data, data=list_results
    +422        )
    +423
    +424        processed_dset.attrs["date_utc"] = timenow
    +425
    +426        processed_dset.attrs["ColumnsLabels"] = columns_labels
    +427
    +428        processed_dset.attrs["MoleculaSearchSetting"] = json.dumps(
    +429            setting_dicts.get("MoleculaSearch"),
    +430            sort_keys=False,
    +431            indent=4,
    +432            separators=(",", ": "),
    +433        )
    +434
    +435        processed_dset.attrs["MassSpecPeakSetting"] = json.dumps(
    +436            setting_dicts.get("MassSpecPeak"),
    +437            sort_keys=False,
    +438            indent=4,
    +439            separators=(",", ": "),
    +440        )
    +441
    +442        processed_dset.attrs["MassSpectrumSetting"] = json.dumps(
    +443            setting_dicts.get("MassSpectrum"),
    +444            sort_keys=False,
    +445            indent=4,
    +446            separators=(",", ": "),
    +447        )
    +448
    +449    def to_hdf(self):
    +450        """Exports the mass spectrum data to an HDF5 file."""
     451
    -452        dict_setting['MassSpecAttrs'] = self.get_mass_spec_attrs(self.mass_spectrum)
    -453        dict_setting['analyzer'] = self.mass_spectrum.analyzer
    -454        dict_setting['instrument_label'] = self.mass_spectrum.instrument_label
    -455        dict_setting['sample_name'] = self.mass_spectrum.sample_name
    +452        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    +453            self.add_mass_spectrum_to_hdf5(
    +454                hdf_handle, self.mass_spectrum, str(self.mass_spectrum.scan_number)
    +455            )
     456
    -457        output = json.dumps(dict_setting)
    -458
    -459        return output
    -460
    -461    def get_mass_spec_attrs(self, mass_spectrum):
    -462        """Returns the mass spectrum attributes as a dictionary.
    -463        
    -464        Parameters
    -465        ----------
    -466        mass_spectrum : MassSpectrum
    -467            The mass spectrum to export.
    -468            
    -469        Returns
    -470        -------
    -471        dict
    -472            The mass spectrum attributes.
    -473        """
    +457    def parameters_to_toml(self):
    +458        """Converts the mass spectrum parameters to a TOML string.
    +459
    +460        Returns
    +461        -------
    +462        str
    +463            The TOML string of the mass spectrum parameters.
    +464        """
    +465
    +466        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    +467
    +468        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
    +469        dict_setting["analyzer"] = self.mass_spectrum.analyzer
    +470        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
    +471        dict_setting["sample_name"] = self.mass_spectrum.sample_name
    +472
    +473        output = toml.dumps(dict_setting)
     474
    -475        dict_ms_attrs = {}
    -476        dict_ms_attrs['polarity'] = mass_spectrum.polarity
    -477        dict_ms_attrs['rt'] = mass_spectrum.retention_time
    -478        dict_ms_attrs['tic'] = mass_spectrum.tic
    -479        dict_ms_attrs['mobility_scan'] = mass_spectrum.mobility_scan
    -480        dict_ms_attrs['mobility_rt'] = mass_spectrum.mobility_rt
    -481        dict_ms_attrs['Aterm'] = mass_spectrum.Aterm
    -482        dict_ms_attrs['Bterm'] = mass_spectrum.Bterm
    -483        dict_ms_attrs['Cterm'] = mass_spectrum.Cterm
    -484        dict_ms_attrs['baseline_noise'] = mass_spectrum.baseline_noise
    -485        dict_ms_attrs['baseline_noise_std'] = mass_spectrum.baseline_noise_std
    -486
    -487        return dict_ms_attrs
    -488
    -489    def get_all_used_atoms_in_order(self, mass_spectrum):
    -490        """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    -491
    -492        Parameters
    -493        ----------
    -494        mass_spectrum : MassSpectrum
    -495            The mass spectrum to export.
    +475        return output
    +476
    +477    def parameters_to_json(self):
    +478        """Converts the mass spectrum parameters to a JSON string.
    +479
    +480        Returns
    +481        -------
    +482        str
    +483            The JSON string of the mass spectrum parameters.
    +484        """
    +485
    +486        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    +487
    +488        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
    +489        dict_setting["analyzer"] = self.mass_spectrum.analyzer
    +490        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
    +491        dict_setting["sample_name"] = self.mass_spectrum.sample_name
    +492
    +493        output = json.dumps(dict_setting)
    +494
    +495        return output
     496
    -497        Returns
    -498        -------
    -499        list
    -500            The list of assigned atoms in the order specified by Atoms.atoms_order list.
    -501        """
    -502
    -503        atoms_in_order = Atoms.atoms_order
    -504        all_used_atoms = set()
    -505        if mass_spectrum:
    -506            for ms_peak in mass_spectrum:
    -507                if ms_peak:
    -508                    for m_formula in ms_peak:
    -509                        for atom in m_formula.atoms:
    -510                            all_used_atoms.add(atom)
    -511
    -512        def sort_method(atom):
    -513            return [atoms_in_order.index(atom)]
    -514
    -515        return sorted(all_used_atoms, key=sort_method)
    -516
    -517    def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
    -518        """Returns the mass spectrum data as a list of dictionaries.
    -519        
    -520        Parameters
    -521        ----------
    -522        mass_spectrum : MassSpectrum
    -523            The mass spectrum to export.
    -524        is_hdf5 : bool, optional
    -525            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    -526            
    -527        Returns
    -528        -------
    -529        list
    -530            The mass spectrum data as a list of dictionaries.
    -531        """
    +497    def get_mass_spec_attrs(self, mass_spectrum):
    +498        """Returns the mass spectrum attributes as a dictionary.
    +499
    +500        Parameters
    +501        ----------
    +502        mass_spectrum : MassSpectrum
    +503            The mass spectrum to export.
    +504
    +505        Returns
    +506        -------
    +507        dict
    +508            The mass spectrum attributes.
    +509        """
    +510
    +511        dict_ms_attrs = {}
    +512        dict_ms_attrs["polarity"] = mass_spectrum.polarity
    +513        dict_ms_attrs["rt"] = mass_spectrum.retention_time
    +514        dict_ms_attrs["tic"] = mass_spectrum.tic
    +515        dict_ms_attrs["mobility_scan"] = mass_spectrum.mobility_scan
    +516        dict_ms_attrs["mobility_rt"] = mass_spectrum.mobility_rt
    +517        dict_ms_attrs["Aterm"] = mass_spectrum.Aterm
    +518        dict_ms_attrs["Bterm"] = mass_spectrum.Bterm
    +519        dict_ms_attrs["Cterm"] = mass_spectrum.Cterm
    +520        dict_ms_attrs["baseline_noise"] = mass_spectrum.baseline_noise
    +521        dict_ms_attrs["baseline_noise_std"] = mass_spectrum.baseline_noise_std
    +522
    +523        return dict_ms_attrs
    +524
    +525    def get_all_used_atoms_in_order(self, mass_spectrum):
    +526        """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    +527
    +528        Parameters
    +529        ----------
    +530        mass_spectrum : MassSpectrum
    +531            The mass spectrum to export.
     532
    -533        column_labels = self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum)
    -534
    -535        dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)
    -536
    -537        all_lines = []
    -538        for dict_res in dict_list:
    -539
    -540            result_line = [NaN] * len(column_labels)
    -541
    -542            for label, value in dict_res.items():
    -543
    -544                label_index = column_labels.index(label)
    -545                result_line[label_index] = value
    -546
    -547            all_lines.append(result_line)
    -548
    -549        return all_lines
    +533        Returns
    +534        -------
    +535        list
    +536            The list of assigned atoms in the order specified by Atoms.atoms_order list.
    +537        """
    +538
    +539        atoms_in_order = Atoms.atoms_order
    +540        all_used_atoms = set()
    +541        if mass_spectrum:
    +542            for ms_peak in mass_spectrum:
    +543                if ms_peak:
    +544                    for m_formula in ms_peak:
    +545                        for atom in m_formula.atoms:
    +546                            all_used_atoms.add(atom)
    +547
    +548        def sort_method(atom):
    +549            return [atoms_in_order.index(atom)]
     550
    -551    def get_list_dict_data(self, mass_spectrum, include_no_match=True, include_isotopologues=True,
    -552                           isotopologue_inline=True, no_match_inline=False, is_hdf5=False,
    -553                           additional_columns=None):
    +551        return sorted(all_used_atoms, key=sort_method)
    +552
    +553    def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
     554        """Returns the mass spectrum data as a list of dictionaries.
     555
     556        Parameters
     557        ----------
     558        mass_spectrum : MassSpectrum
     559            The mass spectrum to export.
    -560        include_no_match : bool, optional
    -561            Whether to include unassigned (no match) data. Defaults to True.
    -562        include_isotopologues : bool, optional
    -563            Whether to include isotopologues. Defaults to True.
    -564        isotopologue_inline : bool, optional
    -565            Whether to include isotopologues inline. Defaults to True.
    -566        no_match_inline : bool, optional
    -567            Whether to include unassigned (no match) data inline. Defaults to False.
    -568        is_hdf5 : bool, optional
    -569            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    -570
    -571        Returns
    -572        -------
    -573        list
    -574            The mass spectrum data as a list of dictionaries.
    -575        """
    -576
    -577        dict_data_list = []
    +560        is_hdf5 : bool, optional
    +561            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    +562
    +563        Returns
    +564        -------
    +565        list
    +566            The mass spectrum data as a list of dictionaries.
    +567        """
    +568
    +569        column_labels = self.columns_label + self.get_all_used_atoms_in_order(
    +570            mass_spectrum
    +571        )
    +572
    +573        dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)
    +574
    +575        all_lines = []
    +576        for dict_res in dict_list:
    +577            result_line = [NaN] * len(column_labels)
     578
    -579        if is_hdf5:
    -580            encode = ".encode('utf-8')"
    -581        else:
    -582            encode = ""
    -583
    -584        def add_no_match_dict_data(index, ms_peak):
    -585            '''
    -586            Export dictionary of mspeak info for unassigned (no match) data
    -587            '''
    -588            dict_result = {'Index': index,
    -589                           'm/z': ms_peak._mz_exp,
    -590                           'Calibrated m/z': ms_peak.mz_exp,
    -591                           'Peak Height': ms_peak.abundance,
    -592                           'Peak Area': ms_peak.area,
    -593                           'Resolving Power': ms_peak.resolving_power,
    -594                           'S/N': ms_peak.signal_to_noise,
    -595                           'Ion Charge': ms_peak.ion_charge,
    -596                           'Heteroatom Class': eval("Labels.unassigned{}".format(encode)),
    -597                           }
    +579            for label, value in dict_res.items():
    +580                label_index = column_labels.index(label)
    +581                result_line[label_index] = value
    +582
    +583            all_lines.append(result_line)
    +584
    +585        return all_lines
    +586
    +587    def get_list_dict_data(
    +588        self,
    +589        mass_spectrum,
    +590        include_no_match=True,
    +591        include_isotopologues=True,
    +592        isotopologue_inline=True,
    +593        no_match_inline=False,
    +594        is_hdf5=False,
    +595        additional_columns=None,
    +596    ):
    +597        """Returns the mass spectrum data as a list of dictionaries.
     598
    -599            dict_data_list.append(dict_result)
    -600
    -601        def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
    -602            '''
    -603            Export dictionary of mspeak info for assigned (match) data
    -604            '''
    -605            formula_dict = mformula.to_dict()
    -606
    -607            dict_result = {'Index': index,
    -608                           'm/z': ms_peak._mz_exp,
    -609                           'Calibrated m/z': ms_peak.mz_exp,
    -610                           'Calculated m/z': mformula.mz_calc,
    -611                           'Peak Height': ms_peak.abundance,
    -612                           'Peak Area': ms_peak.area,
    -613                           'Resolving Power': ms_peak.resolving_power,
    -614                           'S/N': ms_peak.signal_to_noise,
    -615                           'Ion Charge': ms_peak.ion_charge,
    -616                           'm/z Error (ppm)': mformula.mz_error,
    -617                           'Confidence Score': mformula.confidence_score,
    -618                           'Isotopologue Similarity': mformula.isotopologue_similarity,
    -619                           'm/z Error Score': mformula.average_mz_error_score,
    -620                           'DBE': mformula.dbe,
    -621                           'Heteroatom Class': eval("mformula.class_label{}".format(encode)),
    -622                           'H/C': mformula.H_C,
    -623                           'O/C': mformula.O_C,
    -624                           'Ion Type': eval("mformula.ion_type.lower(){}".format(encode)),
    -625                           'Is Isotopologue': int(mformula.is_isotopologue),
    -626                           'Molecular Formula': eval("mformula.string{}".format(encode))
    -627                           }
    -628            if additional_columns is not None:
    -629                possible_dict = {
    -630                    'Aromaticity Index':mformula.A_I, 
    -631                    'NOSC':mformula.nosc, 
    -632                    'Aromaticity Index (modified)':mformula.A_I_mod
    -633                    }
    -634                for column in additional_columns:
    -635                    dict_result[column] = possible_dict.get(column)
    -636            
    -637            if mformula.adduct_atom:
    -638                dict_result['Adduct'] = eval("mformula.adduct_atom{}".format(encode))
    -639
    -640            if mformula.is_isotopologue:
    -641                dict_result['Mono Isotopic Index'] = mformula.mspeak_index_mono_isotopic
    +599        Parameters
    +600        ----------
    +601        mass_spectrum : MassSpectrum
    +602            The mass spectrum to export.
    +603        include_no_match : bool, optional
    +604            Whether to include unassigned (no match) data. Defaults to True.
    +605        include_isotopologues : bool, optional
    +606            Whether to include isotopologues. Defaults to True.
    +607        isotopologue_inline : bool, optional
    +608            Whether to include isotopologues inline. Defaults to True.
    +609        no_match_inline : bool, optional
    +610            Whether to include unassigned (no match) data inline. Defaults to False.
    +611        is_hdf5 : bool, optional
    +612            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    +613
    +614        Returns
    +615        -------
    +616        list
    +617            The mass spectrum data as a list of dictionaries.
    +618        """
    +619
    +620        dict_data_list = []
    +621
    +622        if is_hdf5:
    +623            encode = ".encode('utf-8')"
    +624        else:
    +625            encode = ""
    +626
    +627        def add_no_match_dict_data(index, ms_peak):
    +628            """
    +629            Export dictionary of mspeak info for unassigned (no match) data
    +630            """
    +631            dict_result = {
    +632                "Index": index,
    +633                "m/z": ms_peak._mz_exp,
    +634                "Calibrated m/z": ms_peak.mz_exp,
    +635                "Peak Height": ms_peak.abundance,
    +636                "Peak Area": ms_peak.area,
    +637                "Resolving Power": ms_peak.resolving_power,
    +638                "S/N": ms_peak.signal_to_noise,
    +639                "Ion Charge": ms_peak.ion_charge,
    +640                "Heteroatom Class": eval("Labels.unassigned{}".format(encode)),
    +641            }
     642
    -643            if self.atoms_order_list is None:
    -644                atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)
    -645            else:
    -646                atoms_order_list = self.atoms_order_list
    -647
    -648            for atom in atoms_order_list:
    -649                if atom in formula_dict.keys():
    -650                    dict_result[atom] = formula_dict.get(atom)
    -651
    -652            dict_data_list.append(dict_result)
    -653
    -654        score_methods = mass_spectrum.molecular_search_settings.score_methods
    -655        selected_score_method = mass_spectrum.molecular_search_settings.output_score_method
    -656
    -657        if selected_score_method in score_methods:
    -658
    -659            # temp set score method as the one chosen in the output
    -660            current_method = mass_spectrum.molecular_search_settings.score_method
    -661            mass_spectrum.molecular_search_settings.score_method = selected_score_method
    -662
    -663            for index, ms_peak in enumerate(mass_spectrum):
    -664
    -665                # print(ms_peak.mz_exp)
    -666
    -667                if ms_peak:
    -668
    -669                    m_formula = ms_peak.best_molecular_formula_candidate
    -670
    -671                    if m_formula:
    -672
    -673                        if not m_formula.is_isotopologue:
    -674
    -675                            add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -676
    -677                            for iso_mspeak_index, iso_mf_formula in m_formula.mspeak_mf_isotopologues_indexes:
    -678                                iso_ms_peak = mass_spectrum[iso_mspeak_index]
    -679                                add_match_dict_data(iso_mspeak_index, iso_ms_peak, iso_mf_formula, additional_columns=additional_columns)
    -680                else:
    +643            dict_data_list.append(dict_result)
    +644
    +645        def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
    +646            """
    +647            Export dictionary of mspeak info for assigned (match) data
    +648            """
    +649            formula_dict = mformula.to_dict()
    +650
    +651            dict_result = {
    +652                "Index": index,
    +653                "m/z": ms_peak._mz_exp,
    +654                "Calibrated m/z": ms_peak.mz_exp,
    +655                "Calculated m/z": mformula.mz_calc,
    +656                "Peak Height": ms_peak.abundance,
    +657                "Peak Area": ms_peak.area,
    +658                "Resolving Power": ms_peak.resolving_power,
    +659                "S/N": ms_peak.signal_to_noise,
    +660                "Ion Charge": ms_peak.ion_charge,
    +661                "m/z Error (ppm)": mformula.mz_error,
    +662                "Confidence Score": mformula.confidence_score,
    +663                "Isotopologue Similarity": mformula.isotopologue_similarity,
    +664                "m/z Error Score": mformula.average_mz_error_score,
    +665                "DBE": mformula.dbe,
    +666                "Heteroatom Class": eval("mformula.class_label{}".format(encode)),
    +667                "H/C": mformula.H_C,
    +668                "O/C": mformula.O_C,
    +669                "Ion Type": eval("mformula.ion_type.lower(){}".format(encode)),
    +670                "Is Isotopologue": int(mformula.is_isotopologue),
    +671                "Molecular Formula": eval("mformula.string{}".format(encode)),
    +672            }
    +673            if additional_columns is not None:
    +674                possible_dict = {
    +675                    "Aromaticity Index": mformula.A_I,
    +676                    "NOSC": mformula.nosc,
    +677                    "Aromaticity Index (modified)": mformula.A_I_mod,
    +678                }
    +679                for column in additional_columns:
    +680                    dict_result[column] = possible_dict.get(column)
     681
    -682                    if include_no_match and no_match_inline:
    -683                        add_no_match_dict_data(index, ms_peak)
    +682            if mformula.adduct_atom:
    +683                dict_result["Adduct"] = eval("mformula.adduct_atom{}".format(encode))
     684
    -685            if include_no_match and not no_match_inline:
    -686
    -687                for index, ms_peak in enumerate(mass_spectrum):
    -688                    if not ms_peak:
    -689                        add_no_match_dict_data(index, ms_peak)     
    -690            # reset score method as the one chosen in the output
    -691            mass_spectrum.molecular_search_settings.score_method = current_method
    +685            if mformula.is_isotopologue:
    +686                dict_result["Mono Isotopic Index"] = mformula.mspeak_index_mono_isotopic
    +687
    +688            if self.atoms_order_list is None:
    +689                atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)
    +690            else:
    +691                atoms_order_list = self.atoms_order_list
     692
    -693        else:
    -694
    -695            for index, ms_peak in enumerate(mass_spectrum):
    +693            for atom in atoms_order_list:
    +694                if atom in formula_dict.keys():
    +695                    dict_result[atom] = formula_dict.get(atom)
     696
    -697                # check if there is a molecular formula candidate for the msPeak
    +697            dict_data_list.append(dict_result)
     698
    -699                if ms_peak:
    -700                    # m_formula = ms_peak.molecular_formula_lowest_error
    -701                    for m_formula in ms_peak:
    -702
    -703                        if mass_spectrum.molecular_search_settings.output_min_score > 0:
    -704
    -705                            if m_formula.confidence_score >= mass_spectrum.molecular_search_settings.output_min_score:
    -706
    -707                                if m_formula.is_isotopologue:  # isotopologues inline
    -708                                    if include_isotopologues and isotopologue_inline:
    -709                                        add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -710                                else:
    -711                                    add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)  # add monoisotopic peak
    -712
    -713                            # cutoff because of low score
    -714                            else:
    -715                                add_no_match_dict_data(index, ms_peak)
    -716
    -717                        else:
    -718                            if m_formula.is_isotopologue:  # isotopologues inline
    -719                                if include_isotopologues and isotopologue_inline:
    -720                                    add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -721                            else:
    -722                                add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)  # add monoisotopic peak
    -723                else:
    -724                    # include not_match
    -725                    if include_no_match and no_match_inline:
    -726                        add_no_match_dict_data(index, ms_peak)
    -727
    -728            if include_isotopologues and not isotopologue_inline:
    -729                for index, ms_peak in enumerate(mass_spectrum):
    -730                    for m_formula in ms_peak:
    -731                        if m_formula.is_isotopologue:
    -732                            if m_formula.confidence_score >= mass_spectrum.molecular_search_settings.output_min_score:
    -733                                add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -734
    -735            if include_no_match and not no_match_inline:
    -736                for index, ms_peak in enumerate(mass_spectrum):
    -737                    if not ms_peak:
    -738                        add_no_match_dict_data(index, ms_peak)
    -739
    -740        # remove duplicated add_match data possibly introduced on the output_score_filter step
    -741        res = []
    -742        [res.append(x) for x in dict_data_list if x not in res]
    -743
    -744        return res
    +699        score_methods = mass_spectrum.molecular_search_settings.score_methods
    +700        selected_score_method = (
    +701            mass_spectrum.molecular_search_settings.output_score_method
    +702        )
    +703
    +704        if selected_score_method in score_methods:
    +705            # temp set score method as the one chosen in the output
    +706            current_method = mass_spectrum.molecular_search_settings.score_method
    +707            mass_spectrum.molecular_search_settings.score_method = selected_score_method
    +708
    +709            for index, ms_peak in enumerate(mass_spectrum):
    +710                # print(ms_peak.mz_exp)
    +711
    +712                if ms_peak:
    +713                    m_formula = ms_peak.best_molecular_formula_candidate
    +714
    +715                    if m_formula:
    +716                        if not m_formula.is_isotopologue:
    +717                            add_match_dict_data(
    +718                                index,
    +719                                ms_peak,
    +720                                m_formula,
    +721                                additional_columns=additional_columns,
    +722                            )
    +723
    +724                            for (
    +725                                iso_mspeak_index,
    +726                                iso_mf_formula,
    +727                            ) in m_formula.mspeak_mf_isotopologues_indexes:
    +728                                iso_ms_peak = mass_spectrum[iso_mspeak_index]
    +729                                add_match_dict_data(
    +730                                    iso_mspeak_index,
    +731                                    iso_ms_peak,
    +732                                    iso_mf_formula,
    +733                                    additional_columns=additional_columns,
    +734                                )
    +735                else:
    +736                    if include_no_match and no_match_inline:
    +737                        add_no_match_dict_data(index, ms_peak)
    +738
    +739            if include_no_match and not no_match_inline:
    +740                for index, ms_peak in enumerate(mass_spectrum):
    +741                    if not ms_peak:
    +742                        add_no_match_dict_data(index, ms_peak)
    +743            # reset score method as the one chosen in the output
    +744            mass_spectrum.molecular_search_settings.score_method = current_method
    +745
    +746        else:
    +747            for index, ms_peak in enumerate(mass_spectrum):
    +748                # check if there is a molecular formula candidate for the msPeak
    +749
    +750                if ms_peak:
    +751                    # m_formula = ms_peak.molecular_formula_lowest_error
    +752                    for m_formula in ms_peak:
    +753                        if mass_spectrum.molecular_search_settings.output_min_score > 0:
    +754                            if (
    +755                                m_formula.confidence_score
    +756                                >= mass_spectrum.molecular_search_settings.output_min_score
    +757                            ):
    +758                                if m_formula.is_isotopologue:  # isotopologues inline
    +759                                    if include_isotopologues and isotopologue_inline:
    +760                                        add_match_dict_data(
    +761                                            index,
    +762                                            ms_peak,
    +763                                            m_formula,
    +764                                            additional_columns=additional_columns,
    +765                                        )
    +766                                else:
    +767                                    add_match_dict_data(
    +768                                        index,
    +769                                        ms_peak,
    +770                                        m_formula,
    +771                                        additional_columns=additional_columns,
    +772                                    )  # add monoisotopic peak
    +773
    +774                            # cutoff because of low score
    +775                            else:
    +776                                add_no_match_dict_data(index, ms_peak)
    +777
    +778                        else:
    +779                            if m_formula.is_isotopologue:  # isotopologues inline
    +780                                if include_isotopologues and isotopologue_inline:
    +781                                    add_match_dict_data(
    +782                                        index,
    +783                                        ms_peak,
    +784                                        m_formula,
    +785                                        additional_columns=additional_columns,
    +786                                    )
    +787                            else:
    +788                                add_match_dict_data(
    +789                                    index,
    +790                                    ms_peak,
    +791                                    m_formula,
    +792                                    additional_columns=additional_columns,
    +793                                )  # add monoisotopic peak
    +794                else:
    +795                    # include not_match
    +796                    if include_no_match and no_match_inline:
    +797                        add_no_match_dict_data(index, ms_peak)
    +798
    +799            if include_isotopologues and not isotopologue_inline:
    +800                for index, ms_peak in enumerate(mass_spectrum):
    +801                    for m_formula in ms_peak:
    +802                        if m_formula.is_isotopologue:
    +803                            if (
    +804                                m_formula.confidence_score
    +805                                >= mass_spectrum.molecular_search_settings.output_min_score
    +806                            ):
    +807                                add_match_dict_data(
    +808                                    index,
    +809                                    ms_peak,
    +810                                    m_formula,
    +811                                    additional_columns=additional_columns,
    +812                                )
    +813
    +814            if include_no_match and not no_match_inline:
    +815                for index, ms_peak in enumerate(mass_spectrum):
    +816                    if not ms_peak:
    +817                        add_no_match_dict_data(index, ms_peak)
    +818
    +819        # remove duplicated add_match data possibly introduced on the output_score_filter step
    +820        res = []
    +821        [res.append(x) for x in dict_data_list if x not in res]
    +822
    +823        return res
     
    @@ -879,731 +958,811 @@

    -
     21class HighResMassSpecExport(Thread):
    - 22    """A class for exporting high-resolution mass spectra.
    - 23
    - 24    Parameters
    - 25    ----------
    - 26    out_file_path : str
    - 27        The output file path.
    - 28    mass_spectrum : MassSpectrum
    - 29        The mass spectrum to export.
    - 30    output_type : str, optional
    - 31        The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
    - 32
    - 33    Attributes
    - 34    ----------
    - 35    output_file : Path
    - 36        The output file path.
    - 37    output_type : str
    - 38        The type of output file.
    - 39    mass_spectrum : MassSpectrum
    - 40        The mass spectrum to export.
    - 41    atoms_order_list : list
    - 42        The list of assigned atoms in the order specified by Atoms.atoms_order list.
    - 43    columns_label : list
    - 44        The column labels in order.
    - 45
    - 46    Methods
    - 47    -------
    - 48    * save().
    - 49        Save the mass spectrum data to the output file.
    - 50    * run().
    - 51        Run the export process.
    - 52    * get_pandas_df().
    - 53        Returns the mass spectrum data as a pandas DataFrame.
    - 54    * write_settings(output_path, mass_spectrum).
    - 55        Writes the settings of the mass spectrum to a JSON file.
    - 56    * to_pandas(write_metadata=True).
    - 57        Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    - 58    * to_excel(write_metadata=True).
    - 59        Exports the mass spectrum data to an Excel file.
    - 60    * to_csv(write_metadata=True).
    - 61        Exports the mass spectrum data to a CSV file.
    - 62    * to_json().
    - 63        Exports the mass spectrum data to a JSON string.
    - 64    * to_hdf().
    - 65        Exports the mass spectrum data to an HDF5 file.
    - 66    * parameters_to_toml().
    - 67        Converts the mass spectrum parameters to a TOML string.
    - 68    * parameters_to_json().
    - 69        Converts the mass spectrum parameters to a JSON string.
    - 70    * get_mass_spec_attrs(mass_spectrum).
    - 71        Returns the mass spectrum attributes as a dictionary.
    - 72    * get_all_used_atoms_in_order(mass_spectrum).
    - 73        Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    - 74    * list_dict_to_list(mass_spectrum, is_hdf5=False).
    - 75        Returns the mass spectrum data as a list of dictionaries.
    - 76    * get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False).
    - 77        Returns the mass spectrum data as a list of dictionaries.
    - 78        
    - 79    """  
    - 80
    - 81    def __init__(self, out_file_path, mass_spectrum, output_type='excel'):
    +            
     20class HighResMassSpecExport(Thread):
    + 21    """A class for exporting high-resolution mass spectra.
    + 22
    + 23    Parameters
    + 24    ----------
    + 25    out_file_path : str
    + 26        The output file path.
    + 27    mass_spectrum : MassSpectrum
    + 28        The mass spectrum to export.
    + 29    output_type : str, optional
    + 30        The type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
    + 31
    + 32    Attributes
    + 33    ----------
    + 34    output_file : Path
    + 35        The output file path.
    + 36    output_type : str
    + 37        The type of output file.
    + 38    mass_spectrum : MassSpectrum
    + 39        The mass spectrum to export.
    + 40    atoms_order_list : list
    + 41        The list of assigned atoms in the order specified by Atoms.atoms_order list.
    + 42    columns_label : list
    + 43        The column labels in order.
    + 44
    + 45    Methods
    + 46    -------
    + 47    * save().
    + 48        Save the mass spectrum data to the output file.
    + 49    * run().
    + 50        Run the export process.
    + 51    * get_pandas_df().
    + 52        Returns the mass spectrum data as a pandas DataFrame.
    + 53    * write_settings(output_path, mass_spectrum).
    + 54        Writes the settings of the mass spectrum to a JSON file.
    + 55    * to_pandas(write_metadata=True).
    + 56        Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    + 57    * to_excel(write_metadata=True).
    + 58        Exports the mass spectrum data to an Excel file.
    + 59    * to_csv(write_metadata=True).
    + 60        Exports the mass spectrum data to a CSV file.
    + 61    * to_json().
    + 62        Exports the mass spectrum data to a JSON string.
    + 63    * to_hdf().
    + 64        Exports the mass spectrum data to an HDF5 file.
    + 65    * parameters_to_toml().
    + 66        Converts the mass spectrum parameters to a TOML string.
    + 67    * parameters_to_json().
    + 68        Converts the mass spectrum parameters to a JSON string.
    + 69    * get_mass_spec_attrs(mass_spectrum).
    + 70        Returns the mass spectrum attributes as a dictionary.
    + 71    * get_all_used_atoms_in_order(mass_spectrum).
    + 72        Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    + 73    * list_dict_to_list(mass_spectrum, is_hdf5=False).
    + 74        Returns the mass spectrum data as a list of lists (one row per entry, ordered by column label).
    + 75    * get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False, additional_columns=None).
    + 76        Returns the mass spectrum data as a list of dictionaries.
    + 77
    + 78    """
    + 79
    + 80    def __init__(self, out_file_path, mass_spectrum, output_type="excel"):
    + 81        Thread.__init__(self)
      82
    - 83        Thread.__init__(self)
    + 83        self.output_file = Path(out_file_path)
      84
    - 85        self.output_file = Path(out_file_path)
    - 86
    - 87        # 'excel', 'csv' or 'pandas'
    - 88        self.output_type = output_type
    + 85        # 'excel', 'csv', 'pandas' or 'hdf5'
    + 86        self.output_type = output_type
    + 87
    + 88        self.mass_spectrum = mass_spectrum
      89
    - 90        self.mass_spectrum = mass_spectrum
    - 91
    - 92        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
    - 93        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)
    + 90        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
    + 91        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)
    + 92
    + 93        self._init_columns()
      94
    - 95        self._init_columns()
    - 96
    - 97    def _init_columns(self):
    - 98        """Initialize the columns for the mass spectrum output."""
    - 99        # column labels in order
    -100        self.columns_label = ['Index',
    -101                              'm/z',
    -102                              'Calibrated m/z',
    -103                              'Calculated m/z',
    -104                              'Peak Height',
    -105                              'Peak Area',
    -106                              'Resolving Power',
    -107                              'S/N',
    -108                              'Ion Charge',
    -109                              'm/z Error (ppm)',
    -110                              'm/z Error Score',
    -111                              'Isotopologue Similarity',
    -112                              'Confidence Score',
    -113                              'DBE',
    -114                              'O/C',
    -115                              'H/C',
    -116                              'Heteroatom Class',
    -117                              'Ion Type',
    -118                              'Adduct',
    -119                              'Is Isotopologue',
    -120                              'Mono Isotopic Index',
    -121                              'Molecular Formula'
    -122                              ]
    -123
    -124    @property
    -125    def output_type(self):
    -126        """Returns the output type of the mass spectrum."""
    -127        return self._output_type
    -128
    -129    @output_type.setter
    -130    def output_type(self, output_type):
    -131        output_types = ['excel', 'csv', 'pandas', 'hdf5']
    -132        if output_type in output_types:
    -133            self._output_type = output_type
    -134        else:
    -135            raise TypeError(
    -136                'Supported types are "excel", "csv" or "pandas", %s entered' % output_type)
    -137
    -138    def save(self):
    -139        """Save the mass spectrum data to the output file.
    -140        
    -141        Raises
    -142        ------
    -143        ValueError
    -144            If the output type is not supported.
    -145        """
    -146
    -147        if self.output_type == 'excel':
    -148            self.to_excel()
    -149        elif self.output_type == 'csv':
    -150            self.to_csv()
    -151        elif self.output_type == 'pandas':
    -152            self.to_pandas()
    -153        elif self.output_type == 'hdf5':
    -154            self.to_hdf()
    -155        else:
    -156            raise ValueError(
    -157                "Unkown output type: %s; it can be 'excel', 'csv' or 'pandas'" % self.output_type)
    -158
    -159    def run(self):
    -160        """ Run the export process.
    -161        
    -162        This method is called when the thread starts.
    -163        It calls the save method to perform the export."""
    -164        self.save()
    -165
    -166    def get_pandas_df(self, additional_columns=None):
    -167        """Returns the mass spectrum data as a pandas DataFrame.
    -168        
    -169        Parameters
    -170        ----------
    -171        additional_columns : list, optional
    -172            Additional columns to include in the DataFrame. Defaults to None.
    -173            Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
    -174
    -175        Returns
    -176        -------
    -177        DataFrame
    -178            The mass spectrum data as a pandas DataFrame.
    -179        """
    -180        if additional_columns is  not None:
    -181            possible_additional_columns = ['Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)']
    -182            if additional_columns:
    -183                for column in additional_columns:
    -184                    if column not in possible_additional_columns:
    -185                        raise ValueError("Invalid additional column: %s" % column)
    -186            columns = self.columns_label + additional_columns + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -187        else:
    -188            columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -189        dict_data_list = self.get_list_dict_data(self.mass_spectrum, additional_columns=additional_columns)
    -190        df = DataFrame(dict_data_list, columns=columns)
    -191        df.name = self.output_file
    -192        return df
    -193
    -194    def write_settings(self, output_path, mass_spectrum):
    -195        """Writes the settings of the mass spectrum to a JSON file.
    -196        
    -197        Parameters
    -198        ----------
    -199        output_path : str
    -200            The output file path.
    -201        mass_spectrum : MassSpectrum
    -202            The mass spectrum to export.
    -203        """
    -204
    -205        import json
    -206
    -207        dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    + 95    def _init_columns(self):
    + 96        """Initialize the columns for the mass spectrum output."""
    + 97        # column labels in order
    + 98        self.columns_label = [
    + 99            "Index",
    +100            "m/z",
    +101            "Calibrated m/z",
    +102            "Calculated m/z",
    +103            "Peak Height",
    +104            "Peak Area",
    +105            "Resolving Power",
    +106            "S/N",
    +107            "Ion Charge",
    +108            "m/z Error (ppm)",
    +109            "m/z Error Score",
    +110            "Isotopologue Similarity",
    +111            "Confidence Score",
    +112            "DBE",
    +113            "O/C",
    +114            "H/C",
    +115            "Heteroatom Class",
    +116            "Ion Type",
    +117            "Adduct",
    +118            "Is Isotopologue",
    +119            "Mono Isotopic Index",
    +120            "Molecular Formula",
    +121        ]
    +122
    +123    @property
    +124    def output_type(self):
    +125        """Returns the output type of the mass spectrum."""
    +126        return self._output_type
    +127
    +128    @output_type.setter
    +129    def output_type(self, output_type):
    +130        output_types = ["excel", "csv", "pandas", "hdf5"]
    +131        if output_type in output_types:
    +132            self._output_type = output_type
    +133        else:
    +134            raise TypeError(
    +135                'Supported types are "excel", "csv" or "pandas", %s entered'
    +136                % output_type
    +137            )
    +138
    +139    def save(self):
    +140        """Save the mass spectrum data to the output file.
    +141
    +142        Raises
    +143        ------
    +144        ValueError
    +145            If the output type is not supported.
    +146        """
    +147
    +148        if self.output_type == "excel":
    +149            self.to_excel()
    +150        elif self.output_type == "csv":
    +151            self.to_csv()
    +152        elif self.output_type == "pandas":
    +153            self.to_pandas()
    +154        elif self.output_type == "hdf5":
    +155            self.to_hdf()
    +156        else:
    +157            raise ValueError(
    +158                "Unkown output type: %s; it can be 'excel', 'csv' or 'pandas'"
    +159                % self.output_type
    +160            )
    +161
    +162    def run(self):
    +163        """Run the export process.
    +164
    +165        This method is called when the thread starts.
    +166        It calls the save method to perform the export."""
    +167        self.save()
    +168
    +169    def get_pandas_df(self, additional_columns=None):
    +170        """Returns the mass spectrum data as a pandas DataFrame.
    +171
    +172        Parameters
    +173        ----------
    +174        additional_columns : list, optional
    +175            Additional columns to include in the DataFrame. Defaults to None.
    +176            Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
    +177
    +178        Returns
    +179        -------
    +180        DataFrame
    +181            The mass spectrum data as a pandas DataFrame.
    +182        """
    +183        if additional_columns is not None:
    +184            possible_additional_columns = [
    +185                "Aromaticity Index",
    +186                "NOSC",
    +187                "Aromaticity Index (modified)",
    +188            ]
    +189            if additional_columns:
    +190                for column in additional_columns:
    +191                    if column not in possible_additional_columns:
    +192                        raise ValueError("Invalid additional column: %s" % column)
    +193            columns = (
    +194                self.columns_label
    +195                + additional_columns
    +196                + self.get_all_used_atoms_in_order(self.mass_spectrum)
    +197            )
    +198        else:
    +199            columns = self.columns_label + self.get_all_used_atoms_in_order(
    +200                self.mass_spectrum
    +201            )
    +202        dict_data_list = self.get_list_dict_data(
    +203            self.mass_spectrum, additional_columns=additional_columns
    +204        )
    +205        df = DataFrame(dict_data_list, columns=columns)
    +206        df.name = self.output_file
    +207        return df
     208
    -209        dict_setting['MassSpecAttrs'] = self.get_mass_spec_attrs(mass_spectrum)
    -210        dict_setting['analyzer'] = mass_spectrum.analyzer
    -211        dict_setting['instrument_label'] = mass_spectrum.instrument_label
    -212        dict_setting['sample_name'] = mass_spectrum.sample_name
    -213
    -214        with open(output_path.with_suffix('.json'), 'w', encoding='utf8', ) as outfile:
    -215
    -216            output = json.dumps(dict_setting, sort_keys=True, indent=4, separators=(',', ': '))
    -217            outfile.write(output)
    -218
    -219    def to_pandas(self, write_metadata=True):
    -220        """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    -221        
    -222        Parameters
    -223        ----------
    -224        write_metadata : bool, optional
    -225            Whether to write the metadata to a JSON file. Defaults to True.
    -226        """
    -227
    -228        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -229
    -230        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    -231
    -232        df = DataFrame(dict_data_list, columns=columns)
    -233
    -234        df.to_pickle(self.output_file.with_suffix('.pkl'))
    -235
    -236        if write_metadata:
    -237            self.write_settings(self.output_file, self.mass_spectrum)
    +209    def write_settings(self, output_path, mass_spectrum):
    +210        """Writes the settings of the mass spectrum to a JSON file.
    +211
    +212        Parameters
    +213        ----------
    +214        output_path : str
    +215            The output file path.
    +216        mass_spectrum : MassSpectrum
    +217            The mass spectrum to export.
    +218        """
    +219
    +220        import json
    +221
    +222        dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    +223
    +224        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(mass_spectrum)
    +225        dict_setting["analyzer"] = mass_spectrum.analyzer
    +226        dict_setting["instrument_label"] = mass_spectrum.instrument_label
    +227        dict_setting["sample_name"] = mass_spectrum.sample_name
    +228
    +229        with open(
    +230            output_path.with_suffix(".json"),
    +231            "w",
    +232            encoding="utf8",
    +233        ) as outfile:
    +234            output = json.dumps(
    +235                dict_setting, sort_keys=True, indent=4, separators=(",", ": ")
    +236            )
    +237            outfile.write(output)
     238
    -239    def to_excel(self, write_metadata=True):
    -240        """Exports the mass spectrum data to an Excel file.
    -241        
    +239    def to_pandas(self, write_metadata=True):
    +240        """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    +241
     242        Parameters
     243        ----------
     244        write_metadata : bool, optional
     245            Whether to write the metadata to a JSON file. Defaults to True.
     246        """
     247
    -248        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -249
    -250        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +248        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +249            self.mass_spectrum
    +250        )
     251
    -252        df = DataFrame(dict_data_list, columns=columns)
    +252        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
     253
    -254        df.to_excel(self.output_file.with_suffix('.xlsx'))
    +254        df = DataFrame(dict_data_list, columns=columns)
     255
    -256        if write_metadata:
    -257            self.write_settings(self.output_file, self.mass_spectrum)
    -258
    -259    def to_csv(self, write_metadata=True):
    -260        """Exports the mass spectrum data to a CSV file.
    -261
    -262        Parameters
    -263        ----------
    -264        write_metadata : bool, optional
    -265            Whether to write the metadata to a JSON file. Defaults to True.
    -266        """
    -267
    -268        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    +256        df.to_pickle(self.output_file.with_suffix(".pkl"))
    +257
    +258        if write_metadata:
    +259            self.write_settings(self.output_file, self.mass_spectrum)
    +260
    +261    def to_excel(self, write_metadata=True):
    +262        """Exports the mass spectrum data to an Excel file.
    +263
    +264        Parameters
    +265        ----------
    +266        write_metadata : bool, optional
    +267            Whether to write the metadata to a JSON file. Defaults to True.
    +268        """
     269
    -270        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    -271
    -272        import csv
    -273        try:
    -274            with open(self.output_file.with_suffix('.csv'), 'w', newline='') as csvfile:
    -275                writer = csv.DictWriter(csvfile, fieldnames=columns)
    -276                writer.writeheader()
    -277                for data in dict_data_list:
    -278                    writer.writerow(data)
    -279            if write_metadata:
    -280                self.write_settings(self.output_file, self.mass_spectrum)
    -281
    -282        except IOError as ioerror:
    -283            print(ioerror)
    -284
    -285    def to_json(self):
    -286        """Exports the mass spectrum data to a JSON string."""
    -287
    -288        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -289
    -290        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +270        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +271            self.mass_spectrum
    +272        )
    +273
    +274        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +275
    +276        df = DataFrame(dict_data_list, columns=columns)
    +277
    +278        df.to_excel(self.output_file.with_suffix(".xlsx"))
    +279
    +280        if write_metadata:
    +281            self.write_settings(self.output_file, self.mass_spectrum)
    +282
    +283    def to_csv(self, write_metadata=True):
    +284        """Exports the mass spectrum data to a CSV file.
    +285
    +286        Parameters
    +287        ----------
    +288        write_metadata : bool, optional
    +289            Whether to write the metadata to a JSON file. Defaults to True.
    +290        """
     291
    -292        df = DataFrame(dict_data_list, columns=columns)
    -293
    -294        # for key, values in dict_data.items():
    -295        #    if not values: dict_data[key] = NaN
    -296
    -297        # output = json.dumps(dict_data, sort_keys=True, indent=4, separators=(',', ': '))
    -298        return df.to_json(orient='records')
    +292        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +293            self.mass_spectrum
    +294        )
    +295
    +296        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +297
    +298        import csv
     299
    -300    def add_mass_spectrum_to_hdf5(self, hdf_handle, mass_spectrum, group_key, mass_spectra_group=None, export_raw=True):
    -301        """Adds the mass spectrum data to an HDF5 file.
    -302        
    -303        Parameters
    -304        ----------
    -305        hdf_handle : h5py.File
    -306            The HDF5 file handle.
    -307        mass_spectrum : MassSpectrum
    -308            The mass spectrum to add to the HDF5 file.
    -309        group_key : str
    -310            The group key (where to add the mass spectrum data within the HDF5 file).
    -311        mass_spectra_group : h5py.Group, optional
    -312            The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
    -313        export_raw : bool, optional
    -314            Whether to export the raw data. Defaults to True. 
    -315            If False, only the processed data (peaks) is exported (essentially centroided data).
    -316        """
    -317        if mass_spectra_group is None:
    +300        try:
    +301            with open(self.output_file.with_suffix(".csv"), "w", newline="") as csvfile:
    +302                writer = csv.DictWriter(csvfile, fieldnames=columns)
    +303                writer.writeheader()
    +304                for data in dict_data_list:
    +305                    writer.writerow(data)
    +306            if write_metadata:
    +307                self.write_settings(self.output_file, self.mass_spectrum)
    +308
    +309        except IOError as ioerror:
    +310            print(ioerror)
    +311
    +312    def to_json(self):
    +313        """Exports the mass spectrum data to a JSON string."""
    +314
    +315        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +316            self.mass_spectrum
    +317        )
     318
    -319            # Check if the file has the necessary attributes and add them if not
    -320            # This assumes that if there is a mass_spectra_group, these attributes were already added to the file
    -321            if not hdf_handle.attrs.get("date_utc"):
    -322                timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    -323                hdf_handle.attrs["date_utc"] = timenow
    -324                hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
    -325                hdf_handle.attrs["data_structure"] = "mass_spectrum"
    -326                hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
    -327                hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
    -328                hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name
    -329        
    -330        list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)
    -331
    -332        dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)
    -333
    -334        setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    -335
    -336        columns_labels = json.dumps(
    -337            self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum),
    -338            sort_keys=False,
    -339            indent=4,
    -340            separators=(",", ": "),
    -341        )
    -342
    -343        group_key = group_key
    -344
    -345        if mass_spectra_group is not None:
    -346            hdf_handle = mass_spectra_group
    -347
    -348        if group_key not in hdf_handle.keys():
    -349            scan_group = hdf_handle.create_group(group_key)
    -350
    -351            # If there is raw data (from profile data) save it
    -352            if not mass_spectrum.is_centroid and export_raw:
    -353                mz_abun_array = empty(
    -354                    shape=(2, len(mass_spectrum.abundance_profile))
    -355                )
    -356
    -357                mz_abun_array[0] = mass_spectrum.abundance_profile
    -358                mz_abun_array[1] = mass_spectrum.mz_exp_profile
    -359
    -360                raw_ms_dataset = scan_group.create_dataset(
    -361                    "raw_ms", data=mz_abun_array, dtype="f8"
    -362                )
    -363
    -364            else:
    -365                #  create empy dataset for missing raw data
    -366                raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")
    -367
    -368            raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)
    -369
    -370            if isinstance(mass_spectrum, MassSpecfromFreq):
    -371                raw_ms_dataset.attrs["TransientSetting"] = json.dumps(
    -372                    setting_dicts.get("TransientSetting"),
    -373                    sort_keys=False,
    -374                    indent=4,
    -375                    separators=(",", ": "),
    -376                )
    -377
    -378        else:
    -379            scan_group = hdf_handle.get(group_key)
    -380
    -381        # if there is not processed data len = 0, otherwise len() will return next index
    -382        index_processed_data = str(len(scan_group.keys()))
    -383
    -384        timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    -385
    -386        processed_dset = scan_group.create_dataset(
    -387            index_processed_data, data=list_results
    -388        )
    -389
    -390        processed_dset.attrs["date_utc"] = timenow
    +319        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +320
    +321        df = DataFrame(dict_data_list, columns=columns)
    +322
    +323        # for key, values in dict_data.items():
    +324        #    if not values: dict_data[key] = NaN
    +325
    +326        # output = json.dumps(dict_data, sort_keys=True, indent=4, separators=(',', ': '))
    +327        return df.to_json(orient="records")
    +328
    +329    def add_mass_spectrum_to_hdf5(
    +330        self,
    +331        hdf_handle,
    +332        mass_spectrum,
    +333        group_key,
    +334        mass_spectra_group=None,
    +335        export_raw=True,
    +336    ):
    +337        """Adds the mass spectrum data to an HDF5 file.
    +338
    +339        Parameters
    +340        ----------
    +341        hdf_handle : h5py.File
    +342            The HDF5 file handle.
    +343        mass_spectrum : MassSpectrum
    +344            The mass spectrum to add to the HDF5 file.
    +345        group_key : str
    +346            The group key (where to add the mass spectrum data within the HDF5 file).
    +347        mass_spectra_group : h5py.Group, optional
    +348            The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
    +349        export_raw : bool, optional
    +350            Whether to export the raw data. Defaults to True.
    +351            If False, only the processed data (peaks) is exported (essentially centroided data).
    +352        """
    +353        if mass_spectra_group is None:
    +354            # Check if the file has the necessary attributes and add them if not
    +355            # This assumes that if there is a mass_spectra_group, these attributes were already added to the file
    +356            if not hdf_handle.attrs.get("date_utc"):
    +357                timenow = str(
    +358                    datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")
    +359                )
    +360                hdf_handle.attrs["date_utc"] = timenow
    +361                hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
    +362                hdf_handle.attrs["data_structure"] = "mass_spectrum"
    +363                hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
    +364                hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
    +365                hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name
    +366
    +367        list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)
    +368
    +369        dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)
    +370
    +371        setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    +372
    +373        columns_labels = json.dumps(
    +374            self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum),
    +375            sort_keys=False,
    +376            indent=4,
    +377            separators=(",", ": "),
    +378        )
    +379
    +380        group_key = group_key
    +381
    +382        if mass_spectra_group is not None:
    +383            hdf_handle = mass_spectra_group
    +384
    +385        if group_key not in hdf_handle.keys():
    +386            scan_group = hdf_handle.create_group(group_key)
    +387
    +388            # If there is raw data (from profile data) save it
    +389            if not mass_spectrum.is_centroid and export_raw:
    +390                mz_abun_array = empty(shape=(2, len(mass_spectrum.abundance_profile)))
     391
    -392        processed_dset.attrs["ColumnsLabels"] = columns_labels
    -393
    -394        processed_dset.attrs["MoleculaSearchSetting"] = json.dumps(
    -395            setting_dicts.get("MoleculaSearch"),
    -396            sort_keys=False,
    -397            indent=4,
    -398            separators=(",", ": "),
    -399        )
    -400
    -401        processed_dset.attrs["MassSpecPeakSetting"] = json.dumps(
    -402            setting_dicts.get("MassSpecPeak"),
    -403            sort_keys=False,
    -404            indent=4,
    -405            separators=(",", ": "),
    -406        )
    -407
    -408        processed_dset.attrs["MassSpectrumSetting"] = json.dumps(
    -409            setting_dicts.get("MassSpectrum"),
    -410            sort_keys=False,
    -411            indent=4,
    -412            separators=(",", ": "),
    -413        )
    -414
    -415    def to_hdf(self):
    -416        """Exports the mass spectrum data to an HDF5 file."""
    -417
    -418        with h5py.File(self.output_file.with_suffix('.hdf5'), 'a') as hdf_handle:
    -419
    -420            self.add_mass_spectrum_to_hdf5(hdf_handle, self.mass_spectrum, str(self.mass_spectrum.scan_number))
    -421
    -422    def parameters_to_toml(self):
    -423        """Converts the mass spectrum parameters to a TOML string.
    -424        
    -425        Returns
    -426        -------
    -427        str
    -428            The TOML string of the mass spectrum parameters.
    -429        """
    -430        
    -431        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    -432
    -433        dict_setting['MassSpecAttrs'] = self.get_mass_spec_attrs(self.mass_spectrum)
    -434        dict_setting['analyzer'] = self.mass_spectrum.analyzer
    -435        dict_setting['instrument_label'] = self.mass_spectrum.instrument_label
    -436        dict_setting['sample_name'] = self.mass_spectrum.sample_name
    -437
    -438        output = toml.dumps(dict_setting)
    -439        
    -440        return output
    -441
    -442    def parameters_to_json(self):
    -443        """Converts the mass spectrum parameters to a JSON string.
    -444
    -445        Returns
    -446        -------
    -447        str
    -448            The JSON string of the mass spectrum parameters.
    -449        """
    -450
    -451        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    +392                mz_abun_array[0] = mass_spectrum.abundance_profile
    +393                mz_abun_array[1] = mass_spectrum.mz_exp_profile
    +394
    +395                raw_ms_dataset = scan_group.create_dataset(
    +396                    "raw_ms", data=mz_abun_array, dtype="f8"
    +397                )
    +398
    +399            else:
    +400                #  create empy dataset for missing raw data
    +401                raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")
    +402
    +403            raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)
    +404
    +405            if isinstance(mass_spectrum, MassSpecfromFreq):
    +406                raw_ms_dataset.attrs["TransientSetting"] = json.dumps(
    +407                    setting_dicts.get("TransientSetting"),
    +408                    sort_keys=False,
    +409                    indent=4,
    +410                    separators=(",", ": "),
    +411                )
    +412
    +413        else:
    +414            scan_group = hdf_handle.get(group_key)
    +415
    +416        # if there is not processed data len = 0, otherwise len() will return next index
    +417        index_processed_data = str(len(scan_group.keys()))
    +418
    +419        timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    +420
    +421        processed_dset = scan_group.create_dataset(
    +422            index_processed_data, data=list_results
    +423        )
    +424
    +425        processed_dset.attrs["date_utc"] = timenow
    +426
    +427        processed_dset.attrs["ColumnsLabels"] = columns_labels
    +428
    +429        processed_dset.attrs["MoleculaSearchSetting"] = json.dumps(
    +430            setting_dicts.get("MoleculaSearch"),
    +431            sort_keys=False,
    +432            indent=4,
    +433            separators=(",", ": "),
    +434        )
    +435
    +436        processed_dset.attrs["MassSpecPeakSetting"] = json.dumps(
    +437            setting_dicts.get("MassSpecPeak"),
    +438            sort_keys=False,
    +439            indent=4,
    +440            separators=(",", ": "),
    +441        )
    +442
    +443        processed_dset.attrs["MassSpectrumSetting"] = json.dumps(
    +444            setting_dicts.get("MassSpectrum"),
    +445            sort_keys=False,
    +446            indent=4,
    +447            separators=(",", ": "),
    +448        )
    +449
    +450    def to_hdf(self):
    +451        """Exports the mass spectrum data to an HDF5 file."""
     452
    -453        dict_setting['MassSpecAttrs'] = self.get_mass_spec_attrs(self.mass_spectrum)
    -454        dict_setting['analyzer'] = self.mass_spectrum.analyzer
    -455        dict_setting['instrument_label'] = self.mass_spectrum.instrument_label
    -456        dict_setting['sample_name'] = self.mass_spectrum.sample_name
    +453        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    +454            self.add_mass_spectrum_to_hdf5(
    +455                hdf_handle, self.mass_spectrum, str(self.mass_spectrum.scan_number)
    +456            )
     457
    -458        output = json.dumps(dict_setting)
    -459
    -460        return output
    -461
    -462    def get_mass_spec_attrs(self, mass_spectrum):
    -463        """Returns the mass spectrum attributes as a dictionary.
    -464        
    -465        Parameters
    -466        ----------
    -467        mass_spectrum : MassSpectrum
    -468            The mass spectrum to export.
    -469            
    -470        Returns
    -471        -------
    -472        dict
    -473            The mass spectrum attributes.
    -474        """
    +458    def parameters_to_toml(self):
    +459        """Converts the mass spectrum parameters to a TOML string.
    +460
    +461        Returns
    +462        -------
    +463        str
    +464            The TOML string of the mass spectrum parameters.
    +465        """
    +466
    +467        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    +468
    +469        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
    +470        dict_setting["analyzer"] = self.mass_spectrum.analyzer
    +471        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
    +472        dict_setting["sample_name"] = self.mass_spectrum.sample_name
    +473
    +474        output = toml.dumps(dict_setting)
     475
    -476        dict_ms_attrs = {}
    -477        dict_ms_attrs['polarity'] = mass_spectrum.polarity
    -478        dict_ms_attrs['rt'] = mass_spectrum.retention_time
    -479        dict_ms_attrs['tic'] = mass_spectrum.tic
    -480        dict_ms_attrs['mobility_scan'] = mass_spectrum.mobility_scan
    -481        dict_ms_attrs['mobility_rt'] = mass_spectrum.mobility_rt
    -482        dict_ms_attrs['Aterm'] = mass_spectrum.Aterm
    -483        dict_ms_attrs['Bterm'] = mass_spectrum.Bterm
    -484        dict_ms_attrs['Cterm'] = mass_spectrum.Cterm
    -485        dict_ms_attrs['baseline_noise'] = mass_spectrum.baseline_noise
    -486        dict_ms_attrs['baseline_noise_std'] = mass_spectrum.baseline_noise_std
    -487
    -488        return dict_ms_attrs
    -489
    -490    def get_all_used_atoms_in_order(self, mass_spectrum):
    -491        """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    -492
    -493        Parameters
    -494        ----------
    -495        mass_spectrum : MassSpectrum
    -496            The mass spectrum to export.
    +476        return output
    +477
    +478    def parameters_to_json(self):
    +479        """Converts the mass spectrum parameters to a JSON string.
    +480
    +481        Returns
    +482        -------
    +483        str
    +484            The JSON string of the mass spectrum parameters.
    +485        """
    +486
    +487        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    +488
    +489        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
    +490        dict_setting["analyzer"] = self.mass_spectrum.analyzer
    +491        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
    +492        dict_setting["sample_name"] = self.mass_spectrum.sample_name
    +493
    +494        output = json.dumps(dict_setting)
    +495
    +496        return output
     497
    -498        Returns
    -499        -------
    -500        list
    -501            The list of assigned atoms in the order specified by Atoms.atoms_order list.
    -502        """
    -503
    -504        atoms_in_order = Atoms.atoms_order
    -505        all_used_atoms = set()
    -506        if mass_spectrum:
    -507            for ms_peak in mass_spectrum:
    -508                if ms_peak:
    -509                    for m_formula in ms_peak:
    -510                        for atom in m_formula.atoms:
    -511                            all_used_atoms.add(atom)
    -512
    -513        def sort_method(atom):
    -514            return [atoms_in_order.index(atom)]
    -515
    -516        return sorted(all_used_atoms, key=sort_method)
    -517
    -518    def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
    -519        """Returns the mass spectrum data as a list of dictionaries.
    -520        
    -521        Parameters
    -522        ----------
    -523        mass_spectrum : MassSpectrum
    -524            The mass spectrum to export.
    -525        is_hdf5 : bool, optional
    -526            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    -527            
    -528        Returns
    -529        -------
    -530        list
    -531            The mass spectrum data as a list of dictionaries.
    -532        """
    +498    def get_mass_spec_attrs(self, mass_spectrum):
    +499        """Returns the mass spectrum attributes as a dictionary.
    +500
    +501        Parameters
    +502        ----------
    +503        mass_spectrum : MassSpectrum
    +504            The mass spectrum to export.
    +505
    +506        Returns
    +507        -------
    +508        dict
    +509            The mass spectrum attributes.
    +510        """
    +511
    +512        dict_ms_attrs = {}
    +513        dict_ms_attrs["polarity"] = mass_spectrum.polarity
    +514        dict_ms_attrs["rt"] = mass_spectrum.retention_time
    +515        dict_ms_attrs["tic"] = mass_spectrum.tic
    +516        dict_ms_attrs["mobility_scan"] = mass_spectrum.mobility_scan
    +517        dict_ms_attrs["mobility_rt"] = mass_spectrum.mobility_rt
    +518        dict_ms_attrs["Aterm"] = mass_spectrum.Aterm
    +519        dict_ms_attrs["Bterm"] = mass_spectrum.Bterm
    +520        dict_ms_attrs["Cterm"] = mass_spectrum.Cterm
    +521        dict_ms_attrs["baseline_noise"] = mass_spectrum.baseline_noise
    +522        dict_ms_attrs["baseline_noise_std"] = mass_spectrum.baseline_noise_std
    +523
    +524        return dict_ms_attrs
    +525
    +526    def get_all_used_atoms_in_order(self, mass_spectrum):
    +527        """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    +528
    +529        Parameters
    +530        ----------
    +531        mass_spectrum : MassSpectrum
    +532            The mass spectrum to export.
     533
    -534        column_labels = self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum)
    -535
    -536        dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)
    -537
    -538        all_lines = []
    -539        for dict_res in dict_list:
    -540
    -541            result_line = [NaN] * len(column_labels)
    -542
    -543            for label, value in dict_res.items():
    -544
    -545                label_index = column_labels.index(label)
    -546                result_line[label_index] = value
    -547
    -548            all_lines.append(result_line)
    -549
    -550        return all_lines
    +534        Returns
    +535        -------
    +536        list
    +537            The list of assigned atoms in the order specified by Atoms.atoms_order list.
    +538        """
    +539
    +540        atoms_in_order = Atoms.atoms_order
    +541        all_used_atoms = set()
    +542        if mass_spectrum:
    +543            for ms_peak in mass_spectrum:
    +544                if ms_peak:
    +545                    for m_formula in ms_peak:
    +546                        for atom in m_formula.atoms:
    +547                            all_used_atoms.add(atom)
    +548
    +549        def sort_method(atom):
    +550            return [atoms_in_order.index(atom)]
     551
    -552    def get_list_dict_data(self, mass_spectrum, include_no_match=True, include_isotopologues=True,
    -553                           isotopologue_inline=True, no_match_inline=False, is_hdf5=False,
    -554                           additional_columns=None):
    +552        return sorted(all_used_atoms, key=sort_method)
    +553
    +554    def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
     555        """Returns the mass spectrum data as a list of dictionaries.
     556
     557        Parameters
     558        ----------
     559        mass_spectrum : MassSpectrum
     560            The mass spectrum to export.
    -561        include_no_match : bool, optional
    -562            Whether to include unassigned (no match) data. Defaults to True.
    -563        include_isotopologues : bool, optional
    -564            Whether to include isotopologues. Defaults to True.
    -565        isotopologue_inline : bool, optional
    -566            Whether to include isotopologues inline. Defaults to True.
    -567        no_match_inline : bool, optional
    -568            Whether to include unassigned (no match) data inline. Defaults to False.
    -569        is_hdf5 : bool, optional
    -570            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    -571
    -572        Returns
    -573        -------
    -574        list
    -575            The mass spectrum data as a list of dictionaries.
    -576        """
    -577
    -578        dict_data_list = []
    +561        is_hdf5 : bool, optional
    +562            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    +563
    +564        Returns
    +565        -------
    +566        list
    +567            The mass spectrum data as a list of dictionaries.
    +568        """
    +569
    +570        column_labels = self.columns_label + self.get_all_used_atoms_in_order(
    +571            mass_spectrum
    +572        )
    +573
    +574        dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)
    +575
    +576        all_lines = []
    +577        for dict_res in dict_list:
    +578            result_line = [NaN] * len(column_labels)
     579
    -580        if is_hdf5:
    -581            encode = ".encode('utf-8')"
    -582        else:
    -583            encode = ""
    -584
    -585        def add_no_match_dict_data(index, ms_peak):
    -586            '''
    -587            Export dictionary of mspeak info for unassigned (no match) data
    -588            '''
    -589            dict_result = {'Index': index,
    -590                           'm/z': ms_peak._mz_exp,
    -591                           'Calibrated m/z': ms_peak.mz_exp,
    -592                           'Peak Height': ms_peak.abundance,
    -593                           'Peak Area': ms_peak.area,
    -594                           'Resolving Power': ms_peak.resolving_power,
    -595                           'S/N': ms_peak.signal_to_noise,
    -596                           'Ion Charge': ms_peak.ion_charge,
    -597                           'Heteroatom Class': eval("Labels.unassigned{}".format(encode)),
    -598                           }
    +580            for label, value in dict_res.items():
    +581                label_index = column_labels.index(label)
    +582                result_line[label_index] = value
    +583
    +584            all_lines.append(result_line)
    +585
    +586        return all_lines
    +587
    +588    def get_list_dict_data(
    +589        self,
    +590        mass_spectrum,
    +591        include_no_match=True,
    +592        include_isotopologues=True,
    +593        isotopologue_inline=True,
    +594        no_match_inline=False,
    +595        is_hdf5=False,
    +596        additional_columns=None,
    +597    ):
    +598        """Returns the mass spectrum data as a list of dictionaries.
     599
    -600            dict_data_list.append(dict_result)
    -601
    -602        def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
    -603            '''
    -604            Export dictionary of mspeak info for assigned (match) data
    -605            '''
    -606            formula_dict = mformula.to_dict()
    -607
    -608            dict_result = {'Index': index,
    -609                           'm/z': ms_peak._mz_exp,
    -610                           'Calibrated m/z': ms_peak.mz_exp,
    -611                           'Calculated m/z': mformula.mz_calc,
    -612                           'Peak Height': ms_peak.abundance,
    -613                           'Peak Area': ms_peak.area,
    -614                           'Resolving Power': ms_peak.resolving_power,
    -615                           'S/N': ms_peak.signal_to_noise,
    -616                           'Ion Charge': ms_peak.ion_charge,
    -617                           'm/z Error (ppm)': mformula.mz_error,
    -618                           'Confidence Score': mformula.confidence_score,
    -619                           'Isotopologue Similarity': mformula.isotopologue_similarity,
    -620                           'm/z Error Score': mformula.average_mz_error_score,
    -621                           'DBE': mformula.dbe,
    -622                           'Heteroatom Class': eval("mformula.class_label{}".format(encode)),
    -623                           'H/C': mformula.H_C,
    -624                           'O/C': mformula.O_C,
    -625                           'Ion Type': eval("mformula.ion_type.lower(){}".format(encode)),
    -626                           'Is Isotopologue': int(mformula.is_isotopologue),
    -627                           'Molecular Formula': eval("mformula.string{}".format(encode))
    -628                           }
    -629            if additional_columns is not None:
    -630                possible_dict = {
    -631                    'Aromaticity Index':mformula.A_I, 
    -632                    'NOSC':mformula.nosc, 
    -633                    'Aromaticity Index (modified)':mformula.A_I_mod
    -634                    }
    -635                for column in additional_columns:
    -636                    dict_result[column] = possible_dict.get(column)
    -637            
    -638            if mformula.adduct_atom:
    -639                dict_result['Adduct'] = eval("mformula.adduct_atom{}".format(encode))
    -640
    -641            if mformula.is_isotopologue:
    -642                dict_result['Mono Isotopic Index'] = mformula.mspeak_index_mono_isotopic
    +600        Parameters
    +601        ----------
    +602        mass_spectrum : MassSpectrum
    +603            The mass spectrum to export.
    +604        include_no_match : bool, optional
    +605            Whether to include unassigned (no match) data. Defaults to True.
    +606        include_isotopologues : bool, optional
    +607            Whether to include isotopologues. Defaults to True.
    +608        isotopologue_inline : bool, optional
    +609            Whether to include isotopologues inline. Defaults to True.
    +610        no_match_inline : bool, optional
    +611            Whether to include unassigned (no match) data inline. Defaults to False.
    +612        is_hdf5 : bool, optional
    +613            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    +614
    +615        Returns
    +616        -------
    +617        list
    +618            The mass spectrum data as a list of dictionaries.
    +619        """
    +620
    +621        dict_data_list = []
    +622
    +623        if is_hdf5:
    +624            encode = ".encode('utf-8')"
    +625        else:
    +626            encode = ""
    +627
    +628        def add_no_match_dict_data(index, ms_peak):
    +629            """
    +630            Export dictionary of mspeak info for unassigned (no match) data
    +631            """
    +632            dict_result = {
    +633                "Index": index,
    +634                "m/z": ms_peak._mz_exp,
    +635                "Calibrated m/z": ms_peak.mz_exp,
    +636                "Peak Height": ms_peak.abundance,
    +637                "Peak Area": ms_peak.area,
    +638                "Resolving Power": ms_peak.resolving_power,
    +639                "S/N": ms_peak.signal_to_noise,
    +640                "Ion Charge": ms_peak.ion_charge,
    +641                "Heteroatom Class": eval("Labels.unassigned{}".format(encode)),
    +642            }
     643
    -644            if self.atoms_order_list is None:
    -645                atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)
    -646            else:
    -647                atoms_order_list = self.atoms_order_list
    -648
    -649            for atom in atoms_order_list:
    -650                if atom in formula_dict.keys():
    -651                    dict_result[atom] = formula_dict.get(atom)
    -652
    -653            dict_data_list.append(dict_result)
    -654
    -655        score_methods = mass_spectrum.molecular_search_settings.score_methods
    -656        selected_score_method = mass_spectrum.molecular_search_settings.output_score_method
    -657
    -658        if selected_score_method in score_methods:
    -659
    -660            # temp set score method as the one chosen in the output
    -661            current_method = mass_spectrum.molecular_search_settings.score_method
    -662            mass_spectrum.molecular_search_settings.score_method = selected_score_method
    -663
    -664            for index, ms_peak in enumerate(mass_spectrum):
    -665
    -666                # print(ms_peak.mz_exp)
    -667
    -668                if ms_peak:
    -669
    -670                    m_formula = ms_peak.best_molecular_formula_candidate
    -671
    -672                    if m_formula:
    -673
    -674                        if not m_formula.is_isotopologue:
    -675
    -676                            add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -677
    -678                            for iso_mspeak_index, iso_mf_formula in m_formula.mspeak_mf_isotopologues_indexes:
    -679                                iso_ms_peak = mass_spectrum[iso_mspeak_index]
    -680                                add_match_dict_data(iso_mspeak_index, iso_ms_peak, iso_mf_formula, additional_columns=additional_columns)
    -681                else:
    +644            dict_data_list.append(dict_result)
    +645
    +646        def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
    +647            """
    +648            Export dictionary of mspeak info for assigned (match) data
    +649            """
    +650            formula_dict = mformula.to_dict()
    +651
    +652            dict_result = {
    +653                "Index": index,
    +654                "m/z": ms_peak._mz_exp,
    +655                "Calibrated m/z": ms_peak.mz_exp,
    +656                "Calculated m/z": mformula.mz_calc,
    +657                "Peak Height": ms_peak.abundance,
    +658                "Peak Area": ms_peak.area,
    +659                "Resolving Power": ms_peak.resolving_power,
    +660                "S/N": ms_peak.signal_to_noise,
    +661                "Ion Charge": ms_peak.ion_charge,
    +662                "m/z Error (ppm)": mformula.mz_error,
    +663                "Confidence Score": mformula.confidence_score,
    +664                "Isotopologue Similarity": mformula.isotopologue_similarity,
    +665                "m/z Error Score": mformula.average_mz_error_score,
    +666                "DBE": mformula.dbe,
    +667                "Heteroatom Class": eval("mformula.class_label{}".format(encode)),
    +668                "H/C": mformula.H_C,
    +669                "O/C": mformula.O_C,
    +670                "Ion Type": eval("mformula.ion_type.lower(){}".format(encode)),
    +671                "Is Isotopologue": int(mformula.is_isotopologue),
    +672                "Molecular Formula": eval("mformula.string{}".format(encode)),
    +673            }
    +674            if additional_columns is not None:
    +675                possible_dict = {
    +676                    "Aromaticity Index": mformula.A_I,
    +677                    "NOSC": mformula.nosc,
    +678                    "Aromaticity Index (modified)": mformula.A_I_mod,
    +679                }
    +680                for column in additional_columns:
    +681                    dict_result[column] = possible_dict.get(column)
     682
    -683                    if include_no_match and no_match_inline:
    -684                        add_no_match_dict_data(index, ms_peak)
    +683            if mformula.adduct_atom:
    +684                dict_result["Adduct"] = eval("mformula.adduct_atom{}".format(encode))
     685
    -686            if include_no_match and not no_match_inline:
    -687
    -688                for index, ms_peak in enumerate(mass_spectrum):
    -689                    if not ms_peak:
    -690                        add_no_match_dict_data(index, ms_peak)     
    -691            # reset score method as the one chosen in the output
    -692            mass_spectrum.molecular_search_settings.score_method = current_method
    +686            if mformula.is_isotopologue:
    +687                dict_result["Mono Isotopic Index"] = mformula.mspeak_index_mono_isotopic
    +688
    +689            if self.atoms_order_list is None:
    +690                atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)
    +691            else:
    +692                atoms_order_list = self.atoms_order_list
     693
    -694        else:
    -695
    -696            for index, ms_peak in enumerate(mass_spectrum):
    +694            for atom in atoms_order_list:
    +695                if atom in formula_dict.keys():
    +696                    dict_result[atom] = formula_dict.get(atom)
     697
    -698                # check if there is a molecular formula candidate for the msPeak
    +698            dict_data_list.append(dict_result)
     699
    -700                if ms_peak:
    -701                    # m_formula = ms_peak.molecular_formula_lowest_error
    -702                    for m_formula in ms_peak:
    -703
    -704                        if mass_spectrum.molecular_search_settings.output_min_score > 0:
    -705
    -706                            if m_formula.confidence_score >= mass_spectrum.molecular_search_settings.output_min_score:
    -707
    -708                                if m_formula.is_isotopologue:  # isotopologues inline
    -709                                    if include_isotopologues and isotopologue_inline:
    -710                                        add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -711                                else:
    -712                                    add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)  # add monoisotopic peak
    -713
    -714                            # cutoff because of low score
    -715                            else:
    -716                                add_no_match_dict_data(index, ms_peak)
    -717
    -718                        else:
    -719                            if m_formula.is_isotopologue:  # isotopologues inline
    -720                                if include_isotopologues and isotopologue_inline:
    -721                                    add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -722                            else:
    -723                                add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)  # add monoisotopic peak
    -724                else:
    -725                    # include not_match
    -726                    if include_no_match and no_match_inline:
    -727                        add_no_match_dict_data(index, ms_peak)
    -728
    -729            if include_isotopologues and not isotopologue_inline:
    -730                for index, ms_peak in enumerate(mass_spectrum):
    -731                    for m_formula in ms_peak:
    -732                        if m_formula.is_isotopologue:
    -733                            if m_formula.confidence_score >= mass_spectrum.molecular_search_settings.output_min_score:
    -734                                add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -735
    -736            if include_no_match and not no_match_inline:
    -737                for index, ms_peak in enumerate(mass_spectrum):
    -738                    if not ms_peak:
    -739                        add_no_match_dict_data(index, ms_peak)
    -740
    -741        # remove duplicated add_match data possibly introduced on the output_score_filter step
    -742        res = []
    -743        [res.append(x) for x in dict_data_list if x not in res]
    -744
    -745        return res
    +700        score_methods = mass_spectrum.molecular_search_settings.score_methods
    +701        selected_score_method = (
    +702            mass_spectrum.molecular_search_settings.output_score_method
    +703        )
    +704
    +705        if selected_score_method in score_methods:
    +706            # temp set score method as the one chosen in the output
    +707            current_method = mass_spectrum.molecular_search_settings.score_method
    +708            mass_spectrum.molecular_search_settings.score_method = selected_score_method
    +709
    +710            for index, ms_peak in enumerate(mass_spectrum):
    +711                # print(ms_peak.mz_exp)
    +712
    +713                if ms_peak:
    +714                    m_formula = ms_peak.best_molecular_formula_candidate
    +715
    +716                    if m_formula:
    +717                        if not m_formula.is_isotopologue:
    +718                            add_match_dict_data(
    +719                                index,
    +720                                ms_peak,
    +721                                m_formula,
    +722                                additional_columns=additional_columns,
    +723                            )
    +724
    +725                            for (
    +726                                iso_mspeak_index,
    +727                                iso_mf_formula,
    +728                            ) in m_formula.mspeak_mf_isotopologues_indexes:
    +729                                iso_ms_peak = mass_spectrum[iso_mspeak_index]
    +730                                add_match_dict_data(
    +731                                    iso_mspeak_index,
    +732                                    iso_ms_peak,
    +733                                    iso_mf_formula,
    +734                                    additional_columns=additional_columns,
    +735                                )
    +736                else:
    +737                    if include_no_match and no_match_inline:
    +738                        add_no_match_dict_data(index, ms_peak)
    +739
    +740            if include_no_match and not no_match_inline:
    +741                for index, ms_peak in enumerate(mass_spectrum):
    +742                    if not ms_peak:
    +743                        add_no_match_dict_data(index, ms_peak)
    +744            # reset score method as the one chosen in the output
    +745            mass_spectrum.molecular_search_settings.score_method = current_method
    +746
    +747        else:
    +748            for index, ms_peak in enumerate(mass_spectrum):
    +749                # check if there is a molecular formula candidate for the msPeak
    +750
    +751                if ms_peak:
    +752                    # m_formula = ms_peak.molecular_formula_lowest_error
    +753                    for m_formula in ms_peak:
    +754                        if mass_spectrum.molecular_search_settings.output_min_score > 0:
    +755                            if (
    +756                                m_formula.confidence_score
    +757                                >= mass_spectrum.molecular_search_settings.output_min_score
    +758                            ):
    +759                                if m_formula.is_isotopologue:  # isotopologues inline
    +760                                    if include_isotopologues and isotopologue_inline:
    +761                                        add_match_dict_data(
    +762                                            index,
    +763                                            ms_peak,
    +764                                            m_formula,
    +765                                            additional_columns=additional_columns,
    +766                                        )
    +767                                else:
    +768                                    add_match_dict_data(
    +769                                        index,
    +770                                        ms_peak,
    +771                                        m_formula,
    +772                                        additional_columns=additional_columns,
    +773                                    )  # add monoisotopic peak
    +774
    +775                            # cutoff because of low score
    +776                            else:
    +777                                add_no_match_dict_data(index, ms_peak)
    +778
    +779                        else:
    +780                            if m_formula.is_isotopologue:  # isotopologues inline
    +781                                if include_isotopologues and isotopologue_inline:
    +782                                    add_match_dict_data(
    +783                                        index,
    +784                                        ms_peak,
    +785                                        m_formula,
    +786                                        additional_columns=additional_columns,
    +787                                    )
    +788                            else:
    +789                                add_match_dict_data(
    +790                                    index,
    +791                                    ms_peak,
    +792                                    m_formula,
    +793                                    additional_columns=additional_columns,
    +794                                )  # add monoisotopic peak
    +795                else:
    +796                    # include not_match
    +797                    if include_no_match and no_match_inline:
    +798                        add_no_match_dict_data(index, ms_peak)
    +799
    +800            if include_isotopologues and not isotopologue_inline:
    +801                for index, ms_peak in enumerate(mass_spectrum):
    +802                    for m_formula in ms_peak:
    +803                        if m_formula.is_isotopologue:
    +804                            if (
    +805                                m_formula.confidence_score
    +806                                >= mass_spectrum.molecular_search_settings.output_min_score
    +807                            ):
    +808                                add_match_dict_data(
    +809                                    index,
    +810                                    ms_peak,
    +811                                    m_formula,
    +812                                    additional_columns=additional_columns,
    +813                                )
    +814
    +815            if include_no_match and not no_match_inline:
    +816                for index, ms_peak in enumerate(mass_spectrum):
    +817                    if not ms_peak:
    +818                        add_no_match_dict_data(index, ms_peak)
    +819
    +820        # remove duplicated add_match data possibly introduced on the output_score_filter step
    +821        res = []
    +822        [res.append(x) for x in dict_data_list if x not in res]
    +823
    +824        return res
     
    @@ -1682,21 +1841,20 @@
    Methods
    -
    81    def __init__(self, out_file_path, mass_spectrum, output_type='excel'):
    +            
    80    def __init__(self, out_file_path, mass_spectrum, output_type="excel"):
    +81        Thread.__init__(self)
     82
    -83        Thread.__init__(self)
    +83        self.output_file = Path(out_file_path)
     84
    -85        self.output_file = Path(out_file_path)
    -86
    -87        # 'excel', 'csv' or 'pandas'
    -88        self.output_type = output_type
    +85        # 'excel', 'csv' or 'pandas'
    +86        self.output_type = output_type
    +87
    +88        self.mass_spectrum = mass_spectrum
     89
    -90        self.mass_spectrum = mass_spectrum
    -91
    -92        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
    -93        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)
    -94
    -95        self._init_columns()
    +90        # collect all assigned atoms and order them accordingly to the Atoms.atoms_order list
    +91        self.atoms_order_list = self.get_all_used_atoms_in_order(self.mass_spectrum)
    +92
    +93        self._init_columns()
     
    @@ -1780,26 +1938,28 @@
    Methods
    -
    138    def save(self):
    -139        """Save the mass spectrum data to the output file.
    -140        
    -141        Raises
    -142        ------
    -143        ValueError
    -144            If the output type is not supported.
    -145        """
    -146
    -147        if self.output_type == 'excel':
    -148            self.to_excel()
    -149        elif self.output_type == 'csv':
    -150            self.to_csv()
    -151        elif self.output_type == 'pandas':
    -152            self.to_pandas()
    -153        elif self.output_type == 'hdf5':
    -154            self.to_hdf()
    -155        else:
    -156            raise ValueError(
    -157                "Unkown output type: %s; it can be 'excel', 'csv' or 'pandas'" % self.output_type)
    +            
    139    def save(self):
    +140        """Save the mass spectrum data to the output file.
    +141
    +142        Raises
    +143        ------
    +144        ValueError
    +145            If the output type is not supported.
    +146        """
    +147
    +148        if self.output_type == "excel":
    +149            self.to_excel()
    +150        elif self.output_type == "csv":
    +151            self.to_csv()
    +152        elif self.output_type == "pandas":
    +153            self.to_pandas()
    +154        elif self.output_type == "hdf5":
    +155            self.to_hdf()
    +156        else:
    +157            raise ValueError(
    +158                "Unkown output type: %s; it can be 'excel', 'csv' or 'pandas'"
    +159                % self.output_type
    +160            )
     
    @@ -1825,12 +1985,12 @@
    Raises
    -
    159    def run(self):
    -160        """ Run the export process.
    -161        
    -162        This method is called when the thread starts.
    -163        It calls the save method to perform the export."""
    -164        self.save()
    +            
    162    def run(self):
    +163        """Run the export process.
    +164
    +165        This method is called when the thread starts.
    +166        It calls the save method to perform the export."""
    +167        self.save()
     
    @@ -1853,33 +2013,45 @@
    Raises
    -
    166    def get_pandas_df(self, additional_columns=None):
    -167        """Returns the mass spectrum data as a pandas DataFrame.
    -168        
    -169        Parameters
    -170        ----------
    -171        additional_columns : list, optional
    -172            Additional columns to include in the DataFrame. Defaults to None.
    -173            Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
    -174
    -175        Returns
    -176        -------
    -177        DataFrame
    -178            The mass spectrum data as a pandas DataFrame.
    -179        """
    -180        if additional_columns is  not None:
    -181            possible_additional_columns = ['Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)']
    -182            if additional_columns:
    -183                for column in additional_columns:
    -184                    if column not in possible_additional_columns:
    -185                        raise ValueError("Invalid additional column: %s" % column)
    -186            columns = self.columns_label + additional_columns + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -187        else:
    -188            columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -189        dict_data_list = self.get_list_dict_data(self.mass_spectrum, additional_columns=additional_columns)
    -190        df = DataFrame(dict_data_list, columns=columns)
    -191        df.name = self.output_file
    -192        return df
    +            
    169    def get_pandas_df(self, additional_columns=None):
    +170        """Returns the mass spectrum data as a pandas DataFrame.
    +171
    +172        Parameters
    +173        ----------
    +174        additional_columns : list, optional
    +175            Additional columns to include in the DataFrame. Defaults to None.
    +176            Suitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
    +177
    +178        Returns
    +179        -------
    +180        DataFrame
    +181            The mass spectrum data as a pandas DataFrame.
    +182        """
    +183        if additional_columns is not None:
    +184            possible_additional_columns = [
    +185                "Aromaticity Index",
    +186                "NOSC",
    +187                "Aromaticity Index (modified)",
    +188            ]
    +189            if additional_columns:
    +190                for column in additional_columns:
    +191                    if column not in possible_additional_columns:
    +192                        raise ValueError("Invalid additional column: %s" % column)
    +193            columns = (
    +194                self.columns_label
    +195                + additional_columns
    +196                + self.get_all_used_atoms_in_order(self.mass_spectrum)
    +197            )
    +198        else:
    +199            columns = self.columns_label + self.get_all_used_atoms_in_order(
    +200                self.mass_spectrum
    +201            )
    +202        dict_data_list = self.get_list_dict_data(
    +203            self.mass_spectrum, additional_columns=additional_columns
    +204        )
    +205        df = DataFrame(dict_data_list, columns=columns)
    +206        df.name = self.output_file
    +207        return df
     
    @@ -1913,30 +2085,35 @@
    Returns
    -
    194    def write_settings(self, output_path, mass_spectrum):
    -195        """Writes the settings of the mass spectrum to a JSON file.
    -196        
    -197        Parameters
    -198        ----------
    -199        output_path : str
    -200            The output file path.
    -201        mass_spectrum : MassSpectrum
    -202            The mass spectrum to export.
    -203        """
    -204
    -205        import json
    -206
    -207        dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    -208
    -209        dict_setting['MassSpecAttrs'] = self.get_mass_spec_attrs(mass_spectrum)
    -210        dict_setting['analyzer'] = mass_spectrum.analyzer
    -211        dict_setting['instrument_label'] = mass_spectrum.instrument_label
    -212        dict_setting['sample_name'] = mass_spectrum.sample_name
    -213
    -214        with open(output_path.with_suffix('.json'), 'w', encoding='utf8', ) as outfile:
    -215
    -216            output = json.dumps(dict_setting, sort_keys=True, indent=4, separators=(',', ': '))
    -217            outfile.write(output)
    +            
    209    def write_settings(self, output_path, mass_spectrum):
    +210        """Writes the settings of the mass spectrum to a JSON file.
    +211
    +212        Parameters
    +213        ----------
    +214        output_path : str
    +215            The output file path.
    +216        mass_spectrum : MassSpectrum
    +217            The mass spectrum to export.
    +218        """
    +219
    +220        import json
    +221
    +222        dict_setting = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    +223
    +224        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(mass_spectrum)
    +225        dict_setting["analyzer"] = mass_spectrum.analyzer
    +226        dict_setting["instrument_label"] = mass_spectrum.instrument_label
    +227        dict_setting["sample_name"] = mass_spectrum.sample_name
    +228
    +229        with open(
    +230            output_path.with_suffix(".json"),
    +231            "w",
    +232            encoding="utf8",
    +233        ) as outfile:
    +234            output = json.dumps(
    +235                dict_setting, sort_keys=True, indent=4, separators=(",", ": ")
    +236            )
    +237            outfile.write(output)
     
    @@ -1965,25 +2142,27 @@
    Parameters
    -
    219    def to_pandas(self, write_metadata=True):
    -220        """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    -221        
    -222        Parameters
    -223        ----------
    -224        write_metadata : bool, optional
    -225            Whether to write the metadata to a JSON file. Defaults to True.
    -226        """
    -227
    -228        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -229
    -230        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    -231
    -232        df = DataFrame(dict_data_list, columns=columns)
    -233
    -234        df.to_pickle(self.output_file.with_suffix('.pkl'))
    -235
    -236        if write_metadata:
    -237            self.write_settings(self.output_file, self.mass_spectrum)
    +            
    239    def to_pandas(self, write_metadata=True):
    +240        """Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    +241
    +242        Parameters
    +243        ----------
    +244        write_metadata : bool, optional
    +245            Whether to write the metadata to a JSON file. Defaults to True.
    +246        """
    +247
    +248        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +249            self.mass_spectrum
    +250        )
    +251
    +252        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +253
    +254        df = DataFrame(dict_data_list, columns=columns)
    +255
    +256        df.to_pickle(self.output_file.with_suffix(".pkl"))
    +257
    +258        if write_metadata:
    +259            self.write_settings(self.output_file, self.mass_spectrum)
     
    @@ -2010,25 +2189,27 @@
    Parameters
    -
    239    def to_excel(self, write_metadata=True):
    -240        """Exports the mass spectrum data to an Excel file.
    -241        
    -242        Parameters
    -243        ----------
    -244        write_metadata : bool, optional
    -245            Whether to write the metadata to a JSON file. Defaults to True.
    -246        """
    -247
    -248        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -249
    -250        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    -251
    -252        df = DataFrame(dict_data_list, columns=columns)
    -253
    -254        df.to_excel(self.output_file.with_suffix('.xlsx'))
    -255
    -256        if write_metadata:
    -257            self.write_settings(self.output_file, self.mass_spectrum)
    +            
    261    def to_excel(self, write_metadata=True):
    +262        """Exports the mass spectrum data to an Excel file.
    +263
    +264        Parameters
    +265        ----------
    +266        write_metadata : bool, optional
    +267            Whether to write the metadata to a JSON file. Defaults to True.
    +268        """
    +269
    +270        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +271            self.mass_spectrum
    +272        )
    +273
    +274        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +275
    +276        df = DataFrame(dict_data_list, columns=columns)
    +277
    +278        df.to_excel(self.output_file.with_suffix(".xlsx"))
    +279
    +280        if write_metadata:
    +281            self.write_settings(self.output_file, self.mass_spectrum)
     
    @@ -2055,31 +2236,34 @@
    Parameters
    -
    259    def to_csv(self, write_metadata=True):
    -260        """Exports the mass spectrum data to a CSV file.
    -261
    -262        Parameters
    -263        ----------
    -264        write_metadata : bool, optional
    -265            Whether to write the metadata to a JSON file. Defaults to True.
    -266        """
    -267
    -268        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -269
    -270        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    -271
    -272        import csv
    -273        try:
    -274            with open(self.output_file.with_suffix('.csv'), 'w', newline='') as csvfile:
    -275                writer = csv.DictWriter(csvfile, fieldnames=columns)
    -276                writer.writeheader()
    -277                for data in dict_data_list:
    -278                    writer.writerow(data)
    -279            if write_metadata:
    -280                self.write_settings(self.output_file, self.mass_spectrum)
    -281
    -282        except IOError as ioerror:
    -283            print(ioerror)
    +            
    283    def to_csv(self, write_metadata=True):
    +284        """Exports the mass spectrum data to a CSV file.
    +285
    +286        Parameters
    +287        ----------
    +288        write_metadata : bool, optional
    +289            Whether to write the metadata to a JSON file. Defaults to True.
    +290        """
    +291
    +292        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +293            self.mass_spectrum
    +294        )
    +295
    +296        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +297
    +298        import csv
    +299
    +300        try:
    +301            with open(self.output_file.with_suffix(".csv"), "w", newline="") as csvfile:
    +302                writer = csv.DictWriter(csvfile, fieldnames=columns)
    +303                writer.writeheader()
    +304                for data in dict_data_list:
    +305                    writer.writerow(data)
    +306            if write_metadata:
    +307                self.write_settings(self.output_file, self.mass_spectrum)
    +308
    +309        except IOError as ioerror:
    +310            print(ioerror)
     
    @@ -2106,20 +2290,22 @@
    Parameters
    -
    285    def to_json(self):
    -286        """Exports the mass spectrum data to a JSON string."""
    -287
    -288        columns = self.columns_label + self.get_all_used_atoms_in_order(self.mass_spectrum)
    -289
    -290        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    -291
    -292        df = DataFrame(dict_data_list, columns=columns)
    -293
    -294        # for key, values in dict_data.items():
    -295        #    if not values: dict_data[key] = NaN
    -296
    -297        # output = json.dumps(dict_data, sort_keys=True, indent=4, separators=(',', ': '))
    -298        return df.to_json(orient='records')
    +            
    312    def to_json(self):
    +313        """Exports the mass spectrum data to a JSON string."""
    +314
    +315        columns = self.columns_label + self.get_all_used_atoms_in_order(
    +316            self.mass_spectrum
    +317        )
    +318
    +319        dict_data_list = self.get_list_dict_data(self.mass_spectrum)
    +320
    +321        df = DataFrame(dict_data_list, columns=columns)
    +322
    +323        # for key, values in dict_data.items():
    +324        #    if not values: dict_data[key] = NaN
    +325
    +326        # output = json.dumps(dict_data, sort_keys=True, indent=4, separators=(',', ': '))
    +327        return df.to_json(orient="records")
     
    @@ -2139,120 +2325,126 @@
    Parameters
    -
    300    def add_mass_spectrum_to_hdf5(self, hdf_handle, mass_spectrum, group_key, mass_spectra_group=None, export_raw=True):
    -301        """Adds the mass spectrum data to an HDF5 file.
    -302        
    -303        Parameters
    -304        ----------
    -305        hdf_handle : h5py.File
    -306            The HDF5 file handle.
    -307        mass_spectrum : MassSpectrum
    -308            The mass spectrum to add to the HDF5 file.
    -309        group_key : str
    -310            The group key (where to add the mass spectrum data within the HDF5 file).
    -311        mass_spectra_group : h5py.Group, optional
    -312            The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
    -313        export_raw : bool, optional
    -314            Whether to export the raw data. Defaults to True. 
    -315            If False, only the processed data (peaks) is exported (essentially centroided data).
    -316        """
    -317        if mass_spectra_group is None:
    -318
    -319            # Check if the file has the necessary attributes and add them if not
    -320            # This assumes that if there is a mass_spectra_group, these attributes were already added to the file
    -321            if not hdf_handle.attrs.get("date_utc"):
    -322                timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    -323                hdf_handle.attrs["date_utc"] = timenow
    -324                hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
    -325                hdf_handle.attrs["data_structure"] = "mass_spectrum"
    -326                hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
    -327                hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
    -328                hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name
    -329        
    -330        list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)
    -331
    -332        dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)
    -333
    -334        setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    -335
    -336        columns_labels = json.dumps(
    -337            self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum),
    -338            sort_keys=False,
    -339            indent=4,
    -340            separators=(",", ": "),
    -341        )
    -342
    -343        group_key = group_key
    -344
    -345        if mass_spectra_group is not None:
    -346            hdf_handle = mass_spectra_group
    -347
    -348        if group_key not in hdf_handle.keys():
    -349            scan_group = hdf_handle.create_group(group_key)
    -350
    -351            # If there is raw data (from profile data) save it
    -352            if not mass_spectrum.is_centroid and export_raw:
    -353                mz_abun_array = empty(
    -354                    shape=(2, len(mass_spectrum.abundance_profile))
    -355                )
    -356
    -357                mz_abun_array[0] = mass_spectrum.abundance_profile
    -358                mz_abun_array[1] = mass_spectrum.mz_exp_profile
    -359
    -360                raw_ms_dataset = scan_group.create_dataset(
    -361                    "raw_ms", data=mz_abun_array, dtype="f8"
    -362                )
    -363
    -364            else:
    -365                #  create empy dataset for missing raw data
    -366                raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")
    -367
    -368            raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)
    -369
    -370            if isinstance(mass_spectrum, MassSpecfromFreq):
    -371                raw_ms_dataset.attrs["TransientSetting"] = json.dumps(
    -372                    setting_dicts.get("TransientSetting"),
    -373                    sort_keys=False,
    -374                    indent=4,
    -375                    separators=(",", ": "),
    -376                )
    -377
    -378        else:
    -379            scan_group = hdf_handle.get(group_key)
    -380
    -381        # if there is not processed data len = 0, otherwise len() will return next index
    -382        index_processed_data = str(len(scan_group.keys()))
    -383
    -384        timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    -385
    -386        processed_dset = scan_group.create_dataset(
    -387            index_processed_data, data=list_results
    -388        )
    -389
    -390        processed_dset.attrs["date_utc"] = timenow
    +            
    329    def add_mass_spectrum_to_hdf5(
    +330        self,
    +331        hdf_handle,
    +332        mass_spectrum,
    +333        group_key,
    +334        mass_spectra_group=None,
    +335        export_raw=True,
    +336    ):
    +337        """Adds the mass spectrum data to an HDF5 file.
    +338
    +339        Parameters
    +340        ----------
    +341        hdf_handle : h5py.File
    +342            The HDF5 file handle.
    +343        mass_spectrum : MassSpectrum
    +344            The mass spectrum to add to the HDF5 file.
    +345        group_key : str
    +346            The group key (where to add the mass spectrum data within the HDF5 file).
    +347        mass_spectra_group : h5py.Group, optional
    +348            The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
    +349        export_raw : bool, optional
    +350            Whether to export the raw data. Defaults to True.
    +351            If False, only the processed data (peaks) is exported (essentially centroided data).
    +352        """
    +353        if mass_spectra_group is None:
    +354            # Check if the file has the necessary attributes and add them if not
    +355            # This assumes that if there is a mass_spectra_group, these attributes were already added to the file
    +356            if not hdf_handle.attrs.get("date_utc"):
    +357                timenow = str(
    +358                    datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z")
    +359                )
    +360                hdf_handle.attrs["date_utc"] = timenow
    +361                hdf_handle.attrs["file_name"] = mass_spectrum.filename.name
    +362                hdf_handle.attrs["data_structure"] = "mass_spectrum"
    +363                hdf_handle.attrs["analyzer"] = mass_spectrum.analyzer
    +364                hdf_handle.attrs["instrument_label"] = mass_spectrum.instrument_label
    +365                hdf_handle.attrs["sample_name"] = mass_spectrum.sample_name
    +366
    +367        list_results = self.list_dict_to_list(mass_spectrum, is_hdf5=True)
    +368
    +369        dict_ms_attrs = self.get_mass_spec_attrs(mass_spectrum)
    +370
    +371        setting_dicts = parameter_to_dict.get_dict_data_ms(mass_spectrum)
    +372
    +373        columns_labels = json.dumps(
    +374            self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum),
    +375            sort_keys=False,
    +376            indent=4,
    +377            separators=(",", ": "),
    +378        )
    +379
    +380        group_key = group_key
    +381
    +382        if mass_spectra_group is not None:
    +383            hdf_handle = mass_spectra_group
    +384
    +385        if group_key not in hdf_handle.keys():
    +386            scan_group = hdf_handle.create_group(group_key)
    +387
    +388            # If there is raw data (from profile data) save it
    +389            if not mass_spectrum.is_centroid and export_raw:
    +390                mz_abun_array = empty(shape=(2, len(mass_spectrum.abundance_profile)))
     391
    -392        processed_dset.attrs["ColumnsLabels"] = columns_labels
    -393
    -394        processed_dset.attrs["MoleculaSearchSetting"] = json.dumps(
    -395            setting_dicts.get("MoleculaSearch"),
    -396            sort_keys=False,
    -397            indent=4,
    -398            separators=(",", ": "),
    -399        )
    -400
    -401        processed_dset.attrs["MassSpecPeakSetting"] = json.dumps(
    -402            setting_dicts.get("MassSpecPeak"),
    -403            sort_keys=False,
    -404            indent=4,
    -405            separators=(",", ": "),
    -406        )
    -407
    -408        processed_dset.attrs["MassSpectrumSetting"] = json.dumps(
    -409            setting_dicts.get("MassSpectrum"),
    -410            sort_keys=False,
    -411            indent=4,
    -412            separators=(",", ": "),
    -413        )
    +392                mz_abun_array[0] = mass_spectrum.abundance_profile
    +393                mz_abun_array[1] = mass_spectrum.mz_exp_profile
    +394
    +395                raw_ms_dataset = scan_group.create_dataset(
    +396                    "raw_ms", data=mz_abun_array, dtype="f8"
    +397                )
    +398
    +399            else:
    +400                #  create empy dataset for missing raw data
    +401                raw_ms_dataset = scan_group.create_dataset("raw_ms", dtype="f8")
    +402
    +403            raw_ms_dataset.attrs["MassSpecAttrs"] = json.dumps(dict_ms_attrs)
    +404
    +405            if isinstance(mass_spectrum, MassSpecfromFreq):
    +406                raw_ms_dataset.attrs["TransientSetting"] = json.dumps(
    +407                    setting_dicts.get("TransientSetting"),
    +408                    sort_keys=False,
    +409                    indent=4,
    +410                    separators=(",", ": "),
    +411                )
    +412
    +413        else:
    +414            scan_group = hdf_handle.get(group_key)
    +415
    +416        # if there is not processed data len = 0, otherwise len() will return next index
    +417        index_processed_data = str(len(scan_group.keys()))
    +418
    +419        timenow = str(datetime.now(timezone.utc).strftime("%d/%m/%Y %H:%M:%S %Z"))
    +420
    +421        processed_dset = scan_group.create_dataset(
    +422            index_processed_data, data=list_results
    +423        )
    +424
    +425        processed_dset.attrs["date_utc"] = timenow
    +426
    +427        processed_dset.attrs["ColumnsLabels"] = columns_labels
    +428
    +429        processed_dset.attrs["MoleculaSearchSetting"] = json.dumps(
    +430            setting_dicts.get("MoleculaSearch"),
    +431            sort_keys=False,
    +432            indent=4,
    +433            separators=(",", ": "),
    +434        )
    +435
    +436        processed_dset.attrs["MassSpecPeakSetting"] = json.dumps(
    +437            setting_dicts.get("MassSpecPeak"),
    +438            sort_keys=False,
    +439            indent=4,
    +440            separators=(",", ": "),
    +441        )
    +442
    +443        processed_dset.attrs["MassSpectrumSetting"] = json.dumps(
    +444            setting_dicts.get("MassSpectrum"),
    +445            sort_keys=False,
    +446            indent=4,
    +447            separators=(",", ": "),
    +448        )
     
    @@ -2270,7 +2462,7 @@
    Parameters
  • mass_spectra_group (h5py.Group, optional): The mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
  • export_raw (bool, optional): -Whether to export the raw data. Defaults to True. +Whether to export the raw data. Defaults to True. If False, only the processed data (peaks) is exported (essentially centroided data).
  • @@ -2288,12 +2480,13 @@
    Parameters
    -
    415    def to_hdf(self):
    -416        """Exports the mass spectrum data to an HDF5 file."""
    -417
    -418        with h5py.File(self.output_file.with_suffix('.hdf5'), 'a') as hdf_handle:
    -419
    -420            self.add_mass_spectrum_to_hdf5(hdf_handle, self.mass_spectrum, str(self.mass_spectrum.scan_number))
    +            
    450    def to_hdf(self):
    +451        """Exports the mass spectrum data to an HDF5 file."""
    +452
    +453        with h5py.File(self.output_file.with_suffix(".hdf5"), "a") as hdf_handle:
    +454            self.add_mass_spectrum_to_hdf5(
    +455                hdf_handle, self.mass_spectrum, str(self.mass_spectrum.scan_number)
    +456            )
     
    @@ -2313,25 +2506,25 @@
    Parameters
    -
    422    def parameters_to_toml(self):
    -423        """Converts the mass spectrum parameters to a TOML string.
    -424        
    -425        Returns
    -426        -------
    -427        str
    -428            The TOML string of the mass spectrum parameters.
    -429        """
    -430        
    -431        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    -432
    -433        dict_setting['MassSpecAttrs'] = self.get_mass_spec_attrs(self.mass_spectrum)
    -434        dict_setting['analyzer'] = self.mass_spectrum.analyzer
    -435        dict_setting['instrument_label'] = self.mass_spectrum.instrument_label
    -436        dict_setting['sample_name'] = self.mass_spectrum.sample_name
    -437
    -438        output = toml.dumps(dict_setting)
    -439        
    -440        return output
    +            
    458    def parameters_to_toml(self):
    +459        """Converts the mass spectrum parameters to a TOML string.
    +460
    +461        Returns
    +462        -------
    +463        str
    +464            The TOML string of the mass spectrum parameters.
    +465        """
    +466
    +467        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    +468
    +469        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
    +470        dict_setting["analyzer"] = self.mass_spectrum.analyzer
    +471        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
    +472        dict_setting["sample_name"] = self.mass_spectrum.sample_name
    +473
    +474        output = toml.dumps(dict_setting)
    +475
    +476        return output
     
    @@ -2357,25 +2550,25 @@
    Returns
    -
    442    def parameters_to_json(self):
    -443        """Converts the mass spectrum parameters to a JSON string.
    -444
    -445        Returns
    -446        -------
    -447        str
    -448            The JSON string of the mass spectrum parameters.
    -449        """
    -450
    -451        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    -452
    -453        dict_setting['MassSpecAttrs'] = self.get_mass_spec_attrs(self.mass_spectrum)
    -454        dict_setting['analyzer'] = self.mass_spectrum.analyzer
    -455        dict_setting['instrument_label'] = self.mass_spectrum.instrument_label
    -456        dict_setting['sample_name'] = self.mass_spectrum.sample_name
    -457
    -458        output = json.dumps(dict_setting)
    -459
    -460        return output
    +            
    478    def parameters_to_json(self):
    +479        """Converts the mass spectrum parameters to a JSON string.
    +480
    +481        Returns
    +482        -------
    +483        str
    +484            The JSON string of the mass spectrum parameters.
    +485        """
    +486
    +487        dict_setting = parameter_to_dict.get_dict_data_ms(self.mass_spectrum)
    +488
    +489        dict_setting["MassSpecAttrs"] = self.get_mass_spec_attrs(self.mass_spectrum)
    +490        dict_setting["analyzer"] = self.mass_spectrum.analyzer
    +491        dict_setting["instrument_label"] = self.mass_spectrum.instrument_label
    +492        dict_setting["sample_name"] = self.mass_spectrum.sample_name
    +493
    +494        output = json.dumps(dict_setting)
    +495
    +496        return output
     
    @@ -2401,33 +2594,33 @@
    Returns
    -
    462    def get_mass_spec_attrs(self, mass_spectrum):
    -463        """Returns the mass spectrum attributes as a dictionary.
    -464        
    -465        Parameters
    -466        ----------
    -467        mass_spectrum : MassSpectrum
    -468            The mass spectrum to export.
    -469            
    -470        Returns
    -471        -------
    -472        dict
    -473            The mass spectrum attributes.
    -474        """
    -475
    -476        dict_ms_attrs = {}
    -477        dict_ms_attrs['polarity'] = mass_spectrum.polarity
    -478        dict_ms_attrs['rt'] = mass_spectrum.retention_time
    -479        dict_ms_attrs['tic'] = mass_spectrum.tic
    -480        dict_ms_attrs['mobility_scan'] = mass_spectrum.mobility_scan
    -481        dict_ms_attrs['mobility_rt'] = mass_spectrum.mobility_rt
    -482        dict_ms_attrs['Aterm'] = mass_spectrum.Aterm
    -483        dict_ms_attrs['Bterm'] = mass_spectrum.Bterm
    -484        dict_ms_attrs['Cterm'] = mass_spectrum.Cterm
    -485        dict_ms_attrs['baseline_noise'] = mass_spectrum.baseline_noise
    -486        dict_ms_attrs['baseline_noise_std'] = mass_spectrum.baseline_noise_std
    -487
    -488        return dict_ms_attrs
    +            
    498    def get_mass_spec_attrs(self, mass_spectrum):
    +499        """Returns the mass spectrum attributes as a dictionary.
    +500
    +501        Parameters
    +502        ----------
    +503        mass_spectrum : MassSpectrum
    +504            The mass spectrum to export.
    +505
    +506        Returns
    +507        -------
    +508        dict
    +509            The mass spectrum attributes.
    +510        """
    +511
    +512        dict_ms_attrs = {}
    +513        dict_ms_attrs["polarity"] = mass_spectrum.polarity
    +514        dict_ms_attrs["rt"] = mass_spectrum.retention_time
    +515        dict_ms_attrs["tic"] = mass_spectrum.tic
    +516        dict_ms_attrs["mobility_scan"] = mass_spectrum.mobility_scan
    +517        dict_ms_attrs["mobility_rt"] = mass_spectrum.mobility_rt
    +518        dict_ms_attrs["Aterm"] = mass_spectrum.Aterm
    +519        dict_ms_attrs["Bterm"] = mass_spectrum.Bterm
    +520        dict_ms_attrs["Cterm"] = mass_spectrum.Cterm
    +521        dict_ms_attrs["baseline_noise"] = mass_spectrum.baseline_noise
    +522        dict_ms_attrs["baseline_noise_std"] = mass_spectrum.baseline_noise_std
    +523
    +524        return dict_ms_attrs
     
    @@ -2460,33 +2653,33 @@
    Returns
    -
    490    def get_all_used_atoms_in_order(self, mass_spectrum):
    -491        """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    -492
    -493        Parameters
    -494        ----------
    -495        mass_spectrum : MassSpectrum
    -496            The mass spectrum to export.
    -497
    -498        Returns
    -499        -------
    -500        list
    -501            The list of assigned atoms in the order specified by Atoms.atoms_order list.
    -502        """
    -503
    -504        atoms_in_order = Atoms.atoms_order
    -505        all_used_atoms = set()
    -506        if mass_spectrum:
    -507            for ms_peak in mass_spectrum:
    -508                if ms_peak:
    -509                    for m_formula in ms_peak:
    -510                        for atom in m_formula.atoms:
    -511                            all_used_atoms.add(atom)
    -512
    -513        def sort_method(atom):
    -514            return [atoms_in_order.index(atom)]
    -515
    -516        return sorted(all_used_atoms, key=sort_method)
    +            
    526    def get_all_used_atoms_in_order(self, mass_spectrum):
    +527        """Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    +528
    +529        Parameters
    +530        ----------
    +531        mass_spectrum : MassSpectrum
    +532            The mass spectrum to export.
    +533
    +534        Returns
    +535        -------
    +536        list
    +537            The list of assigned atoms in the order specified by Atoms.atoms_order list.
    +538        """
    +539
    +540        atoms_in_order = Atoms.atoms_order
    +541        all_used_atoms = set()
    +542        if mass_spectrum:
    +543            for ms_peak in mass_spectrum:
    +544                if ms_peak:
    +545                    for m_formula in ms_peak:
    +546                        for atom in m_formula.atoms:
    +547                            all_used_atoms.add(atom)
    +548
    +549        def sort_method(atom):
    +550            return [atoms_in_order.index(atom)]
    +551
    +552        return sorted(all_used_atoms, key=sort_method)
     
    @@ -2519,39 +2712,39 @@
    Returns
    -
    518    def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
    -519        """Returns the mass spectrum data as a list of dictionaries.
    -520        
    -521        Parameters
    -522        ----------
    -523        mass_spectrum : MassSpectrum
    -524            The mass spectrum to export.
    -525        is_hdf5 : bool, optional
    -526            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    -527            
    -528        Returns
    -529        -------
    -530        list
    -531            The mass spectrum data as a list of dictionaries.
    -532        """
    -533
    -534        column_labels = self.columns_label + self.get_all_used_atoms_in_order(mass_spectrum)
    -535
    -536        dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)
    -537
    -538        all_lines = []
    -539        for dict_res in dict_list:
    -540
    -541            result_line = [NaN] * len(column_labels)
    -542
    -543            for label, value in dict_res.items():
    -544
    -545                label_index = column_labels.index(label)
    -546                result_line[label_index] = value
    -547
    -548            all_lines.append(result_line)
    -549
    -550        return all_lines
    +            
    554    def list_dict_to_list(self, mass_spectrum, is_hdf5=False):
    +555        """Returns the mass spectrum data as a list of dictionaries.
    +556
    +557        Parameters
    +558        ----------
    +559        mass_spectrum : MassSpectrum
    +560            The mass spectrum to export.
    +561        is_hdf5 : bool, optional
    +562            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    +563
    +564        Returns
    +565        -------
    +566        list
    +567            The mass spectrum data as a list of dictionaries.
    +568        """
    +569
    +570        column_labels = self.columns_label + self.get_all_used_atoms_in_order(
    +571            mass_spectrum
    +572        )
    +573
    +574        dict_list = self.get_list_dict_data(mass_spectrum, is_hdf5=is_hdf5)
    +575
    +576        all_lines = []
    +577        for dict_res in dict_list:
    +578            result_line = [NaN] * len(column_labels)
    +579
    +580            for label, value in dict_res.items():
    +581                label_index = column_labels.index(label)
    +582                result_line[label_index] = value
    +583
    +584            all_lines.append(result_line)
    +585
    +586        return all_lines
     
    @@ -2586,200 +2779,243 @@
    Returns
    -
    552    def get_list_dict_data(self, mass_spectrum, include_no_match=True, include_isotopologues=True,
    -553                           isotopologue_inline=True, no_match_inline=False, is_hdf5=False,
    -554                           additional_columns=None):
    -555        """Returns the mass spectrum data as a list of dictionaries.
    -556
    -557        Parameters
    -558        ----------
    -559        mass_spectrum : MassSpectrum
    -560            The mass spectrum to export.
    -561        include_no_match : bool, optional
    -562            Whether to include unassigned (no match) data. Defaults to True.
    -563        include_isotopologues : bool, optional
    -564            Whether to include isotopologues. Defaults to True.
    -565        isotopologue_inline : bool, optional
    -566            Whether to include isotopologues inline. Defaults to True.
    -567        no_match_inline : bool, optional
    -568            Whether to include unassigned (no match) data inline. Defaults to False.
    -569        is_hdf5 : bool, optional
    -570            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    -571
    -572        Returns
    -573        -------
    -574        list
    -575            The mass spectrum data as a list of dictionaries.
    -576        """
    -577
    -578        dict_data_list = []
    -579
    -580        if is_hdf5:
    -581            encode = ".encode('utf-8')"
    -582        else:
    -583            encode = ""
    -584
    -585        def add_no_match_dict_data(index, ms_peak):
    -586            '''
    -587            Export dictionary of mspeak info for unassigned (no match) data
    -588            '''
    -589            dict_result = {'Index': index,
    -590                           'm/z': ms_peak._mz_exp,
    -591                           'Calibrated m/z': ms_peak.mz_exp,
    -592                           'Peak Height': ms_peak.abundance,
    -593                           'Peak Area': ms_peak.area,
    -594                           'Resolving Power': ms_peak.resolving_power,
    -595                           'S/N': ms_peak.signal_to_noise,
    -596                           'Ion Charge': ms_peak.ion_charge,
    -597                           'Heteroatom Class': eval("Labels.unassigned{}".format(encode)),
    -598                           }
    +            
    588    def get_list_dict_data(
    +589        self,
    +590        mass_spectrum,
    +591        include_no_match=True,
    +592        include_isotopologues=True,
    +593        isotopologue_inline=True,
    +594        no_match_inline=False,
    +595        is_hdf5=False,
    +596        additional_columns=None,
    +597    ):
    +598        """Returns the mass spectrum data as a list of dictionaries.
     599
    -600            dict_data_list.append(dict_result)
    -601
    -602        def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
    -603            '''
    -604            Export dictionary of mspeak info for assigned (match) data
    -605            '''
    -606            formula_dict = mformula.to_dict()
    -607
    -608            dict_result = {'Index': index,
    -609                           'm/z': ms_peak._mz_exp,
    -610                           'Calibrated m/z': ms_peak.mz_exp,
    -611                           'Calculated m/z': mformula.mz_calc,
    -612                           'Peak Height': ms_peak.abundance,
    -613                           'Peak Area': ms_peak.area,
    -614                           'Resolving Power': ms_peak.resolving_power,
    -615                           'S/N': ms_peak.signal_to_noise,
    -616                           'Ion Charge': ms_peak.ion_charge,
    -617                           'm/z Error (ppm)': mformula.mz_error,
    -618                           'Confidence Score': mformula.confidence_score,
    -619                           'Isotopologue Similarity': mformula.isotopologue_similarity,
    -620                           'm/z Error Score': mformula.average_mz_error_score,
    -621                           'DBE': mformula.dbe,
    -622                           'Heteroatom Class': eval("mformula.class_label{}".format(encode)),
    -623                           'H/C': mformula.H_C,
    -624                           'O/C': mformula.O_C,
    -625                           'Ion Type': eval("mformula.ion_type.lower(){}".format(encode)),
    -626                           'Is Isotopologue': int(mformula.is_isotopologue),
    -627                           'Molecular Formula': eval("mformula.string{}".format(encode))
    -628                           }
    -629            if additional_columns is not None:
    -630                possible_dict = {
    -631                    'Aromaticity Index':mformula.A_I, 
    -632                    'NOSC':mformula.nosc, 
    -633                    'Aromaticity Index (modified)':mformula.A_I_mod
    -634                    }
    -635                for column in additional_columns:
    -636                    dict_result[column] = possible_dict.get(column)
    -637            
    -638            if mformula.adduct_atom:
    -639                dict_result['Adduct'] = eval("mformula.adduct_atom{}".format(encode))
    -640
    -641            if mformula.is_isotopologue:
    -642                dict_result['Mono Isotopic Index'] = mformula.mspeak_index_mono_isotopic
    +600        Parameters
    +601        ----------
    +602        mass_spectrum : MassSpectrum
    +603            The mass spectrum to export.
    +604        include_no_match : bool, optional
    +605            Whether to include unassigned (no match) data. Defaults to True.
    +606        include_isotopologues : bool, optional
    +607            Whether to include isotopologues. Defaults to True.
    +608        isotopologue_inline : bool, optional
    +609            Whether to include isotopologues inline. Defaults to True.
    +610        no_match_inline : bool, optional
    +611            Whether to include unassigned (no match) data inline. Defaults to False.
    +612        is_hdf5 : bool, optional
    +613            Whether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    +614
    +615        Returns
    +616        -------
    +617        list
    +618            The mass spectrum data as a list of dictionaries.
    +619        """
    +620
    +621        dict_data_list = []
    +622
    +623        if is_hdf5:
    +624            encode = ".encode('utf-8')"
    +625        else:
    +626            encode = ""
    +627
    +628        def add_no_match_dict_data(index, ms_peak):
    +629            """
    +630            Export dictionary of mspeak info for unassigned (no match) data
    +631            """
    +632            dict_result = {
    +633                "Index": index,
    +634                "m/z": ms_peak._mz_exp,
    +635                "Calibrated m/z": ms_peak.mz_exp,
    +636                "Peak Height": ms_peak.abundance,
    +637                "Peak Area": ms_peak.area,
    +638                "Resolving Power": ms_peak.resolving_power,
    +639                "S/N": ms_peak.signal_to_noise,
    +640                "Ion Charge": ms_peak.ion_charge,
    +641                "Heteroatom Class": eval("Labels.unassigned{}".format(encode)),
    +642            }
     643
    -644            if self.atoms_order_list is None:
    -645                atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)
    -646            else:
    -647                atoms_order_list = self.atoms_order_list
    -648
    -649            for atom in atoms_order_list:
    -650                if atom in formula_dict.keys():
    -651                    dict_result[atom] = formula_dict.get(atom)
    -652
    -653            dict_data_list.append(dict_result)
    -654
    -655        score_methods = mass_spectrum.molecular_search_settings.score_methods
    -656        selected_score_method = mass_spectrum.molecular_search_settings.output_score_method
    -657
    -658        if selected_score_method in score_methods:
    -659
    -660            # temp set score method as the one chosen in the output
    -661            current_method = mass_spectrum.molecular_search_settings.score_method
    -662            mass_spectrum.molecular_search_settings.score_method = selected_score_method
    -663
    -664            for index, ms_peak in enumerate(mass_spectrum):
    -665
    -666                # print(ms_peak.mz_exp)
    -667
    -668                if ms_peak:
    -669
    -670                    m_formula = ms_peak.best_molecular_formula_candidate
    -671
    -672                    if m_formula:
    -673
    -674                        if not m_formula.is_isotopologue:
    -675
    -676                            add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -677
    -678                            for iso_mspeak_index, iso_mf_formula in m_formula.mspeak_mf_isotopologues_indexes:
    -679                                iso_ms_peak = mass_spectrum[iso_mspeak_index]
    -680                                add_match_dict_data(iso_mspeak_index, iso_ms_peak, iso_mf_formula, additional_columns=additional_columns)
    -681                else:
    +644            dict_data_list.append(dict_result)
    +645
    +646        def add_match_dict_data(index, ms_peak, mformula, additional_columns=None):
    +647            """
    +648            Export dictionary of mspeak info for assigned (match) data
    +649            """
    +650            formula_dict = mformula.to_dict()
    +651
    +652            dict_result = {
    +653                "Index": index,
    +654                "m/z": ms_peak._mz_exp,
    +655                "Calibrated m/z": ms_peak.mz_exp,
    +656                "Calculated m/z": mformula.mz_calc,
    +657                "Peak Height": ms_peak.abundance,
    +658                "Peak Area": ms_peak.area,
    +659                "Resolving Power": ms_peak.resolving_power,
    +660                "S/N": ms_peak.signal_to_noise,
    +661                "Ion Charge": ms_peak.ion_charge,
    +662                "m/z Error (ppm)": mformula.mz_error,
    +663                "Confidence Score": mformula.confidence_score,
    +664                "Isotopologue Similarity": mformula.isotopologue_similarity,
    +665                "m/z Error Score": mformula.average_mz_error_score,
    +666                "DBE": mformula.dbe,
    +667                "Heteroatom Class": eval("mformula.class_label{}".format(encode)),
    +668                "H/C": mformula.H_C,
    +669                "O/C": mformula.O_C,
    +670                "Ion Type": eval("mformula.ion_type.lower(){}".format(encode)),
    +671                "Is Isotopologue": int(mformula.is_isotopologue),
    +672                "Molecular Formula": eval("mformula.string{}".format(encode)),
    +673            }
    +674            if additional_columns is not None:
    +675                possible_dict = {
    +676                    "Aromaticity Index": mformula.A_I,
    +677                    "NOSC": mformula.nosc,
    +678                    "Aromaticity Index (modified)": mformula.A_I_mod,
    +679                }
    +680                for column in additional_columns:
    +681                    dict_result[column] = possible_dict.get(column)
     682
    -683                    if include_no_match and no_match_inline:
    -684                        add_no_match_dict_data(index, ms_peak)
    +683            if mformula.adduct_atom:
    +684                dict_result["Adduct"] = eval("mformula.adduct_atom{}".format(encode))
     685
    -686            if include_no_match and not no_match_inline:
    -687
    -688                for index, ms_peak in enumerate(mass_spectrum):
    -689                    if not ms_peak:
    -690                        add_no_match_dict_data(index, ms_peak)     
    -691            # reset score method as the one chosen in the output
    -692            mass_spectrum.molecular_search_settings.score_method = current_method
    +686            if mformula.is_isotopologue:
    +687                dict_result["Mono Isotopic Index"] = mformula.mspeak_index_mono_isotopic
    +688
    +689            if self.atoms_order_list is None:
    +690                atoms_order_list = self.get_all_used_atoms_in_order(mass_spectrum)
    +691            else:
    +692                atoms_order_list = self.atoms_order_list
     693
    -694        else:
    -695
    -696            for index, ms_peak in enumerate(mass_spectrum):
    +694            for atom in atoms_order_list:
    +695                if atom in formula_dict.keys():
    +696                    dict_result[atom] = formula_dict.get(atom)
     697
    -698                # check if there is a molecular formula candidate for the msPeak
    +698            dict_data_list.append(dict_result)
     699
    -700                if ms_peak:
    -701                    # m_formula = ms_peak.molecular_formula_lowest_error
    -702                    for m_formula in ms_peak:
    -703
    -704                        if mass_spectrum.molecular_search_settings.output_min_score > 0:
    -705
    -706                            if m_formula.confidence_score >= mass_spectrum.molecular_search_settings.output_min_score:
    -707
    -708                                if m_formula.is_isotopologue:  # isotopologues inline
    -709                                    if include_isotopologues and isotopologue_inline:
    -710                                        add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -711                                else:
    -712                                    add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)  # add monoisotopic peak
    -713
    -714                            # cutoff because of low score
    -715                            else:
    -716                                add_no_match_dict_data(index, ms_peak)
    -717
    -718                        else:
    -719                            if m_formula.is_isotopologue:  # isotopologues inline
    -720                                if include_isotopologues and isotopologue_inline:
    -721                                    add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -722                            else:
    -723                                add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)  # add monoisotopic peak
    -724                else:
    -725                    # include not_match
    -726                    if include_no_match and no_match_inline:
    -727                        add_no_match_dict_data(index, ms_peak)
    -728
    -729            if include_isotopologues and not isotopologue_inline:
    -730                for index, ms_peak in enumerate(mass_spectrum):
    -731                    for m_formula in ms_peak:
    -732                        if m_formula.is_isotopologue:
    -733                            if m_formula.confidence_score >= mass_spectrum.molecular_search_settings.output_min_score:
    -734                                add_match_dict_data(index, ms_peak, m_formula, additional_columns=additional_columns)
    -735
    -736            if include_no_match and not no_match_inline:
    -737                for index, ms_peak in enumerate(mass_spectrum):
    -738                    if not ms_peak:
    -739                        add_no_match_dict_data(index, ms_peak)
    -740
    -741        # remove duplicated add_match data possibly introduced on the output_score_filter step
    -742        res = []
    -743        [res.append(x) for x in dict_data_list if x not in res]
    -744
    -745        return res
    +700        score_methods = mass_spectrum.molecular_search_settings.score_methods
    +701        selected_score_method = (
    +702            mass_spectrum.molecular_search_settings.output_score_method
    +703        )
    +704
    +705        if selected_score_method in score_methods:
    +706            # temp set score method as the one chosen in the output
    +707            current_method = mass_spectrum.molecular_search_settings.score_method
    +708            mass_spectrum.molecular_search_settings.score_method = selected_score_method
    +709
    +710            for index, ms_peak in enumerate(mass_spectrum):
    +711                # print(ms_peak.mz_exp)
    +712
    +713                if ms_peak:
    +714                    m_formula = ms_peak.best_molecular_formula_candidate
    +715
    +716                    if m_formula:
    +717                        if not m_formula.is_isotopologue:
    +718                            add_match_dict_data(
    +719                                index,
    +720                                ms_peak,
    +721                                m_formula,
    +722                                additional_columns=additional_columns,
    +723                            )
    +724
    +725                            for (
    +726                                iso_mspeak_index,
    +727                                iso_mf_formula,
    +728                            ) in m_formula.mspeak_mf_isotopologues_indexes:
    +729                                iso_ms_peak = mass_spectrum[iso_mspeak_index]
    +730                                add_match_dict_data(
    +731                                    iso_mspeak_index,
    +732                                    iso_ms_peak,
    +733                                    iso_mf_formula,
    +734                                    additional_columns=additional_columns,
    +735                                )
    +736                else:
    +737                    if include_no_match and no_match_inline:
    +738                        add_no_match_dict_data(index, ms_peak)
    +739
    +740            if include_no_match and not no_match_inline:
    +741                for index, ms_peak in enumerate(mass_spectrum):
    +742                    if not ms_peak:
    +743                        add_no_match_dict_data(index, ms_peak)
    +744            # reset score method as the one chosen in the output
    +745            mass_spectrum.molecular_search_settings.score_method = current_method
    +746
    +747        else:
    +748            for index, ms_peak in enumerate(mass_spectrum):
    +749                # check if there is a molecular formula candidate for the msPeak
    +750
    +751                if ms_peak:
    +752                    # m_formula = ms_peak.molecular_formula_lowest_error
    +753                    for m_formula in ms_peak:
    +754                        if mass_spectrum.molecular_search_settings.output_min_score > 0:
    +755                            if (
    +756                                m_formula.confidence_score
    +757                                >= mass_spectrum.molecular_search_settings.output_min_score
    +758                            ):
    +759                                if m_formula.is_isotopologue:  # isotopologues inline
    +760                                    if include_isotopologues and isotopologue_inline:
    +761                                        add_match_dict_data(
    +762                                            index,
    +763                                            ms_peak,
    +764                                            m_formula,
    +765                                            additional_columns=additional_columns,
    +766                                        )
    +767                                else:
    +768                                    add_match_dict_data(
    +769                                        index,
    +770                                        ms_peak,
    +771                                        m_formula,
    +772                                        additional_columns=additional_columns,
    +773                                    )  # add monoisotopic peak
    +774
    +775                            # cutoff because of low score
    +776                            else:
    +777                                add_no_match_dict_data(index, ms_peak)
    +778
    +779                        else:
    +780                            if m_formula.is_isotopologue:  # isotopologues inline
    +781                                if include_isotopologues and isotopologue_inline:
    +782                                    add_match_dict_data(
    +783                                        index,
    +784                                        ms_peak,
    +785                                        m_formula,
    +786                                        additional_columns=additional_columns,
    +787                                    )
    +788                            else:
    +789                                add_match_dict_data(
    +790                                    index,
    +791                                    ms_peak,
    +792                                    m_formula,
    +793                                    additional_columns=additional_columns,
    +794                                )  # add monoisotopic peak
    +795                else:
    +796                    # include not_match
    +797                    if include_no_match and no_match_inline:
    +798                        add_no_match_dict_data(index, ms_peak)
    +799
    +800            if include_isotopologues and not isotopologue_inline:
    +801                for index, ms_peak in enumerate(mass_spectrum):
    +802                    for m_formula in ms_peak:
    +803                        if m_formula.is_isotopologue:
    +804                            if (
    +805                                m_formula.confidence_score
    +806                                >= mass_spectrum.molecular_search_settings.output_min_score
    +807                            ):
    +808                                add_match_dict_data(
    +809                                    index,
    +810                                    ms_peak,
    +811                                    m_formula,
    +812                                    additional_columns=additional_columns,
    +813                                )
    +814
    +815            if include_no_match and not no_match_inline:
    +816                for index, ms_peak in enumerate(mass_spectrum):
    +817                    if not ms_peak:
    +818                        add_no_match_dict_data(index, ms_peak)
    +819
    +820        # remove duplicated add_match data possibly introduced on the output_score_filter step
    +821        res = []
    +822        [res.append(x) for x in dict_data_list if x not in res]
    +823
    +824        return res
     
    diff --git a/docs/corems/molecular_formula/calc/MolecularFormulaCalc.html b/docs/corems/molecular_formula/calc/MolecularFormulaCalc.html index 667f4160..eec5b2a0 100644 --- a/docs/corems/molecular_formula/calc/MolecularFormulaCalc.html +++ b/docs/corems/molecular_formula/calc/MolecularFormulaCalc.html @@ -73,835 +73,887 @@

    2__date__ = "Jun 24, 2019" 3 4 - 5from numpy import isnan, power, exp, nextafter, array - 6import warnings - 7from pandas import DataFrame - 8from scipy.stats import pearsonr, spearmanr, kendalltau + 5import warnings + 6 + 7import IsoSpecPy + 8from numpy import array, exp, isnan, nextafter, power 9 - 10from corems.encapsulation.constant import Atoms - 11from corems.encapsulation.constant import Labels - 12from corems.encapsulation.factory.parameters import MSParameters - 13from corems.molecular_id.calc.SpectralSimilarity import SpectralSimilarity - 14 - 15# this is to handle both versions of IsoSpecPy, 2.0.2 and 2.2.2 - 16# TODO in a future release remove support for legacy isospecpy - 17from packaging import version - 18import IsoSpecPy - 19isospec_version = IsoSpecPy.__version__ - 20if version.parse(isospec_version) > version.parse('2.0.2'): - 21 legacy_isospec = False - 22else: legacy_isospec = True + 10# this is to handle both versions of IsoSpecPy, 2.0.2 and 2.2.2 + 11# TODO in a future release remove support for legacy isospecpy + 12from packaging import version + 13 + 14from corems.encapsulation.constant import Atoms, Labels + 15from corems.encapsulation.factory.parameters import MSParameters + 16from corems.molecular_id.calc.SpectralSimilarity import SpectralSimilarity + 17 + 18isospec_version = IsoSpecPy.__version__ + 19if version.parse(isospec_version) > version.parse("2.0.2"): + 20 legacy_isospec = False + 21else: + 22 legacy_isospec = True 23if legacy_isospec: 24 from IsoSpecPy import IsoSpecPy - 25 warnings.warn(f"IsoSpecPy version {isospec_version} is installed, and support is deprecated. 
Please update to 2.2.2", DeprecationWarning) - 26 - 27class MolecularFormulaCalc: - 28 """Class of calculations related to molecular formula - 29 - 30 This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula - 31 - 32 Attributes - 33 ---------- - 34 mz_calc : float - 35 The m/z value of the molecular formula. - 36 neutral_mass : float - 37 The neutral mass of the molecular formula. - 38 ion_charge : int - 39 The ion charge of the molecular formula. - 40 _external_mz : float - 41 The externally provided m/z value of the molecular formula. - 42 _d_molecular_formula : dict - 43 The dictionary representation of the molecular formula. - 44 _mspeak_parent : object - 45 The parent MS peak object associated with the molecular formula. - 46 _assignment_mass_error : float - 47 The mass error of the molecular formula. - 48 - 49 Methods - 50 ------- - 51 * _calc_resolving_power_low_pressure(B, T) - 52 Calculate the resolving power at low pressure. - 53 * _calc_resolving_power_high_pressure(B, T) - 54 Calculate the resolving power at high pressure. - 55 * _adduct_mz(adduct_atom, ion_charge) - 56 Get the m/z value of an adducted ion version of the molecular formula. - 57 * _protonated_mz(ion_charge) - 58 Get the m/z value of a protonated or deprotonated ion version of the molecular formula. - 59 * _radical_mz(ion_charge) - 60 Get the m/z value of a radical ion version of the molecular formula. - 61 * _neutral_mass() - 62 Get the neutral mass of the molecular formula. - 63 * _calc_mz() - 64 Get the m/z value of the molecular formula. - 65 * _calc_assignment_mass_error(method='ppm') - 66 Calculate the mass error of the molecular formula. - 67 * _calc_mz_confidence(mean=0) - 68 Calculate the m/z confidence of the molecular formula. 
- 69 * _calc_isotopologue_confidence() - 70 Calculate the isotopologue confidence of the molecular formula. - 71 * normalize_distance(dist, dist_range) - 72 Normalize the distance value. - 73 * subtract_formula(formula_obj, formated=True) - 74 Subtract a formula from the current formula object. - 75 * _calc_average_mz_score() - 76 Calculate the average m/z error score of the molecular formula identification, including the isotopologues. - 77 """ - 78 - 79 def _calc_resolving_power_low_pressure(self, B, T): - 80 ''' - 81 Calculate the resolving power at low pressure. - 82 - 83 Parameters - 84 ---------- - 85 B : float - 86 Magnetic Strength (Testa). - 87 T : float - 88 Transient time (seconds). - 89 - 90 ''' - 91 return (1.274 * 10000000 * B * T) * (1 / self.mz_calc) - 92 - 93 def _calc_resolving_power_high_pressure(self, B, T): - 94 ''' - 95 Calculate the resolving power at high pressure. - 96 - 97 Parameters - 98 ---------- - 99 B : float -100 Magnetic Strength (Testa). -101 T : float -102 Transient time (seconds). -103 -104 ''' -105 return (2.758 * 10000000 * B * T) * (1 / self.mz_calc) -106 -107 def _adduct_mz(self, adduct_atom, ion_charge): -108 """Get the m/z value of an adducted ion version of the molecular formula. -109 -110 Parameters -111 ---------- -112 adduct_atom : str -113 The adduct atom. -114 ion_charge : int -115 The ion charge. -116 -117 """ -118 return (self.neutral_mass + (Atoms.atomic_masses.get(adduct_atom)) + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge) -119 -120 def _protonated_mz(self, ion_charge): -121 """Get the m/z value of a protonated or deprotonated ion version of the molecular formula. -122 -123 Parameters -124 ---------- -125 ion_charge : int -126 The ion charge. 
-127 """ -128 return (self.neutral_mass + (ion_charge * Atoms.atomic_masses.get("H")) + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge) -129 -130 def _radical_mz(self, ion_charge): -131 """Get the m/z value of a radical ion version of the molecular formula. -132 -133 Parameters -134 ---------- -135 ion_charge : int -136 The ion charge. -137 """ -138 return (self.neutral_mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge) -139 -140 def _neutral_mass(self): -141 """Get the neutral mass of the molecular formula.""" + 25 + 26 warnings.warn( + 27 f"IsoSpecPy version {isospec_version} is installed, and support is deprecated. Please update to 2.2.2", + 28 DeprecationWarning, + 29 ) + 30 + 31 + 32class MolecularFormulaCalc: + 33 """Class of calculations related to molecular formula + 34 + 35 This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula + 36 + 37 Attributes + 38 ---------- + 39 mz_calc : float + 40 The m/z value of the molecular formula. + 41 neutral_mass : float + 42 The neutral mass of the molecular formula. + 43 ion_charge : int + 44 The ion charge of the molecular formula. + 45 _external_mz : float + 46 The externally provided m/z value of the molecular formula. + 47 _d_molecular_formula : dict + 48 The dictionary representation of the molecular formula. + 49 _mspeak_parent : object + 50 The parent MS peak object associated with the molecular formula. + 51 _assignment_mass_error : float + 52 The mass error of the molecular formula. + 53 + 54 Methods + 55 ------- + 56 * _calc_resolving_power_low_pressure(B, T) + 57 Calculate the resolving power at low pressure. + 58 * _calc_resolving_power_high_pressure(B, T) + 59 Calculate the resolving power at high pressure. + 60 * _adduct_mz(adduct_atom, ion_charge) + 61 Get the m/z value of an adducted ion version of the molecular formula. 
+ 62 * _protonated_mz(ion_charge) + 63 Get the m/z value of a protonated or deprotonated ion version of the molecular formula. + 64 * _radical_mz(ion_charge) + 65 Get the m/z value of a radical ion version of the molecular formula. + 66 * _neutral_mass() + 67 Get the neutral mass of the molecular formula. + 68 * _calc_mz() + 69 Get the m/z value of the molecular formula. + 70 * _calc_assignment_mass_error(method='ppm') + 71 Calculate the mass error of the molecular formula. + 72 * _calc_mz_confidence(mean=0) + 73 Calculate the m/z confidence of the molecular formula. + 74 * _calc_isotopologue_confidence() + 75 Calculate the isotopologue confidence of the molecular formula. + 76 * normalize_distance(dist, dist_range) + 77 Normalize the distance value. + 78 * subtract_formula(formula_obj, formated=True) + 79 Subtract a formula from the current formula object. + 80 * _calc_average_mz_score() + 81 Calculate the average m/z error score of the molecular formula identification, including the isotopologues. + 82 """ + 83 + 84 def _calc_resolving_power_low_pressure(self, B, T): + 85 """ + 86 Calculate the resolving power at low pressure. + 87 + 88 Parameters + 89 ---------- + 90 B : float + 91 Magnetic Strength (Testa). + 92 T : float + 93 Transient time (seconds). + 94 + 95 """ + 96 return (1.274 * 10000000 * B * T) * (1 / self.mz_calc) + 97 + 98 def _calc_resolving_power_high_pressure(self, B, T): + 99 """ +100 Calculate the resolving power at high pressure. +101 +102 Parameters +103 ---------- +104 B : float +105 Magnetic Strength (Testa). +106 T : float +107 Transient time (seconds). +108 +109 """ +110 return (2.758 * 10000000 * B * T) * (1 / self.mz_calc) +111 +112 def _adduct_mz(self, adduct_atom, ion_charge): +113 """Get the m/z value of an adducted ion version of the molecular formula. +114 +115 Parameters +116 ---------- +117 adduct_atom : str +118 The adduct atom. +119 ion_charge : int +120 The ion charge. 
+121 +122 """ +123 return ( +124 self.neutral_mass +125 + (Atoms.atomic_masses.get(adduct_atom)) +126 + (ion_charge * -1 * Atoms.electron_mass) +127 ) / abs(ion_charge) +128 +129 def _protonated_mz(self, ion_charge): +130 """Get the m/z value of a protonated or deprotonated ion version of the molecular formula. +131 +132 Parameters +133 ---------- +134 ion_charge : int +135 The ion charge. +136 """ +137 return ( +138 self.neutral_mass +139 + (ion_charge * Atoms.atomic_masses.get("H")) +140 + (ion_charge * -1 * Atoms.electron_mass) +141 ) / abs(ion_charge) 142 -143 mass = 0 -144 -145 for each_atom in self._d_molecular_formula.keys() : -146 -147 if each_atom != Labels.ion_type and each_atom != 'HC': -148 -149 try: -150 -151 mass = mass + Atoms.atomic_masses[each_atom] * self._d_molecular_formula.get(each_atom) -152 -153 except: print(Labels.ion_type, each_atom) -154 -155 return mass -156 -157 def _calc_mz(self): -158 """Get the m/z value of the molecular formula, based on the ion charge and ion type. -159 -160 """ -161 -162 if self.ion_charge is not None: -163 -164 if self._external_mz: -165 return self._external_mz -166 -167 else: -168 ion_type = self._d_molecular_formula.get(Labels.ion_type) +143 def _radical_mz(self, ion_charge): +144 """Get the m/z value of a radical ion version of the molecular formula. +145 +146 Parameters +147 ---------- +148 ion_charge : int +149 The ion charge. 
+150 """ +151 return (self.neutral_mass + (ion_charge * -1 * Atoms.electron_mass)) / abs( +152 ion_charge +153 ) +154 +155 def _neutral_mass(self): +156 """Get the neutral mass of the molecular formula.""" +157 +158 mass = 0 +159 +160 for each_atom in self._d_molecular_formula.keys(): +161 if each_atom != Labels.ion_type and each_atom != "HC": +162 try: +163 mass = mass + Atoms.atomic_masses[ +164 each_atom +165 ] * self._d_molecular_formula.get(each_atom) +166 +167 except: +168 print(Labels.ion_type, each_atom) 169 -170 if ion_type == Labels.protonated_de_ion: -171 return self.protonated_mz -172 -173 elif ion_type == Labels.radical_ion or ion_type == Labels.adduct_ion: -174 return self.radical_mz -175 -176 elif ion_type == Labels.neutral: -177 -178 return self.neutral_mass -179 -180 elif self.ion_charge == 0: -181 -182 return self.neutral_mass -183 -184 else: -185 #formula is probably ion form used for bruker ref list -186 return self.neutral_mass -187 -188 else: -189 -190 raise Exception("Please set ion charge first") -191 -192 def _calc_assignment_mass_error(self, method='ppm'): -193 """Calculate the mass error of the molecular formula, based on the experimental m/z and the calculated m/z. -194 -195 Parameters -196 ---------- -197 method : str, optional -198 The method to calculate the mass error, by default 'ppm', but can be 'ppb' -199 -200 Raises -201 ------ -202 Exception -203 If the method is not 'ppm' or 'ppb'. -204 Exception -205 If there is no ms peak associated with the molecular formula instance. 
-206 """ -207 -208 if method == 'ppm': -209 multi_factor = 1000000 -210 -211 elif method == 'ppb': -212 multi_factor = 1000000 -213 -214 else: -215 raise Exception("method needs to be ppm or ppb, you have entered %s" % method) -216 -217 if self._mspeak_parent.mz_exp: -218 -219 self._assignment_mass_error = ((self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc) * multi_factor -220 -221 return ((self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc) * multi_factor -222 +170 return mass +171 +172 def _calc_mz(self): +173 """Get the m/z value of the molecular formula, based on the ion charge and ion type.""" +174 +175 if self.ion_charge is not None: +176 if self._external_mz: +177 return self._external_mz +178 +179 else: +180 ion_type = self._d_molecular_formula.get(Labels.ion_type) +181 +182 if ion_type == Labels.protonated_de_ion: +183 return self.protonated_mz +184 +185 elif ion_type == Labels.radical_ion or ion_type == Labels.adduct_ion: +186 return self.radical_mz +187 +188 elif ion_type == Labels.neutral: +189 return self.neutral_mass +190 +191 elif self.ion_charge == 0: +192 return self.neutral_mass +193 +194 else: +195 # formula is probably ion form used for bruker ref list +196 return self.neutral_mass +197 +198 else: +199 raise Exception("Please set ion charge first") +200 +201 def _calc_assignment_mass_error(self, method="ppm"): +202 """Calculate the mass error of the molecular formula, based on the experimental m/z and the calculated m/z. +203 +204 Parameters +205 ---------- +206 method : str, optional +207 The method to calculate the mass error, by default 'ppm', but can be 'ppb' +208 +209 Raises +210 ------ +211 Exception +212 If the method is not 'ppm' or 'ppb'. +213 Exception +214 If there is no ms peak associated with the molecular formula instance. 
+215 """ +216 +217 if method == "ppm": +218 multi_factor = 1000000 +219 +220 elif method == "ppb": +221 multi_factor = 1000000 +222 223 else: -224 -225 raise Exception("No ms peak associated with the molecular formula instance %s", self) -226 -227 -228 def _calc_mz_confidence(self, mean=0): -229 """Calculate the m/z confidence of the molecular formula, based on the experimental m/z and the calculated m/z. -230 -231 Parameters -232 ---------- -233 mean : int, optional -234 The mean of the m/z error, by default 0 -235 -236 """ -237 -238 # predicted std not set, using 0.3 -239 if not self._mspeak_parent.predicted_std: self._mspeak_parent.predicted_std = 1.66 -240 -241 #print( self._mspeak_parent.predicted_std) -242 -243 return exp(-1 * (power((self.mz_error - mean), 2) / (2 * power(self._mspeak_parent.predicted_std, 2)))) -244 -245 def _calc_isotopologue_confidence(self): -246 """Calculate the isotopologue confidence of the molecular formula, based on the isotopologue similarity. -247 -248 Returns -249 ------- -250 float -251 The isotopologue confidence of the molecular formula. 
-252 """ -253 -254 if self.is_isotopologue: -255 # confidence of isotopologue is pure mz error -256 # TODO add more features here -257 -258 mformula_index = self.mono_isotopic_formula_index -259 mspeak_index = self.mspeak_index_mono_isotopic -260 -261 mspeak = self._mspeak_parent._ms_parent[mspeak_index] -262 -263 expected_isotopologues = mspeak[mformula_index].expected_isotopologues -264 -265 mono_mz = mspeak[mformula_index].mz_calc -266 mono_abundance = mspeak.abundance -267 -268 else: -269 -270 mono_mz = self.mz_calc -271 mono_abundance = self._mspeak_parent.abundance -272 -273 expected_isotopologues = self.expected_isotopologues -274 # has isotopologues based on current dinamic range -275 -276 if expected_isotopologues: -277 -278 dict_mz_abund_ref = {'mz': [mono_mz], 'abundance': [mono_abundance]} -279 -280 # get reference data -281 for mf in expected_isotopologues: -282 dict_mz_abund_ref['abundance'].append(mf.abundance_calc) -283 dict_mz_abund_ref['mz'].append(mf.mz_calc) -284 -285 dict_mz_abund_exp = {mono_mz: mono_abundance} -286 -287 # get experimental data -288 for mf in expected_isotopologues: -289 -290 # molecular formula has been assigned to a peak -291 if mf._mspeak_parent: -292 #stores mspeak abundance -293 dict_mz_abund_exp[mf.mz_calc] = mf._mspeak_parent.abundance -294 -295 else: -296 # fill missing mz with abundance 0 and mz error score of 0 -297 dict_mz_abund_exp[mf.mz_calc] = nextafter(0, 1) -298 -299 distance = SpectralSimilarity(dict_mz_abund_exp, dict_mz_abund_ref).manhattan_distance() -300 correlation = 1 - self.normalize_distance(distance, [0, 2]) -301 #correlation = dwt_correlation(dict_mz_abund_exp, dict_mz_abund_ref) -302 #correlation = cosine_correlation(dict_mz_abund_exp, dict_mz_abund_ref) -303 -304 if correlation == 1: -305 print(dict_mz_abund_exp, dict_mz_abund_ref) -306 if isnan(correlation): -307 #print(dict_mz_abund_exp, dict_mz_abund_ref) -308 correlation = 0.00001 -309 -310 else: -311 -312 # no isotopologue expected giving a 
correlation score of 0.0 but it needs optimization -313 correlation = 0.0 -314 -315 return correlation +224 raise Exception( +225 "method needs to be ppm or ppb, you have entered %s" % method +226 ) +227 +228 if self._mspeak_parent.mz_exp: +229 self._assignment_mass_error = ( +230 (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc +231 ) * multi_factor +232 +233 return ( +234 (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc +235 ) * multi_factor +236 +237 else: +238 raise Exception( +239 "No ms peak associated with the molecular formula instance %s", self +240 ) +241 +242 def _calc_mz_confidence(self, mean=0): +243 """Calculate the m/z confidence of the molecular formula, based on the experimental m/z and the calculated m/z. +244 +245 Parameters +246 ---------- +247 mean : int, optional +248 The mean of the m/z error, by default 0 +249 +250 """ +251 +252 # predicted std not set, using 0.3 +253 if not self._mspeak_parent.predicted_std: +254 self._mspeak_parent.predicted_std = 1.66 +255 +256 # print( self._mspeak_parent.predicted_std) +257 +258 return exp( +259 -1 +260 * ( +261 power((self.mz_error - mean), 2) +262 / (2 * power(self._mspeak_parent.predicted_std, 2)) +263 ) +264 ) +265 +266 def _calc_isotopologue_confidence(self): +267 """Calculate the isotopologue confidence of the molecular formula, based on the isotopologue similarity. +268 +269 Returns +270 ------- +271 float +272 The isotopologue confidence of the molecular formula. 
+273 """ +274 +275 if self.is_isotopologue: +276 # confidence of isotopologue is pure mz error +277 # TODO add more features here +278 +279 mformula_index = self.mono_isotopic_formula_index +280 mspeak_index = self.mspeak_index_mono_isotopic +281 +282 mspeak = self._mspeak_parent._ms_parent[mspeak_index] +283 +284 expected_isotopologues = mspeak[mformula_index].expected_isotopologues +285 +286 mono_mz = mspeak[mformula_index].mz_calc +287 mono_abundance = mspeak.abundance +288 +289 else: +290 mono_mz = self.mz_calc +291 mono_abundance = self._mspeak_parent.abundance +292 +293 expected_isotopologues = self.expected_isotopologues +294 # has isotopologues based on current dinamic range +295 +296 if expected_isotopologues: +297 dict_mz_abund_ref = {"mz": [mono_mz], "abundance": [mono_abundance]} +298 +299 # get reference data +300 for mf in expected_isotopologues: +301 dict_mz_abund_ref["abundance"].append(mf.abundance_calc) +302 dict_mz_abund_ref["mz"].append(mf.mz_calc) +303 +304 dict_mz_abund_exp = {mono_mz: mono_abundance} +305 +306 # get experimental data +307 for mf in expected_isotopologues: +308 # molecular formula has been assigned to a peak +309 if mf._mspeak_parent: +310 # stores mspeak abundance +311 dict_mz_abund_exp[mf.mz_calc] = mf._mspeak_parent.abundance +312 +313 else: +314 # fill missing mz with abundance 0 and mz error score of 0 +315 dict_mz_abund_exp[mf.mz_calc] = nextafter(0, 1) 316 -317 def normalize_distance(self, dist, dist_range): -318 """ -319 Normalize the distance value. -320 -321 Parameters -322 ---------- -323 dist : float -324 The distance value to be normalized. -325 dist_range : list -326 The range of the distance value. -327 -328 """ -329 result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0]) -330 -331 if result < 0: -332 result = 0. -333 elif result > 1: -334 result = 1. 
+317 distance = SpectralSimilarity( +318 dict_mz_abund_exp, dict_mz_abund_ref +319 ).manhattan_distance() +320 correlation = 1 - self.normalize_distance(distance, [0, 2]) +321 # correlation = dwt_correlation(dict_mz_abund_exp, dict_mz_abund_ref) +322 # correlation = cosine_correlation(dict_mz_abund_exp, dict_mz_abund_ref) +323 +324 if correlation == 1: +325 print(dict_mz_abund_exp, dict_mz_abund_ref) +326 if isnan(correlation): +327 # print(dict_mz_abund_exp, dict_mz_abund_ref) +328 correlation = 0.00001 +329 +330 else: +331 # no isotopologue expected giving a correlation score of 0.0 but it needs optimization +332 correlation = 0.0 +333 +334 return correlation 335 -336 return result -337 -338 def subtract_formula(self, formula_obj, formated=True): -339 """Subtract a formula from the current formula object -340 -341 Parameters -342 ---------- -343 formula_obj : MolecularFormula -344 MolecularFormula object to be subtracted from the current formula object -345 formated : bool, optional -346 If True, returns the formula in string format, by default True -347 -348 """ -349 subtraction = {} -350 for atom, value in self.to_dict().items(): -351 if atom != Labels.ion_type: -352 if formula_obj.get(atom): -353 #value_subtraction = value - formula_obj.get(atom) -354 if value - formula_obj.get(atom) > 0: -355 subtraction[atom] = value - formula_obj.get(atom) -356 else: -357 subtraction[atom] = value -358 if formated: -359 SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉") -360 SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹") -361 else: -362 SUB = str.maketrans("0123456789", "0123456789") -363 SUP = str.maketrans("0123456789", "0123456789") -364 formula_srt = '' -365 for atom in Atoms.atoms_order: -366 if atom in subtraction.keys(): -367 formula_srt += atom.translate(SUP) + str(int(subtraction.get(atom))).translate(SUB) -368 -369 return formula_srt -370 -371 -372 def _calc_average_mz_score(self): -373 """Calculate the average m/z error score of the molecular formula 
identification, including the isotopologues.""" -374 if self.is_isotopologue: -375 # confidence of isotopologue is pure mz error -376 # TODO add more features here -377 -378 mformula_index = self.mono_isotopic_formula_index -379 mspeak_index = self.mspeak_index_mono_isotopic -380 -381 mspeak = self._mspeak_parent._ms_parent[mspeak_index] -382 -383 expected_isotopologues = mspeak[mformula_index].expected_isotopologues -384 -385 else: -386 -387 expected_isotopologues = self.expected_isotopologues -388 # has isotopologues based on current dinamic range -389 -390 accumulated_mz_score = [self.mz_error_score] -391 -392 if expected_isotopologues: -393 -394 for mf in expected_isotopologues: -395 # molecular formula has been assigned to a peak -396 if mf._mspeak_parent: -397 #stores mspeak abundance -398 accumulated_mz_score.append(mf.mz_error_score) -399 else: -400 # fill missing mz with abundance 0 and mz error score of 0 -401 accumulated_mz_score.append(0.0) +336 def normalize_distance(self, dist, dist_range): +337 """ +338 Normalize the distance value. +339 +340 Parameters +341 ---------- +342 dist : float +343 The distance value to be normalized. +344 dist_range : list +345 The range of the distance value. 
+346 +347 """ +348 result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0]) +349 +350 if result < 0: +351 result = 0.0 +352 elif result > 1: +353 result = 1.0 +354 +355 return result +356 +357 def subtract_formula(self, formula_obj, formated=True): +358 """Subtract a formula from the current formula object +359 +360 Parameters +361 ---------- +362 formula_obj : MolecularFormula +363 MolecularFormula object to be subtracted from the current formula object +364 formated : bool, optional +365 If True, returns the formula in string format, by default True +366 +367 """ +368 subtraction = {} +369 for atom, value in self.to_dict().items(): +370 if atom != Labels.ion_type: +371 if formula_obj.get(atom): +372 # value_subtraction = value - formula_obj.get(atom) +373 if value - formula_obj.get(atom) > 0: +374 subtraction[atom] = value - formula_obj.get(atom) +375 else: +376 subtraction[atom] = value +377 if formated: +378 SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉") +379 SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹") +380 else: +381 SUB = str.maketrans("0123456789", "0123456789") +382 SUP = str.maketrans("0123456789", "0123456789") +383 formula_srt = "" +384 for atom in Atoms.atoms_order: +385 if atom in subtraction.keys(): +386 formula_srt += atom.translate(SUP) + str( +387 int(subtraction.get(atom)) +388 ).translate(SUB) +389 +390 return formula_srt +391 +392 def _calc_average_mz_score(self): +393 """Calculate the average m/z error score of the molecular formula identification, including the isotopologues.""" +394 if self.is_isotopologue: +395 # confidence of isotopologue is pure mz error +396 # TODO add more features here +397 +398 mformula_index = self.mono_isotopic_formula_index +399 mspeak_index = self.mspeak_index_mono_isotopic +400 +401 mspeak = self._mspeak_parent._ms_parent[mspeak_index] 402 -403 average_mz_score = sum(accumulated_mz_score)/len(accumulated_mz_score) -404 -405 if isnan(average_mz_score): -406 average_mz_score = 0.0 -407 -408 return 
average_mz_score -409 -410 def _calc_confidence_score(self): -411 """Calculate the confidence score of the molecular formula identification, including the isotopologues.""" -412 -413 ### Assumes random mass error, i.e, spectrum has to be calibrated and with zero mean -414 #### TODO: Add spectral similarity -415 -416 ## Parameters -417 #---------- -418 #### mz_exp: -419 #### Experimental m/z -420 #### predicted_std: -421 #### Standart deviation calculated from Resolving power optimization or constant set by User -422 -423 -424 isotopologue_correlation = self.isotopologue_similarity -425 average_mz_score = self.average_mz_error_score -426 # add monoisotopic peak mz error score -427 -428 # calculate score with higher weight for mass error -429 #score = power(((isotopologue_correlation) * (power(average_mz_score,3))),1/4) -430 a = self._mspeak_parent._ms_parent.molecular_search_settings.mz_error_score_weight -431 b = self._mspeak_parent._ms_parent.molecular_search_settings.isotopologue_score_weight -432 -433 score = (isotopologue_correlation*b) + (average_mz_score*a) -434 -435 #if round(average_mz_score,2) == 0.00: -436 # print(a,b, average_mz_score, isotopologue_correlation, score, isotopologue_correlation*b) -437 -438 -439 return score +403 expected_isotopologues = mspeak[mformula_index].expected_isotopologues +404 +405 else: +406 expected_isotopologues = self.expected_isotopologues +407 # has isotopologues based on current dinamic range +408 +409 accumulated_mz_score = [self.mz_error_score] +410 +411 if expected_isotopologues: +412 for mf in expected_isotopologues: +413 # molecular formula has been assigned to a peak +414 if mf._mspeak_parent: +415 # stores mspeak abundance +416 accumulated_mz_score.append(mf.mz_error_score) +417 else: +418 # fill missing mz with abundance 0 and mz error score of 0 +419 accumulated_mz_score.append(0.0) +420 +421 average_mz_score = sum(accumulated_mz_score) / len(accumulated_mz_score) +422 +423 if isnan(average_mz_score): +424 
average_mz_score = 0.0 +425 +426 return average_mz_score +427 +428 def _calc_confidence_score(self): +429 """Calculate the confidence score of the molecular formula identification, including the isotopologues.""" +430 +431 ### Assumes random mass error, i.e, spectrum has to be calibrated and with zero mean +432 #### TODO: Add spectral similarity +433 +434 ## Parameters +435 # ---------- +436 #### mz_exp: +437 #### Experimental m/z +438 #### predicted_std: +439 #### Standart deviation calculated from Resolving power optimization or constant set by User 440 -441 -442 def _calc_abundance_error(self, method='percentile'): -443 """Calculate the abundance error of the molecular formula, based on the experimental abundance and the calculated abundance. -444 -445 Parameters -446 ---------- -447 method : str, optional -448 The method to calculate the abundance error, by default 'percentile', but can be 'ppm' or 'ppb' -449 -450 Returns -451 ------- -452 float -453 The abundance error of the molecular formula. -454 -455 Raises -456 ------ -457 Exception -458 If isotopologues were not calculated. 
-459 """ -460 -461 mult_factor = 100 -462 -463 iso_abundance = self._mspeak_parent.abundance -464 mono_abundance =self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].abundance -465 -466 if self.prob_ratio: -467 -468 theor_abundance = mono_abundance* self.prob_ratio -469 #self.parent need to have a MassSpecPeak associated with the MolecularFormula class -470 return ((theor_abundance - iso_abundance )/theor_abundance)*mult_factor -471 -472 else: -473 -474 raise Exception("Please calc_isotopologues") +441 isotopologue_correlation = self.isotopologue_similarity +442 average_mz_score = self.average_mz_error_score +443 # add monoisotopic peak mz error score +444 +445 # calculate score with higher weight for mass error +446 # score = power(((isotopologue_correlation) * (power(average_mz_score,3))),1/4) +447 a = self._mspeak_parent._ms_parent.molecular_search_settings.mz_error_score_weight +448 b = self._mspeak_parent._ms_parent.molecular_search_settings.isotopologue_score_weight +449 +450 score = (isotopologue_correlation * b) + (average_mz_score * a) +451 +452 # if round(average_mz_score,2) == 0.00: +453 # print(a,b, average_mz_score, isotopologue_correlation, score, isotopologue_correlation*b) +454 +455 return score +456 +457 def _calc_abundance_error(self, method="percentile"): +458 """Calculate the abundance error of the molecular formula, based on the experimental abundance and the calculated abundance. +459 +460 Parameters +461 ---------- +462 method : str, optional +463 The method to calculate the abundance error, by default 'percentile', but can be 'ppm' or 'ppb' +464 +465 Returns +466 ------- +467 float +468 The abundance error of the molecular formula. +469 +470 Raises +471 ------ +472 Exception +473 If isotopologues were not calculated. +474 """ 475 -476 def _calc_area_error(self, method='percentile'): -477 """Calculate the area error of the molecular formula, based on the experimental area and the calculated area. 
-478 -479 Parameters -480 ---------- -481 method : str, optional -482 The method to calculate the area error, by default 'percentile', but can be 'ppm' or 'ppb' -483 -484 Returns -485 ------- -486 float -487 The area error of the molecular formula. -488 -489 Raises -490 ------ -491 Exception -492 If isotopologues were not calculated. -493 """ -494 -495 mult_factor = 100 -496 -497 iso_area = self._mspeak_parent.area -498 mono_area =self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].area -499 -500 if self.prob_ratio: -501 -502 if mono_area and iso_area: +476 mult_factor = 100 +477 +478 iso_abundance = self._mspeak_parent.abundance +479 mono_abundance = self._mspeak_parent._ms_parent[ +480 self.mspeak_index_mono_isotopic +481 ].abundance +482 +483 if self.prob_ratio: +484 theor_abundance = mono_abundance * self.prob_ratio +485 # self.parent need to have a MassSpecPeak associated with the MolecularFormula class +486 return ((theor_abundance - iso_abundance) / theor_abundance) * mult_factor +487 +488 else: +489 raise Exception("Please calc_isotopologues") +490 +491 def _calc_area_error(self, method="percentile"): +492 """Calculate the area error of the molecular formula, based on the experimental area and the calculated area. +493 +494 Parameters +495 ---------- +496 method : str, optional +497 The method to calculate the area error, by default 'percentile', but can be 'ppm' or 'ppb' +498 +499 Returns +500 ------- +501 float +502 The area error of the molecular formula. 
503 -504 #exp_ratio = iso_area/mono_area -505 -506 area_calc = mono_area* self.prob_ratio -507 -508 #self.parent need to have a MassSpecPeak associated with the MolecularFormula class -509 return ((area_calc - iso_area )/area_calc)*mult_factor -510 #return ((self.prob_ratio - exp_ratio )/self.prob_ratio)*mult_factor -511 -512 else: -513 -514 #centroid mass spectrum -515 return 0 -516 else: -517 -518 raise Exception("Please calc_isotopologues") -519 -520 def _calc_aromaticity_index_mod(self): -521 """Calculate the modified aromaticity index of the molecular formula. -522 -523 Returns -524 ------- -525 float -526 The aromaticity index of the molecular formula. -527 -528 Notes -529 ----- -530 Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386 -531 corrected in https://doi.org/10.1002/rcm.7433 -532 """ -533 # Prepare empty dictionary to store the number of atoms of each element -534 ai_es = {'C':0, 'H':0, 'O':0, 'N':0, 'S':0} -535 -536 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes -537 for element in ai_es: -538 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] -539 for element_w_iso in elements_w_iso: -540 if element_w_iso in self._d_molecular_formula: -541 ai_es[element] += self._d_molecular_formula[element_w_iso] -542 -543 ai_n = 1 + ai_es['C'] - (0.5 * ai_es['O']) - ai_es['S'] - (0.5 * (ai_es['N'] + ai_es['H'])) -544 ai_d = ai_es['C'] - (0.5 * ai_es['O']) - ai_es['N'] - ai_es['S'] -545 -546 ai = ai_n/ai_d -547 -548 if ai < 0: -549 ai = 0 -550 if ai > 1: -551 ai = 1 -552 -553 return ai -554 -555 def _calc_aromaticity_index(self): -556 """Calculate the aromaticity index of the molecular formula. -557 -558 Returns -559 ------- -560 float -561 The aromaticity index of the molecular formula. +504 Raises +505 ------ +506 Exception +507 If isotopologues were not calculated. 
+508 """ +509 +510 mult_factor = 100 +511 +512 iso_area = self._mspeak_parent.area +513 mono_area = self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].area +514 +515 if self.prob_ratio: +516 if mono_area and iso_area: +517 # exp_ratio = iso_area/mono_area +518 +519 area_calc = mono_area * self.prob_ratio +520 +521 # self.parent need to have a MassSpecPeak associated with the MolecularFormula class +522 return ((area_calc - iso_area) / area_calc) * mult_factor +523 # return ((self.prob_ratio - exp_ratio )/self.prob_ratio)*mult_factor +524 +525 else: +526 # centroid mass spectrum +527 return 0 +528 else: +529 raise Exception("Please calc_isotopologues") +530 +531 def _calc_aromaticity_index_mod(self): +532 """Calculate the modified aromaticity index of the molecular formula. +533 +534 Returns +535 ------- +536 float +537 The aromaticity index of the molecular formula. +538 +539 Notes +540 ----- +541 Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386 +542 corrected in https://doi.org/10.1002/rcm.7433 +543 """ +544 # Prepare empty dictionary to store the number of atoms of each element +545 ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0} +546 +547 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes +548 for element in ai_es: +549 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] +550 for element_w_iso in elements_w_iso: +551 if element_w_iso in self._d_molecular_formula: +552 ai_es[element] += self._d_molecular_formula[element_w_iso] +553 +554 ai_n = ( +555 1 +556 + ai_es["C"] +557 - (0.5 * ai_es["O"]) +558 - ai_es["S"] +559 - (0.5 * (ai_es["N"] + ai_es["H"])) +560 ) +561 ai_d = ai_es["C"] - (0.5 * ai_es["O"]) - ai_es["N"] - ai_es["S"] 562 -563 Notes -564 ----- -565 Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386 -566 corrected in https://doi.org/10.1002/rcm.7433 -567 """ -568 # Prepare empty dictionary to store the number of atoms of each element -569 ai_es = {'C':0, 'H':0, 
'O':0, 'N':0, 'S':0} -570 -571 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes -572 for element in ai_es: -573 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] -574 for element_w_iso in elements_w_iso: -575 if element_w_iso in self._d_molecular_formula: -576 ai_es[element] += self._d_molecular_formula[element_w_iso] -577 -578 ai_n = 1 + ai_es['C'] - (ai_es['O']) - ai_es['S'] - (0.5 * (ai_es['N'] + ai_es['H'])) -579 ai_d = ai_es['C'] - (ai_es['O']) - ai_es['N'] - ai_es['S'] -580 -581 ai = ai_n/ai_d -582 -583 if ai < 0: -584 ai = 0 -585 if ai > 1: -586 ai = 1 +563 ai = ai_n / ai_d +564 +565 if ai < 0: +566 ai = 0 +567 if ai > 1: +568 ai = 1 +569 +570 return ai +571 +572 def _calc_aromaticity_index(self): +573 """Calculate the aromaticity index of the molecular formula. +574 +575 Returns +576 ------- +577 float +578 The aromaticity index of the molecular formula. +579 +580 Notes +581 ----- +582 Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386 +583 corrected in https://doi.org/10.1002/rcm.7433 +584 """ +585 # Prepare empty dictionary to store the number of atoms of each element +586 ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0} 587 -588 return ai -589 -590 def _calc_nosc(self): -591 """Calculate the average nominal oxidation state of carbon -592 -593 Returns -594 ------- -595 float -596 The average nominal oxidation state of carbon -597 -598 Notes -599 ----- -600 Source LaRowe and Van Cappellen, 2011 https://doi.org/10.1016/j.gca.2011.01.020 -601 """ -602 # Prepare empty dictionary to store the number of atoms of each element -603 nosc_es = {'C':0, 'H':0, 'O':0, 'N':0, 'S':0, 'P':0} -604 -605 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes -606 for element in nosc_es: -607 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] -608 for element_w_iso in elements_w_iso: -609 if element_w_iso in self._d_molecular_formula: -610 nosc_es[element] += 
self._d_molecular_formula[element_w_iso] -611 -612 nosc = -( (4 * nosc_es['C'] + nosc_es['H'] - 3 * nosc_es['N'] - 2 * nosc_es['O'] + 5 * nosc_es['P'] - 2 * nosc_es['S']) / nosc_es['C']) + 4 -613 -614 # If nosc is infinite or negative infinity, set it to nan -615 if nosc == float('inf') or nosc == float('-inf'): -616 nosc = float('nan') -617 -618 return nosc -619 -620 @property -621 def dbe_ai(self): -622 """Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens.""" -623 -624 carbons = self._d_molecular_formula.get('C') -625 hydrogens = self._d_molecular_formula.get('H') -626 oxygens = self._d_molecular_formula.get('O') -627 return 1 + (((2*carbons) - hydrogens - (2*oxygens))*0.5) -628 -629 def _calc_dbe(self): -630 """Calculate the double bond equivalent (DBE) of the molecular formula""" -631 -632 individual_dbe = 0 -633 -634 for atom in self._d_molecular_formula.keys(): -635 -636 if atom != Labels.ion_type: -637 -638 n_atom = int(self._d_molecular_formula.get(atom)) -639 -640 clean_atom = ''.join([i for i in atom if not i.isdigit()]) -641 -642 if self._mspeak_parent: -643 valencia = self._mspeak_parent._ms_parent.molecular_search_settings.used_atom_valences.get(clean_atom) -644 else: -645 valencia = MSParameters.molecular_search.used_atom_valences.get(clean_atom) -646 #valencia = Atoms.atoms_covalence.get(atom) -647 -648 if type(valencia) is tuple: -649 valencia = valencia[0] -650 if valencia > 0: -651 #print atom, valencia, n_atom, individual_dbe -652 individual_dbe = individual_dbe + (n_atom * (valencia - 2)) -653 else: -654 continue -655 -656 dbe = 1 + (0.5 * individual_dbe) -657 -658 if self.ion_type == Labels.adduct_ion: -659 dbe = dbe + 0.5 -660 -661 return dbe -662 -663 def _calc_kmd(self, dict_base): -664 """Calculate the Kendrick mass defect (KMD) of the molecular formula, based on the monoisotopic mass and the Kendrick mass. 
-665 -666 Parameters -667 ---------- -668 dict_base : dict -669 The dictionary of the base formula, e.g. {'C':1, 'H':2} -670 -671 Returns -672 ------- -673 tuple -674 The tuple of the KMD, Kendrick mass, and nominal Kendrick mass. -675 """ -676 mass = 0 -677 for atom in dict_base.keys(): -678 mass = mass + Atoms.atomic_masses.get(atom) * dict_base.get(atom) -679 -680 kendrick_mass = (int(mass)/mass)* self.mz_calc -681 -682 nominal_km =int(kendrick_mass) -683 -684 kmd = (nominal_km - kendrick_mass) * 100 -685 -686 #kmd = (nominal_km - km) * 1 -687 kmd = round(kmd,0) -688 -689 return kmd, kendrick_mass, nominal_km -690 -691 def _cal_isotopologues(self, formula_dict, min_abundance, current_abundance, ms_dynamic_range): -692 """Calculate the isotopologues for a given molecular formula. +588 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes +589 for element in ai_es: +590 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] +591 for element_w_iso in elements_w_iso: +592 if element_w_iso in self._d_molecular_formula: +593 ai_es[element] += self._d_molecular_formula[element_w_iso] +594 +595 ai_n = ( +596 1 +597 + ai_es["C"] +598 - (ai_es["O"]) +599 - ai_es["S"] +600 - (0.5 * (ai_es["N"] + ai_es["H"])) +601 ) +602 ai_d = ai_es["C"] - (ai_es["O"]) - ai_es["N"] - ai_es["S"] +603 +604 ai = ai_n / ai_d +605 +606 if ai < 0: +607 ai = 0 +608 if ai > 1: +609 ai = 1 +610 +611 return ai +612 +613 def _calc_nosc(self): +614 """Calculate the average nominal oxidation state of carbon +615 +616 Returns +617 ------- +618 float +619 The average nominal oxidation state of carbon +620 +621 Notes +622 ----- +623 Source LaRowe and Van Cappellen, 2011 https://doi.org/10.1016/j.gca.2011.01.020 +624 """ +625 # Prepare empty dictionary to store the number of atoms of each element +626 nosc_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0, "P": 0} +627 +628 # Count the number of atoms of each element in the molecular formula, inclusive of isotopes +629 
for element in nosc_es: +630 elements_w_iso = [element] + Atoms.isotopes.get(element)[1] +631 for element_w_iso in elements_w_iso: +632 if element_w_iso in self._d_molecular_formula: +633 nosc_es[element] += self._d_molecular_formula[element_w_iso] +634 +635 nosc = ( +636 -( +637 ( +638 4 * nosc_es["C"] +639 + nosc_es["H"] +640 - 3 * nosc_es["N"] +641 - 2 * nosc_es["O"] +642 + 5 * nosc_es["P"] +643 - 2 * nosc_es["S"] +644 ) +645 / nosc_es["C"] +646 ) +647 + 4 +648 ) +649 +650 # If nosc is infinite or negative infinity, set it to nan +651 if nosc == float("inf") or nosc == float("-inf"): +652 nosc = float("nan") +653 +654 return nosc +655 +656 @property +657 def dbe_ai(self): +658 """Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens.""" +659 +660 carbons = self._d_molecular_formula.get("C") +661 hydrogens = self._d_molecular_formula.get("H") +662 oxygens = self._d_molecular_formula.get("O") +663 return 1 + (((2 * carbons) - hydrogens - (2 * oxygens)) * 0.5) +664 +665 def _calc_dbe(self): +666 """Calculate the double bond equivalent (DBE) of the molecular formula""" +667 +668 individual_dbe = 0 +669 +670 for atom in self._d_molecular_formula.keys(): +671 if atom != Labels.ion_type: +672 n_atom = int(self._d_molecular_formula.get(atom)) +673 +674 clean_atom = "".join([i for i in atom if not i.isdigit()]) +675 +676 if self._mspeak_parent: +677 valencia = self._mspeak_parent._ms_parent.molecular_search_settings.used_atom_valences.get( +678 clean_atom +679 ) +680 else: +681 valencia = MSParameters.molecular_search.used_atom_valences.get( +682 clean_atom +683 ) +684 # valencia = Atoms.atoms_covalence.get(atom) +685 +686 if type(valencia) is tuple: +687 valencia = valencia[0] +688 if valencia > 0: +689 # print atom, valencia, n_atom, individual_dbe +690 individual_dbe = individual_dbe + (n_atom * (valencia - 2)) +691 else: +692 continue 693 -694 Parameters -695 ---------- -696 formula_dict : dict 
-697 The dictionary of the molecular formula. Example: {'C':10, 'H', 20, 'O', 2} -698 min_abundance : float -699 The minimum abundance. -700 current_abundance : float -701 The current monoisotopic abundance. -702 ms_dynamic_range : float -703 The dynamic range. -704 -705 -706 Notes -707 ----- -708 This is the primary function to look for isotopologues based on a monoisotopic molecular formula. -709 It needs to be expanded to include the calculation of resolving power and plot the results. -710 Use this function at runtime during the molecular identification algorithm only when a positive ID is observed to the monoisotopic ion. -711 Use this function to simulate mass spectrum (needs resolving power calculation to be fully operational). -712 It might break when adding non-conventional atoms (not yet tested). -713 This function employs the IsoSpecPy library https://github.com/MatteoLacki/IsoSpec. -714 -715 -716 """ -717 -718 #last update on 05-26-2020, Yuri E. Corilo +694 dbe = 1 + (0.5 * individual_dbe) +695 +696 if self.ion_type == Labels.adduct_ion: +697 dbe = dbe + 0.5 +698 +699 return dbe +700 +701 def _calc_kmd(self, dict_base): +702 """Calculate the Kendrick mass defect (KMD) of the molecular formula, based on the monoisotopic mass and the Kendrick mass. +703 +704 Parameters +705 ---------- +706 dict_base : dict +707 The dictionary of the base formula, e.g. {'C':1, 'H':2} +708 +709 Returns +710 ------- +711 tuple +712 The tuple of the KMD, Kendrick mass, and nominal Kendrick mass. 
+713 """ +714 mass = 0 +715 for atom in dict_base.keys(): +716 mass = mass + Atoms.atomic_masses.get(atom) * dict_base.get(atom) +717 +718 kendrick_mass = (int(mass) / mass) * self.mz_calc 719 -720 # updated it to reflect min possible mass peak abundance -721 cut_off_to_IsoSpeccPy = 1-(1/ms_dynamic_range) -722 -723 #print("cut_off_to_IsoSpeccPy", cut_off_to_IsoSpeccPy, current_abundance, min_abundance, ms_dynamic_range) -724 #print(cut_off_to_IsoSpeccPy) -725 atoms_labels = (atom for atom in formula_dict.keys() if atom != Labels.ion_type and atom != 'H') -726 -727 atoms_count = [] -728 masses_list_tuples = [] -729 props_list_tuples = [] -730 all_atoms_list = [] -731 -732 for atom_label in atoms_labels: -733 -734 if Atoms.isotopes.get(atom_label)[1][0] is None: -735 'This atom_label has no heavy isotope' -736 atoms_count.append(formula_dict.get(atom_label)) -737 mass = Atoms.atomic_masses.get(atom_label) -738 prop = Atoms.isotopic_abundance.get(atom_label) -739 masses_list_tuples.append([mass]) -740 props_list_tuples.append([prop]) -741 all_atoms_list.append(atom_label) -742 -743 else: -744 -745 isotopes_label_list = Atoms.isotopes.get(atom_label)[1] -746 -747 if len(isotopes_label_list) > 1: -748 'This atom_label has two or more heavy isotope' -749 isotopos_labels = [i for i in isotopes_label_list] -750 else: -751 'This atom_label only has one heavy isotope' -752 isotopos_labels = [isotopes_label_list[0]] -753 -754 #all_atoms_list.extend(isotopos_labels) -755 isotopos_labels = [atom_label] + isotopos_labels -756 -757 all_atoms_list.extend(isotopos_labels) -758 -759 masses = [Atoms.atomic_masses.get(atom_label) for atom_label in isotopos_labels] -760 props = [Atoms.isotopic_abundance.get(atom_label) for atom_label in isotopos_labels] -761 -762 atoms_count.append(formula_dict.get(atom_label)) -763 masses_list_tuples.append(masses) -764 props_list_tuples.append(props) -765 if legacy_isospec: -766 iso = 
IsoSpecPy.IsoSpec(atoms_count,masses_list_tuples,props_list_tuples, cut_off_to_IsoSpeccPy) -767 conf = iso.getConfs() -768 masses = conf[0] -769 probs = exp(conf[1]) -770 molecular_formulas = conf[2] -771 #print('conf', conf) -772 #print('probs', conf[1]) -773 else: -774 # This syntax in IsoSpecPy 2.2.2 yields the same information as the legacy approach -775 iso = IsoSpecPy.IsoTotalProb(atomCounts = atoms_count, isotopeMasses = masses_list_tuples, -776 isotopeProbabilities = props_list_tuples, prob_to_cover =cut_off_to_IsoSpeccPy, get_confs=True) -777 masses = list(iso.masses) -778 probs = array(list(iso.probs)) -779 confs = list(iso.confs) -780 -781 molecular_formulas = [] -782 for x in confs: -783 tmplist = [] -784 for y in x: -785 tmplist.extend(list(y)) -786 molecular_formulas.append(tmplist) -787 +720 nominal_km = int(kendrick_mass) +721 +722 kmd = (nominal_km - kendrick_mass) * 100 +723 +724 # kmd = (nominal_km - km) * 1 +725 kmd = round(kmd, 0) +726 +727 return kmd, kendrick_mass, nominal_km +728 +729 def _cal_isotopologues( +730 self, formula_dict, min_abundance, current_abundance, ms_dynamic_range +731 ): +732 """Calculate the isotopologues for a given molecular formula. +733 +734 Parameters +735 ---------- +736 formula_dict : dict +737 The dictionary of the molecular formula. Example: {'C':10, 'H', 20, 'O', 2} +738 min_abundance : float +739 The minimum abundance. +740 current_abundance : float +741 The current monoisotopic abundance. +742 ms_dynamic_range : float +743 The dynamic range. +744 +745 +746 Notes +747 ----- +748 This is the primary function to look for isotopologues based on a monoisotopic molecular formula. +749 It needs to be expanded to include the calculation of resolving power and plot the results. +750 Use this function at runtime during the molecular identification algorithm only when a positive ID is observed to the monoisotopic ion. 
+751 Use this function to simulate mass spectrum (needs resolving power calculation to be fully operational). +752 It might break when adding non-conventional atoms (not yet tested). +753 This function employs the IsoSpecPy library https://github.com/MatteoLacki/IsoSpec. +754 +755 +756 """ +757 +758 # last update on 05-26-2020, Yuri E. Corilo +759 +760 # updated it to reflect min possible mass peak abundance +761 cut_off_to_IsoSpeccPy = 1 - (1 / ms_dynamic_range) +762 +763 # print("cut_off_to_IsoSpeccPy", cut_off_to_IsoSpeccPy, current_abundance, min_abundance, ms_dynamic_range) +764 # print(cut_off_to_IsoSpeccPy) +765 atoms_labels = ( +766 atom +767 for atom in formula_dict.keys() +768 if atom != Labels.ion_type and atom != "H" +769 ) +770 +771 atoms_count = [] +772 masses_list_tuples = [] +773 props_list_tuples = [] +774 all_atoms_list = [] +775 +776 for atom_label in atoms_labels: +777 if Atoms.isotopes.get(atom_label)[1][0] is None: +778 "This atom_label has no heavy isotope" +779 atoms_count.append(formula_dict.get(atom_label)) +780 mass = Atoms.atomic_masses.get(atom_label) +781 prop = Atoms.isotopic_abundance.get(atom_label) +782 masses_list_tuples.append([mass]) +783 props_list_tuples.append([prop]) +784 all_atoms_list.append(atom_label) +785 +786 else: +787 isotopes_label_list = Atoms.isotopes.get(atom_label)[1] 788 -789 -790 -791 new_formulas = [] -792 -793 for isotopologue_index in range(len(iso)): -794 #skip_mono_isotopic -795 -796 formula_list = molecular_formulas[isotopologue_index] -797 new_formula_dict = dict(zip(all_atoms_list, formula_list)) -798 new_formula_dict[Labels.ion_type] = formula_dict.get(Labels.ion_type) -799 if formula_dict.get('H'): -800 new_formula_dict['H'] = formula_dict.get('H') -801 -802 new_formulas.append({x:y for x,y in new_formula_dict.items() if y!=0}) -803 -804 # formula_dict in new_formulas check if monoisotopic is being returned -805 if new_formulas:# and formula_dict in new_formulas: -806 -807 #print(conf) -808 
#print(new_formulas) -809 #print(atoms_count) -810 #print(all_atoms_list) -811 #print(masses_list_tuples) -812 #print(props_list_tuples) -813 # find where monoisotopic is -814 index_mono = new_formulas.index(formula_dict) -815 # calculate ratio iso/mono -816 probs = list(probs/probs[index_mono]) -817 -818 # delete the monoisotopic -819 del probs[index_mono] -820 del new_formulas[index_mono] -821 -822 #print('probs_exp', probs) -823 for formulas, prob in zip(new_formulas, probs): -824 -825 theor_abundance = current_abundance* prob -826 if theor_abundance > min_abundance: -827 #print(prob, theor_abundance, current_abundance) -828 yield (formulas, prob) -829 #return zip(new_formulas, probs ) -830 -831 #else: -832 # return [] -833 +789 if len(isotopes_label_list) > 1: +790 "This atom_label has two or more heavy isotope" +791 isotopos_labels = [i for i in isotopes_label_list] +792 else: +793 "This atom_label only has one heavy isotope" +794 isotopos_labels = [isotopes_label_list[0]] +795 +796 # all_atoms_list.extend(isotopos_labels) +797 isotopos_labels = [atom_label] + isotopos_labels +798 +799 all_atoms_list.extend(isotopos_labels) +800 +801 masses = [ +802 Atoms.atomic_masses.get(atom_label) +803 for atom_label in isotopos_labels +804 ] +805 props = [ +806 Atoms.isotopic_abundance.get(atom_label) +807 for atom_label in isotopos_labels +808 ] +809 +810 atoms_count.append(formula_dict.get(atom_label)) +811 masses_list_tuples.append(masses) +812 props_list_tuples.append(props) +813 if legacy_isospec: +814 iso = IsoSpecPy.IsoSpec( +815 atoms_count, +816 masses_list_tuples, +817 props_list_tuples, +818 cut_off_to_IsoSpeccPy, +819 ) +820 conf = iso.getConfs() +821 masses = conf[0] +822 probs = exp(conf[1]) +823 molecular_formulas = conf[2] +824 # print('conf', conf) +825 # print('probs', conf[1]) +826 else: +827 # This syntax in IsoSpecPy 2.2.2 yields the same information as the legacy approach +828 iso = IsoSpecPy.IsoTotalProb( +829 atomCounts=atoms_count, +830 
isotopeMasses=masses_list_tuples, +831 isotopeProbabilities=props_list_tuples, +832 prob_to_cover=cut_off_to_IsoSpeccPy, +833 get_confs=True, +834 ) +835 masses = list(iso.masses) +836 probs = array(list(iso.probs)) +837 confs = list(iso.confs) +838 +839 molecular_formulas = [] +840 for x in confs: +841 tmplist = [] +842 for y in x: +843 tmplist.extend(list(y)) +844 molecular_formulas.append(tmplist) +845 +846 new_formulas = [] +847 +848 for isotopologue_index in range(len(iso)): +849 # skip_mono_isotopic +850 +851 formula_list = molecular_formulas[isotopologue_index] +852 new_formula_dict = dict(zip(all_atoms_list, formula_list)) +853 new_formula_dict[Labels.ion_type] = formula_dict.get(Labels.ion_type) +854 if formula_dict.get("H"): +855 new_formula_dict["H"] = formula_dict.get("H") +856 +857 new_formulas.append({x: y for x, y in new_formula_dict.items() if y != 0}) +858 +859 # formula_dict in new_formulas check if monoisotopic is being returned +860 if new_formulas: # and formula_dict in new_formulas: +861 # print(conf) +862 # print(new_formulas) +863 # print(atoms_count) +864 # print(all_atoms_list) +865 # print(masses_list_tuples) +866 # print(props_list_tuples) +867 # find where monoisotopic is +868 index_mono = new_formulas.index(formula_dict) +869 # calculate ratio iso/mono +870 probs = list(probs / probs[index_mono]) +871 +872 # delete the monoisotopic +873 del probs[index_mono] +874 del new_formulas[index_mono] +875 +876 # print('probs_exp', probs) +877 for formulas, prob in zip(new_formulas, probs): +878 theor_abundance = current_abundance * prob +879 if theor_abundance > min_abundance: +880 # print(prob, theor_abundance, current_abundance) +881 yield (formulas, prob) +882 # return zip(new_formulas, probs ) +883 +884 # else: +885 # return []

    @@ -929,812 +981,860 @@

    -
     28class MolecularFormulaCalc:
    - 29    """Class of calculations related to molecular formula
    - 30
    - 31    This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula
    - 32    
    - 33    Attributes
    - 34    ----------
    - 35    mz_calc : float
    - 36        The m/z value of the molecular formula.
    - 37    neutral_mass : float
    - 38        The neutral mass of the molecular formula.
    - 39    ion_charge : int
    - 40        The ion charge of the molecular formula.
    - 41    _external_mz : float
    - 42        The externally provided m/z value of the molecular formula.
    - 43    _d_molecular_formula : dict
    - 44        The dictionary representation of the molecular formula.
    - 45    _mspeak_parent : object
    - 46        The parent MS peak object associated with the molecular formula.
    - 47    _assignment_mass_error : float
    - 48        The mass error of the molecular formula.
    - 49    
    - 50    Methods
    - 51    -------
    - 52    * _calc_resolving_power_low_pressure(B, T) 
    - 53        Calculate the resolving power at low pressure.
    - 54    * _calc_resolving_power_high_pressure(B, T)
    - 55        Calculate the resolving power at high pressure.
    - 56    * _adduct_mz(adduct_atom, ion_charge)
    - 57        Get the m/z value of an adducted ion version of the molecular formula.
    - 58    * _protonated_mz(ion_charge)
    - 59        Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
    - 60    * _radical_mz(ion_charge)
    - 61        Get the m/z value of a radical ion version of the molecular formula.
    - 62    * _neutral_mass()
    - 63        Get the neutral mass of the molecular formula.
    - 64    * _calc_mz()
    - 65        Get the m/z value of the molecular formula.
    - 66    * _calc_assignment_mass_error(method='ppm')
    - 67        Calculate the mass error of the molecular formula.
    - 68    * _calc_mz_confidence(mean=0)
    - 69        Calculate the m/z confidence of the molecular formula.
    - 70    * _calc_isotopologue_confidence()
    - 71        Calculate the isotopologue confidence of the molecular formula.
    - 72    * normalize_distance(dist, dist_range)
    - 73        Normalize the distance value.
    - 74    * subtract_formula(formula_obj, formated=True)
    - 75        Subtract a formula from the current formula object.
    - 76    * _calc_average_mz_score()
    - 77        Calculate the average m/z error score of the molecular formula identification, including the isotopologues.
    - 78    """
    - 79    
    - 80    def _calc_resolving_power_low_pressure(self, B, T):
    - 81        '''
    - 82        Calculate the resolving power at low pressure.
    - 83
    - 84        Parameters
    - 85        ----------
    - 86        B : float
    - 87            Magnetic Strength (Testa).
    - 88        T : float
    - 89            Transient time (seconds).
    - 90
    - 91        '''
    - 92        return (1.274 * 10000000 * B * T) * (1 / self.mz_calc)    
    - 93
    - 94    def _calc_resolving_power_high_pressure(self, B, T):
    - 95        '''
    - 96        Calculate the resolving power at high pressure.
    - 97
    - 98        Parameters
    - 99        ----------
    -100        B : float
    -101            Magnetic Strength (Testa).
    -102        T : float
    -103            Transient time (seconds).
    -104
    -105        '''
    -106        return (2.758 * 10000000 * B * T) * (1 / self.mz_calc)    
    -107
    -108    def _adduct_mz(self, adduct_atom, ion_charge):
    -109        """Get the m/z value of an adducted ion version of the molecular formula.
    -110        
    -111        Parameters
    -112        ----------
    -113        adduct_atom : str
    -114            The adduct atom.
    -115        ion_charge : int
    -116            The ion charge.
    -117            
    -118        """
    -119        return (self.neutral_mass + (Atoms.atomic_masses.get(adduct_atom)) + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge)
    -120
    -121    def _protonated_mz(self, ion_charge):
    -122        """Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
    -123        
    -124        Parameters
    -125        ----------
    -126        ion_charge : int
    -127            The ion charge.
    -128        """
    -129        return (self.neutral_mass + (ion_charge * Atoms.atomic_masses.get("H")) + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge)
    -130
    -131    def _radical_mz(self, ion_charge):
    -132        """Get the m/z value of a radical ion version of the molecular formula.
    -133
    -134        Parameters
    -135        ----------
    -136        ion_charge : int
    -137            The ion charge.
    -138        """    
    -139        return (self.neutral_mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge)
    -140
    -141    def _neutral_mass(self):
    -142        """Get the neutral mass of the molecular formula."""
    +            
     33class MolecularFormulaCalc:
    + 34    """Class of calculations related to molecular formula
    + 35
    + 36    This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula
    + 37
    + 38    Attributes
    + 39    ----------
    + 40    mz_calc : float
    + 41        The m/z value of the molecular formula.
    + 42    neutral_mass : float
    + 43        The neutral mass of the molecular formula.
    + 44    ion_charge : int
    + 45        The ion charge of the molecular formula.
    + 46    _external_mz : float
    + 47        The externally provided m/z value of the molecular formula.
    + 48    _d_molecular_formula : dict
    + 49        The dictionary representation of the molecular formula.
    + 50    _mspeak_parent : object
    + 51        The parent MS peak object associated with the molecular formula.
    + 52    _assignment_mass_error : float
    + 53        The mass error of the molecular formula.
    + 54
    + 55    Methods
    + 56    -------
    + 57    * _calc_resolving_power_low_pressure(B, T)
    + 58        Calculate the resolving power at low pressure.
    + 59    * _calc_resolving_power_high_pressure(B, T)
    + 60        Calculate the resolving power at high pressure.
    + 61    * _adduct_mz(adduct_atom, ion_charge)
    + 62        Get the m/z value of an adducted ion version of the molecular formula.
    + 63    * _protonated_mz(ion_charge)
    + 64        Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
    + 65    * _radical_mz(ion_charge)
    + 66        Get the m/z value of a radical ion version of the molecular formula.
    + 67    * _neutral_mass()
    + 68        Get the neutral mass of the molecular formula.
    + 69    * _calc_mz()
    + 70        Get the m/z value of the molecular formula.
    + 71    * _calc_assignment_mass_error(method='ppm')
    + 72        Calculate the mass error of the molecular formula.
    + 73    * _calc_mz_confidence(mean=0)
    + 74        Calculate the m/z confidence of the molecular formula.
    + 75    * _calc_isotopologue_confidence()
    + 76        Calculate the isotopologue confidence of the molecular formula.
    + 77    * normalize_distance(dist, dist_range)
    + 78        Normalize the distance value.
    + 79    * subtract_formula(formula_obj, formated=True)
    + 80        Subtract a formula from the current formula object.
    + 81    * _calc_average_mz_score()
    + 82        Calculate the average m/z error score of the molecular formula identification, including the isotopologues.
    + 83    """
    + 84
    + 85    def _calc_resolving_power_low_pressure(self, B, T):
    + 86        """
    + 87        Calculate the resolving power at low pressure.
    + 88
    + 89        Parameters
    + 90        ----------
    + 91        B : float
    + 92            Magnetic Strength (Testa).
    + 93        T : float
    + 94            Transient time (seconds).
    + 95
    + 96        """
    + 97        return (1.274 * 10000000 * B * T) * (1 / self.mz_calc)
    + 98
    + 99    def _calc_resolving_power_high_pressure(self, B, T):
    +100        """
    +101        Calculate the resolving power at high pressure.
    +102
    +103        Parameters
    +104        ----------
    +105        B : float
    +106            Magnetic Strength (Testa).
    +107        T : float
    +108            Transient time (seconds).
    +109
    +110        """
    +111        return (2.758 * 10000000 * B * T) * (1 / self.mz_calc)
    +112
    +113    def _adduct_mz(self, adduct_atom, ion_charge):
    +114        """Get the m/z value of an adducted ion version of the molecular formula.
    +115
    +116        Parameters
    +117        ----------
    +118        adduct_atom : str
    +119            The adduct atom.
    +120        ion_charge : int
    +121            The ion charge.
    +122
    +123        """
    +124        return (
    +125            self.neutral_mass
    +126            + (Atoms.atomic_masses.get(adduct_atom))
    +127            + (ion_charge * -1 * Atoms.electron_mass)
    +128        ) / abs(ion_charge)
    +129
    +130    def _protonated_mz(self, ion_charge):
    +131        """Get the m/z value of a protonated or deprotonated ion version of the molecular formula.
    +132
    +133        Parameters
    +134        ----------
    +135        ion_charge : int
    +136            The ion charge.
    +137        """
    +138        return (
    +139            self.neutral_mass
    +140            + (ion_charge * Atoms.atomic_masses.get("H"))
    +141            + (ion_charge * -1 * Atoms.electron_mass)
    +142        ) / abs(ion_charge)
     143
    -144        mass = 0
    -145
    -146        for each_atom in self._d_molecular_formula.keys() :
    -147                
    -148            if each_atom != Labels.ion_type and each_atom != 'HC':
    -149                
    -150                try:
    -151                
    -152                    mass = mass + Atoms.atomic_masses[each_atom] * self._d_molecular_formula.get(each_atom)
    -153                
    -154                except: print(Labels.ion_type, each_atom) 
    -155        
    -156        return mass
    -157
    -158    def _calc_mz(self):
    -159        """Get the m/z value of the molecular formula, based on the ion charge and ion type.
    -160        
    -161        """
    -162        
    -163        if self.ion_charge is not None:
    -164            
    -165            if self._external_mz:
    -166                return self._external_mz
    -167            
    -168            else:
    -169                ion_type = self._d_molecular_formula.get(Labels.ion_type)
    +144    def _radical_mz(self, ion_charge):
    +145        """Get the m/z value of a radical ion version of the molecular formula.
    +146
    +147        Parameters
    +148        ----------
    +149        ion_charge : int
    +150            The ion charge.
    +151        """
    +152        return (self.neutral_mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(
    +153            ion_charge
    +154        )
    +155
    +156    def _neutral_mass(self):
    +157        """Get the neutral mass of the molecular formula."""
    +158
    +159        mass = 0
    +160
    +161        for each_atom in self._d_molecular_formula.keys():
    +162            if each_atom != Labels.ion_type and each_atom != "HC":
    +163                try:
    +164                    mass = mass + Atoms.atomic_masses[
    +165                        each_atom
    +166                    ] * self._d_molecular_formula.get(each_atom)
    +167
    +168                except:
    +169                    print(Labels.ion_type, each_atom)
     170
    -171                if ion_type == Labels.protonated_de_ion:
    -172                    return self.protonated_mz
    -173                
    -174                elif ion_type == Labels.radical_ion or ion_type == Labels.adduct_ion:   
    -175                    return self.radical_mz
    -176                
    -177                elif ion_type == Labels.neutral:
    -178                
    -179                    return self.neutral_mass
    -180                
    -181                elif self.ion_charge == 0:
    -182                
    -183                    return self.neutral_mass
    -184
    -185                else:
    -186                    #formula is probably ion form used for bruker ref list
    -187                    return self.neutral_mass
    -188                
    -189        else:
    -190            
    -191            raise Exception("Please set ion charge first")
    -192         
    -193    def _calc_assignment_mass_error(self, method='ppm'):
    -194        """Calculate the mass error of the molecular formula, based on the experimental m/z and the calculated m/z.
    -195        
    -196        Parameters
    -197        ----------
    -198        method : str, optional
    -199            The method to calculate the mass error, by default 'ppm', but can be 'ppb'
    -200            
    -201        Raises
    -202        ------
    -203        Exception
    -204            If the method is not 'ppm' or 'ppb'.
    -205        Exception
    -206            If there is no ms peak associated with the molecular formula instance.
    -207        """
    -208         
    -209        if method == 'ppm':
    -210            multi_factor = 1000000
    -211        
    -212        elif method == 'ppb':
    -213            multi_factor = 1000000
    -214        
    -215        else:
    -216            raise Exception("method needs to be ppm or ppb, you have entered %s" % method)
    -217              
    -218        if self._mspeak_parent.mz_exp:
    -219            
    -220            self._assignment_mass_error = ((self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc) * multi_factor
    -221
    -222            return ((self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc) * multi_factor
    -223        
    +171        return mass
    +172
    +173    def _calc_mz(self):
    +174        """Get the m/z value of the molecular formula, based on the ion charge and ion type."""
    +175
    +176        if self.ion_charge is not None:
    +177            if self._external_mz:
    +178                return self._external_mz
    +179
    +180            else:
    +181                ion_type = self._d_molecular_formula.get(Labels.ion_type)
    +182
    +183                if ion_type == Labels.protonated_de_ion:
    +184                    return self.protonated_mz
    +185
    +186                elif ion_type == Labels.radical_ion or ion_type == Labels.adduct_ion:
    +187                    return self.radical_mz
    +188
    +189                elif ion_type == Labels.neutral:
    +190                    return self.neutral_mass
    +191
    +192                elif self.ion_charge == 0:
    +193                    return self.neutral_mass
    +194
    +195                else:
    +196                    # formula is probably ion form used for bruker ref list
    +197                    return self.neutral_mass
    +198
    +199        else:
    +200            raise Exception("Please set ion charge first")
    +201
    +202    def _calc_assignment_mass_error(self, method="ppm"):
    +203        """Calculate the mass error of the molecular formula, based on the experimental m/z and the calculated m/z.
    +204
    +205        Parameters
    +206        ----------
    +207        method : str, optional
    +208            The method to calculate the mass error, by default 'ppm', but can be 'ppb'
    +209
    +210        Raises
    +211        ------
    +212        Exception
    +213            If the method is not 'ppm' or 'ppb'.
    +214        Exception
    +215            If there is no ms peak associated with the molecular formula instance.
    +216        """
    +217
    +218        if method == "ppm":
    +219            multi_factor = 1000000
    +220
    +221        elif method == "ppb":
    +222            multi_factor = 1000000
    +223
     224        else:
    -225            
    -226            raise Exception("No ms peak associated with the molecular formula instance %s", self)    
    -227    
    -228    
    -229    def _calc_mz_confidence(self, mean=0):
    -230        """Calculate the m/z confidence of the molecular formula, based on the experimental m/z and the calculated m/z.
    -231
    -232        Parameters
    -233        ----------
    -234        mean : int, optional
    -235            The mean of the m/z error, by default 0
    -236        
    -237        """
    -238        
    -239        # predicted std not set, using 0.3
    -240        if not self._mspeak_parent.predicted_std: self._mspeak_parent.predicted_std = 1.66
    -241        
    -242        #print( self._mspeak_parent.predicted_std)
    -243        
    -244        return exp(-1 * (power((self.mz_error - mean), 2) / (2 * power(self._mspeak_parent.predicted_std, 2))))
    -245    
    -246    def _calc_isotopologue_confidence(self):
    -247        """Calculate the isotopologue confidence of the molecular formula, based on the isotopologue similarity.
    -248
    -249        Returns
    -250        -------
    -251        float
    -252            The isotopologue confidence of the molecular formula.
    -253        """
    -254
    -255        if self.is_isotopologue:
    -256            # confidence of isotopologue is pure mz error 
    -257            # TODO add more features here 
    -258            
    -259            mformula_index = self.mono_isotopic_formula_index
    -260            mspeak_index = self.mspeak_index_mono_isotopic
    -261
    -262            mspeak = self._mspeak_parent._ms_parent[mspeak_index]
    -263            
    -264            expected_isotopologues = mspeak[mformula_index].expected_isotopologues
    -265            
    -266            mono_mz = mspeak[mformula_index].mz_calc
    -267            mono_abundance = mspeak.abundance
    -268
    -269        else:
    -270
    -271            mono_mz = self.mz_calc
    -272            mono_abundance = self._mspeak_parent.abundance
    -273
    -274            expected_isotopologues = self.expected_isotopologues
    -275            # has isotopologues based on current dinamic range
    -276            
    -277        if expected_isotopologues:
    -278            
    -279            dict_mz_abund_ref = {'mz': [mono_mz], 'abundance': [mono_abundance]}
    -280            
    -281            # get reference data
    -282            for mf in expected_isotopologues:
    -283                dict_mz_abund_ref['abundance'].append(mf.abundance_calc)
    -284                dict_mz_abund_ref['mz'].append(mf.mz_calc)
    -285
    -286            dict_mz_abund_exp = {mono_mz: mono_abundance}
    -287            
    -288            # get experimental data
    -289            for mf in expected_isotopologues:
    -290                
    -291                # molecular formula has been assigned to a peak
    -292                if mf._mspeak_parent:
    -293                    #stores mspeak abundance
    -294                    dict_mz_abund_exp[mf.mz_calc] = mf._mspeak_parent.abundance
    -295                    
    -296                else:
    -297                    # fill missing mz with abundance 0 and mz error score of 0
    -298                    dict_mz_abund_exp[mf.mz_calc] = nextafter(0, 1)
    -299            
    -300            distance = SpectralSimilarity(dict_mz_abund_exp, dict_mz_abund_ref).manhattan_distance()
    -301            correlation = 1 - self.normalize_distance(distance, [0, 2])
    -302            #correlation = dwt_correlation(dict_mz_abund_exp, dict_mz_abund_ref)
    -303            #correlation = cosine_correlation(dict_mz_abund_exp, dict_mz_abund_ref)
    -304            
    -305            if correlation == 1:
    -306                print(dict_mz_abund_exp, dict_mz_abund_ref)
    -307            if isnan(correlation):
    -308                #print(dict_mz_abund_exp, dict_mz_abund_ref)
    -309                correlation = 0.00001
    -310        
    -311        else:
    -312            
    -313            # no isotopologue expected giving a correlation score of 0.0 but it needs optimization
    -314            correlation = 0.0
    -315
    -316        return correlation
    +225            raise Exception(
    +226                "method needs to be ppm or ppb, you have entered %s" % method
    +227            )
    +228
    +229        if self._mspeak_parent.mz_exp:
    +230            self._assignment_mass_error = (
    +231                (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc
    +232            ) * multi_factor
    +233
    +234            return (
    +235                (self._mspeak_parent.mz_exp - self.mz_calc) / self.mz_calc
    +236            ) * multi_factor
    +237
    +238        else:
    +239            raise Exception(
    +240                "No ms peak associated with the molecular formula instance %s", self
    +241            )
    +242
    +243    def _calc_mz_confidence(self, mean=0):
    +244        """Calculate the m/z confidence of the molecular formula, based on the experimental m/z and the calculated m/z.
    +245
    +246        Parameters
    +247        ----------
    +248        mean : int, optional
    +249            The mean of the m/z error, by default 0
    +250
    +251        """
    +252
    +253        # predicted std not set, using 0.3
    +254        if not self._mspeak_parent.predicted_std:
    +255            self._mspeak_parent.predicted_std = 1.66
    +256
    +257        # print( self._mspeak_parent.predicted_std)
    +258
    +259        return exp(
    +260            -1
    +261            * (
    +262                power((self.mz_error - mean), 2)
    +263                / (2 * power(self._mspeak_parent.predicted_std, 2))
    +264            )
    +265        )
    +266
    +267    def _calc_isotopologue_confidence(self):
    +268        """Calculate the isotopologue confidence of the molecular formula, based on the isotopologue similarity.
    +269
    +270        Returns
    +271        -------
    +272        float
    +273            The isotopologue confidence of the molecular formula.
    +274        """
    +275
    +276        if self.is_isotopologue:
    +277            # confidence of isotopologue is pure mz error
    +278            # TODO add more features here
    +279
    +280            mformula_index = self.mono_isotopic_formula_index
    +281            mspeak_index = self.mspeak_index_mono_isotopic
    +282
    +283            mspeak = self._mspeak_parent._ms_parent[mspeak_index]
    +284
    +285            expected_isotopologues = mspeak[mformula_index].expected_isotopologues
    +286
    +287            mono_mz = mspeak[mformula_index].mz_calc
    +288            mono_abundance = mspeak.abundance
    +289
    +290        else:
    +291            mono_mz = self.mz_calc
    +292            mono_abundance = self._mspeak_parent.abundance
    +293
    +294            expected_isotopologues = self.expected_isotopologues
    +295            # has isotopologues based on current dinamic range
    +296
    +297        if expected_isotopologues:
    +298            dict_mz_abund_ref = {"mz": [mono_mz], "abundance": [mono_abundance]}
    +299
    +300            # get reference data
    +301            for mf in expected_isotopologues:
    +302                dict_mz_abund_ref["abundance"].append(mf.abundance_calc)
    +303                dict_mz_abund_ref["mz"].append(mf.mz_calc)
    +304
    +305            dict_mz_abund_exp = {mono_mz: mono_abundance}
    +306
    +307            # get experimental data
    +308            for mf in expected_isotopologues:
    +309                # molecular formula has been assigned to a peak
    +310                if mf._mspeak_parent:
    +311                    # stores mspeak abundance
    +312                    dict_mz_abund_exp[mf.mz_calc] = mf._mspeak_parent.abundance
    +313
    +314                else:
    +315                    # fill missing mz with abundance 0 and mz error score of 0
    +316                    dict_mz_abund_exp[mf.mz_calc] = nextafter(0, 1)
     317
    -318    def normalize_distance(self, dist, dist_range):
    -319        """
    -320        Normalize the distance value.
    -321
    -322        Parameters
    -323        ----------
    -324        dist : float
    -325            The distance value to be normalized.
    -326        dist_range : list
    -327            The range of the distance value.
    -328
    -329        """
    -330        result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0])
    -331
    -332        if result < 0:
    -333            result = 0.
    -334        elif result > 1:
    -335            result = 1.
    +318            distance = SpectralSimilarity(
    +319                dict_mz_abund_exp, dict_mz_abund_ref
    +320            ).manhattan_distance()
    +321            correlation = 1 - self.normalize_distance(distance, [0, 2])
    +322            # correlation = dwt_correlation(dict_mz_abund_exp, dict_mz_abund_ref)
    +323            # correlation = cosine_correlation(dict_mz_abund_exp, dict_mz_abund_ref)
    +324
    +325            if correlation == 1:
    +326                print(dict_mz_abund_exp, dict_mz_abund_ref)
    +327            if isnan(correlation):
    +328                # print(dict_mz_abund_exp, dict_mz_abund_ref)
    +329                correlation = 0.00001
    +330
    +331        else:
    +332            # no isotopologue expected giving a correlation score of 0.0 but it needs optimization
    +333            correlation = 0.0
    +334
    +335        return correlation
     336
    -337        return result
    -338
    -339    def subtract_formula(self, formula_obj, formated=True):
    -340        """Subtract a formula from the current formula object
    -341        
    -342        Parameters
    -343        ----------
    -344        formula_obj : MolecularFormula
    -345            MolecularFormula object to be subtracted from the current formula object
    -346        formated : bool, optional
    -347            If True, returns the formula in string format, by default True
    -348            
    -349        """
    -350        subtraction = {}
    -351        for atom, value in self.to_dict().items():
    -352            if atom != Labels.ion_type:
    -353                if formula_obj.get(atom):
    -354                    #value_subtraction = value - formula_obj.get(atom)
    -355                    if value - formula_obj.get(atom) > 0:
    -356                        subtraction[atom] = value - formula_obj.get(atom)
    -357                else:
    -358                    subtraction[atom] = value
    -359        if formated:            
    -360            SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
    -361            SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
    -362        else:
    -363            SUB = str.maketrans("0123456789", "0123456789")
    -364            SUP = str.maketrans("0123456789", "0123456789")
    -365        formula_srt = ''
    -366        for atom in Atoms.atoms_order:
    -367            if atom in subtraction.keys():
    -368                formula_srt += atom.translate(SUP) + str(int(subtraction.get(atom))).translate(SUB)
    -369        
    -370        return formula_srt
    -371                   
    -372
    -373    def _calc_average_mz_score(self):
    -374        """Calculate the average m/z error score of the molecular formula identification, including the isotopologues."""
    -375        if self.is_isotopologue:
    -376            # confidence of isotopologue is pure mz error 
    -377            # TODO add more features here 
    -378            
    -379            mformula_index = self.mono_isotopic_formula_index
    -380            mspeak_index = self.mspeak_index_mono_isotopic
    -381
    -382            mspeak = self._mspeak_parent._ms_parent[mspeak_index]
    -383            
    -384            expected_isotopologues = mspeak[mformula_index].expected_isotopologues
    -385
    -386        else:
    -387            
    -388            expected_isotopologues = self.expected_isotopologues
    -389            # has isotopologues based on current dinamic range
    -390        
    -391        accumulated_mz_score = [self.mz_error_score]
    -392        
    -393        if expected_isotopologues:
    -394            
    -395            for mf in expected_isotopologues:
    -396                # molecular formula has been assigned to a peak
    -397                if mf._mspeak_parent:
    -398                    #stores mspeak abundance
    -399                    accumulated_mz_score.append(mf.mz_error_score)
    -400                else:
    -401                    # fill missing mz with abundance 0 and mz error score of 0
    -402                    accumulated_mz_score.append(0.0)
    +337    def normalize_distance(self, dist, dist_range):
    +338        """
    +339        Normalize the distance value.
    +340
    +341        Parameters
    +342        ----------
    +343        dist : float
    +344            The distance value to be normalized.
    +345        dist_range : list
    +346            The range of the distance value.
    +347
    +348        """
    +349        result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0])
    +350
    +351        if result < 0:
    +352            result = 0.0
    +353        elif result > 1:
    +354            result = 1.0
    +355
    +356        return result
    +357
    +358    def subtract_formula(self, formula_obj, formated=True):
    +359        """Subtract a formula from the current formula object
    +360
    +361        Parameters
    +362        ----------
    +363        formula_obj : MolecularFormula
    +364            MolecularFormula object to be subtracted from the current formula object
    +365        formated : bool, optional
    +366            If True, returns the formula in string format, by default True
    +367
    +368        """
    +369        subtraction = {}
    +370        for atom, value in self.to_dict().items():
    +371            if atom != Labels.ion_type:
    +372                if formula_obj.get(atom):
    +373                    # value_subtraction = value - formula_obj.get(atom)
    +374                    if value - formula_obj.get(atom) > 0:
    +375                        subtraction[atom] = value - formula_obj.get(atom)
    +376                else:
    +377                    subtraction[atom] = value
    +378        if formated:
    +379            SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
    +380            SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
    +381        else:
    +382            SUB = str.maketrans("0123456789", "0123456789")
    +383            SUP = str.maketrans("0123456789", "0123456789")
    +384        formula_srt = ""
    +385        for atom in Atoms.atoms_order:
    +386            if atom in subtraction.keys():
    +387                formula_srt += atom.translate(SUP) + str(
    +388                    int(subtraction.get(atom))
    +389                ).translate(SUB)
    +390
    +391        return formula_srt
    +392
    +393    def _calc_average_mz_score(self):
    +394        """Calculate the average m/z error score of the molecular formula identification, including the isotopologues."""
    +395        if self.is_isotopologue:
    +396            # confidence of isotopologue is pure mz error
    +397            # TODO add more features here
    +398
    +399            mformula_index = self.mono_isotopic_formula_index
    +400            mspeak_index = self.mspeak_index_mono_isotopic
    +401
    +402            mspeak = self._mspeak_parent._ms_parent[mspeak_index]
     403
    -404        average_mz_score = sum(accumulated_mz_score)/len(accumulated_mz_score)
    -405        
    -406        if isnan(average_mz_score):
    -407                average_mz_score = 0.0
    -408
    -409        return average_mz_score       
    -410
    -411    def _calc_confidence_score(self):
    -412        """Calculate the confidence score of the molecular formula identification, including the isotopologues."""
    -413        
    -414        ### Assumes random mass error, i.e, spectrum has to be calibrated and with zero mean
    -415        #### TODO: Add spectral similarity 
    -416
    -417        ## Parameters
    -418        #----------
    -419        #### mz_exp:
    -420        ####    Experimental m/z 
    -421        #### predicted_std:
    -422        ####    Standart deviation calculated from Resolving power optimization or constant set by User 
    -423        
    -424        
    -425        isotopologue_correlation = self.isotopologue_similarity
    -426        average_mz_score = self.average_mz_error_score
    -427        # add monoisotopic peak mz error score
    -428        
    -429        # calculate score with higher weight for mass error
    -430        #score = power(((isotopologue_correlation) * (power(average_mz_score,3))),1/4)
    -431        a = self._mspeak_parent._ms_parent.molecular_search_settings.mz_error_score_weight
    -432        b = self._mspeak_parent._ms_parent.molecular_search_settings.isotopologue_score_weight
    -433        
    -434        score = (isotopologue_correlation*b) + (average_mz_score*a)
    -435        
    -436        #if round(average_mz_score,2) == 0.00:
    -437        #    print(a,b, average_mz_score, isotopologue_correlation, score, isotopologue_correlation*b)
    -438        
    -439
    -440        return score
    +404            expected_isotopologues = mspeak[mformula_index].expected_isotopologues
    +405
    +406        else:
    +407            expected_isotopologues = self.expected_isotopologues
    +408            # has isotopologues based on current dinamic range
    +409
    +410        accumulated_mz_score = [self.mz_error_score]
    +411
    +412        if expected_isotopologues:
    +413            for mf in expected_isotopologues:
    +414                # molecular formula has been assigned to a peak
    +415                if mf._mspeak_parent:
    +416                    # stores mspeak abundance
    +417                    accumulated_mz_score.append(mf.mz_error_score)
    +418                else:
    +419                    # fill missing mz with abundance 0 and mz error score of 0
    +420                    accumulated_mz_score.append(0.0)
    +421
    +422        average_mz_score = sum(accumulated_mz_score) / len(accumulated_mz_score)
    +423
    +424        if isnan(average_mz_score):
    +425            average_mz_score = 0.0
    +426
    +427        return average_mz_score
    +428
    +429    def _calc_confidence_score(self):
    +430        """Calculate the confidence score of the molecular formula identification, including the isotopologues."""
    +431
    +432        ### Assumes random mass error, i.e, spectrum has to be calibrated and with zero mean
    +433        #### TODO: Add spectral similarity
    +434
    +435        ## Parameters
    +436        # ----------
    +437        #### mz_exp:
    +438        ####    Experimental m/z
    +439        #### predicted_std:
    +440        ####    Standart deviation calculated from Resolving power optimization or constant set by User
     441
    -442
    -443    def _calc_abundance_error(self, method='percentile'):
    -444        """Calculate the abundance error of the molecular formula, based on the experimental abundance and the calculated abundance.
    -445        
    -446        Parameters
    -447        ----------
    -448        method : str, optional
    -449            The method to calculate the abundance error, by default 'percentile', but can be 'ppm' or 'ppb'
    -450            
    -451        Returns
    -452        -------
    -453        float
    -454            The abundance error of the molecular formula.
    -455        
    -456        Raises
    -457        ------
    -458        Exception
    -459            If isotopologues were not calculated.
    -460        """
    -461       
    -462        mult_factor = 100
    -463
    -464        iso_abundance = self._mspeak_parent.abundance
    -465        mono_abundance =self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].abundance
    -466
    -467        if self.prob_ratio:
    -468            
    -469            theor_abundance = mono_abundance* self.prob_ratio
    -470            #self.parent need to have a MassSpecPeak associated with the MolecularFormula class
    -471            return ((theor_abundance - iso_abundance )/theor_abundance)*mult_factor
    -472        
    -473        else:
    -474            
    -475            raise Exception("Please calc_isotopologues")    
    +442        isotopologue_correlation = self.isotopologue_similarity
    +443        average_mz_score = self.average_mz_error_score
    +444        # add monoisotopic peak mz error score
    +445
    +446        # calculate score with higher weight for mass error
    +447        # score = power(((isotopologue_correlation) * (power(average_mz_score,3))),1/4)
    +448        a = self._mspeak_parent._ms_parent.molecular_search_settings.mz_error_score_weight
    +449        b = self._mspeak_parent._ms_parent.molecular_search_settings.isotopologue_score_weight
    +450
    +451        score = (isotopologue_correlation * b) + (average_mz_score * a)
    +452
    +453        # if round(average_mz_score,2) == 0.00:
    +454        #    print(a,b, average_mz_score, isotopologue_correlation, score, isotopologue_correlation*b)
    +455
    +456        return score
    +457
    +458    def _calc_abundance_error(self, method="percentile"):
    +459        """Calculate the abundance error of the molecular formula, based on the experimental abundance and the calculated abundance.
    +460
    +461        Parameters
    +462        ----------
    +463        method : str, optional
    +464            The method to calculate the abundance error, by default 'percentile', but can be 'ppm' or 'ppb'
    +465
    +466        Returns
    +467        -------
    +468        float
    +469            The abundance error of the molecular formula.
    +470
    +471        Raises
    +472        ------
    +473        Exception
    +474            If isotopologues were not calculated.
    +475        """
     476
    -477    def _calc_area_error(self, method='percentile'):
    -478        """Calculate the area error of the molecular formula, based on the experimental area and the calculated area.
    -479
    -480        Parameters
    -481        ----------
    -482        method : str, optional
    -483            The method to calculate the area error, by default 'percentile', but can be 'ppm' or 'ppb'
    -484        
    -485        Returns
    -486        -------
    -487        float
    -488            The area error of the molecular formula.
    -489
    -490        Raises
    -491        ------
    -492        Exception
    -493            If isotopologues were not calculated.
    -494        """
    -495       
    -496        mult_factor = 100
    -497        
    -498        iso_area = self._mspeak_parent.area
    -499        mono_area =self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].area
    -500
    -501        if self.prob_ratio:
    -502            
    -503            if mono_area and iso_area: 
    +477        mult_factor = 100
    +478
    +479        iso_abundance = self._mspeak_parent.abundance
    +480        mono_abundance = self._mspeak_parent._ms_parent[
    +481            self.mspeak_index_mono_isotopic
    +482        ].abundance
    +483
    +484        if self.prob_ratio:
    +485            theor_abundance = mono_abundance * self.prob_ratio
    +486            # self.parent need to have a MassSpecPeak associated with the MolecularFormula class
    +487            return ((theor_abundance - iso_abundance) / theor_abundance) * mult_factor
    +488
    +489        else:
    +490            raise Exception("Please calc_isotopologues")
    +491
    +492    def _calc_area_error(self, method="percentile"):
    +493        """Calculate the area error of the molecular formula, based on the experimental area and the calculated area.
    +494
    +495        Parameters
    +496        ----------
    +497        method : str, optional
    +498            The method to calculate the area error, by default 'percentile', but can be 'ppm' or 'ppb'
    +499
    +500        Returns
    +501        -------
    +502        float
    +503            The area error of the molecular formula.
     504
    -505                #exp_ratio = iso_area/mono_area  
    -506                          
    -507                area_calc = mono_area* self.prob_ratio
    -508
    -509                #self.parent need to have a MassSpecPeak associated with the MolecularFormula class
    -510                return ((area_calc - iso_area )/area_calc)*mult_factor
    -511                #return ((self.prob_ratio - exp_ratio )/self.prob_ratio)*mult_factor
    -512            
    -513            else:
    -514                
    -515                #centroid mass spectrum
    -516                return 0
    -517        else:
    -518            
    -519            raise Exception("Please calc_isotopologues")    
    -520
    -521    def _calc_aromaticity_index_mod(self):
    -522        """Calculate the modified aromaticity index of the molecular formula.
    -523        
    -524        Returns
    -525        -------
    -526        float
    -527            The aromaticity index of the molecular formula.
    -528
    -529        Notes
    -530        -----
    -531        Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386
    -532        corrected in https://doi.org/10.1002/rcm.7433
    -533        """
    -534        # Prepare empty dictionary to store the number of atoms of each element
    -535        ai_es = {'C':0, 'H':0, 'O':0, 'N':0, 'S':0}
    -536
    -537        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
    -538        for element in ai_es:
    -539            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
    -540            for element_w_iso in elements_w_iso:
    -541                if element_w_iso in self._d_molecular_formula:
    -542                    ai_es[element] += self._d_molecular_formula[element_w_iso]
    -543        
    -544        ai_n = 1 + ai_es['C'] - (0.5 * ai_es['O']) - ai_es['S'] - (0.5 * (ai_es['N'] + ai_es['H']))
    -545        ai_d = ai_es['C'] - (0.5 * ai_es['O']) - ai_es['N'] - ai_es['S']
    -546
    -547        ai = ai_n/ai_d
    -548
    -549        if ai < 0:
    -550            ai = 0
    -551        if ai > 1:
    -552            ai = 1
    -553
    -554        return ai
    -555    
    -556    def _calc_aromaticity_index(self):
    -557        """Calculate the aromaticity index of the molecular formula.
    -558        
    -559        Returns
    -560        -------
    -561        float
    -562            The aromaticity index of the molecular formula.
    +505        Raises
    +506        ------
    +507        Exception
    +508            If isotopologues were not calculated.
    +509        """
    +510
    +511        mult_factor = 100
    +512
    +513        iso_area = self._mspeak_parent.area
    +514        mono_area = self._mspeak_parent._ms_parent[self.mspeak_index_mono_isotopic].area
    +515
    +516        if self.prob_ratio:
    +517            if mono_area and iso_area:
    +518                # exp_ratio = iso_area/mono_area
    +519
    +520                area_calc = mono_area * self.prob_ratio
    +521
    +522                # self.parent need to have a MassSpecPeak associated with the MolecularFormula class
    +523                return ((area_calc - iso_area) / area_calc) * mult_factor
    +524                # return ((self.prob_ratio - exp_ratio )/self.prob_ratio)*mult_factor
    +525
    +526            else:
    +527                # centroid mass spectrum
    +528                return 0
    +529        else:
    +530            raise Exception("Please calc_isotopologues")
    +531
    +532    def _calc_aromaticity_index_mod(self):
    +533        """Calculate the modified aromaticity index of the molecular formula.
    +534
    +535        Returns
    +536        -------
    +537        float
    +538            The aromaticity index of the molecular formula.
    +539
    +540        Notes
    +541        -----
    +542        Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386
    +543        corrected in https://doi.org/10.1002/rcm.7433
    +544        """
    +545        # Prepare empty dictionary to store the number of atoms of each element
    +546        ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0}
    +547
    +548        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
    +549        for element in ai_es:
    +550            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
    +551            for element_w_iso in elements_w_iso:
    +552                if element_w_iso in self._d_molecular_formula:
    +553                    ai_es[element] += self._d_molecular_formula[element_w_iso]
    +554
    +555        ai_n = (
    +556            1
    +557            + ai_es["C"]
    +558            - (0.5 * ai_es["O"])
    +559            - ai_es["S"]
    +560            - (0.5 * (ai_es["N"] + ai_es["H"]))
    +561        )
    +562        ai_d = ai_es["C"] - (0.5 * ai_es["O"]) - ai_es["N"] - ai_es["S"]
     563
    -564        Notes
    -565        -----
    -566        Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386
    -567        corrected in https://doi.org/10.1002/rcm.7433
    -568        """
    -569        # Prepare empty dictionary to store the number of atoms of each element
    -570        ai_es = {'C':0, 'H':0, 'O':0, 'N':0, 'S':0}
    -571
    -572        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
    -573        for element in ai_es:
    -574            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
    -575            for element_w_iso in elements_w_iso:
    -576                if element_w_iso in self._d_molecular_formula:
    -577                    ai_es[element] += self._d_molecular_formula[element_w_iso]
    -578        
    -579                ai_n = 1 + ai_es['C'] - (ai_es['O']) - ai_es['S'] - (0.5 * (ai_es['N'] + ai_es['H']))
    -580        ai_d = ai_es['C'] - (ai_es['O']) - ai_es['N'] - ai_es['S']
    -581
    -582        ai = ai_n/ai_d
    -583
    -584        if ai < 0:
    -585            ai = 0
    -586        if ai > 1:
    -587            ai = 1
    +564        ai = ai_n / ai_d
    +565
    +566        if ai < 0:
    +567            ai = 0
    +568        if ai > 1:
    +569            ai = 1
    +570
    +571        return ai
    +572
    +573    def _calc_aromaticity_index(self):
    +574        """Calculate the aromaticity index of the molecular formula.
    +575
    +576        Returns
    +577        -------
    +578        float
    +579            The aromaticity index of the molecular formula.
    +580
    +581        Notes
    +582        -----
    +583        Source Koch and Dittmar, 2006 https://doi.org/10.1002/rcm.2386
    +584        corrected in https://doi.org/10.1002/rcm.7433
    +585        """
    +586        # Prepare empty dictionary to store the number of atoms of each element
    +587        ai_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0}
     588
    -589        return ai
    -590        
    -591    def _calc_nosc(self):
    -592        """Calculate the average nominal oxidation state of carbon
    -593        
    -594        Returns
    -595        -------
    -596        float
    -597            The average nominal oxidation state of carbon
    -598
    -599        Notes
    -600        -----
    -601        Source LaRowe and Van Cappellen, 2011 https://doi.org/10.1016/j.gca.2011.01.020
    -602        """
    -603        # Prepare empty dictionary to store the number of atoms of each element
    -604        nosc_es = {'C':0, 'H':0, 'O':0, 'N':0, 'S':0, 'P':0}
    -605
    -606        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
    -607        for element in nosc_es:
    -608            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
    -609            for element_w_iso in elements_w_iso:
    -610                if element_w_iso in self._d_molecular_formula:
    -611                    nosc_es[element] += self._d_molecular_formula[element_w_iso]
    -612    
    -613        nosc = -( (4 * nosc_es['C'] + nosc_es['H'] - 3 * nosc_es['N'] - 2 * nosc_es['O'] + 5 * nosc_es['P'] - 2 * nosc_es['S']) / nosc_es['C']) + 4
    -614
    -615        # If nosc is infinite or negative infinity, set it to nan
    -616        if nosc == float('inf') or nosc == float('-inf'):
    -617            nosc = float('nan')
    -618
    -619        return nosc
    -620
    -621    @property
    -622    def dbe_ai(self):
    -623        """Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens."""
    -624            
    -625        carbons =  self._d_molecular_formula.get('C')
    -626        hydrogens = self._d_molecular_formula.get('H')
    -627        oxygens = self._d_molecular_formula.get('O')
    -628        return 1 + (((2*carbons) - hydrogens - (2*oxygens))*0.5)
    -629
    -630    def _calc_dbe(self):
    -631        """Calculate the double bond equivalent (DBE) of the molecular formula"""
    -632            
    -633        individual_dbe = 0
    -634        
    -635        for atom in self._d_molecular_formula.keys():
    -636            
    -637            if atom != Labels.ion_type:
    -638                
    -639                n_atom = int(self._d_molecular_formula.get(atom))
    -640                
    -641                clean_atom = ''.join([i for i in atom if not i.isdigit()]) 
    -642                
    -643                if self._mspeak_parent:
    -644                    valencia = self._mspeak_parent._ms_parent.molecular_search_settings.used_atom_valences.get(clean_atom)
    -645                else:
    -646                    valencia = MSParameters.molecular_search.used_atom_valences.get(clean_atom)
    -647                #valencia = Atoms.atoms_covalence.get(atom)
    -648                
    -649                if type(valencia) is tuple:
    -650                    valencia = valencia[0]
    -651                if valencia > 0:
    -652                    #print atom, valencia, n_atom, individual_dbe
    -653                    individual_dbe = individual_dbe + (n_atom * (valencia - 2))
    -654                else:
    -655                    continue
    -656        
    -657        dbe = 1 + (0.5 * individual_dbe)
    -658        
    -659        if self.ion_type == Labels.adduct_ion:
    -660            dbe = dbe + 0.5
    -661        
    -662        return dbe
    -663
    -664    def _calc_kmd(self, dict_base):
    -665        """Calculate the Kendrick mass defect (KMD) of the molecular formula, based on the monoisotopic mass and the Kendrick mass.
    -666
    -667        Parameters
    -668        ----------
    -669        dict_base : dict
    -670            The dictionary of the base formula, e.g. {'C':1, 'H':2}
    -671        
    -672        Returns
    -673        -------
    -674        tuple
    -675            The tuple of the KMD, Kendrick mass, and nominal Kendrick mass.
    -676        """
    -677        mass = 0
    -678        for atom in dict_base.keys():
    -679            mass = mass + Atoms.atomic_masses.get(atom) * dict_base.get(atom)
    -680        
    -681        kendrick_mass = (int(mass)/mass)* self.mz_calc
    -682        
    -683        nominal_km =int(kendrick_mass)
    -684       
    -685        kmd = (nominal_km - kendrick_mass) * 100
    -686        
    -687        #kmd = (nominal_km - km) * 1
    -688        kmd  = round(kmd,0)
    -689        
    -690        return kmd, kendrick_mass, nominal_km
    -691
    -692    def _cal_isotopologues(self, formula_dict, min_abundance, current_abundance, ms_dynamic_range):
    -693        """Calculate the isotopologues for a given molecular formula.
    +589        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
    +590        for element in ai_es:
    +591            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
    +592            for element_w_iso in elements_w_iso:
    +593                if element_w_iso in self._d_molecular_formula:
    +594                    ai_es[element] += self._d_molecular_formula[element_w_iso]
    +595
    +596                ai_n = (
    +597                    1
    +598                    + ai_es["C"]
    +599                    - (ai_es["O"])
    +600                    - ai_es["S"]
    +601                    - (0.5 * (ai_es["N"] + ai_es["H"]))
    +602                )
    +603        ai_d = ai_es["C"] - (ai_es["O"]) - ai_es["N"] - ai_es["S"]
    +604
    +605        ai = ai_n / ai_d
    +606
    +607        if ai < 0:
    +608            ai = 0
    +609        if ai > 1:
    +610            ai = 1
    +611
    +612        return ai
    +613
    +614    def _calc_nosc(self):
    +615        """Calculate the average nominal oxidation state of carbon
    +616
    +617        Returns
    +618        -------
    +619        float
    +620            The average nominal oxidation state of carbon
    +621
    +622        Notes
    +623        -----
    +624        Source LaRowe and Van Cappellen, 2011 https://doi.org/10.1016/j.gca.2011.01.020
    +625        """
    +626        # Prepare empty dictionary to store the number of atoms of each element
    +627        nosc_es = {"C": 0, "H": 0, "O": 0, "N": 0, "S": 0, "P": 0}
    +628
    +629        # Count the number of atoms of each element in the molecular formula, inclusive of isotopes
    +630        for element in nosc_es:
    +631            elements_w_iso = [element] + Atoms.isotopes.get(element)[1]
    +632            for element_w_iso in elements_w_iso:
    +633                if element_w_iso in self._d_molecular_formula:
    +634                    nosc_es[element] += self._d_molecular_formula[element_w_iso]
    +635
    +636        nosc = (
    +637            -(
    +638                (
    +639                    4 * nosc_es["C"]
    +640                    + nosc_es["H"]
    +641                    - 3 * nosc_es["N"]
    +642                    - 2 * nosc_es["O"]
    +643                    + 5 * nosc_es["P"]
    +644                    - 2 * nosc_es["S"]
    +645                )
    +646                / nosc_es["C"]
    +647            )
    +648            + 4
    +649        )
    +650
    +651        # If nosc is infinite or negative infinity, set it to nan
    +652        if nosc == float("inf") or nosc == float("-inf"):
    +653            nosc = float("nan")
    +654
    +655        return nosc
    +656
    +657    @property
    +658    def dbe_ai(self):
    +659        """Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens."""
    +660
    +661        carbons = self._d_molecular_formula.get("C")
    +662        hydrogens = self._d_molecular_formula.get("H")
    +663        oxygens = self._d_molecular_formula.get("O")
    +664        return 1 + (((2 * carbons) - hydrogens - (2 * oxygens)) * 0.5)
    +665
    +666    def _calc_dbe(self):
    +667        """Calculate the double bond equivalent (DBE) of the molecular formula"""
    +668
    +669        individual_dbe = 0
    +670
    +671        for atom in self._d_molecular_formula.keys():
    +672            if atom != Labels.ion_type:
    +673                n_atom = int(self._d_molecular_formula.get(atom))
    +674
    +675                clean_atom = "".join([i for i in atom if not i.isdigit()])
    +676
    +677                if self._mspeak_parent:
    +678                    valencia = self._mspeak_parent._ms_parent.molecular_search_settings.used_atom_valences.get(
    +679                        clean_atom
    +680                    )
    +681                else:
    +682                    valencia = MSParameters.molecular_search.used_atom_valences.get(
    +683                        clean_atom
    +684                    )
    +685                # valencia = Atoms.atoms_covalence.get(atom)
    +686
    +687                if type(valencia) is tuple:
    +688                    valencia = valencia[0]
    +689                if valencia > 0:
    +690                    # print atom, valencia, n_atom, individual_dbe
    +691                    individual_dbe = individual_dbe + (n_atom * (valencia - 2))
    +692                else:
    +693                    continue
     694
    -695        Parameters
    -696        ----------
    -697        formula_dict : dict
    -698            The dictionary of the molecular formula. Example: {'C':10, 'H', 20, 'O', 2}
    -699        min_abundance : float
    -700            The minimum abundance.
    -701        current_abundance : float
    -702            The current monoisotopic abundance.
    -703        ms_dynamic_range : float
    -704            The dynamic range.
    -705
    -706        
    -707        Notes
    -708        -----
    -709        This is the primary function to look for isotopologues based on a monoisotopic molecular formula. 
    -710        It needs to be expanded to include the calculation of resolving power and plot the results.
    -711        Use this function at runtime during the molecular identification algorithm only when a positive ID is observed to the monoisotopic ion.
    -712        Use this function to simulate mass spectrum (needs resolving power calculation to be fully operational).
    -713        It might break when adding non-conventional atoms (not yet tested).
    -714        This function employs the IsoSpecPy library https://github.com/MatteoLacki/IsoSpec.
    -715
    -716
    -717        """
    -718             
    -719        #last update on 05-26-2020, Yuri E. Corilo 
    +695        dbe = 1 + (0.5 * individual_dbe)
    +696
    +697        if self.ion_type == Labels.adduct_ion:
    +698            dbe = dbe + 0.5
    +699
    +700        return dbe
    +701
    +702    def _calc_kmd(self, dict_base):
    +703        """Calculate the Kendrick mass defect (KMD) of the molecular formula, based on the monoisotopic mass and the Kendrick mass.
    +704
    +705        Parameters
    +706        ----------
    +707        dict_base : dict
    +708            The dictionary of the base formula, e.g. {'C':1, 'H':2}
    +709
    +710        Returns
    +711        -------
    +712        tuple
    +713            The tuple of the KMD, Kendrick mass, and nominal Kendrick mass.
    +714        """
    +715        mass = 0
    +716        for atom in dict_base.keys():
    +717            mass = mass + Atoms.atomic_masses.get(atom) * dict_base.get(atom)
    +718
    +719        kendrick_mass = (int(mass) / mass) * self.mz_calc
     720
    -721        # updated it to reflect min possible mass peak abundance
    -722        cut_off_to_IsoSpeccPy = 1-(1/ms_dynamic_range)
    -723        
    -724        #print("cut_off_to_IsoSpeccPy", cut_off_to_IsoSpeccPy, current_abundance, min_abundance, ms_dynamic_range)
    -725        #print(cut_off_to_IsoSpeccPy)
    -726        atoms_labels = (atom for atom in formula_dict.keys() if atom != Labels.ion_type and atom != 'H')
    -727       
    -728        atoms_count = []
    -729        masses_list_tuples = []
    -730        props_list_tuples = []
    -731        all_atoms_list = []
    -732        
    -733        for atom_label in atoms_labels:
    -734            
    -735            if Atoms.isotopes.get(atom_label)[1][0] is None:
    -736                'This atom_label has no heavy isotope'
    -737                atoms_count.append(formula_dict.get(atom_label))
    -738                mass = Atoms.atomic_masses.get(atom_label)
    -739                prop = Atoms.isotopic_abundance.get(atom_label)
    -740                masses_list_tuples.append([mass])
    -741                props_list_tuples.append([prop])
    -742                all_atoms_list.append(atom_label)
    -743                
    -744            else:
    -745                
    -746                isotopes_label_list = Atoms.isotopes.get(atom_label)[1]
    -747            
    -748                if len(isotopes_label_list) > 1:
    -749                    'This atom_label has two or more heavy isotope'
    -750                    isotopos_labels = [i for i in isotopes_label_list]
    -751                else:
    -752                    'This atom_label only has one heavy isotope'
    -753                    isotopos_labels = [isotopes_label_list[0]]
    -754                
    -755                #all_atoms_list.extend(isotopos_labels) 
    -756                isotopos_labels = [atom_label] + isotopos_labels
    -757                
    -758                all_atoms_list.extend(isotopos_labels)
    -759                
    -760                masses = [Atoms.atomic_masses.get(atom_label) for atom_label in isotopos_labels]
    -761                props = [Atoms.isotopic_abundance.get(atom_label) for atom_label in isotopos_labels]
    -762                
    -763                atoms_count.append(formula_dict.get(atom_label))
    -764                masses_list_tuples.append(masses)
    -765                props_list_tuples.append(props)
    -766        if legacy_isospec:
    -767            iso = IsoSpecPy.IsoSpec(atoms_count,masses_list_tuples,props_list_tuples, cut_off_to_IsoSpeccPy)
    -768            conf = iso.getConfs()
    -769            masses = conf[0]
    -770            probs = exp(conf[1])
    -771            molecular_formulas = conf[2]
    -772            #print('conf', conf)
    -773            #print('probs', conf[1])
    -774        else:
    -775            # This syntax in IsoSpecPy 2.2.2 yields the same information as the legacy approach
    -776            iso = IsoSpecPy.IsoTotalProb(atomCounts = atoms_count, isotopeMasses = masses_list_tuples, 
    -777                           isotopeProbabilities = props_list_tuples, prob_to_cover =cut_off_to_IsoSpeccPy, get_confs=True)
    -778            masses = list(iso.masses)
    -779            probs = array(list(iso.probs))
    -780            confs = list(iso.confs)
    -781
    -782            molecular_formulas = []
    -783            for x in confs:
    -784                tmplist = []
    -785                for y in x:
    -786                    tmplist.extend(list(y))
    -787                molecular_formulas.append(tmplist)
    -788
    +721        nominal_km = int(kendrick_mass)
    +722
    +723        kmd = (nominal_km - kendrick_mass) * 100
    +724
    +725        # kmd = (nominal_km - km) * 1
    +726        kmd = round(kmd, 0)
    +727
    +728        return kmd, kendrick_mass, nominal_km
    +729
    +730    def _cal_isotopologues(
    +731        self, formula_dict, min_abundance, current_abundance, ms_dynamic_range
    +732    ):
    +733        """Calculate the isotopologues for a given molecular formula.
    +734
    +735        Parameters
    +736        ----------
    +737        formula_dict : dict
    +738            The dictionary of the molecular formula. Example: {'C':10, 'H', 20, 'O', 2}
    +739        min_abundance : float
    +740            The minimum abundance.
    +741        current_abundance : float
    +742            The current monoisotopic abundance.
    +743        ms_dynamic_range : float
    +744            The dynamic range.
    +745
    +746
    +747        Notes
    +748        -----
    +749        This is the primary function to look for isotopologues based on a monoisotopic molecular formula.
    +750        It needs to be expanded to include the calculation of resolving power and plot the results.
    +751        Use this function at runtime during the molecular identification algorithm only when a positive ID is observed to the monoisotopic ion.
    +752        Use this function to simulate mass spectrum (needs resolving power calculation to be fully operational).
    +753        It might break when adding non-conventional atoms (not yet tested).
    +754        This function employs the IsoSpecPy library https://github.com/MatteoLacki/IsoSpec.
    +755
    +756
    +757        """
    +758
    +759        # last update on 05-26-2020, Yuri E. Corilo
    +760
    +761        # updated it to reflect min possible mass peak abundance
    +762        cut_off_to_IsoSpeccPy = 1 - (1 / ms_dynamic_range)
    +763
    +764        # print("cut_off_to_IsoSpeccPy", cut_off_to_IsoSpeccPy, current_abundance, min_abundance, ms_dynamic_range)
    +765        # print(cut_off_to_IsoSpeccPy)
    +766        atoms_labels = (
    +767            atom
    +768            for atom in formula_dict.keys()
    +769            if atom != Labels.ion_type and atom != "H"
    +770        )
    +771
    +772        atoms_count = []
    +773        masses_list_tuples = []
    +774        props_list_tuples = []
    +775        all_atoms_list = []
    +776
    +777        for atom_label in atoms_labels:
    +778            if Atoms.isotopes.get(atom_label)[1][0] is None:
    +779                "This atom_label has no heavy isotope"
    +780                atoms_count.append(formula_dict.get(atom_label))
    +781                mass = Atoms.atomic_masses.get(atom_label)
    +782                prop = Atoms.isotopic_abundance.get(atom_label)
    +783                masses_list_tuples.append([mass])
    +784                props_list_tuples.append([prop])
    +785                all_atoms_list.append(atom_label)
    +786
    +787            else:
    +788                isotopes_label_list = Atoms.isotopes.get(atom_label)[1]
     789
    -790
    -791        
    -792        new_formulas = []
    -793        
    -794        for isotopologue_index in range(len(iso)):
    -795            #skip_mono_isotopic 
    -796            
    -797            formula_list = molecular_formulas[isotopologue_index]
    -798            new_formula_dict = dict(zip(all_atoms_list, formula_list))
    -799            new_formula_dict[Labels.ion_type] = formula_dict.get(Labels.ion_type)
    -800            if formula_dict.get('H'):
    -801                new_formula_dict['H'] = formula_dict.get('H')
    -802
    -803            new_formulas.append({x:y for x,y in new_formula_dict.items() if y!=0})
    -804        
    -805        # formula_dict in new_formulas check if monoisotopic is being returned
    -806        if new_formulas:# and formula_dict in new_formulas:
    -807            
    -808            #print(conf)    
    -809            #print(new_formulas)    
    -810            #print(atoms_count)
    -811            #print(all_atoms_list)
    -812            #print(masses_list_tuples)
    -813            #print(props_list_tuples)
    -814            # find where monoisotopic is
    -815            index_mono = new_formulas.index(formula_dict)   
    -816            # calculate ratio iso/mono
    -817            probs = list(probs/probs[index_mono])
    -818            
    -819            # delete the monoisotopic
    -820            del probs[index_mono]
    -821            del new_formulas[index_mono]
    -822            
    -823            #print('probs_exp', probs)
    -824            for formulas, prob in zip(new_formulas, probs):
    -825                
    -826                theor_abundance = current_abundance* prob
    -827                if theor_abundance > min_abundance:
    -828                    #print(prob, theor_abundance, current_abundance)
    -829                    yield (formulas, prob)
    -830            #return zip(new_formulas, probs )
    -831    
    -832        #else:
    -833        #    return []    
    +790                if len(isotopes_label_list) > 1:
    +791                    "This atom_label has two or more heavy isotope"
    +792                    isotopos_labels = [i for i in isotopes_label_list]
    +793                else:
    +794                    "This atom_label only has one heavy isotope"
    +795                    isotopos_labels = [isotopes_label_list[0]]
    +796
    +797                # all_atoms_list.extend(isotopos_labels)
    +798                isotopos_labels = [atom_label] + isotopos_labels
    +799
    +800                all_atoms_list.extend(isotopos_labels)
    +801
    +802                masses = [
    +803                    Atoms.atomic_masses.get(atom_label)
    +804                    for atom_label in isotopos_labels
    +805                ]
    +806                props = [
    +807                    Atoms.isotopic_abundance.get(atom_label)
    +808                    for atom_label in isotopos_labels
    +809                ]
    +810
    +811                atoms_count.append(formula_dict.get(atom_label))
    +812                masses_list_tuples.append(masses)
    +813                props_list_tuples.append(props)
    +814        if legacy_isospec:
    +815            iso = IsoSpecPy.IsoSpec(
    +816                atoms_count,
    +817                masses_list_tuples,
    +818                props_list_tuples,
    +819                cut_off_to_IsoSpeccPy,
    +820            )
    +821            conf = iso.getConfs()
    +822            masses = conf[0]
    +823            probs = exp(conf[1])
    +824            molecular_formulas = conf[2]
    +825            # print('conf', conf)
    +826            # print('probs', conf[1])
    +827        else:
    +828            # This syntax in IsoSpecPy 2.2.2 yields the same information as the legacy approach
    +829            iso = IsoSpecPy.IsoTotalProb(
    +830                atomCounts=atoms_count,
    +831                isotopeMasses=masses_list_tuples,
    +832                isotopeProbabilities=props_list_tuples,
    +833                prob_to_cover=cut_off_to_IsoSpeccPy,
    +834                get_confs=True,
    +835            )
    +836            masses = list(iso.masses)
    +837            probs = array(list(iso.probs))
    +838            confs = list(iso.confs)
    +839
    +840            molecular_formulas = []
    +841            for x in confs:
    +842                tmplist = []
    +843                for y in x:
    +844                    tmplist.extend(list(y))
    +845                molecular_formulas.append(tmplist)
    +846
    +847        new_formulas = []
    +848
    +849        for isotopologue_index in range(len(iso)):
    +850            # skip_mono_isotopic
    +851
    +852            formula_list = molecular_formulas[isotopologue_index]
    +853            new_formula_dict = dict(zip(all_atoms_list, formula_list))
    +854            new_formula_dict[Labels.ion_type] = formula_dict.get(Labels.ion_type)
    +855            if formula_dict.get("H"):
    +856                new_formula_dict["H"] = formula_dict.get("H")
    +857
    +858            new_formulas.append({x: y for x, y in new_formula_dict.items() if y != 0})
    +859
    +860        # formula_dict in new_formulas check if monoisotopic is being returned
    +861        if new_formulas:  # and formula_dict in new_formulas:
    +862            # print(conf)
    +863            # print(new_formulas)
    +864            # print(atoms_count)
    +865            # print(all_atoms_list)
    +866            # print(masses_list_tuples)
    +867            # print(props_list_tuples)
    +868            # find where monoisotopic is
    +869            index_mono = new_formulas.index(formula_dict)
    +870            # calculate ratio iso/mono
    +871            probs = list(probs / probs[index_mono])
    +872
    +873            # delete the monoisotopic
    +874            del probs[index_mono]
    +875            del new_formulas[index_mono]
    +876
    +877            # print('probs_exp', probs)
    +878            for formulas, prob in zip(new_formulas, probs):
    +879                theor_abundance = current_abundance * prob
    +880                if theor_abundance > min_abundance:
    +881                    # print(prob, theor_abundance, current_abundance)
    +882                    yield (formulas, prob)
    +883            # return zip(new_formulas, probs )
    +884
    +885        # else:
    +886        #    return []
     
    @@ -1764,7 +1864,7 @@
    Attributes
    Methods
      -
    • _calc_resolving_power_low_pressure(B, T) +
    • _calc_resolving_power_low_pressure(B, T) Calculate the resolving power at low pressure.
    • _calc_resolving_power_high_pressure(B, T) Calculate the resolving power at high pressure.
    • @@ -1805,26 +1905,26 @@
      Methods
    -
    318    def normalize_distance(self, dist, dist_range):
    -319        """
    -320        Normalize the distance value.
    -321
    -322        Parameters
    -323        ----------
    -324        dist : float
    -325            The distance value to be normalized.
    -326        dist_range : list
    -327            The range of the distance value.
    -328
    -329        """
    -330        result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0])
    -331
    -332        if result < 0:
    -333            result = 0.
    -334        elif result > 1:
    -335            result = 1.
    -336
    -337        return result
    +            
    337    def normalize_distance(self, dist, dist_range):
    +338        """
    +339        Normalize the distance value.
    +340
    +341        Parameters
    +342        ----------
    +343        dist : float
    +344            The distance value to be normalized.
    +345        dist_range : list
    +346            The range of the distance value.
    +347
    +348        """
    +349        result = (dist - dist_range[0]) / (dist_range[1] - dist_range[0])
    +350
    +351        if result < 0:
    +352            result = 0.0
    +353        elif result > 1:
    +354            result = 1.0
    +355
    +356        return result
     
    @@ -1853,38 +1953,40 @@
    Parameters
    -
    339    def subtract_formula(self, formula_obj, formated=True):
    -340        """Subtract a formula from the current formula object
    -341        
    -342        Parameters
    -343        ----------
    -344        formula_obj : MolecularFormula
    -345            MolecularFormula object to be subtracted from the current formula object
    -346        formated : bool, optional
    -347            If True, returns the formula in string format, by default True
    -348            
    -349        """
    -350        subtraction = {}
    -351        for atom, value in self.to_dict().items():
    -352            if atom != Labels.ion_type:
    -353                if formula_obj.get(atom):
    -354                    #value_subtraction = value - formula_obj.get(atom)
    -355                    if value - formula_obj.get(atom) > 0:
    -356                        subtraction[atom] = value - formula_obj.get(atom)
    -357                else:
    -358                    subtraction[atom] = value
    -359        if formated:            
    -360            SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
    -361            SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
    -362        else:
    -363            SUB = str.maketrans("0123456789", "0123456789")
    -364            SUP = str.maketrans("0123456789", "0123456789")
    -365        formula_srt = ''
    -366        for atom in Atoms.atoms_order:
    -367            if atom in subtraction.keys():
    -368                formula_srt += atom.translate(SUP) + str(int(subtraction.get(atom))).translate(SUB)
    -369        
    -370        return formula_srt
    +            
    358    def subtract_formula(self, formula_obj, formated=True):
    +359        """Subtract a formula from the current formula object
    +360
    +361        Parameters
    +362        ----------
    +363        formula_obj : MolecularFormula
    +364            MolecularFormula object to be subtracted from the current formula object
    +365        formated : bool, optional
    +366            If True, returns the formula in string format, by default True
    +367
    +368        """
    +369        subtraction = {}
    +370        for atom, value in self.to_dict().items():
    +371            if atom != Labels.ion_type:
    +372                if formula_obj.get(atom):
    +373                    # value_subtraction = value - formula_obj.get(atom)
    +374                    if value - formula_obj.get(atom) > 0:
    +375                        subtraction[atom] = value - formula_obj.get(atom)
    +376                else:
    +377                    subtraction[atom] = value
    +378        if formated:
    +379            SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
    +380            SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
    +381        else:
    +382            SUB = str.maketrans("0123456789", "0123456789")
    +383            SUP = str.maketrans("0123456789", "0123456789")
    +384        formula_srt = ""
    +385        for atom in Atoms.atoms_order:
    +386            if atom in subtraction.keys():
    +387                formula_srt += atom.translate(SUP) + str(
    +388                    int(subtraction.get(atom))
    +389                ).translate(SUB)
    +390
    +391        return formula_srt
     
    diff --git a/docs/corems/molecular_formula/factory/MolecularFormulaFactory.html b/docs/corems/molecular_formula/factory/MolecularFormulaFactory.html index 44572022..0e5a8a60 100644 --- a/docs/corems/molecular_formula/factory/MolecularFormulaFactory.html +++ b/docs/corems/molecular_formula/factory/MolecularFormulaFactory.html @@ -234,797 +234,901 @@

    -
      1from corems.molecular_formula.calc.MolecularFormulaCalc import MolecularFormulaCalc
    -  2from corems.encapsulation.constant import Atoms, Labels
    -  3
    -  4import re
    +                        
      1import re
    +  2
    +  3from corems.encapsulation.constant import Atoms, Labels
    +  4from corems.molecular_formula.calc.MolecularFormulaCalc import MolecularFormulaCalc
       5
       6__author__ = "Yuri E. Corilo"
       7__date__ = "Jun 24, 2019"
       8
    -  9class MolecularFormulaBase(MolecularFormulaCalc):
    - 10    """Base class for representing a molecular formula.
    - 11
    - 12    Parameters
    - 13    ----------
    - 14    molecular_formula : dict, list, str
    - 15        The molecular formula.
    - 16    ion_charge : int
    - 17        The ion charge.
    - 18    ion_type : str, optional
    - 19        The ion type. Defaults to None.
    - 20    adduct_atom : str, optional
    - 21        The adduct atom. Defaults to None.
    - 22    mspeak_parent : _MSPeak, optional
    - 23        The parent mass spectrum peak object instance. Defaults to None.
    - 24    external_mz : float, optional
    - 25        The external m/z value. Defaults to None.
    - 26
    - 27    Raises
    - 28    ------
    - 29    TypeError
    - 30        If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT'.
    - 31
    - 32    Attributes
    - 33    ----------
    - 34    isotopologue_count_percentile : float
    - 35        The isotopologue count percentile.
    - 36    O_C : float
    - 37        The O/C ratio.
    - 38    H_C : float
    - 39        The H/C ratio.
    - 40    dbe : float
    - 41        The double bond equivalent.
    - 42    mz_nominal_calc : int
    - 43        The nominal m/z value.
    - 44    mz_error : float
    - 45        The m/z error.
    - 46    mz_calc : float
    - 47        The m/z value.
    - 48    protonated_mz : float
    - 49        The protonated or deprotonated m/z value.
    - 50    radical_mz : float
    - 51        The radical m/z value.
    - 52    neutral_mass : float
    - 53        The neutral mass.
    - 54    ion_type : str
    - 55        The ion type.
    - 56    ion_charge : int
    - 57        The ion charge.
    - 58    atoms : list
    - 59        The atoms in the molecular formula.
    - 60    confidence_score : float
    - 61        The confidence score of the molecular formula identification.
    - 62    isotopologue_similarity : float
    - 63        The isotopologue similarity score of the molecular formula identification.
    - 64    average_mz_error_score : float
    - 65        The average m/z error score of the molecular formula identification, including the isotopologues.
    - 66    mz_error_score : float
    - 67        The m/z error score of the molecular formula identification.
    - 68    kmd : float
    - 69        The Kendrick mass defect (KMD).
    - 70    kendrick_mass : float
    - 71        The Kendrick mass.
    - 72    knm : float
    - 73        The nominal Kendrick mass.
    - 74    string : str
    - 75        The molecular formula string.
    - 76    string_formated : str
    - 77        The molecular formula string formated with subscripts and superscripts.
    - 78    class_label : str
    - 79        The class label.
    - 80    class_dict : dict
    - 81        The class dictionary.
    - 82
    - 83    Methods
    - 84    -------
    - 85    * change_kendrick_base(kendrick_dict_base).
    - 86        Change the Kendrick base.
    - 87    * isotopologues(min_abundance, current_mono_abundance, dynamic_range).
    - 88        Calculate the isotopologues.
    - 89    * atoms_qnt(atom).
    - 90        Get the atom quantity.
    - 91    * atoms_symbol(atom).
    - 92        Get the atom symbol without the mass number.
    - 93    * to_dict().
    - 94        Get the molecular formula as a dictionary.
    - 95    * to_list().
    - 96        Get the molecular formula as a list.
    - 97    """    
    - 98
    - 99    def __init__(self, molecular_formula, ion_charge, ion_type=None, 
    -100                adduct_atom=None, mspeak_parent=None, external_mz=None):
    -101        # clear dictionary of atoms with 0 value
    -102        if  type(molecular_formula) is dict:
    -103                self._from_dict(molecular_formula, ion_type, adduct_atom)   
    -104        
    -105        elif type(molecular_formula) is list:
    -106                self._from_list(molecular_formula, ion_type, adduct_atom)   
    -107        
    -108        elif type(molecular_formula) is str:
    -109                self._from_str(molecular_formula, ion_type, adduct_atom)   
    -110
    -111        self._ion_charge = ion_charge
    -112        self._external_mz = external_mz
    -113        self._confidence_score = None        
    -114        self._isotopologue_similarity = None
    -115        self._mz_error_score = None
    -116        self._mass_error_average_score = None
    -117
    -118        self.is_isotopologue = False
    -119        
    -120        # parent mass spectrum peak obj instance
    -121        self._mspeak_parent = mspeak_parent
    -122
    -123        self.expected_isotopologues = []
    -124        self.mspeak_mf_isotopologues_indexes = []
    -125        
    -126        if self._mspeak_parent:
    -127            kendrick_dict_base = self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
    -128        else:
    -129            kendrick_dict_base = {'C':1, 'H':2}
    -130        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
    -131            kendrick_dict_base)  
    -132        
    -133    def __repr__(self):
    -134
    -135        return "MolecularFormula({0},{1},ion type = {2}".format(self._d_molecular_formula, self.ion_charge, self.ion_type)
    -136    
    -137    def __str__(self):
    -138
    -139        return "MolecularFormula {0}, ion_charge:{1}, ion type:{2}, m/z:{3} ".format(self.string, self.ion_charge, self.ion_type, self.mz_calc)
    -140    
    -141    def __len__(self):
    -142        
    -143        # crash if keys are not ordered
    -144        return len(self._d_molecular_formula.keys())
    -145        
    -146    def __getitem__(self, atom):
    -147        
    -148            #atom = list(self._d_molecular_formula.keys())[position]
    -149            if atom in self._d_molecular_formula.keys():
    -150                return self._d_molecular_formula[atom]
    -151            else:
    -152                return 0
    -153    def get(self, atom):
    -154        """Get the atom quantity of a specific atom.
    -155        
    -156        Parameters
    -157        ----------
    -158        atom : str
    -159            The atom symbol.
    -160            
    -161        Returns
    -162        -------
    -163        int
    -164            The atom quantity.
    -165        """
    -166        #atom = list(self._d_molecular_formula.keys())[position]
    -167        if atom in self._d_molecular_formula.keys():
    -168            return self._d_molecular_formula[atom]
    -169        else:
    -170            return 0
    -171                
    -172    def _from_dict(self, molecular_formula, ion_type, adduct_atom):
    -173        
    -174        self._d_molecular_formula = {key:val for key, val in molecular_formula.items() if val != 0}
    -175        
    -176        if ion_type is not None:
    -177            self._d_molecular_formula[Labels.ion_type] = ion_type
    -178            
    -179        if adduct_atom:
    -180            if adduct_atom in self._d_molecular_formula:
    -181                self._d_molecular_formula[adduct_atom] += 1 
    -182            else: self._d_molecular_formula[adduct_atom] = 1 
    -183        self.adduct_atom = adduct_atom
    -184
    -185    def _from_list(self, molecular_formula_list, ion_type, adduct_atom):
    -186        # list has to be in the format 
    -187        #['C', 10, 'H', 21, '13C', 1, 'Cl', 1, etc]  
    -188        self._d_molecular_formula = {}
    -189        for each in range(0, len(molecular_formula_list),2):
    -190            
    -191            atoms_label =  molecular_formula_list[each]
    -192            atoms_count = int(molecular_formula_list[each+1])
    -193            
    -194            if atoms_count > 0:
    -195                self._d_molecular_formula[atoms_label] = int(atoms_count)
    -196        
    -197        self._d_molecular_formula[Labels.ion_type] = ion_type
    -198        if adduct_atom:
    -199            self.adduct_atom = adduct_atom
    -200            if adduct_atom in self._d_molecular_formula:
    -201                self._d_molecular_formula[adduct_atom] += 1 
    -202            else: self._d_molecular_formula[adduct_atom] = 1 
    -203        else:
    -204            self.adduct_atom = None
    -205
    -206    def _from_str(self, molecular_formula_str,  ion_type, adduct_atom):
    -207        # string has to be in the format 
    -208        #'C10 H21 13C1 Cl1 37Cl1 etc'
    -209        # Check if there are spaces in the string
    -210        if ' ' not in molecular_formula_str:
    -211            raise ValueError("The molecular formula string should have spaces, input: %s" % molecular_formula_str)
    -212
    -213        # Split the string by spaces
    -214        # Grab the text before a digit for each element after splitting on spaces (atoms)
    -215        elements = [re.sub(r'\d+$', '', x) for x in molecular_formula_str.split()]
    -216        # Grab the digits at the end of each element after splitting on spaces (counts)
    -217        counts = [re.findall(r'\d+$', x)[0] for x in molecular_formula_str.split()]
    -218        # Check that the number of elements and counts are the same
    -219        if len(elements) != len(counts):
    -220            raise ValueError("The number of elements and counts do not match, input: %s" % molecular_formula_str)
    -221        
    -222        # Create a dictionary from the elements and counts and add it to the molecular formula
    -223        dict_ = dict(zip(elements, counts))
    -224        # Cast counts to integers
    -225        dict_ = {key: int(val) for key, val in dict_.items()}
    -226        self._from_dict(dict_, ion_type, adduct_atom)
    -227
    -228
    -229    def split(self, delimiters, string, maxsplit=0): #pragma: no cover
    -230        """Splits the molecular formula string.
    -231        
    -232        Parameters
    -233        ----------
    -234        delimiters : list
    -235            The list of delimiters.
    -236        string : str
    -237            The molecular formula string.
    -238        maxsplit : int, optional
    -239            The maximum number of splits. Defaults to 0.
    -240
    -241        Returns
    -242        -------
    -243        list
    -244            The molecular formula list.
    -245
    -246        Notes
    -247        -----
    -248        Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa.
    -249        """
    -250        regexPattern = '|'.join(map(re.escape, delimiters)) #pragma: no cover
    -251        isotopes = re.findall(regexPattern, string) #pragma: no cover
    -252        counts = re.split(regexPattern, string, maxsplit)  #pragma: no cover
    -253       
    -254        return [isotopes[0], int(counts[1])]
    -255
    -256    @property
    -257    def isotopologue_count_percentile(self, ):
    -258        if not len(self.expected_isotopologues) == 0:
    -259            return (len(self.mspeak_mf_isotopologues_indexes)/len(self.expected_isotopologues))*100
    -260        else: 
    -261            return 100
    -262
    -263    @property
    -264    def O_C(self): 
    -265            if 'O' in self._d_molecular_formula.keys():
    -266                # gather all the Os and Hs, regardless of the isotopic composition
    -267                Os =sum([self._d_molecular_formula.get(key) for key in ['O'] + Atoms.isotopes['O'][1] if key in self._d_molecular_formula.keys()])
    -268                Cs = sum([self._d_molecular_formula.get(key) for key in ['C'] + Atoms.isotopes['C'][1] if key in self._d_molecular_formula.keys()])
    -269                return Os/Cs
    -270            else:
    -271                return 0    
    -272    
    -273    @property
    -274    def H_C(self): 
    -275        # gather all the Cs and Hs, regardless of the isotopic composition
    -276        Cs = sum([self._d_molecular_formula.get(key) for key in ['C'] + Atoms.isotopes['C'][1] if key in self._d_molecular_formula.keys()])
    -277        Hs = sum([self._d_molecular_formula.get(key) for key in ['H'] + Atoms.isotopes['H'][1] if key in self._d_molecular_formula.keys()])
    -278        return Hs/Cs
    -279
    -280    @property
    -281    def A_I(self):
    -282        """Aromaticity index"""
    -283        return self._calc_aromaticity_index()
    -284
    -285    @property
    -286    def A_I_mod(self):
    -287        """Modified aromaticity index"""
    -288        return self._calc_aromaticity_index_mod()
    -289
    -290    @property
    -291    def nosc(self):
    -292        """Nominal oxidation state of carbon"""
    -293        return self._calc_nosc()
    -294    
    -295    @property
    -296    def dbe(self): return self._calc_dbe()
    -297    
    -298    @property
    -299    def mz_nominal_calc(self): return int(self._calc_mz())
    -300
    -301    @property    
    -302    def mz_error(self): return self._calc_assignment_mass_error()
    -303
    -304    @property
    -305    def mz_calc(self): return self._calc_mz()
    -306
    -307    @property
    -308    def protonated_mz(self): return self._protonated_mz(self.ion_charge)
    -309    
    -310    @property
    -311    def radical_mz(self): return self._radical_mz(self.ion_charge)
    -312    
    -313    @property
    -314    def neutral_mass(self): return self._neutral_mass()
    -315    
    -316    def adduct_mz(self, adduct_atom): 
    -317        """Get m/z of an adducted ion version of the molecular formula.
    -318        
    -319        Parameters
    -320        ----------
    -321        adduct_atom : str
    -322            The adduct atom.
    -323            
    -324        Returns
    -325        -------
    -326        float
    -327            The m/z value of the adducted ion version of the molecular formula.
    -328        """
    -329        return self._adduct_mz(adduct_atom, self.ion_charge)
    -330
    -331    @property
    -332    def ion_type(self): 
    -333        
    -334        ion_type = self._d_molecular_formula.get(Labels.ion_type)
    -335        if ion_type == Labels.protonated_de_ion:
    -336            if self.ion_charge > 0: 
    -337                return Labels.protonated
    -338            else: 
    -339                return Labels.de_protonated    
    -340        else:
    -341            return ion_type
    +  9
    + 10class MolecularFormulaBase(MolecularFormulaCalc):
    + 11    """Base class for representing a molecular formula.
    + 12
    + 13    Parameters
    + 14    ----------
    + 15    molecular_formula : dict, list, str
    + 16        The molecular formula.
    + 17    ion_charge : int
    + 18        The ion charge.
    + 19    ion_type : str, optional
    + 20        The ion type. Defaults to None.
    + 21    adduct_atom : str, optional
    + 22        The adduct atom. Defaults to None.
    + 23    mspeak_parent : _MSPeak, optional
    + 24        The parent mass spectrum peak object instance. Defaults to None.
    + 25    external_mz : float, optional
    + 26        The external m/z value. Defaults to None.
    + 27
    + 28    Raises
    + 29    ------
    + 30    TypeError
    + 31        If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT'.
    + 32
    + 33    Attributes
    + 34    ----------
    + 35    isotopologue_count_percentile : float
    + 36        The isotopologue count percentile.
    + 37    O_C : float
    + 38        The O/C ratio.
    + 39    H_C : float
    + 40        The H/C ratio.
    + 41    dbe : float
    + 42        The double bond equivalent.
    + 43    mz_nominal_calc : int
    + 44        The nominal m/z value.
    + 45    mz_error : float
    + 46        The m/z error.
    + 47    mz_calc : float
    + 48        The m/z value.
    + 49    protonated_mz : float
    + 50        The protonated or deprotonated m/z value.
    + 51    radical_mz : float
    + 52        The radical m/z value.
    + 53    neutral_mass : float
    + 54        The neutral mass.
    + 55    ion_type : str
    + 56        The ion type.
    + 57    ion_charge : int
    + 58        The ion charge.
    + 59    atoms : list
    + 60        The atoms in the molecular formula.
    + 61    confidence_score : float
    + 62        The confidence score of the molecular formula identification.
    + 63    isotopologue_similarity : float
    + 64        The isotopologue similarity score of the molecular formula identification.
    + 65    average_mz_error_score : float
    + 66        The average m/z error score of the molecular formula identification, including the isotopologues.
    + 67    mz_error_score : float
    + 68        The m/z error score of the molecular formula identification.
    + 69    kmd : float
    + 70        The Kendrick mass defect (KMD).
    + 71    kendrick_mass : float
    + 72        The Kendrick mass.
    + 73    knm : float
    + 74        The nominal Kendrick mass.
    + 75    string : str
    + 76        The molecular formula string.
    + 77    string_formated : str
    + 78        The molecular formula string formated with subscripts and superscripts.
    + 79    class_label : str
    + 80        The class label.
    + 81    class_dict : dict
    + 82        The class dictionary.
    + 83
    + 84    Methods
    + 85    -------
    + 86    * change_kendrick_base(kendrick_dict_base).
    + 87        Change the Kendrick base.
    + 88    * isotopologues(min_abundance, current_mono_abundance, dynamic_range).
    + 89        Calculate the isotopologues.
    + 90    * atoms_qnt(atom).
    + 91        Get the atom quantity.
    + 92    * atoms_symbol(atom).
    + 93        Get the atom symbol without the mass number.
    + 94    * to_dict().
    + 95        Get the molecular formula as a dictionary.
    + 96    * to_list().
    + 97        Get the molecular formula as a list.
    + 98    """
    + 99
    +100    def __init__(
    +101        self,
    +102        molecular_formula,
    +103        ion_charge,
    +104        ion_type=None,
    +105        adduct_atom=None,
    +106        mspeak_parent=None,
    +107        external_mz=None,
    +108    ):
    +109        # clear dictionary of atoms with 0 value
    +110        if type(molecular_formula) is dict:
    +111            self._from_dict(molecular_formula, ion_type, adduct_atom)
    +112
    +113        elif type(molecular_formula) is list:
    +114            self._from_list(molecular_formula, ion_type, adduct_atom)
    +115
    +116        elif type(molecular_formula) is str:
    +117            self._from_str(molecular_formula, ion_type, adduct_atom)
    +118
    +119        self._ion_charge = ion_charge
    +120        self._external_mz = external_mz
    +121        self._confidence_score = None
    +122        self._isotopologue_similarity = None
    +123        self._mz_error_score = None
    +124        self._mass_error_average_score = None
    +125
    +126        self.is_isotopologue = False
    +127
    +128        # parent mass spectrum peak obj instance
    +129        self._mspeak_parent = mspeak_parent
    +130
    +131        self.expected_isotopologues = []
    +132        self.mspeak_mf_isotopologues_indexes = []
    +133
    +134        if self._mspeak_parent:
    +135            kendrick_dict_base = (
    +136                self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
    +137            )
    +138        else:
    +139            kendrick_dict_base = {"C": 1, "H": 2}
    +140        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
    +141            kendrick_dict_base
    +142        )
    +143
    +144    def __repr__(self):
    +145        return "MolecularFormula({0},{1},ion type = {2}".format(
    +146            self._d_molecular_formula, self.ion_charge, self.ion_type
    +147        )
    +148
    +149    def __str__(self):
    +150        return "MolecularFormula {0}, ion_charge:{1}, ion type:{2}, m/z:{3} ".format(
    +151            self.string, self.ion_charge, self.ion_type, self.mz_calc
    +152        )
    +153
    +154    def __len__(self):
    +155        # crash if keys are not ordered
    +156        return len(self._d_molecular_formula.keys())
    +157
    +158    def __getitem__(self, atom):
    +159        # atom = list(self._d_molecular_formula.keys())[position]
    +160        if atom in self._d_molecular_formula.keys():
    +161            return self._d_molecular_formula[atom]
    +162        else:
    +163            return 0
    +164
    +165    def get(self, atom):
    +166        """Get the atom quantity of a specific atom.
    +167
    +168        Parameters
    +169        ----------
    +170        atom : str
    +171            The atom symbol.
    +172
    +173        Returns
    +174        -------
    +175        int
    +176            The atom quantity.
    +177        """
    +178        # atom = list(self._d_molecular_formula.keys())[position]
    +179        if atom in self._d_molecular_formula.keys():
    +180            return self._d_molecular_formula[atom]
    +181        else:
    +182            return 0
    +183
    +184    def _from_dict(self, molecular_formula, ion_type, adduct_atom):
    +185        self._d_molecular_formula = {
    +186            key: val for key, val in molecular_formula.items() if val != 0
    +187        }
    +188
    +189        if ion_type is not None:
    +190            self._d_molecular_formula[Labels.ion_type] = ion_type
    +191
    +192        if adduct_atom:
    +193            if adduct_atom in self._d_molecular_formula:
    +194                self._d_molecular_formula[adduct_atom] += 1
    +195            else:
    +196                self._d_molecular_formula[adduct_atom] = 1
    +197        self.adduct_atom = adduct_atom
    +198
    +199    def _from_list(self, molecular_formula_list, ion_type, adduct_atom):
    +200        # list has to be in the format
    +201        # ['C', 10, 'H', 21, '13C', 1, 'Cl', 1, etc]
    +202        self._d_molecular_formula = {}
    +203        for each in range(0, len(molecular_formula_list), 2):
    +204            atoms_label = molecular_formula_list[each]
    +205            atoms_count = int(molecular_formula_list[each + 1])
    +206
    +207            if atoms_count > 0:
    +208                self._d_molecular_formula[atoms_label] = int(atoms_count)
    +209
    +210        self._d_molecular_formula[Labels.ion_type] = ion_type
    +211        if adduct_atom:
    +212            self.adduct_atom = adduct_atom
    +213            if adduct_atom in self._d_molecular_formula:
    +214                self._d_molecular_formula[adduct_atom] += 1
    +215            else:
    +216                self._d_molecular_formula[adduct_atom] = 1
    +217        else:
    +218            self.adduct_atom = None
    +219
    +220    def _from_str(self, molecular_formula_str, ion_type, adduct_atom):
    +221        # string has to be in the format
    +222        #'C10 H21 13C1 Cl1 37Cl1 etc'
    +223        # Check if there are spaces in the string
    +224        if " " not in molecular_formula_str:
    +225            raise ValueError(
    +226                "The molecular formula string should have spaces, input: %s"
    +227                % molecular_formula_str
    +228            )
    +229
    +230        # Split the string by spaces
    +231        # Grab the text before a digit for each element after splitting on spaces (atoms)
    +232        elements = [re.sub(r"\d+$", "", x) for x in molecular_formula_str.split()]
    +233        # Grab the digits at the end of each element after splitting on spaces (counts)
    +234        counts = [re.findall(r"\d+$", x)[0] for x in molecular_formula_str.split()]
    +235        # Check that the number of elements and counts are the same
    +236        if len(elements) != len(counts):
    +237            raise ValueError(
    +238                "The number of elements and counts do not match, input: %s"
    +239                % molecular_formula_str
    +240            )
    +241
    +242        # Create a dictionary from the elements and counts and add it to the molecular formula
    +243        dict_ = dict(zip(elements, counts))
    +244        # Cast counts to integers
    +245        dict_ = {key: int(val) for key, val in dict_.items()}
    +246        self._from_dict(dict_, ion_type, adduct_atom)
    +247
    +248    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
    +249        """Splits the molecular formula string.
    +250
    +251        Parameters
    +252        ----------
    +253        delimiters : list
    +254            The list of delimiters.
    +255        string : str
    +256            The molecular formula string.
    +257        maxsplit : int, optional
    +258            The maximum number of splits. Defaults to 0.
    +259
    +260        Returns
    +261        -------
    +262        list
    +263            The molecular formula list.
    +264
    +265        Notes
    +266        -----
    +267        Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa.
    +268        """
    +269        regexPattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
    +270        isotopes = re.findall(regexPattern, string)  # pragma: no cover
    +271        counts = re.split(regexPattern, string, maxsplit)  # pragma: no cover
    +272
    +273        return [isotopes[0], int(counts[1])]
    +274
    +275    @property
    +276    def isotopologue_count_percentile(
    +277        self,
    +278    ):
    +279        if not len(self.expected_isotopologues) == 0:
    +280            return (
    +281                len(self.mspeak_mf_isotopologues_indexes)
    +282                / len(self.expected_isotopologues)
    +283            ) * 100
    +284        else:
    +285            return 100
    +286
    +287    @property
    +288    def O_C(self):
    +289        if "O" in self._d_molecular_formula.keys():
    +290            # gather all the Os and Hs, regardless of the isotopic composition
    +291            Os = sum(
    +292                [
    +293                    self._d_molecular_formula.get(key)
    +294                    for key in ["O"] + Atoms.isotopes["O"][1]
    +295                    if key in self._d_molecular_formula.keys()
    +296                ]
    +297            )
    +298            Cs = sum(
    +299                [
    +300                    self._d_molecular_formula.get(key)
    +301                    for key in ["C"] + Atoms.isotopes["C"][1]
    +302                    if key in self._d_molecular_formula.keys()
    +303                ]
    +304            )
    +305            return Os / Cs
    +306        else:
    +307            return 0
    +308
    +309    @property
    +310    def H_C(self):
    +311        # gather all the Cs and Hs, regardless of the isotopic composition
    +312        Cs = sum(
    +313            [
    +314                self._d_molecular_formula.get(key)
    +315                for key in ["C"] + Atoms.isotopes["C"][1]
    +316                if key in self._d_molecular_formula.keys()
    +317            ]
    +318        )
    +319        Hs = sum(
    +320            [
    +321                self._d_molecular_formula.get(key)
    +322                for key in ["H"] + Atoms.isotopes["H"][1]
    +323                if key in self._d_molecular_formula.keys()
    +324            ]
    +325        )
    +326        return Hs / Cs
    +327
    +328    @property
    +329    def A_I(self):
    +330        """Aromaticity index"""
    +331        return self._calc_aromaticity_index()
    +332
    +333    @property
    +334    def A_I_mod(self):
    +335        """Modified aromaticity index"""
    +336        return self._calc_aromaticity_index_mod()
    +337
    +338    @property
    +339    def nosc(self):
    +340        """Nominal oxidation state of carbon"""
    +341        return self._calc_nosc()
     342
    -343    @ion_type.setter
    -344    def ion_type(self, ion_type):
    -345        if  ion_type in [Labels.protonated_de_ion, Labels.adduct_ion, Labels.radical_ion]:
    -346            self._d_molecular_formula[Labels.ion_type] = ion_type
    -347        else:
    -348            raise TypeError("Ion type can only be: 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT', not %s"%ion_type)   
    -349
    -350    @property
    -351    def ion_charge(self): return self._ion_charge
    -352    
    -353    @property
    -354    def atoms(self): 
    -355        """Get the atoms in the molecular formula."""
    -356        # if there is an adduct_atom, them reduce it from the atoms list
    -357        if self.adduct_atom is None:
    -358            return [key for key in self._d_molecular_formula.keys() if key != Labels.ion_type]
    -359        else:
    -360            temp_dict = self._d_molecular_formula.copy()
    -361            temp_dict[self.adduct_atom] -= 1
    -362            return [key for key,val in temp_dict.items() if key != Labels.ion_type and val > 0]
    -363
    -364    
    -365    @property
    -366    def confidence_score(self): 
    -367        
    -368        if not self._confidence_score:
    -369            
    -370            self._confidence_score = self._calc_confidence_score()
    -371        
    -372        return self._confidence_score
    +343    @property
    +344    def dbe(self):
    +345        return self._calc_dbe()
    +346
    +347    @property
    +348    def mz_nominal_calc(self):
    +349        return int(self._calc_mz())
    +350
    +351    @property
    +352    def mz_error(self):
    +353        return self._calc_assignment_mass_error()
    +354
    +355    @property
    +356    def mz_calc(self):
    +357        return self._calc_mz()
    +358
    +359    @property
    +360    def protonated_mz(self):
    +361        return self._protonated_mz(self.ion_charge)
    +362
    +363    @property
    +364    def radical_mz(self):
    +365        return self._radical_mz(self.ion_charge)
    +366
    +367    @property
    +368    def neutral_mass(self):
    +369        return self._neutral_mass()
    +370
    +371    def adduct_mz(self, adduct_atom):
    +372        """Get m/z of an adducted ion version of the molecular formula.
     373
    -374    @property
    -375    def isotopologue_similarity(self): 
    -376        
    -377        if not self._isotopologue_similarity:
    -378           
    -379           self._isotopologue_similarity = self._calc_isotopologue_confidence()  
    -380       
    -381        return self._isotopologue_similarity  
    -382    
    -383    @property
    -384    def average_mz_error_score(self): 
    -385        
    -386        # includes the isotopologues
    -387        
    -388        if not self._mass_error_average_score:
    -389           
    -390           self._mass_error_average_score = self._calc_average_mz_score()  
    -391        
    -392        return self._mass_error_average_score
    -393
    -394    @property
    -395    def mz_error_score(self): 
    -396        if not self._mz_error_score:
    -397           
    -398           self._mz_error_score = self._calc_mz_confidence()  
    -399        
    -400        return self._mz_error_score
    -401    
    -402    @property
    -403    def kmd(self): return self._kmd
    -404
    -405    @property
    -406    def kendrick_mass(self): return self._kendrick_mass
    -407
    -408    @property
    -409    def knm(self): return self._nominal_km
    +374        Parameters
    +375        ----------
    +376        adduct_atom : str
    +377            The adduct atom.
    +378
    +379        Returns
    +380        -------
    +381        float
    +382            The m/z value of the adducted ion version of the molecular formula.
    +383        """
    +384        return self._adduct_mz(adduct_atom, self.ion_charge)
    +385
    +386    @property
    +387    def ion_type(self):
    +388        ion_type = self._d_molecular_formula.get(Labels.ion_type)
    +389        if ion_type == Labels.protonated_de_ion:
    +390            if self.ion_charge > 0:
    +391                return Labels.protonated
    +392            else:
    +393                return Labels.de_protonated
    +394        else:
    +395            return ion_type
    +396
    +397    @ion_type.setter
    +398    def ion_type(self, ion_type):
    +399        if ion_type in [
    +400            Labels.protonated_de_ion,
    +401            Labels.adduct_ion,
    +402            Labels.radical_ion,
    +403        ]:
    +404            self._d_molecular_formula[Labels.ion_type] = ion_type
    +405        else:
    +406            raise TypeError(
    +407                "Ion type can only be: 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT', not %s"
    +408                % ion_type
    +409            )
     410
    -411    def change_kendrick_base(self, kendrick_dict_base):
    -412        """Change the Kendrick base.
    -413
    -414        Parameters
    -415        ----------
    -416        kendrick_dict_base : dict
    -417            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
    -418        """ 
    -419        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(kendrick_dict_base)
    -420                
    -421    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range): 
    -422        """Calculate the isotopologues for a given molecular formula.
    -423
    -424        Parameters
    -425        ----------
    -426        min_abundance : float
    -427            The minimum abundance.
    -428        current_mono_abundance : float
    -429            The current monoisotopic abundance.
    -430        dynamic_range : float
    -431            The dynamic range.
    -432
    -433        Yields
    -434        ------
    -435        MolecularFormulaIsotopologue
    -436            The molecular formula isotopologue.
    -437
    -438        Notes
    -439        -----
    -440        This calculation ignores the hydrogen isotopes.
    -441        """
    -442        isotopologues = []
    -443        for mf in self._cal_isotopologues(self._d_molecular_formula, min_abundance, current_mono_abundance, dynamic_range ):
    -444            isotopologues.append(mf)
    -445        
    -446        # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions. 
    -447        sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True)
    -448
    -449        for mf in sorted_isotopologues:
    -450            yield MolecularFormulaIsotopologue(
    -451                *mf, 
    -452                current_mono_abundance, 
    -453                self.ion_charge, 
    -454                ion_type=self.ion_type, 
    -455                adduct_atom=self.adduct_atom
    -456                )
    -457    
    -458    def atoms_qnt(self,atom): 
    -459        """Get the atom quantity of a specific atom in the molecular formula."""
    -460        if atom in self._d_molecular_formula:
    -461            return self._d_molecular_formula.get(atom)
    -462        else:
    -463            raise Warning('Could not find %s in this Molecular Formula object'%str(atom))
    -464    
    -465    def atoms_symbol(self, atom): 
    -466        """Get the atom symbol without the mass number."""
    -467        return ''.join([i for i in atom if not i.isdigit()])
    -468
    -469    @property       
    -470    def string(self):
    -471        """Returns the molecular formula as a string."""
    -472        if self._d_molecular_formula:
    -473            if self.adduct_atom is None:
    -474                mol_form_dict = self._d_molecular_formula
    -475            else:
    -476                mol_form_dict = self._d_molecular_formula.copy()
    -477                if self.adduct_atom not in mol_form_dict.keys():
    -478                    raise Exception("Adduct atom not found in molecular formula dict")
    -479                mol_form_dict[self.adduct_atom] -= 1
    -480                mol_form_dict = {key:val for key, val in mol_form_dict.items() if val != 0}
    -481            formula_srt = ''
    -482            for atom in Atoms.atoms_order:
    -483                if atom in mol_form_dict.keys():
    -484                    formula_srt += atom + str(int(mol_form_dict.get(atom))) + ' '
    -485            return formula_srt.strip()
    -486        
    -487        else:
    -488            raise Exception("Molecular formula identification not performed yet")    
    -489    
    -490    @property
    -491    def string_formated(self):
    -492        
    -493        SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
    -494        SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
    -495        
    -496        if self._d_molecular_formula:
    -497            formula_srt = ''
    -498            for atom in Atoms.atoms_order:
    -499                if atom in self.to_dict().keys():
    -500                    formula_srt += atom.translate(SUP) + str(int(self.to_dict().get(atom))).translate(SUB)
    -501            return formula_srt
    -502        
    -503        else:
    -504            raise Exception("Molecular formula identification not performed yet")    
    -505
    -506    def to_dict(self):
    -507        """Returns the molecular formula as a dictionary.
    -508        
    -509        Returns
    -510        -------
    -511        dict
    -512            The molecular formula as a dictionary.
    -513        """
    -514        return self._d_molecular_formula
    -515
    -516    def to_list(self):
    -517        """Returns the molecular formula as a list.
    -518        
    -519        Returns
    -520        -------
    -521        list
    -522            The molecular formula as a list.
    -523            
    -524        Raises
    -525        ------
    -526        Exception
    -527            If the molecular formula identification was not performed yet.
    -528        """
    -529        #TODO ensure self._d_molecular_formula is a orderedDict
    -530        
    -531        if self._d_molecular_formula:
    -532            formula_list = []    
    -533            
    -534            for atom, atom_number in self._d_molecular_formula.items():
    -535    
    -536                if atom != Labels.ion_type:
    -537                    
    -538                    formula_list.append(atom)
    -539                    formula_list.append(atom_number)
    -540    
    -541            return formula_list
    -542        else:
    -543            raise Exception("Molecular formula identification not performed yet")
    -544    
    -545    @property
    -546    def class_label(self):
    -547        
    -548        if self._d_molecular_formula:
    -549            
    -550            formulalist = self.to_list()
    -551            classstring = '' 
    -552            
    -553            for each in range(0, len(formulalist),2):
    -554                
    -555                if formulalist[each] != 'C' and formulalist[each] != 'H' and formulalist[each] != 'HC':
    -556                     
    -557                    classstring = classstring + str(formulalist[each]) + str(formulalist[each+1]) + ' '    
    -558            
    -559            if classstring == '': classstring = 'HC'
    -560                
    -561            classstring = classstring.strip()
    -562            
    -563            if self._d_molecular_formula.get(Labels.ion_type) == Labels.radical_ion:    
    -564                
    -565                return classstring + ' -R'
    -566            
    -567            #elif self._d_molecular_formula.get(Labels.ion_type) == Labels.adduct_ion:    
    -568                
    -569            #    return classstring + ' -A'
    +411    @property
    +412    def ion_charge(self):
    +413        return self._ion_charge
    +414
    +415    @property
    +416    def atoms(self):
    +417        """Get the atoms in the molecular formula."""
    +418        # if there is an adduct_atom, them reduce it from the atoms list
    +419        if self.adduct_atom is None:
    +420            return [
    +421                key
    +422                for key in self._d_molecular_formula.keys()
    +423                if key != Labels.ion_type
    +424            ]
    +425        else:
    +426            temp_dict = self._d_molecular_formula.copy()
    +427            temp_dict[self.adduct_atom] -= 1
    +428            return [
    +429                key
    +430                for key, val in temp_dict.items()
    +431                if key != Labels.ion_type and val > 0
    +432            ]
    +433
    +434    @property
    +435    def confidence_score(self):
    +436        if not self._confidence_score:
    +437            self._confidence_score = self._calc_confidence_score()
    +438
    +439        return self._confidence_score
    +440
    +441    @property
    +442    def isotopologue_similarity(self):
    +443        if not self._isotopologue_similarity:
    +444            self._isotopologue_similarity = self._calc_isotopologue_confidence()
    +445
    +446        return self._isotopologue_similarity
    +447
    +448    @property
    +449    def average_mz_error_score(self):
    +450        # includes the isotopologues
    +451
    +452        if not self._mass_error_average_score:
    +453            self._mass_error_average_score = self._calc_average_mz_score()
    +454
    +455        return self._mass_error_average_score
    +456
    +457    @property
    +458    def mz_error_score(self):
    +459        if not self._mz_error_score:
    +460            self._mz_error_score = self._calc_mz_confidence()
    +461
    +462        return self._mz_error_score
    +463
    +464    @property
    +465    def kmd(self):
    +466        return self._kmd
    +467
    +468    @property
    +469    def kendrick_mass(self):
    +470        return self._kendrick_mass
    +471
    +472    @property
    +473    def knm(self):
    +474        return self._nominal_km
    +475
    +476    def change_kendrick_base(self, kendrick_dict_base):
    +477        """Change the Kendrick base.
    +478
    +479        Parameters
    +480        ----------
    +481        kendrick_dict_base : dict
    +482            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
    +483        """
    +484        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
    +485            kendrick_dict_base
    +486        )
    +487
    +488    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range):
    +489        """Calculate the isotopologues for a given molecular formula.
    +490
    +491        Parameters
    +492        ----------
    +493        min_abundance : float
    +494            The minimum abundance.
    +495        current_mono_abundance : float
    +496            The current monoisotopic abundance.
    +497        dynamic_range : float
    +498            The dynamic range.
    +499
    +500        Yields
    +501        ------
    +502        MolecularFormulaIsotopologue
    +503            The molecular formula isotopologue.
    +504
    +505        Notes
    +506        -----
    +507        This calculation ignores the hydrogen isotopes.
    +508        """
    +509        isotopologues = []
    +510        for mf in self._cal_isotopologues(
    +511            self._d_molecular_formula,
    +512            min_abundance,
    +513            current_mono_abundance,
    +514            dynamic_range,
    +515        ):
    +516            isotopologues.append(mf)
    +517
    +518        # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions.
    +519        sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True)
    +520
    +521        for mf in sorted_isotopologues:
    +522            yield MolecularFormulaIsotopologue(
    +523                *mf,
    +524                current_mono_abundance,
    +525                self.ion_charge,
    +526                ion_type=self.ion_type,
    +527                adduct_atom=self.adduct_atom,
    +528            )
    +529
    +530    def atoms_qnt(self, atom):
    +531        """Get the atom quantity of a specific atom in the molecular formula."""
    +532        if atom in self._d_molecular_formula:
    +533            return self._d_molecular_formula.get(atom)
    +534        else:
    +535            raise Warning(
    +536                "Could not find %s in this Molecular Formula object" % str(atom)
    +537            )
    +538
    +539    def atoms_symbol(self, atom):
    +540        """Get the atom symbol without the mass number."""
    +541        return "".join([i for i in atom if not i.isdigit()])
    +542
    +543    @property
    +544    def string(self):
    +545        """Returns the molecular formula as a string."""
    +546        if self._d_molecular_formula:
    +547            if self.adduct_atom is None:
    +548                mol_form_dict = self._d_molecular_formula
    +549            else:
    +550                mol_form_dict = self._d_molecular_formula.copy()
    +551                if self.adduct_atom not in mol_form_dict.keys():
    +552                    raise Exception("Adduct atom not found in molecular formula dict")
    +553                mol_form_dict[self.adduct_atom] -= 1
    +554                mol_form_dict = {
    +555                    key: val for key, val in mol_form_dict.items() if val != 0
    +556                }
    +557            formula_srt = ""
    +558            for atom in Atoms.atoms_order:
    +559                if atom in mol_form_dict.keys():
    +560                    formula_srt += atom + str(int(mol_form_dict.get(atom))) + " "
    +561            return formula_srt.strip()
    +562
    +563        else:
    +564            raise Exception("Molecular formula identification not performed yet")
    +565
    +566    @property
    +567    def string_formated(self):
    +568        SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
    +569        SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
     570
    -571            else: return classstring
    -572            
    -573            #'dict, tuple or string'
    -574        
    -575        else:
    -576            
    -577            raise Exception("Molecular formula identification not performed yet")        
    -578    
    -579    @property
    -580    def class_dict(self):
    -581        
    -582        if self._d_molecular_formula:
    -583            
    -584            class_dict = {}
    -585            
    -586            for atom, qnt in self._d_molecular_formula.items():
    -587    
    -588                if atom != Labels.ion_type and atom !='C' and atom !='H':
    -589                    
    -590                    class_dict[atom] = qnt
    -591                    
    -592            return class_dict
    -593        
    -594        raise Exception("Molecular formula identification not performed yet")           
    -595    
    -596
    -597class MolecularFormulaIsotopologue(MolecularFormulaBase):
    -598    """Class for representing a molecular formula isotopologue.
    -599    
    -600    Parameters
    -601    ----------
    -602    _d_molecular_formula : dict
    -603        The molecular formula as a dictionary.
    -604    prob_ratio : float
    -605        The probability ratio.
    -606    mono_abundance : float
    -607        The monoisotopic abundance.
    -608    ion_charge : int
    -609        The ion charge.
    -610    mspeak_parent : object, optional
    -611        The parent mass spectrum peak object instance. Defaults to None.
    -612    ion_type : str, optional
    -613        The ion type. Defaults to None.
    -614    adduct_atom : str, optional
    -615        The adduct atom. Defaults to None.
    -616    
    -617    Attributes
    -618    ----------
    -619    prob_ratio : float
    -620        The probability ratio.
    -621    abundance_calc : float
    -622        The calculated abundance.
    -623    area_error : float
    -624        The area error.
    -625    abundance_error : float
    -626        The abundance error.
    -627    is_isotopologue : bool
    -628        The isotopologue flag. Defaults to True.
    -629    mspeak_index_mono_isotopic : int
    -630        The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    -631    mono_isotopic_formula_index : int
    -632        The index of the monoisotopic formula in the molecular formula list. Defaults to None.
    -633    """
    -634    def __init__(
    -635            self, 
    -636            _d_molecular_formula, 
    -637            prob_ratio, 
    -638            mono_abundance, 
    -639            ion_charge, 
    -640            mspeak_parent=None,
    -641            ion_type = None,
    -642            adduct_atom = None
    -643            ):
    -644        
    -645        if ion_type is None:
    -646            # check if ion type or adduct_atom is in the molecular formula dict
    -647            if Labels.ion_type in _d_molecular_formula:
    -648                ion_type = _d_molecular_formula.get(Labels.ion_type)
    -649            else:
    -650                ion_type = None
    -651        else:
    -652            ion_type = Labels.ion_type_translate.get(ion_type)
    -653        
    -654        if ion_type == Labels.adduct_ion:
    -655            adduct_atom_int = None
    -656            if adduct_atom in _d_molecular_formula.keys():
    -657                adduct_atom_int = adduct_atom
    -658            else:
    -659                # Check to see if adduct_atom should actually be an isotope of the adduct atom
    -660                for adduct_iso in Atoms.isotopes.get(adduct_atom)[1]:
    -661                    if adduct_iso in _d_molecular_formula.keys():
    -662                        adduct_atom_int = adduct_iso
    -663            adduct_atom = adduct_atom_int
    -664            if adduct_atom is None:
    -665                raise Exception("adduct_atom is required for adduct ion")
    -666            _d_molecular_formula[adduct_atom] -= 1
    -667            _d_molecular_formula = {key:val for key, val in _d_molecular_formula.items() if val != 0}
    -668
    -669        
    -670        super().__init__(
    -671            molecular_formula =_d_molecular_formula, 
    -672            ion_charge = ion_charge, 
    -673            ion_type=ion_type,
    -674            adduct_atom=adduct_atom
    -675            )
    -676        #prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
    -677        
    -678        self.prob_ratio = prob_ratio
    -679        
    -680        self.abundance_calc = mono_abundance * prob_ratio
    -681
    -682        self.is_isotopologue = True
    -683        
    -684        self.mspeak_index_mono_isotopic = None
    -685
    -686        self.mono_isotopic_formula_index = None
    -687        # parent mass spectrum peak obj instance
    -688        self._mspeak_parent = mspeak_parent
    -689
    -690    
    -691    @property
    -692    def area_error(self):
    -693        return self._calc_area_error()
    -694
    -695    @property
    -696    def abundance_error(self):
    -697        return self._calc_abundance_error()
    -698
    -699class LCMSLibRefMolecularFormula(MolecularFormulaBase):
    -700    """Class for representing a molecular formula associated with a molecule in a LCMS library reference.
    -701
    -702    Parameters
    -703    ----------
    -704    molecular_formula : dict, list, str
    -705        The molecular formula.
    -706    ion_charge : int
    -707        The ion charge.
    -708    ion_type : str, optional
    -709        The ion type. Defaults to None.
    -710    adduct_atom : str, optional
    -711        The adduct atom. Defaults to None.
    -712    mspeak_parent : object, optional
    -713        The parent mass spectrum peak object instance. Defaults to None.
    -714    name : str, optional
    -715        The name of the reference molecule. Defaults to None.
    -716    kegg_id : str, optional
    -717        The KEGG ID of the reference molecule. Defaults to None.
    -718    cas : str, optional
    -719        The CAS number of the reference molecule. Defaults to None.
    -720
    -721    """
    -722    
    -723    def __init__(self, molecular_formula, ion_charge, ion_type=None, 
    -724                    adduct_atom=None, mspeak_parent=None, name=None, kegg_id=None, cas=None) -> None:
    -725        
    -726        super().__init__(molecular_formula, ion_charge, ion_type=ion_type, 
    -727                    adduct_atom=adduct_atom, mspeak_parent=mspeak_parent)
    -728
    -729        self._name = name
    -730        self._kegg_id = kegg_id
    -731        self._cas = cas    
    -732    
    -733    @property
    -734    def name(self):
    -735        return self._name
    -736
    -737    @name.setter
    -738    def name(self, name):
    -739        if isinstance(name, str):
    -740            self._name = name
    -741        else:
    -742            raise TypeError('name: {} should be type string')    
    -743
    -744    @property
    -745    def kegg_id(self):
    -746        return self._kegg_id
    -747    
    -748    @kegg_id.setter
    -749    def kegg_id(self, kegg_id):
    -750        self._kegg_id = kegg_id
    -751        #if isinstance(kegg_id, str):
    -752        #    self._kegg_id = kegg_id
    -753        #else:
    -754        #    print(kegg_id)
    -755        #    raise TypeError('name: {} should be type string') 
    +571        if self._d_molecular_formula:
    +572            formula_srt = ""
    +573            for atom in Atoms.atoms_order:
    +574                if atom in self.to_dict().keys():
    +575                    formula_srt += atom.translate(SUP) + str(
    +576                        int(self.to_dict().get(atom))
    +577                    ).translate(SUB)
    +578            return formula_srt
    +579
    +580        else:
    +581            raise Exception("Molecular formula identification not performed yet")
    +582
    +583    def to_dict(self):
    +584        """Returns the molecular formula as a dictionary.
    +585
    +586        Returns
    +587        -------
    +588        dict
    +589            The molecular formula as a dictionary.
    +590        """
    +591        return self._d_molecular_formula
    +592
    +593    def to_list(self):
    +594        """Returns the molecular formula as a list.
    +595
    +596        Returns
    +597        -------
    +598        list
    +599            The molecular formula as a list.
    +600
    +601        Raises
    +602        ------
    +603        Exception
    +604            If the molecular formula identification was not performed yet.
    +605        """
    +606        # TODO ensure self._d_molecular_formula is a orderedDict
    +607
    +608        if self._d_molecular_formula:
    +609            formula_list = []
    +610
    +611            for atom, atom_number in self._d_molecular_formula.items():
    +612                if atom != Labels.ion_type:
    +613                    formula_list.append(atom)
    +614                    formula_list.append(atom_number)
    +615
    +616            return formula_list
    +617        else:
    +618            raise Exception("Molecular formula identification not performed yet")
    +619
    +620    @property
    +621    def class_label(self):
    +622        if self._d_molecular_formula:
    +623            formulalist = self.to_list()
    +624            classstring = ""
    +625
    +626            for each in range(0, len(formulalist), 2):
    +627                if (
    +628                    formulalist[each] != "C"
    +629                    and formulalist[each] != "H"
    +630                    and formulalist[each] != "HC"
    +631                ):
    +632                    classstring = (
    +633                        classstring
    +634                        + str(formulalist[each])
    +635                        + str(formulalist[each + 1])
    +636                        + " "
    +637                    )
    +638
    +639            if classstring == "":
    +640                classstring = "HC"
    +641
    +642            classstring = classstring.strip()
    +643
    +644            if self._d_molecular_formula.get(Labels.ion_type) == Labels.radical_ion:
    +645                return classstring + " -R"
    +646
    +647            # elif self._d_molecular_formula.get(Labels.ion_type) == Labels.adduct_ion:
    +648
    +649            #    return classstring + ' -A'
    +650
    +651            else:
    +652                return classstring
    +653
    +654            #'dict, tuple or string'
    +655
    +656        else:
    +657            raise Exception("Molecular formula identification not performed yet")
    +658
    +659    @property
    +660    def class_dict(self):
    +661        if self._d_molecular_formula:
    +662            class_dict = {}
    +663
    +664            for atom, qnt in self._d_molecular_formula.items():
    +665                if atom != Labels.ion_type and atom != "C" and atom != "H":
    +666                    class_dict[atom] = qnt
    +667
    +668            return class_dict
    +669
    +670        raise Exception("Molecular formula identification not performed yet")
    +671
    +672
    +673class MolecularFormulaIsotopologue(MolecularFormulaBase):
    +674    """Class for representing a molecular formula isotopologue.
    +675
    +676    Parameters
    +677    ----------
    +678    _d_molecular_formula : dict
    +679        The molecular formula as a dictionary.
    +680    prob_ratio : float
    +681        The probability ratio.
    +682    mono_abundance : float
    +683        The monoisotopic abundance.
    +684    ion_charge : int
    +685        The ion charge.
    +686    mspeak_parent : object, optional
    +687        The parent mass spectrum peak object instance. Defaults to None.
    +688    ion_type : str, optional
    +689        The ion type. Defaults to None.
    +690    adduct_atom : str, optional
    +691        The adduct atom. Defaults to None.
    +692
    +693    Attributes
    +694    ----------
    +695    prob_ratio : float
    +696        The probability ratio.
    +697    abundance_calc : float
    +698        The calculated abundance.
    +699    area_error : float
    +700        The area error.
    +701    abundance_error : float
    +702        The abundance error.
    +703    is_isotopologue : bool
    +704        The isotopologue flag. Defaults to True.
    +705    mspeak_index_mono_isotopic : int
    +706        The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    +707    mono_isotopic_formula_index : int
    +708        The index of the monoisotopic formula in the molecular formula list. Defaults to None.
    +709    """
    +710
    +711    def __init__(
    +712        self,
    +713        _d_molecular_formula,
    +714        prob_ratio,
    +715        mono_abundance,
    +716        ion_charge,
    +717        mspeak_parent=None,
    +718        ion_type=None,
    +719        adduct_atom=None,
    +720    ):
    +721        if ion_type is None:
    +722            # check if ion type or adduct_atom is in the molecular formula dict
    +723            if Labels.ion_type in _d_molecular_formula:
    +724                ion_type = _d_molecular_formula.get(Labels.ion_type)
    +725            else:
    +726                ion_type = None
    +727        else:
    +728            ion_type = Labels.ion_type_translate.get(ion_type)
    +729
    +730        if ion_type == Labels.adduct_ion:
    +731            adduct_atom_int = None
    +732            if adduct_atom in _d_molecular_formula.keys():
    +733                adduct_atom_int = adduct_atom
    +734            else:
    +735                # Check to see if adduct_atom should actually be an isotope of the adduct atom
    +736                for adduct_iso in Atoms.isotopes.get(adduct_atom)[1]:
    +737                    if adduct_iso in _d_molecular_formula.keys():
    +738                        adduct_atom_int = adduct_iso
    +739            adduct_atom = adduct_atom_int
    +740            if adduct_atom is None:
    +741                raise Exception("adduct_atom is required for adduct ion")
    +742            _d_molecular_formula[adduct_atom] -= 1
    +743            _d_molecular_formula = {
    +744                key: val for key, val in _d_molecular_formula.items() if val != 0
    +745            }
    +746
    +747        super().__init__(
    +748            molecular_formula=_d_molecular_formula,
    +749            ion_charge=ion_charge,
    +750            ion_type=ion_type,
    +751            adduct_atom=adduct_atom,
    +752        )
    +753        # prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
    +754
    +755        self.prob_ratio = prob_ratio
     756
    -757    @property
    -758    def cas(self):
    -759        return self._cas    
    -760    
    -761    @cas.setter
    -762    def cas(self, cas):
    -763        self._cas = cas
    -764        #if isinstance(cas, str):
    -765        #    self._cas = cas
    -766        #else:
    -767        #    raise TypeError('name: {} should be type string') 
    -768    
    -769class MolecularFormula(MolecularFormulaBase):
    -770    """General class for representing a molecular formula.
    -771
    -772    Parameters
    -773    ----------
    -774    molecular_formula : dict, list, str
    -775        The molecular formula.
    -776    ion_charge : int
    -777        The ion charge.
    -778    ion_type : str, optional
    -779        The ion type. Defaults to None.
    -780    adduct_atom : str, optional
    -781        The adduct atom. Defaults to None.
    -782    mspeak_parent : object, optional
    -783        The parent mass spectrum peak object instance. Defaults to None.
    -784    external_mz : float, optional
    -785        The external m/z value. Defaults to False.
    -786    """
    -787
    -788    def __init__(self, molecular_formula, ion_charge, ion_type=None, 
    -789                adduct_atom=None, mspeak_parent=None, external_mz=False):
    -790        super().__init__(molecular_formula, ion_charge, ion_type=ion_type, 
    -791                adduct_atom=adduct_atom, mspeak_parent=mspeak_parent, external_mz=external_mz)
    +757        self.abundance_calc = mono_abundance * prob_ratio
    +758
    +759        self.is_isotopologue = True
    +760
    +761        self.mspeak_index_mono_isotopic = None
    +762
    +763        self.mono_isotopic_formula_index = None
    +764        # parent mass spectrum peak obj instance
    +765        self._mspeak_parent = mspeak_parent
    +766
    +767    @property
    +768    def area_error(self):
    +769        return self._calc_area_error()
    +770
    +771    @property
    +772    def abundance_error(self):
    +773        return self._calc_abundance_error()
    +774
    +775
    +776class LCMSLibRefMolecularFormula(MolecularFormulaBase):
    +777    """Class for representing a molecular formula associated with a molecule in a LCMS library reference.
    +778
    +779    Parameters
    +780    ----------
    +781    molecular_formula : dict, list, str
    +782        The molecular formula.
    +783    ion_charge : int
    +784        The ion charge.
    +785    ion_type : str, optional
    +786        The ion type. Defaults to None.
    +787    adduct_atom : str, optional
    +788        The adduct atom. Defaults to None.
    +789    mspeak_parent : object, optional
    +790        The parent mass spectrum peak object instance. Defaults to None.
    +791    name : str, optional
    +792        The name of the reference molecule. Defaults to None.
    +793    kegg_id : str, optional
    +794        The KEGG ID of the reference molecule. Defaults to None.
    +795    cas : str, optional
    +796        The CAS number of the reference molecule. Defaults to None.
    +797
    +798    """
    +799
    +800    def __init__(
    +801        self,
    +802        molecular_formula,
    +803        ion_charge,
    +804        ion_type=None,
    +805        adduct_atom=None,
    +806        mspeak_parent=None,
    +807        name=None,
    +808        kegg_id=None,
    +809        cas=None,
    +810    ) -> None:
    +811        super().__init__(
    +812            molecular_formula,
    +813            ion_charge,
    +814            ion_type=ion_type,
    +815            adduct_atom=adduct_atom,
    +816            mspeak_parent=mspeak_parent,
    +817        )
    +818
    +819        self._name = name
    +820        self._kegg_id = kegg_id
    +821        self._cas = cas
    +822
    +823    @property
    +824    def name(self):
    +825        return self._name
    +826
    +827    @name.setter
    +828    def name(self, name):
    +829        if isinstance(name, str):
    +830            self._name = name
    +831        else:
    +832            raise TypeError("name: {} should be type string")
    +833
    +834    @property
    +835    def kegg_id(self):
    +836        return self._kegg_id
    +837
    +838    @kegg_id.setter
    +839    def kegg_id(self, kegg_id):
    +840        self._kegg_id = kegg_id
    +841        # if isinstance(kegg_id, str):
    +842        #    self._kegg_id = kegg_id
    +843        # else:
    +844        #    print(kegg_id)
    +845        #    raise TypeError('name: {} should be type string')
    +846
    +847    @property
    +848    def cas(self):
    +849        return self._cas
    +850
    +851    @cas.setter
    +852    def cas(self, cas):
    +853        self._cas = cas
    +854        # if isinstance(cas, str):
    +855        #    self._cas = cas
    +856        # else:
    +857        #    raise TypeError('name: {} should be type string')
    +858
    +859
    +860class MolecularFormula(MolecularFormulaBase):
    +861    """General class for representing a molecular formula.
    +862
    +863    Parameters
    +864    ----------
    +865    molecular_formula : dict, list, str
    +866        The molecular formula.
    +867    ion_charge : int
    +868        The ion charge.
    +869    ion_type : str, optional
    +870        The ion type. Defaults to None.
    +871    adduct_atom : str, optional
    +872        The adduct atom. Defaults to None.
    +873    mspeak_parent : object, optional
    +874        The parent mass spectrum peak object instance. Defaults to None.
    +875    external_mz : float, optional
    +876        The external m/z value. Defaults to False.
    +877    """
    +878
    +879    def __init__(
    +880        self,
    +881        molecular_formula,
    +882        ion_charge,
    +883        ion_type=None,
    +884        adduct_atom=None,
    +885        mspeak_parent=None,
    +886        external_mz=False,
    +887    ):
    +888        super().__init__(
    +889            molecular_formula,
    +890            ion_charge,
    +891            ion_type=ion_type,
    +892            adduct_atom=adduct_atom,
    +893            mspeak_parent=mspeak_parent,
    +894            external_mz=external_mz,
    +895        )
     
    @@ -1040,592 +1144,667 @@

    -
     10class MolecularFormulaBase(MolecularFormulaCalc):
    - 11    """Base class for representing a molecular formula.
    - 12
    - 13    Parameters
    - 14    ----------
    - 15    molecular_formula : dict, list, str
    - 16        The molecular formula.
    - 17    ion_charge : int
    - 18        The ion charge.
    - 19    ion_type : str, optional
    - 20        The ion type. Defaults to None.
    - 21    adduct_atom : str, optional
    - 22        The adduct atom. Defaults to None.
    - 23    mspeak_parent : _MSPeak, optional
    - 24        The parent mass spectrum peak object instance. Defaults to None.
    - 25    external_mz : float, optional
    - 26        The external m/z value. Defaults to None.
    - 27
    - 28    Raises
    - 29    ------
    - 30    TypeError
    - 31        If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT'.
    - 32
    - 33    Attributes
    - 34    ----------
    - 35    isotopologue_count_percentile : float
    - 36        The isotopologue count percentile.
    - 37    O_C : float
    - 38        The O/C ratio.
    - 39    H_C : float
    - 40        The H/C ratio.
    - 41    dbe : float
    - 42        The double bond equivalent.
    - 43    mz_nominal_calc : int
    - 44        The nominal m/z value.
    - 45    mz_error : float
    - 46        The m/z error.
    - 47    mz_calc : float
    - 48        The m/z value.
    - 49    protonated_mz : float
    - 50        The protonated or deprotonated m/z value.
    - 51    radical_mz : float
    - 52        The radical m/z value.
    - 53    neutral_mass : float
    - 54        The neutral mass.
    - 55    ion_type : str
    - 56        The ion type.
    - 57    ion_charge : int
    - 58        The ion charge.
    - 59    atoms : list
    - 60        The atoms in the molecular formula.
    - 61    confidence_score : float
    - 62        The confidence score of the molecular formula identification.
    - 63    isotopologue_similarity : float
    - 64        The isotopologue similarity score of the molecular formula identification.
    - 65    average_mz_error_score : float
    - 66        The average m/z error score of the molecular formula identification, including the isotopologues.
    - 67    mz_error_score : float
    - 68        The m/z error score of the molecular formula identification.
    - 69    kmd : float
    - 70        The Kendrick mass defect (KMD).
    - 71    kendrick_mass : float
    - 72        The Kendrick mass.
    - 73    knm : float
    - 74        The nominal Kendrick mass.
    - 75    string : str
    - 76        The molecular formula string.
    - 77    string_formated : str
    - 78        The molecular formula string formated with subscripts and superscripts.
    - 79    class_label : str
    - 80        The class label.
    - 81    class_dict : dict
    - 82        The class dictionary.
    - 83
    - 84    Methods
    - 85    -------
    - 86    * change_kendrick_base(kendrick_dict_base).
    - 87        Change the Kendrick base.
    - 88    * isotopologues(min_abundance, current_mono_abundance, dynamic_range).
    - 89        Calculate the isotopologues.
    - 90    * atoms_qnt(atom).
    - 91        Get the atom quantity.
    - 92    * atoms_symbol(atom).
    - 93        Get the atom symbol without the mass number.
    - 94    * to_dict().
    - 95        Get the molecular formula as a dictionary.
    - 96    * to_list().
    - 97        Get the molecular formula as a list.
    - 98    """    
    - 99
    -100    def __init__(self, molecular_formula, ion_charge, ion_type=None, 
    -101                adduct_atom=None, mspeak_parent=None, external_mz=None):
    -102        # clear dictionary of atoms with 0 value
    -103        if  type(molecular_formula) is dict:
    -104                self._from_dict(molecular_formula, ion_type, adduct_atom)   
    -105        
    -106        elif type(molecular_formula) is list:
    -107                self._from_list(molecular_formula, ion_type, adduct_atom)   
    -108        
    -109        elif type(molecular_formula) is str:
    -110                self._from_str(molecular_formula, ion_type, adduct_atom)   
    -111
    -112        self._ion_charge = ion_charge
    -113        self._external_mz = external_mz
    -114        self._confidence_score = None        
    -115        self._isotopologue_similarity = None
    -116        self._mz_error_score = None
    -117        self._mass_error_average_score = None
    -118
    -119        self.is_isotopologue = False
    -120        
    -121        # parent mass spectrum peak obj instance
    -122        self._mspeak_parent = mspeak_parent
    -123
    -124        self.expected_isotopologues = []
    -125        self.mspeak_mf_isotopologues_indexes = []
    -126        
    -127        if self._mspeak_parent:
    -128            kendrick_dict_base = self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
    -129        else:
    -130            kendrick_dict_base = {'C':1, 'H':2}
    -131        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
    -132            kendrick_dict_base)  
    -133        
    -134    def __repr__(self):
    -135
    -136        return "MolecularFormula({0},{1},ion type = {2}".format(self._d_molecular_formula, self.ion_charge, self.ion_type)
    -137    
    -138    def __str__(self):
    -139
    -140        return "MolecularFormula {0}, ion_charge:{1}, ion type:{2}, m/z:{3} ".format(self.string, self.ion_charge, self.ion_type, self.mz_calc)
    -141    
    -142    def __len__(self):
    -143        
    -144        # crash if keys are not ordered
    -145        return len(self._d_molecular_formula.keys())
    -146        
    -147    def __getitem__(self, atom):
    -148        
    -149            #atom = list(self._d_molecular_formula.keys())[position]
    -150            if atom in self._d_molecular_formula.keys():
    -151                return self._d_molecular_formula[atom]
    -152            else:
    -153                return 0
    -154    def get(self, atom):
    -155        """Get the atom quantity of a specific atom.
    -156        
    -157        Parameters
    -158        ----------
    -159        atom : str
    -160            The atom symbol.
    -161            
    -162        Returns
    -163        -------
    -164        int
    -165            The atom quantity.
    -166        """
    -167        #atom = list(self._d_molecular_formula.keys())[position]
    -168        if atom in self._d_molecular_formula.keys():
    -169            return self._d_molecular_formula[atom]
    -170        else:
    -171            return 0
    -172                
    -173    def _from_dict(self, molecular_formula, ion_type, adduct_atom):
    -174        
    -175        self._d_molecular_formula = {key:val for key, val in molecular_formula.items() if val != 0}
    -176        
    -177        if ion_type is not None:
    -178            self._d_molecular_formula[Labels.ion_type] = ion_type
    -179            
    -180        if adduct_atom:
    -181            if adduct_atom in self._d_molecular_formula:
    -182                self._d_molecular_formula[adduct_atom] += 1 
    -183            else: self._d_molecular_formula[adduct_atom] = 1 
    -184        self.adduct_atom = adduct_atom
    -185
    -186    def _from_list(self, molecular_formula_list, ion_type, adduct_atom):
    -187        # list has to be in the format 
    -188        #['C', 10, 'H', 21, '13C', 1, 'Cl', 1, etc]  
    -189        self._d_molecular_formula = {}
    -190        for each in range(0, len(molecular_formula_list),2):
    -191            
    -192            atoms_label =  molecular_formula_list[each]
    -193            atoms_count = int(molecular_formula_list[each+1])
    -194            
    -195            if atoms_count > 0:
    -196                self._d_molecular_formula[atoms_label] = int(atoms_count)
    -197        
    -198        self._d_molecular_formula[Labels.ion_type] = ion_type
    -199        if adduct_atom:
    -200            self.adduct_atom = adduct_atom
    -201            if adduct_atom in self._d_molecular_formula:
    -202                self._d_molecular_formula[adduct_atom] += 1 
    -203            else: self._d_molecular_formula[adduct_atom] = 1 
    -204        else:
    -205            self.adduct_atom = None
    -206
    -207    def _from_str(self, molecular_formula_str,  ion_type, adduct_atom):
    -208        # string has to be in the format 
    -209        #'C10 H21 13C1 Cl1 37Cl1 etc'
    -210        # Check if there are spaces in the string
    -211        if ' ' not in molecular_formula_str:
    -212            raise ValueError("The molecular formula string should have spaces, input: %s" % molecular_formula_str)
    -213
    -214        # Split the string by spaces
    -215        # Grab the text before a digit for each element after splitting on spaces (atoms)
    -216        elements = [re.sub(r'\d+$', '', x) for x in molecular_formula_str.split()]
    -217        # Grab the digits at the end of each element after splitting on spaces (counts)
    -218        counts = [re.findall(r'\d+$', x)[0] for x in molecular_formula_str.split()]
    -219        # Check that the number of elements and counts are the same
    -220        if len(elements) != len(counts):
    -221            raise ValueError("The number of elements and counts do not match, input: %s" % molecular_formula_str)
    -222        
    -223        # Create a dictionary from the elements and counts and add it to the molecular formula
    -224        dict_ = dict(zip(elements, counts))
    -225        # Cast counts to integers
    -226        dict_ = {key: int(val) for key, val in dict_.items()}
    -227        self._from_dict(dict_, ion_type, adduct_atom)
    -228
    -229
    -230    def split(self, delimiters, string, maxsplit=0): #pragma: no cover
    -231        """Splits the molecular formula string.
    -232        
    -233        Parameters
    -234        ----------
    -235        delimiters : list
    -236            The list of delimiters.
    -237        string : str
    -238            The molecular formula string.
    -239        maxsplit : int, optional
    -240            The maximum number of splits. Defaults to 0.
    -241
    -242        Returns
    -243        -------
    -244        list
    -245            The molecular formula list.
    -246
    -247        Notes
    -248        -----
    -249        Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa.
    -250        """
    -251        regexPattern = '|'.join(map(re.escape, delimiters)) #pragma: no cover
    -252        isotopes = re.findall(regexPattern, string) #pragma: no cover
    -253        counts = re.split(regexPattern, string, maxsplit)  #pragma: no cover
    -254       
    -255        return [isotopes[0], int(counts[1])]
    -256
    -257    @property
    -258    def isotopologue_count_percentile(self, ):
    -259        if not len(self.expected_isotopologues) == 0:
    -260            return (len(self.mspeak_mf_isotopologues_indexes)/len(self.expected_isotopologues))*100
    -261        else: 
    -262            return 100
    -263
    -264    @property
    -265    def O_C(self): 
    -266            if 'O' in self._d_molecular_formula.keys():
    -267                # gather all the Os and Hs, regardless of the isotopic composition
    -268                Os =sum([self._d_molecular_formula.get(key) for key in ['O'] + Atoms.isotopes['O'][1] if key in self._d_molecular_formula.keys()])
    -269                Cs = sum([self._d_molecular_formula.get(key) for key in ['C'] + Atoms.isotopes['C'][1] if key in self._d_molecular_formula.keys()])
    -270                return Os/Cs
    -271            else:
    -272                return 0    
    -273    
    -274    @property
    -275    def H_C(self): 
    -276        # gather all the Cs and Hs, regardless of the isotopic composition
    -277        Cs = sum([self._d_molecular_formula.get(key) for key in ['C'] + Atoms.isotopes['C'][1] if key in self._d_molecular_formula.keys()])
    -278        Hs = sum([self._d_molecular_formula.get(key) for key in ['H'] + Atoms.isotopes['H'][1] if key in self._d_molecular_formula.keys()])
    -279        return Hs/Cs
    -280
    -281    @property
    -282    def A_I(self):
    -283        """Aromaticity index"""
    -284        return self._calc_aromaticity_index()
    -285
    -286    @property
    -287    def A_I_mod(self):
    -288        """Modified aromaticity index"""
    -289        return self._calc_aromaticity_index_mod()
    -290
    -291    @property
    -292    def nosc(self):
    -293        """Nominal oxidation state of carbon"""
    -294        return self._calc_nosc()
    -295    
    -296    @property
    -297    def dbe(self): return self._calc_dbe()
    -298    
    -299    @property
    -300    def mz_nominal_calc(self): return int(self._calc_mz())
    -301
    -302    @property    
    -303    def mz_error(self): return self._calc_assignment_mass_error()
    -304
    -305    @property
    -306    def mz_calc(self): return self._calc_mz()
    -307
    -308    @property
    -309    def protonated_mz(self): return self._protonated_mz(self.ion_charge)
    -310    
    -311    @property
    -312    def radical_mz(self): return self._radical_mz(self.ion_charge)
    -313    
    -314    @property
    -315    def neutral_mass(self): return self._neutral_mass()
    -316    
    -317    def adduct_mz(self, adduct_atom): 
    -318        """Get m/z of an adducted ion version of the molecular formula.
    -319        
    -320        Parameters
    -321        ----------
    -322        adduct_atom : str
    -323            The adduct atom.
    -324            
    -325        Returns
    -326        -------
    -327        float
    -328            The m/z value of the adducted ion version of the molecular formula.
    -329        """
    -330        return self._adduct_mz(adduct_atom, self.ion_charge)
    -331
    -332    @property
    -333    def ion_type(self): 
    -334        
    -335        ion_type = self._d_molecular_formula.get(Labels.ion_type)
    -336        if ion_type == Labels.protonated_de_ion:
    -337            if self.ion_charge > 0: 
    -338                return Labels.protonated
    -339            else: 
    -340                return Labels.de_protonated    
    -341        else:
    -342            return ion_type
    +            
     11class MolecularFormulaBase(MolecularFormulaCalc):
    + 12    """Base class for representing a molecular formula.
    + 13
    + 14    Parameters
    + 15    ----------
    + 16    molecular_formula : dict, list, str
    + 17        The molecular formula.
    + 18    ion_charge : int
    + 19        The ion charge.
    + 20    ion_type : str, optional
    + 21        The ion type. Defaults to None.
    + 22    adduct_atom : str, optional
    + 23        The adduct atom. Defaults to None.
    + 24    mspeak_parent : _MSPeak, optional
    + 25        The parent mass spectrum peak object instance. Defaults to None.
    + 26    external_mz : float, optional
    + 27        The external m/z value. Defaults to None.
    + 28
    + 29    Raises
    + 30    ------
    + 31    TypeError
    + 32        If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT'.
    + 33
    + 34    Attributes
    + 35    ----------
    + 36    isotopologue_count_percentile : float
    + 37        The isotopologue count percentile.
    + 38    O_C : float
    + 39        The O/C ratio.
    + 40    H_C : float
    + 41        The H/C ratio.
    + 42    dbe : float
    + 43        The double bond equivalent.
    + 44    mz_nominal_calc : int
    + 45        The nominal m/z value.
    + 46    mz_error : float
    + 47        The m/z error.
    + 48    mz_calc : float
    + 49        The m/z value.
    + 50    protonated_mz : float
    + 51        The protonated or deprotonated m/z value.
    + 52    radical_mz : float
    + 53        The radical m/z value.
    + 54    neutral_mass : float
    + 55        The neutral mass.
    + 56    ion_type : str
    + 57        The ion type.
    + 58    ion_charge : int
    + 59        The ion charge.
    + 60    atoms : list
    + 61        The atoms in the molecular formula.
    + 62    confidence_score : float
    + 63        The confidence score of the molecular formula identification.
    + 64    isotopologue_similarity : float
    + 65        The isotopologue similarity score of the molecular formula identification.
    + 66    average_mz_error_score : float
    + 67        The average m/z error score of the molecular formula identification, including the isotopologues.
    + 68    mz_error_score : float
    + 69        The m/z error score of the molecular formula identification.
    + 70    kmd : float
    + 71        The Kendrick mass defect (KMD).
    + 72    kendrick_mass : float
    + 73        The Kendrick mass.
    + 74    knm : float
    + 75        The nominal Kendrick mass.
    + 76    string : str
    + 77        The molecular formula string.
    + 78    string_formated : str
    + 79        The molecular formula string formated with subscripts and superscripts.
    + 80    class_label : str
    + 81        The class label.
    + 82    class_dict : dict
    + 83        The class dictionary.
    + 84
    + 85    Methods
    + 86    -------
    + 87    * change_kendrick_base(kendrick_dict_base).
    + 88        Change the Kendrick base.
    + 89    * isotopologues(min_abundance, current_mono_abundance, dynamic_range).
    + 90        Calculate the isotopologues.
    + 91    * atoms_qnt(atom).
    + 92        Get the atom quantity.
    + 93    * atoms_symbol(atom).
    + 94        Get the atom symbol without the mass number.
    + 95    * to_dict().
    + 96        Get the molecular formula as a dictionary.
    + 97    * to_list().
    + 98        Get the molecular formula as a list.
    + 99    """
    +100
    +101    def __init__(
    +102        self,
    +103        molecular_formula,
    +104        ion_charge,
    +105        ion_type=None,
    +106        adduct_atom=None,
    +107        mspeak_parent=None,
    +108        external_mz=None,
    +109    ):
    +110        # clear dictionary of atoms with 0 value
    +111        if type(molecular_formula) is dict:
    +112            self._from_dict(molecular_formula, ion_type, adduct_atom)
    +113
    +114        elif type(molecular_formula) is list:
    +115            self._from_list(molecular_formula, ion_type, adduct_atom)
    +116
    +117        elif type(molecular_formula) is str:
    +118            self._from_str(molecular_formula, ion_type, adduct_atom)
    +119
    +120        self._ion_charge = ion_charge
    +121        self._external_mz = external_mz
    +122        self._confidence_score = None
    +123        self._isotopologue_similarity = None
    +124        self._mz_error_score = None
    +125        self._mass_error_average_score = None
    +126
    +127        self.is_isotopologue = False
    +128
    +129        # parent mass spectrum peak obj instance
    +130        self._mspeak_parent = mspeak_parent
    +131
    +132        self.expected_isotopologues = []
    +133        self.mspeak_mf_isotopologues_indexes = []
    +134
    +135        if self._mspeak_parent:
    +136            kendrick_dict_base = (
    +137                self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
    +138            )
    +139        else:
    +140            kendrick_dict_base = {"C": 1, "H": 2}
    +141        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
    +142            kendrick_dict_base
    +143        )
    +144
    +145    def __repr__(self):
    +146        return "MolecularFormula({0},{1},ion type = {2}".format(
    +147            self._d_molecular_formula, self.ion_charge, self.ion_type
    +148        )
    +149
    +150    def __str__(self):
    +151        return "MolecularFormula {0}, ion_charge:{1}, ion type:{2}, m/z:{3} ".format(
    +152            self.string, self.ion_charge, self.ion_type, self.mz_calc
    +153        )
    +154
    +155    def __len__(self):
    +156        # crash if keys are not ordered
    +157        return len(self._d_molecular_formula.keys())
    +158
    +159    def __getitem__(self, atom):
    +160        # atom = list(self._d_molecular_formula.keys())[position]
    +161        if atom in self._d_molecular_formula.keys():
    +162            return self._d_molecular_formula[atom]
    +163        else:
    +164            return 0
    +165
    +166    def get(self, atom):
    +167        """Get the atom quantity of a specific atom.
    +168
    +169        Parameters
    +170        ----------
    +171        atom : str
    +172            The atom symbol.
    +173
    +174        Returns
    +175        -------
    +176        int
    +177            The atom quantity.
    +178        """
    +179        # atom = list(self._d_molecular_formula.keys())[position]
    +180        if atom in self._d_molecular_formula.keys():
    +181            return self._d_molecular_formula[atom]
    +182        else:
    +183            return 0
    +184
    +185    def _from_dict(self, molecular_formula, ion_type, adduct_atom):
    +186        self._d_molecular_formula = {
    +187            key: val for key, val in molecular_formula.items() if val != 0
    +188        }
    +189
    +190        if ion_type is not None:
    +191            self._d_molecular_formula[Labels.ion_type] = ion_type
    +192
    +193        if adduct_atom:
    +194            if adduct_atom in self._d_molecular_formula:
    +195                self._d_molecular_formula[adduct_atom] += 1
    +196            else:
    +197                self._d_molecular_formula[adduct_atom] = 1
    +198        self.adduct_atom = adduct_atom
    +199
    +200    def _from_list(self, molecular_formula_list, ion_type, adduct_atom):
    +201        # list has to be in the format
    +202        # ['C', 10, 'H', 21, '13C', 1, 'Cl', 1, etc]
    +203        self._d_molecular_formula = {}
    +204        for each in range(0, len(molecular_formula_list), 2):
    +205            atoms_label = molecular_formula_list[each]
    +206            atoms_count = int(molecular_formula_list[each + 1])
    +207
    +208            if atoms_count > 0:
    +209                self._d_molecular_formula[atoms_label] = int(atoms_count)
    +210
    +211        self._d_molecular_formula[Labels.ion_type] = ion_type
    +212        if adduct_atom:
    +213            self.adduct_atom = adduct_atom
    +214            if adduct_atom in self._d_molecular_formula:
    +215                self._d_molecular_formula[adduct_atom] += 1
    +216            else:
    +217                self._d_molecular_formula[adduct_atom] = 1
    +218        else:
    +219            self.adduct_atom = None
    +220
    +221    def _from_str(self, molecular_formula_str, ion_type, adduct_atom):
    +222        # string has to be in the format
    +223        #'C10 H21 13C1 Cl1 37Cl1 etc'
    +224        # Check if there are spaces in the string
    +225        if " " not in molecular_formula_str:
    +226            raise ValueError(
    +227                "The molecular formula string should have spaces, input: %s"
    +228                % molecular_formula_str
    +229            )
    +230
    +231        # Split the string by spaces
    +232        # Grab the text before a digit for each element after splitting on spaces (atoms)
    +233        elements = [re.sub(r"\d+$", "", x) for x in molecular_formula_str.split()]
    +234        # Grab the digits at the end of each element after splitting on spaces (counts)
    +235        counts = [re.findall(r"\d+$", x)[0] for x in molecular_formula_str.split()]
    +236        # Check that the number of elements and counts are the same
    +237        if len(elements) != len(counts):
    +238            raise ValueError(
    +239                "The number of elements and counts do not match, input: %s"
    +240                % molecular_formula_str
    +241            )
    +242
    +243        # Create a dictionary from the elements and counts and add it to the molecular formula
    +244        dict_ = dict(zip(elements, counts))
    +245        # Cast counts to integers
    +246        dict_ = {key: int(val) for key, val in dict_.items()}
    +247        self._from_dict(dict_, ion_type, adduct_atom)
    +248
    +249    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
    +250        """Splits the molecular formula string.
    +251
    +252        Parameters
    +253        ----------
    +254        delimiters : list
    +255            The list of delimiters.
    +256        string : str
    +257            The molecular formula string.
    +258        maxsplit : int, optional
    +259            The maximum number of splits. Defaults to 0.
    +260
    +261        Returns
    +262        -------
    +263        list
    +264            The molecular formula list.
    +265
    +266        Notes
    +267        -----
    +268        Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa.
    +269        """
    +270        regexPattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
    +271        isotopes = re.findall(regexPattern, string)  # pragma: no cover
    +272        counts = re.split(regexPattern, string, maxsplit)  # pragma: no cover
    +273
    +274        return [isotopes[0], int(counts[1])]
    +275
    +276    @property
    +277    def isotopologue_count_percentile(
    +278        self,
    +279    ):
    +280        if not len(self.expected_isotopologues) == 0:
    +281            return (
    +282                len(self.mspeak_mf_isotopologues_indexes)
    +283                / len(self.expected_isotopologues)
    +284            ) * 100
    +285        else:
    +286            return 100
    +287
    +288    @property
    +289    def O_C(self):
    +290        if "O" in self._d_molecular_formula.keys():
    +291            # gather all the Os and Hs, regardless of the isotopic composition
    +292            Os = sum(
    +293                [
    +294                    self._d_molecular_formula.get(key)
    +295                    for key in ["O"] + Atoms.isotopes["O"][1]
    +296                    if key in self._d_molecular_formula.keys()
    +297                ]
    +298            )
    +299            Cs = sum(
    +300                [
    +301                    self._d_molecular_formula.get(key)
    +302                    for key in ["C"] + Atoms.isotopes["C"][1]
    +303                    if key in self._d_molecular_formula.keys()
    +304                ]
    +305            )
    +306            return Os / Cs
    +307        else:
    +308            return 0
    +309
    +310    @property
    +311    def H_C(self):
    +312        # gather all the Cs and Hs, regardless of the isotopic composition
    +313        Cs = sum(
    +314            [
    +315                self._d_molecular_formula.get(key)
    +316                for key in ["C"] + Atoms.isotopes["C"][1]
    +317                if key in self._d_molecular_formula.keys()
    +318            ]
    +319        )
    +320        Hs = sum(
    +321            [
    +322                self._d_molecular_formula.get(key)
    +323                for key in ["H"] + Atoms.isotopes["H"][1]
    +324                if key in self._d_molecular_formula.keys()
    +325            ]
    +326        )
    +327        return Hs / Cs
    +328
    +329    @property
    +330    def A_I(self):
    +331        """Aromaticity index"""
    +332        return self._calc_aromaticity_index()
    +333
    +334    @property
    +335    def A_I_mod(self):
    +336        """Modified aromaticity index"""
    +337        return self._calc_aromaticity_index_mod()
    +338
    +339    @property
    +340    def nosc(self):
    +341        """Nominal oxidation state of carbon"""
    +342        return self._calc_nosc()
     343
    -344    @ion_type.setter
    -345    def ion_type(self, ion_type):
    -346        if  ion_type in [Labels.protonated_de_ion, Labels.adduct_ion, Labels.radical_ion]:
    -347            self._d_molecular_formula[Labels.ion_type] = ion_type
    -348        else:
    -349            raise TypeError("Ion type can only be: 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT', not %s"%ion_type)   
    -350
    -351    @property
    -352    def ion_charge(self): return self._ion_charge
    -353    
    -354    @property
    -355    def atoms(self): 
    -356        """Get the atoms in the molecular formula."""
    -357        # if there is an adduct_atom, them reduce it from the atoms list
    -358        if self.adduct_atom is None:
    -359            return [key for key in self._d_molecular_formula.keys() if key != Labels.ion_type]
    -360        else:
    -361            temp_dict = self._d_molecular_formula.copy()
    -362            temp_dict[self.adduct_atom] -= 1
    -363            return [key for key,val in temp_dict.items() if key != Labels.ion_type and val > 0]
    -364
    -365    
    -366    @property
    -367    def confidence_score(self): 
    -368        
    -369        if not self._confidence_score:
    -370            
    -371            self._confidence_score = self._calc_confidence_score()
    -372        
    -373        return self._confidence_score
    +344    @property
    +345    def dbe(self):
    +346        return self._calc_dbe()
    +347
    +348    @property
    +349    def mz_nominal_calc(self):
    +350        return int(self._calc_mz())
    +351
    +352    @property
    +353    def mz_error(self):
    +354        return self._calc_assignment_mass_error()
    +355
    +356    @property
    +357    def mz_calc(self):
    +358        return self._calc_mz()
    +359
    +360    @property
    +361    def protonated_mz(self):
    +362        return self._protonated_mz(self.ion_charge)
    +363
    +364    @property
    +365    def radical_mz(self):
    +366        return self._radical_mz(self.ion_charge)
    +367
    +368    @property
    +369    def neutral_mass(self):
    +370        return self._neutral_mass()
    +371
    +372    def adduct_mz(self, adduct_atom):
    +373        """Get m/z of an adducted ion version of the molecular formula.
     374
    -375    @property
    -376    def isotopologue_similarity(self): 
    -377        
    -378        if not self._isotopologue_similarity:
    -379           
    -380           self._isotopologue_similarity = self._calc_isotopologue_confidence()  
    -381       
    -382        return self._isotopologue_similarity  
    -383    
    -384    @property
    -385    def average_mz_error_score(self): 
    -386        
    -387        # includes the isotopologues
    -388        
    -389        if not self._mass_error_average_score:
    -390           
    -391           self._mass_error_average_score = self._calc_average_mz_score()  
    -392        
    -393        return self._mass_error_average_score
    -394
    -395    @property
    -396    def mz_error_score(self): 
    -397        if not self._mz_error_score:
    -398           
    -399           self._mz_error_score = self._calc_mz_confidence()  
    -400        
    -401        return self._mz_error_score
    -402    
    -403    @property
    -404    def kmd(self): return self._kmd
    -405
    -406    @property
    -407    def kendrick_mass(self): return self._kendrick_mass
    -408
    -409    @property
    -410    def knm(self): return self._nominal_km
    +375        Parameters
    +376        ----------
    +377        adduct_atom : str
    +378            The adduct atom.
    +379
    +380        Returns
    +381        -------
    +382        float
    +383            The m/z value of the adducted ion version of the molecular formula.
    +384        """
    +385        return self._adduct_mz(adduct_atom, self.ion_charge)
    +386
    +387    @property
    +388    def ion_type(self):
    +389        ion_type = self._d_molecular_formula.get(Labels.ion_type)
    +390        if ion_type == Labels.protonated_de_ion:
    +391            if self.ion_charge > 0:
    +392                return Labels.protonated
    +393            else:
    +394                return Labels.de_protonated
    +395        else:
    +396            return ion_type
    +397
    +398    @ion_type.setter
    +399    def ion_type(self, ion_type):
    +400        if ion_type in [
    +401            Labels.protonated_de_ion,
    +402            Labels.adduct_ion,
    +403            Labels.radical_ion,
    +404        ]:
    +405            self._d_molecular_formula[Labels.ion_type] = ion_type
    +406        else:
    +407            raise TypeError(
    +408                "Ion type can only be: 'DE_OR_PROTONATED', 'RADICAL' or  'ADDUCT', not %s"
    +409                % ion_type
    +410            )
     411
    -412    def change_kendrick_base(self, kendrick_dict_base):
    -413        """Change the Kendrick base.
    -414
    -415        Parameters
    -416        ----------
    -417        kendrick_dict_base : dict
    -418            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
    -419        """ 
    -420        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(kendrick_dict_base)
    -421                
    -422    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range): 
    -423        """Calculate the isotopologues for a given molecular formula.
    -424
    -425        Parameters
    -426        ----------
    -427        min_abundance : float
    -428            The minimum abundance.
    -429        current_mono_abundance : float
    -430            The current monoisotopic abundance.
    -431        dynamic_range : float
    -432            The dynamic range.
    -433
    -434        Yields
    -435        ------
    -436        MolecularFormulaIsotopologue
    -437            The molecular formula isotopologue.
    -438
    -439        Notes
    -440        -----
    -441        This calculation ignores the hydrogen isotopes.
    -442        """
    -443        isotopologues = []
    -444        for mf in self._cal_isotopologues(self._d_molecular_formula, min_abundance, current_mono_abundance, dynamic_range ):
    -445            isotopologues.append(mf)
    -446        
    -447        # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions. 
    -448        sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True)
    -449
    -450        for mf in sorted_isotopologues:
    -451            yield MolecularFormulaIsotopologue(
    -452                *mf, 
    -453                current_mono_abundance, 
    -454                self.ion_charge, 
    -455                ion_type=self.ion_type, 
    -456                adduct_atom=self.adduct_atom
    -457                )
    -458    
    -459    def atoms_qnt(self,atom): 
    -460        """Get the atom quantity of a specific atom in the molecular formula."""
    -461        if atom in self._d_molecular_formula:
    -462            return self._d_molecular_formula.get(atom)
    -463        else:
    -464            raise Warning('Could not find %s in this Molecular Formula object'%str(atom))
    -465    
    -466    def atoms_symbol(self, atom): 
    -467        """Get the atom symbol without the mass number."""
    -468        return ''.join([i for i in atom if not i.isdigit()])
    -469
    -470    @property       
    -471    def string(self):
    -472        """Returns the molecular formula as a string."""
    -473        if self._d_molecular_formula:
    -474            if self.adduct_atom is None:
    -475                mol_form_dict = self._d_molecular_formula
    -476            else:
    -477                mol_form_dict = self._d_molecular_formula.copy()
    -478                if self.adduct_atom not in mol_form_dict.keys():
    -479                    raise Exception("Adduct atom not found in molecular formula dict")
    -480                mol_form_dict[self.adduct_atom] -= 1
    -481                mol_form_dict = {key:val for key, val in mol_form_dict.items() if val != 0}
    -482            formula_srt = ''
    -483            for atom in Atoms.atoms_order:
    -484                if atom in mol_form_dict.keys():
    -485                    formula_srt += atom + str(int(mol_form_dict.get(atom))) + ' '
    -486            return formula_srt.strip()
    -487        
    -488        else:
    -489            raise Exception("Molecular formula identification not performed yet")    
    -490    
    -491    @property
    -492    def string_formated(self):
    -493        
    -494        SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
    -495        SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
    -496        
    -497        if self._d_molecular_formula:
    -498            formula_srt = ''
    -499            for atom in Atoms.atoms_order:
    -500                if atom in self.to_dict().keys():
    -501                    formula_srt += atom.translate(SUP) + str(int(self.to_dict().get(atom))).translate(SUB)
    -502            return formula_srt
    -503        
    -504        else:
    -505            raise Exception("Molecular formula identification not performed yet")    
    -506
    -507    def to_dict(self):
    -508        """Returns the molecular formula as a dictionary.
    -509        
    -510        Returns
    -511        -------
    -512        dict
    -513            The molecular formula as a dictionary.
    -514        """
    -515        return self._d_molecular_formula
    -516
    -517    def to_list(self):
    -518        """Returns the molecular formula as a list.
    -519        
    -520        Returns
    -521        -------
    -522        list
    -523            The molecular formula as a list.
    -524            
    -525        Raises
    -526        ------
    -527        Exception
    -528            If the molecular formula identification was not performed yet.
    -529        """
    -530        #TODO ensure self._d_molecular_formula is a orderedDict
    -531        
    -532        if self._d_molecular_formula:
    -533            formula_list = []    
    -534            
    -535            for atom, atom_number in self._d_molecular_formula.items():
    -536    
    -537                if atom != Labels.ion_type:
    -538                    
    -539                    formula_list.append(atom)
    -540                    formula_list.append(atom_number)
    -541    
    -542            return formula_list
    -543        else:
    -544            raise Exception("Molecular formula identification not performed yet")
    -545    
    -546    @property
    -547    def class_label(self):
    -548        
    -549        if self._d_molecular_formula:
    -550            
    -551            formulalist = self.to_list()
    -552            classstring = '' 
    -553            
    -554            for each in range(0, len(formulalist),2):
    -555                
    -556                if formulalist[each] != 'C' and formulalist[each] != 'H' and formulalist[each] != 'HC':
    -557                     
    -558                    classstring = classstring + str(formulalist[each]) + str(formulalist[each+1]) + ' '    
    -559            
    -560            if classstring == '': classstring = 'HC'
    -561                
    -562            classstring = classstring.strip()
    -563            
    -564            if self._d_molecular_formula.get(Labels.ion_type) == Labels.radical_ion:    
    -565                
    -566                return classstring + ' -R'
    -567            
    -568            #elif self._d_molecular_formula.get(Labels.ion_type) == Labels.adduct_ion:    
    -569                
    -570            #    return classstring + ' -A'
    +412    @property
    +413    def ion_charge(self):
    +414        return self._ion_charge
    +415
    +416    @property
    +417    def atoms(self):
    +418        """Get the atoms in the molecular formula."""
    +419        # if there is an adduct_atom, them reduce it from the atoms list
    +420        if self.adduct_atom is None:
    +421            return [
    +422                key
    +423                for key in self._d_molecular_formula.keys()
    +424                if key != Labels.ion_type
    +425            ]
    +426        else:
    +427            temp_dict = self._d_molecular_formula.copy()
    +428            temp_dict[self.adduct_atom] -= 1
    +429            return [
    +430                key
    +431                for key, val in temp_dict.items()
    +432                if key != Labels.ion_type and val > 0
    +433            ]
    +434
    +435    @property
    +436    def confidence_score(self):
    +437        if not self._confidence_score:
    +438            self._confidence_score = self._calc_confidence_score()
    +439
    +440        return self._confidence_score
    +441
    +442    @property
    +443    def isotopologue_similarity(self):
    +444        if not self._isotopologue_similarity:
    +445            self._isotopologue_similarity = self._calc_isotopologue_confidence()
    +446
    +447        return self._isotopologue_similarity
    +448
    +449    @property
    +450    def average_mz_error_score(self):
    +451        # includes the isotopologues
    +452
    +453        if not self._mass_error_average_score:
    +454            self._mass_error_average_score = self._calc_average_mz_score()
    +455
    +456        return self._mass_error_average_score
    +457
    +458    @property
    +459    def mz_error_score(self):
    +460        if not self._mz_error_score:
    +461            self._mz_error_score = self._calc_mz_confidence()
    +462
    +463        return self._mz_error_score
    +464
    +465    @property
    +466    def kmd(self):
    +467        return self._kmd
    +468
    +469    @property
    +470    def kendrick_mass(self):
    +471        return self._kendrick_mass
    +472
    +473    @property
    +474    def knm(self):
    +475        return self._nominal_km
    +476
    +477    def change_kendrick_base(self, kendrick_dict_base):
    +478        """Change the Kendrick base.
    +479
    +480        Parameters
    +481        ----------
    +482        kendrick_dict_base : dict
    +483            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
    +484        """
    +485        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
    +486            kendrick_dict_base
    +487        )
    +488
    +489    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range):
    +490        """Calculate the isotopologues for a given molecular formula.
    +491
    +492        Parameters
    +493        ----------
    +494        min_abundance : float
    +495            The minimum abundance.
    +496        current_mono_abundance : float
    +497            The current monoisotopic abundance.
    +498        dynamic_range : float
    +499            The dynamic range.
    +500
    +501        Yields
    +502        ------
    +503        MolecularFormulaIsotopologue
    +504            The molecular formula isotopologue.
    +505
    +506        Notes
    +507        -----
    +508        This calculation ignores the hydrogen isotopes.
    +509        """
    +510        isotopologues = []
    +511        for mf in self._cal_isotopologues(
    +512            self._d_molecular_formula,
    +513            min_abundance,
    +514            current_mono_abundance,
    +515            dynamic_range,
    +516        ):
    +517            isotopologues.append(mf)
    +518
    +519        # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions.
    +520        sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True)
    +521
    +522        for mf in sorted_isotopologues:
    +523            yield MolecularFormulaIsotopologue(
    +524                *mf,
    +525                current_mono_abundance,
    +526                self.ion_charge,
    +527                ion_type=self.ion_type,
    +528                adduct_atom=self.adduct_atom,
    +529            )
    +530
    +531    def atoms_qnt(self, atom):
    +532        """Get the atom quantity of a specific atom in the molecular formula."""
    +533        if atom in self._d_molecular_formula:
    +534            return self._d_molecular_formula.get(atom)
    +535        else:
    +536            raise Warning(
    +537                "Could not find %s in this Molecular Formula object" % str(atom)
    +538            )
    +539
    +540    def atoms_symbol(self, atom):
    +541        """Get the atom symbol without the mass number."""
    +542        return "".join([i for i in atom if not i.isdigit()])
    +543
    +544    @property
    +545    def string(self):
    +546        """Returns the molecular formula as a string."""
    +547        if self._d_molecular_formula:
    +548            if self.adduct_atom is None:
    +549                mol_form_dict = self._d_molecular_formula
    +550            else:
    +551                mol_form_dict = self._d_molecular_formula.copy()
    +552                if self.adduct_atom not in mol_form_dict.keys():
    +553                    raise Exception("Adduct atom not found in molecular formula dict")
    +554                mol_form_dict[self.adduct_atom] -= 1
    +555                mol_form_dict = {
    +556                    key: val for key, val in mol_form_dict.items() if val != 0
    +557                }
    +558            formula_srt = ""
    +559            for atom in Atoms.atoms_order:
    +560                if atom in mol_form_dict.keys():
    +561                    formula_srt += atom + str(int(mol_form_dict.get(atom))) + " "
    +562            return formula_srt.strip()
    +563
    +564        else:
    +565            raise Exception("Molecular formula identification not performed yet")
    +566
    +567    @property
    +568    def string_formated(self):
    +569        SUB = str.maketrans("0123456789", "₀₁₂₃₄₅₆₇₈₉")
    +570        SUP = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
     571
    -572            else: return classstring
    -573            
    -574            #'dict, tuple or string'
    -575        
    -576        else:
    -577            
    -578            raise Exception("Molecular formula identification not performed yet")        
    -579    
    -580    @property
    -581    def class_dict(self):
    -582        
    -583        if self._d_molecular_formula:
    -584            
    -585            class_dict = {}
    -586            
    -587            for atom, qnt in self._d_molecular_formula.items():
    -588    
    -589                if atom != Labels.ion_type and atom !='C' and atom !='H':
    -590                    
    -591                    class_dict[atom] = qnt
    -592                    
    -593            return class_dict
    -594        
    -595        raise Exception("Molecular formula identification not performed yet")           
    +572        if self._d_molecular_formula:
    +573            formula_srt = ""
    +574            for atom in Atoms.atoms_order:
    +575                if atom in self.to_dict().keys():
    +576                    formula_srt += atom.translate(SUP) + str(
    +577                        int(self.to_dict().get(atom))
    +578                    ).translate(SUB)
    +579            return formula_srt
    +580
    +581        else:
    +582            raise Exception("Molecular formula identification not performed yet")
    +583
    +584    def to_dict(self):
    +585        """Returns the molecular formula as a dictionary.
    +586
    +587        Returns
    +588        -------
    +589        dict
    +590            The molecular formula as a dictionary.
    +591        """
    +592        return self._d_molecular_formula
    +593
    +594    def to_list(self):
    +595        """Returns the molecular formula as a list.
    +596
    +597        Returns
    +598        -------
    +599        list
    +600            The molecular formula as a list.
    +601
    +602        Raises
    +603        ------
    +604        Exception
    +605            If the molecular formula identification was not performed yet.
    +606        """
    +607        # TODO ensure self._d_molecular_formula is a orderedDict
    +608
    +609        if self._d_molecular_formula:
    +610            formula_list = []
    +611
    +612            for atom, atom_number in self._d_molecular_formula.items():
    +613                if atom != Labels.ion_type:
    +614                    formula_list.append(atom)
    +615                    formula_list.append(atom_number)
    +616
    +617            return formula_list
    +618        else:
    +619            raise Exception("Molecular formula identification not performed yet")
    +620
    +621    @property
    +622    def class_label(self):
    +623        if self._d_molecular_formula:
    +624            formulalist = self.to_list()
    +625            classstring = ""
    +626
    +627            for each in range(0, len(formulalist), 2):
    +628                if (
    +629                    formulalist[each] != "C"
    +630                    and formulalist[each] != "H"
    +631                    and formulalist[each] != "HC"
    +632                ):
    +633                    classstring = (
    +634                        classstring
    +635                        + str(formulalist[each])
    +636                        + str(formulalist[each + 1])
    +637                        + " "
    +638                    )
    +639
    +640            if classstring == "":
    +641                classstring = "HC"
    +642
    +643            classstring = classstring.strip()
    +644
    +645            if self._d_molecular_formula.get(Labels.ion_type) == Labels.radical_ion:
    +646                return classstring + " -R"
    +647
    +648            # elif self._d_molecular_formula.get(Labels.ion_type) == Labels.adduct_ion:
    +649
    +650            #    return classstring + ' -A'
    +651
    +652            else:
    +653                return classstring
    +654
    +655            #'dict, tuple or string'
    +656
    +657        else:
    +658            raise Exception("Molecular formula identification not performed yet")
    +659
    +660    @property
    +661    def class_dict(self):
    +662        if self._d_molecular_formula:
    +663            class_dict = {}
    +664
    +665            for atom, qnt in self._d_molecular_formula.items():
    +666                if atom != Labels.ion_type and atom != "C" and atom != "H":
    +667                    class_dict[atom] = qnt
    +668
    +669            return class_dict
    +670
    +671        raise Exception("Molecular formula identification not performed yet")
     
    @@ -1736,39 +1915,49 @@
    Methods
    -
    100    def __init__(self, molecular_formula, ion_charge, ion_type=None, 
    -101                adduct_atom=None, mspeak_parent=None, external_mz=None):
    -102        # clear dictionary of atoms with 0 value
    -103        if  type(molecular_formula) is dict:
    -104                self._from_dict(molecular_formula, ion_type, adduct_atom)   
    -105        
    -106        elif type(molecular_formula) is list:
    -107                self._from_list(molecular_formula, ion_type, adduct_atom)   
    -108        
    -109        elif type(molecular_formula) is str:
    -110                self._from_str(molecular_formula, ion_type, adduct_atom)   
    -111
    -112        self._ion_charge = ion_charge
    -113        self._external_mz = external_mz
    -114        self._confidence_score = None        
    -115        self._isotopologue_similarity = None
    -116        self._mz_error_score = None
    -117        self._mass_error_average_score = None
    -118
    -119        self.is_isotopologue = False
    -120        
    -121        # parent mass spectrum peak obj instance
    -122        self._mspeak_parent = mspeak_parent
    -123
    -124        self.expected_isotopologues = []
    -125        self.mspeak_mf_isotopologues_indexes = []
    -126        
    -127        if self._mspeak_parent:
    -128            kendrick_dict_base = self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
    -129        else:
    -130            kendrick_dict_base = {'C':1, 'H':2}
    -131        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
    -132            kendrick_dict_base)  
    +            
    101    def __init__(
    +102        self,
    +103        molecular_formula,
    +104        ion_charge,
    +105        ion_type=None,
    +106        adduct_atom=None,
    +107        mspeak_parent=None,
    +108        external_mz=None,
    +109    ):
    +110        # clear dictionary of atoms with 0 value
    +111        if type(molecular_formula) is dict:
    +112            self._from_dict(molecular_formula, ion_type, adduct_atom)
    +113
    +114        elif type(molecular_formula) is list:
    +115            self._from_list(molecular_formula, ion_type, adduct_atom)
    +116
    +117        elif type(molecular_formula) is str:
    +118            self._from_str(molecular_formula, ion_type, adduct_atom)
    +119
    +120        self._ion_charge = ion_charge
    +121        self._external_mz = external_mz
    +122        self._confidence_score = None
    +123        self._isotopologue_similarity = None
    +124        self._mz_error_score = None
    +125        self._mass_error_average_score = None
    +126
    +127        self.is_isotopologue = False
    +128
    +129        # parent mass spectrum peak obj instance
    +130        self._mspeak_parent = mspeak_parent
    +131
    +132        self.expected_isotopologues = []
    +133        self.mspeak_mf_isotopologues_indexes = []
    +134
    +135        if self._mspeak_parent:
    +136            kendrick_dict_base = (
    +137                self._mspeak_parent._ms_parent.mspeaks_settings.kendrick_base
    +138            )
    +139        else:
    +140            kendrick_dict_base = {"C": 1, "H": 2}
    +141        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
    +142            kendrick_dict_base
    +143        )
     
    @@ -1819,24 +2008,24 @@
    Methods
    -
    154    def get(self, atom):
    -155        """Get the atom quantity of a specific atom.
    -156        
    -157        Parameters
    -158        ----------
    -159        atom : str
    -160            The atom symbol.
    -161            
    -162        Returns
    -163        -------
    -164        int
    -165            The atom quantity.
    -166        """
    -167        #atom = list(self._d_molecular_formula.keys())[position]
    -168        if atom in self._d_molecular_formula.keys():
    -169            return self._d_molecular_formula[atom]
    -170        else:
    -171            return 0
    +            
    166    def get(self, atom):
    +167        """Get the atom quantity of a specific atom.
    +168
    +169        Parameters
    +170        ----------
    +171        atom : str
    +172            The atom symbol.
    +173
    +174        Returns
    +175        -------
    +176        int
    +177            The atom quantity.
    +178        """
    +179        # atom = list(self._d_molecular_formula.keys())[position]
    +180        if atom in self._d_molecular_formula.keys():
    +181            return self._d_molecular_formula[atom]
    +182        else:
    +183            return 0
     
    @@ -1869,32 +2058,32 @@
    Returns
    -
    230    def split(self, delimiters, string, maxsplit=0): #pragma: no cover
    -231        """Splits the molecular formula string.
    -232        
    -233        Parameters
    -234        ----------
    -235        delimiters : list
    -236            The list of delimiters.
    -237        string : str
    -238            The molecular formula string.
    -239        maxsplit : int, optional
    -240            The maximum number of splits. Defaults to 0.
    -241
    -242        Returns
    -243        -------
    -244        list
    -245            The molecular formula list.
    -246
    -247        Notes
    -248        -----
    -249        Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa.
    -250        """
    -251        regexPattern = '|'.join(map(re.escape, delimiters)) #pragma: no cover
    -252        isotopes = re.findall(regexPattern, string) #pragma: no cover
    -253        counts = re.split(regexPattern, string, maxsplit)  #pragma: no cover
    -254       
    -255        return [isotopes[0], int(counts[1])]
    +            
    249    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
    +250        """Splits the molecular formula string.
    +251
    +252        Parameters
    +253        ----------
    +254        delimiters : list
    +255            The list of delimiters.
    +256        string : str
    +257            The molecular formula string.
    +258        maxsplit : int, optional
    +259            The maximum number of splits. Defaults to 0.
    +260
    +261        Returns
    +262        -------
    +263        list
    +264            The molecular formula list.
    +265
    +266        Notes
    +267        -----
    +268        Does not work when formula has atoms with same characters in a row that below to different atoms, i.e. C10H21NNa.
    +269        """
    +270        regexPattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
    +271        isotopes = re.findall(regexPattern, string)  # pragma: no cover
    +272        counts = re.split(regexPattern, string, maxsplit)  # pragma: no cover
    +273
    +274        return [isotopes[0], int(counts[1])]
     
    @@ -2084,20 +2273,20 @@
    Notes
    -
    317    def adduct_mz(self, adduct_atom): 
    -318        """Get m/z of an adducted ion version of the molecular formula.
    -319        
    -320        Parameters
    -321        ----------
    -322        adduct_atom : str
    -323            The adduct atom.
    -324            
    -325        Returns
    -326        -------
    -327        float
    -328            The m/z value of the adducted ion version of the molecular formula.
    -329        """
    -330        return self._adduct_mz(adduct_atom, self.ion_charge)
    +            
    372    def adduct_mz(self, adduct_atom):
    +373        """Get m/z of an adducted ion version of the molecular formula.
    +374
    +375        Parameters
    +376        ----------
    +377        adduct_atom : str
    +378            The adduct atom.
    +379
    +380        Returns
    +381        -------
    +382        float
    +383            The m/z value of the adducted ion version of the molecular formula.
    +384        """
    +385        return self._adduct_mz(adduct_atom, self.ion_charge)
     
    @@ -2242,15 +2431,17 @@
    Returns
    -
    412    def change_kendrick_base(self, kendrick_dict_base):
    -413        """Change the Kendrick base.
    -414
    -415        Parameters
    -416        ----------
    -417        kendrick_dict_base : dict
    -418            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
    -419        """ 
    -420        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(kendrick_dict_base)
    +            
    477    def change_kendrick_base(self, kendrick_dict_base):
    +478        """Change the Kendrick base.
    +479
    +480        Parameters
    +481        ----------
    +482        kendrick_dict_base : dict
    +483            The Kendrick base dictionary. Ex: {"C": 1, "H": 2}
    +484        """
    +485        self._kmd, self._kendrick_mass, self._nominal_km = self._calc_kmd(
    +486            kendrick_dict_base
    +487        )
     
    @@ -2277,42 +2468,47 @@
    Parameters
    -
    422    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range): 
    -423        """Calculate the isotopologues for a given molecular formula.
    -424
    -425        Parameters
    -426        ----------
    -427        min_abundance : float
    -428            The minimum abundance.
    -429        current_mono_abundance : float
    -430            The current monoisotopic abundance.
    -431        dynamic_range : float
    -432            The dynamic range.
    -433
    -434        Yields
    -435        ------
    -436        MolecularFormulaIsotopologue
    -437            The molecular formula isotopologue.
    -438
    -439        Notes
    -440        -----
    -441        This calculation ignores the hydrogen isotopes.
    -442        """
    -443        isotopologues = []
    -444        for mf in self._cal_isotopologues(self._d_molecular_formula, min_abundance, current_mono_abundance, dynamic_range ):
    -445            isotopologues.append(mf)
    -446        
    -447        # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions. 
    -448        sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True)
    -449
    -450        for mf in sorted_isotopologues:
    -451            yield MolecularFormulaIsotopologue(
    -452                *mf, 
    -453                current_mono_abundance, 
    -454                self.ion_charge, 
    -455                ion_type=self.ion_type, 
    -456                adduct_atom=self.adduct_atom
    -457                )
    +            
    489    def isotopologues(self, min_abundance, current_mono_abundance, dynamic_range):
    +490        """Calculate the isotopologues for a given molecular formula.
    +491
    +492        Parameters
    +493        ----------
    +494        min_abundance : float
    +495            The minimum abundance.
    +496        current_mono_abundance : float
    +497            The current monoisotopic abundance.
    +498        dynamic_range : float
    +499            The dynamic range.
    +500
    +501        Yields
    +502        ------
    +503        MolecularFormulaIsotopologue
    +504            The molecular formula isotopologue.
    +505
    +506        Notes
    +507        -----
    +508        This calculation ignores the hydrogen isotopes.
    +509        """
    +510        isotopologues = []
    +511        for mf in self._cal_isotopologues(
    +512            self._d_molecular_formula,
    +513            min_abundance,
    +514            current_mono_abundance,
    +515            dynamic_range,
    +516        ):
    +517            isotopologues.append(mf)
    +518
    +519        # To account for differences in how the isotopologue outputs are sorted between IsoSpec versions.
    +520        sorted_isotopologues = sorted(isotopologues, key=lambda mf: mf[1], reverse=True)
    +521
    +522        for mf in sorted_isotopologues:
    +523            yield MolecularFormulaIsotopologue(
    +524                *mf,
    +525                current_mono_abundance,
    +526                self.ion_charge,
    +527                ion_type=self.ion_type,
    +528                adduct_atom=self.adduct_atom,
    +529            )
     
    @@ -2353,12 +2549,14 @@
    Notes
    -
    459    def atoms_qnt(self,atom): 
    -460        """Get the atom quantity of a specific atom in the molecular formula."""
    -461        if atom in self._d_molecular_formula:
    -462            return self._d_molecular_formula.get(atom)
    -463        else:
    -464            raise Warning('Could not find %s in this Molecular Formula object'%str(atom))
    +            
    531    def atoms_qnt(self, atom):
    +532        """Get the atom quantity of a specific atom in the molecular formula."""
    +533        if atom in self._d_molecular_formula:
    +534            return self._d_molecular_formula.get(atom)
    +535        else:
    +536            raise Warning(
    +537                "Could not find %s in this Molecular Formula object" % str(atom)
    +538            )
     
    @@ -2378,9 +2576,9 @@
    Notes
    -
    466    def atoms_symbol(self, atom): 
    -467        """Get the atom symbol without the mass number."""
    -468        return ''.join([i for i in atom if not i.isdigit()])
    +            
    540    def atoms_symbol(self, atom):
    +541        """Get the atom symbol without the mass number."""
    +542        return "".join([i for i in atom if not i.isdigit()])
     
    @@ -2424,15 +2622,15 @@
    Notes
    -
    507    def to_dict(self):
    -508        """Returns the molecular formula as a dictionary.
    -509        
    -510        Returns
    -511        -------
    -512        dict
    -513            The molecular formula as a dictionary.
    -514        """
    -515        return self._d_molecular_formula
    +            
    584    def to_dict(self):
    +585        """Returns the molecular formula as a dictionary.
    +586
    +587        Returns
    +588        -------
    +589        dict
    +590            The molecular formula as a dictionary.
    +591        """
    +592        return self._d_molecular_formula
     
    @@ -2458,34 +2656,32 @@
    Returns
    -
    517    def to_list(self):
    -518        """Returns the molecular formula as a list.
    -519        
    -520        Returns
    -521        -------
    -522        list
    -523            The molecular formula as a list.
    -524            
    -525        Raises
    -526        ------
    -527        Exception
    -528            If the molecular formula identification was not performed yet.
    -529        """
    -530        #TODO ensure self._d_molecular_formula is a orderedDict
    -531        
    -532        if self._d_molecular_formula:
    -533            formula_list = []    
    -534            
    -535            for atom, atom_number in self._d_molecular_formula.items():
    -536    
    -537                if atom != Labels.ion_type:
    -538                    
    -539                    formula_list.append(atom)
    -540                    formula_list.append(atom_number)
    -541    
    -542            return formula_list
    -543        else:
    -544            raise Exception("Molecular formula identification not performed yet")
    +            
    594    def to_list(self):
    +595        """Returns the molecular formula as a list.
    +596
    +597        Returns
    +598        -------
    +599        list
    +600            The molecular formula as a list.
    +601
    +602        Raises
    +603        ------
    +604        Exception
    +605            If the molecular formula identification was not performed yet.
    +606        """
    +607        # TODO ensure self._d_molecular_formula is a orderedDict
    +608
    +609        if self._d_molecular_formula:
    +610            formula_list = []
    +611
    +612            for atom, atom_number in self._d_molecular_formula.items():
    +613                if atom != Labels.ion_type:
    +614                    formula_list.append(atom)
    +615                    formula_list.append(atom_number)
    +616
    +617            return formula_list
    +618        else:
    +619            raise Exception("Molecular formula identification not performed yet")
     
    @@ -2551,107 +2747,107 @@
    Inherited Members
    -
    598class MolecularFormulaIsotopologue(MolecularFormulaBase):
    -599    """Class for representing a molecular formula isotopologue.
    -600    
    -601    Parameters
    -602    ----------
    -603    _d_molecular_formula : dict
    -604        The molecular formula as a dictionary.
    -605    prob_ratio : float
    -606        The probability ratio.
    -607    mono_abundance : float
    -608        The monoisotopic abundance.
    -609    ion_charge : int
    -610        The ion charge.
    -611    mspeak_parent : object, optional
    -612        The parent mass spectrum peak object instance. Defaults to None.
    -613    ion_type : str, optional
    -614        The ion type. Defaults to None.
    -615    adduct_atom : str, optional
    -616        The adduct atom. Defaults to None.
    -617    
    -618    Attributes
    -619    ----------
    -620    prob_ratio : float
    -621        The probability ratio.
    -622    abundance_calc : float
    -623        The calculated abundance.
    -624    area_error : float
    -625        The area error.
    -626    abundance_error : float
    -627        The abundance error.
    -628    is_isotopologue : bool
    -629        The isotopologue flag. Defaults to True.
    -630    mspeak_index_mono_isotopic : int
    -631        The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    -632    mono_isotopic_formula_index : int
    -633        The index of the monoisotopic formula in the molecular formula list. Defaults to None.
    -634    """
    -635    def __init__(
    -636            self, 
    -637            _d_molecular_formula, 
    -638            prob_ratio, 
    -639            mono_abundance, 
    -640            ion_charge, 
    -641            mspeak_parent=None,
    -642            ion_type = None,
    -643            adduct_atom = None
    -644            ):
    -645        
    -646        if ion_type is None:
    -647            # check if ion type or adduct_atom is in the molecular formula dict
    -648            if Labels.ion_type in _d_molecular_formula:
    -649                ion_type = _d_molecular_formula.get(Labels.ion_type)
    -650            else:
    -651                ion_type = None
    -652        else:
    -653            ion_type = Labels.ion_type_translate.get(ion_type)
    -654        
    -655        if ion_type == Labels.adduct_ion:
    -656            adduct_atom_int = None
    -657            if adduct_atom in _d_molecular_formula.keys():
    -658                adduct_atom_int = adduct_atom
    -659            else:
    -660                # Check to see if adduct_atom should actually be an isotope of the adduct atom
    -661                for adduct_iso in Atoms.isotopes.get(adduct_atom)[1]:
    -662                    if adduct_iso in _d_molecular_formula.keys():
    -663                        adduct_atom_int = adduct_iso
    -664            adduct_atom = adduct_atom_int
    -665            if adduct_atom is None:
    -666                raise Exception("adduct_atom is required for adduct ion")
    -667            _d_molecular_formula[adduct_atom] -= 1
    -668            _d_molecular_formula = {key:val for key, val in _d_molecular_formula.items() if val != 0}
    -669
    -670        
    -671        super().__init__(
    -672            molecular_formula =_d_molecular_formula, 
    -673            ion_charge = ion_charge, 
    -674            ion_type=ion_type,
    -675            adduct_atom=adduct_atom
    -676            )
    -677        #prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
    -678        
    -679        self.prob_ratio = prob_ratio
    -680        
    -681        self.abundance_calc = mono_abundance * prob_ratio
    -682
    -683        self.is_isotopologue = True
    -684        
    -685        self.mspeak_index_mono_isotopic = None
    -686
    -687        self.mono_isotopic_formula_index = None
    -688        # parent mass spectrum peak obj instance
    -689        self._mspeak_parent = mspeak_parent
    -690
    -691    
    -692    @property
    -693    def area_error(self):
    -694        return self._calc_area_error()
    -695
    -696    @property
    -697    def abundance_error(self):
    -698        return self._calc_abundance_error()
    +            
    674class MolecularFormulaIsotopologue(MolecularFormulaBase):
    +675    """Class for representing a molecular formula isotopologue.
    +676
    +677    Parameters
    +678    ----------
    +679    _d_molecular_formula : dict
    +680        The molecular formula as a dictionary.
    +681    prob_ratio : float
    +682        The probability ratio.
    +683    mono_abundance : float
    +684        The monoisotopic abundance.
    +685    ion_charge : int
    +686        The ion charge.
    +687    mspeak_parent : object, optional
    +688        The parent mass spectrum peak object instance. Defaults to None.
    +689    ion_type : str, optional
    +690        The ion type. Defaults to None.
    +691    adduct_atom : str, optional
    +692        The adduct atom. Defaults to None.
    +693
    +694    Attributes
    +695    ----------
    +696    prob_ratio : float
    +697        The probability ratio.
    +698    abundance_calc : float
    +699        The calculated abundance.
    +700    area_error : float
    +701        The area error.
    +702    abundance_error : float
    +703        The abundance error.
    +704    is_isotopologue : bool
    +705        The isotopologue flag. Defaults to True.
    +706    mspeak_index_mono_isotopic : int
    +707        The index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    +708    mono_isotopic_formula_index : int
    +709        The index of the monoisotopic formula in the molecular formula list. Defaults to None.
    +710    """
    +711
    +712    def __init__(
    +713        self,
    +714        _d_molecular_formula,
    +715        prob_ratio,
    +716        mono_abundance,
    +717        ion_charge,
    +718        mspeak_parent=None,
    +719        ion_type=None,
    +720        adduct_atom=None,
    +721    ):
    +722        if ion_type is None:
    +723            # check if ion type or adduct_atom is in the molecular formula dict
    +724            if Labels.ion_type in _d_molecular_formula:
    +725                ion_type = _d_molecular_formula.get(Labels.ion_type)
    +726            else:
    +727                ion_type = None
    +728        else:
    +729            ion_type = Labels.ion_type_translate.get(ion_type)
    +730
    +731        if ion_type == Labels.adduct_ion:
    +732            adduct_atom_int = None
    +733            if adduct_atom in _d_molecular_formula.keys():
    +734                adduct_atom_int = adduct_atom
    +735            else:
    +736                # Check to see if adduct_atom should actually be an isotope of the adduct atom
    +737                for adduct_iso in Atoms.isotopes.get(adduct_atom)[1]:
    +738                    if adduct_iso in _d_molecular_formula.keys():
    +739                        adduct_atom_int = adduct_iso
    +740            adduct_atom = adduct_atom_int
    +741            if adduct_atom is None:
    +742                raise Exception("adduct_atom is required for adduct ion")
    +743            _d_molecular_formula[adduct_atom] -= 1
    +744            _d_molecular_formula = {
    +745                key: val for key, val in _d_molecular_formula.items() if val != 0
    +746            }
    +747
    +748        super().__init__(
    +749            molecular_formula=_d_molecular_formula,
    +750            ion_charge=ion_charge,
    +751            ion_type=ion_type,
    +752            adduct_atom=adduct_atom,
    +753        )
    +754        # prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
    +755
    +756        self.prob_ratio = prob_ratio
    +757
    +758        self.abundance_calc = mono_abundance * prob_ratio
    +759
    +760        self.is_isotopologue = True
    +761
    +762        self.mspeak_index_mono_isotopic = None
    +763
    +764        self.mono_isotopic_formula_index = None
    +765        # parent mass spectrum peak obj instance
    +766        self._mspeak_parent = mspeak_parent
    +767
    +768    @property
    +769    def area_error(self):
    +770        return self._calc_area_error()
    +771
    +772    @property
    +773    def abundance_error(self):
    +774        return self._calc_abundance_error()
     
    @@ -2707,61 +2903,61 @@
    Attributes
    -
    635    def __init__(
    -636            self, 
    -637            _d_molecular_formula, 
    -638            prob_ratio, 
    -639            mono_abundance, 
    -640            ion_charge, 
    -641            mspeak_parent=None,
    -642            ion_type = None,
    -643            adduct_atom = None
    -644            ):
    -645        
    -646        if ion_type is None:
    -647            # check if ion type or adduct_atom is in the molecular formula dict
    -648            if Labels.ion_type in _d_molecular_formula:
    -649                ion_type = _d_molecular_formula.get(Labels.ion_type)
    -650            else:
    -651                ion_type = None
    -652        else:
    -653            ion_type = Labels.ion_type_translate.get(ion_type)
    -654        
    -655        if ion_type == Labels.adduct_ion:
    -656            adduct_atom_int = None
    -657            if adduct_atom in _d_molecular_formula.keys():
    -658                adduct_atom_int = adduct_atom
    -659            else:
    -660                # Check to see if adduct_atom should actually be an isotope of the adduct atom
    -661                for adduct_iso in Atoms.isotopes.get(adduct_atom)[1]:
    -662                    if adduct_iso in _d_molecular_formula.keys():
    -663                        adduct_atom_int = adduct_iso
    -664            adduct_atom = adduct_atom_int
    -665            if adduct_atom is None:
    -666                raise Exception("adduct_atom is required for adduct ion")
    -667            _d_molecular_formula[adduct_atom] -= 1
    -668            _d_molecular_formula = {key:val for key, val in _d_molecular_formula.items() if val != 0}
    -669
    -670        
    -671        super().__init__(
    -672            molecular_formula =_d_molecular_formula, 
    -673            ion_charge = ion_charge, 
    -674            ion_type=ion_type,
    -675            adduct_atom=adduct_atom
    -676            )
    -677        #prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
    -678        
    -679        self.prob_ratio = prob_ratio
    -680        
    -681        self.abundance_calc = mono_abundance * prob_ratio
    -682
    -683        self.is_isotopologue = True
    -684        
    -685        self.mspeak_index_mono_isotopic = None
    -686
    -687        self.mono_isotopic_formula_index = None
    -688        # parent mass spectrum peak obj instance
    -689        self._mspeak_parent = mspeak_parent
    +            
    712    def __init__(
    +713        self,
    +714        _d_molecular_formula,
    +715        prob_ratio,
    +716        mono_abundance,
    +717        ion_charge,
    +718        mspeak_parent=None,
    +719        ion_type=None,
    +720        adduct_atom=None,
    +721    ):
    +722        if ion_type is None:
    +723            # check if ion type or adduct_atom is in the molecular formula dict
    +724            if Labels.ion_type in _d_molecular_formula:
    +725                ion_type = _d_molecular_formula.get(Labels.ion_type)
    +726            else:
    +727                ion_type = None
    +728        else:
    +729            ion_type = Labels.ion_type_translate.get(ion_type)
    +730
    +731        if ion_type == Labels.adduct_ion:
    +732            adduct_atom_int = None
    +733            if adduct_atom in _d_molecular_formula.keys():
    +734                adduct_atom_int = adduct_atom
    +735            else:
    +736                # Check to see if adduct_atom should actually be an isotope of the adduct atom
    +737                for adduct_iso in Atoms.isotopes.get(adduct_atom)[1]:
    +738                    if adduct_iso in _d_molecular_formula.keys():
    +739                        adduct_atom_int = adduct_iso
    +740            adduct_atom = adduct_atom_int
    +741            if adduct_atom is None:
    +742                raise Exception("adduct_atom is required for adduct ion")
    +743            _d_molecular_formula[adduct_atom] -= 1
    +744            _d_molecular_formula = {
    +745                key: val for key, val in _d_molecular_formula.items() if val != 0
    +746            }
    +747
    +748        super().__init__(
    +749            molecular_formula=_d_molecular_formula,
    +750            ion_charge=ion_charge,
    +751            ion_type=ion_type,
    +752            adduct_atom=adduct_atom,
    +753        )
    +754        # prob_ratio is relative to the monoisotopic peak p_isotopologue/p_mono_isotopic
    +755
    +756        self.prob_ratio = prob_ratio
    +757
    +758        self.abundance_calc = mono_abundance * prob_ratio
    +759
    +760        self.is_isotopologue = True
    +761
    +762        self.mspeak_index_mono_isotopic = None
    +763
    +764        self.mono_isotopic_formula_index = None
    +765        # parent mass spectrum peak obj instance
    +766        self._mspeak_parent = mspeak_parent
     
    @@ -2909,75 +3105,88 @@
    Inherited Members
    -
    700class LCMSLibRefMolecularFormula(MolecularFormulaBase):
    -701    """Class for representing a molecular formula associated with a molecule in a LCMS library reference.
    -702
    -703    Parameters
    -704    ----------
    -705    molecular_formula : dict, list, str
    -706        The molecular formula.
    -707    ion_charge : int
    -708        The ion charge.
    -709    ion_type : str, optional
    -710        The ion type. Defaults to None.
    -711    adduct_atom : str, optional
    -712        The adduct atom. Defaults to None.
    -713    mspeak_parent : object, optional
    -714        The parent mass spectrum peak object instance. Defaults to None.
    -715    name : str, optional
    -716        The name of the reference molecule. Defaults to None.
    -717    kegg_id : str, optional
    -718        The KEGG ID of the reference molecule. Defaults to None.
    -719    cas : str, optional
    -720        The CAS number of the reference molecule. Defaults to None.
    -721
    -722    """
    -723    
    -724    def __init__(self, molecular_formula, ion_charge, ion_type=None, 
    -725                    adduct_atom=None, mspeak_parent=None, name=None, kegg_id=None, cas=None) -> None:
    -726        
    -727        super().__init__(molecular_formula, ion_charge, ion_type=ion_type, 
    -728                    adduct_atom=adduct_atom, mspeak_parent=mspeak_parent)
    -729
    -730        self._name = name
    -731        self._kegg_id = kegg_id
    -732        self._cas = cas    
    -733    
    -734    @property
    -735    def name(self):
    -736        return self._name
    -737
    -738    @name.setter
    -739    def name(self, name):
    -740        if isinstance(name, str):
    -741            self._name = name
    -742        else:
    -743            raise TypeError('name: {} should be type string')    
    -744
    -745    @property
    -746    def kegg_id(self):
    -747        return self._kegg_id
    -748    
    -749    @kegg_id.setter
    -750    def kegg_id(self, kegg_id):
    -751        self._kegg_id = kegg_id
    -752        #if isinstance(kegg_id, str):
    -753        #    self._kegg_id = kegg_id
    -754        #else:
    -755        #    print(kegg_id)
    -756        #    raise TypeError('name: {} should be type string') 
    -757
    -758    @property
    -759    def cas(self):
    -760        return self._cas    
    -761    
    -762    @cas.setter
    -763    def cas(self, cas):
    -764        self._cas = cas
    -765        #if isinstance(cas, str):
    -766        #    self._cas = cas
    -767        #else:
    -768        #    raise TypeError('name: {} should be type string') 
    +            
    777class LCMSLibRefMolecularFormula(MolecularFormulaBase):
    +778    """Class for representing a molecular formula associated with a molecule in a LCMS library reference.
    +779
    +780    Parameters
    +781    ----------
    +782    molecular_formula : dict, list, str
    +783        The molecular formula.
    +784    ion_charge : int
    +785        The ion charge.
    +786    ion_type : str, optional
    +787        The ion type. Defaults to None.
    +788    adduct_atom : str, optional
    +789        The adduct atom. Defaults to None.
    +790    mspeak_parent : object, optional
    +791        The parent mass spectrum peak object instance. Defaults to None.
    +792    name : str, optional
    +793        The name of the reference molecule. Defaults to None.
    +794    kegg_id : str, optional
    +795        The KEGG ID of the reference molecule. Defaults to None.
    +796    cas : str, optional
    +797        The CAS number of the reference molecule. Defaults to None.
    +798
    +799    """
    +800
    +801    def __init__(
    +802        self,
    +803        molecular_formula,
    +804        ion_charge,
    +805        ion_type=None,
    +806        adduct_atom=None,
    +807        mspeak_parent=None,
    +808        name=None,
    +809        kegg_id=None,
    +810        cas=None,
    +811    ) -> None:
    +812        super().__init__(
    +813            molecular_formula,
    +814            ion_charge,
    +815            ion_type=ion_type,
    +816            adduct_atom=adduct_atom,
    +817            mspeak_parent=mspeak_parent,
    +818        )
    +819
    +820        self._name = name
    +821        self._kegg_id = kegg_id
    +822        self._cas = cas
    +823
    +824    @property
    +825    def name(self):
    +826        return self._name
    +827
    +828    @name.setter
    +829    def name(self, name):
    +830        if isinstance(name, str):
    +831            self._name = name
    +832        else:
    +833            raise TypeError("name: {} should be type string")
    +834
    +835    @property
    +836    def kegg_id(self):
    +837        return self._kegg_id
    +838
    +839    @kegg_id.setter
    +840    def kegg_id(self, kegg_id):
    +841        self._kegg_id = kegg_id
    +842        # if isinstance(kegg_id, str):
    +843        #    self._kegg_id = kegg_id
    +844        # else:
    +845        #    print(kegg_id)
    +846        #    raise TypeError('name: {} should be type string')
    +847
    +848    @property
    +849    def cas(self):
    +850        return self._cas
    +851
    +852    @cas.setter
    +853    def cas(self, cas):
    +854        self._cas = cas
    +855        # if isinstance(cas, str):
    +856        #    self._cas = cas
    +857        # else:
    +858        #    raise TypeError('name: {} should be type string')
     
    @@ -3016,15 +3225,28 @@
    Parameters
    -
    724    def __init__(self, molecular_formula, ion_charge, ion_type=None, 
    -725                    adduct_atom=None, mspeak_parent=None, name=None, kegg_id=None, cas=None) -> None:
    -726        
    -727        super().__init__(molecular_formula, ion_charge, ion_type=ion_type, 
    -728                    adduct_atom=adduct_atom, mspeak_parent=mspeak_parent)
    -729
    -730        self._name = name
    -731        self._kegg_id = kegg_id
    -732        self._cas = cas    
    +            
    801    def __init__(
    +802        self,
    +803        molecular_formula,
    +804        ion_charge,
    +805        ion_type=None,
    +806        adduct_atom=None,
    +807        mspeak_parent=None,
    +808        name=None,
    +809        kegg_id=None,
    +810        cas=None,
    +811    ) -> None:
    +812        super().__init__(
    +813            molecular_formula,
    +814            ion_charge,
    +815            ion_type=ion_type,
    +816            adduct_atom=adduct_atom,
    +817            mspeak_parent=mspeak_parent,
    +818        )
    +819
    +820        self._name = name
    +821        self._kegg_id = kegg_id
    +822        self._cas = cas
     
    @@ -3129,29 +3351,42 @@
    Inherited Members
    -
    770class MolecularFormula(MolecularFormulaBase):
    -771    """General class for representing a molecular formula.
    -772
    -773    Parameters
    -774    ----------
    -775    molecular_formula : dict, list, str
    -776        The molecular formula.
    -777    ion_charge : int
    -778        The ion charge.
    -779    ion_type : str, optional
    -780        The ion type. Defaults to None.
    -781    adduct_atom : str, optional
    -782        The adduct atom. Defaults to None.
    -783    mspeak_parent : object, optional
    -784        The parent mass spectrum peak object instance. Defaults to None.
    -785    external_mz : float, optional
    -786        The external m/z value. Defaults to False.
    -787    """
    -788
    -789    def __init__(self, molecular_formula, ion_charge, ion_type=None, 
    -790                adduct_atom=None, mspeak_parent=None, external_mz=False):
    -791        super().__init__(molecular_formula, ion_charge, ion_type=ion_type, 
    -792                adduct_atom=adduct_atom, mspeak_parent=mspeak_parent, external_mz=external_mz)
    +            
    861class MolecularFormula(MolecularFormulaBase):
    +862    """General class for representing a molecular formula.
    +863
    +864    Parameters
    +865    ----------
    +866    molecular_formula : dict, list, str
    +867        The molecular formula.
    +868    ion_charge : int
    +869        The ion charge.
    +870    ion_type : str, optional
    +871        The ion type. Defaults to None.
    +872    adduct_atom : str, optional
    +873        The adduct atom. Defaults to None.
    +874    mspeak_parent : object, optional
    +875        The parent mass spectrum peak object instance. Defaults to None.
    +876    external_mz : float, optional
    +877        The external m/z value. Defaults to False.
    +878    """
    +879
    +880    def __init__(
    +881        self,
    +882        molecular_formula,
    +883        ion_charge,
    +884        ion_type=None,
    +885        adduct_atom=None,
    +886        mspeak_parent=None,
    +887        external_mz=False,
    +888    ):
    +889        super().__init__(
    +890            molecular_formula,
    +891            ion_charge,
    +892            ion_type=ion_type,
    +893            adduct_atom=adduct_atom,
    +894            mspeak_parent=mspeak_parent,
    +895            external_mz=external_mz,
    +896        )
     
    @@ -3186,10 +3421,23 @@
    Parameters
    -
    789    def __init__(self, molecular_formula, ion_charge, ion_type=None, 
    -790                adduct_atom=None, mspeak_parent=None, external_mz=False):
    -791        super().__init__(molecular_formula, ion_charge, ion_type=ion_type, 
    -792                adduct_atom=adduct_atom, mspeak_parent=mspeak_parent, external_mz=external_mz)
    +            
    880    def __init__(
    +881        self,
    +882        molecular_formula,
    +883        ion_charge,
    +884        ion_type=None,
    +885        adduct_atom=None,
    +886        mspeak_parent=None,
    +887        external_mz=False,
    +888    ):
    +889        super().__init__(
    +890            molecular_formula,
    +891            ion_charge,
    +892            ion_type=ion_type,
    +893            adduct_atom=adduct_atom,
    +894            mspeak_parent=mspeak_parent,
    +895            external_mz=external_mz,
    +896        )
     
    diff --git a/docs/corems/molecular_formula/input/masslist_ref.html b/docs/corems/molecular_formula/input/masslist_ref.html index e899e1f7..e9c7ade0 100644 --- a/docs/corems/molecular_formula/input/masslist_ref.html +++ b/docs/corems/molecular_formula/input/masslist_ref.html @@ -117,364 +117,360 @@

      1__author__ = "Yuri E. Corilo"
       2__date__ = "Oct 24, 2019"
       3
    -  4from threading import Thread
    -  5from pathlib import Path
    -  6import sys, re, json
    -  7from typing import Dict, List
    -  8
    -  9import pandas as pd
    - 10
    - 11sys.path.append('.')
    - 12
    - 13from corems.molecular_formula.factory.MolecularFormulaFactory import LCMSLibRefMolecularFormula, MolecularFormula 
    - 14from corems.encapsulation.constant import Labels
    - 15from corems.encapsulation.constant import Atoms
    - 16from corems.molecular_id.factory.molecularSQL import CarbonHydrogen, MolecularFormulaLink, HeteroAtoms
    - 17
    - 18class MolecularFormulaLinkProxy:
    - 19    """Proxy class for MolecularFormulaLink to be used in the molecular formula ref file import
    - 20    
    - 21    Parameters
    - 22    ----------
    - 23    molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    - 24        corems MolecularFormula or LCMSLibRefMolecularFormula object
    - 25    mz : float
    - 26        target m/z
    - 27        
    - 28    Attributes
    - 29    ----------
    - 30    C : int
    - 31        number of carbon atoms
    - 32    H : int
    - 33        number of hydrogen atoms
    - 34    H_C : float
    - 35        ratio of hydrogen to carbon atoms
    - 36    class_label : str
    - 37        molecular formula class label
    - 38    mz_calc : float
    - 39        calculated m/z
    - 40    dbe : int
    - 41        double bond equivalent
    - 42    formula_dict : dict
    - 43        molecular formula dictionary
    - 44
    - 45    Methods
    - 46    -------
    - 47    * to_dict(). 
    - 48        return molecular formula dictionary
    - 49
    - 50    """
    - 51        
    - 52    def __init__(self, molecular_formula, mz):
    - 53
    - 54        self.C = molecular_formula.get('C')
    - 55        self.H = molecular_formula.get('H')
    - 56        self.H_C = molecular_formula.get('H')/molecular_formula.get('C')
    - 57        self.class_label = json.dumps(molecular_formula.class_dict)
    - 58        self.mz_calc =  float(mz)                       
    - 59        self.dbe = molecular_formula.dbe
    - 60        self.formula_dict = molecular_formula.to_dict()
    - 61    
    - 62    def to_dict(self):
    - 63        return self.formula_dict
    - 64
    - 65
    - 66class ImportMassListRef():#Thread
    - 67    """Import Mass List from Reference File
    - 68    
    - 69    Parameters
    - 70    ----------
    - 71    ref_file_location : str
    - 72        path to the reference file
    - 73
    - 74    Attributes
    - 75    ----------
    - 76    ref_file_location : str
    - 77        path to the reference file
    - 78
    - 79    Methods
    - 80    -------
    - 81    * molecular_formula_ref(mz, molecular_formula). 
    - 82        Return MolecularFormulaLinkProxy object
    - 83    * from_lcms_lib_file(ion_charge, ion_types).
    - 84        Return Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file
    - 85    * from_bruker_ref_file().
    - 86        Return List[MolecularFormula] from Bruker reference file
    - 87    * from_corems_ref_file(delimiter).
    - 88        Return List[MolecularFormula] from CoreMS reference file
    - 89    * split(delimiters, string, maxsplit).
    - 90        Splits a string using a list of delimiters.
    - 91    * mformula_s_to_dict(s_mformulatring, iontype).
    - 92        Converts a molecular formula string to a dict
    - 93    """
    - 94    def __init__(self, ref_file_location) :
    - 95            
    - 96            #Thread.__init__(self)
    - 97            
    - 98            self.ref_file_location = Path(ref_file_location)
    - 99            
    -100            if not self.ref_file_location.exists():
    -101                tb = sys.exc_info()[2]
    -102                raise FileNotFoundError(ref_file_location).with_traceback(tb)
    -103    
    -104    def molecular_formula_ref( self, mz, molecular_formula):
    -105        """Instantiate a MolecularFormulaLinkProxy object
    -106
    -107        Parameters
    -108        ----------
    -109        mz : float
    -110            target m/z
    -111        molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    -112            corems MolecularFormula or LCMSLibRefMolecularFormula object
    -113        
    -114        Returns
    -115        -------
    -116        MolecularFormulaLinkProxy
    -117            MolecularFormulaLinkProxy object
    -118        """        
    -119        return MolecularFormulaLinkProxy(molecular_formula, mz)
    -120    
    -121    def from_lcms_lib_file(self, ion_charge: float, ion_types: List[str]) -> Dict [str, Dict[float, List[LCMSLibRefMolecularFormula] ] ]:
    -122        """Create a dictionary of LCMSLibRefMolecularFormula objects from LCMS library reference file
    -123        
    -124        Parameters
    -125        ----------
    -126        ion_charge : float
    -127            ion charge
    -128        ion_types : List[str]
    -129            list of ion types
    -130        
    -131        Returns
    -132        -------
    -133        Dict 
    -134            Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file. m/z is the target m/z; standard_name is the name of the molecular standard mix; MolecularFormula is the corems molecular formula class
    -135        """
    -136       
    -137        data = {}
    -138        
    -139        with open(self.ref_file_location) as ref_f:
    -140            
    -141            df = pd.read_csv(ref_f, header=0,  encoding= 'unicode_escape')
    -142            
    -143            for index, row in df.iterrows():
    -144                
    -145                formula_s = row["Neutral Formula"]
    -146                formula_dict = self.mformula_s_to_dict(formula_s, Labels.neutral)
    -147                name = row["Compound Name"]
    -148                kegg_id = row["KEGG ID"]
    -149                standard_name = row["NEW MIX"]
    -150                cas = row["KEGG ID"]
    -151                #print(row["Neutral Formula"], formula_dict)
    -152                molf_formula = LCMSLibRefMolecularFormula(formula_dict, ion_charge, Labels.neutral, 
    -153                                                name=name, kegg_id=kegg_id, cas=cas)
    -154                #if round(molf_formula.mz_calc, 4) != round(row['Mass Adduct -H'],4):
    -155                #    print(formula_s)
    -156                #    print(round(molf_formula.mz_calc, 4) , round(row['Mass Adduct -H'],4))
    -157        
    -158                if standard_name in data.keys():
    -159
    -160                    #TODO change it to target ion types and add ion type in the data structure   
    -161                    mz_calc = molf_formula.protonated_mz
    -162                    
    -163                    if mz_calc in data.get(standard_name).keys():
    -164                       
    -165                       data.get(standard_name).get(mz_calc).append(molf_formula)
    -166                    
    -167                    else:   
    -168                        data[standard_name][mz_calc] = [molf_formula]
    -169                else:
    -170                    
    -171                    data[standard_name] = {molf_formula.mz_calc: [molf_formula]}
    -172                #print(formula_s, formula_dict)
    -173                #if molf_formula.ion_type != 'de-protonated':
    -174                #    print( 'ha', molf_formula.ion_type )
    -175                #print(formula_dict)
    -176                #print(row['c1'], row['c2'])
    -177        
    -178        return data
    -179
    -180    def from_bruker_ref_file(self) -> List[MolecularFormula]:
    -181        """Create a list of MolecularFormula objects from Bruker reference file
    +  4import json
    +  5import re
    +  6import sys
    +  7from pathlib import Path
    +  8from typing import Dict, List
    +  9
    + 10import pandas as pd
    + 11
    + 12sys.path.append(".")
    + 13
    + 14from corems.encapsulation.constant import Atoms, Labels
    + 15from corems.molecular_formula.factory.MolecularFormulaFactory import (
    + 16    LCMSLibRefMolecularFormula,
    + 17    MolecularFormula,
    + 18)
    + 19
    + 20
    + 21class MolecularFormulaLinkProxy:
    + 22    """Proxy class for MolecularFormulaLink to be used in the molecular formula ref file import
    + 23
    + 24    Parameters
    + 25    ----------
    + 26    molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    + 27        corems MolecularFormula or LCMSLibRefMolecularFormula object
    + 28    mz : float
    + 29        target m/z
    + 30
    + 31    Attributes
    + 32    ----------
    + 33    C : int
    + 34        number of carbon atoms
    + 35    H : int
    + 36        number of hydrogen atoms
    + 37    H_C : float
    + 38        ratio of hydrogen to carbon atoms
    + 39    class_label : str
    + 40        molecular formula class label
    + 41    mz_calc : float
    + 42        calculated m/z
    + 43    dbe : int
    + 44        double bond equivalent
    + 45    formula_dict : dict
    + 46        molecular formula dictionary
    + 47
    + 48    Methods
    + 49    -------
    + 50    * to_dict().
    + 51        return molecular formula dictionary
    + 52
    + 53    """
    + 54
    + 55    def __init__(self, molecular_formula, mz):
    + 56        self.C = molecular_formula.get("C")
    + 57        self.H = molecular_formula.get("H")
    + 58        self.H_C = molecular_formula.get("H") / molecular_formula.get("C")
    + 59        self.class_label = json.dumps(molecular_formula.class_dict)
    + 60        self.mz_calc = float(mz)
    + 61        self.dbe = molecular_formula.dbe
    + 62        self.formula_dict = molecular_formula.to_dict()
    + 63
    + 64    def to_dict(self):
    + 65        return self.formula_dict
    + 66
    + 67
    + 68class ImportMassListRef:  # Thread
    + 69    """Import Mass List from Reference File
    + 70
    + 71    Parameters
    + 72    ----------
    + 73    ref_file_location : str
    + 74        path to the reference file
    + 75
    + 76    Attributes
    + 77    ----------
    + 78    ref_file_location : str
    + 79        path to the reference file
    + 80
    + 81    Methods
    + 82    -------
    + 83    * molecular_formula_ref(mz, molecular_formula).
    + 84        Return MolecularFormulaLinkProxy object
    + 85    * from_lcms_lib_file(ion_charge, ion_types).
    + 86        Return Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file
    + 87    * from_bruker_ref_file().
    + 88        Return List[MolecularFormula] from Bruker reference file
    + 89    * from_corems_ref_file(delimiter).
    + 90        Return List[MolecularFormula] from CoreMS reference file
    + 91    * split(delimiters, string, maxsplit).
    + 92        Splits a string using a list of delimiters.
    + 93    * mformula_s_to_dict(s_mformulatring, iontype).
    + 94        Converts a molecular formula string to a dict
    + 95    """
    + 96
    + 97    def __init__(self, ref_file_location):
    + 98        # Thread.__init__(self)
    + 99
    +100        self.ref_file_location = Path(ref_file_location)
    +101
    +102        if not self.ref_file_location.exists():
    +103            tb = sys.exc_info()[2]
    +104            raise FileNotFoundError(ref_file_location).with_traceback(tb)
    +105
    +106    def molecular_formula_ref(self, mz, molecular_formula):
    +107        """Instantiate a MolecularFormulaLinkProxy object
    +108
    +109        Parameters
    +110        ----------
    +111        mz : float
    +112            target m/z
    +113        molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    +114            corems MolecularFormula or LCMSLibRefMolecularFormula object
    +115
    +116        Returns
    +117        -------
    +118        MolecularFormulaLinkProxy
    +119            MolecularFormulaLinkProxy object
    +120        """
    +121        return MolecularFormulaLinkProxy(molecular_formula, mz)
    +122
    +123    def from_lcms_lib_file(
    +124        self, ion_charge: float, ion_types: List[str]
    +125    ) -> Dict[str, Dict[float, List[LCMSLibRefMolecularFormula]]]:
    +126        """Create a dictionary of LCMSLibRefMolecularFormula objects from LCMS library reference file
    +127
    +128        Parameters
    +129        ----------
    +130        ion_charge : float
    +131            ion charge
    +132        ion_types : List[str]
    +133            list of ion types
    +134
    +135        Returns
    +136        -------
    +137        Dict
    +138            Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file. m/z is the target m/z; standard_name is the name of the molecular standard mix; MolecularFormula is the corems molecular formula class
    +139        """
    +140
    +141        data = {}
    +142
    +143        with open(self.ref_file_location) as ref_f:
    +144            df = pd.read_csv(ref_f, header=0, encoding="unicode_escape")
    +145
    +146            for index, row in df.iterrows():
    +147                formula_s = row["Neutral Formula"]
    +148                formula_dict = self.mformula_s_to_dict(formula_s, Labels.neutral)
    +149                name = row["Compound Name"]
    +150                kegg_id = row["KEGG ID"]
    +151                standard_name = row["NEW MIX"]
    +152                cas = row["KEGG ID"]
    +153                # print(row["Neutral Formula"], formula_dict)
    +154                molf_formula = LCMSLibRefMolecularFormula(
    +155                    formula_dict,
    +156                    ion_charge,
    +157                    Labels.neutral,
    +158                    name=name,
    +159                    kegg_id=kegg_id,
    +160                    cas=cas,
    +161                )
    +162                # if round(molf_formula.mz_calc, 4) != round(row['Mass Adduct -H'],4):
    +163                #    print(formula_s)
    +164                #    print(round(molf_formula.mz_calc, 4) , round(row['Mass Adduct -H'],4))
    +165
    +166                if standard_name in data.keys():
    +167                    # TODO change it to target ion types and add ion type in the data structure
    +168                    mz_calc = molf_formula.protonated_mz
    +169
    +170                    if mz_calc in data.get(standard_name).keys():
    +171                        data.get(standard_name).get(mz_calc).append(molf_formula)
    +172
    +173                    else:
    +174                        data[standard_name][mz_calc] = [molf_formula]
    +175                else:
    +176                    data[standard_name] = {molf_formula.mz_calc: [molf_formula]}
    +177                # print(formula_s, formula_dict)
    +178                # if molf_formula.ion_type != 'de-protonated':
    +179                #    print( 'ha', molf_formula.ion_type )
    +180                # print(formula_dict)
    +181                # print(row['c1'], row['c2'])
     182
    -183        Returns
    -184        -------
    -185        List[MolecularFormula]
    -186            List of MolecularFormula objects from Bruker reference file
    -187        """
    -188
    -189        import csv
    -190        
    -191        list_mf_obj = []
    -192
    -193        with open(self.ref_file_location) as ref_f:
    -194
    -195            labels = ref_f.readline().strip('\n').split(';')
    -196            
    -197            for line in ref_f.readlines():
    -198                
    -199                if line != '\n':
    -200        
    -201                    list_ref = (line.strip('\n').split(' '))
    -202                    
    -203                    if list_ref[2][-1] == '+': 
    -204                        
    -205                        ion_charge =  int(list_ref[2][:-1])
    -206                    
    -207                    else:
    -208                        
    -209                        ion_charge =  -1* int(list_ref[2][:-1])
    +183        return data
    +184
    +185    def from_bruker_ref_file(self) -> List[MolecularFormula]:
    +186        """Create a list of MolecularFormula objects from Bruker reference file
    +187
    +188        Returns
    +189        -------
    +190        List[MolecularFormula]
    +191            List of MolecularFormula objects from Bruker reference file
    +192        """
    +193
    +194        import csv
    +195
    +196        list_mf_obj = []
    +197
    +198        with open(self.ref_file_location) as ref_f:
    +199            labels = ref_f.readline().strip("\n").split(";")
    +200
    +201            for line in ref_f.readlines():
    +202                if line != "\n":
    +203                    list_ref = line.strip("\n").split(" ")
    +204
    +205                    if list_ref[2][-1] == "+":
    +206                        ion_charge = int(list_ref[2][:-1])
    +207
    +208                    else:
    +209                        ion_charge = -1 * int(list_ref[2][:-1])
     210
    -211                    
    -212                    ion_mol_formula = list_ref[0]
    -213                    mz = float(list_ref[1])
    -214                    formula_dict = self.mformula_s_to_dict(ion_mol_formula)
    -215                    
    -216                    list_mf_obj.append(MolecularFormula(formula_dict, ion_charge, external_mz=mz))
    -217        
    -218        return  list_mf_obj           
    -219
    -220    def from_corems_ref_file(self, delimiter="\t"): #pragma: no cover
    -221        """Create a list of MolecularFormula objects from CoreMS reference file
    -222        
    -223        Not being used
    -224        
    -225        Parameters
    -226        ----------
    -227        delimiter : str
    -228            delimiter used in the reference file
    -229        
    -230        Returns
    -231        -------
    -232        List[MolecularFormula]
    -233            List of MolecularFormula objects from CoreMS reference file
    -234        """
    -235        #not being used
    -236        import csv
    -237
    -238        list_mf_obj = []
    -239
    -240        with open('res/RefMassLists/Crude-Pos-ESI.ref') as ref_f:
    -241
    -242            labels = ref_f.readline().strip('\n').split(delimiter)
    -243            
    +211                    ion_mol_formula = list_ref[0]
    +212                    mz = float(list_ref[1])
    +213                    formula_dict = self.mformula_s_to_dict(ion_mol_formula)
    +214
    +215                    list_mf_obj.append(
    +216                        MolecularFormula(formula_dict, ion_charge, external_mz=mz)
    +217                    )
    +218
    +219        return list_mf_obj
    +220
    +221    def from_corems_ref_file(self, delimiter="\t"):  # pragma: no cover
    +222        """Create a list of MolecularFormula objects from CoreMS reference file
    +223
    +224        Not being used
    +225
    +226        Parameters
    +227        ----------
    +228        delimiter : str
    +229            delimiter used in the reference file
    +230
    +231        Returns
    +232        -------
    +233        List[MolecularFormula]
    +234            List of MolecularFormula objects from CoreMS reference file
    +235        """
    +236        # not being used
    +237        import csv
    +238
    +239        list_mf_obj = []
    +240
    +241        with open("res/RefMassLists/Crude-Pos-ESI.ref") as ref_f:
    +242            labels = ref_f.readline().strip("\n").split(delimiter)
    +243
     244            for line in ref_f.readlines():
    -245                
    -246                if line != '\n':
    -247        
    -248                    list_ref = (line.strip('\n').split(delimiter))
    -249                    
    -250                    formula_string = list_ref[0]
    -251                    ion_charge = int(list_ref[1])
    -252                    ion_type = list_ref[2]
    -253
    -254                    molform = MolecularFormula(formula_string, ion_charge, ion_type=ion_type)
    -255                    
    +245                if line != "\n":
    +246                    list_ref = line.strip("\n").split(delimiter)
    +247
    +248                    formula_string = list_ref[0]
    +249                    ion_charge = int(list_ref[1])
    +250                    ion_type = list_ref[2]
    +251
    +252                    molform = MolecularFormula(
    +253                        formula_string, ion_charge, ion_type=ion_type
    +254                    )
    +255
     256                    list_mf_obj.append(self.molecular_formula_ref(molform))
     257
    -258        return  list_mf_obj           
    +258        return list_mf_obj
     259
    -260
    -261    def split(self, delimiters, string, maxsplit=0): #pragma: no cover
    -262        """Splits a string using a list of delimiters.
    -263        
    -264        Does not work when formula has atoms with same characters, i.e - C10H21NNa
    -265        
    -266        Parameters
    -267        ----------
    -268        delimiters : list
    -269            list of delimiters
    -270        string : str
    -271            string to be split
    -272        maxsplit : int, optional
    -273            maximum number of splits. Default is 0
    -274            
    -275        Returns
    -276        -------
    -277        list
    -278            list of strings obtained after splitting the string
    -279        list
    -280            list of counts obtained after splitting the string
    -281        """
    -282        regexPattern = '|'.join(map(re.escape, delimiters)) #pragma: no cover
    -283        isotopes = re.findall(regexPattern, string) #pragma: no cover
    -284        counts = re.split(regexPattern, string, maxsplit)  #pragma: no cover
    -285        return isotopes, counts
    -286
    -287    def mformula_s_to_dict(self, s_mformulatring, iontype='unknown'):
    -288        """Converts a molecular formula string to a dict
    -289        
    -290        Parameters
    -291        ----------
    -292        s_mformulatring : str
    -293            molecular formula string, i.e. 'C10H21NNa'
    -294        iontype : str, optional
    -295            ion type. Default is 'unknown'
    -296        
    -297        Returns
    -298        -------
    -299        dict
    -300            molecular formula dictionary
    -301
    -302        Notes
    -303        -----
    -304        Does not work if the atomic mass number is passed i.e. 37Cl, 81Br, convention follow the light isotope labeling 35Cl is Cl, 12C is C, etc.
    -305        If you need to use heavy isotopes please use another reference file format that separate the formula string by a blank space and parse it using the function corems_ref_file
    -306
    -307        Raises
    -308        ------
    -309        TypeError
    -310            Atom does not exist in Atoms.atoms_order list
    -311        Exception
    -312            Empty molecular formula
    -313        """
    -314        if s_mformulatring:
    -315            
    -316            #find the case C122
    -317            all_atoms = re.findall(r'[A-Z]{1}[0-9]{1,10000}', s_mformulatring)
    -318            
    -319            #find the case Br2
    -320            all_atoms2 = re.findall(r'[A-Z]{1}[a-z]{1}[0-9]{1,10000}', s_mformulatring)
    -321            #find the case N
    -322            single_digit_atoms_one = re.findall(r'[A-Z]{1}(?![0-9])(?![a-z])', s_mformulatring)
    -323            #print(single_digit_atoms_one)
    -324            #find the case Na
    -325            due_digit_atoms_one = re.findall(r'[A-Z]{1}[a-z]{1}(?![0-9])', s_mformulatring)
    -326            
    -327            all_atoms = all_atoms + all_atoms2 + due_digit_atoms_one +single_digit_atoms_one
    -328            
    -329            dict_res = {}
    -330            
    -331            for each_atom_count in all_atoms:
    -332                
    -333                
    -334                count = re.findall(r'[0-9]{1,10000}', each_atom_count)
    -335                atom = ''.join(re.findall(r'[A-z]', each_atom_count))
    -336                
    -337                if atom in Atoms.atoms_order:
    -338                    
    -339                    if count:
    -340                        dict_res[atom] = int(count[0])
    -341                    else:
    -342                        dict_res[atom] = 1
    -343                
    -344                else:
    -345                    
    +260    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
    +261        """Splits a string using a list of delimiters.
    +262
    +263        Does not work when formula has atoms with same characters, i.e - C10H21NNa
    +264
    +265        Parameters
    +266        ----------
    +267        delimiters : list
    +268            list of delimiters
    +269        string : str
    +270            string to be split
    +271        maxsplit : int, optional
    +272            maximum number of splits. Default is 0
    +273
    +274        Returns
    +275        -------
    +276        list
    +277            list of strings obtained after splitting the string
    +278        list
    +279            list of counts obtained after splitting the string
    +280        """
    +281        regexPattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
    +282        isotopes = re.findall(regexPattern, string)  # pragma: no cover
    +283        counts = re.split(regexPattern, string, maxsplit)  # pragma: no cover
    +284        return isotopes, counts
    +285
    +286    def mformula_s_to_dict(self, s_mformulatring, iontype="unknown"):
    +287        """Converts a molecular formula string to a dict
    +288
    +289        Parameters
    +290        ----------
    +291        s_mformulatring : str
    +292            molecular formula string, i.e. 'C10H21NNa'
    +293        iontype : str, optional
    +294            ion type. Default is 'unknown'
    +295
    +296        Returns
    +297        -------
    +298        dict
    +299            molecular formula dictionary
    +300
    +301        Notes
    +302        -----
    +303        Does not work if the atomic mass number is passed i.e. 37Cl, 81Br, convention follow the light isotope labeling 35Cl is Cl, 12C is C, etc.
    +304        If you need to use heavy isotopes please use another reference file format that separate the formula string by a blank space and parse it using the function corems_ref_file
    +305
    +306        Raises
    +307        ------
    +308        TypeError
    +309            Atom does not exist in Atoms.atoms_order list
    +310        Exception
    +311            Empty molecular formula
    +312        """
    +313        if s_mformulatring:
    +314            # find the case C122
    +315            all_atoms = re.findall(r"[A-Z]{1}[0-9]{1,10000}", s_mformulatring)
    +316
    +317            # find the case Br2
    +318            all_atoms2 = re.findall(r"[A-Z]{1}[a-z]{1}[0-9]{1,10000}", s_mformulatring)
    +319            # find the case N
    +320            single_digit_atoms_one = re.findall(
    +321                r"[A-Z]{1}(?![0-9])(?![a-z])", s_mformulatring
    +322            )
    +323            # print(single_digit_atoms_one)
    +324            # find the case Na
    +325            due_digit_atoms_one = re.findall(
    +326                r"[A-Z]{1}[a-z]{1}(?![0-9])", s_mformulatring
    +327            )
    +328
    +329            all_atoms = (
    +330                all_atoms + all_atoms2 + due_digit_atoms_one + single_digit_atoms_one
    +331            )
    +332
    +333            dict_res = {}
    +334
    +335            for each_atom_count in all_atoms:
    +336                count = re.findall(r"[0-9]{1,10000}", each_atom_count)
    +337                atom = "".join(re.findall(r"[A-z]", each_atom_count))
    +338
    +339                if atom in Atoms.atoms_order:
    +340                    if count:
    +341                        dict_res[atom] = int(count[0])
    +342                    else:
    +343                        dict_res[atom] = 1
    +344
    +345                else:
     346                    tb = sys.exc_info()[2]
    -347                    raise TypeError("Atom %s does not exist in Atoms.atoms_order list" % atom).with_traceback(tb)
    -348            
    -349            dict_res[Labels.ion_type]  = iontype
    +347                    raise TypeError(
    +348                        "Atom %s does not exist in Atoms.atoms_order list" % atom
    +349                    ).with_traceback(tb)
     350
    -351            return dict_res
    -352        
    -353        else: 
    -354            
    -355            tb = sys.exc_info()[2]
    -356            raise Exception('Empty molecular formula').with_traceback(tb)
    -357
    -358    
    -359    
    -360        
    -361                
    +351            dict_res[Labels.ion_type] = iontype
    +352
    +353            return dict_res
    +354
    +355        else:
    +356            tb = sys.exc_info()[2]
    +357            raise Exception("Empty molecular formula").with_traceback(tb)
     
    @@ -490,52 +486,51 @@

    -
    19class MolecularFormulaLinkProxy:
    -20    """Proxy class for MolecularFormulaLink to be used in the molecular formula ref file import
    -21    
    -22    Parameters
    -23    ----------
    -24    molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    -25        corems MolecularFormula or LCMSLibRefMolecularFormula object
    -26    mz : float
    -27        target m/z
    -28        
    -29    Attributes
    -30    ----------
    -31    C : int
    -32        number of carbon atoms
    -33    H : int
    -34        number of hydrogen atoms
    -35    H_C : float
    -36        ratio of hydrogen to carbon atoms
    -37    class_label : str
    -38        molecular formula class label
    -39    mz_calc : float
    -40        calculated m/z
    -41    dbe : int
    -42        double bond equivalent
    -43    formula_dict : dict
    -44        molecular formula dictionary
    -45
    -46    Methods
    -47    -------
    -48    * to_dict(). 
    -49        return molecular formula dictionary
    -50
    -51    """
    -52        
    -53    def __init__(self, molecular_formula, mz):
    -54
    -55        self.C = molecular_formula.get('C')
    -56        self.H = molecular_formula.get('H')
    -57        self.H_C = molecular_formula.get('H')/molecular_formula.get('C')
    -58        self.class_label = json.dumps(molecular_formula.class_dict)
    -59        self.mz_calc =  float(mz)                       
    -60        self.dbe = molecular_formula.dbe
    -61        self.formula_dict = molecular_formula.to_dict()
    -62    
    -63    def to_dict(self):
    -64        return self.formula_dict
    +            
    22class MolecularFormulaLinkProxy:
    +23    """Proxy class for MolecularFormulaLink to be used in the molecular formula ref file import
    +24
    +25    Parameters
    +26    ----------
    +27    molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    +28        corems MolecularFormula or LCMSLibRefMolecularFormula object
    +29    mz : float
    +30        target m/z
    +31
    +32    Attributes
    +33    ----------
    +34    C : int
    +35        number of carbon atoms
    +36    H : int
    +37        number of hydrogen atoms
    +38    H_C : float
    +39        ratio of hydrogen to carbon atoms
    +40    class_label : str
    +41        molecular formula class label
    +42    mz_calc : float
    +43        calculated m/z
    +44    dbe : int
    +45        double bond equivalent
    +46    formula_dict : dict
    +47        molecular formula dictionary
    +48
    +49    Methods
    +50    -------
    +51    * to_dict().
    +52        return molecular formula dictionary
    +53
    +54    """
    +55
    +56    def __init__(self, molecular_formula, mz):
    +57        self.C = molecular_formula.get("C")
    +58        self.H = molecular_formula.get("H")
    +59        self.H_C = molecular_formula.get("H") / molecular_formula.get("C")
    +60        self.class_label = json.dumps(molecular_formula.class_dict)
    +61        self.mz_calc = float(mz)
    +62        self.dbe = molecular_formula.dbe
    +63        self.formula_dict = molecular_formula.to_dict()
    +64
    +65    def to_dict(self):
    +66        return self.formula_dict
     
    @@ -572,7 +567,7 @@
    Attributes
    Methods
      -
    • to_dict(). +
    • to_dict(). return molecular formula dictionary
    @@ -588,15 +583,14 @@

    Methods
    -
    53    def __init__(self, molecular_formula, mz):
    -54
    -55        self.C = molecular_formula.get('C')
    -56        self.H = molecular_formula.get('H')
    -57        self.H_C = molecular_formula.get('H')/molecular_formula.get('C')
    -58        self.class_label = json.dumps(molecular_formula.class_dict)
    -59        self.mz_calc =  float(mz)                       
    -60        self.dbe = molecular_formula.dbe
    -61        self.formula_dict = molecular_formula.to_dict()
    +            
    56    def __init__(self, molecular_formula, mz):
    +57        self.C = molecular_formula.get("C")
    +58        self.H = molecular_formula.get("H")
    +59        self.H_C = molecular_formula.get("H") / molecular_formula.get("C")
    +60        self.class_label = json.dumps(molecular_formula.class_dict)
    +61        self.mz_calc = float(mz)
    +62        self.dbe = molecular_formula.dbe
    +63        self.formula_dict = molecular_formula.to_dict()
     
    @@ -691,8 +685,8 @@
    Methods
    -
    63    def to_dict(self):
    -64        return self.formula_dict
    +            
    65    def to_dict(self):
    +66        return self.formula_dict
     
    @@ -711,297 +705,296 @@
    Methods
    -
     67class ImportMassListRef():#Thread
    - 68    """Import Mass List from Reference File
    - 69    
    - 70    Parameters
    - 71    ----------
    - 72    ref_file_location : str
    - 73        path to the reference file
    - 74
    - 75    Attributes
    - 76    ----------
    - 77    ref_file_location : str
    - 78        path to the reference file
    - 79
    - 80    Methods
    - 81    -------
    - 82    * molecular_formula_ref(mz, molecular_formula). 
    - 83        Return MolecularFormulaLinkProxy object
    - 84    * from_lcms_lib_file(ion_charge, ion_types).
    - 85        Return Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file
    - 86    * from_bruker_ref_file().
    - 87        Return List[MolecularFormula] from Bruker reference file
    - 88    * from_corems_ref_file(delimiter).
    - 89        Return List[MolecularFormula] from CoreMS reference file
    - 90    * split(delimiters, string, maxsplit).
    - 91        Splits a string using a list of delimiters.
    - 92    * mformula_s_to_dict(s_mformulatring, iontype).
    - 93        Converts a molecular formula string to a dict
    - 94    """
    - 95    def __init__(self, ref_file_location) :
    - 96            
    - 97            #Thread.__init__(self)
    - 98            
    - 99            self.ref_file_location = Path(ref_file_location)
    -100            
    -101            if not self.ref_file_location.exists():
    -102                tb = sys.exc_info()[2]
    -103                raise FileNotFoundError(ref_file_location).with_traceback(tb)
    -104    
    -105    def molecular_formula_ref( self, mz, molecular_formula):
    -106        """Instantiate a MolecularFormulaLinkProxy object
    -107
    -108        Parameters
    -109        ----------
    -110        mz : float
    -111            target m/z
    -112        molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    -113            corems MolecularFormula or LCMSLibRefMolecularFormula object
    -114        
    -115        Returns
    -116        -------
    -117        MolecularFormulaLinkProxy
    -118            MolecularFormulaLinkProxy object
    -119        """        
    -120        return MolecularFormulaLinkProxy(molecular_formula, mz)
    -121    
    -122    def from_lcms_lib_file(self, ion_charge: float, ion_types: List[str]) -> Dict [str, Dict[float, List[LCMSLibRefMolecularFormula] ] ]:
    -123        """Create a dictionary of LCMSLibRefMolecularFormula objects from LCMS library reference file
    -124        
    -125        Parameters
    -126        ----------
    -127        ion_charge : float
    -128            ion charge
    -129        ion_types : List[str]
    -130            list of ion types
    -131        
    -132        Returns
    -133        -------
    -134        Dict 
    -135            Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file. m/z is the target m/z; standard_name is the name of the molecular standard mix; MolecularFormula is the corems molecular formula class
    -136        """
    -137       
    -138        data = {}
    -139        
    -140        with open(self.ref_file_location) as ref_f:
    -141            
    -142            df = pd.read_csv(ref_f, header=0,  encoding= 'unicode_escape')
    -143            
    -144            for index, row in df.iterrows():
    -145                
    -146                formula_s = row["Neutral Formula"]
    -147                formula_dict = self.mformula_s_to_dict(formula_s, Labels.neutral)
    -148                name = row["Compound Name"]
    -149                kegg_id = row["KEGG ID"]
    -150                standard_name = row["NEW MIX"]
    -151                cas = row["KEGG ID"]
    -152                #print(row["Neutral Formula"], formula_dict)
    -153                molf_formula = LCMSLibRefMolecularFormula(formula_dict, ion_charge, Labels.neutral, 
    -154                                                name=name, kegg_id=kegg_id, cas=cas)
    -155                #if round(molf_formula.mz_calc, 4) != round(row['Mass Adduct -H'],4):
    -156                #    print(formula_s)
    -157                #    print(round(molf_formula.mz_calc, 4) , round(row['Mass Adduct -H'],4))
    -158        
    -159                if standard_name in data.keys():
    -160
    -161                    #TODO change it to target ion types and add ion type in the data structure   
    -162                    mz_calc = molf_formula.protonated_mz
    -163                    
    -164                    if mz_calc in data.get(standard_name).keys():
    -165                       
    -166                       data.get(standard_name).get(mz_calc).append(molf_formula)
    -167                    
    -168                    else:   
    -169                        data[standard_name][mz_calc] = [molf_formula]
    -170                else:
    -171                    
    -172                    data[standard_name] = {molf_formula.mz_calc: [molf_formula]}
    -173                #print(formula_s, formula_dict)
    -174                #if molf_formula.ion_type != 'de-protonated':
    -175                #    print( 'ha', molf_formula.ion_type )
    -176                #print(formula_dict)
    -177                #print(row['c1'], row['c2'])
    -178        
    -179        return data
    -180
    -181    def from_bruker_ref_file(self) -> List[MolecularFormula]:
    -182        """Create a list of MolecularFormula objects from Bruker reference file
    +            
     69class ImportMassListRef:  # Thread
    + 70    """Import Mass List from Reference File
    + 71
    + 72    Parameters
    + 73    ----------
    + 74    ref_file_location : str
    + 75        path to the reference file
    + 76
    + 77    Attributes
    + 78    ----------
    + 79    ref_file_location : str
    + 80        path to the reference file
    + 81
    + 82    Methods
    + 83    -------
    + 84    * molecular_formula_ref(mz, molecular_formula).
    + 85        Return MolecularFormulaLinkProxy object
    + 86    * from_lcms_lib_file(ion_charge, ion_types).
    + 87        Return Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file
    + 88    * from_bruker_ref_file().
    + 89        Return List[MolecularFormula] from Bruker reference file
    + 90    * from_corems_ref_file(delimiter).
    + 91        Return List[MolecularFormula] from CoreMS reference file
    + 92    * split(delimiters, string, maxsplit).
    + 93        Splits a string using a list of delimiters.
    + 94    * mformula_s_to_dict(s_mformulatring, iontype).
    + 95        Converts a molecular formula string to a dict
    + 96    """
    + 97
    + 98    def __init__(self, ref_file_location):
    + 99        # Thread.__init__(self)
    +100
    +101        self.ref_file_location = Path(ref_file_location)
    +102
    +103        if not self.ref_file_location.exists():
    +104            tb = sys.exc_info()[2]
    +105            raise FileNotFoundError(ref_file_location).with_traceback(tb)
    +106
    +107    def molecular_formula_ref(self, mz, molecular_formula):
    +108        """Instantiate a MolecularFormulaLinkProxy object
    +109
    +110        Parameters
    +111        ----------
    +112        mz : float
    +113            target m/z
    +114        molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    +115            corems MolecularFormula or LCMSLibRefMolecularFormula object
    +116
    +117        Returns
    +118        -------
    +119        MolecularFormulaLinkProxy
    +120            MolecularFormulaLinkProxy object
    +121        """
    +122        return MolecularFormulaLinkProxy(molecular_formula, mz)
    +123
    +124    def from_lcms_lib_file(
    +125        self, ion_charge: float, ion_types: List[str]
    +126    ) -> Dict[str, Dict[float, List[LCMSLibRefMolecularFormula]]]:
    +127        """Create a dictionary of LCMSLibRefMolecularFormula objects from LCMS library reference file
    +128
    +129        Parameters
    +130        ----------
    +131        ion_charge : float
    +132            ion charge
    +133        ion_types : List[str]
    +134            list of ion types
    +135
    +136        Returns
    +137        -------
    +138        Dict
    +139            Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file. m/z is the target m/z; standard_name is the name of the molecular standard mix; MolecularFormula is the corems molecular formula class
    +140        """
    +141
    +142        data = {}
    +143
    +144        with open(self.ref_file_location) as ref_f:
    +145            df = pd.read_csv(ref_f, header=0, encoding="unicode_escape")
    +146
    +147            for index, row in df.iterrows():
    +148                formula_s = row["Neutral Formula"]
    +149                formula_dict = self.mformula_s_to_dict(formula_s, Labels.neutral)
    +150                name = row["Compound Name"]
    +151                kegg_id = row["KEGG ID"]
    +152                standard_name = row["NEW MIX"]
    +153                cas = row["KEGG ID"]
    +154                # print(row["Neutral Formula"], formula_dict)
    +155                molf_formula = LCMSLibRefMolecularFormula(
    +156                    formula_dict,
    +157                    ion_charge,
    +158                    Labels.neutral,
    +159                    name=name,
    +160                    kegg_id=kegg_id,
    +161                    cas=cas,
    +162                )
    +163                # if round(molf_formula.mz_calc, 4) != round(row['Mass Adduct -H'],4):
    +164                #    print(formula_s)
    +165                #    print(round(molf_formula.mz_calc, 4) , round(row['Mass Adduct -H'],4))
    +166
    +167                if standard_name in data.keys():
    +168                    # TODO change it to target ion types and add ion type in the data structure
    +169                    mz_calc = molf_formula.protonated_mz
    +170
    +171                    if mz_calc in data.get(standard_name).keys():
    +172                        data.get(standard_name).get(mz_calc).append(molf_formula)
    +173
    +174                    else:
    +175                        data[standard_name][mz_calc] = [molf_formula]
    +176                else:
    +177                    data[standard_name] = {molf_formula.mz_calc: [molf_formula]}
    +178                # print(formula_s, formula_dict)
    +179                # if molf_formula.ion_type != 'de-protonated':
    +180                #    print( 'ha', molf_formula.ion_type )
    +181                # print(formula_dict)
    +182                # print(row['c1'], row['c2'])
     183
    -184        Returns
    -185        -------
    -186        List[MolecularFormula]
    -187            List of MolecularFormula objects from Bruker reference file
    -188        """
    -189
    -190        import csv
    -191        
    -192        list_mf_obj = []
    -193
    -194        with open(self.ref_file_location) as ref_f:
    -195
    -196            labels = ref_f.readline().strip('\n').split(';')
    -197            
    -198            for line in ref_f.readlines():
    -199                
    -200                if line != '\n':
    -201        
    -202                    list_ref = (line.strip('\n').split(' '))
    -203                    
    -204                    if list_ref[2][-1] == '+': 
    -205                        
    -206                        ion_charge =  int(list_ref[2][:-1])
    -207                    
    -208                    else:
    -209                        
    -210                        ion_charge =  -1* int(list_ref[2][:-1])
    +184        return data
    +185
    +186    def from_bruker_ref_file(self) -> List[MolecularFormula]:
    +187        """Create a list of MolecularFormula objects from Bruker reference file
    +188
    +189        Returns
    +190        -------
    +191        List[MolecularFormula]
    +192            List of MolecularFormula objects from Bruker reference file
    +193        """
    +194
    +195        import csv
    +196
    +197        list_mf_obj = []
    +198
    +199        with open(self.ref_file_location) as ref_f:
    +200            labels = ref_f.readline().strip("\n").split(";")
    +201
    +202            for line in ref_f.readlines():
    +203                if line != "\n":
    +204                    list_ref = line.strip("\n").split(" ")
    +205
    +206                    if list_ref[2][-1] == "+":
    +207                        ion_charge = int(list_ref[2][:-1])
    +208
    +209                    else:
    +210                        ion_charge = -1 * int(list_ref[2][:-1])
     211
    -212                    
    -213                    ion_mol_formula = list_ref[0]
    -214                    mz = float(list_ref[1])
    -215                    formula_dict = self.mformula_s_to_dict(ion_mol_formula)
    -216                    
    -217                    list_mf_obj.append(MolecularFormula(formula_dict, ion_charge, external_mz=mz))
    -218        
    -219        return  list_mf_obj           
    -220
    -221    def from_corems_ref_file(self, delimiter="\t"): #pragma: no cover
    -222        """Create a list of MolecularFormula objects from CoreMS reference file
    -223        
    -224        Not being used
    -225        
    -226        Parameters
    -227        ----------
    -228        delimiter : str
    -229            delimiter used in the reference file
    -230        
    -231        Returns
    -232        -------
    -233        List[MolecularFormula]
    -234            List of MolecularFormula objects from CoreMS reference file
    -235        """
    -236        #not being used
    -237        import csv
    -238
    -239        list_mf_obj = []
    -240
    -241        with open('res/RefMassLists/Crude-Pos-ESI.ref') as ref_f:
    -242
    -243            labels = ref_f.readline().strip('\n').split(delimiter)
    -244            
    +212                    ion_mol_formula = list_ref[0]
    +213                    mz = float(list_ref[1])
    +214                    formula_dict = self.mformula_s_to_dict(ion_mol_formula)
    +215
    +216                    list_mf_obj.append(
    +217                        MolecularFormula(formula_dict, ion_charge, external_mz=mz)
    +218                    )
    +219
    +220        return list_mf_obj
    +221
    +222    def from_corems_ref_file(self, delimiter="\t"):  # pragma: no cover
    +223        """Create a list of MolecularFormula objects from CoreMS reference file
    +224
    +225        Not being used
    +226
    +227        Parameters
    +228        ----------
    +229        delimiter : str
    +230            delimiter used in the reference file
    +231
    +232        Returns
    +233        -------
    +234        List[MolecularFormula]
    +235            List of MolecularFormula objects from CoreMS reference file
    +236        """
    +237        # not being used
    +238        import csv
    +239
    +240        list_mf_obj = []
    +241
    +242        with open("res/RefMassLists/Crude-Pos-ESI.ref") as ref_f:
    +243            labels = ref_f.readline().strip("\n").split(delimiter)
    +244
     245            for line in ref_f.readlines():
    -246                
    -247                if line != '\n':
    -248        
    -249                    list_ref = (line.strip('\n').split(delimiter))
    -250                    
    -251                    formula_string = list_ref[0]
    -252                    ion_charge = int(list_ref[1])
    -253                    ion_type = list_ref[2]
    -254
    -255                    molform = MolecularFormula(formula_string, ion_charge, ion_type=ion_type)
    -256                    
    +246                if line != "\n":
    +247                    list_ref = line.strip("\n").split(delimiter)
    +248
    +249                    formula_string = list_ref[0]
    +250                    ion_charge = int(list_ref[1])
    +251                    ion_type = list_ref[2]
    +252
    +253                    molform = MolecularFormula(
    +254                        formula_string, ion_charge, ion_type=ion_type
    +255                    )
    +256
     257                    list_mf_obj.append(self.molecular_formula_ref(molform))
     258
    -259        return  list_mf_obj           
    +259        return list_mf_obj
     260
    -261
    -262    def split(self, delimiters, string, maxsplit=0): #pragma: no cover
    -263        """Splits a string using a list of delimiters.
    -264        
    -265        Does not work when formula has atoms with same characters, i.e - C10H21NNa
    -266        
    -267        Parameters
    -268        ----------
    -269        delimiters : list
    -270            list of delimiters
    -271        string : str
    -272            string to be split
    -273        maxsplit : int, optional
    -274            maximum number of splits. Default is 0
    -275            
    -276        Returns
    -277        -------
    -278        list
    -279            list of strings obtained after splitting the string
    -280        list
    -281            list of counts obtained after splitting the string
    -282        """
    -283        regexPattern = '|'.join(map(re.escape, delimiters)) #pragma: no cover
    -284        isotopes = re.findall(regexPattern, string) #pragma: no cover
    -285        counts = re.split(regexPattern, string, maxsplit)  #pragma: no cover
    -286        return isotopes, counts
    -287
    -288    def mformula_s_to_dict(self, s_mformulatring, iontype='unknown'):
    -289        """Converts a molecular formula string to a dict
    -290        
    -291        Parameters
    -292        ----------
    -293        s_mformulatring : str
    -294            molecular formula string, i.e. 'C10H21NNa'
    -295        iontype : str, optional
    -296            ion type. Default is 'unknown'
    -297        
    -298        Returns
    -299        -------
    -300        dict
    -301            molecular formula dictionary
    -302
    -303        Notes
    -304        -----
    -305        Does not work if the atomic mass number is passed i.e. 37Cl, 81Br, convention follow the light isotope labeling 35Cl is Cl, 12C is C, etc.
    -306        If you need to use heavy isotopes please use another reference file format that separate the formula string by a blank space and parse it using the function corems_ref_file
    -307
    -308        Raises
    -309        ------
    -310        TypeError
    -311            Atom does not exist in Atoms.atoms_order list
    -312        Exception
    -313            Empty molecular formula
    -314        """
    -315        if s_mformulatring:
    -316            
    -317            #find the case C122
    -318            all_atoms = re.findall(r'[A-Z]{1}[0-9]{1,10000}', s_mformulatring)
    -319            
    -320            #find the case Br2
    -321            all_atoms2 = re.findall(r'[A-Z]{1}[a-z]{1}[0-9]{1,10000}', s_mformulatring)
    -322            #find the case N
    -323            single_digit_atoms_one = re.findall(r'[A-Z]{1}(?![0-9])(?![a-z])', s_mformulatring)
    -324            #print(single_digit_atoms_one)
    -325            #find the case Na
    -326            due_digit_atoms_one = re.findall(r'[A-Z]{1}[a-z]{1}(?![0-9])', s_mformulatring)
    -327            
    -328            all_atoms = all_atoms + all_atoms2 + due_digit_atoms_one +single_digit_atoms_one
    -329            
    -330            dict_res = {}
    -331            
    -332            for each_atom_count in all_atoms:
    -333                
    -334                
    -335                count = re.findall(r'[0-9]{1,10000}', each_atom_count)
    -336                atom = ''.join(re.findall(r'[A-z]', each_atom_count))
    -337                
    -338                if atom in Atoms.atoms_order:
    -339                    
    -340                    if count:
    -341                        dict_res[atom] = int(count[0])
    -342                    else:
    -343                        dict_res[atom] = 1
    -344                
    -345                else:
    -346                    
    +261    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
    +262        """Splits a string using a list of delimiters.
    +263
    +264        Does not work when formula has atoms with same characters, i.e - C10H21NNa
    +265
    +266        Parameters
    +267        ----------
    +268        delimiters : list
    +269            list of delimiters
    +270        string : str
    +271            string to be split
    +272        maxsplit : int, optional
    +273            maximum number of splits. Default is 0
    +274
    +275        Returns
    +276        -------
    +277        list
    +278            list of strings obtained after splitting the string
    +279        list
    +280            list of counts obtained after splitting the string
    +281        """
    +282        regexPattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
    +283        isotopes = re.findall(regexPattern, string)  # pragma: no cover
    +284        counts = re.split(regexPattern, string, maxsplit)  # pragma: no cover
    +285        return isotopes, counts
    +286
    +287    def mformula_s_to_dict(self, s_mformulatring, iontype="unknown"):
    +288        """Converts a molecular formula string to a dict
    +289
    +290        Parameters
    +291        ----------
    +292        s_mformulatring : str
    +293            molecular formula string, i.e. 'C10H21NNa'
    +294        iontype : str, optional
    +295            ion type. Default is 'unknown'
    +296
    +297        Returns
    +298        -------
    +299        dict
    +300            molecular formula dictionary
    +301
    +302        Notes
    +303        -----
    +304        Does not work if the atomic mass number is passed i.e. 37Cl, 81Br, convention follow the light isotope labeling 35Cl is Cl, 12C is C, etc.
    +305        If you need to use heavy isotopes please use another reference file format that separate the formula string by a blank space and parse it using the function corems_ref_file
    +306
    +307        Raises
    +308        ------
    +309        TypeError
    +310            Atom does not exist in Atoms.atoms_order list
    +311        Exception
    +312            Empty molecular formula
    +313        """
    +314        if s_mformulatring:
    +315            # find the case C122
    +316            all_atoms = re.findall(r"[A-Z]{1}[0-9]{1,10000}", s_mformulatring)
    +317
    +318            # find the case Br2
    +319            all_atoms2 = re.findall(r"[A-Z]{1}[a-z]{1}[0-9]{1,10000}", s_mformulatring)
    +320            # find the case N
    +321            single_digit_atoms_one = re.findall(
    +322                r"[A-Z]{1}(?![0-9])(?![a-z])", s_mformulatring
    +323            )
    +324            # print(single_digit_atoms_one)
    +325            # find the case Na
    +326            due_digit_atoms_one = re.findall(
    +327                r"[A-Z]{1}[a-z]{1}(?![0-9])", s_mformulatring
    +328            )
    +329
    +330            all_atoms = (
    +331                all_atoms + all_atoms2 + due_digit_atoms_one + single_digit_atoms_one
    +332            )
    +333
    +334            dict_res = {}
    +335
    +336            for each_atom_count in all_atoms:
    +337                count = re.findall(r"[0-9]{1,10000}", each_atom_count)
    +338                atom = "".join(re.findall(r"[A-z]", each_atom_count))
    +339
    +340                if atom in Atoms.atoms_order:
    +341                    if count:
    +342                        dict_res[atom] = int(count[0])
    +343                    else:
    +344                        dict_res[atom] = 1
    +345
    +346                else:
     347                    tb = sys.exc_info()[2]
    -348                    raise TypeError("Atom %s does not exist in Atoms.atoms_order list" % atom).with_traceback(tb)
    -349            
    -350            dict_res[Labels.ion_type]  = iontype
    +348                    raise TypeError(
    +349                        "Atom %s does not exist in Atoms.atoms_order list" % atom
    +350                    ).with_traceback(tb)
     351
    -352            return dict_res
    -353        
    -354        else: 
    -355            
    -356            tb = sys.exc_info()[2]
    -357            raise Exception('Empty molecular formula').with_traceback(tb)
    +352            dict_res[Labels.ion_type] = iontype
    +353
    +354            return dict_res
    +355
    +356        else:
    +357            tb = sys.exc_info()[2]
    +358            raise Exception("Empty molecular formula").with_traceback(tb)
     
    @@ -1024,7 +1017,7 @@
    Attributes
    Methods
      -
    • molecular_formula_ref(mz, molecular_formula). +
    • molecular_formula_ref(mz, molecular_formula). Return MolecularFormulaLinkProxy object
    • from_lcms_lib_file(ion_charge, ion_types). Return Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file
    • @@ -1050,15 +1043,14 @@
      Methods
    -
     95    def __init__(self, ref_file_location) :
    - 96            
    - 97            #Thread.__init__(self)
    - 98            
    - 99            self.ref_file_location = Path(ref_file_location)
    -100            
    -101            if not self.ref_file_location.exists():
    -102                tb = sys.exc_info()[2]
    -103                raise FileNotFoundError(ref_file_location).with_traceback(tb)
    +            
     98    def __init__(self, ref_file_location):
    + 99        # Thread.__init__(self)
    +100
    +101        self.ref_file_location = Path(ref_file_location)
    +102
    +103        if not self.ref_file_location.exists():
    +104            tb = sys.exc_info()[2]
    +105            raise FileNotFoundError(ref_file_location).with_traceback(tb)
     
    @@ -1087,22 +1079,22 @@
    Methods
    -
    105    def molecular_formula_ref( self, mz, molecular_formula):
    -106        """Instantiate a MolecularFormulaLinkProxy object
    -107
    -108        Parameters
    -109        ----------
    -110        mz : float
    -111            target m/z
    -112        molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    -113            corems MolecularFormula or LCMSLibRefMolecularFormula object
    -114        
    -115        Returns
    -116        -------
    -117        MolecularFormulaLinkProxy
    -118            MolecularFormulaLinkProxy object
    -119        """        
    -120        return MolecularFormulaLinkProxy(molecular_formula, mz)
    +            
    107    def molecular_formula_ref(self, mz, molecular_formula):
    +108        """Instantiate a MolecularFormulaLinkProxy object
    +109
    +110        Parameters
    +111        ----------
    +112        mz : float
    +113            target m/z
    +114        molecular_formula : MolecularFormula | LCMSLibRefMolecularFormula
    +115            corems MolecularFormula or LCMSLibRefMolecularFormula object
    +116
    +117        Returns
    +118        -------
    +119        MolecularFormulaLinkProxy
    +120            MolecularFormulaLinkProxy object
    +121        """
    +122        return MolecularFormulaLinkProxy(molecular_formula, mz)
     
    @@ -1137,64 +1129,67 @@
    Returns
    -
    122    def from_lcms_lib_file(self, ion_charge: float, ion_types: List[str]) -> Dict [str, Dict[float, List[LCMSLibRefMolecularFormula] ] ]:
    -123        """Create a dictionary of LCMSLibRefMolecularFormula objects from LCMS library reference file
    -124        
    -125        Parameters
    -126        ----------
    -127        ion_charge : float
    -128            ion charge
    -129        ion_types : List[str]
    -130            list of ion types
    -131        
    -132        Returns
    -133        -------
    -134        Dict 
    -135            Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file. m/z is the target m/z; standard_name is the name of the molecular standard mix; MolecularFormula is the corems molecular formula class
    -136        """
    -137       
    -138        data = {}
    -139        
    -140        with open(self.ref_file_location) as ref_f:
    -141            
    -142            df = pd.read_csv(ref_f, header=0,  encoding= 'unicode_escape')
    -143            
    -144            for index, row in df.iterrows():
    -145                
    -146                formula_s = row["Neutral Formula"]
    -147                formula_dict = self.mformula_s_to_dict(formula_s, Labels.neutral)
    -148                name = row["Compound Name"]
    -149                kegg_id = row["KEGG ID"]
    -150                standard_name = row["NEW MIX"]
    -151                cas = row["KEGG ID"]
    -152                #print(row["Neutral Formula"], formula_dict)
    -153                molf_formula = LCMSLibRefMolecularFormula(formula_dict, ion_charge, Labels.neutral, 
    -154                                                name=name, kegg_id=kegg_id, cas=cas)
    -155                #if round(molf_formula.mz_calc, 4) != round(row['Mass Adduct -H'],4):
    -156                #    print(formula_s)
    -157                #    print(round(molf_formula.mz_calc, 4) , round(row['Mass Adduct -H'],4))
    -158        
    -159                if standard_name in data.keys():
    -160
    -161                    #TODO change it to target ion types and add ion type in the data structure   
    -162                    mz_calc = molf_formula.protonated_mz
    -163                    
    -164                    if mz_calc in data.get(standard_name).keys():
    -165                       
    -166                       data.get(standard_name).get(mz_calc).append(molf_formula)
    -167                    
    -168                    else:   
    -169                        data[standard_name][mz_calc] = [molf_formula]
    -170                else:
    -171                    
    -172                    data[standard_name] = {molf_formula.mz_calc: [molf_formula]}
    -173                #print(formula_s, formula_dict)
    -174                #if molf_formula.ion_type != 'de-protonated':
    -175                #    print( 'ha', molf_formula.ion_type )
    -176                #print(formula_dict)
    -177                #print(row['c1'], row['c2'])
    -178        
    -179        return data
    +            
    124    def from_lcms_lib_file(
    +125        self, ion_charge: float, ion_types: List[str]
    +126    ) -> Dict[str, Dict[float, List[LCMSLibRefMolecularFormula]]]:
    +127        """Create a dictionary of LCMSLibRefMolecularFormula objects from LCMS library reference file
    +128
    +129        Parameters
    +130        ----------
    +131        ion_charge : float
    +132            ion charge
    +133        ion_types : List[str]
    +134            list of ion types
    +135
    +136        Returns
    +137        -------
    +138        Dict
    +139            Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file. m/z is the target m/z; standard_name is the name of the molecular standard mix; MolecularFormula is the corems molecular formula class
    +140        """
    +141
    +142        data = {}
    +143
    +144        with open(self.ref_file_location) as ref_f:
    +145            df = pd.read_csv(ref_f, header=0, encoding="unicode_escape")
    +146
    +147            for index, row in df.iterrows():
    +148                formula_s = row["Neutral Formula"]
    +149                formula_dict = self.mformula_s_to_dict(formula_s, Labels.neutral)
    +150                name = row["Compound Name"]
    +151                kegg_id = row["KEGG ID"]
    +152                standard_name = row["NEW MIX"]
    +153                cas = row["KEGG ID"]
    +154                # print(row["Neutral Formula"], formula_dict)
    +155                molf_formula = LCMSLibRefMolecularFormula(
    +156                    formula_dict,
    +157                    ion_charge,
    +158                    Labels.neutral,
    +159                    name=name,
    +160                    kegg_id=kegg_id,
    +161                    cas=cas,
    +162                )
    +163                # if round(molf_formula.mz_calc, 4) != round(row['Mass Adduct -H'],4):
    +164                #    print(formula_s)
    +165                #    print(round(molf_formula.mz_calc, 4) , round(row['Mass Adduct -H'],4))
    +166
    +167                if standard_name in data.keys():
    +168                    # TODO change it to target ion types and add ion type in the data structure
    +169                    mz_calc = molf_formula.protonated_mz
    +170
    +171                    if mz_calc in data.get(standard_name).keys():
    +172                        data.get(standard_name).get(mz_calc).append(molf_formula)
    +173
    +174                    else:
    +175                        data[standard_name][mz_calc] = [molf_formula]
    +176                else:
    +177                    data[standard_name] = {molf_formula.mz_calc: [molf_formula]}
    +178                # print(formula_s, formula_dict)
    +179                # if molf_formula.ion_type != 'de-protonated':
    +180                #    print( 'ha', molf_formula.ion_type )
    +181                # print(formula_dict)
    +182                # print(row['c1'], row['c2'])
    +183
    +184        return data
     
    @@ -1229,45 +1224,41 @@
    Returns
    -
    181    def from_bruker_ref_file(self) -> List[MolecularFormula]:
    -182        """Create a list of MolecularFormula objects from Bruker reference file
    -183
    -184        Returns
    -185        -------
    -186        List[MolecularFormula]
    -187            List of MolecularFormula objects from Bruker reference file
    -188        """
    -189
    -190        import csv
    -191        
    -192        list_mf_obj = []
    -193
    -194        with open(self.ref_file_location) as ref_f:
    -195
    -196            labels = ref_f.readline().strip('\n').split(';')
    -197            
    -198            for line in ref_f.readlines():
    -199                
    -200                if line != '\n':
    -201        
    -202                    list_ref = (line.strip('\n').split(' '))
    -203                    
    -204                    if list_ref[2][-1] == '+': 
    -205                        
    -206                        ion_charge =  int(list_ref[2][:-1])
    -207                    
    -208                    else:
    -209                        
    -210                        ion_charge =  -1* int(list_ref[2][:-1])
    +            
    186    def from_bruker_ref_file(self) -> List[MolecularFormula]:
    +187        """Create a list of MolecularFormula objects from Bruker reference file
    +188
    +189        Returns
    +190        -------
    +191        List[MolecularFormula]
    +192            List of MolecularFormula objects from Bruker reference file
    +193        """
    +194
    +195        import csv
    +196
    +197        list_mf_obj = []
    +198
    +199        with open(self.ref_file_location) as ref_f:
    +200            labels = ref_f.readline().strip("\n").split(";")
    +201
    +202            for line in ref_f.readlines():
    +203                if line != "\n":
    +204                    list_ref = line.strip("\n").split(" ")
    +205
    +206                    if list_ref[2][-1] == "+":
    +207                        ion_charge = int(list_ref[2][:-1])
    +208
    +209                    else:
    +210                        ion_charge = -1 * int(list_ref[2][:-1])
     211
    -212                    
    -213                    ion_mol_formula = list_ref[0]
    -214                    mz = float(list_ref[1])
    -215                    formula_dict = self.mformula_s_to_dict(ion_mol_formula)
    -216                    
    -217                    list_mf_obj.append(MolecularFormula(formula_dict, ion_charge, external_mz=mz))
    -218        
    -219        return  list_mf_obj           
    +212                    ion_mol_formula = list_ref[0]
    +213                    mz = float(list_ref[1])
    +214                    formula_dict = self.mformula_s_to_dict(ion_mol_formula)
    +215
    +216                    list_mf_obj.append(
    +217                        MolecularFormula(formula_dict, ion_charge, external_mz=mz)
    +218                    )
    +219
    +220        return list_mf_obj
     
    @@ -1293,45 +1284,44 @@
    Returns
    -
    221    def from_corems_ref_file(self, delimiter="\t"): #pragma: no cover
    -222        """Create a list of MolecularFormula objects from CoreMS reference file
    -223        
    -224        Not being used
    -225        
    -226        Parameters
    -227        ----------
    -228        delimiter : str
    -229            delimiter used in the reference file
    -230        
    -231        Returns
    -232        -------
    -233        List[MolecularFormula]
    -234            List of MolecularFormula objects from CoreMS reference file
    -235        """
    -236        #not being used
    -237        import csv
    -238
    -239        list_mf_obj = []
    -240
    -241        with open('res/RefMassLists/Crude-Pos-ESI.ref') as ref_f:
    -242
    -243            labels = ref_f.readline().strip('\n').split(delimiter)
    -244            
    +            
    222    def from_corems_ref_file(self, delimiter="\t"):  # pragma: no cover
    +223        """Create a list of MolecularFormula objects from CoreMS reference file
    +224
    +225        Not being used
    +226
    +227        Parameters
    +228        ----------
    +229        delimiter : str
    +230            delimiter used in the reference file
    +231
    +232        Returns
    +233        -------
    +234        List[MolecularFormula]
    +235            List of MolecularFormula objects from CoreMS reference file
    +236        """
    +237        # not being used
    +238        import csv
    +239
    +240        list_mf_obj = []
    +241
    +242        with open("res/RefMassLists/Crude-Pos-ESI.ref") as ref_f:
    +243            labels = ref_f.readline().strip("\n").split(delimiter)
    +244
     245            for line in ref_f.readlines():
    -246                
    -247                if line != '\n':
    -248        
    -249                    list_ref = (line.strip('\n').split(delimiter))
    -250                    
    -251                    formula_string = list_ref[0]
    -252                    ion_charge = int(list_ref[1])
    -253                    ion_type = list_ref[2]
    -254
    -255                    molform = MolecularFormula(formula_string, ion_charge, ion_type=ion_type)
    -256                    
    +246                if line != "\n":
    +247                    list_ref = line.strip("\n").split(delimiter)
    +248
    +249                    formula_string = list_ref[0]
    +250                    ion_charge = int(list_ref[1])
    +251                    ion_type = list_ref[2]
    +252
    +253                    molform = MolecularFormula(
    +254                        formula_string, ion_charge, ion_type=ion_type
    +255                    )
    +256
     257                    list_mf_obj.append(self.molecular_formula_ref(molform))
     258
    -259        return  list_mf_obj           
    +259        return list_mf_obj
     
    @@ -1366,31 +1356,31 @@
    Returns
    -
    262    def split(self, delimiters, string, maxsplit=0): #pragma: no cover
    -263        """Splits a string using a list of delimiters.
    -264        
    -265        Does not work when formula has atoms with same characters, i.e - C10H21NNa
    -266        
    -267        Parameters
    -268        ----------
    -269        delimiters : list
    -270            list of delimiters
    -271        string : str
    -272            string to be split
    -273        maxsplit : int, optional
    -274            maximum number of splits. Default is 0
    -275            
    -276        Returns
    -277        -------
    -278        list
    -279            list of strings obtained after splitting the string
    -280        list
    -281            list of counts obtained after splitting the string
    -282        """
    -283        regexPattern = '|'.join(map(re.escape, delimiters)) #pragma: no cover
    -284        isotopes = re.findall(regexPattern, string) #pragma: no cover
    -285        counts = re.split(regexPattern, string, maxsplit)  #pragma: no cover
    -286        return isotopes, counts
    +            
    261    def split(self, delimiters, string, maxsplit=0):  # pragma: no cover
    +262        """Splits a string using a list of delimiters.
    +263
    +264        Does not work when formula has atoms with same characters, i.e - C10H21NNa
    +265
    +266        Parameters
    +267        ----------
    +268        delimiters : list
    +269            list of delimiters
    +270        string : str
    +271            string to be split
    +272        maxsplit : int, optional
    +273            maximum number of splits. Default is 0
    +274
    +275        Returns
    +276        -------
    +277        list
    +278            list of strings obtained after splitting the string
    +279        list
    +280            list of counts obtained after splitting the string
    +281        """
    +282        regexPattern = "|".join(map(re.escape, delimiters))  # pragma: no cover
    +283        isotopes = re.findall(regexPattern, string)  # pragma: no cover
    +284        counts = re.split(regexPattern, string, maxsplit)  # pragma: no cover
    +285        return isotopes, counts
     
    @@ -1430,76 +1420,78 @@
    Returns
    -
    288    def mformula_s_to_dict(self, s_mformulatring, iontype='unknown'):
    -289        """Converts a molecular formula string to a dict
    -290        
    -291        Parameters
    -292        ----------
    -293        s_mformulatring : str
    -294            molecular formula string, i.e. 'C10H21NNa'
    -295        iontype : str, optional
    -296            ion type. Default is 'unknown'
    -297        
    -298        Returns
    -299        -------
    -300        dict
    -301            molecular formula dictionary
    -302
    -303        Notes
    -304        -----
    -305        Does not work if the atomic mass number is passed i.e. 37Cl, 81Br, convention follow the light isotope labeling 35Cl is Cl, 12C is C, etc.
    -306        If you need to use heavy isotopes please use another reference file format that separate the formula string by a blank space and parse it using the function corems_ref_file
    -307
    -308        Raises
    -309        ------
    -310        TypeError
    -311            Atom does not exist in Atoms.atoms_order list
    -312        Exception
    -313            Empty molecular formula
    -314        """
    -315        if s_mformulatring:
    -316            
    -317            #find the case C122
    -318            all_atoms = re.findall(r'[A-Z]{1}[0-9]{1,10000}', s_mformulatring)
    -319            
    -320            #find the case Br2
    -321            all_atoms2 = re.findall(r'[A-Z]{1}[a-z]{1}[0-9]{1,10000}', s_mformulatring)
    -322            #find the case N
    -323            single_digit_atoms_one = re.findall(r'[A-Z]{1}(?![0-9])(?![a-z])', s_mformulatring)
    -324            #print(single_digit_atoms_one)
    -325            #find the case Na
    -326            due_digit_atoms_one = re.findall(r'[A-Z]{1}[a-z]{1}(?![0-9])', s_mformulatring)
    -327            
    -328            all_atoms = all_atoms + all_atoms2 + due_digit_atoms_one +single_digit_atoms_one
    -329            
    -330            dict_res = {}
    -331            
    -332            for each_atom_count in all_atoms:
    -333                
    -334                
    -335                count = re.findall(r'[0-9]{1,10000}', each_atom_count)
    -336                atom = ''.join(re.findall(r'[A-z]', each_atom_count))
    -337                
    -338                if atom in Atoms.atoms_order:
    -339                    
    -340                    if count:
    -341                        dict_res[atom] = int(count[0])
    -342                    else:
    -343                        dict_res[atom] = 1
    -344                
    -345                else:
    -346                    
    +            
    287    def mformula_s_to_dict(self, s_mformulatring, iontype="unknown"):
    +288        """Converts a molecular formula string to a dict
    +289
    +290        Parameters
    +291        ----------
    +292        s_mformulatring : str
    +293            molecular formula string, i.e. 'C10H21NNa'
    +294        iontype : str, optional
    +295            ion type. Default is 'unknown'
    +296
    +297        Returns
    +298        -------
    +299        dict
    +300            molecular formula dictionary
    +301
    +302        Notes
    +303        -----
    +304        Does not work if the atomic mass number is passed i.e. 37Cl, 81Br, convention follow the light isotope labeling 35Cl is Cl, 12C is C, etc.
    +305        If you need to use heavy isotopes please use another reference file format that separate the formula string by a blank space and parse it using the function corems_ref_file
    +306
    +307        Raises
    +308        ------
    +309        TypeError
    +310            Atom does not exist in Atoms.atoms_order list
    +311        Exception
    +312            Empty molecular formula
    +313        """
    +314        if s_mformulatring:
    +315            # find the case C122
    +316            all_atoms = re.findall(r"[A-Z]{1}[0-9]{1,10000}", s_mformulatring)
    +317
    +318            # find the case Br2
    +319            all_atoms2 = re.findall(r"[A-Z]{1}[a-z]{1}[0-9]{1,10000}", s_mformulatring)
    +320            # find the case N
    +321            single_digit_atoms_one = re.findall(
    +322                r"[A-Z]{1}(?![0-9])(?![a-z])", s_mformulatring
    +323            )
    +324            # print(single_digit_atoms_one)
    +325            # find the case Na
    +326            due_digit_atoms_one = re.findall(
    +327                r"[A-Z]{1}[a-z]{1}(?![0-9])", s_mformulatring
    +328            )
    +329
    +330            all_atoms = (
    +331                all_atoms + all_atoms2 + due_digit_atoms_one + single_digit_atoms_one
    +332            )
    +333
    +334            dict_res = {}
    +335
    +336            for each_atom_count in all_atoms:
    +337                count = re.findall(r"[0-9]{1,10000}", each_atom_count)
    +338                atom = "".join(re.findall(r"[A-z]", each_atom_count))
    +339
    +340                if atom in Atoms.atoms_order:
    +341                    if count:
    +342                        dict_res[atom] = int(count[0])
    +343                    else:
    +344                        dict_res[atom] = 1
    +345
    +346                else:
     347                    tb = sys.exc_info()[2]
    -348                    raise TypeError("Atom %s does not exist in Atoms.atoms_order list" % atom).with_traceback(tb)
    -349            
    -350            dict_res[Labels.ion_type]  = iontype
    +348                    raise TypeError(
    +349                        "Atom %s does not exist in Atoms.atoms_order list" % atom
    +350                    ).with_traceback(tb)
     351
    -352            return dict_res
    -353        
    -354        else: 
    -355            
    -356            tb = sys.exc_info()[2]
    -357            raise Exception('Empty molecular formula').with_traceback(tb)
    +352            dict_res[Labels.ion_type] = iontype
    +353
    +354            return dict_res
    +355
    +356        else:
    +357            tb = sys.exc_info()[2]
    +358            raise Exception("Empty molecular formula").with_traceback(tb)
     
    diff --git a/docs/corems/molecular_id/calc.html b/docs/corems/molecular_id/calc.html index 8d320d38..ac5fdf3b 100644 --- a/docs/corems/molecular_id/calc.html +++ b/docs/corems/molecular_id/calc.html @@ -30,7 +30,6 @@

    Submodules

    • ClusterFilter
    • -
    • KendrickGroup
    • MolecularFilter
    • SpectralSimilarity
    • math_distance
    • diff --git a/docs/corems/molecular_id/calc/ClusterFilter.html b/docs/corems/molecular_id/calc/ClusterFilter.html index 7c8f5dea..b7715494 100644 --- a/docs/corems/molecular_id/calc/ClusterFilter.html +++ b/docs/corems/molecular_id/calc/ClusterFilter.html @@ -72,257 +72,257 @@

      -
        1from sklearn.cluster import DBSCAN
      -  2from sklearn.preprocessing import StandardScaler
      -  3from sklearn.cluster import MeanShift, estimate_bandwidth
      +                        
        1import pandas as pd
      +  2from sklearn.cluster import DBSCAN
      +  3from sklearn.preprocessing import StandardScaler
         4
      -  5
      +  5# import matplotlib.pyplot as plt
         6
      -  7import numpy as np
      -  8import pandas as pd
      -  9#import matplotlib.pyplot as plt
      +  7
      +  8class ClusteringFilter:
      +  9    """Class for filtering and clustering mass spectra data using various algorithms.
        10
      - 11class ClusteringFilter():
      - 12    """ Class for filtering and clustering mass spectra data using various algorithms.
      - 13
      - 14    Attributes
      - 15    -------
      - 16    mass_spectrum : MassSpectrum
      - 17        Mass spectrum object.
      - 18    ms_peaks : list
      - 19        List of mass peaks.
      - 20    ms_peak_indexes : list
      - 21        List of peak indexes.
      - 22    min_samples : int
      - 23        Minimum number of samples in a cluster.
      - 24    eps : float
      - 25        The maximum distance between two samples for one to be considered as in the neighborhood of the other.
      - 26    bandwidth : float
      - 27        Bandwidth used in MeanShift algorithm.
      - 28    quantile : float
      - 29        Quantile used in estimate_bandwidth function.
      - 30    n_samples : int
      - 31        Number of samples used in estimate_bandwidth function.
      - 32    bin_seeding : bool
      - 33        If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized.
      - 34    min_peaks_per_class : int
      - 35        Minimum number of peaks per class.
      - 36        
      - 37    Methods
      - 38    -------
      - 39    * get_mass_error_matrix_data(ms_peaks).
      - 40        Get the mass error matrix data from a list of mass peaks.  
      - 41    * get_kendrick_matrix_data(mass_spectrum).
      - 42        Get the Kendrick matrix data from a mass spectrum.  
      - 43    * filter_kendrick(mass_spectrum).
      - 44        Filter the mass spectrum data using the Kendrick algorithm.  
      - 45    * filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj).
      - 46        Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.  
      - 47    * remove_assignment_by_mass_error(mass_spectrum).
      - 48        Remove assignments from the mass spectrum based on mass error.  
      - 49    
      - 50
      - 51    """
      - 52    def get_mass_error_matrix_data(self, ms_peaks):
      - 53        """Get the mass error matrix data from a list of mass peaks.
      - 54
      - 55        Parameters
      - 56        ----------
      - 57        ms_peaks : list
      - 58            List of mass peaks.
      - 59
      - 60        Returns
      - 61        -------
      - 62        matrix_data : ndarray
      - 63            Matrix data containing mass and error values.
      - 64        list_indexes_mass_spec : list 
      - 65            List of indexes of mass peaks in the original mass spectrum.
      - 66        """
      - 67        mass_list = list()
      - 68        error_list = list()
      - 69        list_indexes_mass_spec = []
      - 70        
      - 71        for index, mspeak in enumerate(ms_peaks):
      - 72
      - 73            if mspeak.is_assigned:
      - 74                    
      - 75                #print(mspeak.mz_exp, len(mspeak))
      - 76                for mformula in mspeak:
      - 77                    mass_list.append(mspeak.mz_exp)
      - 78                    error_list.append(mformula.mz_error)
      - 79                    list_indexes_mass_spec.append(index)
      - 80        
      - 81        kendrick_dict = {'mass': mass_list, 'error': error_list}  
      - 82        df = pd.DataFrame(kendrick_dict) 
      - 83        matrix_data = df.values.astype("float32", copy = False)
      - 84        return matrix_data, list_indexes_mass_spec
      - 85
      - 86    def get_kendrick_matrix_data(self, mass_spectrum):
      - 87        """Get the Kendrick matrix data from a mass spectrum.
      - 88
      - 89        Parameters
      - 90        ----------
      - 91        mass_spectrum : MassSpectrum
      - 92            Mass spectrum object.
      - 93
      - 94        Returns
      - 95        -------
      - 96        matrix_data : ndarray
      - 97            Matrix data containing Kendrick mass and Kendrick mass defect values.
      - 98        """
      - 99        km = mass_spectrum.kendrick_mass
      -100        kmd = mass_spectrum.kmd
      -101        kendrick_dict = {'km': km, 'kmd': kmd}  
      -102        df = pd.DataFrame(kendrick_dict) 
      -103        matrix_data = df.values.astype("float32", copy = False)
      -104        return matrix_data
      -105
      -106    def filter_kendrick(self, mass_spectrum):
      -107        """ Filter the mass spectrum data using the Kendrick algorithm.
      -108
      -109        Parameters
      -110        ----------
      -111        mass_spectrum : MassSpectrum 
      -112            Mass spectrum object.
      -113
      -114        """
      -115        matrix_data = self.get_kendrick_matrix_data(mass_spectrum)
      + 11    Attributes
      + 12    -------
      + 13    mass_spectrum : MassSpectrum
      + 14        Mass spectrum object.
      + 15    ms_peaks : list
      + 16        List of mass peaks.
      + 17    ms_peak_indexes : list
      + 18        List of peak indexes.
      + 19    min_samples : int
      + 20        Minimum number of samples in a cluster.
      + 21    eps : float
      + 22        The maximum distance between two samples for one to be considered as in the neighborhood of the other.
      + 23    bandwidth : float
      + 24        Bandwidth used in MeanShift algorithm.
      + 25    quantile : float
      + 26        Quantile used in estimate_bandwidth function.
      + 27    n_samples : int
      + 28        Number of samples used in estimate_bandwidth function.
      + 29    bin_seeding : bool
      + 30        If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized.
      + 31    min_peaks_per_class : int
      + 32        Minimum number of peaks per class.
      + 33
      + 34    Methods
      + 35    -------
      + 36    * get_mass_error_matrix_data(ms_peaks).
      + 37        Get the mass error matrix data from a list of mass peaks.
      + 38    * get_kendrick_matrix_data(mass_spectrum).
      + 39        Get the Kendrick matrix data from a mass spectrum.
      + 40    * filter_kendrick(mass_spectrum).
      + 41        Filter the mass spectrum data using the Kendrick algorithm.
      + 42    * filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj).
      + 43        Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      + 44    * remove_assignment_by_mass_error(mass_spectrum).
      + 45        Remove assignments from the mass spectrum based on mass error.
      + 46
      + 47
      + 48    """
      + 49
      + 50    def get_mass_error_matrix_data(self, ms_peaks):
      + 51        """Get the mass error matrix data from a list of mass peaks.
      + 52
      + 53        Parameters
      + 54        ----------
      + 55        ms_peaks : list
      + 56            List of mass peaks.
      + 57
      + 58        Returns
      + 59        -------
      + 60        matrix_data : ndarray
      + 61            Matrix data containing mass and error values.
      + 62        list_indexes_mass_spec : list
      + 63            List of indexes of mass peaks in the original mass spectrum.
      + 64        """
      + 65        mass_list = list()
      + 66        error_list = list()
      + 67        list_indexes_mass_spec = []
      + 68
      + 69        for index, mspeak in enumerate(ms_peaks):
      + 70            if mspeak.is_assigned:
      + 71                # print(mspeak.mz_exp, len(mspeak))
      + 72                for mformula in mspeak:
      + 73                    mass_list.append(mspeak.mz_exp)
      + 74                    error_list.append(mformula.mz_error)
      + 75                    list_indexes_mass_spec.append(index)
      + 76
      + 77        kendrick_dict = {"mass": mass_list, "error": error_list}
      + 78        df = pd.DataFrame(kendrick_dict)
      + 79        matrix_data = df.values.astype("float32", copy=False)
      + 80        return matrix_data, list_indexes_mass_spec
      + 81
      + 82    def get_kendrick_matrix_data(self, mass_spectrum):
      + 83        """Get the Kendrick matrix data from a mass spectrum.
      + 84
      + 85        Parameters
      + 86        ----------
      + 87        mass_spectrum : MassSpectrum
      + 88            Mass spectrum object.
      + 89
      + 90        Returns
      + 91        -------
      + 92        matrix_data : ndarray
      + 93            Matrix data containing Kendrick mass and Kendrick mass defect values.
      + 94        """
      + 95        km = mass_spectrum.kendrick_mass
      + 96        kmd = mass_spectrum.kmd
      + 97        kendrick_dict = {"km": km, "kmd": kmd}
      + 98        df = pd.DataFrame(kendrick_dict)
      + 99        matrix_data = df.values.astype("float32", copy=False)
      +100        return matrix_data
      +101
      +102    def filter_kendrick(self, mass_spectrum):
      +103        """Filter the mass spectrum data using the Kendrick algorithm.
      +104
      +105        Parameters
      +106        ----------
      +107        mass_spectrum : MassSpectrum
      +108            Mass spectrum object.
      +109
      +110        """
      +111        matrix_data = self.get_kendrick_matrix_data(mass_spectrum)
      +112
      +113        stdscaler = StandardScaler().fit(matrix_data)
      +114
      +115        matrix_data_scaled = stdscaler.transform(matrix_data)
       116
      -117        stdscaler = StandardScaler().fit(matrix_data)
      -118        
      -119        matrix_data_scaled = stdscaler.transform(matrix_data)
      -120
      -121        clusters = DBSCAN(eps = .75, min_samples=50).fit_predict(matrix_data_scaled)
      -122        
      -123        # Number of clusters in labels, ignoring noise if present.
      -124        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      -125        n_noise_ = list(clusters).count(-1)
      -126        
      -127        indexes = []
      -128        for i in range(len(clusters)):
      -129            if clusters[i] == -1:
      -130                indexes.append(i)
      -131        
      -132        if mass_spectrum.parameters.mass_spectrum.verbose_processing:
      -133            print('Estimated number of clusters: %d' % n_clusters_)
      -134            print('Estimated number of noise points: %d' % n_noise_)
      -135        mass_spectrum.filter_by_index(indexes)
      -136        #from matplotlib import pyplot as plt
      -137        #plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      -138        #plt.xlabel("km")
      -139        #plt.ylabel("kmd")
      -140        #plt.show()
      -141        #plt.close()
      -142
      -143    def filter_kendrick_by_index(self, ms_peak_indexes, mass_spectrum_obj):
      -144        """ Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      -145
      -146        Parameters
      -147        ----------
      -148        ms_peak_indexes : list 
      -149            List of peak indexes.
      -150        mass_spectrum_obj : MassSpectrum 
      -151            Mass spectrum object.
      -152
      -153        Returns
      -154        -------
      -155        noise_idx : list 
      -156            List of indexes of noise points in the mass spectrum.
      -157        """
      -158        min_samples = mass_spectrum_obj.molecular_search_settings.min_peaks_per_class
      -159
      -160        kendrick_dict = {'km': list(), 'kmd': list()}  
      -161
      -162        if len(ms_peak_indexes) <= 1: return []
      -163        
      -164        for index, _ in ms_peak_indexes:
      -165           kendrick_dict["km"].append(mass_spectrum_obj[index].kendrick_mass)
      -166           kendrick_dict["kmd"].append(mass_spectrum_obj[index].kmd)
      -167           
      -168        # check min data points otherwise StandardScaler().fit(0 will fail
      -169        
      -170        df = pd.DataFrame(kendrick_dict) 
      -171        matrix_data = df.values.astype("float32", copy = False)
      +117        clusters = DBSCAN(eps=0.75, min_samples=50).fit_predict(matrix_data_scaled)
      +118
      +119        # Number of clusters in labels, ignoring noise if present.
      +120        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      +121        n_noise_ = list(clusters).count(-1)
      +122
      +123        indexes = []
      +124        for i in range(len(clusters)):
      +125            if clusters[i] == -1:
      +126                indexes.append(i)
      +127
      +128        if mass_spectrum.parameters.mass_spectrum.verbose_processing:
      +129            print("Estimated number of clusters: %d" % n_clusters_)
      +130            print("Estimated number of noise points: %d" % n_noise_)
      +131        mass_spectrum.filter_by_index(indexes)
      +132        # from matplotlib import pyplot as plt
      +133        # plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      +134        # plt.xlabel("km")
      +135        # plt.ylabel("kmd")
      +136        # plt.show()
      +137        # plt.close()
      +138
      +139    def filter_kendrick_by_index(self, ms_peak_indexes, mass_spectrum_obj):
      +140        """Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      +141
      +142        Parameters
      +143        ----------
      +144        ms_peak_indexes : list
      +145            List of peak indexes.
      +146        mass_spectrum_obj : MassSpectrum
      +147            Mass spectrum object.
      +148
      +149        Returns
      +150        -------
      +151        noise_idx : list
      +152            List of indexes of noise points in the mass spectrum.
      +153        """
      +154        min_samples = mass_spectrum_obj.molecular_search_settings.min_peaks_per_class
      +155
      +156        kendrick_dict = {"km": list(), "kmd": list()}
      +157
      +158        if len(ms_peak_indexes) <= 1:
      +159            return []
      +160
      +161        for index, _ in ms_peak_indexes:
      +162            kendrick_dict["km"].append(mass_spectrum_obj[index].kendrick_mass)
      +163            kendrick_dict["kmd"].append(mass_spectrum_obj[index].kmd)
      +164
      +165        # check min data points otherwise StandardScaler().fit(0 will fail
      +166
      +167        df = pd.DataFrame(kendrick_dict)
      +168        matrix_data = df.values.astype("float32", copy=False)
      +169
      +170        stdscaler = StandardScaler().fit(matrix_data)
      +171        matrix_data_scaled = stdscaler.transform(matrix_data)
       172
      -173        stdscaler = StandardScaler().fit(matrix_data)
      -174        matrix_data_scaled = stdscaler.transform(matrix_data)
      -175
      -176        clusters = DBSCAN(eps = .8, min_samples=min_samples).fit_predict(matrix_data_scaled)
      -177        
      -178        # Number of clusters in labels, ignoring noise if present.
      -179        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      -180        n_noise_ = list(clusters).count(-1)
      -181        
      -182        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
      -183            print('Estimated number of clusters: %d' % n_clusters_)
      -184            print('Estimated number of noise points: %d' % n_noise_)
      -185
      -186        noise_idx = []
      -187        
      -188        other_peaks_idx = []
      -189
      -190        for i in range(len(clusters)):
      -191            
      -192            if clusters[i] == -1:
      -193                noise_idx.append(ms_peak_indexes[i])
      -194            
      -195            else:
      -196                other_peaks_idx.append(ms_peak_indexes[i])    
      +173        clusters = DBSCAN(eps=0.8, min_samples=min_samples).fit_predict(
      +174            matrix_data_scaled
      +175        )
      +176
      +177        # Number of clusters in labels, ignoring noise if present.
      +178        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      +179        n_noise_ = list(clusters).count(-1)
      +180
      +181        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
      +182            print("Estimated number of clusters: %d" % n_clusters_)
      +183            print("Estimated number of noise points: %d" % n_noise_)
      +184
      +185        noise_idx = []
      +186
      +187        other_peaks_idx = []
      +188
      +189        for i in range(len(clusters)):
      +190            if clusters[i] == -1:
      +191                noise_idx.append(ms_peak_indexes[i])
      +192
      +193            else:
      +194                other_peaks_idx.append(ms_peak_indexes[i])
      +195
      +196        # mfs = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in other_peaks_idx]
       197
      -198        #mfs = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in other_peaks_idx]
      -199        
      -200        #mfs_noise = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in noise_idx]
      -201        
      -202        #print(mfs)
      -203        #print(mfs_noise)
      -204
      -205        #from matplotlib import pyplot as plt
      -206        #plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      -207        #plt.xlabel("km")
      -208        #plt.ylabel("kmd")
      -209        #plt.show()
      -210        #plt.close()
      -211        
      -212        return noise_idx      
      -213
      -214    def remove_assignment_by_mass_error(self, mass_spectrum):
      -215        """ Remove assignments from the mass spectrum based on mass error.
      -216
      -217        Parameters
      -218        ----------
      -219        mass_spectrum : MassSpectrum
      -220            Mass spectrum object.
      -221
      -222        """
      -223        #data need to be binned by mz unit or more to be able to use clustering
      -224        
      -225        matrix_data, list_indexes_mass_spec = self.get_mass_error_matrix_data(mass_spectrum)
      +198        # mfs_noise = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in noise_idx]
      +199
      +200        # print(mfs)
      +201        # print(mfs_noise)
      +202
      +203        # from matplotlib import pyplot as plt
      +204        # plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      +205        # plt.xlabel("km")
      +206        # plt.ylabel("kmd")
      +207        # plt.show()
      +208        # plt.close()
      +209
      +210        return noise_idx
      +211
      +212    def remove_assignment_by_mass_error(self, mass_spectrum):
      +213        """Remove assignments from the mass spectrum based on mass error.
      +214
      +215        Parameters
      +216        ----------
      +217        mass_spectrum : MassSpectrum
      +218            Mass spectrum object.
      +219
      +220        """
      +221        # data need to be binned by mz unit or more to be able to use clustering
      +222
      +223        matrix_data, list_indexes_mass_spec = self.get_mass_error_matrix_data(
      +224            mass_spectrum
      +225        )
       226
       227        stdscaler = StandardScaler().fit(matrix_data)
      -228        
      +228
       229        matrix_data_scaled = stdscaler.transform(matrix_data)
      -230        
      -231        #bandwidth = estimate_bandwidth(matrix_data_scaled, quantile=0.3, n_samples=int(len(ms_peaks)/3))
      +230
      +231        # bandwidth = estimate_bandwidth(matrix_data_scaled, quantile=0.3, n_samples=int(len(ms_peaks)/3))
       232
      -233        #clusters = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit_predict(matrix_data_scaled)
      -234        
      -235        #eps and min_samp need to be optimized by precision and number of mspeaks
      -236        clusters = DBSCAN(eps = .15).fit_predict(matrix_data_scaled)
      -237        
      +233        # clusters = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit_predict(matrix_data_scaled)
      +234
      +235        # eps and min_samp need to be optimized by precision and number of mspeaks
      +236        clusters = DBSCAN(eps=0.15).fit_predict(matrix_data_scaled)
      +237
       238        indexes = []
      -239        
      -240        #from matplotlib import pyplot as plt
      -241        #plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="plasma")
      -242        #plt.xlabel("km")
      -243        #plt.ylabel("kmd")
      -244        #plt.show()
      -245        #plt.close()
      +239
      +240        # from matplotlib import pyplot as plt
      +241        # plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="plasma")
      +242        # plt.xlabel("km")
      +243        # plt.ylabel("kmd")
      +244        # plt.show()
      +245        # plt.close()
       246
       247        for i in range(len(clusters)):
       248            if clusters[i] == -1:
       249                indexes.append(list_indexes_mass_spec[i])
      -250        
      -251        mass_spectrum.remove_assignment_by_index(indexes)    
      +250
      +251        mass_spectrum.remove_assignment_by_index(indexes)
       
      @@ -338,247 +338,250 @@

      -
       13class ClusteringFilter():
      - 14    """ Class for filtering and clustering mass spectra data using various algorithms.
      - 15
      - 16    Attributes
      - 17    -------
      - 18    mass_spectrum : MassSpectrum
      - 19        Mass spectrum object.
      - 20    ms_peaks : list
      - 21        List of mass peaks.
      - 22    ms_peak_indexes : list
      - 23        List of peak indexes.
      - 24    min_samples : int
      - 25        Minimum number of samples in a cluster.
      - 26    eps : float
      - 27        The maximum distance between two samples for one to be considered as in the neighborhood of the other.
      - 28    bandwidth : float
      - 29        Bandwidth used in MeanShift algorithm.
      - 30    quantile : float
      - 31        Quantile used in estimate_bandwidth function.
      - 32    n_samples : int
      - 33        Number of samples used in estimate_bandwidth function.
      - 34    bin_seeding : bool
      - 35        If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized.
      - 36    min_peaks_per_class : int
      - 37        Minimum number of peaks per class.
      - 38        
      - 39    Methods
      - 40    -------
      - 41    * get_mass_error_matrix_data(ms_peaks).
      - 42        Get the mass error matrix data from a list of mass peaks.  
      - 43    * get_kendrick_matrix_data(mass_spectrum).
      - 44        Get the Kendrick matrix data from a mass spectrum.  
      - 45    * filter_kendrick(mass_spectrum).
      - 46        Filter the mass spectrum data using the Kendrick algorithm.  
      - 47    * filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj).
      - 48        Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.  
      - 49    * remove_assignment_by_mass_error(mass_spectrum).
      - 50        Remove assignments from the mass spectrum based on mass error.  
      - 51    
      - 52
      - 53    """
      - 54    def get_mass_error_matrix_data(self, ms_peaks):
      - 55        """Get the mass error matrix data from a list of mass peaks.
      - 56
      - 57        Parameters
      - 58        ----------
      - 59        ms_peaks : list
      - 60            List of mass peaks.
      - 61
      - 62        Returns
      - 63        -------
      - 64        matrix_data : ndarray
      - 65            Matrix data containing mass and error values.
      - 66        list_indexes_mass_spec : list 
      - 67            List of indexes of mass peaks in the original mass spectrum.
      - 68        """
      - 69        mass_list = list()
      - 70        error_list = list()
      - 71        list_indexes_mass_spec = []
      - 72        
      - 73        for index, mspeak in enumerate(ms_peaks):
      - 74
      - 75            if mspeak.is_assigned:
      - 76                    
      - 77                #print(mspeak.mz_exp, len(mspeak))
      - 78                for mformula in mspeak:
      - 79                    mass_list.append(mspeak.mz_exp)
      - 80                    error_list.append(mformula.mz_error)
      - 81                    list_indexes_mass_spec.append(index)
      - 82        
      - 83        kendrick_dict = {'mass': mass_list, 'error': error_list}  
      - 84        df = pd.DataFrame(kendrick_dict) 
      - 85        matrix_data = df.values.astype("float32", copy = False)
      - 86        return matrix_data, list_indexes_mass_spec
      - 87
      - 88    def get_kendrick_matrix_data(self, mass_spectrum):
      - 89        """Get the Kendrick matrix data from a mass spectrum.
      +            
        9class ClusteringFilter:
      + 10    """Class for filtering and clustering mass spectra data using various algorithms.
      + 11
      + 12    Attributes
      + 13    -------
      + 14    mass_spectrum : MassSpectrum
      + 15        Mass spectrum object.
      + 16    ms_peaks : list
      + 17        List of mass peaks.
      + 18    ms_peak_indexes : list
      + 19        List of peak indexes.
      + 20    min_samples : int
      + 21        Minimum number of samples in a cluster.
      + 22    eps : float
      + 23        The maximum distance between two samples for one to be considered as in the neighborhood of the other.
      + 24    bandwidth : float
      + 25        Bandwidth used in MeanShift algorithm.
      + 26    quantile : float
      + 27        Quantile used in estimate_bandwidth function.
      + 28    n_samples : int
      + 29        Number of samples used in estimate_bandwidth function.
      + 30    bin_seeding : bool
      + 31        If true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized.
      + 32    min_peaks_per_class : int
      + 33        Minimum number of peaks per class.
      + 34
      + 35    Methods
      + 36    -------
      + 37    * get_mass_error_matrix_data(ms_peaks).
      + 38        Get the mass error matrix data from a list of mass peaks.
      + 39    * get_kendrick_matrix_data(mass_spectrum).
      + 40        Get the Kendrick matrix data from a mass spectrum.
      + 41    * filter_kendrick(mass_spectrum).
      + 42        Filter the mass spectrum data using the Kendrick algorithm.
      + 43    * filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj).
      + 44        Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      + 45    * remove_assignment_by_mass_error(mass_spectrum).
      + 46        Remove assignments from the mass spectrum based on mass error.
      + 47
      + 48
      + 49    """
      + 50
      + 51    def get_mass_error_matrix_data(self, ms_peaks):
      + 52        """Get the mass error matrix data from a list of mass peaks.
      + 53
      + 54        Parameters
      + 55        ----------
      + 56        ms_peaks : list
      + 57            List of mass peaks.
      + 58
      + 59        Returns
      + 60        -------
      + 61        matrix_data : ndarray
      + 62            Matrix data containing mass and error values.
      + 63        list_indexes_mass_spec : list
      + 64            List of indexes of mass peaks in the original mass spectrum.
      + 65        """
      + 66        mass_list = list()
      + 67        error_list = list()
      + 68        list_indexes_mass_spec = []
      + 69
      + 70        for index, mspeak in enumerate(ms_peaks):
      + 71            if mspeak.is_assigned:
      + 72                # print(mspeak.mz_exp, len(mspeak))
      + 73                for mformula in mspeak:
      + 74                    mass_list.append(mspeak.mz_exp)
      + 75                    error_list.append(mformula.mz_error)
      + 76                    list_indexes_mass_spec.append(index)
      + 77
      + 78        kendrick_dict = {"mass": mass_list, "error": error_list}
      + 79        df = pd.DataFrame(kendrick_dict)
      + 80        matrix_data = df.values.astype("float32", copy=False)
      + 81        return matrix_data, list_indexes_mass_spec
      + 82
      + 83    def get_kendrick_matrix_data(self, mass_spectrum):
      + 84        """Get the Kendrick matrix data from a mass spectrum.
      + 85
      + 86        Parameters
      + 87        ----------
      + 88        mass_spectrum : MassSpectrum
      + 89            Mass spectrum object.
        90
      - 91        Parameters
      - 92        ----------
      - 93        mass_spectrum : MassSpectrum
      - 94            Mass spectrum object.
      - 95
      - 96        Returns
      - 97        -------
      - 98        matrix_data : ndarray
      - 99            Matrix data containing Kendrick mass and Kendrick mass defect values.
      -100        """
      -101        km = mass_spectrum.kendrick_mass
      -102        kmd = mass_spectrum.kmd
      -103        kendrick_dict = {'km': km, 'kmd': kmd}  
      -104        df = pd.DataFrame(kendrick_dict) 
      -105        matrix_data = df.values.astype("float32", copy = False)
      -106        return matrix_data
      -107
      -108    def filter_kendrick(self, mass_spectrum):
      -109        """ Filter the mass spectrum data using the Kendrick algorithm.
      + 91        Returns
      + 92        -------
      + 93        matrix_data : ndarray
      + 94            Matrix data containing Kendrick mass and Kendrick mass defect values.
      + 95        """
      + 96        km = mass_spectrum.kendrick_mass
      + 97        kmd = mass_spectrum.kmd
      + 98        kendrick_dict = {"km": km, "kmd": kmd}
      + 99        df = pd.DataFrame(kendrick_dict)
      +100        matrix_data = df.values.astype("float32", copy=False)
      +101        return matrix_data
      +102
      +103    def filter_kendrick(self, mass_spectrum):
      +104        """Filter the mass spectrum data using the Kendrick algorithm.
      +105
      +106        Parameters
      +107        ----------
      +108        mass_spectrum : MassSpectrum
      +109            Mass spectrum object.
       110
      -111        Parameters
      -112        ----------
      -113        mass_spectrum : MassSpectrum 
      -114            Mass spectrum object.
      +111        """
      +112        matrix_data = self.get_kendrick_matrix_data(mass_spectrum)
      +113
      +114        stdscaler = StandardScaler().fit(matrix_data)
       115
      -116        """
      -117        matrix_data = self.get_kendrick_matrix_data(mass_spectrum)
      -118
      -119        stdscaler = StandardScaler().fit(matrix_data)
      -120        
      -121        matrix_data_scaled = stdscaler.transform(matrix_data)
      -122
      -123        clusters = DBSCAN(eps = .75, min_samples=50).fit_predict(matrix_data_scaled)
      -124        
      -125        # Number of clusters in labels, ignoring noise if present.
      -126        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      -127        n_noise_ = list(clusters).count(-1)
      -128        
      -129        indexes = []
      -130        for i in range(len(clusters)):
      -131            if clusters[i] == -1:
      -132                indexes.append(i)
      -133        
      -134        if mass_spectrum.parameters.mass_spectrum.verbose_processing:
      -135            print('Estimated number of clusters: %d' % n_clusters_)
      -136            print('Estimated number of noise points: %d' % n_noise_)
      -137        mass_spectrum.filter_by_index(indexes)
      -138        #from matplotlib import pyplot as plt
      -139        #plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      -140        #plt.xlabel("km")
      -141        #plt.ylabel("kmd")
      -142        #plt.show()
      -143        #plt.close()
      -144
      -145    def filter_kendrick_by_index(self, ms_peak_indexes, mass_spectrum_obj):
      -146        """ Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      -147
      -148        Parameters
      -149        ----------
      -150        ms_peak_indexes : list 
      -151            List of peak indexes.
      -152        mass_spectrum_obj : MassSpectrum 
      -153            Mass spectrum object.
      -154
      -155        Returns
      -156        -------
      -157        noise_idx : list 
      -158            List of indexes of noise points in the mass spectrum.
      -159        """
      -160        min_samples = mass_spectrum_obj.molecular_search_settings.min_peaks_per_class
      +116        matrix_data_scaled = stdscaler.transform(matrix_data)
      +117
      +118        clusters = DBSCAN(eps=0.75, min_samples=50).fit_predict(matrix_data_scaled)
      +119
      +120        # Number of clusters in labels, ignoring noise if present.
      +121        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      +122        n_noise_ = list(clusters).count(-1)
      +123
      +124        indexes = []
      +125        for i in range(len(clusters)):
      +126            if clusters[i] == -1:
      +127                indexes.append(i)
      +128
      +129        if mass_spectrum.parameters.mass_spectrum.verbose_processing:
      +130            print("Estimated number of clusters: %d" % n_clusters_)
      +131            print("Estimated number of noise points: %d" % n_noise_)
      +132        mass_spectrum.filter_by_index(indexes)
      +133        # from matplotlib import pyplot as plt
      +134        # plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      +135        # plt.xlabel("km")
      +136        # plt.ylabel("kmd")
      +137        # plt.show()
      +138        # plt.close()
      +139
      +140    def filter_kendrick_by_index(self, ms_peak_indexes, mass_spectrum_obj):
      +141        """Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      +142
      +143        Parameters
      +144        ----------
      +145        ms_peak_indexes : list
      +146            List of peak indexes.
      +147        mass_spectrum_obj : MassSpectrum
      +148            Mass spectrum object.
      +149
      +150        Returns
      +151        -------
      +152        noise_idx : list
      +153            List of indexes of noise points in the mass spectrum.
      +154        """
      +155        min_samples = mass_spectrum_obj.molecular_search_settings.min_peaks_per_class
      +156
      +157        kendrick_dict = {"km": list(), "kmd": list()}
      +158
      +159        if len(ms_peak_indexes) <= 1:
      +160            return []
       161
      -162        kendrick_dict = {'km': list(), 'kmd': list()}  
      -163
      -164        if len(ms_peak_indexes) <= 1: return []
      -165        
      -166        for index, _ in ms_peak_indexes:
      -167           kendrick_dict["km"].append(mass_spectrum_obj[index].kendrick_mass)
      -168           kendrick_dict["kmd"].append(mass_spectrum_obj[index].kmd)
      -169           
      -170        # check min data points otherwise StandardScaler().fit(0 will fail
      -171        
      -172        df = pd.DataFrame(kendrick_dict) 
      -173        matrix_data = df.values.astype("float32", copy = False)
      -174
      -175        stdscaler = StandardScaler().fit(matrix_data)
      -176        matrix_data_scaled = stdscaler.transform(matrix_data)
      +162        for index, _ in ms_peak_indexes:
      +163            kendrick_dict["km"].append(mass_spectrum_obj[index].kendrick_mass)
      +164            kendrick_dict["kmd"].append(mass_spectrum_obj[index].kmd)
      +165
      +166        # check min data points otherwise StandardScaler().fit(0 will fail
      +167
      +168        df = pd.DataFrame(kendrick_dict)
      +169        matrix_data = df.values.astype("float32", copy=False)
      +170
      +171        stdscaler = StandardScaler().fit(matrix_data)
      +172        matrix_data_scaled = stdscaler.transform(matrix_data)
      +173
      +174        clusters = DBSCAN(eps=0.8, min_samples=min_samples).fit_predict(
      +175            matrix_data_scaled
      +176        )
       177
      -178        clusters = DBSCAN(eps = .8, min_samples=min_samples).fit_predict(matrix_data_scaled)
      -179        
      -180        # Number of clusters in labels, ignoring noise if present.
      -181        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      -182        n_noise_ = list(clusters).count(-1)
      -183        
      -184        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
      -185            print('Estimated number of clusters: %d' % n_clusters_)
      -186            print('Estimated number of noise points: %d' % n_noise_)
      +178        # Number of clusters in labels, ignoring noise if present.
      +179        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      +180        n_noise_ = list(clusters).count(-1)
      +181
      +182        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
      +183            print("Estimated number of clusters: %d" % n_clusters_)
      +184            print("Estimated number of noise points: %d" % n_noise_)
      +185
      +186        noise_idx = []
       187
      -188        noise_idx = []
      -189        
      -190        other_peaks_idx = []
      -191
      -192        for i in range(len(clusters)):
      -193            
      -194            if clusters[i] == -1:
      -195                noise_idx.append(ms_peak_indexes[i])
      -196            
      -197            else:
      -198                other_peaks_idx.append(ms_peak_indexes[i])    
      -199
      -200        #mfs = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in other_peaks_idx]
      -201        
      -202        #mfs_noise = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in noise_idx]
      -203        
      -204        #print(mfs)
      -205        #print(mfs_noise)
      -206
      -207        #from matplotlib import pyplot as plt
      -208        #plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      -209        #plt.xlabel("km")
      -210        #plt.ylabel("kmd")
      -211        #plt.show()
      -212        #plt.close()
      -213        
      -214        return noise_idx      
      +188        other_peaks_idx = []
      +189
      +190        for i in range(len(clusters)):
      +191            if clusters[i] == -1:
      +192                noise_idx.append(ms_peak_indexes[i])
      +193
      +194            else:
      +195                other_peaks_idx.append(ms_peak_indexes[i])
      +196
      +197        # mfs = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in other_peaks_idx]
      +198
      +199        # mfs_noise = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in noise_idx]
      +200
      +201        # print(mfs)
      +202        # print(mfs_noise)
      +203
      +204        # from matplotlib import pyplot as plt
      +205        # plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      +206        # plt.xlabel("km")
      +207        # plt.ylabel("kmd")
      +208        # plt.show()
      +209        # plt.close()
      +210
      +211        return noise_idx
      +212
      +213    def remove_assignment_by_mass_error(self, mass_spectrum):
      +214        """Remove assignments from the mass spectrum based on mass error.
       215
      -216    def remove_assignment_by_mass_error(self, mass_spectrum):
      -217        """ Remove assignments from the mass spectrum based on mass error.
      -218
      -219        Parameters
      -220        ----------
      -221        mass_spectrum : MassSpectrum
      -222            Mass spectrum object.
      +216        Parameters
      +217        ----------
      +218        mass_spectrum : MassSpectrum
      +219            Mass spectrum object.
      +220
      +221        """
      +222        # data need to be binned by mz unit or more to be able to use clustering
       223
      -224        """
      -225        #data need to be binned by mz unit or more to be able to use clustering
      -226        
      -227        matrix_data, list_indexes_mass_spec = self.get_mass_error_matrix_data(mass_spectrum)
      -228
      -229        stdscaler = StandardScaler().fit(matrix_data)
      -230        
      -231        matrix_data_scaled = stdscaler.transform(matrix_data)
      -232        
      -233        #bandwidth = estimate_bandwidth(matrix_data_scaled, quantile=0.3, n_samples=int(len(ms_peaks)/3))
      -234
      -235        #clusters = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit_predict(matrix_data_scaled)
      -236        
      -237        #eps and min_samp need to be optimized by precision and number of mspeaks
      -238        clusters = DBSCAN(eps = .15).fit_predict(matrix_data_scaled)
      -239        
      -240        indexes = []
      -241        
      -242        #from matplotlib import pyplot as plt
      -243        #plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="plasma")
      -244        #plt.xlabel("km")
      -245        #plt.ylabel("kmd")
      -246        #plt.show()
      -247        #plt.close()
      -248
      -249        for i in range(len(clusters)):
      -250            if clusters[i] == -1:
      -251                indexes.append(list_indexes_mass_spec[i])
      -252        
      -253        mass_spectrum.remove_assignment_by_index(indexes)    
      +224        matrix_data, list_indexes_mass_spec = self.get_mass_error_matrix_data(
      +225            mass_spectrum
      +226        )
      +227
      +228        stdscaler = StandardScaler().fit(matrix_data)
      +229
      +230        matrix_data_scaled = stdscaler.transform(matrix_data)
      +231
      +232        # bandwidth = estimate_bandwidth(matrix_data_scaled, quantile=0.3, n_samples=int(len(ms_peaks)/3))
      +233
      +234        # clusters = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit_predict(matrix_data_scaled)
      +235
      +236        # eps and min_samp need to be optimized by precision and number of mspeaks
      +237        clusters = DBSCAN(eps=0.15).fit_predict(matrix_data_scaled)
      +238
      +239        indexes = []
      +240
      +241        # from matplotlib import pyplot as plt
      +242        # plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="plasma")
      +243        # plt.xlabel("km")
      +244        # plt.ylabel("kmd")
      +245        # plt.show()
      +246        # plt.close()
      +247
      +248        for i in range(len(clusters)):
      +249            if clusters[i] == -1:
      +250                indexes.append(list_indexes_mass_spec[i])
      +251
      +252        mass_spectrum.remove_assignment_by_index(indexes)
       
      @@ -613,13 +616,13 @@
      Methods
      • get_mass_error_matrix_data(ms_peaks). -Get the mass error matrix data from a list of mass peaks.
      • +Get the mass error matrix data from a list of mass peaks.
      • get_kendrick_matrix_data(mass_spectrum). -Get the Kendrick matrix data from a mass spectrum.
      • +Get the Kendrick matrix data from a mass spectrum.
      • filter_kendrick(mass_spectrum). -Filter the mass spectrum data using the Kendrick algorithm.
      • +Filter the mass spectrum data using the Kendrick algorithm.
      • filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj). -Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      • +Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      • remove_assignment_by_mass_error(mass_spectrum). Remove assignments from the mass spectrum based on mass error.
      @@ -637,39 +640,37 @@
      Methods
      -
      54    def get_mass_error_matrix_data(self, ms_peaks):
      -55        """Get the mass error matrix data from a list of mass peaks.
      -56
      -57        Parameters
      -58        ----------
      -59        ms_peaks : list
      -60            List of mass peaks.
      -61
      -62        Returns
      -63        -------
      -64        matrix_data : ndarray
      -65            Matrix data containing mass and error values.
      -66        list_indexes_mass_spec : list 
      -67            List of indexes of mass peaks in the original mass spectrum.
      -68        """
      -69        mass_list = list()
      -70        error_list = list()
      -71        list_indexes_mass_spec = []
      -72        
      -73        for index, mspeak in enumerate(ms_peaks):
      -74
      -75            if mspeak.is_assigned:
      -76                    
      -77                #print(mspeak.mz_exp, len(mspeak))
      -78                for mformula in mspeak:
      -79                    mass_list.append(mspeak.mz_exp)
      -80                    error_list.append(mformula.mz_error)
      -81                    list_indexes_mass_spec.append(index)
      -82        
      -83        kendrick_dict = {'mass': mass_list, 'error': error_list}  
      -84        df = pd.DataFrame(kendrick_dict) 
      -85        matrix_data = df.values.astype("float32", copy = False)
      -86        return matrix_data, list_indexes_mass_spec
      +            
      51    def get_mass_error_matrix_data(self, ms_peaks):
      +52        """Get the mass error matrix data from a list of mass peaks.
      +53
      +54        Parameters
      +55        ----------
      +56        ms_peaks : list
      +57            List of mass peaks.
      +58
      +59        Returns
      +60        -------
      +61        matrix_data : ndarray
      +62            Matrix data containing mass and error values.
      +63        list_indexes_mass_spec : list
      +64            List of indexes of mass peaks in the original mass spectrum.
      +65        """
      +66        mass_list = list()
      +67        error_list = list()
      +68        list_indexes_mass_spec = []
      +69
      +70        for index, mspeak in enumerate(ms_peaks):
      +71            if mspeak.is_assigned:
      +72                # print(mspeak.mz_exp, len(mspeak))
      +73                for mformula in mspeak:
      +74                    mass_list.append(mspeak.mz_exp)
      +75                    error_list.append(mformula.mz_error)
      +76                    list_indexes_mass_spec.append(index)
      +77
      +78        kendrick_dict = {"mass": mass_list, "error": error_list}
      +79        df = pd.DataFrame(kendrick_dict)
      +80        matrix_data = df.values.astype("float32", copy=False)
      +81        return matrix_data, list_indexes_mass_spec
       
      @@ -705,25 +706,25 @@
      Returns
      -
       88    def get_kendrick_matrix_data(self, mass_spectrum):
      - 89        """Get the Kendrick matrix data from a mass spectrum.
      +            
       83    def get_kendrick_matrix_data(self, mass_spectrum):
      + 84        """Get the Kendrick matrix data from a mass spectrum.
      + 85
      + 86        Parameters
      + 87        ----------
      + 88        mass_spectrum : MassSpectrum
      + 89            Mass spectrum object.
        90
      - 91        Parameters
      - 92        ----------
      - 93        mass_spectrum : MassSpectrum
      - 94            Mass spectrum object.
      - 95
      - 96        Returns
      - 97        -------
      - 98        matrix_data : ndarray
      - 99            Matrix data containing Kendrick mass and Kendrick mass defect values.
      -100        """
      -101        km = mass_spectrum.kendrick_mass
      -102        kmd = mass_spectrum.kmd
      -103        kendrick_dict = {'km': km, 'kmd': kmd}  
      -104        df = pd.DataFrame(kendrick_dict) 
      -105        matrix_data = df.values.astype("float32", copy = False)
      -106        return matrix_data
      + 91        Returns
      + 92        -------
      + 93        matrix_data : ndarray
      + 94            Matrix data containing Kendrick mass and Kendrick mass defect values.
      + 95        """
      + 96        km = mass_spectrum.kendrick_mass
      + 97        kmd = mass_spectrum.kmd
      + 98        kendrick_dict = {"km": km, "kmd": kmd}
      + 99        df = pd.DataFrame(kendrick_dict)
      +100        matrix_data = df.values.astype("float32", copy=False)
      +101        return matrix_data
       
      @@ -757,42 +758,42 @@
      Returns
      -
      108    def filter_kendrick(self, mass_spectrum):
      -109        """ Filter the mass spectrum data using the Kendrick algorithm.
      +            
      103    def filter_kendrick(self, mass_spectrum):
      +104        """Filter the mass spectrum data using the Kendrick algorithm.
      +105
      +106        Parameters
      +107        ----------
      +108        mass_spectrum : MassSpectrum
      +109            Mass spectrum object.
       110
      -111        Parameters
      -112        ----------
      -113        mass_spectrum : MassSpectrum 
      -114            Mass spectrum object.
      +111        """
      +112        matrix_data = self.get_kendrick_matrix_data(mass_spectrum)
      +113
      +114        stdscaler = StandardScaler().fit(matrix_data)
       115
      -116        """
      -117        matrix_data = self.get_kendrick_matrix_data(mass_spectrum)
      -118
      -119        stdscaler = StandardScaler().fit(matrix_data)
      -120        
      -121        matrix_data_scaled = stdscaler.transform(matrix_data)
      -122
      -123        clusters = DBSCAN(eps = .75, min_samples=50).fit_predict(matrix_data_scaled)
      -124        
      -125        # Number of clusters in labels, ignoring noise if present.
      -126        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      -127        n_noise_ = list(clusters).count(-1)
      -128        
      -129        indexes = []
      -130        for i in range(len(clusters)):
      -131            if clusters[i] == -1:
      -132                indexes.append(i)
      -133        
      -134        if mass_spectrum.parameters.mass_spectrum.verbose_processing:
      -135            print('Estimated number of clusters: %d' % n_clusters_)
      -136            print('Estimated number of noise points: %d' % n_noise_)
      -137        mass_spectrum.filter_by_index(indexes)
      -138        #from matplotlib import pyplot as plt
      -139        #plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      -140        #plt.xlabel("km")
      -141        #plt.ylabel("kmd")
      -142        #plt.show()
      -143        #plt.close()
      +116        matrix_data_scaled = stdscaler.transform(matrix_data)
      +117
      +118        clusters = DBSCAN(eps=0.75, min_samples=50).fit_predict(matrix_data_scaled)
      +119
      +120        # Number of clusters in labels, ignoring noise if present.
      +121        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      +122        n_noise_ = list(clusters).count(-1)
      +123
      +124        indexes = []
      +125        for i in range(len(clusters)):
      +126            if clusters[i] == -1:
      +127                indexes.append(i)
      +128
      +129        if mass_spectrum.parameters.mass_spectrum.verbose_processing:
      +130            print("Estimated number of clusters: %d" % n_clusters_)
      +131            print("Estimated number of noise points: %d" % n_noise_)
      +132        mass_spectrum.filter_by_index(indexes)
      +133        # from matplotlib import pyplot as plt
      +134        # plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      +135        # plt.xlabel("km")
      +136        # plt.ylabel("kmd")
      +137        # plt.show()
      +138        # plt.close()
       
      @@ -819,76 +820,78 @@
      Parameters
      -
      145    def filter_kendrick_by_index(self, ms_peak_indexes, mass_spectrum_obj):
      -146        """ Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      -147
      -148        Parameters
      -149        ----------
      -150        ms_peak_indexes : list 
      -151            List of peak indexes.
      -152        mass_spectrum_obj : MassSpectrum 
      -153            Mass spectrum object.
      -154
      -155        Returns
      -156        -------
      -157        noise_idx : list 
      -158            List of indexes of noise points in the mass spectrum.
      -159        """
      -160        min_samples = mass_spectrum_obj.molecular_search_settings.min_peaks_per_class
      +            
      140    def filter_kendrick_by_index(self, ms_peak_indexes, mass_spectrum_obj):
      +141        """Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
      +142
      +143        Parameters
      +144        ----------
      +145        ms_peak_indexes : list
      +146            List of peak indexes.
      +147        mass_spectrum_obj : MassSpectrum
      +148            Mass spectrum object.
      +149
      +150        Returns
      +151        -------
      +152        noise_idx : list
      +153            List of indexes of noise points in the mass spectrum.
      +154        """
      +155        min_samples = mass_spectrum_obj.molecular_search_settings.min_peaks_per_class
      +156
      +157        kendrick_dict = {"km": list(), "kmd": list()}
      +158
      +159        if len(ms_peak_indexes) <= 1:
      +160            return []
       161
      -162        kendrick_dict = {'km': list(), 'kmd': list()}  
      -163
      -164        if len(ms_peak_indexes) <= 1: return []
      -165        
      -166        for index, _ in ms_peak_indexes:
      -167           kendrick_dict["km"].append(mass_spectrum_obj[index].kendrick_mass)
      -168           kendrick_dict["kmd"].append(mass_spectrum_obj[index].kmd)
      -169           
      -170        # check min data points otherwise StandardScaler().fit(0 will fail
      -171        
      -172        df = pd.DataFrame(kendrick_dict) 
      -173        matrix_data = df.values.astype("float32", copy = False)
      -174
      -175        stdscaler = StandardScaler().fit(matrix_data)
      -176        matrix_data_scaled = stdscaler.transform(matrix_data)
      +162        for index, _ in ms_peak_indexes:
      +163            kendrick_dict["km"].append(mass_spectrum_obj[index].kendrick_mass)
      +164            kendrick_dict["kmd"].append(mass_spectrum_obj[index].kmd)
      +165
      +166        # check min data points otherwise StandardScaler().fit(0 will fail
      +167
      +168        df = pd.DataFrame(kendrick_dict)
      +169        matrix_data = df.values.astype("float32", copy=False)
      +170
      +171        stdscaler = StandardScaler().fit(matrix_data)
      +172        matrix_data_scaled = stdscaler.transform(matrix_data)
      +173
      +174        clusters = DBSCAN(eps=0.8, min_samples=min_samples).fit_predict(
      +175            matrix_data_scaled
      +176        )
       177
      -178        clusters = DBSCAN(eps = .8, min_samples=min_samples).fit_predict(matrix_data_scaled)
      -179        
      -180        # Number of clusters in labels, ignoring noise if present.
      -181        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      -182        n_noise_ = list(clusters).count(-1)
      -183        
      -184        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
      -185            print('Estimated number of clusters: %d' % n_clusters_)
      -186            print('Estimated number of noise points: %d' % n_noise_)
      +178        # Number of clusters in labels, ignoring noise if present.
      +179        n_clusters_ = len(set(clusters)) - (1 if -1 in clusters else 0)
      +180        n_noise_ = list(clusters).count(-1)
      +181
      +182        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
      +183            print("Estimated number of clusters: %d" % n_clusters_)
      +184            print("Estimated number of noise points: %d" % n_noise_)
      +185
      +186        noise_idx = []
       187
      -188        noise_idx = []
      -189        
      -190        other_peaks_idx = []
      -191
      -192        for i in range(len(clusters)):
      -193            
      -194            if clusters[i] == -1:
      -195                noise_idx.append(ms_peak_indexes[i])
      -196            
      -197            else:
      -198                other_peaks_idx.append(ms_peak_indexes[i])    
      -199
      -200        #mfs = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in other_peaks_idx]
      -201        
      -202        #mfs_noise = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in noise_idx]
      -203        
      -204        #print(mfs)
      -205        #print(mfs_noise)
      -206
      -207        #from matplotlib import pyplot as plt
      -208        #plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      -209        #plt.xlabel("km")
      -210        #plt.ylabel("kmd")
      -211        #plt.show()
      -212        #plt.close()
      -213        
      -214        return noise_idx      
      +188        other_peaks_idx = []
      +189
      +190        for i in range(len(clusters)):
      +191            if clusters[i] == -1:
      +192                noise_idx.append(ms_peak_indexes[i])
      +193
      +194            else:
      +195                other_peaks_idx.append(ms_peak_indexes[i])
      +196
      +197        # mfs = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in other_peaks_idx]
      +198
      +199        # mfs_noise = [mass_spectrum_obj[index].best_molecular_formula_candidate.string for index in noise_idx]
      +200
      +201        # print(mfs)
      +202        # print(mfs_noise)
      +203
      +204        # from matplotlib import pyplot as plt
      +205        # plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="jet")
      +206        # plt.xlabel("km")
      +207        # plt.ylabel("kmd")
      +208        # plt.show()
      +209        # plt.close()
      +210
      +211        return noise_idx
       
      @@ -924,44 +927,46 @@
      Returns
      -
      216    def remove_assignment_by_mass_error(self, mass_spectrum):
      -217        """ Remove assignments from the mass spectrum based on mass error.
      -218
      -219        Parameters
      -220        ----------
      -221        mass_spectrum : MassSpectrum
      -222            Mass spectrum object.
      +            
      213    def remove_assignment_by_mass_error(self, mass_spectrum):
      +214        """Remove assignments from the mass spectrum based on mass error.
      +215
      +216        Parameters
      +217        ----------
      +218        mass_spectrum : MassSpectrum
      +219            Mass spectrum object.
      +220
      +221        """
      +222        # data need to be binned by mz unit or more to be able to use clustering
       223
      -224        """
      -225        #data need to be binned by mz unit or more to be able to use clustering
      -226        
      -227        matrix_data, list_indexes_mass_spec = self.get_mass_error_matrix_data(mass_spectrum)
      -228
      -229        stdscaler = StandardScaler().fit(matrix_data)
      -230        
      -231        matrix_data_scaled = stdscaler.transform(matrix_data)
      -232        
      -233        #bandwidth = estimate_bandwidth(matrix_data_scaled, quantile=0.3, n_samples=int(len(ms_peaks)/3))
      -234
      -235        #clusters = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit_predict(matrix_data_scaled)
      -236        
      -237        #eps and min_samp need to be optimized by precision and number of mspeaks
      -238        clusters = DBSCAN(eps = .15).fit_predict(matrix_data_scaled)
      -239        
      -240        indexes = []
      -241        
      -242        #from matplotlib import pyplot as plt
      -243        #plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="plasma")
      -244        #plt.xlabel("km")
      -245        #plt.ylabel("kmd")
      -246        #plt.show()
      -247        #plt.close()
      -248
      -249        for i in range(len(clusters)):
      -250            if clusters[i] == -1:
      -251                indexes.append(list_indexes_mass_spec[i])
      -252        
      -253        mass_spectrum.remove_assignment_by_index(indexes)    
      +224        matrix_data, list_indexes_mass_spec = self.get_mass_error_matrix_data(
      +225            mass_spectrum
      +226        )
      +227
      +228        stdscaler = StandardScaler().fit(matrix_data)
      +229
      +230        matrix_data_scaled = stdscaler.transform(matrix_data)
      +231
      +232        # bandwidth = estimate_bandwidth(matrix_data_scaled, quantile=0.3, n_samples=int(len(ms_peaks)/3))
      +233
      +234        # clusters = MeanShift(bandwidth=bandwidth, bin_seeding=True).fit_predict(matrix_data_scaled)
      +235
      +236        # eps and min_samp need to be optimized by precision and number of mspeaks
      +237        clusters = DBSCAN(eps=0.15).fit_predict(matrix_data_scaled)
      +238
      +239        indexes = []
      +240
      +241        # from matplotlib import pyplot as plt
      +242        # plt.scatter(matrix_data[:, 0], matrix_data[:, 1], c=clusters, cmap="plasma")
      +243        # plt.xlabel("km")
      +244        # plt.ylabel("kmd")
      +245        # plt.show()
      +246        # plt.close()
      +247
      +248        for i in range(len(clusters)):
      +249            if clusters[i] == -1:
      +250                indexes.append(list_indexes_mass_spec[i])
      +251
      +252        mass_spectrum.remove_assignment_by_index(indexes)
       
      diff --git a/docs/corems/molecular_id/calc/MolecularFilter.html b/docs/corems/molecular_id/calc/MolecularFilter.html index 6d06cf34..1666a0a1 100644 --- a/docs/corems/molecular_id/calc/MolecularFilter.html +++ b/docs/corems/molecular_id/calc/MolecularFilter.html @@ -68,126 +68,126 @@

        1from corems.molecular_id.calc.ClusterFilter import ClusteringFilter
         2
      -  3class MolecularFormulaSearchFilters:
      -  4    """ Class containing static methods for filtering molecular formulas in a mass spectrum.
      -  5      
      -  6    Methods
      -  7    -------
      -  8    * filter_kendrick(ms_peak_indexes, mass_spectrum_obj).   
      -  9        Apply Kendrick filter to the mass spectrum.  
      - 10    * check_min_peaks(ms_peak_indexes, mass_spectrum_obj).   
      - 11        Check if the number of peaks per class meets the minimum requirement.   
      - 12    * filter_isotopologue(ms_peak_indexes, mass_spectrum_obj).   
      - 13        Apply isotopologue filter to the mass spectrum.   
      - 14
      - 15    """
      - 16    @staticmethod
      - 17    def filter_kendrick( ms_peak_indexes, mass_spectrum_obj):
      - 18        """Apply Kendrick filter to the mass spectrum.
      - 19
      - 20        Parameters
      - 21        ----------
      - 22        ms_peak_indexes : list
      - 23            List of peak indexes and their associated molecular formula objects.
      - 24        mass_spectrum_obj : MassSpectrum 
      - 25            The mass spectrum object.
      - 26
      - 27        Returns
      - 28        -------
      - 29        filtered_ms_peak_indexes : list 
      - 30            List of peak indexes and their associated molecular formula objects after applying the Kendrick filter.
      - 31        """
      - 32        index_to_remove = []
      - 33        
      - 34        if mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter:
      - 35            
      - 36            index_to_remove = ClusteringFilter().filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj)
      - 37
      - 38            #for index in noise_indexes: self.mass_spectrum_obj[index].clear_molecular_formulas()
      - 39        
      - 40        all_index_to_remove = []
      - 41
      - 42        for peak_index, mf_obj in index_to_remove:
      - 43                
      - 44                ms_peak_indexes.remove((peak_index, mf_obj))
      - 45
      - 46                all_index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
      +  3
      +  4class MolecularFormulaSearchFilters:
      +  5    """Class containing static methods for filtering molecular formulas in a mass spectrum.
      +  6
      +  7    Methods
      +  8    -------
      +  9    * filter_kendrick(ms_peak_indexes, mass_spectrum_obj).
      + 10        Apply Kendrick filter to the mass spectrum.
      + 11    * check_min_peaks(ms_peak_indexes, mass_spectrum_obj).
      + 12        Check if the number of peaks per class meets the minimum requirement.
      + 13    * filter_isotopologue(ms_peak_indexes, mass_spectrum_obj).
      + 14        Apply isotopologue filter to the mass spectrum.
      + 15
      + 16    """
      + 17
      + 18    @staticmethod
      + 19    def filter_kendrick(ms_peak_indexes, mass_spectrum_obj):
      + 20        """Apply Kendrick filter to the mass spectrum.
      + 21
      + 22        Parameters
      + 23        ----------
      + 24        ms_peak_indexes : list
      + 25            List of peak indexes and their associated molecular formula objects.
      + 26        mass_spectrum_obj : MassSpectrum
      + 27            The mass spectrum object.
      + 28
      + 29        Returns
      + 30        -------
      + 31        filtered_ms_peak_indexes : list
      + 32            List of peak indexes and their associated molecular formula objects after applying the Kendrick filter.
      + 33        """
      + 34        index_to_remove = []
      + 35
      + 36        if mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter:
      + 37            index_to_remove = ClusteringFilter().filter_kendrick_by_index(
      + 38                ms_peak_indexes, mass_spectrum_obj
      + 39            )
      + 40
      + 41            # for index in noise_indexes: self.mass_spectrum_obj[index].clear_molecular_formulas()
      + 42
      + 43        all_index_to_remove = []
      + 44
      + 45        for peak_index, mf_obj in index_to_remove:
      + 46            ms_peak_indexes.remove((peak_index, mf_obj))
        47
      - 48        all_index_to_remove =  list(set(all_index_to_remove + index_to_remove))
      + 48            all_index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
        49
      - 50        for peak_index, mf_obj in all_index_to_remove:
      - 51            
      - 52            mass_spectrum_obj[peak_index].remove_molecular_formula(mf_obj)
      - 53            
      - 54        return ms_peak_indexes
      - 55
      - 56    @staticmethod
      - 57    def check_min_peaks( ms_peak_indexes, mass_spectrum):
      - 58        """ Check if the number of peaks per class meets the minimum requirement.
      - 59
      - 60        Parameters
      - 61        --------
      - 62        ms_peak_indexes : list
      - 63            List of peak indexes and their associated molecular formula objects.
      - 64        mass_spectrum_obj : MassSpectrum 
      - 65            The mass spectrum object.
      - 66
      - 67        """
      - 68        if mass_spectrum.molecular_search_settings.use_min_peaks_filter:
      - 69
      - 70            if not len(ms_peak_indexes) >= mass_spectrum.molecular_search_settings.min_peaks_per_class:
      - 71                
      - 72                for peak_index, mf_obj in ms_peak_indexes:
      - 73                
      - 74                    mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
      - 75
      - 76    @staticmethod
      - 77    def filter_isotopologue( ms_peak_indexes, mass_spectrum):
      - 78        """ Apply isotopologue filter to the mass spectrum.
      - 79
      - 80        Parameters
      - 81        --------
      - 82        ms_peak_indexes : list
      - 83            List of peak indexes and their associated molecular formula objects.
      - 84        mass_spectrum_obj : MassSpectrum 
      - 85            The mass spectrum object.
      - 86
      - 87        Returns
      - 88        ------------
      - 89        filtered_ms_peak_indexes : list
      - 90            List of peak indexes and their associated molecular formula objects after applying the isotopologue filter.
      - 91        """
      - 92        index_to_remove = []
      - 93        #print(len(ms_peak_indexes))
      - 94        if mass_spectrum.molecular_search_settings.use_isotopologue_filter:
      - 95
      - 96            atoms_iso_filter = mass_spectrum.molecular_search_settings.isotopologue_filter_atoms
      - 97
      - 98            isotopologue_count_threshold = mass_spectrum.molecular_search_settings.isotopologue_filter_threshold
      + 50        all_index_to_remove = list(set(all_index_to_remove + index_to_remove))
      + 51
      + 52        for peak_index, mf_obj in all_index_to_remove:
      + 53            mass_spectrum_obj[peak_index].remove_molecular_formula(mf_obj)
      + 54
      + 55        return ms_peak_indexes
      + 56
      + 57    @staticmethod
      + 58    def check_min_peaks(ms_peak_indexes, mass_spectrum):
      + 59        """Check if the number of peaks per class meets the minimum requirement.
      + 60
      + 61        Parameters
      + 62        --------
      + 63        ms_peak_indexes : list
      + 64            List of peak indexes and their associated molecular formula objects.
      + 65        mass_spectrum_obj : MassSpectrum
      + 66            The mass spectrum object.
      + 67
      + 68        """
      + 69        if mass_spectrum.molecular_search_settings.use_min_peaks_filter:
      + 70            if (
      + 71                not len(ms_peak_indexes)
      + 72                >= mass_spectrum.molecular_search_settings.min_peaks_per_class
      + 73            ):
      + 74                for peak_index, mf_obj in ms_peak_indexes:
      + 75                    mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
      + 76
      + 77    @staticmethod
      + 78    def filter_isotopologue(ms_peak_indexes, mass_spectrum):
      + 79        """Apply isotopologue filter to the mass spectrum.
      + 80
      + 81        Parameters
      + 82        --------
      + 83        ms_peak_indexes : list
      + 84            List of peak indexes and their associated molecular formula objects.
      + 85        mass_spectrum_obj : MassSpectrum
      + 86            The mass spectrum object.
      + 87
      + 88        Returns
      + 89        ------------
      + 90        filtered_ms_peak_indexes : list
      + 91            List of peak indexes and their associated molecular formula objects after applying the isotopologue filter.
      + 92        """
      + 93        index_to_remove = []
      + 94        # print(len(ms_peak_indexes))
      + 95        if mass_spectrum.molecular_search_settings.use_isotopologue_filter:
      + 96            atoms_iso_filter = (
      + 97                mass_spectrum.molecular_search_settings.isotopologue_filter_atoms
      + 98            )
        99
      -100            for mspeak_index, mf_obj in ms_peak_indexes:
      -101            
      -102                if mf_obj.isotopologue_count_percentile < isotopologue_count_threshold:
      -103                    
      -104                    if set(mf_obj.atoms).intersection(atoms_iso_filter):
      -105                        
      -106                        #removes tuple obj from initial list to be used on next filter steps
      -107                        ms_peak_indexes.remove((mspeak_index, mf_obj))
      -108                        
      -109                        # current mf_obj
      -110                        index_to_remove.append((mspeak_index, mf_obj))
      -111                        # all other associated isotopolgues
      -112                        index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
      -113
      -114        #iterate over all indexes to be remove and remove the mf from the mspeak 
      -115        
      -116        #print(len(ms_peak_indexes))
      -117        for peak_index, mf_obj in index_to_remove:
      -118                #print(peak_index, mf_obj)
      -119                mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
      -120
      -121        
      -122        return ms_peak_indexes 
      +100            isotopologue_count_threshold = (
      +101                mass_spectrum.molecular_search_settings.isotopologue_filter_threshold
      +102            )
      +103
      +104            for mspeak_index, mf_obj in ms_peak_indexes:
      +105                if mf_obj.isotopologue_count_percentile < isotopologue_count_threshold:
      +106                    if set(mf_obj.atoms).intersection(atoms_iso_filter):
      +107                        # removes tuple obj from initial list to be used on next filter steps
      +108                        ms_peak_indexes.remove((mspeak_index, mf_obj))
      +109
      +110                        # current mf_obj
      +111                        index_to_remove.append((mspeak_index, mf_obj))
      +112                        # all other associated isotopolgues
      +113                        index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
      +114
      +115        # iterate over all indexes to be remove and remove the mf from the mspeak
      +116
      +117        # print(len(ms_peak_indexes))
      +118        for peak_index, mf_obj in index_to_remove:
      +119            # print(peak_index, mf_obj)
      +120            mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
      +121
      +122        return ms_peak_indexes
       
      @@ -203,126 +203,125 @@

      -
        4class MolecularFormulaSearchFilters:
      -  5    """ Class containing static methods for filtering molecular formulas in a mass spectrum.
      -  6      
      -  7    Methods
      -  8    -------
      -  9    * filter_kendrick(ms_peak_indexes, mass_spectrum_obj).   
      - 10        Apply Kendrick filter to the mass spectrum.  
      - 11    * check_min_peaks(ms_peak_indexes, mass_spectrum_obj).   
      - 12        Check if the number of peaks per class meets the minimum requirement.   
      - 13    * filter_isotopologue(ms_peak_indexes, mass_spectrum_obj).   
      - 14        Apply isotopologue filter to the mass spectrum.   
      - 15
      - 16    """
      - 17    @staticmethod
      - 18    def filter_kendrick( ms_peak_indexes, mass_spectrum_obj):
      - 19        """Apply Kendrick filter to the mass spectrum.
      - 20
      - 21        Parameters
      - 22        ----------
      - 23        ms_peak_indexes : list
      - 24            List of peak indexes and their associated molecular formula objects.
      - 25        mass_spectrum_obj : MassSpectrum 
      - 26            The mass spectrum object.
      - 27
      - 28        Returns
      - 29        -------
      - 30        filtered_ms_peak_indexes : list 
      - 31            List of peak indexes and their associated molecular formula objects after applying the Kendrick filter.
      - 32        """
      - 33        index_to_remove = []
      - 34        
      - 35        if mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter:
      - 36            
      - 37            index_to_remove = ClusteringFilter().filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj)
      - 38
      - 39            #for index in noise_indexes: self.mass_spectrum_obj[index].clear_molecular_formulas()
      - 40        
      - 41        all_index_to_remove = []
      - 42
      - 43        for peak_index, mf_obj in index_to_remove:
      - 44                
      - 45                ms_peak_indexes.remove((peak_index, mf_obj))
      - 46
      - 47                all_index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
      +            
        5class MolecularFormulaSearchFilters:
      +  6    """Class containing static methods for filtering molecular formulas in a mass spectrum.
      +  7
      +  8    Methods
      +  9    -------
      + 10    * filter_kendrick(ms_peak_indexes, mass_spectrum_obj).
      + 11        Apply Kendrick filter to the mass spectrum.
      + 12    * check_min_peaks(ms_peak_indexes, mass_spectrum_obj).
      + 13        Check if the number of peaks per class meets the minimum requirement.
      + 14    * filter_isotopologue(ms_peak_indexes, mass_spectrum_obj).
      + 15        Apply isotopologue filter to the mass spectrum.
      + 16
      + 17    """
      + 18
      + 19    @staticmethod
      + 20    def filter_kendrick(ms_peak_indexes, mass_spectrum_obj):
      + 21        """Apply Kendrick filter to the mass spectrum.
      + 22
      + 23        Parameters
      + 24        ----------
      + 25        ms_peak_indexes : list
      + 26            List of peak indexes and their associated molecular formula objects.
      + 27        mass_spectrum_obj : MassSpectrum
      + 28            The mass spectrum object.
      + 29
      + 30        Returns
      + 31        -------
      + 32        filtered_ms_peak_indexes : list
      + 33            List of peak indexes and their associated molecular formula objects after applying the Kendrick filter.
      + 34        """
      + 35        index_to_remove = []
      + 36
      + 37        if mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter:
      + 38            index_to_remove = ClusteringFilter().filter_kendrick_by_index(
      + 39                ms_peak_indexes, mass_spectrum_obj
      + 40            )
      + 41
      + 42            # for index in noise_indexes: self.mass_spectrum_obj[index].clear_molecular_formulas()
      + 43
      + 44        all_index_to_remove = []
      + 45
      + 46        for peak_index, mf_obj in index_to_remove:
      + 47            ms_peak_indexes.remove((peak_index, mf_obj))
        48
      - 49        all_index_to_remove =  list(set(all_index_to_remove + index_to_remove))
      + 49            all_index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
        50
      - 51        for peak_index, mf_obj in all_index_to_remove:
      - 52            
      - 53            mass_spectrum_obj[peak_index].remove_molecular_formula(mf_obj)
      - 54            
      - 55        return ms_peak_indexes
      - 56
      - 57    @staticmethod
      - 58    def check_min_peaks( ms_peak_indexes, mass_spectrum):
      - 59        """ Check if the number of peaks per class meets the minimum requirement.
      - 60
      - 61        Parameters
      - 62        --------
      - 63        ms_peak_indexes : list
      - 64            List of peak indexes and their associated molecular formula objects.
      - 65        mass_spectrum_obj : MassSpectrum 
      - 66            The mass spectrum object.
      - 67
      - 68        """
      - 69        if mass_spectrum.molecular_search_settings.use_min_peaks_filter:
      - 70
      - 71            if not len(ms_peak_indexes) >= mass_spectrum.molecular_search_settings.min_peaks_per_class:
      - 72                
      - 73                for peak_index, mf_obj in ms_peak_indexes:
      - 74                
      - 75                    mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
      - 76
      - 77    @staticmethod
      - 78    def filter_isotopologue( ms_peak_indexes, mass_spectrum):
      - 79        """ Apply isotopologue filter to the mass spectrum.
      - 80
      - 81        Parameters
      - 82        --------
      - 83        ms_peak_indexes : list
      - 84            List of peak indexes and their associated molecular formula objects.
      - 85        mass_spectrum_obj : MassSpectrum 
      - 86            The mass spectrum object.
      - 87
      - 88        Returns
      - 89        ------------
      - 90        filtered_ms_peak_indexes : list
      - 91            List of peak indexes and their associated molecular formula objects after applying the isotopologue filter.
      - 92        """
      - 93        index_to_remove = []
      - 94        #print(len(ms_peak_indexes))
      - 95        if mass_spectrum.molecular_search_settings.use_isotopologue_filter:
      - 96
      - 97            atoms_iso_filter = mass_spectrum.molecular_search_settings.isotopologue_filter_atoms
      - 98
      - 99            isotopologue_count_threshold = mass_spectrum.molecular_search_settings.isotopologue_filter_threshold
      + 51        all_index_to_remove = list(set(all_index_to_remove + index_to_remove))
      + 52
      + 53        for peak_index, mf_obj in all_index_to_remove:
      + 54            mass_spectrum_obj[peak_index].remove_molecular_formula(mf_obj)
      + 55
      + 56        return ms_peak_indexes
      + 57
      + 58    @staticmethod
      + 59    def check_min_peaks(ms_peak_indexes, mass_spectrum):
      + 60        """Check if the number of peaks per class meets the minimum requirement.
      + 61
      + 62        Parameters
      + 63        --------
      + 64        ms_peak_indexes : list
      + 65            List of peak indexes and their associated molecular formula objects.
      + 66        mass_spectrum_obj : MassSpectrum
      + 67            The mass spectrum object.
      + 68
      + 69        """
      + 70        if mass_spectrum.molecular_search_settings.use_min_peaks_filter:
      + 71            if (
      + 72                not len(ms_peak_indexes)
      + 73                >= mass_spectrum.molecular_search_settings.min_peaks_per_class
      + 74            ):
      + 75                for peak_index, mf_obj in ms_peak_indexes:
      + 76                    mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
      + 77
      + 78    @staticmethod
      + 79    def filter_isotopologue(ms_peak_indexes, mass_spectrum):
      + 80        """Apply isotopologue filter to the mass spectrum.
      + 81
      + 82        Parameters
      + 83        --------
      + 84        ms_peak_indexes : list
      + 85            List of peak indexes and their associated molecular formula objects.
      + 86        mass_spectrum_obj : MassSpectrum
      + 87            The mass spectrum object.
      + 88
      + 89        Returns
      + 90        ------------
      + 91        filtered_ms_peak_indexes : list
      + 92            List of peak indexes and their associated molecular formula objects after applying the isotopologue filter.
      + 93        """
      + 94        index_to_remove = []
      + 95        # print(len(ms_peak_indexes))
      + 96        if mass_spectrum.molecular_search_settings.use_isotopologue_filter:
      + 97            atoms_iso_filter = (
      + 98                mass_spectrum.molecular_search_settings.isotopologue_filter_atoms
      + 99            )
       100
      -101            for mspeak_index, mf_obj in ms_peak_indexes:
      -102            
      -103                if mf_obj.isotopologue_count_percentile < isotopologue_count_threshold:
      -104                    
      -105                    if set(mf_obj.atoms).intersection(atoms_iso_filter):
      -106                        
      -107                        #removes tuple obj from initial list to be used on next filter steps
      -108                        ms_peak_indexes.remove((mspeak_index, mf_obj))
      -109                        
      -110                        # current mf_obj
      -111                        index_to_remove.append((mspeak_index, mf_obj))
      -112                        # all other associated isotopolgues
      -113                        index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
      -114
      -115        #iterate over all indexes to be remove and remove the mf from the mspeak 
      -116        
      -117        #print(len(ms_peak_indexes))
      -118        for peak_index, mf_obj in index_to_remove:
      -119                #print(peak_index, mf_obj)
      -120                mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
      -121
      -122        
      -123        return ms_peak_indexes 
      +101            isotopologue_count_threshold = (
      +102                mass_spectrum.molecular_search_settings.isotopologue_filter_threshold
      +103            )
      +104
      +105            for mspeak_index, mf_obj in ms_peak_indexes:
      +106                if mf_obj.isotopologue_count_percentile < isotopologue_count_threshold:
      +107                    if set(mf_obj.atoms).intersection(atoms_iso_filter):
      +108                        # removes tuple obj from initial list to be used on next filter steps
      +109                        ms_peak_indexes.remove((mspeak_index, mf_obj))
      +110
      +111                        # current mf_obj
      +112                        index_to_remove.append((mspeak_index, mf_obj))
      +113                        # all other associated isotopolgues
      +114                        index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
      +115
      +116        # iterate over all indexes to be remove and remove the mf from the mspeak
      +117
      +118        # print(len(ms_peak_indexes))
      +119        for peak_index, mf_obj in index_to_remove:
      +120            # print(peak_index, mf_obj)
      +121            mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
      +122
      +123        return ms_peak_indexes
       
      @@ -331,11 +330,11 @@

      Methods
        -
      • filter_kendrick(ms_peak_indexes, mass_spectrum_obj).
        -Apply Kendrick filter to the mass spectrum.
      • -
      • check_min_peaks(ms_peak_indexes, mass_spectrum_obj).
        -Check if the number of peaks per class meets the minimum requirement.
      • -
      • filter_isotopologue(ms_peak_indexes, mass_spectrum_obj).
        +
      • filter_kendrick(ms_peak_indexes, mass_spectrum_obj). +Apply Kendrick filter to the mass spectrum.
      • +
      • check_min_peaks(ms_peak_indexes, mass_spectrum_obj). +Check if the number of peaks per class meets the minimum requirement.
      • +
      • filter_isotopologue(ms_peak_indexes, mass_spectrum_obj). Apply isotopologue filter to the mass spectrum.
      @@ -353,45 +352,44 @@

      Methods
    -
    17    @staticmethod
    -18    def filter_kendrick( ms_peak_indexes, mass_spectrum_obj):
    -19        """Apply Kendrick filter to the mass spectrum.
    -20
    -21        Parameters
    -22        ----------
    -23        ms_peak_indexes : list
    -24            List of peak indexes and their associated molecular formula objects.
    -25        mass_spectrum_obj : MassSpectrum 
    -26            The mass spectrum object.
    -27
    -28        Returns
    -29        -------
    -30        filtered_ms_peak_indexes : list 
    -31            List of peak indexes and their associated molecular formula objects after applying the Kendrick filter.
    -32        """
    -33        index_to_remove = []
    -34        
    -35        if mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter:
    -36            
    -37            index_to_remove = ClusteringFilter().filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj)
    -38
    -39            #for index in noise_indexes: self.mass_spectrum_obj[index].clear_molecular_formulas()
    -40        
    -41        all_index_to_remove = []
    -42
    -43        for peak_index, mf_obj in index_to_remove:
    -44                
    -45                ms_peak_indexes.remove((peak_index, mf_obj))
    -46
    -47                all_index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
    +            
    19    @staticmethod
    +20    def filter_kendrick(ms_peak_indexes, mass_spectrum_obj):
    +21        """Apply Kendrick filter to the mass spectrum.
    +22
    +23        Parameters
    +24        ----------
    +25        ms_peak_indexes : list
    +26            List of peak indexes and their associated molecular formula objects.
    +27        mass_spectrum_obj : MassSpectrum
    +28            The mass spectrum object.
    +29
    +30        Returns
    +31        -------
    +32        filtered_ms_peak_indexes : list
    +33            List of peak indexes and their associated molecular formula objects after applying the Kendrick filter.
    +34        """
    +35        index_to_remove = []
    +36
    +37        if mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter:
    +38            index_to_remove = ClusteringFilter().filter_kendrick_by_index(
    +39                ms_peak_indexes, mass_spectrum_obj
    +40            )
    +41
    +42            # for index in noise_indexes: self.mass_spectrum_obj[index].clear_molecular_formulas()
    +43
    +44        all_index_to_remove = []
    +45
    +46        for peak_index, mf_obj in index_to_remove:
    +47            ms_peak_indexes.remove((peak_index, mf_obj))
     48
    -49        all_index_to_remove =  list(set(all_index_to_remove + index_to_remove))
    +49            all_index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
     50
    -51        for peak_index, mf_obj in all_index_to_remove:
    -52            
    -53            mass_spectrum_obj[peak_index].remove_molecular_formula(mf_obj)
    -54            
    -55        return ms_peak_indexes
    +51        all_index_to_remove = list(set(all_index_to_remove + index_to_remove))
    +52
    +53        for peak_index, mf_obj in all_index_to_remove:
    +54            mass_spectrum_obj[peak_index].remove_molecular_formula(mf_obj)
    +55
    +56        return ms_peak_indexes
     
    @@ -428,25 +426,25 @@
    Returns
    -
    57    @staticmethod
    -58    def check_min_peaks( ms_peak_indexes, mass_spectrum):
    -59        """ Check if the number of peaks per class meets the minimum requirement.
    -60
    -61        Parameters
    -62        --------
    -63        ms_peak_indexes : list
    -64            List of peak indexes and their associated molecular formula objects.
    -65        mass_spectrum_obj : MassSpectrum 
    -66            The mass spectrum object.
    -67
    -68        """
    -69        if mass_spectrum.molecular_search_settings.use_min_peaks_filter:
    -70
    -71            if not len(ms_peak_indexes) >= mass_spectrum.molecular_search_settings.min_peaks_per_class:
    -72                
    -73                for peak_index, mf_obj in ms_peak_indexes:
    -74                
    -75                    mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
    +            
    58    @staticmethod
    +59    def check_min_peaks(ms_peak_indexes, mass_spectrum):
    +60        """Check if the number of peaks per class meets the minimum requirement.
    +61
    +62        Parameters
    +63        --------
    +64        ms_peak_indexes : list
    +65            List of peak indexes and their associated molecular formula objects.
    +66        mass_spectrum_obj : MassSpectrum
    +67            The mass spectrum object.
    +68
    +69        """
    +70        if mass_spectrum.molecular_search_settings.use_min_peaks_filter:
    +71            if (
    +72                not len(ms_peak_indexes)
    +73                >= mass_spectrum.molecular_search_settings.min_peaks_per_class
    +74            ):
    +75                for peak_index, mf_obj in ms_peak_indexes:
    +76                    mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
     
    @@ -476,53 +474,52 @@
    Parameters
    -
     77    @staticmethod
    - 78    def filter_isotopologue( ms_peak_indexes, mass_spectrum):
    - 79        """ Apply isotopologue filter to the mass spectrum.
    - 80
    - 81        Parameters
    - 82        --------
    - 83        ms_peak_indexes : list
    - 84            List of peak indexes and their associated molecular formula objects.
    - 85        mass_spectrum_obj : MassSpectrum 
    - 86            The mass spectrum object.
    - 87
    - 88        Returns
    - 89        ------------
    - 90        filtered_ms_peak_indexes : list
    - 91            List of peak indexes and their associated molecular formula objects after applying the isotopologue filter.
    - 92        """
    - 93        index_to_remove = []
    - 94        #print(len(ms_peak_indexes))
    - 95        if mass_spectrum.molecular_search_settings.use_isotopologue_filter:
    - 96
    - 97            atoms_iso_filter = mass_spectrum.molecular_search_settings.isotopologue_filter_atoms
    - 98
    - 99            isotopologue_count_threshold = mass_spectrum.molecular_search_settings.isotopologue_filter_threshold
    +            
     78    @staticmethod
    + 79    def filter_isotopologue(ms_peak_indexes, mass_spectrum):
    + 80        """Apply isotopologue filter to the mass spectrum.
    + 81
    + 82        Parameters
    + 83        --------
    + 84        ms_peak_indexes : list
    + 85            List of peak indexes and their associated molecular formula objects.
    + 86        mass_spectrum_obj : MassSpectrum
    + 87            The mass spectrum object.
    + 88
    + 89        Returns
    + 90        ------------
    + 91        filtered_ms_peak_indexes : list
    + 92            List of peak indexes and their associated molecular formula objects after applying the isotopologue filter.
    + 93        """
    + 94        index_to_remove = []
    + 95        # print(len(ms_peak_indexes))
    + 96        if mass_spectrum.molecular_search_settings.use_isotopologue_filter:
    + 97            atoms_iso_filter = (
    + 98                mass_spectrum.molecular_search_settings.isotopologue_filter_atoms
    + 99            )
     100
    -101            for mspeak_index, mf_obj in ms_peak_indexes:
    -102            
    -103                if mf_obj.isotopologue_count_percentile < isotopologue_count_threshold:
    -104                    
    -105                    if set(mf_obj.atoms).intersection(atoms_iso_filter):
    -106                        
    -107                        #removes tuple obj from initial list to be used on next filter steps
    -108                        ms_peak_indexes.remove((mspeak_index, mf_obj))
    -109                        
    -110                        # current mf_obj
    -111                        index_to_remove.append((mspeak_index, mf_obj))
    -112                        # all other associated isotopolgues
    -113                        index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
    -114
    -115        #iterate over all indexes to be remove and remove the mf from the mspeak 
    -116        
    -117        #print(len(ms_peak_indexes))
    -118        for peak_index, mf_obj in index_to_remove:
    -119                #print(peak_index, mf_obj)
    -120                mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
    -121
    -122        
    -123        return ms_peak_indexes 
    +101            isotopologue_count_threshold = (
    +102                mass_spectrum.molecular_search_settings.isotopologue_filter_threshold
    +103            )
    +104
    +105            for mspeak_index, mf_obj in ms_peak_indexes:
    +106                if mf_obj.isotopologue_count_percentile < isotopologue_count_threshold:
    +107                    if set(mf_obj.atoms).intersection(atoms_iso_filter):
    +108                        # removes tuple obj from initial list to be used on next filter steps
    +109                        ms_peak_indexes.remove((mspeak_index, mf_obj))
    +110
    +111                        # current mf_obj
    +112                        index_to_remove.append((mspeak_index, mf_obj))
    +113                        # all other associated isotopolgues
    +114                        index_to_remove.extend(mf_obj.mspeak_mf_isotopologues_indexes)
    +115
    +116        # iterate over all indexes to be remove and remove the mf from the mspeak
    +117
    +118        # print(len(ms_peak_indexes))
    +119        for peak_index, mf_obj in index_to_remove:
    +120            # print(peak_index, mf_obj)
    +121            mass_spectrum[peak_index].remove_molecular_formula(mf_obj)
    +122
    +123        return ms_peak_indexes
     
    diff --git a/docs/corems/molecular_id/calc/SpectralSimilarity.html b/docs/corems/molecular_id/calc/SpectralSimilarity.html index 1d2d8011..f6d35533 100644 --- a/docs/corems/molecular_id/calc/SpectralSimilarity.html +++ b/docs/corems/molecular_id/calc/SpectralSimilarity.html @@ -148,622 +148,634 @@

    2__date__ = "Jun 09, 2021" 3 4from numpy.fft import rfft - 5from scipy.spatial.distance import cosine, jaccard, euclidean, cityblock - 6from scipy.stats import pearsonr, spearmanr, kendalltau - 7from sklearn.metrics.pairwise import cosine_similarity - 8from numpy import power, dot, absolute, subtract, intersect1d, where, average, corrcoef, sqrt - 9from numpy import sum as np_sum - 10from numpy.linalg import norm - 11from pandas import DataFrame - 12import numpy as np - 13 - 14methods_name = { - 15 #"entropy_distance": "Entropy Distance", - 16 #"weighted_entropy_distance": "Dynamic weighted entropy Distance", - 17 "chebyshev_distance": "Chebyshev Distance", - 18 "squared_euclidean_distance": "Squared Euclidean Distance", - 19 "fidelity_similarity": "Fidelity Similarity", - 20 "matusita_distance": "Matusita Distance", - 21 "squared_chord_distance": "Squared-chord Distance", - 22 #"bhattacharya_1_distance": "Bhattacharya 1 Distance", - 23 #"bhattacharya_2_distance": "Bhattacharya 2 Distance", - 24 "harmonic_mean_similarity": "Harmonic mean Distance", - 25 "Pearson_chi_squared_distance": "Pearson Chi Squared Distance", - 26 "Neyman_chi_squared_distance": "Neyman Chi Squared Distance", - 27 "probabilistic_symmetric_chi_squared_distance": "Probabilistic symmetric X2 Distance", - 28 "topsoe_distance": "Topsoe Distance", - 29 "chernoff_distance": "Chernoff Distance", - 30 "ruzicka_distance": "Ruzicka Distance", - 31 "roberts_distance": "Roberts Distance", - 32 #"intersection_distance": "Intersection Distance", - 33 "motyka_distance": "Motyka Distance", - 34 "canberra_distance": "Canberra Distance", - 35 "canberra_metric": "Canberra Metric", - 36 "kulczynski_1_distance": "Kulczynski 1 Distance", - 37 #"baroni_urbani_buser_distance": "Baroni-Urbani-Buser Distance", - 38 #"penrose_size_distance": "Penrose size Distance", - 39 #"mean_character_distance": "Mean character Distance", - 40 "lorentzian_distance": "Lorentzian Distance", - 41 #"penrose_shape_distance": "Penrose 
shape Distance", - 42 "clark_distance": "Clark Distance", - 43 "hellinger_distance": "Hellinger Distance", - 44 "whittaker_index_of_association_distance": "Whittaker index of association Distance", - 45 #"similarity_index_distance": "Similarity Index Distance", - 46 #"improved_similarity_distance": "Improved Similarity", - 47 #"absolute_value_distance": "Absolute Value Distance", - 48 "spectral_contrast_angle_distance": "Spectral Contrast Angle", - 49 "wave_hedges_distance": "Wave Hedges Distance", - 50 "dice_similarity": "Dice Similarity", - 51 "inner_product_distance": "Inner Product Distance", - 52 "divergence_distance": "Divergence Distance", - 53 "jensen_difference_distance": "Jensen Differences Distance", - 54 "kumar_johnson_distance": "Kumar Johnson Distance", - 55 "avg_l_distance": "Avg (L1, L8) Distance", - 56 "vicis_wave_hadges_distance": "Vicis Wave Hadges Distance", - 57 "vicis_symmetric_chi_squared_1_distance": "Vicis-Symmetric X2 1 Distance", - 58 "vicis_symmetric_chi_squared_2_distance": "Vicis-Symmetric X2 2 Distance", - 59 "vicis_symmetric_chi_squared_3_distance": "Vicis-Symmetric X2 3 Distance", - 60 "max_symmetric_chi_squared_distance": "Max Symmetric Chi Squared Distance", - 61 "min_symmetric_chi_squared_distance": "Min Symmetric Chi Squared Distance", - 62 #"ms_for_id_v1": "MSforID Distance version 1", - 63 #"ms_for_id": "MSforID Distance", - 64 "additive_sym_chi_sq": "Additive Symmetric Chi Squared", - 65 "bhattacharya_distance": "Battacharya Distance", - 66 "generalized_ochiai_index": "Generalized Ochiai Index", - 67 "gower_distance": "Gower Distance", - 68 "impr_sqrt_cosine_sim": "Improved Square Root Cosine Similarity", - 69 "intersection_sim": "Intersection Similarity", - 70 "j_divergence": "J Divergence", - 71 "jensen_shannon_index": "Jensen Shannon Index", - 72 "k_divergence": "K Divergence", - 73 "VW6": "VW6", - 74 "VW5": "VW5", - 75 "VW4": "VW4", - 76 "VW3": "VW3", - 77 "VW2": "VW2", - 78 "VW1": "VW1", - 79 "taneja_divergence": "Taneja 
Divergence", - 80 "symmetric_chi_squared_distance": "Symmetric Chi Squared Distance", - 81 "squared_chi_squared_distance": "Squared Chi Squared Distance", - 82 "square_root_cosine_correlation": "Square Root Cosine Correlation", - 83 "sorensen_distance": "Sorensen Distance", - 84 "Minokowski_3": "Minokowski 3 Distance", - 85 "Minokowski_4": "Minokowski 4 Distance", - 86 "kumarjohnson_divergence": "Kumar Johnson Divergence", - 87 "kumarhassebrook_similarity": "Kumar Hassebrook Similarity", - 88 "kullbackleibler_divergence": "Kullback Leibler Divergence", - 89 "soergel_distance": "Soergel Distance", - 90} - 91 - 92methods_scale = { - 93 "entropy": [0, np.log(4)], - 94 "weighted_entropy": [0, np.log(4)], - 95 "absolute_value": [0, 2], - 96 "avg_l": [0, 1.5], - 97 "bhattacharya_1": [0, np.arccos(0) ** 2], - 98 "bhattacharya_2": [0, np.inf], - 99 "canberra": [0, np.inf], -100 "clark": [0, np.inf], -101 "divergence": [0, np.inf], -102 "euclidean": [0, np.sqrt(2)], -103 "hellinger": [0, np.inf], -104 "improved_similarity": [0, np.inf], -105 "lorentzian": [0, np.inf], -106 "manhattan": [0, 2], -107 "matusita": [0, np.sqrt(2)], -108 "mean_character": [0, 2], -109 "motyka": [-0.5, 0], -110 "ms_for_id": [-np.inf, 0], -111 "ms_for_id_v1": [0, np.inf], -112 "pearson_correlation": [-1, 1], -113 "penrose_shape": [0, np.sqrt(2)], -114 "penrose_size": [0, np.inf], -115 "probabilistic_symmetric_chi_squared": [0, 1], -116 "similarity_index": [0, np.inf], -117 "squared_chord": [0, 2], -118 "squared_euclidean": [0, 2], -119 "symmetric_chi_squared": [0, 0.5 * np.sqrt(2)], -120 "topsoe": [0, np.sqrt(2)], -121 "vicis_symmetric_chi_squared_3": [0, 2], -122 "wave_hedges": [0, np.inf], -123 "whittaker_index_of_association": [0, np.inf] -124} -125 -126class SpectralSimilarity(): -127 """ Class containing methods for calculating spectral similarity between two mass spectra. 
-128 -129 Parameters -130 ---------- -131 ms_mz_abun_dict : dict -132 Dictionary of mass to abundance values for the experimental mass spectrum. -133 ref_obj : dict -134 Dictionary of mass to abundance values for the reference mass spectrum. -135 norm_func : function -136 Function to normalize the abundance values. -137 -138 Attributes -139 ---------- -140 normalize_func : function -141 Function to normalize the abundance values. -142 ms_mz_abun_dict : dict -143 Dictionary of mass to abundance values for the experimental mass spectrum. -144 ref_obj : dict -145 Dictionary of mass to abundance values for the reference mass spectrum. -146 exp_abun : list -147 List of abundance values for the experimental mass spectrum. -148 exp_mz : list -149 List of mass values for the experimental mass spectrum. -150 ref_mz : list -151 List of mass values for the reference mass spectrum. -152 ref_abun : list -153 List of abundance values for the reference mass spectrum. -154 ref_mz_abun_dict : dict -155 Dictionary of mass to abundance values for the reference mass spectrum. -156 df : DataFrame -157 DataFrame containing the experimental and reference mass spectrum data. -158 zero_filled_u_l : tuple -159 Tuple containing the experimental and reference mass spectrum data after zero filling and normalization. -160 common_mz_values : list -161 List of common mass values between the experimental and reference mass spectra. -162 n_x_y : int -163 Number of common mass values between the experimental and reference mass spectra. -164 -165 Methods -166 ------- -167 * nan_fill(df, fill_with=0). -168 Fill missing mass values with a given value. -169 * normalize(x, y, norm_func=sum). -170 Normalize the abundance values. -171 * weighted_cosine_correlation(a=0.5, b=1.3, nanfill=1e-10). -172 Calculate the weighted cosine correlation between the experimental and reference mass spectra. -173 * cosine_correlation(). 
-174 Calculate the cosine correlation between the experimental and reference mass spectra. -175 * stein_scott(). -176 Calculate the Stein-Scott similarity between the experimental and reference mass spectra. -177 * pearson_correlation(). -178 Calculate the Pearson correlation between the experimental and reference mass spectra. -179 * spearman_correlation(). -180 Calculate the Spearman correlation between the experimental and reference mass spectra. -181 -182 -183 """ -184 def __init__(self, ms_mz_abun_dict, ref_obj, norm_func=sum): + 5from scipy.stats import pearsonr, spearmanr, kendalltau + 6from numpy import ( + 7 power, + 8 dot, + 9 absolute, + 10 sqrt, + 11) + 12from numpy import sum as np_sum + 13from numpy.linalg import norm + 14from pandas import DataFrame + 15import numpy as np + 16 + 17methods_name = { + 18 # "entropy_distance": "Entropy Distance", + 19 # "weighted_entropy_distance": "Dynamic weighted entropy Distance", + 20 "chebyshev_distance": "Chebyshev Distance", + 21 "squared_euclidean_distance": "Squared Euclidean Distance", + 22 "fidelity_similarity": "Fidelity Similarity", + 23 "matusita_distance": "Matusita Distance", + 24 "squared_chord_distance": "Squared-chord Distance", + 25 # "bhattacharya_1_distance": "Bhattacharya 1 Distance", + 26 # "bhattacharya_2_distance": "Bhattacharya 2 Distance", + 27 "harmonic_mean_similarity": "Harmonic mean Distance", + 28 "Pearson_chi_squared_distance": "Pearson Chi Squared Distance", + 29 "Neyman_chi_squared_distance": "Neyman Chi Squared Distance", + 30 "probabilistic_symmetric_chi_squared_distance": "Probabilistic symmetric X2 Distance", + 31 "topsoe_distance": "Topsoe Distance", + 32 "chernoff_distance": "Chernoff Distance", + 33 "ruzicka_distance": "Ruzicka Distance", + 34 "roberts_distance": "Roberts Distance", + 35 # "intersection_distance": "Intersection Distance", + 36 "motyka_distance": "Motyka Distance", + 37 "canberra_distance": "Canberra Distance", + 38 "canberra_metric": "Canberra Metric", + 39 
"kulczynski_1_distance": "Kulczynski 1 Distance", + 40 # "baroni_urbani_buser_distance": "Baroni-Urbani-Buser Distance", + 41 # "penrose_size_distance": "Penrose size Distance", + 42 # "mean_character_distance": "Mean character Distance", + 43 "lorentzian_distance": "Lorentzian Distance", + 44 # "penrose_shape_distance": "Penrose shape Distance", + 45 "clark_distance": "Clark Distance", + 46 "hellinger_distance": "Hellinger Distance", + 47 "whittaker_index_of_association_distance": "Whittaker index of association Distance", + 48 # "similarity_index_distance": "Similarity Index Distance", + 49 # "improved_similarity_distance": "Improved Similarity", + 50 # "absolute_value_distance": "Absolute Value Distance", + 51 "spectral_contrast_angle_distance": "Spectral Contrast Angle", + 52 "wave_hedges_distance": "Wave Hedges Distance", + 53 "dice_similarity": "Dice Similarity", + 54 "inner_product_distance": "Inner Product Distance", + 55 "divergence_distance": "Divergence Distance", + 56 "jensen_difference_distance": "Jensen Differences Distance", + 57 "kumar_johnson_distance": "Kumar Johnson Distance", + 58 "avg_l_distance": "Avg (L1, L8) Distance", + 59 "vicis_wave_hadges_distance": "Vicis Wave Hadges Distance", + 60 "vicis_symmetric_chi_squared_1_distance": "Vicis-Symmetric X2 1 Distance", + 61 "vicis_symmetric_chi_squared_2_distance": "Vicis-Symmetric X2 2 Distance", + 62 "vicis_symmetric_chi_squared_3_distance": "Vicis-Symmetric X2 3 Distance", + 63 "max_symmetric_chi_squared_distance": "Max Symmetric Chi Squared Distance", + 64 "min_symmetric_chi_squared_distance": "Min Symmetric Chi Squared Distance", + 65 # "ms_for_id_v1": "MSforID Distance version 1", + 66 # "ms_for_id": "MSforID Distance", + 67 "additive_sym_chi_sq": "Additive Symmetric Chi Squared", + 68 "bhattacharya_distance": "Battacharya Distance", + 69 "generalized_ochiai_index": "Generalized Ochiai Index", + 70 "gower_distance": "Gower Distance", + 71 "impr_sqrt_cosine_sim": "Improved Square Root Cosine 
Similarity", + 72 "intersection_sim": "Intersection Similarity", + 73 "j_divergence": "J Divergence", + 74 "jensen_shannon_index": "Jensen Shannon Index", + 75 "k_divergence": "K Divergence", + 76 "VW6": "VW6", + 77 "VW5": "VW5", + 78 "VW4": "VW4", + 79 "VW3": "VW3", + 80 "VW2": "VW2", + 81 "VW1": "VW1", + 82 "taneja_divergence": "Taneja Divergence", + 83 "symmetric_chi_squared_distance": "Symmetric Chi Squared Distance", + 84 "squared_chi_squared_distance": "Squared Chi Squared Distance", + 85 "square_root_cosine_correlation": "Square Root Cosine Correlation", + 86 "sorensen_distance": "Sorensen Distance", + 87 "Minokowski_3": "Minokowski 3 Distance", + 88 "Minokowski_4": "Minokowski 4 Distance", + 89 "kumarjohnson_divergence": "Kumar Johnson Divergence", + 90 "kumarhassebrook_similarity": "Kumar Hassebrook Similarity", + 91 "kullbackleibler_divergence": "Kullback Leibler Divergence", + 92 "soergel_distance": "Soergel Distance", + 93} + 94 + 95methods_scale = { + 96 "entropy": [0, np.log(4)], + 97 "weighted_entropy": [0, np.log(4)], + 98 "absolute_value": [0, 2], + 99 "avg_l": [0, 1.5], +100 "bhattacharya_1": [0, np.arccos(0) ** 2], +101 "bhattacharya_2": [0, np.inf], +102 "canberra": [0, np.inf], +103 "clark": [0, np.inf], +104 "divergence": [0, np.inf], +105 "euclidean": [0, np.sqrt(2)], +106 "hellinger": [0, np.inf], +107 "improved_similarity": [0, np.inf], +108 "lorentzian": [0, np.inf], +109 "manhattan": [0, 2], +110 "matusita": [0, np.sqrt(2)], +111 "mean_character": [0, 2], +112 "motyka": [-0.5, 0], +113 "ms_for_id": [-np.inf, 0], +114 "ms_for_id_v1": [0, np.inf], +115 "pearson_correlation": [-1, 1], +116 "penrose_shape": [0, np.sqrt(2)], +117 "penrose_size": [0, np.inf], +118 "probabilistic_symmetric_chi_squared": [0, 1], +119 "similarity_index": [0, np.inf], +120 "squared_chord": [0, 2], +121 "squared_euclidean": [0, 2], +122 "symmetric_chi_squared": [0, 0.5 * np.sqrt(2)], +123 "topsoe": [0, np.sqrt(2)], +124 "vicis_symmetric_chi_squared_3": [0, 2], +125 
"wave_hedges": [0, np.inf], +126 "whittaker_index_of_association": [0, np.inf], +127} +128 +129 +130class SpectralSimilarity: +131 """Class containing methods for calculating spectral similarity between two mass spectra. +132 +133 Parameters +134 ---------- +135 ms_mz_abun_dict : dict +136 Dictionary of mass to abundance values for the experimental mass spectrum. +137 ref_obj : dict +138 Dictionary of mass to abundance values for the reference mass spectrum. +139 norm_func : function +140 Function to normalize the abundance values. +141 +142 Attributes +143 ---------- +144 normalize_func : function +145 Function to normalize the abundance values. +146 ms_mz_abun_dict : dict +147 Dictionary of mass to abundance values for the experimental mass spectrum. +148 ref_obj : dict +149 Dictionary of mass to abundance values for the reference mass spectrum. +150 exp_abun : list +151 List of abundance values for the experimental mass spectrum. +152 exp_mz : list +153 List of mass values for the experimental mass spectrum. +154 ref_mz : list +155 List of mass values for the reference mass spectrum. +156 ref_abun : list +157 List of abundance values for the reference mass spectrum. +158 ref_mz_abun_dict : dict +159 Dictionary of mass to abundance values for the reference mass spectrum. +160 df : DataFrame +161 DataFrame containing the experimental and reference mass spectrum data. +162 zero_filled_u_l : tuple +163 Tuple containing the experimental and reference mass spectrum data after zero filling and normalization. +164 common_mz_values : list +165 List of common mass values between the experimental and reference mass spectra. +166 n_x_y : int +167 Number of common mass values between the experimental and reference mass spectra. +168 +169 Methods +170 ------- +171 * nan_fill(df, fill_with=0). +172 Fill missing mass values with a given value. +173 * normalize(x, y, norm_func=sum). +174 Normalize the abundance values. 
+175 * weighted_cosine_correlation(a=0.5, b=1.3, nanfill=1e-10). +176 Calculate the weighted cosine correlation between the experimental and reference mass spectra. +177 * cosine_correlation(). +178 Calculate the cosine correlation between the experimental and reference mass spectra. +179 * stein_scott(). +180 Calculate the Stein-Scott similarity between the experimental and reference mass spectra. +181 * pearson_correlation(). +182 Calculate the Pearson correlation between the experimental and reference mass spectra. +183 * spearman_correlation(). +184 Calculate the Spearman correlation between the experimental and reference mass spectra. 185 -186 self.normalize_func = norm_func -187 self.ms_mz_abun_dict = ms_mz_abun_dict -188 self.ref_obj = ref_obj -189 -190 self.exp_abun = list(self.ms_mz_abun_dict.values()) -191 self.exp_mz = list(self.ms_mz_abun_dict.keys()) -192 -193 self.ref_mz = self.ref_obj.get("mz") -194 self.ref_abun = self.ref_obj.get("abundance") -195 -196 self.ref_mz_abun_dict = dict(zip(self.ref_mz, self.ref_abun)) -197 -198 # parse to dataframe, easier to zerofill and tranpose -199 self.df = DataFrame([self.ms_mz_abun_dict, self.ref_mz_abun_dict]) -200 -201 # fill missing mz with abundance 0 -202 x, y = self.nan_fill(self.df, fill_with=1e-10) -203 -204 self.zero_filled_u_l = self.normalize(x, y, norm_func=self.normalize_func) -205 -206 # filter out the mass values that have zero intensities in self.exp_abun -207 exp_mz_filtered = set([k for k in self.exp_mz if self.ms_mz_abun_dict[k] != 0]) -208 -209 # filter out the mass values that have zero intensities in self.ref_mz -210 ref_mz_filtered = set([k for k in self.ref_mz if self.ref_mz_abun_dict[k] != 0]) -211 -212 # find the intersection/common mass values of both ref and exp, and sort them -213 self.common_mz_values = sorted(list(exp_mz_filtered.intersection(ref_mz_filtered))) -214 -215 # find the number of common mass values (after filtering 0s) -216 self.n_x_y = len(self.common_mz_values) -217 # 
print(self.n_x_y) -218 -219 def nan_fill(self, df, fill_with=0): -220 """ Fill missing mass values with a given value. -221 -222 Parameters -223 ---------- -224 df : DataFrame -225 DataFrame containing the experimental and reference mass spectrum data. -226 fill_with : float -227 Value to fill missing mass values with. -228 -229 Returns -230 ------- -231 x : list -232 List of abundance values for the experimental mass spectrum. -233 y : list -234 List of abundance values for the reference mass spectrum.""" -235 df.fillna(fill_with, inplace=True) -236 -237 return df.T[0].values, df.T[1].values -238 -239 def normalize(self, x, y, norm_func=sum): -240 """ Normalize the abundance values. -241 -242 Parameters -243 ---------- -244 x : list -245 List of abundance values for the experimental mass spectrum. -246 y : list -247 List of abundance values for the reference mass spectrum. -248 norm_func : function -249 Function to normalize the abundance values. -250 Default is sum -251 -252 Returns -253 ------- -254 u_l : tuple -255 Tuple containing the experimental and reference mass spectrum data after zero filling and normalization. -256 """ -257 if norm_func is not None: -258 u_l = (x / norm_func(x), y / norm_func(y)) -259 return u_l -260 else: -261 return (x, y) -262 -263 def weighted_cosine_correlation(self, a=0.5, b=1.3, nanfill=1e-10): -264 """ Calculate the weighted cosine correlation between the experimental and reference mass spectra. -265 -266 Parameters -267 ---------- -268 a : float -269 Weighting factor for the abundance values. -270 Default is 0.5 -271 b : float -272 Weighting factor for the mass values. -273 Default is 1.3 -274 nanfill : float -275 Value to fill missing mass values with. -276 Default is 1e-10 -277 -278 Returns -279 ------- -280 correlation : float -281 Weighted cosine correlation between the experimental and reference mass spectra. 
-282 """ -283 # create dict['mz'] = abundance, for experimental data -284 # ms_mz_abun_dict = mass_spec.mz_abun_dict -285 # weight exp data -286 -287 xc = power(self.exp_abun, a) * power(self.exp_mz, b) -288 -289 # track back to individual mz -290 weighted_exp_dict = dict(zip(self.ms_mz_abun_dict.keys(), xc)) -291 -292 # weight ref data -293 yc = power(self.ref_obj.get("abundance"), a) * power(self.ref_obj.get("mz"), b) +186 +187 """ +188 +189 def __init__(self, ms_mz_abun_dict, ref_obj, norm_func=sum): +190 self.normalize_func = norm_func +191 self.ms_mz_abun_dict = ms_mz_abun_dict +192 self.ref_obj = ref_obj +193 +194 self.exp_abun = list(self.ms_mz_abun_dict.values()) +195 self.exp_mz = list(self.ms_mz_abun_dict.keys()) +196 +197 self.ref_mz = self.ref_obj.get("mz") +198 self.ref_abun = self.ref_obj.get("abundance") +199 +200 self.ref_mz_abun_dict = dict(zip(self.ref_mz, self.ref_abun)) +201 +202 # parse to dataframe, easier to zerofill and tranpose +203 self.df = DataFrame([self.ms_mz_abun_dict, self.ref_mz_abun_dict]) +204 +205 # fill missing mz with abundance 0 +206 x, y = self.nan_fill(self.df, fill_with=1e-10) +207 +208 self.zero_filled_u_l = self.normalize(x, y, norm_func=self.normalize_func) +209 +210 # filter out the mass values that have zero intensities in self.exp_abun +211 exp_mz_filtered = set([k for k in self.exp_mz if self.ms_mz_abun_dict[k] != 0]) +212 +213 # filter out the mass values that have zero intensities in self.ref_mz +214 ref_mz_filtered = set([k for k in self.ref_mz if self.ref_mz_abun_dict[k] != 0]) +215 +216 # find the intersection/common mass values of both ref and exp, and sort them +217 self.common_mz_values = sorted( +218 list(exp_mz_filtered.intersection(ref_mz_filtered)) +219 ) +220 +221 # find the number of common mass values (after filtering 0s) +222 self.n_x_y = len(self.common_mz_values) +223 # print(self.n_x_y) +224 +225 def nan_fill(self, df, fill_with=0): +226 """Fill missing mass values with a given value. 
+227 +228 Parameters +229 ---------- +230 df : DataFrame +231 DataFrame containing the experimental and reference mass spectrum data. +232 fill_with : float +233 Value to fill missing mass values with. +234 +235 Returns +236 ------- +237 x : list +238 List of abundance values for the experimental mass spectrum. +239 y : list +240 List of abundance values for the reference mass spectrum.""" +241 df.fillna(fill_with, inplace=True) +242 +243 return df.T[0].values, df.T[1].values +244 +245 def normalize(self, x, y, norm_func=sum): +246 """Normalize the abundance values. +247 +248 Parameters +249 ---------- +250 x : list +251 List of abundance values for the experimental mass spectrum. +252 y : list +253 List of abundance values for the reference mass spectrum. +254 norm_func : function +255 Function to normalize the abundance values. +256 Default is sum +257 +258 Returns +259 ------- +260 u_l : tuple +261 Tuple containing the experimental and reference mass spectrum data after zero filling and normalization. +262 """ +263 if norm_func is not None: +264 u_l = (x / norm_func(x), y / norm_func(y)) +265 return u_l +266 else: +267 return (x, y) +268 +269 def weighted_cosine_correlation(self, a=0.5, b=1.3, nanfill=1e-10): +270 """Calculate the weighted cosine correlation between the experimental and reference mass spectra. +271 +272 Parameters +273 ---------- +274 a : float +275 Weighting factor for the abundance values. +276 Default is 0.5 +277 b : float +278 Weighting factor for the mass values. +279 Default is 1.3 +280 nanfill : float +281 Value to fill missing mass values with. +282 Default is 1e-10 +283 +284 Returns +285 ------- +286 correlation : float +287 Weighted cosine correlation between the experimental and reference mass spectra. 
+288 """ +289 # create dict['mz'] = abundance, for experimental data +290 # ms_mz_abun_dict = mass_spec.mz_abun_dict +291 # weight exp data +292 +293 xc = power(self.exp_abun, a) * power(self.exp_mz, b) 294 -295 ref_mz_abun_dict = dict(zip(self.ref_obj.get("mz"), yc)) -296 -297 # parse to dataframe, easier to zerofill and tranpose -298 df = DataFrame([weighted_exp_dict, ref_mz_abun_dict]) -299 -300 # fill missing mz with weight {abun**a}{m/z**b} to 0 -301 x, y = self.nan_fill(df, fill_with=nanfill) +295 # track back to individual mz +296 weighted_exp_dict = dict(zip(self.ms_mz_abun_dict.keys(), xc)) +297 +298 # weight ref data +299 yc = power(self.ref_obj.get("abundance"), a) * power(self.ref_obj.get("mz"), b) +300 +301 ref_mz_abun_dict = dict(zip(self.ref_obj.get("mz"), yc)) 302 -303 # correlation = (1 - cosine(x, y)) -304 -305 correlation = dot(x, y) / (norm(x) * norm(y)) -306 -307 return correlation +303 # parse to dataframe, easier to zerofill and tranpose +304 df = DataFrame([weighted_exp_dict, ref_mz_abun_dict]) +305 +306 # fill missing mz with weight {abun**a}{m/z**b} to 0 +307 x, y = self.nan_fill(df, fill_with=nanfill) 308 -309 def cosine_correlation(self): -310 """ Calculate the cosine correlation between the experimental and reference mass spectra. -311 -312 Returns -313 ------- -314 correlation : float -315 Cosine correlation between the experimental and reference mass spectra. -316 -317 """ -318 # calculate cosine correlation, -319 x = self.zero_filled_u_l[0] -320 y = self.zero_filled_u_l[1] -321 -322 # correlation = (1 - cosine(x, y)) -323 -324 correlation = dot(x, y) / (norm(x) * norm(y)) -325 -326 return correlation +309 # correlation = (1 - cosine(x, y)) +310 +311 correlation = dot(x, y) / (norm(x) * norm(y)) +312 +313 return correlation +314 +315 def cosine_correlation(self): +316 """Calculate the cosine correlation between the experimental and reference mass spectra. 
+317 +318 Returns +319 ------- +320 correlation : float +321 Cosine correlation between the experimental and reference mass spectra. +322 +323 """ +324 # calculate cosine correlation, +325 x = self.zero_filled_u_l[0] +326 y = self.zero_filled_u_l[1] 327 -328 def stein_scott(self): -329 """ Calculate the Stein-Scott similarity between the experimental and reference mass spectra. -330 -331 Returns -332 ------- -333 s_ss_x_y : float -334 Stein-Scott similarity between the experimental and reference mass spectra. -335 s_ss_x_y_nist : float -336 Stein-Scott similarity between the experimental and reference mass spectra. -337 """ -338 #TODO check this code -339 if self.n_x_y == 0: return 0, 0 -340 -341 # count number of non-zero abundance/peak intensity values -342 n_x = sum(a != 0 for a in self.exp_abun) -343 -344 s_r_x_y = 0 -345 -346 a, b = 1, 0 +328 # correlation = (1 - cosine(x, y)) +329 +330 correlation = dot(x, y) / (norm(x) * norm(y)) +331 +332 return correlation +333 +334 def stein_scott(self): +335 """Calculate the Stein-Scott similarity between the experimental and reference mass spectra. +336 +337 Returns +338 ------- +339 s_ss_x_y : float +340 Stein-Scott similarity between the experimental and reference mass spectra. +341 s_ss_x_y_nist : float +342 Stein-Scott similarity between the experimental and reference mass spectra. 
+343 """ +344 # TODO check this code +345 if self.n_x_y == 0: +346 return 0, 0 347 -348 for i in range(1, self.n_x_y): -349 -350 current_value = self.common_mz_values[i] -351 previous_value = self.common_mz_values[i - 1] +348 # count number of non-zero abundance/peak intensity values +349 n_x = sum(a != 0 for a in self.exp_abun) +350 +351 s_r_x_y = 0 352 -353 y_i = self.ref_mz_abun_dict[current_value] -354 y_i_minus1 = self.ref_mz_abun_dict[previous_value] -355 -356 lc_current = power(y_i, a) * power(current_value, b) -357 lc_previous = power(y_i_minus1, a) * power(previous_value, b) +353 a, b = 1, 0 +354 +355 for i in range(1, self.n_x_y): +356 current_value = self.common_mz_values[i] +357 previous_value = self.common_mz_values[i - 1] 358 -359 x_i = self.ms_mz_abun_dict[current_value] -360 x_i_minus1 = self.ms_mz_abun_dict[previous_value] +359 y_i = self.ref_mz_abun_dict[current_value] +360 y_i_minus1 = self.ref_mz_abun_dict[previous_value] 361 -362 uc_current = power(x_i, a) * power(current_value, b) -363 uc_previous = power(x_i_minus1, a) * power(previous_value, b) +362 lc_current = power(y_i, a) * power(current_value, b) +363 lc_previous = power(y_i_minus1, a) * power(previous_value, b) 364 -365 T1 = lc_current / lc_previous -366 -367 T2 = uc_previous / uc_current -368 -369 temp_computation = T1 * T2 +365 x_i = self.ms_mz_abun_dict[current_value] +366 x_i_minus1 = self.ms_mz_abun_dict[previous_value] +367 +368 uc_current = power(x_i, a) * power(current_value, b) +369 uc_previous = power(x_i_minus1, a) * power(previous_value, b) 370 -371 n = 0 -372 if temp_computation <= 1: -373 n = 1 -374 else: -375 n = -1 -376 -377 s_r_x_y = s_r_x_y + power(temp_computation, n) -378 -379 # finish the calculation of S_R(X,Y) -380 -381 s_r_x_y = s_r_x_y / self.n_x_y -382 # using the existing weighted_cosine_correlation function to get S_WC(X,Y) -383 s_wc_x_y = self.weighted_cosine_correlation(a=0.5, b=3, nanfill=0) +371 T1 = lc_current / lc_previous +372 +373 T2 = uc_previous / 
uc_current +374 +375 temp_computation = T1 * T2 +376 +377 n = 0 +378 if temp_computation <= 1: +379 n = 1 +380 else: +381 n = -1 +382 +383 s_r_x_y = s_r_x_y + power(temp_computation, n) 384 -385 s_ss_x_y = ((n_x * s_wc_x_y) + (self.n_x_y * s_r_x_y)) / (n_x + self.n_x_y) +385 # finish the calculation of S_R(X,Y) 386 -387 s_wc_x_y_nist = self.weighted_cosine_correlation(a=0.5, b=1.3, nanfill=0) -388 -389 s_ss_x_y_nist = ((n_x * s_wc_x_y_nist) + (self.n_x_y * s_r_x_y)) / (n_x + self.n_x_y) -390 # final step -391 -392 return s_ss_x_y, s_ss_x_y_nist -393 -394 def pearson_correlation(self,): -395 """ Calculate the Pearson correlation between the experimental and reference mass spectra. -396 -397 Returns -398 ------- -399 correlation : float -400 Pearson correlation between the experimental and reference mass spectra. -401 """ -402 correlation = pearsonr(self.zero_filled_u_l[0], self.zero_filled_u_l[1]) -403 -404 return correlation[0] -405 -406 def spearman_correlation(self): -407 """ Calculate the Spearman correlation between the experimental and reference mass spectra. -408 -409 Returns -410 ------- -411 coorelation : float -412 Spearman correlation between the experimental and reference mass spectra. -413 """ -414 # calculate Spearman correlation -415 # ## TODO - Check axis -416 correlation = spearmanr(self.zero_filled_u_l[0], self.zero_filled_u_l[1], axis=0) -417 -418 return correlation[0] -419 -420 def kendall_tau(self): -421 """ Calculate the Kendall's tau correlation between the experimental and reference mass spectra. 
-422 -423 Returns -424 ------- -425 correlation : float -426 Kendall's tau correlation between the experimental and reference mass spectra.""" -427 # create dict['mz'] = abundance, for experimental data -428 # self.ms_mz_abun_dict = mass_spec.mz_abun_dict +387 s_r_x_y = s_r_x_y / self.n_x_y +388 # using the existing weighted_cosine_correlation function to get S_WC(X,Y) +389 s_wc_x_y = self.weighted_cosine_correlation(a=0.5, b=3, nanfill=0) +390 +391 s_ss_x_y = ((n_x * s_wc_x_y) + (self.n_x_y * s_r_x_y)) / (n_x + self.n_x_y) +392 +393 s_wc_x_y_nist = self.weighted_cosine_correlation(a=0.5, b=1.3, nanfill=0) +394 +395 s_ss_x_y_nist = ((n_x * s_wc_x_y_nist) + (self.n_x_y * s_r_x_y)) / ( +396 n_x + self.n_x_y +397 ) +398 # final step +399 +400 return s_ss_x_y, s_ss_x_y_nist +401 +402 def pearson_correlation( +403 self, +404 ): +405 """Calculate the Pearson correlation between the experimental and reference mass spectra. +406 +407 Returns +408 ------- +409 correlation : float +410 Pearson correlation between the experimental and reference mass spectra. +411 """ +412 correlation = pearsonr(self.zero_filled_u_l[0], self.zero_filled_u_l[1]) +413 +414 return correlation[0] +415 +416 def spearman_correlation(self): +417 """Calculate the Spearman correlation between the experimental and reference mass spectra. +418 +419 Returns +420 ------- +421 coorelation : float +422 Spearman correlation between the experimental and reference mass spectra. +423 """ +424 # calculate Spearman correlation +425 # ## TODO - Check axis +426 correlation = spearmanr( +427 self.zero_filled_u_l[0], self.zero_filled_u_l[1], axis=0 +428 ) 429 -430 # create dict['mz'] = abundance, for experimental data +430 return correlation[0] 431 -432 # calculate Kendall's tau -433 correlation = kendalltau(self.zero_filled_u_l[0], self.zero_filled_u_l[1]) +432 def kendall_tau(self): +433 """Calculate the Kendall's tau correlation between the experimental and reference mass spectra. 
434 -435 return correlation[0] -436 -437 def dft_correlation(self): -438 """ Calculate the DFT correlation between the experimental and reference mass spectra. -439 -440 Returns -441 ------- -442 correlation : float -443 DFT correlation between the experimental and reference mass spectra. -444 """ -445 if self.n_x_y == 0: -446 return 0 -447 -448 # count number of non-zero abundance/peak intensity values -449 n_x = sum(a != 0 for a in self.exp_abun) -450 -451 x, y = self.nan_fill(self.df, fill_with=0) -452 -453 x, y = self.normalize(x, y, norm_func=self.normalize_func) -454 -455 # get the Fourier transform of x and y -456 x_dft = rfft(x).real -457 y_dft = rfft(y).real -458 -459 s_dft_xy = dot(x_dft, y_dft)/(norm(x_dft)*norm(y_dft)) -460 -461 # using the existing weighted_cosine_correlation function to get S_WC(X,Y) -462 s_wc_x_y = self.weighted_cosine_correlation(nanfill=0) -463 -464 # final step -465 s_dft = (n_x * s_wc_x_y + self.n_x_y * s_dft_xy) / (n_x + self.n_x_y) +435 Returns +436 ------- +437 correlation : float +438 Kendall's tau correlation between the experimental and reference mass spectra.""" +439 # create dict['mz'] = abundance, for experimental data +440 # self.ms_mz_abun_dict = mass_spec.mz_abun_dict +441 +442 # create dict['mz'] = abundance, for experimental data +443 +444 # calculate Kendall's tau +445 correlation = kendalltau(self.zero_filled_u_l[0], self.zero_filled_u_l[1]) +446 +447 return correlation[0] +448 +449 def dft_correlation(self): +450 """Calculate the DFT correlation between the experimental and reference mass spectra. +451 +452 Returns +453 ------- +454 correlation : float +455 DFT correlation between the experimental and reference mass spectra. 
+456 """ +457 if self.n_x_y == 0: +458 return 0 +459 +460 # count number of non-zero abundance/peak intensity values +461 n_x = sum(a != 0 for a in self.exp_abun) +462 +463 x, y = self.nan_fill(self.df, fill_with=0) +464 +465 x, y = self.normalize(x, y, norm_func=self.normalize_func) 466 -467 return s_dft -468 -469 def dwt_correlation(self): -470 """ Calculate the DWT correlation between the experimental and reference mass spectra. -471 -472 Returns -473 ------- -474 correlation : float -475 DWT correlation between the experimental and reference mass spectra. -476 -477 Notes -478 ----- -479 This function requires the PyWavelets library to be installed. -480 This is not a default requirement as this function is not widely used. -481 """ -482 -483 from pywt import dwt -484 -485 if self.n_x_y == 0: -486 return 0 -487 -488 # count number of non-zero abundance/peak intensity values -489 n_x = sum(a != 0 for a in self.exp_abun) -490 -491 # calculate cosine correlation, -492 x, y = self.nan_fill(self.df, fill_with=0) -493 -494 x, y = self.normalize(x, y, norm_func=self.normalize_func) -495 -496 # Make x and y into an array -497 x_a = list(x) -498 y_a = list(y) +467 # get the Fourier transform of x and y +468 x_dft = rfft(x).real +469 y_dft = rfft(y).real +470 +471 s_dft_xy = dot(x_dft, y_dft) / (norm(x_dft) * norm(y_dft)) +472 +473 # using the existing weighted_cosine_correlation function to get S_WC(X,Y) +474 s_wc_x_y = self.weighted_cosine_correlation(nanfill=0) +475 +476 # final step +477 s_dft = (n_x * s_wc_x_y + self.n_x_y * s_dft_xy) / (n_x + self.n_x_y) +478 +479 return s_dft +480 +481 def dwt_correlation(self): +482 """Calculate the DWT correlation between the experimental and reference mass spectra. +483 +484 Returns +485 ------- +486 correlation : float +487 DWT correlation between the experimental and reference mass spectra. +488 +489 Notes +490 ----- +491 This function requires the PyWavelets library to be installed. 
+492 This is not a default requirement as this function is not widely used. +493 """ +494 +495 from pywt import dwt +496 +497 if self.n_x_y == 0: +498 return 0 499 -500 # get the wavelet transform of x and y (Daubechies with a filter length of 4. Asymmetric. pywavelets function) -501 # Will only use the detail dwt (dwtDd -502 x_dwtD = dwt(x_a, 'db2')[1] -503 y_dwtD = dwt(y_a, 'db2')[1] -504 -505 s_dwt_xy = dot(x_dwtD, y_dwtD) / (norm(x_dwtD) * norm(y_dwtD)) -506 -507 # using the existing weighted_cosine_correlation function to get S_WC(X,Y) -508 s_wc_x_y = self.weighted_cosine_correlation(nanfill=0) -509 -510 # final step -511 s_dwt = (n_x * s_wc_x_y + self.n_x_y * s_dwt_xy) / (n_x + self.n_x_y) -512 -513 return s_dwt -514 -515 def euclidean_distance(self): -516 """ Calculate the Euclidean distance between the experimental and reference mass spectra. -517 -518 Returns -519 ------- -520 correlation : float -521 Euclidean distance between the experimental and reference mass spectra. -522 """ -523 # correlation = euclidean_distance_manual(self.zero_filled_u_l[0], self.zero_filled_u_l[1]) -524 qlist = self.zero_filled_u_l[0] -525 rlist = self.zero_filled_u_l[1] +500 # count number of non-zero abundance/peak intensity values +501 n_x = sum(a != 0 for a in self.exp_abun) +502 +503 # calculate cosine correlation, +504 x, y = self.nan_fill(self.df, fill_with=0) +505 +506 x, y = self.normalize(x, y, norm_func=self.normalize_func) +507 +508 # Make x and y into an array +509 x_a = list(x) +510 y_a = list(y) +511 +512 # get the wavelet transform of x and y (Daubechies with a filter length of 4. Asymmetric. 
pywavelets function) +513 # Will only use the detail dwt (dwtDd +514 x_dwtD = dwt(x_a, "db2")[1] +515 y_dwtD = dwt(y_a, "db2")[1] +516 +517 s_dwt_xy = dot(x_dwtD, y_dwtD) / (norm(x_dwtD) * norm(y_dwtD)) +518 +519 # using the existing weighted_cosine_correlation function to get S_WC(X,Y) +520 s_wc_x_y = self.weighted_cosine_correlation(nanfill=0) +521 +522 # final step +523 s_dwt = (n_x * s_wc_x_y + self.n_x_y * s_dwt_xy) / (n_x + self.n_x_y) +524 +525 return s_dwt 526 -527 correlation = sqrt(np_sum(power(qlist - rlist, 2))) -528 -529 return correlation -530 -531 def manhattan_distance(self): -532 """ Calculate the Manhattan distance between the experimental and reference mass spectra. -533 -534 Returns -535 ------- -536 correlation : float -537 Manhattan distance between the experimental and reference mass spectra. -538 """ -539 qlist = self.zero_filled_u_l[0] -540 rlist = self.zero_filled_u_l[1] -541 -542 return np_sum(absolute(qlist - rlist)) -543 -544 def jaccard_distance(self): -545 """ Calculate the Jaccard distance between the experimental and reference mass spectra. -546 -547 Returns -548 ------- -549 correlation : float -550 Jaccard distance between the experimental and reference mass spectra. -551 """ -552 -553 def jaccard_similarity(list1, list2): -554 -555 intersection = len(list(set(list1).intersection(list2))) -556 union = (len(list1) + len(list2)) - intersection -557 return float(intersection) / union +527 def euclidean_distance(self): +528 """Calculate the Euclidean distance between the experimental and reference mass spectra. +529 +530 Returns +531 ------- +532 correlation : float +533 Euclidean distance between the experimental and reference mass spectra. 
+534 """ +535 # correlation = euclidean_distance_manual(self.zero_filled_u_l[0], self.zero_filled_u_l[1]) +536 qlist = self.zero_filled_u_l[0] +537 rlist = self.zero_filled_u_l[1] +538 +539 correlation = sqrt(np_sum(power(qlist - rlist, 2))) +540 +541 return correlation +542 +543 def manhattan_distance(self): +544 """Calculate the Manhattan distance between the experimental and reference mass spectra. +545 +546 Returns +547 ------- +548 correlation : float +549 Manhattan distance between the experimental and reference mass spectra. +550 """ +551 qlist = self.zero_filled_u_l[0] +552 rlist = self.zero_filled_u_l[1] +553 +554 return np_sum(absolute(qlist - rlist)) +555 +556 def jaccard_distance(self): +557 """Calculate the Jaccard distance between the experimental and reference mass spectra. 558 -559 qlist = self.zero_filled_u_l[0] -560 rlist = self.zero_filled_u_l[1] -561 -562 return np_sum(power(qlist - rlist, 2)) / (np_sum(power(qlist, 2)) + np_sum(power(rlist, 2)) - np_sum(qlist * rlist)) -563 # correlation = jaccard_similarity(self.zero_filled_u_l[0], self.zero_filled_u_l[1]) -564 # @return correlation -565 -566 def extra_distances(self): -567 """ Function to calculate distances using additional metrics defined in math_distance.py -568 -569 Currently, calculates all distances. -570 -571 Returns -572 ------- -573 dict_res : dict -574 Dictionary containing the distances between the experimental and reference mass spectra. -575 -576 """ -577 from corems.molecular_id.calc import math_distance +559 Returns +560 ------- +561 correlation : float +562 Jaccard distance between the experimental and reference mass spectra. 
+563 """ +564 +565 def jaccard_similarity(list1, list2): +566 intersection = len(list(set(list1).intersection(list2))) +567 union = (len(list1) + len(list2)) - intersection +568 return float(intersection) / union +569 +570 qlist = self.zero_filled_u_l[0] +571 rlist = self.zero_filled_u_l[1] +572 +573 return np_sum(power(qlist - rlist, 2)) / ( +574 np_sum(power(qlist, 2)) + np_sum(power(rlist, 2)) - np_sum(qlist * rlist) +575 ) +576 # correlation = jaccard_similarity(self.zero_filled_u_l[0], self.zero_filled_u_l[1]) +577 # @return correlation 578 -579 #qlist = self.zero_filled_u_l[2] -580 #rlist = self.zero_filled_u_l[3] +579 def extra_distances(self): +580 """Function to calculate distances using additional metrics defined in math_distance.py 581 -582 dict_res = {} +582 Currently, calculates all distances. 583 -584 for method in methods_name: -585 # function_name = method + "_distance" -586 function_name = method -587 if hasattr(math_distance, function_name): -588 f = getattr(math_distance, function_name) -589 -590 if function_name == "canberra_metric": +584 Returns +585 ------- +586 dict_res : dict +587 Dictionary containing the distances between the experimental and reference mass spectra. 
+588 +589 """ +590 from corems.molecular_id.calc import math_distance 591 -592 x, y = self.nan_fill(self.df, fill_with=0) -593 -594 qlist, rlist = self.normalize(x, y, norm_func=self.normalize_func) -595 # print("qlist:") -596 # print(qlist) -597 # print("rlist:") -598 # print(rlist) -599 -600 else: -601 qlist = self.zero_filled_u_l[0] -602 rlist = self.zero_filled_u_l[1] -603 -604 dist = f(qlist, rlist) -605 #if method == "Minokowski_3": -606 # print("qlist:") -607 # print(qlist) -608 # print("rlist") -609 # print(rlist) -610 # exit() -611 # if dist == np.nan or dis == np.inf: -612 # print(self.exp_abun) -613 # print(self.exp_mz) -614 #print(function_name) -615 # print(len(self.exp_abun)) -616 # print(len(self.exp_mz)) -617 # print(self.zero_filled_u_l[1]) -618 dict_res[method] = dist -619 -620 return dict_res +592 # qlist = self.zero_filled_u_l[2] +593 # rlist = self.zero_filled_u_l[3] +594 +595 dict_res = {} +596 +597 for method in methods_name: +598 # function_name = method + "_distance" +599 function_name = method +600 if hasattr(math_distance, function_name): +601 f = getattr(math_distance, function_name) +602 +603 if function_name == "canberra_metric": +604 x, y = self.nan_fill(self.df, fill_with=0) +605 +606 qlist, rlist = self.normalize(x, y, norm_func=self.normalize_func) +607 # print("qlist:") +608 # print(qlist) +609 # print("rlist:") +610 # print(rlist) +611 +612 else: +613 qlist = self.zero_filled_u_l[0] +614 rlist = self.zero_filled_u_l[1] +615 +616 dist = f(qlist, rlist) +617 # if method == "Minokowski_3": +618 # print("qlist:") +619 # print(qlist) +620 # print("rlist") +621 # print(rlist) +622 # exit() +623 # if dist == np.nan or dis == np.inf: +624 # print(self.exp_abun) +625 # print(self.exp_mz) +626 # print(function_name) +627 # print(len(self.exp_abun)) +628 # print(len(self.exp_mz)) +629 # print(self.zero_filled_u_l[1]) +630 dict_res[method] = dist +631 +632 return dict_res

    @@ -805,501 +817,509 @@

    -
    127class SpectralSimilarity():
    -128    """ Class containing methods for calculating spectral similarity between two mass spectra.
    -129    
    -130    Parameters
    -131    ----------
    -132    ms_mz_abun_dict : dict
    -133        Dictionary of mass to abundance values for the experimental mass spectrum.
    -134    ref_obj : dict
    -135        Dictionary of mass to abundance values for the reference mass spectrum.
    -136    norm_func : function
    -137        Function to normalize the abundance values.
    -138    
    -139    Attributes
    -140    ----------
    -141    normalize_func : function
    -142        Function to normalize the abundance values.
    -143    ms_mz_abun_dict : dict
    -144        Dictionary of mass to abundance values for the experimental mass spectrum.
    -145    ref_obj : dict
    -146        Dictionary of mass to abundance values for the reference mass spectrum.
    -147    exp_abun : list
    -148        List of abundance values for the experimental mass spectrum.
    -149    exp_mz : list
    -150        List of mass values for the experimental mass spectrum.
    -151    ref_mz : list
    -152        List of mass values for the reference mass spectrum.
    -153    ref_abun : list
    -154        List of abundance values for the reference mass spectrum.
    -155    ref_mz_abun_dict : dict
    -156        Dictionary of mass to abundance values for the reference mass spectrum.
    -157    df : DataFrame
    -158        DataFrame containing the experimental and reference mass spectrum data.
    -159    zero_filled_u_l : tuple
    -160        Tuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    -161    common_mz_values : list
    -162        List of common mass values between the experimental and reference mass spectra.
    -163    n_x_y : int
    -164        Number of common mass values between the experimental and reference mass spectra.
    -165    
    -166    Methods
    -167    -------
    -168    * nan_fill(df, fill_with=0).
    -169        Fill missing mass values with a given value.
    -170    * normalize(x, y, norm_func=sum).
    -171        Normalize the abundance values.
    -172    * weighted_cosine_correlation(a=0.5, b=1.3, nanfill=1e-10).
    -173        Calculate the weighted cosine correlation between the experimental and reference mass spectra.
    -174    * cosine_correlation().
    -175        Calculate the cosine correlation between the experimental and reference mass spectra.
    -176    * stein_scott().
    -177        Calculate the Stein-Scott similarity between the experimental and reference mass spectra.
    -178    * pearson_correlation().
    -179        Calculate the Pearson correlation between the experimental and reference mass spectra.
    -180    * spearman_correlation().
    -181        Calculate the Spearman correlation between the experimental and reference mass spectra.
    -182
    -183    
    -184    """
    -185    def __init__(self, ms_mz_abun_dict, ref_obj, norm_func=sum):
    +            
    131class SpectralSimilarity:
    +132    """Class containing methods for calculating spectral similarity between two mass spectra.
    +133
    +134    Parameters
    +135    ----------
    +136    ms_mz_abun_dict : dict
    +137        Dictionary of mass to abundance values for the experimental mass spectrum.
    +138    ref_obj : dict
    +139        Dictionary of mass to abundance values for the reference mass spectrum.
    +140    norm_func : function
    +141        Function to normalize the abundance values.
    +142
    +143    Attributes
    +144    ----------
    +145    normalize_func : function
    +146        Function to normalize the abundance values.
    +147    ms_mz_abun_dict : dict
    +148        Dictionary of mass to abundance values for the experimental mass spectrum.
    +149    ref_obj : dict
    +150        Dictionary of mass to abundance values for the reference mass spectrum.
    +151    exp_abun : list
    +152        List of abundance values for the experimental mass spectrum.
    +153    exp_mz : list
    +154        List of mass values for the experimental mass spectrum.
    +155    ref_mz : list
    +156        List of mass values for the reference mass spectrum.
    +157    ref_abun : list
    +158        List of abundance values for the reference mass spectrum.
    +159    ref_mz_abun_dict : dict
    +160        Dictionary of mass to abundance values for the reference mass spectrum.
    +161    df : DataFrame
    +162        DataFrame containing the experimental and reference mass spectrum data.
    +163    zero_filled_u_l : tuple
    +164        Tuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    +165    common_mz_values : list
    +166        List of common mass values between the experimental and reference mass spectra.
    +167    n_x_y : int
    +168        Number of common mass values between the experimental and reference mass spectra.
    +169
    +170    Methods
    +171    -------
    +172    * nan_fill(df, fill_with=0).
    +173        Fill missing mass values with a given value.
    +174    * normalize(x, y, norm_func=sum).
    +175        Normalize the abundance values.
    +176    * weighted_cosine_correlation(a=0.5, b=1.3, nanfill=1e-10).
    +177        Calculate the weighted cosine correlation between the experimental and reference mass spectra.
    +178    * cosine_correlation().
    +179        Calculate the cosine correlation between the experimental and reference mass spectra.
    +180    * stein_scott().
    +181        Calculate the Stein-Scott similarity between the experimental and reference mass spectra.
    +182    * pearson_correlation().
    +183        Calculate the Pearson correlation between the experimental and reference mass spectra.
    +184    * spearman_correlation().
    +185        Calculate the Spearman correlation between the experimental and reference mass spectra.
     186
    -187        self.normalize_func = norm_func
    -188        self.ms_mz_abun_dict = ms_mz_abun_dict
    -189        self.ref_obj = ref_obj
    -190
    -191        self.exp_abun = list(self.ms_mz_abun_dict.values())
    -192        self.exp_mz = list(self.ms_mz_abun_dict.keys())
    -193
    -194        self.ref_mz = self.ref_obj.get("mz")
    -195        self.ref_abun = self.ref_obj.get("abundance")
    -196
    -197        self.ref_mz_abun_dict = dict(zip(self.ref_mz, self.ref_abun))
    -198
    -199        # parse to dataframe, easier to zerofill and tranpose
    -200        self.df = DataFrame([self.ms_mz_abun_dict, self.ref_mz_abun_dict])
    -201
    -202        # fill missing mz with abundance 0
    -203        x, y = self.nan_fill(self.df, fill_with=1e-10)
    -204        
    -205        self.zero_filled_u_l = self.normalize(x, y, norm_func=self.normalize_func)
    -206        
    -207        # filter out the mass values that have zero intensities in self.exp_abun
    -208        exp_mz_filtered = set([k for k in self.exp_mz if self.ms_mz_abun_dict[k] != 0])
    -209
    -210        # filter out the mass values that have zero intensities in self.ref_mz
    -211        ref_mz_filtered = set([k for k in self.ref_mz if self.ref_mz_abun_dict[k] != 0])
    -212
    -213        # find the intersection/common mass values of both ref and exp, and sort them
    -214        self.common_mz_values = sorted(list(exp_mz_filtered.intersection(ref_mz_filtered)))
    -215
    -216        # find the number of common mass values (after filtering 0s)
    -217        self.n_x_y = len(self.common_mz_values)
    -218        # print(self.n_x_y)
    -219
    -220    def nan_fill(self, df, fill_with=0):
    -221        """ Fill missing mass values with a given value.
    -222        
    -223        Parameters
    -224        ----------
    -225        df : DataFrame
    -226            DataFrame containing the experimental and reference mass spectrum data.
    -227        fill_with : float
    -228            Value to fill missing mass values with.
    -229        
    -230        Returns
    -231        -------
    -232        x : list
    -233            List of abundance values for the experimental mass spectrum.
    -234        y : list
    -235            List of abundance values for the reference mass spectrum."""
    -236        df.fillna(fill_with, inplace=True)
    -237
    -238        return df.T[0].values, df.T[1].values
    -239
    -240    def normalize(self, x, y, norm_func=sum):
    -241        """ Normalize the abundance values.
    -242        
    -243        Parameters
    -244        ----------
    -245        x : list
    -246            List of abundance values for the experimental mass spectrum.
    -247        y : list
    -248            List of abundance values for the reference mass spectrum.
    -249        norm_func : function
    -250            Function to normalize the abundance values.
    -251            Default is sum
    -252        
    -253        Returns
    -254        -------
    -255        u_l : tuple
    -256            Tuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    -257        """
    -258        if norm_func is not None:
    -259            u_l = (x / norm_func(x), y / norm_func(y))
    -260            return u_l
    -261        else:
    -262            return (x, y)
    -263
    -264    def weighted_cosine_correlation(self, a=0.5, b=1.3, nanfill=1e-10):
    -265        """ Calculate the weighted cosine correlation between the experimental and reference mass spectra.
    -266        
    -267        Parameters
    -268        ----------
    -269        a : float
    -270            Weighting factor for the abundance values.
    -271            Default is 0.5
    -272        b : float
    -273            Weighting factor for the mass values.
    -274            Default is 1.3
    -275        nanfill : float
    -276            Value to fill missing mass values with.
    -277            Default is 1e-10
    -278        
    -279        Returns
    -280        -------
    -281        correlation : float
    -282            Weighted cosine correlation between the experimental and reference mass spectra.
    -283        """
    -284        # create dict['mz'] = abundance, for experimental data
    -285        # ms_mz_abun_dict = mass_spec.mz_abun_dict
    -286        # weight exp data
    -287
    -288        xc = power(self.exp_abun, a) * power(self.exp_mz, b)
    -289
    -290        # track back to individual mz
    -291        weighted_exp_dict = dict(zip(self.ms_mz_abun_dict.keys(), xc))
    -292
    -293        # weight ref data
    -294        yc = power(self.ref_obj.get("abundance"), a) * power(self.ref_obj.get("mz"), b)
    +187
    +188    """
    +189
    +190    def __init__(self, ms_mz_abun_dict, ref_obj, norm_func=sum):
    +191        self.normalize_func = norm_func
    +192        self.ms_mz_abun_dict = ms_mz_abun_dict
    +193        self.ref_obj = ref_obj
    +194
    +195        self.exp_abun = list(self.ms_mz_abun_dict.values())
    +196        self.exp_mz = list(self.ms_mz_abun_dict.keys())
    +197
    +198        self.ref_mz = self.ref_obj.get("mz")
    +199        self.ref_abun = self.ref_obj.get("abundance")
    +200
    +201        self.ref_mz_abun_dict = dict(zip(self.ref_mz, self.ref_abun))
    +202
    +203        # parse to dataframe, easier to zerofill and tranpose
    +204        self.df = DataFrame([self.ms_mz_abun_dict, self.ref_mz_abun_dict])
    +205
    +206        # fill missing mz with abundance 0
    +207        x, y = self.nan_fill(self.df, fill_with=1e-10)
    +208
    +209        self.zero_filled_u_l = self.normalize(x, y, norm_func=self.normalize_func)
    +210
    +211        # filter out the mass values that have zero intensities in self.exp_abun
    +212        exp_mz_filtered = set([k for k in self.exp_mz if self.ms_mz_abun_dict[k] != 0])
    +213
    +214        # filter out the mass values that have zero intensities in self.ref_mz
    +215        ref_mz_filtered = set([k for k in self.ref_mz if self.ref_mz_abun_dict[k] != 0])
    +216
    +217        # find the intersection/common mass values of both ref and exp, and sort them
    +218        self.common_mz_values = sorted(
    +219            list(exp_mz_filtered.intersection(ref_mz_filtered))
    +220        )
    +221
    +222        # find the number of common mass values (after filtering 0s)
    +223        self.n_x_y = len(self.common_mz_values)
    +224        # print(self.n_x_y)
    +225
    +226    def nan_fill(self, df, fill_with=0):
    +227        """Fill missing mass values with a given value.
    +228
    +229        Parameters
    +230        ----------
    +231        df : DataFrame
    +232            DataFrame containing the experimental and reference mass spectrum data.
    +233        fill_with : float
    +234            Value to fill missing mass values with.
    +235
    +236        Returns
    +237        -------
    +238        x : list
    +239            List of abundance values for the experimental mass spectrum.
    +240        y : list
    +241            List of abundance values for the reference mass spectrum."""
    +242        df.fillna(fill_with, inplace=True)
    +243
    +244        return df.T[0].values, df.T[1].values
    +245
    +246    def normalize(self, x, y, norm_func=sum):
    +247        """Normalize the abundance values.
    +248
    +249        Parameters
    +250        ----------
    +251        x : list
    +252            List of abundance values for the experimental mass spectrum.
    +253        y : list
    +254            List of abundance values for the reference mass spectrum.
    +255        norm_func : function
    +256            Function to normalize the abundance values.
    +257            Default is sum
    +258
    +259        Returns
    +260        -------
    +261        u_l : tuple
    +262            Tuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    +263        """
    +264        if norm_func is not None:
    +265            u_l = (x / norm_func(x), y / norm_func(y))
    +266            return u_l
    +267        else:
    +268            return (x, y)
    +269
    +270    def weighted_cosine_correlation(self, a=0.5, b=1.3, nanfill=1e-10):
    +271        """Calculate the weighted cosine correlation between the experimental and reference mass spectra.
    +272
    +273        Parameters
    +274        ----------
    +275        a : float
    +276            Weighting factor for the abundance values.
    +277            Default is 0.5
    +278        b : float
    +279            Weighting factor for the mass values.
    +280            Default is 1.3
    +281        nanfill : float
    +282            Value to fill missing mass values with.
    +283            Default is 1e-10
    +284
    +285        Returns
    +286        -------
    +287        correlation : float
    +288            Weighted cosine correlation between the experimental and reference mass spectra.
    +289        """
    +290        # create dict['mz'] = abundance, for experimental data
    +291        # ms_mz_abun_dict = mass_spec.mz_abun_dict
    +292        # weight exp data
    +293
    +294        xc = power(self.exp_abun, a) * power(self.exp_mz, b)
     295
    -296        ref_mz_abun_dict = dict(zip(self.ref_obj.get("mz"), yc))
    -297
    -298        # parse to dataframe, easier to zerofill and tranpose
    -299        df = DataFrame([weighted_exp_dict, ref_mz_abun_dict])
    -300
    -301        # fill missing mz with weight {abun**a}{m/z**b} to 0
    -302        x, y = self.nan_fill(df, fill_with=nanfill)
    +296        # track back to individual mz
    +297        weighted_exp_dict = dict(zip(self.ms_mz_abun_dict.keys(), xc))
    +298
    +299        # weight ref data
    +300        yc = power(self.ref_obj.get("abundance"), a) * power(self.ref_obj.get("mz"), b)
    +301
    +302        ref_mz_abun_dict = dict(zip(self.ref_obj.get("mz"), yc))
     303
    -304        # correlation = (1 - cosine(x, y))
    -305
    -306        correlation = dot(x, y) / (norm(x) * norm(y))
    -307
    -308        return correlation
    +304        # parse to dataframe, easier to zerofill and tranpose
    +305        df = DataFrame([weighted_exp_dict, ref_mz_abun_dict])
    +306
    +307        # fill missing mz with weight {abun**a}{m/z**b} to 0
    +308        x, y = self.nan_fill(df, fill_with=nanfill)
     309
    -310    def cosine_correlation(self):
    -311        """ Calculate the cosine correlation between the experimental and reference mass spectra.
    -312
    -313        Returns
    -314        -------
    -315        correlation : float
    -316            Cosine correlation between the experimental and reference mass spectra.
    -317        
    -318        """
    -319        # calculate cosine correlation,
    -320        x = self.zero_filled_u_l[0]
    -321        y = self.zero_filled_u_l[1]
    -322
    -323        # correlation = (1 - cosine(x, y))
    -324
    -325        correlation = dot(x, y) / (norm(x) * norm(y))
    -326
    -327        return correlation
    +310        # correlation = (1 - cosine(x, y))
    +311
    +312        correlation = dot(x, y) / (norm(x) * norm(y))
    +313
    +314        return correlation
    +315
    +316    def cosine_correlation(self):
    +317        """Calculate the cosine correlation between the experimental and reference mass spectra.
    +318
    +319        Returns
    +320        -------
    +321        correlation : float
    +322            Cosine correlation between the experimental and reference mass spectra.
    +323
    +324        """
    +325        # calculate cosine correlation,
    +326        x = self.zero_filled_u_l[0]
    +327        y = self.zero_filled_u_l[1]
     328
    -329    def stein_scott(self):
    -330        """ Calculate the Stein-Scott similarity between the experimental and reference mass spectra.
    -331        
    -332        Returns
    -333        -------
    -334        s_ss_x_y : float
    -335            Stein-Scott similarity between the experimental and reference mass spectra.
    -336        s_ss_x_y_nist : float
    -337            Stein-Scott similarity between the experimental and reference mass spectra.
    -338        """
    -339        #TODO check this code
    -340        if self.n_x_y == 0: return 0, 0
    -341
    -342        # count number of non-zero abundance/peak intensity values
    -343        n_x = sum(a != 0 for a in self.exp_abun)
    -344
    -345        s_r_x_y = 0
    -346
    -347        a, b = 1, 0
    +329        # correlation = (1 - cosine(x, y))
    +330
    +331        correlation = dot(x, y) / (norm(x) * norm(y))
    +332
    +333        return correlation
    +334
    +335    def stein_scott(self):
    +336        """Calculate the Stein-Scott similarity between the experimental and reference mass spectra.
    +337
    +338        Returns
    +339        -------
    +340        s_ss_x_y : float
    +341            Stein-Scott similarity between the experimental and reference mass spectra.
    +342        s_ss_x_y_nist : float
    +343            Stein-Scott similarity between the experimental and reference mass spectra.
    +344        """
    +345        # TODO check this code
    +346        if self.n_x_y == 0:
    +347            return 0, 0
     348
    -349        for i in range(1, self.n_x_y):
    -350
    -351            current_value = self.common_mz_values[i]
    -352            previous_value = self.common_mz_values[i - 1]
    +349        # count number of non-zero abundance/peak intensity values
    +350        n_x = sum(a != 0 for a in self.exp_abun)
    +351
    +352        s_r_x_y = 0
     353
    -354            y_i = self.ref_mz_abun_dict[current_value]
    -355            y_i_minus1 = self.ref_mz_abun_dict[previous_value]
    -356
    -357            lc_current = power(y_i, a) * power(current_value, b)
    -358            lc_previous = power(y_i_minus1, a) * power(previous_value, b)
    +354        a, b = 1, 0
    +355
    +356        for i in range(1, self.n_x_y):
    +357            current_value = self.common_mz_values[i]
    +358            previous_value = self.common_mz_values[i - 1]
     359
    -360            x_i = self.ms_mz_abun_dict[current_value]
    -361            x_i_minus1 = self.ms_mz_abun_dict[previous_value]
    +360            y_i = self.ref_mz_abun_dict[current_value]
    +361            y_i_minus1 = self.ref_mz_abun_dict[previous_value]
     362
    -363            uc_current = power(x_i, a) * power(current_value, b)
    -364            uc_previous = power(x_i_minus1, a) * power(previous_value, b)
    +363            lc_current = power(y_i, a) * power(current_value, b)
    +364            lc_previous = power(y_i_minus1, a) * power(previous_value, b)
     365
    -366            T1 = lc_current / lc_previous
    -367
    -368            T2 = uc_previous / uc_current
    -369
    -370            temp_computation = T1 * T2
    +366            x_i = self.ms_mz_abun_dict[current_value]
    +367            x_i_minus1 = self.ms_mz_abun_dict[previous_value]
    +368
    +369            uc_current = power(x_i, a) * power(current_value, b)
    +370            uc_previous = power(x_i_minus1, a) * power(previous_value, b)
     371
    -372            n = 0
    -373            if temp_computation <= 1:
    -374                n = 1
    -375            else:
    -376                n = -1
    -377            
    -378            s_r_x_y = s_r_x_y + power(temp_computation, n)
    -379
    -380        # finish the calculation of S_R(X,Y)
    -381
    -382        s_r_x_y = s_r_x_y / self.n_x_y
    -383        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    -384        s_wc_x_y = self.weighted_cosine_correlation(a=0.5, b=3, nanfill=0)
    +372            T1 = lc_current / lc_previous
    +373
    +374            T2 = uc_previous / uc_current
    +375
    +376            temp_computation = T1 * T2
    +377
    +378            n = 0
    +379            if temp_computation <= 1:
    +380                n = 1
    +381            else:
    +382                n = -1
    +383
    +384            s_r_x_y = s_r_x_y + power(temp_computation, n)
     385
    -386        s_ss_x_y = ((n_x * s_wc_x_y) + (self.n_x_y * s_r_x_y)) / (n_x + self.n_x_y)
    +386        # finish the calculation of S_R(X,Y)
     387
    -388        s_wc_x_y_nist = self.weighted_cosine_correlation(a=0.5, b=1.3, nanfill=0)
    -389
    -390        s_ss_x_y_nist = ((n_x * s_wc_x_y_nist) + (self.n_x_y * s_r_x_y)) / (n_x + self.n_x_y)    
    -391        # final step
    -392
    -393        return s_ss_x_y, s_ss_x_y_nist
    -394
    -395    def pearson_correlation(self,):
    -396        """ Calculate the Pearson correlation between the experimental and reference mass spectra.
    -397        
    -398        Returns
    -399        -------
    -400        correlation : float
    -401            Pearson correlation between the experimental and reference mass spectra.    
    -402        """
    -403        correlation = pearsonr(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    -404
    -405        return correlation[0]
    -406
    -407    def spearman_correlation(self):
    -408        """ Calculate the Spearman correlation between the experimental and reference mass spectra.
    -409        
    -410        Returns
    -411        -------
    -412        coorelation : float
    -413            Spearman correlation between the experimental and reference mass spectra.
    -414        """
    -415        # calculate Spearman correlation
    -416        # ## TODO - Check axis
    -417        correlation = spearmanr(self.zero_filled_u_l[0], self.zero_filled_u_l[1], axis=0)
    -418
    -419        return correlation[0]
    -420
    -421    def kendall_tau(self):
    -422        """ Calculate the Kendall's tau correlation between the experimental and reference mass spectra.
    -423        
    -424        Returns
    -425        -------
    -426        correlation : float
    -427            Kendall's tau correlation between the experimental and reference mass spectra."""
    -428        # create dict['mz'] = abundance, for experimental data
    -429        # self.ms_mz_abun_dict = mass_spec.mz_abun_dict
    +388        s_r_x_y = s_r_x_y / self.n_x_y
    +389        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    +390        s_wc_x_y = self.weighted_cosine_correlation(a=0.5, b=3, nanfill=0)
    +391
    +392        s_ss_x_y = ((n_x * s_wc_x_y) + (self.n_x_y * s_r_x_y)) / (n_x + self.n_x_y)
    +393
    +394        s_wc_x_y_nist = self.weighted_cosine_correlation(a=0.5, b=1.3, nanfill=0)
    +395
    +396        s_ss_x_y_nist = ((n_x * s_wc_x_y_nist) + (self.n_x_y * s_r_x_y)) / (
    +397            n_x + self.n_x_y
    +398        )
    +399        # final step
    +400
    +401        return s_ss_x_y, s_ss_x_y_nist
    +402
    +403    def pearson_correlation(
    +404        self,
    +405    ):
    +406        """Calculate the Pearson correlation between the experimental and reference mass spectra.
    +407
    +408        Returns
    +409        -------
    +410        correlation : float
    +411            Pearson correlation between the experimental and reference mass spectra.
    +412        """
    +413        correlation = pearsonr(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +414
    +415        return correlation[0]
    +416
    +417    def spearman_correlation(self):
    +418        """Calculate the Spearman correlation between the experimental and reference mass spectra.
    +419
    +420        Returns
    +421        -------
    +422        coorelation : float
    +423            Spearman correlation between the experimental and reference mass spectra.
    +424        """
    +425        # calculate Spearman correlation
    +426        # ## TODO - Check axis
    +427        correlation = spearmanr(
    +428            self.zero_filled_u_l[0], self.zero_filled_u_l[1], axis=0
    +429        )
     430
    -431        # create dict['mz'] = abundance, for experimental data
    +431        return correlation[0]
     432
    -433        # calculate Kendall's tau
    -434        correlation = kendalltau(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +433    def kendall_tau(self):
    +434        """Calculate the Kendall's tau correlation between the experimental and reference mass spectra.
     435
    -436        return correlation[0]
    -437
    -438    def dft_correlation(self):
    -439        """ Calculate the DFT correlation between the experimental and reference mass spectra.
    -440
    -441        Returns
    -442        -------
    -443        correlation : float
    -444            DFT correlation between the experimental and reference mass spectra.
    -445        """
    -446        if self.n_x_y == 0:
    -447            return 0
    -448
    -449        # count number of non-zero abundance/peak intensity values
    -450        n_x = sum(a != 0 for a in self.exp_abun)
    -451
    -452        x, y = self.nan_fill(self.df, fill_with=0)
    -453        
    -454        x, y = self.normalize(x, y, norm_func=self.normalize_func)
    -455        
    -456        # get the Fourier transform of x and y
    -457        x_dft = rfft(x).real
    -458        y_dft = rfft(y).real
    -459
    -460        s_dft_xy = dot(x_dft, y_dft)/(norm(x_dft)*norm(y_dft))
    -461
    -462        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    -463        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)
    -464
    -465        # final step
    -466        s_dft = (n_x * s_wc_x_y + self.n_x_y * s_dft_xy) / (n_x + self.n_x_y)
    +436        Returns
    +437        -------
    +438        correlation : float
    +439            Kendall's tau correlation between the experimental and reference mass spectra."""
    +440        # create dict['mz'] = abundance, for experimental data
    +441        # self.ms_mz_abun_dict = mass_spec.mz_abun_dict
    +442
    +443        # create dict['mz'] = abundance, for experimental data
    +444
    +445        # calculate Kendall's tau
    +446        correlation = kendalltau(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +447
    +448        return correlation[0]
    +449
    +450    def dft_correlation(self):
    +451        """Calculate the DFT correlation between the experimental and reference mass spectra.
    +452
    +453        Returns
    +454        -------
    +455        correlation : float
    +456            DFT correlation between the experimental and reference mass spectra.
    +457        """
    +458        if self.n_x_y == 0:
    +459            return 0
    +460
    +461        # count number of non-zero abundance/peak intensity values
    +462        n_x = sum(a != 0 for a in self.exp_abun)
    +463
    +464        x, y = self.nan_fill(self.df, fill_with=0)
    +465
    +466        x, y = self.normalize(x, y, norm_func=self.normalize_func)
     467
    -468        return s_dft
    -469
    -470    def dwt_correlation(self):
    -471        """ Calculate the DWT correlation between the experimental and reference mass spectra.
    -472
    -473        Returns
    -474        -------
    -475        correlation : float
    -476            DWT correlation between the experimental and reference mass spectra.
    -477        
    -478        Notes
    -479        -----
    -480        This function requires the PyWavelets library to be installed. 
    -481            This is not a default requirement as this function is not widely used. 
    -482        """
    -483
    -484        from pywt import dwt 
    -485        
    -486        if self.n_x_y == 0:
    -487            return 0
    -488
    -489        # count number of non-zero abundance/peak intensity values
    -490        n_x = sum(a != 0 for a in self.exp_abun)
    -491
    -492        # calculate cosine correlation,
    -493        x, y = self.nan_fill(self.df, fill_with=0)
    -494        
    -495        x, y = self.normalize(x, y, norm_func=self.normalize_func)
    -496
    -497        # Make x and y into an array
    -498        x_a = list(x)
    -499        y_a = list(y)
    +468        # get the Fourier transform of x and y
    +469        x_dft = rfft(x).real
    +470        y_dft = rfft(y).real
    +471
    +472        s_dft_xy = dot(x_dft, y_dft) / (norm(x_dft) * norm(y_dft))
    +473
    +474        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    +475        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)
    +476
    +477        # final step
    +478        s_dft = (n_x * s_wc_x_y + self.n_x_y * s_dft_xy) / (n_x + self.n_x_y)
    +479
    +480        return s_dft
    +481
    +482    def dwt_correlation(self):
    +483        """Calculate the DWT correlation between the experimental and reference mass spectra.
    +484
    +485        Returns
    +486        -------
    +487        correlation : float
    +488            DWT correlation between the experimental and reference mass spectra.
    +489
    +490        Notes
    +491        -----
    +492        This function requires the PyWavelets library to be installed.
    +493            This is not a default requirement as this function is not widely used.
    +494        """
    +495
    +496        from pywt import dwt
    +497
    +498        if self.n_x_y == 0:
    +499            return 0
     500
    -501        # get the wavelet transform of x and y (Daubechies with a filter length of 4. Asymmetric. pywavelets function)
    -502        # Will only use the detail dwt (dwtDd
    -503        x_dwtD = dwt(x_a, 'db2')[1]
    -504        y_dwtD = dwt(y_a, 'db2')[1]
    -505
    -506        s_dwt_xy = dot(x_dwtD, y_dwtD) / (norm(x_dwtD) * norm(y_dwtD))
    -507
    -508        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    -509        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)
    -510
    -511        # final step
    -512        s_dwt = (n_x * s_wc_x_y + self.n_x_y * s_dwt_xy) / (n_x + self.n_x_y)
    -513
    -514        return s_dwt
    -515
    -516    def euclidean_distance(self):
    -517        """ Calculate the Euclidean distance between the experimental and reference mass spectra.
    -518        
    -519        Returns
    -520        -------
    -521        correlation : float
    -522            Euclidean distance between the experimental and reference mass spectra.
    -523        """
    -524        # correlation = euclidean_distance_manual(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    -525        qlist = self.zero_filled_u_l[0]
    -526        rlist = self.zero_filled_u_l[1]
    +501        # count number of non-zero abundance/peak intensity values
    +502        n_x = sum(a != 0 for a in self.exp_abun)
    +503
    +504        # calculate cosine correlation,
    +505        x, y = self.nan_fill(self.df, fill_with=0)
    +506
    +507        x, y = self.normalize(x, y, norm_func=self.normalize_func)
    +508
    +509        # Make x and y into an array
    +510        x_a = list(x)
    +511        y_a = list(y)
    +512
    +513        # get the wavelet transform of x and y (Daubechies with a filter length of 4. Asymmetric. pywavelets function)
    +514        # Will only use the detail dwt (dwtDd
    +515        x_dwtD = dwt(x_a, "db2")[1]
    +516        y_dwtD = dwt(y_a, "db2")[1]
    +517
    +518        s_dwt_xy = dot(x_dwtD, y_dwtD) / (norm(x_dwtD) * norm(y_dwtD))
    +519
    +520        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    +521        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)
    +522
    +523        # final step
    +524        s_dwt = (n_x * s_wc_x_y + self.n_x_y * s_dwt_xy) / (n_x + self.n_x_y)
    +525
    +526        return s_dwt
     527
    -528        correlation = sqrt(np_sum(power(qlist - rlist, 2)))
    -529
    -530        return correlation
    -531
    -532    def manhattan_distance(self):
    -533        """ Calculate the Manhattan distance between the experimental and reference mass spectra.
    -534        
    -535        Returns
    -536        -------
    -537        correlation : float
    -538            Manhattan distance between the experimental and reference mass spectra.
    -539        """
    -540        qlist = self.zero_filled_u_l[0]
    -541        rlist = self.zero_filled_u_l[1]
    -542
    -543        return np_sum(absolute(qlist - rlist))
    -544
    -545    def jaccard_distance(self):
    -546        """ Calculate the Jaccard distance between the experimental and reference mass spectra.
    -547        
    -548        Returns
    -549        -------
    -550        correlation : float
    -551            Jaccard distance between the experimental and reference mass spectra.
    -552        """
    -553
    -554        def jaccard_similarity(list1, list2):
    -555
    -556            intersection = len(list(set(list1).intersection(list2)))
    -557            union = (len(list1) + len(list2)) - intersection
    -558            return float(intersection) / union
    +528    def euclidean_distance(self):
    +529        """Calculate the Euclidean distance between the experimental and reference mass spectra.
    +530
    +531        Returns
    +532        -------
    +533        correlation : float
    +534            Euclidean distance between the experimental and reference mass spectra.
    +535        """
    +536        # correlation = euclidean_distance_manual(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +537        qlist = self.zero_filled_u_l[0]
    +538        rlist = self.zero_filled_u_l[1]
    +539
    +540        correlation = sqrt(np_sum(power(qlist - rlist, 2)))
    +541
    +542        return correlation
    +543
    +544    def manhattan_distance(self):
    +545        """Calculate the Manhattan distance between the experimental and reference mass spectra.
    +546
    +547        Returns
    +548        -------
    +549        correlation : float
    +550            Manhattan distance between the experimental and reference mass spectra.
    +551        """
    +552        qlist = self.zero_filled_u_l[0]
    +553        rlist = self.zero_filled_u_l[1]
    +554
    +555        return np_sum(absolute(qlist - rlist))
    +556
    +557    def jaccard_distance(self):
    +558        """Calculate the Jaccard distance between the experimental and reference mass spectra.
     559
    -560        qlist = self.zero_filled_u_l[0]
    -561        rlist = self.zero_filled_u_l[1]
    -562
    -563        return np_sum(power(qlist - rlist, 2)) / (np_sum(power(qlist, 2)) + np_sum(power(rlist, 2)) - np_sum(qlist * rlist))
    -564        # correlation = jaccard_similarity(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    -565        # @return correlation
    -566
    -567    def extra_distances(self):
    -568        """ Function to calculate distances using additional metrics defined in math_distance.py
    -569
    -570        Currently, calculates all distances.
    -571
    -572        Returns
    -573        -------
    -574        dict_res : dict
    -575            Dictionary containing the distances between the experimental and reference mass spectra.
    -576    
    -577        """
    -578        from corems.molecular_id.calc import math_distance
    +560        Returns
    +561        -------
    +562        correlation : float
    +563            Jaccard distance between the experimental and reference mass spectra.
    +564        """
    +565
    +566        def jaccard_similarity(list1, list2):
    +567            intersection = len(list(set(list1).intersection(list2)))
    +568            union = (len(list1) + len(list2)) - intersection
    +569            return float(intersection) / union
    +570
    +571        qlist = self.zero_filled_u_l[0]
    +572        rlist = self.zero_filled_u_l[1]
    +573
    +574        return np_sum(power(qlist - rlist, 2)) / (
    +575            np_sum(power(qlist, 2)) + np_sum(power(rlist, 2)) - np_sum(qlist * rlist)
    +576        )
    +577        # correlation = jaccard_similarity(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +578        # @return correlation
     579
    -580        #qlist = self.zero_filled_u_l[2]
    -581        #rlist = self.zero_filled_u_l[3]
    +580    def extra_distances(self):
    +581        """Function to calculate distances using additional metrics defined in math_distance.py
     582
    -583        dict_res = {}
    +583        Currently, calculates all distances.
     584
    -585        for method in methods_name:
    -586            # function_name = method + "_distance"
    -587            function_name = method
    -588            if hasattr(math_distance, function_name):
    -589                f = getattr(math_distance, function_name)
    -590
    -591                if function_name == "canberra_metric":
    +585        Returns
    +586        -------
    +587        dict_res : dict
    +588            Dictionary containing the distances between the experimental and reference mass spectra.
    +589
    +590        """
    +591        from corems.molecular_id.calc import math_distance
     592
    -593                    x, y = self.nan_fill(self.df, fill_with=0)
    -594
    -595                    qlist, rlist = self.normalize(x, y, norm_func=self.normalize_func)
    -596                    # print("qlist:")
    -597                    # print(qlist)
    -598                    # print("rlist:")
    -599                    # print(rlist)
    -600
    -601                else:
    -602                    qlist = self.zero_filled_u_l[0]
    -603                    rlist = self.zero_filled_u_l[1]
    -604
    -605                dist = f(qlist, rlist)
    -606                #if method == "Minokowski_3":
    -607                #    print("qlist:")
    -608                #    print(qlist)
    -609                #    print("rlist")
    -610                #    print(rlist)
    -611                #    exit()
    -612                # if dist == np.nan or dis == np.inf:
    -613                    # print(self.exp_abun)
    -614                    # print(self.exp_mz)
    -615                    #print(function_name)
    -616                    # print(len(self.exp_abun))
    -617                    # print(len(self.exp_mz))
    -618                    # print(self.zero_filled_u_l[1])
    -619                dict_res[method] = dist
    -620
    -621        return dict_res
    +593        # qlist = self.zero_filled_u_l[2]
    +594        # rlist = self.zero_filled_u_l[3]
    +595
    +596        dict_res = {}
    +597
    +598        for method in methods_name:
    +599            # function_name = method + "_distance"
    +600            function_name = method
    +601            if hasattr(math_distance, function_name):
    +602                f = getattr(math_distance, function_name)
    +603
    +604                if function_name == "canberra_metric":
    +605                    x, y = self.nan_fill(self.df, fill_with=0)
    +606
    +607                    qlist, rlist = self.normalize(x, y, norm_func=self.normalize_func)
    +608                    # print("qlist:")
    +609                    # print(qlist)
    +610                    # print("rlist:")
    +611                    # print(rlist)
    +612
    +613                else:
    +614                    qlist = self.zero_filled_u_l[0]
    +615                    rlist = self.zero_filled_u_l[1]
    +616
    +617                dist = f(qlist, rlist)
    +618                # if method == "Minokowski_3":
    +619                #    print("qlist:")
    +620                #    print(qlist)
    +621                #    print("rlist")
    +622                #    print(rlist)
    +623                #    exit()
    +624                # if dist == np.nan or dis == np.inf:
    +625                # print(self.exp_abun)
    +626                # print(self.exp_mz)
    +627                # print(function_name)
    +628                # print(len(self.exp_abun))
    +629                # print(len(self.exp_mz))
    +630                # print(self.zero_filled_u_l[1])
    +631                dict_res[method] = dist
    +632
    +633        return dict_res
     
    @@ -1376,40 +1396,41 @@
    Methods
    -
    185    def __init__(self, ms_mz_abun_dict, ref_obj, norm_func=sum):
    -186
    -187        self.normalize_func = norm_func
    -188        self.ms_mz_abun_dict = ms_mz_abun_dict
    -189        self.ref_obj = ref_obj
    -190
    -191        self.exp_abun = list(self.ms_mz_abun_dict.values())
    -192        self.exp_mz = list(self.ms_mz_abun_dict.keys())
    -193
    -194        self.ref_mz = self.ref_obj.get("mz")
    -195        self.ref_abun = self.ref_obj.get("abundance")
    -196
    -197        self.ref_mz_abun_dict = dict(zip(self.ref_mz, self.ref_abun))
    -198
    -199        # parse to dataframe, easier to zerofill and tranpose
    -200        self.df = DataFrame([self.ms_mz_abun_dict, self.ref_mz_abun_dict])
    -201
    -202        # fill missing mz with abundance 0
    -203        x, y = self.nan_fill(self.df, fill_with=1e-10)
    -204        
    -205        self.zero_filled_u_l = self.normalize(x, y, norm_func=self.normalize_func)
    -206        
    -207        # filter out the mass values that have zero intensities in self.exp_abun
    -208        exp_mz_filtered = set([k for k in self.exp_mz if self.ms_mz_abun_dict[k] != 0])
    -209
    -210        # filter out the mass values that have zero intensities in self.ref_mz
    -211        ref_mz_filtered = set([k for k in self.ref_mz if self.ref_mz_abun_dict[k] != 0])
    -212
    -213        # find the intersection/common mass values of both ref and exp, and sort them
    -214        self.common_mz_values = sorted(list(exp_mz_filtered.intersection(ref_mz_filtered)))
    -215
    -216        # find the number of common mass values (after filtering 0s)
    -217        self.n_x_y = len(self.common_mz_values)
    -218        # print(self.n_x_y)
    +            
    190    def __init__(self, ms_mz_abun_dict, ref_obj, norm_func=sum):
    +191        self.normalize_func = norm_func
    +192        self.ms_mz_abun_dict = ms_mz_abun_dict
    +193        self.ref_obj = ref_obj
    +194
    +195        self.exp_abun = list(self.ms_mz_abun_dict.values())
    +196        self.exp_mz = list(self.ms_mz_abun_dict.keys())
    +197
    +198        self.ref_mz = self.ref_obj.get("mz")
    +199        self.ref_abun = self.ref_obj.get("abundance")
    +200
    +201        self.ref_mz_abun_dict = dict(zip(self.ref_mz, self.ref_abun))
    +202
    +203        # parse to dataframe, easier to zerofill and tranpose
    +204        self.df = DataFrame([self.ms_mz_abun_dict, self.ref_mz_abun_dict])
    +205
    +206        # fill missing mz with abundance 0
    +207        x, y = self.nan_fill(self.df, fill_with=1e-10)
    +208
    +209        self.zero_filled_u_l = self.normalize(x, y, norm_func=self.normalize_func)
    +210
    +211        # filter out the mass values that have zero intensities in self.exp_abun
    +212        exp_mz_filtered = set([k for k in self.exp_mz if self.ms_mz_abun_dict[k] != 0])
    +213
    +214        # filter out the mass values that have zero intensities in self.ref_mz
    +215        ref_mz_filtered = set([k for k in self.ref_mz if self.ref_mz_abun_dict[k] != 0])
    +216
    +217        # find the intersection/common mass values of both ref and exp, and sort them
    +218        self.common_mz_values = sorted(
    +219            list(exp_mz_filtered.intersection(ref_mz_filtered))
    +220        )
    +221
    +222        # find the number of common mass values (after filtering 0s)
    +223        self.n_x_y = len(self.common_mz_values)
    +224        # print(self.n_x_y)
     
    @@ -1559,25 +1580,25 @@
    Methods
    -
    220    def nan_fill(self, df, fill_with=0):
    -221        """ Fill missing mass values with a given value.
    -222        
    -223        Parameters
    -224        ----------
    -225        df : DataFrame
    -226            DataFrame containing the experimental and reference mass spectrum data.
    -227        fill_with : float
    -228            Value to fill missing mass values with.
    -229        
    -230        Returns
    -231        -------
    -232        x : list
    -233            List of abundance values for the experimental mass spectrum.
    -234        y : list
    -235            List of abundance values for the reference mass spectrum."""
    -236        df.fillna(fill_with, inplace=True)
    -237
    -238        return df.T[0].values, df.T[1].values
    +            
    226    def nan_fill(self, df, fill_with=0):
    +227        """Fill missing mass values with a given value.
    +228
    +229        Parameters
    +230        ----------
    +231        df : DataFrame
    +232            DataFrame containing the experimental and reference mass spectrum data.
    +233        fill_with : float
    +234            Value to fill missing mass values with.
    +235
    +236        Returns
    +237        -------
    +238        x : list
    +239            List of abundance values for the experimental mass spectrum.
    +240        y : list
    +241            List of abundance values for the reference mass spectrum."""
    +242        df.fillna(fill_with, inplace=True)
    +243
    +244        return df.T[0].values, df.T[1].values
     
    @@ -1615,29 +1636,29 @@
    Returns
    -
    240    def normalize(self, x, y, norm_func=sum):
    -241        """ Normalize the abundance values.
    -242        
    -243        Parameters
    -244        ----------
    -245        x : list
    -246            List of abundance values for the experimental mass spectrum.
    -247        y : list
    -248            List of abundance values for the reference mass spectrum.
    -249        norm_func : function
    -250            Function to normalize the abundance values.
    -251            Default is sum
    -252        
    -253        Returns
    -254        -------
    -255        u_l : tuple
    -256            Tuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    -257        """
    -258        if norm_func is not None:
    -259            u_l = (x / norm_func(x), y / norm_func(y))
    -260            return u_l
    -261        else:
    -262            return (x, y)
    +            
    246    def normalize(self, x, y, norm_func=sum):
    +247        """Normalize the abundance values.
    +248
    +249        Parameters
    +250        ----------
    +251        x : list
    +252            List of abundance values for the experimental mass spectrum.
    +253        y : list
    +254            List of abundance values for the reference mass spectrum.
    +255        norm_func : function
    +256            Function to normalize the abundance values.
    +257            Default is sum
    +258
    +259        Returns
    +260        -------
    +261        u_l : tuple
    +262            Tuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    +263        """
    +264        if norm_func is not None:
    +265            u_l = (x / norm_func(x), y / norm_func(y))
    +266            return u_l
    +267        else:
    +268            return (x, y)
     
    @@ -1676,51 +1697,51 @@
    Returns
    -
    264    def weighted_cosine_correlation(self, a=0.5, b=1.3, nanfill=1e-10):
    -265        """ Calculate the weighted cosine correlation between the experimental and reference mass spectra.
    -266        
    -267        Parameters
    -268        ----------
    -269        a : float
    -270            Weighting factor for the abundance values.
    -271            Default is 0.5
    -272        b : float
    -273            Weighting factor for the mass values.
    -274            Default is 1.3
    -275        nanfill : float
    -276            Value to fill missing mass values with.
    -277            Default is 1e-10
    -278        
    -279        Returns
    -280        -------
    -281        correlation : float
    -282            Weighted cosine correlation between the experimental and reference mass spectra.
    -283        """
    -284        # create dict['mz'] = abundance, for experimental data
    -285        # ms_mz_abun_dict = mass_spec.mz_abun_dict
    -286        # weight exp data
    -287
    -288        xc = power(self.exp_abun, a) * power(self.exp_mz, b)
    -289
    -290        # track back to individual mz
    -291        weighted_exp_dict = dict(zip(self.ms_mz_abun_dict.keys(), xc))
    -292
    -293        # weight ref data
    -294        yc = power(self.ref_obj.get("abundance"), a) * power(self.ref_obj.get("mz"), b)
    +            
    270    def weighted_cosine_correlation(self, a=0.5, b=1.3, nanfill=1e-10):
    +271        """Calculate the weighted cosine correlation between the experimental and reference mass spectra.
    +272
    +273        Parameters
    +274        ----------
    +275        a : float
    +276            Weighting factor for the abundance values.
    +277            Default is 0.5
    +278        b : float
    +279            Weighting factor for the mass values.
    +280            Default is 1.3
    +281        nanfill : float
    +282            Value to fill missing mass values with.
    +283            Default is 1e-10
    +284
    +285        Returns
    +286        -------
    +287        correlation : float
    +288            Weighted cosine correlation between the experimental and reference mass spectra.
    +289        """
    +290        # create dict['mz'] = abundance, for experimental data
    +291        # ms_mz_abun_dict = mass_spec.mz_abun_dict
    +292        # weight exp data
    +293
    +294        xc = power(self.exp_abun, a) * power(self.exp_mz, b)
     295
    -296        ref_mz_abun_dict = dict(zip(self.ref_obj.get("mz"), yc))
    -297
    -298        # parse to dataframe, easier to zerofill and tranpose
    -299        df = DataFrame([weighted_exp_dict, ref_mz_abun_dict])
    -300
    -301        # fill missing mz with weight {abun**a}{m/z**b} to 0
    -302        x, y = self.nan_fill(df, fill_with=nanfill)
    +296        # track back to individual mz
    +297        weighted_exp_dict = dict(zip(self.ms_mz_abun_dict.keys(), xc))
    +298
    +299        # weight ref data
    +300        yc = power(self.ref_obj.get("abundance"), a) * power(self.ref_obj.get("mz"), b)
    +301
    +302        ref_mz_abun_dict = dict(zip(self.ref_obj.get("mz"), yc))
     303
    -304        # correlation = (1 - cosine(x, y))
    -305
    -306        correlation = dot(x, y) / (norm(x) * norm(y))
    -307
    -308        return correlation
    +304        # parse to dataframe, easier to zerofill and tranpose
    +305        df = DataFrame([weighted_exp_dict, ref_mz_abun_dict])
    +306
    +307        # fill missing mz with weight {abun**a}{m/z**b} to 0
    +308        x, y = self.nan_fill(df, fill_with=nanfill)
    +309
    +310        # correlation = (1 - cosine(x, y))
    +311
    +312        correlation = dot(x, y) / (norm(x) * norm(y))
    +313
    +314        return correlation
     
    @@ -1761,24 +1782,24 @@
    Returns
    -
    310    def cosine_correlation(self):
    -311        """ Calculate the cosine correlation between the experimental and reference mass spectra.
    -312
    -313        Returns
    -314        -------
    -315        correlation : float
    -316            Cosine correlation between the experimental and reference mass spectra.
    -317        
    -318        """
    -319        # calculate cosine correlation,
    -320        x = self.zero_filled_u_l[0]
    -321        y = self.zero_filled_u_l[1]
    -322
    -323        # correlation = (1 - cosine(x, y))
    -324
    -325        correlation = dot(x, y) / (norm(x) * norm(y))
    -326
    -327        return correlation
    +            
    316    def cosine_correlation(self):
    +317        """Calculate the cosine correlation between the experimental and reference mass spectra.
    +318
    +319        Returns
    +320        -------
    +321        correlation : float
    +322            Cosine correlation between the experimental and reference mass spectra.
    +323
    +324        """
    +325        # calculate cosine correlation,
    +326        x = self.zero_filled_u_l[0]
    +327        y = self.zero_filled_u_l[1]
    +328
    +329        # correlation = (1 - cosine(x, y))
    +330
    +331        correlation = dot(x, y) / (norm(x) * norm(y))
    +332
    +333        return correlation
     
    @@ -1805,71 +1826,73 @@
    Returns
    -
    329    def stein_scott(self):
    -330        """ Calculate the Stein-Scott similarity between the experimental and reference mass spectra.
    -331        
    -332        Returns
    -333        -------
    -334        s_ss_x_y : float
    -335            Stein-Scott similarity between the experimental and reference mass spectra.
    -336        s_ss_x_y_nist : float
    -337            Stein-Scott similarity between the experimental and reference mass spectra.
    -338        """
    -339        #TODO check this code
    -340        if self.n_x_y == 0: return 0, 0
    -341
    -342        # count number of non-zero abundance/peak intensity values
    -343        n_x = sum(a != 0 for a in self.exp_abun)
    -344
    -345        s_r_x_y = 0
    -346
    -347        a, b = 1, 0
    +            
    335    def stein_scott(self):
    +336        """Calculate the Stein-Scott similarity between the experimental and reference mass spectra.
    +337
    +338        Returns
    +339        -------
    +340        s_ss_x_y : float
    +341            Stein-Scott similarity between the experimental and reference mass spectra.
    +342        s_ss_x_y_nist : float
    +343            Stein-Scott similarity between the experimental and reference mass spectra.
    +344        """
    +345        # TODO check this code
    +346        if self.n_x_y == 0:
    +347            return 0, 0
     348
    -349        for i in range(1, self.n_x_y):
    -350
    -351            current_value = self.common_mz_values[i]
    -352            previous_value = self.common_mz_values[i - 1]
    +349        # count number of non-zero abundance/peak intensity values
    +350        n_x = sum(a != 0 for a in self.exp_abun)
    +351
    +352        s_r_x_y = 0
     353
    -354            y_i = self.ref_mz_abun_dict[current_value]
    -355            y_i_minus1 = self.ref_mz_abun_dict[previous_value]
    -356
    -357            lc_current = power(y_i, a) * power(current_value, b)
    -358            lc_previous = power(y_i_minus1, a) * power(previous_value, b)
    +354        a, b = 1, 0
    +355
    +356        for i in range(1, self.n_x_y):
    +357            current_value = self.common_mz_values[i]
    +358            previous_value = self.common_mz_values[i - 1]
     359
    -360            x_i = self.ms_mz_abun_dict[current_value]
    -361            x_i_minus1 = self.ms_mz_abun_dict[previous_value]
    +360            y_i = self.ref_mz_abun_dict[current_value]
    +361            y_i_minus1 = self.ref_mz_abun_dict[previous_value]
     362
    -363            uc_current = power(x_i, a) * power(current_value, b)
    -364            uc_previous = power(x_i_minus1, a) * power(previous_value, b)
    +363            lc_current = power(y_i, a) * power(current_value, b)
    +364            lc_previous = power(y_i_minus1, a) * power(previous_value, b)
     365
    -366            T1 = lc_current / lc_previous
    -367
    -368            T2 = uc_previous / uc_current
    -369
    -370            temp_computation = T1 * T2
    +366            x_i = self.ms_mz_abun_dict[current_value]
    +367            x_i_minus1 = self.ms_mz_abun_dict[previous_value]
    +368
    +369            uc_current = power(x_i, a) * power(current_value, b)
    +370            uc_previous = power(x_i_minus1, a) * power(previous_value, b)
     371
    -372            n = 0
    -373            if temp_computation <= 1:
    -374                n = 1
    -375            else:
    -376                n = -1
    -377            
    -378            s_r_x_y = s_r_x_y + power(temp_computation, n)
    -379
    -380        # finish the calculation of S_R(X,Y)
    -381
    -382        s_r_x_y = s_r_x_y / self.n_x_y
    -383        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    -384        s_wc_x_y = self.weighted_cosine_correlation(a=0.5, b=3, nanfill=0)
    +372            T1 = lc_current / lc_previous
    +373
    +374            T2 = uc_previous / uc_current
    +375
    +376            temp_computation = T1 * T2
    +377
    +378            n = 0
    +379            if temp_computation <= 1:
    +380                n = 1
    +381            else:
    +382                n = -1
    +383
    +384            s_r_x_y = s_r_x_y + power(temp_computation, n)
     385
    -386        s_ss_x_y = ((n_x * s_wc_x_y) + (self.n_x_y * s_r_x_y)) / (n_x + self.n_x_y)
    +386        # finish the calculation of S_R(X,Y)
     387
    -388        s_wc_x_y_nist = self.weighted_cosine_correlation(a=0.5, b=1.3, nanfill=0)
    -389
    -390        s_ss_x_y_nist = ((n_x * s_wc_x_y_nist) + (self.n_x_y * s_r_x_y)) / (n_x + self.n_x_y)    
    -391        # final step
    -392
    -393        return s_ss_x_y, s_ss_x_y_nist
    +388        s_r_x_y = s_r_x_y / self.n_x_y
    +389        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    +390        s_wc_x_y = self.weighted_cosine_correlation(a=0.5, b=3, nanfill=0)
    +391
    +392        s_ss_x_y = ((n_x * s_wc_x_y) + (self.n_x_y * s_r_x_y)) / (n_x + self.n_x_y)
    +393
    +394        s_wc_x_y_nist = self.weighted_cosine_correlation(a=0.5, b=1.3, nanfill=0)
    +395
    +396        s_ss_x_y_nist = ((n_x * s_wc_x_y_nist) + (self.n_x_y * s_r_x_y)) / (
    +397            n_x + self.n_x_y
    +398        )
    +399        # final step
    +400
    +401        return s_ss_x_y, s_ss_x_y_nist
     
    @@ -1898,17 +1921,19 @@
    Returns
    -
    395    def pearson_correlation(self,):
    -396        """ Calculate the Pearson correlation between the experimental and reference mass spectra.
    -397        
    -398        Returns
    -399        -------
    -400        correlation : float
    -401            Pearson correlation between the experimental and reference mass spectra.    
    -402        """
    -403        correlation = pearsonr(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    -404
    -405        return correlation[0]
    +            
    403    def pearson_correlation(
    +404        self,
    +405    ):
    +406        """Calculate the Pearson correlation between the experimental and reference mass spectra.
    +407
    +408        Returns
    +409        -------
    +410        correlation : float
    +411            Pearson correlation between the experimental and reference mass spectra.
    +412        """
    +413        correlation = pearsonr(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +414
    +415        return correlation[0]
     
    @@ -1935,19 +1960,21 @@
    Returns
    -
    407    def spearman_correlation(self):
    -408        """ Calculate the Spearman correlation between the experimental and reference mass spectra.
    -409        
    -410        Returns
    -411        -------
    -412        coorelation : float
    -413            Spearman correlation between the experimental and reference mass spectra.
    -414        """
    -415        # calculate Spearman correlation
    -416        # ## TODO - Check axis
    -417        correlation = spearmanr(self.zero_filled_u_l[0], self.zero_filled_u_l[1], axis=0)
    -418
    -419        return correlation[0]
    +            
    417    def spearman_correlation(self):
    +418        """Calculate the Spearman correlation between the experimental and reference mass spectra.
    +419
    +420        Returns
    +421        -------
    +422        coorelation : float
    +423            Spearman correlation between the experimental and reference mass spectra.
    +424        """
    +425        # calculate Spearman correlation
    +426        # ## TODO - Check axis
    +427        correlation = spearmanr(
    +428            self.zero_filled_u_l[0], self.zero_filled_u_l[1], axis=0
    +429        )
    +430
    +431        return correlation[0]
     
    @@ -1974,22 +2001,22 @@
    Returns
    -
    421    def kendall_tau(self):
    -422        """ Calculate the Kendall's tau correlation between the experimental and reference mass spectra.
    -423        
    -424        Returns
    -425        -------
    -426        correlation : float
    -427            Kendall's tau correlation between the experimental and reference mass spectra."""
    -428        # create dict['mz'] = abundance, for experimental data
    -429        # self.ms_mz_abun_dict = mass_spec.mz_abun_dict
    -430
    -431        # create dict['mz'] = abundance, for experimental data
    -432
    -433        # calculate Kendall's tau
    -434        correlation = kendalltau(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +            
    433    def kendall_tau(self):
    +434        """Calculate the Kendall's tau correlation between the experimental and reference mass spectra.
     435
    -436        return correlation[0]
    +436        Returns
    +437        -------
    +438        correlation : float
    +439            Kendall's tau correlation between the experimental and reference mass spectra."""
    +440        # create dict['mz'] = abundance, for experimental data
    +441        # self.ms_mz_abun_dict = mass_spec.mz_abun_dict
    +442
    +443        # create dict['mz'] = abundance, for experimental data
    +444
    +445        # calculate Kendall's tau
    +446        correlation = kendalltau(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +447
    +448        return correlation[0]
     
    @@ -2016,37 +2043,37 @@
    Returns
    -
    438    def dft_correlation(self):
    -439        """ Calculate the DFT correlation between the experimental and reference mass spectra.
    -440
    -441        Returns
    -442        -------
    -443        correlation : float
    -444            DFT correlation between the experimental and reference mass spectra.
    -445        """
    -446        if self.n_x_y == 0:
    -447            return 0
    -448
    -449        # count number of non-zero abundance/peak intensity values
    -450        n_x = sum(a != 0 for a in self.exp_abun)
    -451
    -452        x, y = self.nan_fill(self.df, fill_with=0)
    -453        
    -454        x, y = self.normalize(x, y, norm_func=self.normalize_func)
    -455        
    -456        # get the Fourier transform of x and y
    -457        x_dft = rfft(x).real
    -458        y_dft = rfft(y).real
    -459
    -460        s_dft_xy = dot(x_dft, y_dft)/(norm(x_dft)*norm(y_dft))
    -461
    -462        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    -463        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)
    -464
    -465        # final step
    -466        s_dft = (n_x * s_wc_x_y + self.n_x_y * s_dft_xy) / (n_x + self.n_x_y)
    +            
    450    def dft_correlation(self):
    +451        """Calculate the DFT correlation between the experimental and reference mass spectra.
    +452
    +453        Returns
    +454        -------
    +455        correlation : float
    +456            DFT correlation between the experimental and reference mass spectra.
    +457        """
    +458        if self.n_x_y == 0:
    +459            return 0
    +460
    +461        # count number of non-zero abundance/peak intensity values
    +462        n_x = sum(a != 0 for a in self.exp_abun)
    +463
    +464        x, y = self.nan_fill(self.df, fill_with=0)
    +465
    +466        x, y = self.normalize(x, y, norm_func=self.normalize_func)
     467
    -468        return s_dft
    +468        # get the Fourier transform of x and y
    +469        x_dft = rfft(x).real
    +470        y_dft = rfft(y).real
    +471
    +472        s_dft_xy = dot(x_dft, y_dft) / (norm(x_dft) * norm(y_dft))
    +473
    +474        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    +475        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)
    +476
    +477        # final step
    +478        s_dft = (n_x * s_wc_x_y + self.n_x_y * s_dft_xy) / (n_x + self.n_x_y)
    +479
    +480        return s_dft
     
    @@ -2073,51 +2100,51 @@
    Returns
    -
    470    def dwt_correlation(self):
    -471        """ Calculate the DWT correlation between the experimental and reference mass spectra.
    -472
    -473        Returns
    -474        -------
    -475        correlation : float
    -476            DWT correlation between the experimental and reference mass spectra.
    -477        
    -478        Notes
    -479        -----
    -480        This function requires the PyWavelets library to be installed. 
    -481            This is not a default requirement as this function is not widely used. 
    -482        """
    -483
    -484        from pywt import dwt 
    -485        
    -486        if self.n_x_y == 0:
    -487            return 0
    -488
    -489        # count number of non-zero abundance/peak intensity values
    -490        n_x = sum(a != 0 for a in self.exp_abun)
    -491
    -492        # calculate cosine correlation,
    -493        x, y = self.nan_fill(self.df, fill_with=0)
    -494        
    -495        x, y = self.normalize(x, y, norm_func=self.normalize_func)
    -496
    -497        # Make x and y into an array
    -498        x_a = list(x)
    -499        y_a = list(y)
    +            
    482    def dwt_correlation(self):
    +483        """Calculate the DWT correlation between the experimental and reference mass spectra.
    +484
    +485        Returns
    +486        -------
    +487        correlation : float
    +488            DWT correlation between the experimental and reference mass spectra.
    +489
    +490        Notes
    +491        -----
    +492        This function requires the PyWavelets library to be installed.
    +493            This is not a default requirement as this function is not widely used.
    +494        """
    +495
    +496        from pywt import dwt
    +497
    +498        if self.n_x_y == 0:
    +499            return 0
     500
    -501        # get the wavelet transform of x and y (Daubechies with a filter length of 4. Asymmetric. pywavelets function)
    -502        # Will only use the detail dwt (dwtDd
    -503        x_dwtD = dwt(x_a, 'db2')[1]
    -504        y_dwtD = dwt(y_a, 'db2')[1]
    -505
    -506        s_dwt_xy = dot(x_dwtD, y_dwtD) / (norm(x_dwtD) * norm(y_dwtD))
    -507
    -508        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    -509        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)
    -510
    -511        # final step
    -512        s_dwt = (n_x * s_wc_x_y + self.n_x_y * s_dwt_xy) / (n_x + self.n_x_y)
    -513
    -514        return s_dwt
    +501        # count number of non-zero abundance/peak intensity values
    +502        n_x = sum(a != 0 for a in self.exp_abun)
    +503
    +504        # calculate cosine correlation,
    +505        x, y = self.nan_fill(self.df, fill_with=0)
    +506
    +507        x, y = self.normalize(x, y, norm_func=self.normalize_func)
    +508
    +509        # Make x and y into an array
    +510        x_a = list(x)
    +511        y_a = list(y)
    +512
    +513        # get the wavelet transform of x and y (Daubechies with a filter length of 4. Asymmetric. pywavelets function)
    +514        # Will only use the detail dwt (dwtDd
    +515        x_dwtD = dwt(x_a, "db2")[1]
    +516        y_dwtD = dwt(y_a, "db2")[1]
    +517
    +518        s_dwt_xy = dot(x_dwtD, y_dwtD) / (norm(x_dwtD) * norm(y_dwtD))
    +519
    +520        # using the existing weighted_cosine_correlation function to get S_WC(X,Y)
    +521        s_wc_x_y = self.weighted_cosine_correlation(nanfill=0)
    +522
    +523        # final step
    +524        s_dwt = (n_x * s_wc_x_y + self.n_x_y * s_dwt_xy) / (n_x + self.n_x_y)
    +525
    +526        return s_dwt
     
    @@ -2132,7 +2159,7 @@
    Returns
    Notes
    -

    This function requires the PyWavelets library to be installed. +

    This function requires the PyWavelets library to be installed. This is not a default requirement as this function is not widely used.

    @@ -2149,21 +2176,21 @@
    Notes
    -
    516    def euclidean_distance(self):
    -517        """ Calculate the Euclidean distance between the experimental and reference mass spectra.
    -518        
    -519        Returns
    -520        -------
    -521        correlation : float
    -522            Euclidean distance between the experimental and reference mass spectra.
    -523        """
    -524        # correlation = euclidean_distance_manual(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    -525        qlist = self.zero_filled_u_l[0]
    -526        rlist = self.zero_filled_u_l[1]
    -527
    -528        correlation = sqrt(np_sum(power(qlist - rlist, 2)))
    -529
    -530        return correlation
    +            
    528    def euclidean_distance(self):
    +529        """Calculate the Euclidean distance between the experimental and reference mass spectra.
    +530
    +531        Returns
    +532        -------
    +533        correlation : float
    +534            Euclidean distance between the experimental and reference mass spectra.
    +535        """
    +536        # correlation = euclidean_distance_manual(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +537        qlist = self.zero_filled_u_l[0]
    +538        rlist = self.zero_filled_u_l[1]
    +539
    +540        correlation = sqrt(np_sum(power(qlist - rlist, 2)))
    +541
    +542        return correlation
     
    @@ -2190,18 +2217,18 @@
    Returns
    -
    532    def manhattan_distance(self):
    -533        """ Calculate the Manhattan distance between the experimental and reference mass spectra.
    -534        
    -535        Returns
    -536        -------
    -537        correlation : float
    -538            Manhattan distance between the experimental and reference mass spectra.
    -539        """
    -540        qlist = self.zero_filled_u_l[0]
    -541        rlist = self.zero_filled_u_l[1]
    -542
    -543        return np_sum(absolute(qlist - rlist))
    +            
    544    def manhattan_distance(self):
    +545        """Calculate the Manhattan distance between the experimental and reference mass spectra.
    +546
    +547        Returns
    +548        -------
    +549        correlation : float
    +550            Manhattan distance between the experimental and reference mass spectra.
    +551        """
    +552        qlist = self.zero_filled_u_l[0]
    +553        rlist = self.zero_filled_u_l[1]
    +554
    +555        return np_sum(absolute(qlist - rlist))
     
    @@ -2228,27 +2255,28 @@
    Returns
    -
    545    def jaccard_distance(self):
    -546        """ Calculate the Jaccard distance between the experimental and reference mass spectra.
    -547        
    -548        Returns
    -549        -------
    -550        correlation : float
    -551            Jaccard distance between the experimental and reference mass spectra.
    -552        """
    -553
    -554        def jaccard_similarity(list1, list2):
    -555
    -556            intersection = len(list(set(list1).intersection(list2)))
    -557            union = (len(list1) + len(list2)) - intersection
    -558            return float(intersection) / union
    +            
    557    def jaccard_distance(self):
    +558        """Calculate the Jaccard distance between the experimental and reference mass spectra.
     559
    -560        qlist = self.zero_filled_u_l[0]
    -561        rlist = self.zero_filled_u_l[1]
    -562
    -563        return np_sum(power(qlist - rlist, 2)) / (np_sum(power(qlist, 2)) + np_sum(power(rlist, 2)) - np_sum(qlist * rlist))
    -564        # correlation = jaccard_similarity(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    -565        # @return correlation
    +560        Returns
    +561        -------
    +562        correlation : float
    +563            Jaccard distance between the experimental and reference mass spectra.
    +564        """
    +565
    +566        def jaccard_similarity(list1, list2):
    +567            intersection = len(list(set(list1).intersection(list2)))
    +568            union = (len(list1) + len(list2)) - intersection
    +569            return float(intersection) / union
    +570
    +571        qlist = self.zero_filled_u_l[0]
    +572        rlist = self.zero_filled_u_l[1]
    +573
    +574        return np_sum(power(qlist - rlist, 2)) / (
    +575            np_sum(power(qlist, 2)) + np_sum(power(rlist, 2)) - np_sum(qlist * rlist)
    +576        )
    +577        # correlation = jaccard_similarity(self.zero_filled_u_l[0], self.zero_filled_u_l[1])
    +578        # @return correlation
     
    @@ -2275,61 +2303,60 @@
    Returns
    -
    567    def extra_distances(self):
    -568        """ Function to calculate distances using additional metrics defined in math_distance.py
    -569
    -570        Currently, calculates all distances.
    -571
    -572        Returns
    -573        -------
    -574        dict_res : dict
    -575            Dictionary containing the distances between the experimental and reference mass spectra.
    -576    
    -577        """
    -578        from corems.molecular_id.calc import math_distance
    -579
    -580        #qlist = self.zero_filled_u_l[2]
    -581        #rlist = self.zero_filled_u_l[3]
    +            
    580    def extra_distances(self):
    +581        """Function to calculate distances using additional metrics defined in math_distance.py
     582
    -583        dict_res = {}
    +583        Currently, calculates all distances.
     584
    -585        for method in methods_name:
    -586            # function_name = method + "_distance"
    -587            function_name = method
    -588            if hasattr(math_distance, function_name):
    -589                f = getattr(math_distance, function_name)
    -590
    -591                if function_name == "canberra_metric":
    +585        Returns
    +586        -------
    +587        dict_res : dict
    +588            Dictionary containing the distances between the experimental and reference mass spectra.
    +589
    +590        """
    +591        from corems.molecular_id.calc import math_distance
     592
    -593                    x, y = self.nan_fill(self.df, fill_with=0)
    -594
    -595                    qlist, rlist = self.normalize(x, y, norm_func=self.normalize_func)
    -596                    # print("qlist:")
    -597                    # print(qlist)
    -598                    # print("rlist:")
    -599                    # print(rlist)
    -600
    -601                else:
    -602                    qlist = self.zero_filled_u_l[0]
    -603                    rlist = self.zero_filled_u_l[1]
    -604
    -605                dist = f(qlist, rlist)
    -606                #if method == "Minokowski_3":
    -607                #    print("qlist:")
    -608                #    print(qlist)
    -609                #    print("rlist")
    -610                #    print(rlist)
    -611                #    exit()
    -612                # if dist == np.nan or dis == np.inf:
    -613                    # print(self.exp_abun)
    -614                    # print(self.exp_mz)
    -615                    #print(function_name)
    -616                    # print(len(self.exp_abun))
    -617                    # print(len(self.exp_mz))
    -618                    # print(self.zero_filled_u_l[1])
    -619                dict_res[method] = dist
    -620
    -621        return dict_res
    +593        # qlist = self.zero_filled_u_l[2]
    +594        # rlist = self.zero_filled_u_l[3]
    +595
    +596        dict_res = {}
    +597
    +598        for method in methods_name:
    +599            # function_name = method + "_distance"
    +600            function_name = method
    +601            if hasattr(math_distance, function_name):
    +602                f = getattr(math_distance, function_name)
    +603
    +604                if function_name == "canberra_metric":
    +605                    x, y = self.nan_fill(self.df, fill_with=0)
    +606
    +607                    qlist, rlist = self.normalize(x, y, norm_func=self.normalize_func)
    +608                    # print("qlist:")
    +609                    # print(qlist)
    +610                    # print("rlist:")
    +611                    # print(rlist)
    +612
    +613                else:
    +614                    qlist = self.zero_filled_u_l[0]
    +615                    rlist = self.zero_filled_u_l[1]
    +616
    +617                dist = f(qlist, rlist)
    +618                # if method == "Minokowski_3":
    +619                #    print("qlist:")
    +620                #    print(qlist)
    +621                #    print("rlist")
    +622                #    print(rlist)
    +623                #    exit()
    +624                # if dist == np.nan or dis == np.inf:
    +625                # print(self.exp_abun)
    +626                # print(self.exp_mz)
    +627                # print(function_name)
    +628                # print(len(self.exp_abun))
    +629                # print(len(self.exp_mz))
    +630                # print(self.zero_filled_u_l[1])
    +631                dict_res[method] = dist
    +632
    +633        return dict_res
     
    diff --git a/docs/corems/molecular_id/calc/math_distance.html b/docs/corems/molecular_id/calc/math_distance.html index fe14f5c1..18ae3908 100644 --- a/docs/corems/molecular_id/calc/math_distance.html +++ b/docs/corems/molecular_id/calc/math_distance.html @@ -270,1594 +270,1643 @@

    -
       1import numpy as np
    -   2import scipy.stats
    -   3import warnings
    -   4""" Collection of spectral similarity methods.
    +                        
       1import warnings
    +   2
    +   3import numpy as np
    +   4import scipy.stats
        5
    -   6Based on  Yuanyue Li code at https://github.com/YuanyueLi/SpectralEntropy/blob/master/spectral_entropy/math_distance.py 
    -   7and paper: Li, Y., Kind, T., Folz, J. et al. Spectral entropy outperforms MS/MS dot product similarity for small-molecule compound identification. Nat Methods 18, 1524–1531 (2021). https://doi.org/10.1038/s41592-021-01331-z
    -   8"""
    -   9
    -  10def entropy_distance(v, y):
    -  11    """ Calculate entropy distance between two vectors
    +   6""" Collection of spectral similarity methods.
    +   7
    +   8Based on  Yuanyue Li code at https://github.com/YuanyueLi/SpectralEntropy/blob/master/spectral_entropy/math_distance.py 
    +   9and paper: Li, Y., Kind, T., Folz, J. et al. Spectral entropy outperforms MS/MS dot product similarity for small-molecule compound identification. Nat Methods 18, 1524–1531 (2021). https://doi.org/10.1038/s41592-021-01331-z
    +  10"""
    +  11
       12
    -  13    Parameters
    -  14    ----------
    -  15    v : array_like
    -  16        Vector 1
    -  17    y : array_like
    -  18        Vector 2
    -  19    
    -  20    Returns
    -  21    -------
    -  22    float
    -  23        Entropy distance between v and y
    -  24    
    -  25    """
    -  26    merged = v + y
    -  27    entropy_increase = 2 * scipy.stats.entropy(merged) - scipy.stats.entropy(v) - scipy.stats.entropy(y)
    -  28    return entropy_increase
    -  29
    -  30def _weight_intensity_for_entropy(x):
    -  31    """ Weight intensity for entropy
    -  32    
    -  33    Parameters
    -  34    ----------
    -  35    x : array_like
    -  36        Vector
    -  37    
    -  38    Returns
    -  39    -------
    -  40    array_like
    -  41        Weighted vector
    -  42    """
    -  43    if sum(x) > 0:
    -  44        WEIGHT_START = 0.25
    -  45        WEIGHT_SLOPE = 0.5
    -  46
    -  47        entropy_x = scipy.stats.entropy(x)
    -  48        weight = WEIGHT_START + WEIGHT_SLOPE * entropy_x
    -  49        x = np.power(x, weight)
    -  50        x = x / sum(x)
    -  51        return x
    -  52
    -  53
    -  54def weighted_entropy_distance(v, y):
    -  55    """ Calculate weighted entropy distance between two vectors
    -  56
    -  57    Parameters
    -  58    ----------
    -  59    v : array_like
    -  60        Vector 1
    -  61    y : array_like
    -  62        Vector 2
    -  63    
    -  64    Returns
    -  65    -------
    -  66    float
    -  67        Weighted entropy distance between v and y
    -  68    """
    -  69    v = _weight_intensity_for_entropy(v)
    -  70    y = _weight_intensity_for_entropy(y)
    +  13def entropy_distance(v, y):
    +  14    """Calculate entropy distance between two vectors
    +  15
    +  16    Parameters
    +  17    ----------
    +  18    v : array_like
    +  19        Vector 1
    +  20    y : array_like
    +  21        Vector 2
    +  22
    +  23    Returns
    +  24    -------
    +  25    float
    +  26        Entropy distance between v and y
    +  27
    +  28    """
    +  29    merged = v + y
    +  30    entropy_increase = (
    +  31        2 * scipy.stats.entropy(merged)
    +  32        - scipy.stats.entropy(v)
    +  33        - scipy.stats.entropy(y)
    +  34    )
    +  35    return entropy_increase
    +  36
    +  37
    +  38def _weight_intensity_for_entropy(x):
    +  39    """Weight intensity for entropy
    +  40
    +  41    Parameters
    +  42    ----------
    +  43    x : array_like
    +  44        Vector
    +  45
    +  46    Returns
    +  47    -------
    +  48    array_like
    +  49        Weighted vector
    +  50    """
    +  51    if sum(x) > 0:
    +  52        WEIGHT_START = 0.25
    +  53        WEIGHT_SLOPE = 0.5
    +  54
    +  55        entropy_x = scipy.stats.entropy(x)
    +  56        weight = WEIGHT_START + WEIGHT_SLOPE * entropy_x
    +  57        x = np.power(x, weight)
    +  58        x = x / sum(x)
    +  59        return x
    +  60
    +  61
    +  62def weighted_entropy_distance(v, y):
    +  63    """Calculate weighted entropy distance between two vectors
    +  64
    +  65    Parameters
    +  66    ----------
    +  67    v : array_like
    +  68        Vector 1
    +  69    y : array_like
    +  70        Vector 2
       71
    -  72    merged = v + y
    -  73    entropy_increase = 2 * scipy.stats.entropy(merged) - scipy.stats.entropy(v) - scipy.stats.entropy(y)
    -  74    return entropy_increase
    -  75
    -  76
    -  77def chebyshev_distance(v, y):
    -  78    r"""Chebyshev distance
    +  72    Returns
    +  73    -------
    +  74    float
    +  75        Weighted entropy distance between v and y
    +  76    """
    +  77    v = _weight_intensity_for_entropy(v)
    +  78    y = _weight_intensity_for_entropy(y)
       79
    -  80    Parameters
    -  81    ----------
    -  82    v : array_like
    -  83        Vector 1
    -  84    y : array_like
    -  85        Vector 2
    -  86    
    -  87    Returns
    -  88    -------
    -  89    float
    -  90        Chebyshev distance between v and y
    +  80    merged = v + y
    +  81    entropy_increase = (
    +  82        2 * scipy.stats.entropy(merged)
    +  83        - scipy.stats.entropy(v)
    +  84        - scipy.stats.entropy(y)
    +  85    )
    +  86    return entropy_increase
    +  87
    +  88
    +  89def chebyshev_distance(v, y):
    +  90    r"""Chebyshev distance
       91
    -  92    Notes
    -  93    -----
    -  94    .. math::
    -  95
    -  96        \underset{i}{\max}{(|v_{i}\ -\ y_{i}|)}
    -  97    """
    -  98    return np.max(np.abs(v - y))
    -  99
    - 100
    - 101def squared_euclidean_distance(v, y):
    - 102    r"""Squared Euclidean distance:
    +  92    Parameters
    +  93    ----------
    +  94    v : array_like
    +  95        Vector 1
    +  96    y : array_like
    +  97        Vector 2
    +  98
    +  99    Returns
    + 100    -------
    + 101    float
    + 102        Chebyshev distance between v and y
      103
    - 104    Parameters
    - 105    ----------
    - 106    v : array_like
    - 107        Vector 1
    - 108    y : array_like
    - 109        Vector 2
    - 110    
    - 111    Returns
    - 112    -------
    - 113    float
    - 114        Squared Euclidean distance between v and y
    + 104    Notes
    + 105    -----
    + 106    .. math::
    + 107
    + 108        \underset{i}{\max}{(|v_{i}\ -\ y_{i}|)}
    + 109    """
    + 110    return np.max(np.abs(v - y))
    + 111
    + 112
    + 113def squared_euclidean_distance(v, y):
    + 114    r"""Squared Euclidean distance:
      115
    - 116    Notes
    - 117    -----
    - 118    .. math::
    - 119
    - 120        \sum(v_{i}-y_{i})^2
    - 121    """
    - 122    return np.sum(np.power(v - y, 2))
    - 123
    - 124
    - 125def fidelity_similarity(v, y):
    - 126    r""" Fidelity similarity:
    + 116    Parameters
    + 117    ----------
    + 118    v : array_like
    + 119        Vector 1
    + 120    y : array_like
    + 121        Vector 2
    + 122
    + 123    Returns
    + 124    -------
    + 125    float
    + 126        Squared Euclidean distance between v and y
      127
    - 128    Parameters
    - 129    ----------
    - 130    v : array_like
    - 131        Vector 1
    - 132    y : array_like
    - 133        Vector 2
    - 134    
    - 135    Returns
    - 136    -------
    - 137    float
    - 138        Fidelity similarity between v and y
    - 139    Notes
    - 140    -----
    - 141    .. math::
    - 142
    - 143        \sum\sqrt{v_{i}y_{i}}
    - 144    """
    - 145    return np.sum(np.sqrt(v * y))
    + 128    Notes
    + 129    -----
    + 130    .. math::
    + 131
    + 132        \sum(v_{i}-y_{i})^2
    + 133    """
    + 134    return np.sum(np.power(v - y, 2))
    + 135
    + 136
    + 137def fidelity_similarity(v, y):
    + 138    r"""Fidelity similarity:
    + 139
    + 140    Parameters
    + 141    ----------
    + 142    v : array_like
    + 143        Vector 1
    + 144    y : array_like
    + 145        Vector 2
      146
    - 147
    - 148def matusita_distance(v, y):
    - 149    r"""Matusita distance:
    - 150
    - 151    Parameters
    - 152    ----------
    - 153    v : array_like
    - 154        Vector 1
    - 155    y : array_like
    - 156        Vector 2
    - 157    
    - 158    Returns
    - 159    -------
    - 160    float
    - 161        Matusita distance between v and y
    + 147    Returns
    + 148    -------
    + 149    float
    + 150        Fidelity similarity between v and y
    + 151    Notes
    + 152    -----
    + 153    .. math::
    + 154
    + 155        \sum\sqrt{v_{i}y_{i}}
    + 156    """
    + 157    return np.sum(np.sqrt(v * y))
    + 158
    + 159
    + 160def matusita_distance(v, y):
    + 161    r"""Matusita distance:
      162
    - 163    Notes
    - 164    -----
    - 165    .. math::
    - 166
    - 167        \sqrt{\sum(\sqrt{v_{i}}-\sqrt{y_{i}})^2}
    - 168    """
    - 169    return np.sqrt(np.sum(np.power(np.sqrt(v) - np.sqrt(y), 2)))
    - 170
    - 171
    - 172def squared_chord_distance(v, y):
    - 173    r"""Squared-chord distance:
    + 163    Parameters
    + 164    ----------
    + 165    v : array_like
    + 166        Vector 1
    + 167    y : array_like
    + 168        Vector 2
    + 169
    + 170    Returns
    + 171    -------
    + 172    float
    + 173        Matusita distance between v and y
      174
    - 175    Parameters
    - 176    ----------
    - 177    v : array_like
    - 178        Vector 1
    - 179    y : array_like
    - 180        Vector 2
    - 181    
    - 182    Returns
    - 183    -------
    - 184    float
    - 185        Squared-chord distance between v and y
    + 175    Notes
    + 176    -----
    + 177    .. math::
    + 178
    + 179        \sqrt{\sum(\sqrt{v_{i}}-\sqrt{y_{i}})^2}
    + 180    """
    + 181    return np.sqrt(np.sum(np.power(np.sqrt(v) - np.sqrt(y), 2)))
    + 182
    + 183
    + 184def squared_chord_distance(v, y):
    + 185    r"""Squared-chord distance:
      186
    - 187    Notes
    - 188    -----
    - 189    .. math::
    - 190
    - 191        \sum(\sqrt{v_{i}}-\sqrt{y_{i}})^2
    - 192    """
    - 193    return np.sum(np.power(np.sqrt(v) - np.sqrt(y), 2))
    - 194
    - 195
    - 196def bhattacharya_1_distance(v, y):
    - 197    r"""Bhattacharya 1 distance:
    + 187    Parameters
    + 188    ----------
    + 189    v : array_like
    + 190        Vector 1
    + 191    y : array_like
    + 192        Vector 2
    + 193
    + 194    Returns
    + 195    -------
    + 196    float
    + 197        Squared-chord distance between v and y
      198
    - 199    Parameters
    - 200    ----------
    - 201    v : array_like
    - 202        Vector 1
    - 203    y : array_like
    - 204        Vector 2
    - 205    
    - 206    Returns
    - 207    -------
    - 208    float
    - 209        Bhattacharya 1 distance between v and y
    + 199    Notes
    + 200    -----
    + 201    .. math::
    + 202
    + 203        \sum(\sqrt{v_{i}}-\sqrt{y_{i}})^2
    + 204    """
    + 205    return np.sum(np.power(np.sqrt(v) - np.sqrt(y), 2))
    + 206
    + 207
    + 208def bhattacharya_1_distance(v, y):
    + 209    r"""Bhattacharya 1 distance:
      210
    - 211    Notes
    - 212    -----
    - 213    .. math::
    - 214
    - 215        (\arccos{(\sum\sqrt{v_{i}y_{i}})})^2
    - 216    """
    - 217    s = np.sum(np.sqrt(v * y))
    - 218    # TODO:Fix this!
    - 219    if s > 1:
    - 220        if s > 1 + 1e-6:
    - 221            warnings.warn("Error in calculating Bhattacharya 1 distance, got arccos {}".format(s))
    - 222        s = 1
    - 223    return np.power(np.arccos(s), 2)
    - 224
    - 225
    - 226def bhattacharya_2_distance(v, y):
    - 227    r"""Bhattacharya 2 distance:
    - 228
    - 229    Parameters
    - 230    ----------
    - 231    v : array_like
    - 232        Vector 1
    - 233    y : array_like
    - 234        Vector 2
    - 235    
    - 236    Returns
    - 237    -------
    - 238    float
    - 239        Bhattacharya 2 distance between v and y
    - 240    Notes
    - 241    -----
    - 242    .. math::
    - 243
    - 244        -\ln{(\sum\sqrt{v_{i}y_{i}})}
    - 245    """
    - 246    s = np.sum(np.sqrt(v * y))
    - 247    if s == 0:
    - 248        return np.inf
    - 249    else:
    - 250        return -np.log(s)
    - 251
    - 252
    - 253def harmonic_mean_similarity(v, y):
    - 254    r"""Harmonic mean similarity:
    - 255
    - 256    Parameters
    - 257    ----------
    - 258    v : array_like
    - 259        Vector 1
    - 260    y : array_like
    - 261        Vector 2
    - 262    
    - 263    Returns
    - 264    -------
    - 265    float
    - 266        Harmonic mean similarity between v and y
    - 267    
    - 268    Notes
    - 269    -----
    - 270    .. math::
    - 271
    - 272        #1-2\sum(\frac{v_{i}y_{i}}{v_{i}+y_{i}})
    - 273        2\sum(\frac{v_{i}y_{i}}{v_{i}+y_{i}})
    - 274    """
    - 275    #return 1 - 2 * np.sum(v * y / (v + y))
    - 276    return 2 * np.sum(v * y / (v + y))
    - 277
    - 278
    - 279#def pearson_chi_squared_distance(v, y):
    - 280#    r"""
    - 281#    Pearson χ2 distance:
    - 282#
    - 283#    .. math::
    - 284#
    - 285#        \sum\frac{(v_{i}-y_{i})^2}{y_{i}}
    - 286#    """
    - 287#    return np.sum(np.power(v - y, 2) / y)
    - 288
    - 289
    - 290#def neyman_chi_squared_distance(v, y):
    - 291#    r"""
    - 292#    Neyman χ2 distance:
    - 293#
    - 294#    .. math::
    - 295#
    - 296#        \sum\frac{(v_{i}-y_{i})^2}{v_{i}}
    - 297#    """
    - 298#    return np.sum(np.power(v - y, 2) / v)
    - 299
    - 300
    - 301#def probabilistic_symmetric_chi_squared_distance(v, y):
    - 302#    r"""
    - 303#    Probabilistic symmetric χ2 distance:
    - 304#
    - 305#    .. math::
    - 306#
    - 307#        \frac{1}{2} \times \sum\frac{(v_{i}-y_{i}\ )^2}{v_{i}+y_{i}\ }
    - 308#    """
    - 309#    return 1 / 2 * np.sum(np.power(v - y, 2) / (v + y))
    - 310
    - 311
    - 312#def topsoe_distance(v, y):
    - 313#    r"""
    - 314#    Topsøe distance:
    - 315#
    - 316#    .. math::
    - 317#
    - 318#        \sum{(v_{i}ln\frac{v_{i}}{Z_i}+y_{i}ln\frac{y_{i}}{Z_i}),\ \ \ Z_i=\frac{1}{2}(v_{i}+y_{i})}
    - 319#    """
    - 320#    z = 1 / 2 * (v + y)
    - 321#    z[z == 0] = 1
    - 322#    vz = v / z
    - 323#    yz = y / z
    - 324#    vz[v == 0] = 1
    - 325#    yz[y == 0] = 1
    - 326#    return np.sum(v * np.log(vz) + y * np.log(yz))
    - 327
    - 328
    - 329def chernoff_distance(v, y):
    - 330    r""" Chernoff distance:
    - 331
    - 332    Parameters
    - 333    ----------
    - 334    v : array_like
    - 335        Vector 1
    - 336    y : array_like
    - 337        Vector 2
    - 338    
    - 339    Returns
    - 340    -------
    - 341    float
    - 342        Chernoff distance between v and y
    - 343
    - 344    Notes
    - 345    -----
    - 346    .. math::
    - 347
    - 348        \max{(-ln\sum(v_{i}^ty_{i}^{1-t})^{1-t})},\ t=0.1,\ 0\le\ t<1
    - 349    """
    - 350    t = 0.1
    - 351    return np.max(-np.log(
    - 352        np.sum(np.power(np.power(v, t) * np.power(y, 1 - t), 1 - t))))
    - 353
    - 354
    - 355def ruzicka_distance(v, y):
    - 356    r""" Ruzicka distance:
    + 211    Parameters
    + 212    ----------
    + 213    v : array_like
    + 214        Vector 1
    + 215    y : array_like
    + 216        Vector 2
    + 217
    + 218    Returns
    + 219    -------
    + 220    float
    + 221        Bhattacharya 1 distance between v and y
    + 222
    + 223    Notes
    + 224    -----
    + 225    .. math::
    + 226
    + 227        (\arccos{(\sum\sqrt{v_{i}y_{i}})})^2
    + 228    """
    + 229    s = np.sum(np.sqrt(v * y))
    + 230    # TODO:Fix this!
    + 231    if s > 1:
    + 232        if s > 1 + 1e-6:
    + 233            warnings.warn(
    + 234                "Error in calculating Bhattacharya 1 distance, got arccos {}".format(s)
    + 235            )
    + 236        s = 1
    + 237    return np.power(np.arccos(s), 2)
    + 238
    + 239
    + 240def bhattacharya_2_distance(v, y):
    + 241    r"""Bhattacharya 2 distance:
    + 242
    + 243    Parameters
    + 244    ----------
    + 245    v : array_like
    + 246        Vector 1
    + 247    y : array_like
    + 248        Vector 2
    + 249
    + 250    Returns
    + 251    -------
    + 252    float
    + 253        Bhattacharya 2 distance between v and y
    + 254    Notes
    + 255    -----
    + 256    .. math::
    + 257
    + 258        -\ln{(\sum\sqrt{v_{i}y_{i}})}
    + 259    """
    + 260    s = np.sum(np.sqrt(v * y))
    + 261    if s == 0:
    + 262        return np.inf
    + 263    else:
    + 264        return -np.log(s)
    + 265
    + 266
    + 267def harmonic_mean_similarity(v, y):
    + 268    r"""Harmonic mean similarity:
    + 269
    + 270    Parameters
    + 271    ----------
    + 272    v : array_like
    + 273        Vector 1
    + 274    y : array_like
    + 275        Vector 2
    + 276
    + 277    Returns
    + 278    -------
    + 279    float
    + 280        Harmonic mean similarity between v and y
    + 281
    + 282    Notes
    + 283    -----
    + 284    .. math::
    + 285
    + 286        #1-2\sum(\frac{v_{i}y_{i}}{v_{i}+y_{i}})
    + 287        2\sum(\frac{v_{i}y_{i}}{v_{i}+y_{i}})
    + 288    """
    + 289    # return 1 - 2 * np.sum(v * y / (v + y))
    + 290    return 2 * np.sum(v * y / (v + y))
    + 291
    + 292
    + 293# def pearson_chi_squared_distance(v, y):
    + 294#    r"""
    + 295#    Pearson χ2 distance:
    + 296#
    + 297#    .. math::
    + 298#
    + 299#        \sum\frac{(v_{i}-y_{i})^2}{y_{i}}
    + 300#    """
    + 301#    return np.sum(np.power(v - y, 2) / y)
    + 302
    + 303
    + 304# def neyman_chi_squared_distance(v, y):
    + 305#    r"""
    + 306#    Neyman χ2 distance:
    + 307#
    + 308#    .. math::
    + 309#
    + 310#        \sum\frac{(v_{i}-y_{i})^2}{v_{i}}
    + 311#    """
    + 312#    return np.sum(np.power(v - y, 2) / v)
    + 313
    + 314
    + 315# def probabilistic_symmetric_chi_squared_distance(v, y):
    + 316#    r"""
    + 317#    Probabilistic symmetric χ2 distance:
    + 318#
    + 319#    .. math::
    + 320#
    + 321#        \frac{1}{2} \times \sum\frac{(v_{i}-y_{i}\ )^2}{v_{i}+y_{i}\ }
    + 322#    """
    + 323#    return 1 / 2 * np.sum(np.power(v - y, 2) / (v + y))
    + 324
    + 325
    + 326# def topsoe_distance(v, y):
    + 327#    r"""
    + 328#    Topsøe distance:
    + 329#
    + 330#    .. math::
    + 331#
    + 332#        \sum{(v_{i}ln\frac{v_{i}}{Z_i}+y_{i}ln\frac{y_{i}}{Z_i}),\ \ \ Z_i=\frac{1}{2}(v_{i}+y_{i})}
    + 333#    """
    + 334#    z = 1 / 2 * (v + y)
    + 335#    z[z == 0] = 1
    + 336#    vz = v / z
    + 337#    yz = y / z
    + 338#    vz[v == 0] = 1
    + 339#    yz[y == 0] = 1
    + 340#    return np.sum(v * np.log(vz) + y * np.log(yz))
    + 341
    + 342
    + 343def chernoff_distance(v, y):
    + 344    r"""Chernoff distance:
    + 345
    + 346    Parameters
    + 347    ----------
    + 348    v : array_like
    + 349        Vector 1
    + 350    y : array_like
    + 351        Vector 2
    + 352
    + 353    Returns
    + 354    -------
    + 355    float
    + 356        Chernoff distance between v and y
      357
    - 358    Parameters
    - 359    ----------
    - 360    v : array_like
    - 361        Vector 1
    - 362    y : array_like
    - 363        Vector 2
    - 364    
    - 365    Returns
    - 366    -------
    - 367    float
    - 368        Ruzicka distance between v and y
    - 369
    - 370    Notes
    - 371    -----
    - 372    .. math::
    - 373
    - 374        \frac{\sum{|v_{i}-y_{i}|}}{\sum{\max(v_{i},y_{i})}}
    - 375    """
    - 376    dist = np.sum(np.abs(v - y)) / np.sum(np.maximum(v, y))
    - 377    return dist
    - 378
    - 379
    - 380def roberts_distance(v, y):
    - 381    r""" Roberts distance:
    + 358    Notes
    + 359    -----
    + 360    .. math::
    + 361
    + 362        \max{(-ln\sum(v_{i}^ty_{i}^{1-t})^{1-t})},\ t=0.1,\ 0\le\ t<1
    + 363    """
    + 364    t = 0.1
    + 365    return np.max(-np.log(np.sum(np.power(np.power(v, t) * np.power(y, 1 - t), 1 - t))))
    + 366
    + 367
    + 368def ruzicka_distance(v, y):
    + 369    r"""Ruzicka distance:
    + 370
    + 371    Parameters
    + 372    ----------
    + 373    v : array_like
    + 374        Vector 1
    + 375    y : array_like
    + 376        Vector 2
    + 377
    + 378    Returns
    + 379    -------
    + 380    float
    + 381        Ruzicka distance between v and y
      382
    - 383    Parameters
    - 384    ----------
    - 385    v : array_like
    - 386        Vector 1
    - 387    y : array_like
    - 388        Vector 2
    - 389    
    - 390    Returns
    - 391    -------
    - 392    float
    - 393        Roberts distance between v and y
    - 394
    - 395    Notes
    - 396    -----
    - 397    .. math::
    - 398
    - 399        1-\sum\frac{(v_{i}+y_{i})\frac{\min{(v_{i},y_{i})}}{\max{(v_{i},y_{i})}}}{\sum(v_{i}+y_{i})}
    - 400    """
    - 401    return 1 - np.sum((v + y) * np.minimum(v, y) / np.maximum(v, y) / np.sum(v + y))
    + 383    Notes
    + 384    -----
    + 385    .. math::
    + 386
    + 387        \frac{\sum{|v_{i}-y_{i}|}}{\sum{\max(v_{i},y_{i})}}
    + 388    """
    + 389    dist = np.sum(np.abs(v - y)) / np.sum(np.maximum(v, y))
    + 390    return dist
    + 391
    + 392
    + 393def roberts_distance(v, y):
    + 394    r"""Roberts distance:
    + 395
    + 396    Parameters
    + 397    ----------
    + 398    v : array_like
    + 399        Vector 1
    + 400    y : array_like
    + 401        Vector 2
      402
    - 403
    - 404def intersection_distance(v, y):
    - 405    r""" Intersection distance:
    - 406
    - 407    Parameters
    - 408    ----------
    - 409    v : array_like
    - 410        Vector 1
    - 411    y : array_like
    - 412        Vector 2
    - 413    
    - 414    Returns
    - 415    -------
    - 416    float
    - 417        Intersection distance between v and y
    - 418
    - 419    Notes
    - 420    -----
    - 421    .. math::
    - 422
    - 423        1-\frac{\sum\min{(v_{i},y_{i})}}{\min(\sum{v_{i},\sum{y_{i})}}}
    - 424    """
    - 425    return 1 - np.sum(np.minimum(v, y)) / min(np.sum(v), np.sum(y))
    + 403    Returns
    + 404    -------
    + 405    float
    + 406        Roberts distance between v and y
    + 407
    + 408    Notes
    + 409    -----
    + 410    .. math::
    + 411
    + 412        1-\sum\frac{(v_{i}+y_{i})\frac{\min{(v_{i},y_{i})}}{\max{(v_{i},y_{i})}}}{\sum(v_{i}+y_{i})}
    + 413    """
    + 414    return 1 - np.sum((v + y) * np.minimum(v, y) / np.maximum(v, y) / np.sum(v + y))
    + 415
    + 416
    + 417def intersection_distance(v, y):
    + 418    r"""Intersection distance:
    + 419
    + 420    Parameters
    + 421    ----------
    + 422    v : array_like
    + 423        Vector 1
    + 424    y : array_like
    + 425        Vector 2
      426
    - 427
    - 428def motyka_distance(v, y):
    - 429    r""" Motyka distance:
    - 430
    - 431    Parameters
    - 432    ----------
    - 433    v : array_like
    - 434        Vector 1
    - 435    y : array_like
    - 436        Vector 2
    - 437    
    - 438    Returns
    - 439    -------
    - 440    float
    - 441        Motyka distance between v and y
    - 442    Notes
    - 443    -----
    - 444    .. math::
    - 445
    - 446        -\frac{\sum\min{(y_{i},v_{i})}}{\sum(y_{i}+v_{i})}
    - 447    """
    - 448    dist = np.sum(np.minimum(v, y)) / np.sum(v + y)
    - 449    return dist
    + 427    Returns
    + 428    -------
    + 429    float
    + 430        Intersection distance between v and y
    + 431
    + 432    Notes
    + 433    -----
    + 434    .. math::
    + 435
    + 436        1-\frac{\sum\min{(v_{i},y_{i})}}{\min(\sum{v_{i},\sum{y_{i})}}}
    + 437    """
    + 438    return 1 - np.sum(np.minimum(v, y)) / min(np.sum(v), np.sum(y))
    + 439
    + 440
    + 441def motyka_distance(v, y):
    + 442    r"""Motyka distance:
    + 443
    + 444    Parameters
    + 445    ----------
    + 446    v : array_like
    + 447        Vector 1
    + 448    y : array_like
    + 449        Vector 2
      450
    - 451
    - 452def canberra_distance(v, y):
    - 453    r""" Canberra distance:
    - 454
    - 455    Parameters
    - 456    ----------
    - 457    v : array_like
    - 458        Vector 1
    - 459    y : array_like
    - 460        Vector 2
    - 461    
    - 462    Returns
    - 463    -------
    - 464    float
    - 465        Canberra distance between v and y
    - 466
    - 467    Notes
    - 468    -----
    - 469    .. math::
    - 470
    - 471        #\sum\frac{|v_{i}-y_{i}|}{|v_{i}|+|y_{i}|}
    - 472        \sum_{i}\frac{|y_{i} - v_{i}|}{y_{i} + v_{i}}
    - 473    """
    - 474    #return np.sum(np.abs(v - y) / (np.abs(v) + np.abs(y)))
    - 475    return np.sum(np.abs(y - v)/(y + v))
    - 476
    - 477def canberra_metric(v, y):
    - 478    r""" Canberra Metric
    + 451    Returns
    + 452    -------
    + 453    float
    + 454        Motyka distance between v and y
    + 455    Notes
    + 456    -----
    + 457    .. math::
    + 458
    + 459        -\frac{\sum\min{(y_{i},v_{i})}}{\sum(y_{i}+v_{i})}
    + 460    """
    + 461    dist = np.sum(np.minimum(v, y)) / np.sum(v + y)
    + 462    return dist
    + 463
    + 464
    + 465def canberra_distance(v, y):
    + 466    r"""Canberra distance:
    + 467
    + 468    Parameters
    + 469    ----------
    + 470    v : array_like
    + 471        Vector 1
    + 472    y : array_like
    + 473        Vector 2
    + 474
    + 475    Returns
    + 476    -------
    + 477    float
    + 478        Canberra distance between v and y
      479
    - 480    Parameters
    - 481    ----------
    - 482    v : array_like
    - 483        Vector 1
    - 484    y : array_like
    - 485        Vector 2
    - 486    
    - 487    Returns
    - 488    -------
    - 489    float
    - 490        Canberra metric between v and y
    - 491    Notes
    - 492    -----
    - 493    .. math::
    - 494
    - 495        \frac{1}{\sum_{i}I(v_{i}\neq 0)}\sum_{i}\frac{|y_{i}-v_{i}|}{(y_{i}+v_{i})}
    - 496    """
    - 497
    - 498    return (1 / np.sum(v > 0)) * np.sum(np.abs(y - v)/(y + v))
    - 499
    + 480    Notes
    + 481    -----
    + 482    .. math::
    + 483
    + 484        #\sum\frac{|v_{i}-y_{i}|}{|v_{i}|+|y_{i}|}
    + 485        \sum_{i}\frac{|y_{i} - v_{i}|}{y_{i} + v_{i}}
    + 486    """
    + 487    # return np.sum(np.abs(v - y) / (np.abs(v) + np.abs(y)))
    + 488    return np.sum(np.abs(y - v) / (y + v))
    + 489
    + 490
    + 491def canberra_metric(v, y):
    + 492    r"""Canberra Metric
    + 493
    + 494    Parameters
    + 495    ----------
    + 496    v : array_like
    + 497        Vector 1
    + 498    y : array_like
    + 499        Vector 2
      500
    - 501def kulczynski_1_distance(v, y):
    - 502    r""" Kulczynski 1 distance:
    - 503
    - 504    Parameters
    - 505    ----------
    - 506    v : array_like
    - 507        Vector 1
    - 508    y : array_like
    - 509        Vector 2
    - 510    
    - 511    Returns
    - 512    -------
    - 513    float
    - 514        Kulczynski 1 distance between v and y
    - 515    
    - 516    Notes
    - 517    -----
    - 518    .. math::
    - 519
    - 520        \frac{\sum{|v_i}-y_i|}{\sum m\ i\ n\ (v_i,y_i)}
    - 521    """
    - 522    return np.sum(np.abs(y - v)) / np.sum(np.minimum(y, v))
    - 523
    + 501    Returns
    + 502    -------
    + 503    float
    + 504        Canberra metric between v and y
    + 505    Notes
    + 506    -----
    + 507    .. math::
    + 508
    + 509        \frac{1}{\sum_{i}I(v_{i}\neq 0)}\sum_{i}\frac{|y_{i}-v_{i}|}{(y_{i}+v_{i})}
    + 510    """
    + 511
    + 512    return (1 / np.sum(v > 0)) * np.sum(np.abs(y - v) / (y + v))
    + 513
    + 514
    + 515def kulczynski_1_distance(v, y):
    + 516    r"""Kulczynski 1 distance:
    + 517
    + 518    Parameters
    + 519    ----------
    + 520    v : array_like
    + 521        Vector 1
    + 522    y : array_like
    + 523        Vector 2
      524
    - 525def baroni_urbani_buser_distance(v, y):
    - 526    r""" Baroni-Urbani-Buser distance:
    - 527
    - 528    Parameters
    - 529    ----------
    - 530    v : array_like
    - 531        Vector 1
    - 532    y : array_like
    - 533        Vector 2
    - 534    
    - 535    Returns
    - 536    -------
    - 537    float
    - 538        Baroni-Urbani-Buser distance between v and y
    - 539
    - 540    Notes
    - 541    -----
    - 542    .. math::
    - 543
    - 544        1-\frac{\sum\min{(v_i,y_i)}+\sqrt{\sum\min{(v_i,y_i)}\sum(\max{(v)}-\max{(v_i,y_i)})}}{\sum{\max{(v_i,y_i)}+\sqrt{\sum{\min{(v_i,y_i)}\sum(\max{(v)}-\max{(v_i,y_i)})}}}}
    - 545    """
    - 546    if np.max(v) < np.max(y):
    - 547        v, y = y, v
    - 548    d1 = np.sqrt(np.sum(np.minimum(v, y) * np.sum(max(v) - np.maximum(v, y))))
    - 549    return 1 - (np.sum(np.minimum(v, y)) + d1) / (np.sum(np.maximum(v, y)) + d1)
    - 550
    - 551
    - 552def penrose_size_distance(v, y):
    - 553    r""" Penrose size distance:
    - 554    
    - 555    Parameters
    - 556    ----------
    - 557    v : array_like
    - 558        Vector 1
    - 559    y : array_like
    - 560        Vector 2
    - 561    
    - 562    Returns
    - 563    -------
    - 564    float
    - 565        Penrose size distance between v and y
    - 566
    - 567    Notes
    - 568    -----
    - 569    .. math::
    - 570
    - 571        \sqrt N\sum{|y_i-v_i|}
    - 572    """
    - 573    n = np.sum(v > 0)
    - 574    return np.sqrt(n) * np.sum(np.abs(y - v))
    + 525    Returns
    + 526    -------
    + 527    float
    + 528        Kulczynski 1 distance between v and y
    + 529
    + 530    Notes
    + 531    -----
    + 532    .. math::
    + 533
    + 534        \frac{\sum{|v_i}-y_i|}{\sum m\ i\ n\ (v_i,y_i)}
    + 535    """
    + 536    return np.sum(np.abs(y - v)) / np.sum(np.minimum(y, v))
    + 537
    + 538
    + 539def baroni_urbani_buser_distance(v, y):
    + 540    r"""Baroni-Urbani-Buser distance:
    + 541
    + 542    Parameters
    + 543    ----------
    + 544    v : array_like
    + 545        Vector 1
    + 546    y : array_like
    + 547        Vector 2
    + 548
    + 549    Returns
    + 550    -------
    + 551    float
    + 552        Baroni-Urbani-Buser distance between v and y
    + 553
    + 554    Notes
    + 555    -----
    + 556    .. math::
    + 557
    + 558        1-\frac{\sum\min{(v_i,y_i)}+\sqrt{\sum\min{(v_i,y_i)}\sum(\max{(v)}-\max{(v_i,y_i)})}}{\sum{\max{(v_i,y_i)}+\sqrt{\sum{\min{(v_i,y_i)}\sum(\max{(v)}-\max{(v_i,y_i)})}}}}
    + 559    """
    + 560    if np.max(v) < np.max(y):
    + 561        v, y = y, v
    + 562    d1 = np.sqrt(np.sum(np.minimum(v, y) * np.sum(max(v) - np.maximum(v, y))))
    + 563    return 1 - (np.sum(np.minimum(v, y)) + d1) / (np.sum(np.maximum(v, y)) + d1)
    + 564
    + 565
    + 566def penrose_size_distance(v, y):
    + 567    r"""Penrose size distance:
    + 568
    + 569    Parameters
    + 570    ----------
    + 571    v : array_like
    + 572        Vector 1
    + 573    y : array_like
    + 574        Vector 2
      575
    - 576
    - 577def mean_character_distance(v, y):
    - 578    r"""
    - 579    Mean character distance:
    + 576    Returns
    + 577    -------
    + 578    float
    + 579        Penrose size distance between v and y
      580
    - 581    Parameters
    - 582    ----------
    - 583    v : array_like
    - 584        Vector 1
    - 585    y : array_like
    - 586        Vector 2
    - 587    
    - 588    Returns
    - 589    -------
    - 590    float
    - 591        Mean character distance between v and y
    - 592
    - 593    Notes
    - 594    -----
    - 595    .. math::
    - 596
    - 597        \frac{1}{N}\sum{|y_i-v_i|}
    - 598    """
    - 599    n = np.sum(v > 0)
    - 600    return 1 / n * np.sum(np.abs(y - v))
    + 581    Notes
    + 582    -----
    + 583    .. math::
    + 584
    + 585        \sqrt N\sum{|y_i-v_i|}
    + 586    """
    + 587    n = np.sum(v > 0)
    + 588    return np.sqrt(n) * np.sum(np.abs(y - v))
    + 589
    + 590
    + 591def mean_character_distance(v, y):
    + 592    r"""
    + 593    Mean character distance:
    + 594
    + 595    Parameters
    + 596    ----------
    + 597    v : array_like
    + 598        Vector 1
    + 599    y : array_like
    + 600        Vector 2
      601
    - 602
    - 603def lorentzian_distance(v, y):
    - 604    r"""
    - 605    Lorentzian distance:
    + 602    Returns
    + 603    -------
    + 604    float
    + 605        Mean character distance between v and y
      606
    - 607    Parameters
    - 608    ----------
    - 609    v : array_like
    - 610        Vector 1
    - 611    y : array_like
    - 612        Vector 2
    - 613    
    - 614    Returns
    - 615    -------
    - 616    float
    - 617        Lorentzian distance between v and y
    - 618
    - 619    Notes
    - 620    -----
    - 621    .. math::
    - 622
    - 623        \sum{\ln(1+|v_i-y_i|)}
    - 624    """
    - 625    return np.sum(np.log(1 + np.abs(y - v)))
    - 626
    + 607    Notes
    + 608    -----
    + 609    .. math::
    + 610
    + 611        \frac{1}{N}\sum{|y_i-v_i|}
    + 612    """
    + 613    n = np.sum(v > 0)
    + 614    return 1 / n * np.sum(np.abs(y - v))
    + 615
    + 616
    + 617def lorentzian_distance(v, y):
    + 618    r"""
    + 619    Lorentzian distance:
    + 620
    + 621    Parameters
    + 622    ----------
    + 623    v : array_like
    + 624        Vector 1
    + 625    y : array_like
    + 626        Vector 2
      627
    - 628def penrose_shape_distance(v, y):
    - 629    r"""
    - 630    Penrose shape distance:
    - 631
    - 632    Parameters
    - 633    ----------
    - 634    v : array_like
    - 635        Vector 1
    - 636    y : array_like
    - 637        Vector 2
    - 638    
    - 639    Returns
    - 640    -------
    - 641    float
    - 642        Penrose shape distance between v and y
    - 643    Notes
    - 644    -----
    - 645    .. math::
    - 646
    - 647        \sqrt{\sum((v_i-\bar{v})-(y_i-\bar{y}))^2}
    - 648    """
    - 649    v_avg = np.mean(v)
    - 650    y_avg = np.mean(y)
    - 651    return np.sqrt(np.sum(np.power((y - y_avg) - (v - v_avg), 2)))
    + 628    Returns
    + 629    -------
    + 630    float
    + 631        Lorentzian distance between v and y
    + 632
    + 633    Notes
    + 634    -----
    + 635    .. math::
    + 636
    + 637        \sum{\ln(1+|v_i-y_i|)}
    + 638    """
    + 639    return np.sum(np.log(1 + np.abs(y - v)))
    + 640
    + 641
    + 642def penrose_shape_distance(v, y):
    + 643    r"""
    + 644    Penrose shape distance:
    + 645
    + 646    Parameters
    + 647    ----------
    + 648    v : array_like
    + 649        Vector 1
    + 650    y : array_like
    + 651        Vector 2
      652
    - 653
    - 654def clark_distance(v, y):
    - 655    r"""
    - 656    Clark distance:
    - 657
    - 658    Parameters
    - 659    ----------
    - 660    v : array_like
    - 661        Vector 1
    - 662    y : array_like
    - 663        Vector 2
    - 664    
    - 665    Returns
    - 666    -------
    - 667    float
    - 668        Clark distance between v and y
    - 669
    - 670    Notes
    - 671    -----
    - 672    .. math::
    - 673
    - 674        #(\frac{1}{N}\sum(\frac{v_i-y_i}{|v_i|+|y_i|})^2)^\frac{1}{2}
    - 675        \sqrt{\sum(\frac{|v_i-y_i|}{v_i+y_i})^2}
    - 676    """
    - 677    #n = np.sum(v > 0)
    - 678    #return np.sqrt(1 / n * np.sum(np.power((v - y) / (np.abs(v) + np.abs(y)), 2)))
    - 679    return np.sqrt(np.sum(np.power(np.abs(y - v) / (y + v), 2)))
    - 680
    - 681
    - 682def hellinger_distance(v, y):
    - 683    r"""
    - 684    Hellinger distance:
    - 685
    - 686    Parameters
    - 687    ----------
    - 688    v : array_like
    - 689        Vector 1
    - 690    y : array_like
    - 691        Vector 2
    - 692    
    - 693    Returns
    - 694    -------
    - 695    float
    - 696        Hellinger distance between v and y
    - 697
    - 698    Notes
    - 699    -----
    - 700    .. math::
    - 701
    - 702        #\sqrt{2\sum(\sqrt{\frac{v_i}{\bar{v}}}-\sqrt{\frac{y_i}{\bar{y}}})^2}
    - 703        \sqrt{2\sum(\sqrt{v_i}-\sqrt{y_i})^2}
    - 704    """
    - 705    #v_avg = np.mean(v)
    - 706    #y_avg = np.mean(y)
    - 707    #return np.sqrt(2 * np.sum(np.power(np.sqrt(v / v_avg) - np.sqrt(y / y_avg), 2)))
    - 708    return np.sqrt(2 * np.sum(np.power(np.sqrt(y) - np.sqrt(v), 2)))
    - 709
    - 710
    - 711def whittaker_index_of_association_distance(v, y):
    - 712    r"""
    - 713    Whittaker index of association distance:
    - 714
    - 715    Parameters
    - 716    ----------
    - 717    v : array_like
    - 718        Vector 1
    - 719    y : array_like
    - 720        Vector 2
    - 721    
    - 722    Returns
    - 723    -------
    - 724    float
    - 725        Whittaker index of association distance between v and y
    - 726
    - 727    Notes
    - 728    -----
    - 729    .. math::
    - 730
    - 731        \frac{1}{2}\sum|\frac{v_i}{\bar{v}}-\frac{y_i}{\bar{y}}|
    - 732    """
    - 733    v_avg = np.mean(v)
    - 734    y_avg = np.mean(y)
    - 735    return 1 / 2 * np.sum(np.abs(v / v_avg - y / y_avg))
    - 736
    - 737
    - 738#def symmetric_chi_squared_distance(v, y):
    - 739#    r"""
    - 740#    Symmetric χ2 distance:
    - 741#
    - 742#    .. math::
    - 743#
    - 744#        \sqrt{\sum{\frac{\bar{v}+\bar{y}}{N(\bar{v}+\bar{y})^2}\frac{(v_i\bar{y}-y_i\bar{v})^2}{v_i+y_i}\ }}
    - 745#    """
    - 746#    v_avg = np.mean(v)
    - 747#    y_avg = np.mean(y)
    - 748#    n = np.sum(v > 0)
    - 749#
    - 750#    d1 = (v_avg + y_avg) / (n * np.power(v_avg + y_avg, 2))
    - 751#    return np.sqrt(d1 * np.sum(np.power(v * y_avg - y * v_avg, 2) / (v + y)))
    - 752
    - 753def similarity_index_distance(v, y):
    - 754    r"""
    - 755    Similarity Index Distance:
    - 756
    - 757    Parameters
    - 758    ----------
    - 759    v : array_like
    - 760        Vector 1
    - 761    y : array_like
    - 762        Vector 2
    - 763    
    - 764    Returns
    - 765    -------
    - 766    float
    - 767        Similarity Index Distance between v and y
    - 768
    - 769    Notes
    - 770    -----
    - 771    .. math::
    - 772
    - 773        \sqrt{\frac{\sum\{\frac{v_i-y_i}{y_i}\}^2}{N}}
    - 774    """
    - 775    n = np.sum(v > 0)
    - 776    return np.sqrt(1 / n * np.sum(np.power((v - y) / y, 2)))
    - 777
    + 653    Returns
    + 654    -------
    + 655    float
    + 656        Penrose shape distance between v and y
    + 657    Notes
    + 658    -----
    + 659    .. math::
    + 660
    + 661        \sqrt{\sum((v_i-\bar{v})-(y_i-\bar{y}))^2}
    + 662    """
    + 663    v_avg = np.mean(v)
    + 664    y_avg = np.mean(y)
    + 665    return np.sqrt(np.sum(np.power((y - y_avg) - (v - v_avg), 2)))
    + 666
    + 667
    + 668def clark_distance(v, y):
    + 669    r"""
    + 670    Clark distance:
    + 671
    + 672    Parameters
    + 673    ----------
    + 674    v : array_like
    + 675        Vector 1
    + 676    y : array_like
    + 677        Vector 2
    + 678
    + 679    Returns
    + 680    -------
    + 681    float
    + 682        Clark distance between v and y
    + 683
    + 684    Notes
    + 685    -----
    + 686    .. math::
    + 687
    + 688        #(\frac{1}{N}\sum(\frac{v_i-y_i}{|v_i|+|y_i|})^2)^\frac{1}{2}
    + 689        \sqrt{\sum(\frac{|v_i-y_i|}{v_i+y_i})^2}
    + 690    """
    + 691    # n = np.sum(v > 0)
    + 692    # return np.sqrt(1 / n * np.sum(np.power((v - y) / (np.abs(v) + np.abs(y)), 2)))
    + 693    return np.sqrt(np.sum(np.power(np.abs(y - v) / (y + v), 2)))
    + 694
    + 695
    + 696def hellinger_distance(v, y):
    + 697    r"""
    + 698    Hellinger distance:
    + 699
    + 700    Parameters
    + 701    ----------
    + 702    v : array_like
    + 703        Vector 1
    + 704    y : array_like
    + 705        Vector 2
    + 706
    + 707    Returns
    + 708    -------
    + 709    float
    + 710        Hellinger distance between v and y
    + 711
    + 712    Notes
    + 713    -----
    + 714    .. math::
    + 715
    + 716        #\sqrt{2\sum(\sqrt{\frac{v_i}{\bar{v}}}-\sqrt{\frac{y_i}{\bar{y}}})^2}
    + 717        \sqrt{2\sum(\sqrt{v_i}-\sqrt{y_i})^2}
    + 718    """
    + 719    # v_avg = np.mean(v)
    + 720    # y_avg = np.mean(y)
    + 721    # return np.sqrt(2 * np.sum(np.power(np.sqrt(v / v_avg) - np.sqrt(y / y_avg), 2)))
    + 722    return np.sqrt(2 * np.sum(np.power(np.sqrt(y) - np.sqrt(v), 2)))
    + 723
    + 724
    + 725def whittaker_index_of_association_distance(v, y):
    + 726    r"""
    + 727    Whittaker index of association distance:
    + 728
    + 729    Parameters
    + 730    ----------
    + 731    v : array_like
    + 732        Vector 1
    + 733    y : array_like
    + 734        Vector 2
    + 735
    + 736    Returns
    + 737    -------
    + 738    float
    + 739        Whittaker index of association distance between v and y
    + 740
    + 741    Notes
    + 742    -----
    + 743    .. math::
    + 744
    + 745        \frac{1}{2}\sum|\frac{v_i}{\bar{v}}-\frac{y_i}{\bar{y}}|
    + 746    """
    + 747    v_avg = np.mean(v)
    + 748    y_avg = np.mean(y)
    + 749    return 1 / 2 * np.sum(np.abs(v / v_avg - y / y_avg))
    + 750
    + 751
    + 752# def symmetric_chi_squared_distance(v, y):
    + 753#    r"""
    + 754#    Symmetric χ2 distance:
    + 755#
    + 756#    .. math::
    + 757#
    + 758#        \sqrt{\sum{\frac{\bar{v}+\bar{y}}{N(\bar{v}+\bar{y})^2}\frac{(v_i\bar{y}-y_i\bar{v})^2}{v_i+y_i}\ }}
    + 759#    """
    + 760#    v_avg = np.mean(v)
    + 761#    y_avg = np.mean(y)
    + 762#    n = np.sum(v > 0)
    + 763#
    + 764#    d1 = (v_avg + y_avg) / (n * np.power(v_avg + y_avg, 2))
    + 765#    return np.sqrt(d1 * np.sum(np.power(v * y_avg - y * v_avg, 2) / (v + y)))
    + 766
    + 767
    + 768def similarity_index_distance(v, y):
    + 769    r"""
    + 770    Similarity Index Distance:
    + 771
    + 772    Parameters
    + 773    ----------
    + 774    v : array_like
    + 775        Vector 1
    + 776    y : array_like
    + 777        Vector 2
      778
    - 779def improved_similarity_distance(v, y):
    - 780    r"""
    - 781    Improved Similarity Index:
    - 782
    - 783    Parameters
    - 784    ----------
    - 785    v : array_like
    - 786        Vector 1
    - 787    y : array_like
    - 788        Vector 2
    - 789    
    - 790    Returns
    - 791    -------
    - 792    float
    - 793        Improved Similarity Index between v and y
    - 794
    - 795    Notes
    - 796    -----
    - 797    .. math::
    - 798
    - 799        \sqrt{\frac{1}{N}\sum\{\frac{y_i-v_i}{y_i+v_i}\}^2}
    - 800    """
    - 801    n = np.sum(v > 0)
    - 802    return np.sqrt(1 / n * np.sum(np.power((y - v) / (y + v), 2)))
    - 803
    + 779    Returns
    + 780    -------
    + 781    float
    + 782        Similarity Index Distance between v and y
    + 783
    + 784    Notes
    + 785    -----
    + 786    .. math::
    + 787
    + 788        \sqrt{\frac{\sum\{\frac{v_i-y_i}{y_i}\}^2}{N}}
    + 789    """
    + 790    n = np.sum(v > 0)
    + 791    return np.sqrt(1 / n * np.sum(np.power((v - y) / y, 2)))
    + 792
    + 793
    + 794def improved_similarity_distance(v, y):
    + 795    r"""
    + 796    Improved Similarity Index:
    + 797
    + 798    Parameters
    + 799    ----------
    + 800    v : array_like
    + 801        Vector 1
    + 802    y : array_like
    + 803        Vector 2
      804
    - 805def absolute_value_distance(v, y):
    - 806    r"""
    - 807    Absolute Value Distance:
    - 808
    - 809    Parameters
    - 810    ----------
    - 811    v : array_like
    - 812        Vector 1
    - 813    y : array_like
    - 814        Vector 2
    - 815    
    - 816    Returns
    - 817    -------
    - 818    float
    - 819        Absolute Value Distance between v and y
    - 820
    - 821    Notes
    - 822    -----
    - 823    .. math::
    - 824
    - 825        \frac { \sum(|y_i-v_i|)}{\sum v_i}
    - 826
    - 827    """
    - 828    dist = np.sum(np.abs(y - v)) / np.sum(v)
    - 829    return dist
    + 805    Returns
    + 806    -------
    + 807    float
    + 808        Improved Similarity Index between v and y
    + 809
    + 810    Notes
    + 811    -----
    + 812    .. math::
    + 813
    + 814        \sqrt{\frac{1}{N}\sum\{\frac{y_i-v_i}{y_i+v_i}\}^2}
    + 815    """
    + 816    n = np.sum(v > 0)
    + 817    return np.sqrt(1 / n * np.sum(np.power((y - v) / (y + v), 2)))
    + 818
    + 819
    + 820def absolute_value_distance(v, y):
    + 821    r"""
    + 822    Absolute Value Distance:
    + 823
    + 824    Parameters
    + 825    ----------
    + 826    v : array_like
    + 827        Vector 1
    + 828    y : array_like
    + 829        Vector 2
      830
    - 831def spectral_contrast_angle_distance(v, y):
    - 832    r"""
    - 833    Spectral Contrast Angle:
    - 834
    - 835    Parameters
    - 836    ----------
    - 837    v : array_like
    - 838        Vector 1
    - 839    y : array_like
    - 840        Vector 2
    - 841    
    - 842    Returns
    - 843    -------
    - 844    float
    - 845        Spectral Contrast Angle between v and y
    + 831    Returns
    + 832    -------
    + 833    float
    + 834        Absolute Value Distance between v and y
    + 835
    + 836    Notes
    + 837    -----
    + 838    .. math::
    + 839
    + 840        \frac { \sum(|y_i-v_i|)}{\sum v_i}
    + 841
    + 842    """
    + 843    dist = np.sum(np.abs(y - v)) / np.sum(v)
    + 844    return dist
    + 845
      846
    - 847    Notes
    - 848    -----
    - 849    .. math::
    + 847def spectral_contrast_angle_distance(v, y):
    + 848    r"""
    + 849    Spectral Contrast Angle:
      850
    - 851        1 - \frac{\sum{y_iv_i}}{\sqrt{\sum y_i^2\sum v_i^2}}
    - 852        \arccos(\frac{\sum_{P}y_{p}^* v_{p}^*}{\sqrt{\sum_{P}y_{p}^{*2} \sum_{P}v_{p}^{*2}}})
    - 853    """
    - 854    #return 1 - np.sum(y * v) / \
    - 855    #       np.sqrt(np.sum(np.power(y, 2)) * np.sum(np.power(v, 2)))
    - 856
    - 857    return np.arccos(np.sum(y * v) / (np.sqrt(np.sum(np.power(y, 2)) * np.sum(np.power(v, 2)))))
    - 858
    - 859
    - 860def wave_hedges_distance(v, y):
    - 861    r"""
    - 862    Wave Hedges distance:
    - 863
    - 864    Parameters
    - 865    ----------
    - 866    v : array_like
    - 867        Vector 1
    - 868    y : array_like
    - 869        Vector 2
    - 870    
    - 871    Returns
    - 872    -------
    - 873    float
    - 874        Wave Hedges distance between v and y
    - 875
    - 876    Notes
    - 877    -----
    - 878    .. math::
    - 879
    - 880        \sum\frac{|v_i-y_i|}{\max{(v_i,y_i)}}
    - 881    """
    - 882    return np.sum(np.abs(v - y) / np.maximum(v, y))
    - 883
    - 884def dice_similarity(v, y):
    - 885    r"""
    - 886    Dice similarity:
    - 887
    - 888    Parameters
    - 889    ----------
    - 890    v : array_like
    - 891        Vector 1
    - 892    y : array_like
    - 893        Vector 2
    - 894    
    - 895    Returns
    - 896    -------
    - 897    float
    - 898        Dice similarity between v and y
    - 899
    - 900    Notes
    - 901    -----
    - 902    .. math::
    - 903
    - 904        \frac{\sum(v_i-y_i)^2}{\sum v_i^2+\sum y_i^2}
    - 905        \frac{2 * \sum_{i}v_{i}y_{i}}{\sum_{i}y_{i}^2 + \sum_{i}v_{i}^2}
    - 906    """
    - 907    return 2 * np.sum(v * y) / (np.sum(np.power(v, 2)) + np.sum(np.power(y, 2)))
    - 908
    - 909
    - 910def inner_product_distance(v, y):
    - 911    r"""
    - 912    Inner Product distance:
    + 851    Parameters
    + 852    ----------
    + 853    v : array_like
    + 854        Vector 1
    + 855    y : array_like
    + 856        Vector 2
    + 857
    + 858    Returns
    + 859    -------
    + 860    float
    + 861        Spectral Contrast Angle between v and y
    + 862
    + 863    Notes
    + 864    -----
    + 865    .. math::
    + 866
    + 867        1 - \frac{\sum{y_iv_i}}{\sqrt{\sum y_i^2\sum v_i^2}}
    + 868        \arccos(\frac{\sum_{P}y_{p}^* v_{p}^*}{\sqrt{\sum_{P}y_{p}^{*2} \sum_{P}v_{p}^{*2}}})
    + 869    """
    + 870    # return 1 - np.sum(y * v) / \
    + 871    #       np.sqrt(np.sum(np.power(y, 2)) * np.sum(np.power(v, 2)))
    + 872
    + 873    return np.arccos(
    + 874        np.sum(y * v) / (np.sqrt(np.sum(np.power(y, 2)) * np.sum(np.power(v, 2))))
    + 875    )
    + 876
    + 877
    + 878def wave_hedges_distance(v, y):
    + 879    r"""
    + 880    Wave Hedges distance:
    + 881
    + 882    Parameters
    + 883    ----------
    + 884    v : array_like
    + 885        Vector 1
    + 886    y : array_like
    + 887        Vector 2
    + 888
    + 889    Returns
    + 890    -------
    + 891    float
    + 892        Wave Hedges distance between v and y
    + 893
    + 894    Notes
    + 895    -----
    + 896    .. math::
    + 897
    + 898        \sum\frac{|v_i-y_i|}{\max{(v_i,y_i)}}
    + 899    """
    + 900    return np.sum(np.abs(v - y) / np.maximum(v, y))
    + 901
    + 902
    + 903def dice_similarity(v, y):
    + 904    r"""
    + 905    Dice similarity:
    + 906
    + 907    Parameters
    + 908    ----------
    + 909    v : array_like
    + 910        Vector 1
    + 911    y : array_like
    + 912        Vector 2
      913
    - 914    Parameters
    - 915    ----------
    - 916    v : array_like
    - 917        Vector 1
    - 918    y : array_like
    - 919        Vector 2
    - 920    
    - 921    Returns
    - 922    -------
    - 923    float
    - 924        Inner product distance between v and y
    - 925
    - 926    Notes
    - 927    -----
    - 928    .. math::
    - 929
    - 930        1-\sum{v_iy_i}
    - 931    """
    - 932    return 1 - np.sum(v * y)
    - 933
    - 934
    - 935def divergence_distance(v, y):
    - 936    r"""
    - 937    Divergence distance:
    - 938
    - 939    Parameters
    - 940    ----------
    - 941    v : array_like
    - 942        Vector 1
    - 943    y : array_like
    - 944        Vector 2
    - 945    
    - 946    Returns
    - 947    -------
    - 948    float
    - 949        Divergence distance between v and y
    - 950
    - 951    Notes
    - 952    -----
    - 953    .. math::
    - 954
    - 955        2\sum\frac{(v_i-y_i)^2}{(v_i+y_i)^2}
    - 956    """
    - 957    return 2 * np.sum((np.power(v - y, 2)) / np.power(v + y, 2))
    - 958
    - 959
    - 960def _chi_squared_distance(v, y):
    - 961    r"""
    - 962    Additive symmetric χ2 distance:
    - 963
    - 964    Parameters
    - 965    ----------
    - 966    v : array_like
    - 967        Vector 1
    - 968    y : array_like
    - 969        Vector 2
    - 970    
    - 971    Returns
    - 972    -------
    - 973    float
    - 974        Additive symmetric χ2 distance between v and y
    - 975
    - 976    Notes
    - 977    -----
    - 978    .. math::
    - 979
    - 980        \sum\frac{(v_i-y_i)^2(v_i+y_i)}{v_iy_i}
    - 981    """
    - 982    dist = np.sum(np.power(v - y, 2) * (v + y) / (v * y))
    - 983    return dist
    - 984
    - 985
    - 986def jensen_difference_distance(v, y):
    - 987    r"""
    - 988    Jensen difference:
    - 989    
    - 990    Parameters
    - 991    ----------
    - 992    v : array_like
    - 993        Vector 1
    - 994    y : array_like
    - 995        Vector 2
    - 996    
    - 997    Returns
    - 998    -------
    - 999    float
    -1000        Jensen difference distance between v and y
    -1001
    -1002    Notes
    -1003    -----
    -1004    .. math::
    -1005
    -1006        \sum[\frac{1}{2}(v_i\ln{v_i}+y_i\ln{y_i})-(\frac{v_i+y_i}{2})\ln{(\frac{v_i+y_i}{2})}]
    -1007    """
    -1008    y_v_avg = (y + v) / 2
    -1009    return np.sum(
    -1010        1 / 2 * (y * np.log(y) + v * np.log(v)) -
    -1011        y_v_avg * np.log(y_v_avg)
    -1012    )
    -1013
    -1014
    -1015def kumar_johnson_distance(v, y):
    -1016    r"""
    -1017    Kumar-Johnson distance:
    -1018
    -1019    Parameters
    -1020    ----------
    -1021    v : array_like
    -1022        Vector 1
    -1023    y : array_like
    -1024        Vector 2
    -1025    
    -1026    Returns
    -1027    -------
    -1028    float
    -1029        Kumar Johnson distance between v and y
    + 914    Returns
    + 915    -------
    + 916    float
    + 917        Dice similarity between v and y
    + 918
    + 919    Notes
    + 920    -----
    + 921    .. math::
    + 922
    + 923        \frac{\sum(v_i-y_i)^2}{\sum v_i^2+\sum y_i^2}
    + 924        \frac{2 * \sum_{i}v_{i}y_{i}}{\sum_{i}y_{i}^2 + \sum_{i}v_{i}^2}
    + 925    """
    + 926    return 2 * np.sum(v * y) / (np.sum(np.power(v, 2)) + np.sum(np.power(y, 2)))
    + 927
    + 928
    + 929def inner_product_distance(v, y):
    + 930    r"""
    + 931    Inner Product distance:
    + 932
    + 933    Parameters
    + 934    ----------
    + 935    v : array_like
    + 936        Vector 1
    + 937    y : array_like
    + 938        Vector 2
    + 939
    + 940    Returns
    + 941    -------
    + 942    float
    + 943        Inner product distance between v and y
    + 944
    + 945    Notes
    + 946    -----
    + 947    .. math::
    + 948
    + 949        1-\sum{v_iy_i}
    + 950    """
    + 951    return 1 - np.sum(v * y)
    + 952
    + 953
    + 954def divergence_distance(v, y):
    + 955    r"""
    + 956    Divergence distance:
    + 957
    + 958    Parameters
    + 959    ----------
    + 960    v : array_like
    + 961        Vector 1
    + 962    y : array_like
    + 963        Vector 2
    + 964
    + 965    Returns
    + 966    -------
    + 967    float
    + 968        Divergence distance between v and y
    + 969
    + 970    Notes
    + 971    -----
    + 972    .. math::
    + 973
    + 974        2\sum\frac{(v_i-y_i)^2}{(v_i+y_i)^2}
    + 975    """
    + 976    return 2 * np.sum((np.power(v - y, 2)) / np.power(v + y, 2))
    + 977
    + 978
    + 979def _chi_squared_distance(v, y):
    + 980    r"""
    + 981    Additive symmetric χ2 distance:
    + 982
    + 983    Parameters
    + 984    ----------
    + 985    v : array_like
    + 986        Vector 1
    + 987    y : array_like
    + 988        Vector 2
    + 989
    + 990    Returns
    + 991    -------
    + 992    float
    + 993        Additive symmetric χ2 distance between v and y
    + 994
    + 995    Notes
    + 996    -----
    + 997    .. math::
    + 998
    + 999        \sum\frac{(v_i-y_i)^2(v_i+y_i)}{v_iy_i}
    +1000    """
    +1001    dist = np.sum(np.power(v - y, 2) * (v + y) / (v * y))
    +1002    return dist
    +1003
    +1004
    +1005def jensen_difference_distance(v, y):
    +1006    r"""
    +1007    Jensen difference:
    +1008
    +1009    Parameters
    +1010    ----------
    +1011    v : array_like
    +1012        Vector 1
    +1013    y : array_like
    +1014        Vector 2
    +1015
    +1016    Returns
    +1017    -------
    +1018    float
    +1019        Jensen difference distance between v and y
    +1020
    +1021    Notes
    +1022    -----
    +1023    .. math::
    +1024
    +1025        \sum[\frac{1}{2}(v_i\ln{v_i}+y_i\ln{y_i})-(\frac{v_i+y_i}{2})\ln{(\frac{v_i+y_i}{2})}]
    +1026    """
    +1027    y_v_avg = (y + v) / 2
    +1028    return np.sum(1 / 2 * (y * np.log(y) + v * np.log(v)) - y_v_avg * np.log(y_v_avg))
    +1029
     1030
    -1031    Notes
    -1032    -----
    -1033    .. math::
    +1031def kumar_johnson_distance(v, y):
    +1032    r"""
    +1033    Kumar-Johnson distance:
     1034
    -1035        \sum\frac{(v_i^2-y_i^2)^2}{2(v_iy_i)^\frac{3}{2}}
    -1036    """
    -1037    return np.sum(
    -1038        np.power(np.power(v, 2) - np.power(y, 2), 2) / \
    -1039        (2 * np.power(v * y, 3 / 2))
    -1040    )
    +1035    Parameters
    +1036    ----------
    +1037    v : array_like
    +1038        Vector 1
    +1039    y : array_like
    +1040        Vector 2
     1041
    -1042
    -1043def avg_l_distance(v, y):
    -1044    r"""
    -1045    Avg (L1, L∞) distance:
    +1042    Returns
    +1043    -------
    +1044    float
    +1045        Kumar Johnson distance between v and y
     1046
    -1047    Parameters
    -1048    ----------
    -1049    v : array_like
    -1050        Vector 1
    -1051    y : array_like
    -1052        Vector 2
    -1053    
    -1054    Returns
    -1055    -------
    -1056    float
    -1057        Average L distance between v and y
    -1058
    -1059    Notes
    -1060    -----
    -1061    .. math::
    -1062
    -1063        \frac{1}{2}(\sum|v_i-y_i|+\underset{i}{\max}{|v_i-y_i|})
    -1064    """
    -1065    return 1 / 2 * (np.sum(np.abs(v - y)) + max(np.abs(v - y)))
    -1066
    -1067
    -1068def vicis_wave_hadges_distance(v, y):
    -1069    r"""
    -1070    Vicis-Wave Hadges distance:
    -1071
    -1072    Parameters
    -1073    ----------
    -1074    v : array_like
    -1075        Vector 1
    -1076    y : array_like
    -1077        Vector 2
    -1078    
    -1079    Returns
    -1080    -------
    -1081    float
    -1082        Vicis Wave Hadges distance between v and y
    -1083
    -1084    Notes
    -1085    -----
    -1086    .. math::
    -1087
    -1088        \sum\frac{|v_i-y_i|}{\min{(v_i,\ y_i)}}
    -1089    """
    -1090    return np.sum(np.abs(v - y) / np.minimum(v, y))
    -1091
    -1092
    -1093def vicis_symmetric_chi_squared_1_distance(v, y):
    -1094    r"""
    -1095    Vicis-Symmetric χ2 1 distance:
    -1096
    -1097    Parameters
    -1098    ----------
    -1099    v : array_like
    -1100        Vector 1
    -1101    y : array_like
    -1102        Vector 2
    -1103    
    -1104    Returns
    -1105    -------
    -1106    float
    -1107        Vici Symmetric χ2 1 distance between v and y
    -1108
    -1109    Notes
    -1110    -----
    -1111    .. math::
    -1112
    -1113        \sum\frac{(v_i-y_i)^2}{\min{(v_i,y_i)^2}}
    -1114    """
    -1115    return np.sum(np.power(v - y, 2) / np.power(np.minimum(v, y), 2))
    -1116
    -1117
    -1118def vicis_symmetric_chi_squared_2_distance(v, y):
    -1119    r"""
    -1120    Vicis-Symmetric χ2 2 distance:
    -1121    
    -1122    Parameters
    -1123    ----------
    -1124    v : array_like
    -1125        Vector 1
    -1126    y : array_like
    -1127        Vector 2
    -1128    
    -1129    Returns
    -1130    -------
    -1131    float
    -1132        Vicis Symmetric χ2 2 distance between v and y
    -1133
    -1134    Notes
    -1135    -----
    +1047    Notes
    +1048    -----
    +1049    .. math::
    +1050
    +1051        \sum\frac{(v_i^2-y_i^2)^2}{2(v_iy_i)^\frac{3}{2}}
    +1052    """
    +1053    return np.sum(
    +1054        np.power(np.power(v, 2) - np.power(y, 2), 2) / (2 * np.power(v * y, 3 / 2))
    +1055    )
    +1056
    +1057
    +1058def avg_l_distance(v, y):
    +1059    r"""
    +1060    Avg (L1, L∞) distance:
    +1061
    +1062    Parameters
    +1063    ----------
    +1064    v : array_like
    +1065        Vector 1
    +1066    y : array_like
    +1067        Vector 2
    +1068
    +1069    Returns
    +1070    -------
    +1071    float
    +1072        Average L distance between v and y
    +1073
    +1074    Notes
    +1075    -----
    +1076    .. math::
    +1077
    +1078        \frac{1}{2}(\sum|v_i-y_i|+\underset{i}{\max}{|v_i-y_i|})
    +1079    """
    +1080    return 1 / 2 * (np.sum(np.abs(v - y)) + max(np.abs(v - y)))
    +1081
    +1082
    +1083def vicis_wave_hadges_distance(v, y):
    +1084    r"""
    +1085    Vicis-Wave Hadges distance:
    +1086
    +1087    Parameters
    +1088    ----------
    +1089    v : array_like
    +1090        Vector 1
    +1091    y : array_like
    +1092        Vector 2
    +1093
    +1094    Returns
    +1095    -------
    +1096    float
    +1097        Vicis Wave Hadges distance between v and y
    +1098
    +1099    Notes
    +1100    -----
    +1101    .. math::
    +1102
    +1103        \sum\frac{|v_i-y_i|}{\min{(v_i,\ y_i)}}
    +1104    """
    +1105    return np.sum(np.abs(v - y) / np.minimum(v, y))
    +1106
    +1107
    +1108def vicis_symmetric_chi_squared_1_distance(v, y):
    +1109    r"""
    +1110    Vicis-Symmetric χ2 1 distance:
    +1111
    +1112    Parameters
    +1113    ----------
    +1114    v : array_like
    +1115        Vector 1
    +1116    y : array_like
    +1117        Vector 2
    +1118
    +1119    Returns
    +1120    -------
    +1121    float
    +1122        Vicis Symmetric χ2 1 distance between v and y
    +1123
    +1124    Notes
    +1125    -----
    +1126    .. math::
    +1127
    +1128        \sum\frac{(v_i-y_i)^2}{\min{(v_i,y_i)^2}}
    +1129    """
    +1130    return np.sum(np.power(v - y, 2) / np.power(np.minimum(v, y), 2))
    +1131
    +1132
    +1133def vicis_symmetric_chi_squared_2_distance(v, y):
    +1134    r"""
    +1135    Vicis-Symmetric χ2 2 distance:
     1136
    -1137    .. math::
    -1138
    -1139        \sum\frac{(v_i-y_i)^2}{\min{(v_i,y_i)}}
    -1140    """
    -1141    return np.sum(np.power(v - y, 2) / np.minimum(v, y))
    -1142
    +1137    Parameters
    +1138    ----------
    +1139    v : array_like
    +1140        Vector 1
    +1141    y : array_like
    +1142        Vector 2
     1143
    -1144def vicis_symmetric_chi_squared_3_distance(v, y):
    -1145    r"""
    -1146    Vicis-Symmetric χ2 3 distance:
    -1147    
    -1148    Parameters
    -1149    ----------
    -1150    v : array_like
    -1151        Vector 1
    -1152    y : array_like
    -1153        Vector 2
    -1154    
    -1155    Returns
    -1156    -------
    -1157    float
    -1158        Vici Symmetric χ2 3 distance between v and y
    -1159
    -1160    Notes
    -1161    -----
    +1144    Returns
    +1145    -------
    +1146    float
    +1147        Vicis Symmetric χ2 2 distance between v and y
    +1148
    +1149    Notes
    +1150    -----
    +1151
    +1152    .. math::
    +1153
    +1154        \sum\frac{(v_i-y_i)^2}{\min{(v_i,y_i)}}
    +1155    """
    +1156    return np.sum(np.power(v - y, 2) / np.minimum(v, y))
    +1157
    +1158
    +1159def vicis_symmetric_chi_squared_3_distance(v, y):
    +1160    r"""
    +1161    Vicis-Symmetric χ2 3 distance:
     1162
    -1163    .. math::
    -1164
    -1165        \sum\frac{(v_i-y_i)^2}{\max{(v_i,y_i)}}
    -1166    """
    -1167    return np.sum(np.power(v - y, 2) / np.maximum(v, y))
    -1168
    +1163    Parameters
    +1164    ----------
    +1165    v : array_like
    +1166        Vector 1
    +1167    y : array_like
    +1168        Vector 2
     1169
    -1170def max_symmetric_chi_squared_distance(v, y):
    -1171    r"""
    -1172    Max-Symmetric χ2 distance:
    -1173
    -1174    Parameters
    -1175    ----------
    -1176    v : array_like
    -1177        Vector 1
    -1178    y : array_like
    -1179        Vector 2
    -1180    
    -1181    Returns
    -1182    -------
    -1183    float
    -1184        Max-Symmetric χ2 distance between v and y
    -1185
    -1186    Notes
    -1187    -----
    -1188    .. math::
    -1189
    -1190        \max{(\sum\frac{(v_i-y_i)^2}{v_i},\sum\frac{(v_i-y_i)^2}{y_i})}
    -1191    """
    -1192    return max(np.sum(np.power(v - y, 2) / v), np.sum(np.power(v - y, 2) / y))
    -1193
    -1194
    -1195def min_symmetric_chi_squared_distance(v, y):
    -1196    r"""
    -1197    Min-Symmetric χ2 distance:
    -1198    
    -1199    Parameters
    -1200    ----------
    -1201    v : array_like
    -1202        Vector 1
    -1203    y : array_like
    -1204        Vector 2
    -1205    
    -1206    Returns
    -1207    -------
    -1208    float
    -1209        Min-Symmetric χ2 distance between v and y
    -1210
    -1211    Notes
    -1212    -----
    -1213    .. math::
    -1214
    -1215        \min{(\sum\frac{(v_i-y_i)^2}{v_i},\sum\frac{(v_i-y_i)^2}{y_i})}
    -1216    """
    -1217    return min(np.sum(np.power(v - y, 2) / v), np.sum(np.power(v - y, 2) / y))
    -1218
    -1219
    -1220def additive_sym_chi_sq(v, y):
    -1221    r"""
    -1222    Additive Symmetric χ2 distance:
    -1223    
    -1224    Parameters
    -1225    ----------
    -1226    v : array_like
    -1227        Vector 1
    -1228    y : array_like
    -1229        Vector 2
    -1230    
    -1231    Returns
    -1232    -------
    -1233    float
    -1234        Additive Symmetric χ2 distance between v and y
    -1235
    -1236    Notes
    -1237    -----
    -1238    .. math::
    -1239
    -1240        \sum_{i}\frac{(y_{i} - v_{i})^2(y_{i}+v_{i})}{y_{i}v_{i}}
    -1241    """
    -1242    return np.sum((np.power(y - v, 2) * (y + v))/(y * v))
    -1243
    -1244def bhattacharya_distance(v, y):
    -1245    r"""
    -1246    Bhattacharya Distance:
    -1247
    -1248    Parameters
    -1249    ----------
    -1250    v : array_like
    -1251        Vector 1
    -1252    y : array_like
    -1253        Vector 2
    -1254    
    -1255    Returns
    -1256    -------
    -1257    float
    -1258        Bhattcharya distance between v and y
    +1170    Returns
    +1171    -------
    +1172    float
    +1173        Vicis Symmetric χ2 3 distance between v and y
    +1174
    +1175    Notes
    +1176    -----
    +1177
    +1178    .. math::
    +1179
    +1180        \sum\frac{(v_i-y_i)^2}{\max{(v_i,y_i)}}
    +1181    """
    +1182    return np.sum(np.power(v - y, 2) / np.maximum(v, y))
    +1183
    +1184
    +1185def max_symmetric_chi_squared_distance(v, y):
    +1186    r"""
    +1187    Max-Symmetric χ2 distance:
    +1188
    +1189    Parameters
    +1190    ----------
    +1191    v : array_like
    +1192        Vector 1
    +1193    y : array_like
    +1194        Vector 2
    +1195
    +1196    Returns
    +1197    -------
    +1198    float
    +1199        Max-Symmetric χ2 distance between v and y
    +1200
    +1201    Notes
    +1202    -----
    +1203    .. math::
    +1204
    +1205        \max{(\sum\frac{(v_i-y_i)^2}{v_i},\sum\frac{(v_i-y_i)^2}{y_i})}
    +1206    """
    +1207    return max(np.sum(np.power(v - y, 2) / v), np.sum(np.power(v - y, 2) / y))
    +1208
    +1209
    +1210def min_symmetric_chi_squared_distance(v, y):
    +1211    r"""
    +1212    Min-Symmetric χ2 distance:
    +1213
    +1214    Parameters
    +1215    ----------
    +1216    v : array_like
    +1217        Vector 1
    +1218    y : array_like
    +1219        Vector 2
    +1220
    +1221    Returns
    +1222    -------
    +1223    float
    +1224        Min-Symmetric χ2 distance between v and y
    +1225
    +1226    Notes
    +1227    -----
    +1228    .. math::
    +1229
    +1230        \min{(\sum\frac{(v_i-y_i)^2}{v_i},\sum\frac{(v_i-y_i)^2}{y_i})}
    +1231    """
    +1232    return min(np.sum(np.power(v - y, 2) / v), np.sum(np.power(v - y, 2) / y))
    +1233
    +1234
    +1235def additive_sym_chi_sq(v, y):
    +1236    r"""
    +1237    Additive Symmetric χ2 distance:
    +1238
    +1239    Parameters
    +1240    ----------
    +1241    v : array_like
    +1242        Vector 1
    +1243    y : array_like
    +1244        Vector 2
    +1245
    +1246    Returns
    +1247    -------
    +1248    float
    +1249        Additive Symmetric χ2 distance between v and y
    +1250
    +1251    Notes
    +1252    -----
    +1253    .. math::
    +1254
    +1255        \sum_{i}\frac{(y_{i} - v_{i})^2(y_{i}+v_{i})}{y_{i}v_{i}}
    +1256    """
    +1257    return np.sum((np.power(y - v, 2) * (y + v)) / (y * v))
    +1258
     1259
    -1260    Notes
    -1261    -----
    -1262    .. math::
    +1260def bhattacharya_distance(v, y):
    +1261    r"""
    +1262    Bhattacharya Distance:
     1263
    -1264        -ln(\sum_{i}\sqrt{y_{i}v_{i}})
    -1265    """
    -1266    return -1 * np.log(np.sum(np.sqrt(y * v)))
    -1267
    -1268def generalized_ochiai_index(v, y):
    -1269    r"""
    -1270    Generalized Ochiai Index
    -1271    
    -1272    Parameters
    -1273    ----------
    -1274    v : array_like
    -1275        Vector 1
    -1276    y : array_like
    -1277        Vector 2
    -1278    
    -1279    Returns
    -1280    -------
    -1281    float
    -1282        Generalized Ochiai Index between v and y
    +1264    Parameters
    +1265    ----------
    +1266    v : array_like
    +1267        Vector 1
    +1268    y : array_like
    +1269        Vector 2
    +1270
    +1271    Returns
    +1272    -------
    +1273    float
    +1274        Bhattacharya distance between v and y
    +1275
    +1276    Notes
    +1277    -----
    +1278    .. math::
    +1279
    +1280        -ln(\sum_{i}\sqrt{y_{i}v_{i}})
    +1281    """
    +1282    return -1 * np.log(np.sum(np.sqrt(y * v)))
     1283
    -1284    Notes
    -1285    -----
    -1286    .. math::
    -1287
    -1288        1 - \frac{\sum_{i}min(y_{i}, v_{i})}{\sqrt{\sum_{i}y_{i} \sum_{i}v_{i}}}
    -1289    """
    -1290
    -1291    ind = np.sum(np.minimum(y, v)) / np.sqrt(np.sum(y) * np.sum(v))
    -1292    return 1 - ind 
    -1293
    -1294def gower_distance(v, y):
    -1295    r"""
    -1296    Gower Distance
    -1297    
    -1298    Parameters
    -1299    ----------
    -1300    v : array_like
    -1301        Vector 1
    -1302    y : array_like
    -1303        Vector 2
    -1304    
    -1305    Returns
    -1306    -------
    -1307    float
    -1308        Gower distance between v and y
    -1309
    -1310    Notes
    -1311    -----
    -1312
    -1313    .. math::
    -1314
    -1315        \frac{1}{N}\sum_{i}|y_{i} - v_{i}|
    -1316    """
    -1317
    -1318    n = np.sum(y > 0)
    -1319    return (1 / n) * np.sum(np.abs(y - v))
    -1320
    -1321def impr_sqrt_cosine_sim(v, y):
    -1322    r"""
    -1323    Improved Square Root Cosine Similarity
    -1324    
    -1325    Parameters
    -1326    ----------
    -1327    v : array_like
    -1328        Vector 1
    -1329    y : array_like
    -1330        Vector 2
    -1331    
    -1332    Returns
    -1333    -------
    -1334    float
    -1335        Improved Square Root Cosine Similarity between v and y
    -1336
    -1337    Notes
    -1338    -----
    -1339    .. math::
    -1340
    -1341        \frac{\sum_{i}\sqrt{y_{i}v_{i}}}{\sum_{i}\sqrt{y_{i}}\sum_{i}\sqrt{v_{i}}}
    -1342    """
    +1284
    +1285def generalized_ochiai_index(v, y):
    +1286    r"""
    +1287    Generalized Ochiai Index
    +1288
    +1289    Parameters
    +1290    ----------
    +1291    v : array_like
    +1292        Vector 1
    +1293    y : array_like
    +1294        Vector 2
    +1295
    +1296    Returns
    +1297    -------
    +1298    float
    +1299        Generalized Ochiai Index between v and y
    +1300
    +1301    Notes
    +1302    -----
    +1303    .. math::
    +1304
    +1305        1 - \frac{\sum_{i}min(y_{i}, v_{i})}{\sqrt{\sum_{i}y_{i} \sum_{i}v_{i}}}
    +1306    """
    +1307
    +1308    ind = np.sum(np.minimum(y, v)) / np.sqrt(np.sum(y) * np.sum(v))
    +1309    return 1 - ind
    +1310
    +1311
    +1312def gower_distance(v, y):
    +1313    r"""
    +1314    Gower Distance
    +1315
    +1316    Parameters
    +1317    ----------
    +1318    v : array_like
    +1319        Vector 1
    +1320    y : array_like
    +1321        Vector 2
    +1322
    +1323    Returns
    +1324    -------
    +1325    float
    +1326        Gower distance between v and y
    +1327
    +1328    Notes
    +1329    -----
    +1330
    +1331    .. math::
    +1332
    +1333        \frac{1}{N}\sum_{i}|y_{i} - v_{i}|
    +1334    """
    +1335
    +1336    n = np.sum(y > 0)
    +1337    return (1 / n) * np.sum(np.abs(y - v))
    +1338
    +1339
    +1340def impr_sqrt_cosine_sim(v, y):
    +1341    r"""
    +1342    Improved Square Root Cosine Similarity
     1343
    -1344    return np.sum(np.sqrt(y * v)) / (np.sum(np.sqrt(y)) * np.sum(np.sqrt(v)))
    -1345
    -1346def intersection_sim(v, y):
    -1347    r"""
    -1348    Intersection Similarity
    -1349    
    -1350    Parameters
    -1351    ----------
    -1352    v : array_like
    -1353        Vector 1
    -1354    y : array_like
    -1355        Vector 2
    -1356    
    -1357    Returns
    -1358    -------
    -1359    float
    -1360        Intersection Similarity between v and y
    -1361
    -1362    Notes
    -1363    -----
    -1364    .. math::
    +1344    Parameters
    +1345    ----------
    +1346    v : array_like
    +1347        Vector 1
    +1348    y : array_like
    +1349        Vector 2
    +1350
    +1351    Returns
    +1352    -------
    +1353    float
    +1354        Improved Square Root Cosine Similarity between v and y
    +1355
    +1356    Notes
    +1357    -----
    +1358    .. math::
    +1359
    +1360        \frac{\sum_{i}\sqrt{y_{i}v_{i}}}{\sum_{i}\sqrt{y_{i}}\sum_{i}\sqrt{v_{i}}}
    +1361    """
    +1362
    +1363    return np.sum(np.sqrt(y * v)) / (np.sum(np.sqrt(y)) * np.sum(np.sqrt(v)))
    +1364
     1365
    -1366        \sum_{i}min(y_{i}, v_{i})
    -1367    """
    -1368
    -1369    return np.sum(np.minimum(y, v))
    -1370
    -1371def j_divergence(v, y):
    -1372    r"""
    -1373    J Divergence
    -1374    
    -1375    Parameters
    -1376    ----------
    -1377    v : array_like
    -1378        Vector 1
    -1379    y : array_like
    -1380        Vector 2
    -1381    
    -1382    Returns
    -1383    -------
    -1384    float
    -1385        J Divergence between v and y
    -1386
    -1387    Notes
    -1388    -----
    -1389    .. math::
    -1390        
    -1391        \sum_{i}(y_{i} - v_{i}) ln(\frac{y_{i}}{v_{i}})
    -1392    """
    -1393
    -1394    return np.sum((v - y) * np.log(v / y))
    +1366def intersection_sim(v, y):
    +1367    r"""
    +1368    Intersection Similarity
    +1369
    +1370    Parameters
    +1371    ----------
    +1372    v : array_like
    +1373        Vector 1
    +1374    y : array_like
    +1375        Vector 2
    +1376
    +1377    Returns
    +1378    -------
    +1379    float
    +1380        Intersection Similarity between v and y
    +1381
    +1382    Notes
    +1383    -----
    +1384    .. math::
    +1385
    +1386        \sum_{i}min(y_{i}, v_{i})
    +1387    """
    +1388
    +1389    return np.sum(np.minimum(y, v))
    +1390
    +1391
    +1392def j_divergence(v, y):
    +1393    r"""
    +1394    J Divergence
     1395
    -1396def jensen_shannon_index(v, y):
    -1397    r"""
    -1398    Jensen-Shannon Index
    -1399
    -1400    Parameters
    -1401    ----------
    -1402    v : array_like
    -1403        Vector 1
    -1404    y : array_like
    -1405        Vector 2
    -1406    
    -1407    Returns
    -1408    -------
    -1409    float
    -1410        Jensen Shannon Index between v and y
    +1396    Parameters
    +1397    ----------
    +1398    v : array_like
    +1399        Vector 1
    +1400    y : array_like
    +1401        Vector 2
    +1402
    +1403    Returns
    +1404    -------
    +1405    float
    +1406        J Divergence between v and y
    +1407
    +1408    Notes
    +1409    -----
    +1410    .. math::
     1411
    -1412    Notes
    -1413    -----
    -1414    .. math::
    -1415
    -1416        \frac{1}{2}[\sum_{i}y_{i}ln(\frac{2y_{i}}{y_{i} + v_{i}}) + \sum_{i}v_{i}ln(\frac{2v_{i}}{y_{i}+v_{i}})]
    -1417    """
    -1418
    -1419    return (1 / 2) * (np.sum(y * np.log(2 * y / (y + v))) + np.sum(v * np.log(2 * v / (y + v))))
    -1420
    -1421def k_divergence(v, y):
    -1422    r"""
    -1423    K-Divergence
    -1424
    -1425    Parameters
    -1426    ----------
    -1427    v : array_like
    -1428        Vector 1
    -1429    y : array_like
    -1430        Vector 2
    -1431    
    -1432    Returns
    -1433    -------
    -1434    float
    -1435        K-Divergence between v and y
    -1436
    -1437    Notes
    -1438    -----
    -1439    .. math::
    +1412        \sum_{i}(y_{i} - v_{i}) ln(\frac{y_{i}}{v_{i}})
    +1413    """
    +1414
    +1415    return np.sum((v - y) * np.log(v / y))
    +1416
    +1417
    +1418def jensen_shannon_index(v, y):
    +1419    r"""
    +1420    Jensen-Shannon Index
    +1421
    +1422    Parameters
    +1423    ----------
    +1424    v : array_like
    +1425        Vector 1
    +1426    y : array_like
    +1427        Vector 2
    +1428
    +1429    Returns
    +1430    -------
    +1431    float
    +1432        Jensen Shannon Index between v and y
    +1433
    +1434    Notes
    +1435    -----
    +1436    .. math::
    +1437
    +1438        \frac{1}{2}[\sum_{i}y_{i}ln(\frac{2y_{i}}{y_{i} + v_{i}}) + \sum_{i}v_{i}ln(\frac{2v_{i}}{y_{i}+v_{i}})]
    +1439    """
     1440
    -1441        \sum_{i}y_{i}ln(\frac{2y_{i}}{y_{i} + v_{i}})
    -1442    """
    -1443
    -1444    return np.sum(v * np.log((2 * v) / (y + v)))
    +1441    return (1 / 2) * (
    +1442        np.sum(y * np.log(2 * y / (y + v))) + np.sum(v * np.log(2 * v / (y + v)))
    +1443    )
    +1444
     1445
    -1446
    -1447def topsoe_distance(v, y):
    -1448    r""" Topsoe distance
    -1449    
    +1446def k_divergence(v, y):
    +1447    r"""
    +1448    K-Divergence
    +1449
     1450    Parameters
     1451    ----------
     1452    v : array_like
     1453        Vector 1
     1454    y : array_like
     1455        Vector 2
    -1456    
    +1456
     1457    Returns
     1458    -------
     1459    float
    -1460        Topsoe distance between v and y
    -1461        
    +1460        K-Divergence between v and y
    +1461
     1462    Notes
     1463    -----
    -1464    """
    -1465    #[Chae] commented out the previous one; please review
    -1466    #v[v==0] = 1 #added by amt
    -1467    #y[y==0] = 1 #added by amt
    -1468    return np.sum((y * np.log((2 * y)/(y + v))) + (v * np.log((2 * v)/(y + v))))
    -1469
    -1470def probabilistic_symmetric_chi_squared_distance(v, y):
    -1471    r""" Fixed
    -1472    "I commented out the previous one; please review"
    -1473    """
    -1474    return 2 * np.sum(np.sum(np.power(y - v, 2) / (y + v)))
    -1475
    -1476def VW6(v, y):
    -1477    r"""
    -1478    "appears to be the same as max_symmetric_chi_squared_distance"
    -1479    """
    -1480    return min(np.sum(np.power(y - v, 2) / y), np.sum(np.power(y - v, 2) / v))
    +1464    .. math::
    +1465
    +1466        \sum_{i}y_{i}ln(\frac{2y_{i}}{y_{i} + v_{i}})
    +1467    """
    +1468
    +1469    return np.sum(v * np.log((2 * v) / (y + v)))
    +1470
    +1471
    +1472def topsoe_distance(v, y):
    +1473    r"""Topsoe distance
    +1474
    +1475    Parameters
    +1476    ----------
    +1477    v : array_like
    +1478        Vector 1
    +1479    y : array_like
    +1480        Vector 2
     1481
    -1482def VW5(v, y):
    -1483    r"""
    -1484    "appears to be the same as max_symmetric_chi_squared_distance"
    -1485    """
    -1486    return max(np.sum(np.power(y - v, 2) / y), np.sum(np.power(y - v, 2) / v))
    -1487
    -1488def VW4(v, y):
    -1489    r"""
    -1490    "Tecnically the Symmetric chi2 eq63"
    -1491    """
    -1492    return np.sum(np.power(y - v, 2) / np.maximum(y, v))
    -1493
    -1494def VW3(v, y):
    -1495    r"""
    -1496    "New"
    -1497    """
    -1498    return np.sum(np.power(y - v, 2) / np.minimum(y, v))
    -1499
    -1500def VW2(v, y):
    -1501    r"""
    -1502    "New"
    -1503    """
    -1504    return np.sum(np.power(y - v, 2) / np.power(np.minimum(y, v), 2))
    -1505
    -1506def VW1(v, y):
    -1507    r"""
    -1508    "New"
    -1509    """
    -1510    return np.sum(np.abs(y - v) / np.minimum(y, v))
    -1511
    -1512def taneja_divergence(v, y):
    -1513    r"""
    -1514    "New"
    -1515    """
    -1516    return np.sum(((y + v) / 2) * np.log((y + v)/(2 * np.sqrt(y * v))))
    -1517
    -1518def symmetric_chi_squared_distance (v, y):
    -1519    r"""
    -1520    "New"
    -1521    """
    -1522    return np.sum(np.power(y - v, 2) / (y * v))
    +1482    Returns
    +1483    -------
    +1484    float
    +1485        Topsoe distance between v and y
    +1486
    +1487    Notes
    +1488    -----
    +1489    """
    +1490    # [Chae] commented out the previous one; please review
    +1491    # v[v==0] = 1 #added by amt
    +1492    # y[y==0] = 1 #added by amt
    +1493    return np.sum((y * np.log((2 * y) / (y + v))) + (v * np.log((2 * v) / (y + v))))
    +1494
    +1495
    +1496def probabilistic_symmetric_chi_squared_distance(v, y):
    +1497    r"""Fixed
    +1498    "I commented out the previous one; please review"
    +1499    """
    +1500    return 2 * np.sum(np.sum(np.power(y - v, 2) / (y + v)))
    +1501
    +1502
    +1503def VW6(v, y):
    +1504    r"""
    +1505    "appears to be the same as max_symmetric_chi_squared_distance"
    +1506    """
    +1507    return min(np.sum(np.power(y - v, 2) / y), np.sum(np.power(y - v, 2) / v))
    +1508
    +1509
    +1510def VW5(v, y):
    +1511    r"""
    +1512    "appears to be the same as max_symmetric_chi_squared_distance"
    +1513    """
    +1514    return max(np.sum(np.power(y - v, 2) / y), np.sum(np.power(y - v, 2) / v))
    +1515
    +1516
    +1517def VW4(v, y):
    +1518    r"""
    +1519    "Tecnically the Symmetric chi2 eq63"
    +1520    """
    +1521    return np.sum(np.power(y - v, 2) / np.maximum(y, v))
    +1522
     1523
    -1524def squared_chi_squared_distance(v, y):
    +1524def VW3(v, y):
     1525    r"""
     1526    "New"
     1527    """
    -1528    return np.sum(np.power(y - v, 2) / (y + v))
    +1528    return np.sum(np.power(y - v, 2) / np.minimum(y, v))
     1529
    -1530def square_root_cosine_correlation(v, y):
    -1531    r"""
    -1532    "New"
    -1533    """
    -1534    return np.sum(np.sqrt(y * v)) / (np.sum(y) * np.sum(v))
    -1535
    -1536def sorensen_distance(v, y):
    -1537    r"""
    -1538    "New"
    -1539    """
    -1540    return np.sum(np.abs(y - v)) / (np.sum(y + v))
    -1541
    -1542def Pearson_chi_squared_distance(v, y):
    -1543    r"""
    -1544    "New"
    -1545    """
    -1546    return np.sum(np.power(y - v, 2) / v)
    -1547
    -1548def Neyman_chi_squared_distance(v, y):
    -1549    r"""
    -1550    "New"
    -1551    """
    -1552    return np.sum(np.power(y - v, 2) / y)
    -1553
    -1554def Minokowski_3(v, y):
    -1555    r"""
    -1556    "New"
    -1557    """
    -1558    return np.power(np.sum(np.power(np.abs(y - v), 3)), 1/3)
    -1559
    -1560def Minokowski_4(v, y):
    -1561    r"""
    -1562    "New"
    -1563    """
    -1564    return np.power(np.sum(np.power(np.abs(y - v), 4)), 1/4)
    +1530
    +1531def VW2(v, y):
    +1532    r"""
    +1533    "New"
    +1534    """
    +1535    return np.sum(np.power(y - v, 2) / np.power(np.minimum(y, v), 2))
    +1536
    +1537
    +1538def VW1(v, y):
    +1539    r"""
    +1540    "New"
    +1541    """
    +1542    return np.sum(np.abs(y - v) / np.minimum(y, v))
    +1543
    +1544
    +1545def taneja_divergence(v, y):
    +1546    r"""
    +1547    "New"
    +1548    """
    +1549    return np.sum(((y + v) / 2) * np.log((y + v) / (2 * np.sqrt(y * v))))
    +1550
    +1551
    +1552def symmetric_chi_squared_distance(v, y):
    +1553    r"""
    +1554    "New"
    +1555    """
    +1556    return np.sum(np.power(y - v, 2) / (y * v))
    +1557
    +1558
    +1559def squared_chi_squared_distance(v, y):
    +1560    r"""
    +1561    "New"
    +1562    """
    +1563    return np.sum(np.power(y - v, 2) / (y + v))
    +1564
     1565
    -1566def kumarjohnson_divergence(v, y):
    +1566def square_root_cosine_correlation(v, y):
     1567    r"""
     1568    "New"
     1569    """
    -1570    return np.sum(np.power(np.power(y, 2) + np.power(v, 2), 2) / (2* np.power(y * v, 3/2)))
    +1570    return np.sum(np.sqrt(y * v)) / (np.sum(y) * np.sum(v))
     1571
    -1572def kumarhassebrook_similarity(v, y):
    -1573    r"""
    -1574    "New"
    -1575    """
    -1576    return np.sum(y * v) / (np.sum(np.power(y, 2)) + np.sum(np.power(v, 2)) - np.sum(y * v))
    -1577
    -1578def kullbackleibler_divergence (v, y):
    -1579    r"""
    -1580    "New"
    -1581    """
    -1582    return np.sum(v * np.log(v / y))
    -1583
    -1584def soergel_distance(v, y):
    -1585    r"""
    -1586    "New"
    -1587    """
    -1588    return np.sum(np.abs(y - v))/np.sum(np.maximum(y, v))
    +1572
    +1573def sorensen_distance(v, y):
    +1574    r"""
    +1575    "New"
    +1576    """
    +1577    return np.sum(np.abs(y - v)) / (np.sum(y + v))
    +1578
    +1579
    +1580def Pearson_chi_squared_distance(v, y):
    +1581    r"""
    +1582    "New"
    +1583    """
    +1584    return np.sum(np.power(y - v, 2) / v)
    +1585
    +1586
    +1587def Neyman_chi_squared_distance(v, y):
    +1588    r"""
    +1589    "New"
    +1590    """
    +1591    return np.sum(np.power(y - v, 2) / y)
    +1592
    +1593
    +1594def Minokowski_3(v, y):
    +1595    r"""
    +1596    "New"
    +1597    """
    +1598    return np.power(np.sum(np.power(np.abs(y - v), 3)), 1 / 3)
    +1599
    +1600
    +1601def Minokowski_4(v, y):
    +1602    r"""
    +1603    "New"
    +1604    """
    +1605    return np.power(np.sum(np.power(np.abs(y - v), 4)), 1 / 4)
    +1606
    +1607
    +1608def kumarjohnson_divergence(v, y):
    +1609    r"""
    +1610    "New"
    +1611    """
    +1612    return np.sum(
    +1613        np.power(np.power(y, 2) + np.power(v, 2), 2) / (2 * np.power(y * v, 3 / 2))
    +1614    )
    +1615
    +1616
    +1617def kumarhassebrook_similarity(v, y):
    +1618    r"""
    +1619    "New"
    +1620    """
    +1621    return np.sum(y * v) / (
    +1622        np.sum(np.power(y, 2)) + np.sum(np.power(v, 2)) - np.sum(y * v)
    +1623    )
    +1624
    +1625
    +1626def kullbackleibler_divergence(v, y):
    +1627    r"""
    +1628    "New"
    +1629    """
    +1630    return np.sum(v * np.log(v / y))
    +1631
    +1632
    +1633def soergel_distance(v, y):
    +1634    r"""
    +1635    "New"
    +1636    """
    +1637    return np.sum(np.abs(y - v)) / np.sum(np.maximum(y, v))
     
    @@ -1873,25 +1922,29 @@

    -
    11def entropy_distance(v, y):
    -12    """ Calculate entropy distance between two vectors
    -13
    -14    Parameters
    -15    ----------
    -16    v : array_like
    -17        Vector 1
    -18    y : array_like
    -19        Vector 2
    -20    
    -21    Returns
    -22    -------
    -23    float
    -24        Entropy distance between v and y
    -25    
    -26    """
    -27    merged = v + y
    -28    entropy_increase = 2 * scipy.stats.entropy(merged) - scipy.stats.entropy(v) - scipy.stats.entropy(y)
    -29    return entropy_increase
    +            
    14def entropy_distance(v, y):
    +15    """Calculate entropy distance between two vectors
    +16
    +17    Parameters
    +18    ----------
    +19    v : array_like
    +20        Vector 1
    +21    y : array_like
    +22        Vector 2
    +23
    +24    Returns
    +25    -------
    +26    float
    +27        Entropy distance between v and y
    +28
    +29    """
    +30    merged = v + y
    +31    entropy_increase = (
    +32        2 * scipy.stats.entropy(merged)
    +33        - scipy.stats.entropy(v)
    +34        - scipy.stats.entropy(y)
    +35    )
    +36    return entropy_increase
     
    @@ -1926,27 +1979,31 @@
    Returns
    -
    55def weighted_entropy_distance(v, y):
    -56    """ Calculate weighted entropy distance between two vectors
    -57
    -58    Parameters
    -59    ----------
    -60    v : array_like
    -61        Vector 1
    -62    y : array_like
    -63        Vector 2
    -64    
    -65    Returns
    -66    -------
    -67    float
    -68        Weighted entropy distance between v and y
    -69    """
    -70    v = _weight_intensity_for_entropy(v)
    -71    y = _weight_intensity_for_entropy(y)
    +            
    63def weighted_entropy_distance(v, y):
    +64    """Calculate weighted entropy distance between two vectors
    +65
    +66    Parameters
    +67    ----------
    +68    v : array_like
    +69        Vector 1
    +70    y : array_like
    +71        Vector 2
     72
    -73    merged = v + y
    -74    entropy_increase = 2 * scipy.stats.entropy(merged) - scipy.stats.entropy(v) - scipy.stats.entropy(y)
    -75    return entropy_increase
    +73    Returns
    +74    -------
    +75    float
    +76        Weighted entropy distance between v and y
    +77    """
    +78    v = _weight_intensity_for_entropy(v)
    +79    y = _weight_intensity_for_entropy(y)
    +80
    +81    merged = v + y
    +82    entropy_increase = (
    +83        2 * scipy.stats.entropy(merged)
    +84        - scipy.stats.entropy(v)
    +85        - scipy.stats.entropy(y)
    +86    )
    +87    return entropy_increase
     
    @@ -1981,28 +2038,28 @@
    Returns
    -
    78def chebyshev_distance(v, y):
    -79    r"""Chebyshev distance
    -80
    -81    Parameters
    -82    ----------
    -83    v : array_like
    -84        Vector 1
    -85    y : array_like
    -86        Vector 2
    -87    
    -88    Returns
    -89    -------
    -90    float
    -91        Chebyshev distance between v and y
    -92
    -93    Notes
    -94    -----
    -95    .. math::
    -96
    -97        \underset{i}{\max}{(|v_{i}\ -\ y_{i}|)}
    -98    """
    -99    return np.max(np.abs(v - y))
    +            
     90def chebyshev_distance(v, y):
    + 91    r"""Chebyshev distance
    + 92
    + 93    Parameters
    + 94    ----------
    + 95    v : array_like
    + 96        Vector 1
    + 97    y : array_like
    + 98        Vector 2
    + 99
    +100    Returns
    +101    -------
    +102    float
    +103        Chebyshev distance between v and y
    +104
    +105    Notes
    +106    -----
    +107    .. math::
    +108
    +109        \underset{i}{\max}{(|v_{i}\ -\ y_{i}|)}
    +110    """
    +111    return np.max(np.abs(v - y))
     
    @@ -2041,28 +2098,28 @@
    Notes
    -
    102def squared_euclidean_distance(v, y):
    -103    r"""Squared Euclidean distance:
    -104
    -105    Parameters
    -106    ----------
    -107    v : array_like
    -108        Vector 1
    -109    y : array_like
    -110        Vector 2
    -111    
    -112    Returns
    -113    -------
    -114    float
    -115        Squared Euclidean distance between v and y
    +            
    114def squared_euclidean_distance(v, y):
    +115    r"""Squared Euclidean distance:
     116
    -117    Notes
    -118    -----
    -119    .. math::
    -120
    -121        \sum(v_{i}-y_{i})^2
    -122    """
    -123    return np.sum(np.power(v - y, 2))
    +117    Parameters
    +118    ----------
    +119    v : array_like
    +120        Vector 1
    +121    y : array_like
    +122        Vector 2
    +123
    +124    Returns
    +125    -------
    +126    float
    +127        Squared Euclidean distance between v and y
    +128
    +129    Notes
    +130    -----
    +131    .. math::
    +132
    +133        \sum(v_{i}-y_{i})^2
    +134    """
    +135    return np.sum(np.power(v - y, 2))
     
    @@ -2101,27 +2158,27 @@
    Notes
    -
    126def fidelity_similarity(v, y):
    -127    r""" Fidelity similarity:
    -128
    -129    Parameters
    -130    ----------
    -131    v : array_like
    -132        Vector 1
    -133    y : array_like
    -134        Vector 2
    -135    
    -136    Returns
    -137    -------
    -138    float
    -139        Fidelity similarity between v and y
    -140    Notes
    -141    -----
    -142    .. math::
    -143
    -144        \sum\sqrt{v_{i}y_{i}}
    -145    """
    -146    return np.sum(np.sqrt(v * y))
    +            
    138def fidelity_similarity(v, y):
    +139    r"""Fidelity similarity:
    +140
    +141    Parameters
    +142    ----------
    +143    v : array_like
    +144        Vector 1
    +145    y : array_like
    +146        Vector 2
    +147
    +148    Returns
    +149    -------
    +150    float
    +151        Fidelity similarity between v and y
    +152    Notes
    +153    -----
    +154    .. math::
    +155
    +156        \sum\sqrt{v_{i}y_{i}}
    +157    """
    +158    return np.sum(np.sqrt(v * y))
     
    @@ -2160,28 +2217,28 @@
    Notes
    -
    149def matusita_distance(v, y):
    -150    r"""Matusita distance:
    -151
    -152    Parameters
    -153    ----------
    -154    v : array_like
    -155        Vector 1
    -156    y : array_like
    -157        Vector 2
    -158    
    -159    Returns
    -160    -------
    -161    float
    -162        Matusita distance between v and y
    +            
    161def matusita_distance(v, y):
    +162    r"""Matusita distance:
     163
    -164    Notes
    -165    -----
    -166    .. math::
    -167
    -168        \sqrt{\sum(\sqrt{v_{i}}-\sqrt{y_{i}})^2}
    -169    """
    -170    return np.sqrt(np.sum(np.power(np.sqrt(v) - np.sqrt(y), 2)))
    +164    Parameters
    +165    ----------
    +166    v : array_like
    +167        Vector 1
    +168    y : array_like
    +169        Vector 2
    +170
    +171    Returns
    +172    -------
    +173    float
    +174        Matusita distance between v and y
    +175
    +176    Notes
    +177    -----
    +178    .. math::
    +179
    +180        \sqrt{\sum(\sqrt{v_{i}}-\sqrt{y_{i}})^2}
    +181    """
    +182    return np.sqrt(np.sum(np.power(np.sqrt(v) - np.sqrt(y), 2)))
     
    @@ -2220,28 +2277,28 @@
    Notes
    -
    173def squared_chord_distance(v, y):
    -174    r"""Squared-chord distance:
    -175
    -176    Parameters
    -177    ----------
    -178    v : array_like
    -179        Vector 1
    -180    y : array_like
    -181        Vector 2
    -182    
    -183    Returns
    -184    -------
    -185    float
    -186        Squared-chord distance between v and y
    +            
    185def squared_chord_distance(v, y):
    +186    r"""Squared-chord distance:
     187
    -188    Notes
    -189    -----
    -190    .. math::
    -191
    -192        \sum(\sqrt{v_{i}}-\sqrt{y_{i}})^2
    -193    """
    -194    return np.sum(np.power(np.sqrt(v) - np.sqrt(y), 2))
    +188    Parameters
    +189    ----------
    +190    v : array_like
    +191        Vector 1
    +192    y : array_like
    +193        Vector 2
    +194
    +195    Returns
    +196    -------
    +197    float
    +198        Squared-chord distance between v and y
    +199
    +200    Notes
    +201    -----
    +202    .. math::
    +203
    +204        \sum(\sqrt{v_{i}}-\sqrt{y_{i}})^2
    +205    """
    +206    return np.sum(np.power(np.sqrt(v) - np.sqrt(y), 2))
     
    @@ -2280,34 +2337,36 @@
    Notes
    -
    197def bhattacharya_1_distance(v, y):
    -198    r"""Bhattacharya 1 distance:
    -199
    -200    Parameters
    -201    ----------
    -202    v : array_like
    -203        Vector 1
    -204    y : array_like
    -205        Vector 2
    -206    
    -207    Returns
    -208    -------
    -209    float
    -210        Bhattacharya 1 distance between v and y
    +            
    209def bhattacharya_1_distance(v, y):
    +210    r"""Bhattacharya 1 distance:
     211
    -212    Notes
    -213    -----
    -214    .. math::
    -215
    -216        (\arccos{(\sum\sqrt{v_{i}y_{i}})})^2
    -217    """
    -218    s = np.sum(np.sqrt(v * y))
    -219    # TODO:Fix this!
    -220    if s > 1:
    -221        if s > 1 + 1e-6:
    -222            warnings.warn("Error in calculating Bhattacharya 1 distance, got arccos {}".format(s))
    -223        s = 1
    -224    return np.power(np.arccos(s), 2)
    +212    Parameters
    +213    ----------
    +214    v : array_like
    +215        Vector 1
    +216    y : array_like
    +217        Vector 2
    +218
    +219    Returns
    +220    -------
    +221    float
    +222        Bhattacharya 1 distance between v and y
    +223
    +224    Notes
    +225    -----
    +226    .. math::
    +227
    +228        (\arccos{(\sum\sqrt{v_{i}y_{i}})})^2
    +229    """
    +230    s = np.sum(np.sqrt(v * y))
    +231    # TODO:Fix this!
    +232    if s > 1:
    +233        if s > 1 + 1e-6:
    +234            warnings.warn(
    +235                "Error in calculating Bhattacharya 1 distance, got arccos {}".format(s)
    +236            )
    +237        s = 1
    +238    return np.power(np.arccos(s), 2)
     
    @@ -2346,31 +2405,31 @@
    Notes
    -
    227def bhattacharya_2_distance(v, y):
    -228    r"""Bhattacharya 2 distance:
    -229
    -230    Parameters
    -231    ----------
    -232    v : array_like
    -233        Vector 1
    -234    y : array_like
    -235        Vector 2
    -236    
    -237    Returns
    -238    -------
    -239    float
    -240        Bhattacharya 2 distance between v and y
    -241    Notes
    -242    -----
    -243    .. math::
    -244
    -245        -\ln{(\sum\sqrt{v_{i}y_{i}})}
    -246    """
    -247    s = np.sum(np.sqrt(v * y))
    -248    if s == 0:
    -249        return np.inf
    -250    else:
    -251        return -np.log(s)
    +            
    241def bhattacharya_2_distance(v, y):
    +242    r"""Bhattacharya 2 distance:
    +243
    +244    Parameters
    +245    ----------
    +246    v : array_like
    +247        Vector 1
    +248    y : array_like
    +249        Vector 2
    +250
    +251    Returns
    +252    -------
    +253    float
    +254        Bhattacharya 2 distance between v and y
    +255    Notes
    +256    -----
    +257    .. math::
    +258
    +259        -\ln{(\sum\sqrt{v_{i}y_{i}})}
    +260    """
    +261    s = np.sum(np.sqrt(v * y))
    +262    if s == 0:
    +263        return np.inf
    +264    else:
    +265        return -np.log(s)
     
    @@ -2409,30 +2468,30 @@
    Notes
    -
    254def harmonic_mean_similarity(v, y):
    -255    r"""Harmonic mean similarity:
    -256
    -257    Parameters
    -258    ----------
    -259    v : array_like
    -260        Vector 1
    -261    y : array_like
    -262        Vector 2
    -263    
    -264    Returns
    -265    -------
    -266    float
    -267        Harmonic mean similarity between v and y
    -268    
    -269    Notes
    -270    -----
    -271    .. math::
    -272
    -273        #1-2\sum(\frac{v_{i}y_{i}}{v_{i}+y_{i}})
    -274        2\sum(\frac{v_{i}y_{i}}{v_{i}+y_{i}})
    -275    """
    -276    #return 1 - 2 * np.sum(v * y / (v + y))
    -277    return 2 * np.sum(v * y / (v + y))
    +            
    268def harmonic_mean_similarity(v, y):
    +269    r"""Harmonic mean similarity:
    +270
    +271    Parameters
    +272    ----------
    +273    v : array_like
    +274        Vector 1
    +275    y : array_like
    +276        Vector 2
    +277
    +278    Returns
    +279    -------
    +280    float
    +281        Harmonic mean similarity between v and y
    +282
    +283    Notes
    +284    -----
    +285    .. math::
    +286
    +287        #1-2\sum(\frac{v_{i}y_{i}}{v_{i}+y_{i}})
    +288        2\sum(\frac{v_{i}y_{i}}{v_{i}+y_{i}})
    +289    """
    +290    # return 1 - 2 * np.sum(v * y / (v + y))
    +291    return 2 * np.sum(v * y / (v + y))
     
    @@ -2472,30 +2531,29 @@
    Notes
    -
    330def chernoff_distance(v, y):
    -331    r""" Chernoff distance:
    -332
    -333    Parameters
    -334    ----------
    -335    v : array_like
    -336        Vector 1
    -337    y : array_like
    -338        Vector 2
    -339    
    -340    Returns
    -341    -------
    -342    float
    -343        Chernoff distance between v and y
    -344
    -345    Notes
    -346    -----
    -347    .. math::
    -348
    -349        \max{(-ln\sum(v_{i}^ty_{i}^{1-t})^{1-t})},\ t=0.1,\ 0\le\ t<1
    -350    """
    -351    t = 0.1
    -352    return np.max(-np.log(
    -353        np.sum(np.power(np.power(v, t) * np.power(y, 1 - t), 1 - t))))
    +            
    344def chernoff_distance(v, y):
    +345    r"""Chernoff distance:
    +346
    +347    Parameters
    +348    ----------
    +349    v : array_like
    +350        Vector 1
    +351    y : array_like
    +352        Vector 2
    +353
    +354    Returns
    +355    -------
    +356    float
    +357        Chernoff distance between v and y
    +358
    +359    Notes
    +360    -----
    +361    .. math::
    +362
    +363        \max{(-ln\sum(v_{i}^ty_{i}^{1-t})^{1-t})},\ t=0.1,\ 0\le\ t<1
    +364    """
    +365    t = 0.1
    +366    return np.max(-np.log(np.sum(np.power(np.power(v, t) * np.power(y, 1 - t), 1 - t))))
     
    @@ -2534,29 +2592,29 @@
    Notes
    -
    356def ruzicka_distance(v, y):
    -357    r""" Ruzicka distance:
    -358
    -359    Parameters
    -360    ----------
    -361    v : array_like
    -362        Vector 1
    -363    y : array_like
    -364        Vector 2
    -365    
    -366    Returns
    -367    -------
    -368    float
    -369        Ruzicka distance between v and y
    -370
    -371    Notes
    -372    -----
    -373    .. math::
    -374
    -375        \frac{\sum{|v_{i}-y_{i}|}}{\sum{\max(v_{i},y_{i})}}
    -376    """
    -377    dist = np.sum(np.abs(v - y)) / np.sum(np.maximum(v, y))
    -378    return dist
    +            
    369def ruzicka_distance(v, y):
    +370    r"""Ruzicka distance:
    +371
    +372    Parameters
    +373    ----------
    +374    v : array_like
    +375        Vector 1
    +376    y : array_like
    +377        Vector 2
    +378
    +379    Returns
    +380    -------
    +381    float
    +382        Ruzicka distance between v and y
    +383
    +384    Notes
    +385    -----
    +386    .. math::
    +387
    +388        \frac{\sum{|v_{i}-y_{i}|}}{\sum{\max(v_{i},y_{i})}}
    +389    """
    +390    dist = np.sum(np.abs(v - y)) / np.sum(np.maximum(v, y))
    +391    return dist
     
    @@ -2595,28 +2653,28 @@
    Notes
    -
    381def roberts_distance(v, y):
    -382    r""" Roberts distance:
    -383
    -384    Parameters
    -385    ----------
    -386    v : array_like
    -387        Vector 1
    -388    y : array_like
    -389        Vector 2
    -390    
    -391    Returns
    -392    -------
    -393    float
    -394        Roberts distance between v and y
    -395
    -396    Notes
    -397    -----
    -398    .. math::
    -399
    -400        1-\sum\frac{(v_{i}+y_{i})\frac{\min{(v_{i},y_{i})}}{\max{(v_{i},y_{i})}}}{\sum(v_{i}+y_{i})}
    -401    """
    -402    return 1 - np.sum((v + y) * np.minimum(v, y) / np.maximum(v, y) / np.sum(v + y))
    +            
    394def roberts_distance(v, y):
    +395    r"""Roberts distance:
    +396
    +397    Parameters
    +398    ----------
    +399    v : array_like
    +400        Vector 1
    +401    y : array_like
    +402        Vector 2
    +403
    +404    Returns
    +405    -------
    +406    float
    +407        Roberts distance between v and y
    +408
    +409    Notes
    +410    -----
    +411    .. math::
    +412
    +413        1-\sum\frac{(v_{i}+y_{i})\frac{\min{(v_{i},y_{i})}}{\max{(v_{i},y_{i})}}}{\sum(v_{i}+y_{i})}
    +414    """
    +415    return 1 - np.sum((v + y) * np.minimum(v, y) / np.maximum(v, y) / np.sum(v + y))
     
    @@ -2655,28 +2713,28 @@
    Notes
    -
    405def intersection_distance(v, y):
    -406    r""" Intersection distance:
    -407
    -408    Parameters
    -409    ----------
    -410    v : array_like
    -411        Vector 1
    -412    y : array_like
    -413        Vector 2
    -414    
    -415    Returns
    -416    -------
    -417    float
    -418        Intersection distance between v and y
    -419
    -420    Notes
    -421    -----
    -422    .. math::
    -423
    -424        1-\frac{\sum\min{(v_{i},y_{i})}}{\min(\sum{v_{i},\sum{y_{i})}}}
    -425    """
    -426    return 1 - np.sum(np.minimum(v, y)) / min(np.sum(v), np.sum(y))
    +            
    418def intersection_distance(v, y):
    +419    r"""Intersection distance:
    +420
    +421    Parameters
    +422    ----------
    +423    v : array_like
    +424        Vector 1
    +425    y : array_like
    +426        Vector 2
    +427
    +428    Returns
    +429    -------
    +430    float
    +431        Intersection distance between v and y
    +432
    +433    Notes
    +434    -----
    +435    .. math::
    +436
    +437        1-\frac{\sum\min{(v_{i},y_{i})}}{\min(\sum{v_{i},\sum{y_{i})}}}
    +438    """
    +439    return 1 - np.sum(np.minimum(v, y)) / min(np.sum(v), np.sum(y))
     
    @@ -2715,28 +2773,28 @@
    Notes
    -
    429def motyka_distance(v, y):
    -430    r""" Motyka distance:
    -431
    -432    Parameters
    -433    ----------
    -434    v : array_like
    -435        Vector 1
    -436    y : array_like
    -437        Vector 2
    -438    
    -439    Returns
    -440    -------
    -441    float
    -442        Motyka distance between v and y
    -443    Notes
    -444    -----
    -445    .. math::
    -446
    -447        -\frac{\sum\min{(y_{i},v_{i})}}{\sum(y_{i}+v_{i})}
    -448    """
    -449    dist = np.sum(np.minimum(v, y)) / np.sum(v + y)
    -450    return dist
    +            
    442def motyka_distance(v, y):
    +443    r"""Motyka distance:
    +444
    +445    Parameters
    +446    ----------
    +447    v : array_like
    +448        Vector 1
    +449    y : array_like
    +450        Vector 2
    +451
    +452    Returns
    +453    -------
    +454    float
    +455        Motyka distance between v and y
    +456    Notes
    +457    -----
    +458    .. math::
    +459
    +460        -\frac{\sum\min{(y_{i},v_{i})}}{\sum(y_{i}+v_{i})}
    +461    """
    +462    dist = np.sum(np.minimum(v, y)) / np.sum(v + y)
    +463    return dist
     
    @@ -2775,30 +2833,30 @@
    Notes
    -
    453def canberra_distance(v, y):
    -454    r""" Canberra distance:
    -455
    -456    Parameters
    -457    ----------
    -458    v : array_like
    -459        Vector 1
    -460    y : array_like
    -461        Vector 2
    -462    
    -463    Returns
    -464    -------
    -465    float
    -466        Canberra distance between v and y
    -467
    -468    Notes
    -469    -----
    -470    .. math::
    -471
    -472        #\sum\frac{|v_{i}-y_{i}|}{|v_{i}|+|y_{i}|}
    -473        \sum_{i}\frac{|y_{i} - v_{i}|}{y_{i} + v_{i}}
    -474    """
    -475    #return np.sum(np.abs(v - y) / (np.abs(v) + np.abs(y)))
    -476    return np.sum(np.abs(y - v)/(y + v))
    +            
    466def canberra_distance(v, y):
    +467    r"""Canberra distance:
    +468
    +469    Parameters
    +470    ----------
    +471    v : array_like
    +472        Vector 1
    +473    y : array_like
    +474        Vector 2
    +475
    +476    Returns
    +477    -------
    +478    float
    +479        Canberra distance between v and y
    +480
    +481    Notes
    +482    -----
    +483    .. math::
    +484
    +485        #\sum\frac{|v_{i}-y_{i}|}{|v_{i}|+|y_{i}|}
    +486        \sum_{i}\frac{|y_{i} - v_{i}|}{y_{i} + v_{i}}
    +487    """
    +488    # return np.sum(np.abs(v - y) / (np.abs(v) + np.abs(y)))
    +489    return np.sum(np.abs(y - v) / (y + v))
     
    @@ -2838,28 +2896,28 @@
    Notes
    -
    478def canberra_metric(v, y):
    -479    r""" Canberra Metric
    -480
    -481    Parameters
    -482    ----------
    -483    v : array_like
    -484        Vector 1
    -485    y : array_like
    -486        Vector 2
    -487    
    -488    Returns
    -489    -------
    -490    float
    -491        Canberra metric between v and y
    -492    Notes
    -493    -----
    -494    .. math::
    -495
    -496        \frac{1}{\sum_{i}I(v_{i}\neq 0)}\sum_{i}\frac{|y_{i}-v_{i}|}{(y_{i}+v_{i})}
    -497    """
    -498
    -499    return (1 / np.sum(v > 0)) * np.sum(np.abs(y - v)/(y + v))
    +            
    492def canberra_metric(v, y):
    +493    r"""Canberra Metric
    +494
    +495    Parameters
    +496    ----------
    +497    v : array_like
    +498        Vector 1
    +499    y : array_like
    +500        Vector 2
    +501
    +502    Returns
    +503    -------
    +504    float
    +505        Canberra metric between v and y
    +506    Notes
    +507    -----
    +508    .. math::
    +509
    +510        \frac{1}{\sum_{i}I(v_{i}\neq 0)}\sum_{i}\frac{|y_{i}-v_{i}|}{(y_{i}+v_{i})}
    +511    """
    +512
    +513    return (1 / np.sum(v > 0)) * np.sum(np.abs(y - v) / (y + v))
     
    @@ -2898,28 +2956,28 @@
    Notes
    -
    502def kulczynski_1_distance(v, y):
    -503    r""" Kulczynski 1 distance:
    -504
    -505    Parameters
    -506    ----------
    -507    v : array_like
    -508        Vector 1
    -509    y : array_like
    -510        Vector 2
    -511    
    -512    Returns
    -513    -------
    -514    float
    -515        Kulczynski 1 distance between v and y
    -516    
    -517    Notes
    -518    -----
    -519    .. math::
    -520
    -521        \frac{\sum{|v_i}-y_i|}{\sum m\ i\ n\ (v_i,y_i)}
    -522    """
    -523    return np.sum(np.abs(y - v)) / np.sum(np.minimum(y, v))
    +            
    516def kulczynski_1_distance(v, y):
    +517    r"""Kulczynski 1 distance:
    +518
    +519    Parameters
    +520    ----------
    +521    v : array_like
    +522        Vector 1
    +523    y : array_like
    +524        Vector 2
    +525
    +526    Returns
    +527    -------
    +528    float
    +529        Kulczynski 1 distance between v and y
    +530
    +531    Notes
    +532    -----
    +533    .. math::
    +534
    +535        \frac{\sum{|v_i}-y_i|}{\sum m\ i\ n\ (v_i,y_i)}
    +536    """
    +537    return np.sum(np.abs(y - v)) / np.sum(np.minimum(y, v))
     
    @@ -2958,31 +3016,31 @@
    Notes
    -
    526def baroni_urbani_buser_distance(v, y):
    -527    r""" Baroni-Urbani-Buser distance:
    -528
    -529    Parameters
    -530    ----------
    -531    v : array_like
    -532        Vector 1
    -533    y : array_like
    -534        Vector 2
    -535    
    -536    Returns
    -537    -------
    -538    float
    -539        Baroni-Urbani-Buser distance between v and y
    -540
    -541    Notes
    -542    -----
    -543    .. math::
    -544
    -545        1-\frac{\sum\min{(v_i,y_i)}+\sqrt{\sum\min{(v_i,y_i)}\sum(\max{(v)}-\max{(v_i,y_i)})}}{\sum{\max{(v_i,y_i)}+\sqrt{\sum{\min{(v_i,y_i)}\sum(\max{(v)}-\max{(v_i,y_i)})}}}}
    -546    """
    -547    if np.max(v) < np.max(y):
    -548        v, y = y, v
    -549    d1 = np.sqrt(np.sum(np.minimum(v, y) * np.sum(max(v) - np.maximum(v, y))))
    -550    return 1 - (np.sum(np.minimum(v, y)) + d1) / (np.sum(np.maximum(v, y)) + d1)
    +            
    540def baroni_urbani_buser_distance(v, y):
    +541    r"""Baroni-Urbani-Buser distance:
    +542
    +543    Parameters
    +544    ----------
    +545    v : array_like
    +546        Vector 1
    +547    y : array_like
    +548        Vector 2
    +549
    +550    Returns
    +551    -------
    +552    float
    +553        Baroni-Urbani-Buser distance between v and y
    +554
    +555    Notes
    +556    -----
    +557    .. math::
    +558
    +559        1-\frac{\sum\min{(v_i,y_i)}+\sqrt{\sum\min{(v_i,y_i)}\sum(\max{(v)}-\max{(v_i,y_i)})}}{\sum{\max{(v_i,y_i)}+\sqrt{\sum{\min{(v_i,y_i)}\sum(\max{(v)}-\max{(v_i,y_i)})}}}}
    +560    """
    +561    if np.max(v) < np.max(y):
    +562        v, y = y, v
    +563    d1 = np.sqrt(np.sum(np.minimum(v, y) * np.sum(max(v) - np.maximum(v, y))))
    +564    return 1 - (np.sum(np.minimum(v, y)) + d1) / (np.sum(np.maximum(v, y)) + d1)
     
    @@ -3021,29 +3079,29 @@
    Notes
    -
    553def penrose_size_distance(v, y):
    -554    r""" Penrose size distance:
    -555    
    -556    Parameters
    -557    ----------
    -558    v : array_like
    -559        Vector 1
    -560    y : array_like
    -561        Vector 2
    -562    
    -563    Returns
    -564    -------
    -565    float
    -566        Penrose size distance between v and y
    -567
    -568    Notes
    -569    -----
    -570    .. math::
    -571
    -572        \sqrt N\sum{|y_i-v_i|}
    -573    """
    -574    n = np.sum(v > 0)
    -575    return np.sqrt(n) * np.sum(np.abs(y - v))
    +            
    567def penrose_size_distance(v, y):
    +568    r"""Penrose size distance:
    +569
    +570    Parameters
    +571    ----------
    +572    v : array_like
    +573        Vector 1
    +574    y : array_like
    +575        Vector 2
    +576
    +577    Returns
    +578    -------
    +579    float
    +580        Penrose size distance between v and y
    +581
    +582    Notes
    +583    -----
    +584    .. math::
    +585
    +586        \sqrt N\sum{|y_i-v_i|}
    +587    """
    +588    n = np.sum(v > 0)
    +589    return np.sqrt(n) * np.sum(np.abs(y - v))
     
    @@ -3082,30 +3140,30 @@
    Notes
    -
    578def mean_character_distance(v, y):
    -579    r"""
    -580    Mean character distance:
    -581
    -582    Parameters
    -583    ----------
    -584    v : array_like
    -585        Vector 1
    -586    y : array_like
    -587        Vector 2
    -588    
    -589    Returns
    -590    -------
    -591    float
    -592        Mean character distance between v and y
    -593
    -594    Notes
    -595    -----
    -596    .. math::
    -597
    -598        \frac{1}{N}\sum{|y_i-v_i|}
    -599    """
    -600    n = np.sum(v > 0)
    -601    return 1 / n * np.sum(np.abs(y - v))
    +            
    592def mean_character_distance(v, y):
    +593    r"""
    +594    Mean character distance:
    +595
    +596    Parameters
    +597    ----------
    +598    v : array_like
    +599        Vector 1
    +600    y : array_like
    +601        Vector 2
    +602
    +603    Returns
    +604    -------
    +605    float
    +606        Mean character distance between v and y
    +607
    +608    Notes
    +609    -----
    +610    .. math::
    +611
    +612        \frac{1}{N}\sum{|y_i-v_i|}
    +613    """
    +614    n = np.sum(v > 0)
    +615    return 1 / n * np.sum(np.abs(y - v))
     
    @@ -3144,29 +3202,29 @@
    Notes
    -
    604def lorentzian_distance(v, y):
    -605    r"""
    -606    Lorentzian distance:
    -607
    -608    Parameters
    -609    ----------
    -610    v : array_like
    -611        Vector 1
    -612    y : array_like
    -613        Vector 2
    -614    
    -615    Returns
    -616    -------
    -617    float
    -618        Lorentzian distance between v and y
    -619
    -620    Notes
    -621    -----
    -622    .. math::
    -623
    -624        \sum{\ln(1+|v_i-y_i|)}
    -625    """
    -626    return np.sum(np.log(1 + np.abs(y - v)))
    +            
    618def lorentzian_distance(v, y):
    +619    r"""
    +620    Lorentzian distance:
    +621
    +622    Parameters
    +623    ----------
    +624    v : array_like
    +625        Vector 1
    +626    y : array_like
    +627        Vector 2
    +628
    +629    Returns
    +630    -------
    +631    float
    +632        Lorentzian distance between v and y
    +633
    +634    Notes
    +635    -----
    +636    .. math::
    +637
    +638        \sum{\ln(1+|v_i-y_i|)}
    +639    """
    +640    return np.sum(np.log(1 + np.abs(y - v)))
     
    @@ -3205,30 +3263,30 @@
    Notes
    -
    629def penrose_shape_distance(v, y):
    -630    r"""
    -631    Penrose shape distance:
    -632
    -633    Parameters
    -634    ----------
    -635    v : array_like
    -636        Vector 1
    -637    y : array_like
    -638        Vector 2
    -639    
    -640    Returns
    -641    -------
    -642    float
    -643        Penrose shape distance between v and y
    -644    Notes
    -645    -----
    -646    .. math::
    -647
    -648        \sqrt{\sum((v_i-\bar{v})-(y_i-\bar{y}))^2}
    -649    """
    -650    v_avg = np.mean(v)
    -651    y_avg = np.mean(y)
    -652    return np.sqrt(np.sum(np.power((y - y_avg) - (v - v_avg), 2)))
    +            
    643def penrose_shape_distance(v, y):
    +644    r"""
    +645    Penrose shape distance:
    +646
    +647    Parameters
    +648    ----------
    +649    v : array_like
    +650        Vector 1
    +651    y : array_like
    +652        Vector 2
    +653
    +654    Returns
    +655    -------
    +656    float
    +657        Penrose shape distance between v and y
    +658    Notes
    +659    -----
    +660    .. math::
    +661
    +662        \sqrt{\sum((v_i-\bar{v})-(y_i-\bar{y}))^2}
    +663    """
    +664    v_avg = np.mean(v)
    +665    y_avg = np.mean(y)
    +666    return np.sqrt(np.sum(np.power((y - y_avg) - (v - v_avg), 2)))
     
    @@ -3267,32 +3325,32 @@
    Notes
    -
    655def clark_distance(v, y):
    -656    r"""
    -657    Clark distance:
    -658
    -659    Parameters
    -660    ----------
    -661    v : array_like
    -662        Vector 1
    -663    y : array_like
    -664        Vector 2
    -665    
    -666    Returns
    -667    -------
    -668    float
    -669        Clark distance between v and y
    -670
    -671    Notes
    -672    -----
    -673    .. math::
    -674
    -675        #(\frac{1}{N}\sum(\frac{v_i-y_i}{|v_i|+|y_i|})^2)^\frac{1}{2}
    -676        \sqrt{\sum(\frac{|v_i-y_i|}{v_i+y_i})^2}
    -677    """
    -678    #n = np.sum(v > 0)
    -679    #return np.sqrt(1 / n * np.sum(np.power((v - y) / (np.abs(v) + np.abs(y)), 2)))
    -680    return np.sqrt(np.sum(np.power(np.abs(y - v) / (y + v), 2)))
    +            
    669def clark_distance(v, y):
    +670    r"""
    +671    Clark distance:
    +672
    +673    Parameters
    +674    ----------
    +675    v : array_like
    +676        Vector 1
    +677    y : array_like
    +678        Vector 2
    +679
    +680    Returns
    +681    -------
    +682    float
    +683        Clark distance between v and y
    +684
    +685    Notes
    +686    -----
    +687    .. math::
    +688
    +689        #(\frac{1}{N}\sum(\frac{v_i-y_i}{|v_i|+|y_i|})^2)^\frac{1}{2}
    +690        \sqrt{\sum(\frac{|v_i-y_i|}{v_i+y_i})^2}
    +691    """
    +692    # n = np.sum(v > 0)
    +693    # return np.sqrt(1 / n * np.sum(np.power((v - y) / (np.abs(v) + np.abs(y)), 2)))
    +694    return np.sqrt(np.sum(np.power(np.abs(y - v) / (y + v), 2)))
     
    @@ -3332,33 +3390,33 @@
    Notes
    -
    683def hellinger_distance(v, y):
    -684    r"""
    -685    Hellinger distance:
    -686
    -687    Parameters
    -688    ----------
    -689    v : array_like
    -690        Vector 1
    -691    y : array_like
    -692        Vector 2
    -693    
    -694    Returns
    -695    -------
    -696    float
    -697        Hellinger distance between v and y
    -698
    -699    Notes
    -700    -----
    -701    .. math::
    -702
    -703        #\sqrt{2\sum(\sqrt{\frac{v_i}{\bar{v}}}-\sqrt{\frac{y_i}{\bar{y}}})^2}
    -704        \sqrt{2\sum(\sqrt{v_i}-\sqrt{y_i})^2}
    -705    """
    -706    #v_avg = np.mean(v)
    -707    #y_avg = np.mean(y)
    -708    #return np.sqrt(2 * np.sum(np.power(np.sqrt(v / v_avg) - np.sqrt(y / y_avg), 2)))
    -709    return np.sqrt(2 * np.sum(np.power(np.sqrt(y) - np.sqrt(v), 2)))
    +            
    697def hellinger_distance(v, y):
    +698    r"""
    +699    Hellinger distance:
    +700
    +701    Parameters
    +702    ----------
    +703    v : array_like
    +704        Vector 1
    +705    y : array_like
    +706        Vector 2
    +707
    +708    Returns
    +709    -------
    +710    float
    +711        Hellinger distance between v and y
    +712
    +713    Notes
    +714    -----
    +715    .. math::
    +716
    +717        #\sqrt{2\sum(\sqrt{\frac{v_i}{\bar{v}}}-\sqrt{\frac{y_i}{\bar{y}}})^2}
    +718        \sqrt{2\sum(\sqrt{v_i}-\sqrt{y_i})^2}
    +719    """
    +720    # v_avg = np.mean(v)
    +721    # y_avg = np.mean(y)
    +722    # return np.sqrt(2 * np.sum(np.power(np.sqrt(v / v_avg) - np.sqrt(y / y_avg), 2)))
    +723    return np.sqrt(2 * np.sum(np.power(np.sqrt(y) - np.sqrt(v), 2)))
     
    @@ -3398,31 +3456,31 @@
    Notes
    -
    712def whittaker_index_of_association_distance(v, y):
    -713    r"""
    -714    Whittaker index of association distance:
    -715
    -716    Parameters
    -717    ----------
    -718    v : array_like
    -719        Vector 1
    -720    y : array_like
    -721        Vector 2
    -722    
    -723    Returns
    -724    -------
    -725    float
    -726        Whittaker index of association distance between v and y
    -727
    -728    Notes
    -729    -----
    -730    .. math::
    -731
    -732        \frac{1}{2}\sum|\frac{v_i}{\bar{v}}-\frac{y_i}{\bar{y}}|
    -733    """
    -734    v_avg = np.mean(v)
    -735    y_avg = np.mean(y)
    -736    return 1 / 2 * np.sum(np.abs(v / v_avg - y / y_avg))
    +            
    726def whittaker_index_of_association_distance(v, y):
    +727    r"""
    +728    Whittaker index of association distance:
    +729
    +730    Parameters
    +731    ----------
    +732    v : array_like
    +733        Vector 1
    +734    y : array_like
    +735        Vector 2
    +736
    +737    Returns
    +738    -------
    +739    float
    +740        Whittaker index of association distance between v and y
    +741
    +742    Notes
    +743    -----
    +744    .. math::
    +745
    +746        \frac{1}{2}\sum|\frac{v_i}{\bar{v}}-\frac{y_i}{\bar{y}}|
    +747    """
    +748    v_avg = np.mean(v)
    +749    y_avg = np.mean(y)
    +750    return 1 / 2 * np.sum(np.abs(v / v_avg - y / y_avg))
     
    @@ -3461,30 +3519,30 @@
    Notes
    -
    754def similarity_index_distance(v, y):
    -755    r"""
    -756    Similarity Index Distance:
    -757
    -758    Parameters
    -759    ----------
    -760    v : array_like
    -761        Vector 1
    -762    y : array_like
    -763        Vector 2
    -764    
    -765    Returns
    -766    -------
    -767    float
    -768        Similarity Index Distance between v and y
    -769
    -770    Notes
    -771    -----
    -772    .. math::
    -773
    -774        \sqrt{\frac{\sum\{\frac{v_i-y_i}{y_i}\}^2}{N}}
    -775    """
    -776    n = np.sum(v > 0)
    -777    return np.sqrt(1 / n * np.sum(np.power((v - y) / y, 2)))
    +            
    769def similarity_index_distance(v, y):
    +770    r"""
    +771    Similarity Index Distance:
    +772
    +773    Parameters
    +774    ----------
    +775    v : array_like
    +776        Vector 1
    +777    y : array_like
    +778        Vector 2
    +779
    +780    Returns
    +781    -------
    +782    float
    +783        Similarity Index Distance between v and y
    +784
    +785    Notes
    +786    -----
    +787    .. math::
    +788
    +789        \sqrt{\frac{\sum\{\frac{v_i-y_i}{y_i}\}^2}{N}}
    +790    """
    +791    n = np.sum(v > 0)
    +792    return np.sqrt(1 / n * np.sum(np.power((v - y) / y, 2)))
     
    @@ -3523,30 +3581,30 @@
    Notes
    -
    780def improved_similarity_distance(v, y):
    -781    r"""
    -782    Improved Similarity Index:
    -783
    -784    Parameters
    -785    ----------
    -786    v : array_like
    -787        Vector 1
    -788    y : array_like
    -789        Vector 2
    -790    
    -791    Returns
    -792    -------
    -793    float
    -794        Improved Similarity Index between v and y
    -795
    -796    Notes
    -797    -----
    -798    .. math::
    -799
    -800        \sqrt{\frac{1}{N}\sum\{\frac{y_i-v_i}{y_i+v_i}\}^2}
    -801    """
    -802    n = np.sum(v > 0)
    -803    return np.sqrt(1 / n * np.sum(np.power((y - v) / (y + v), 2)))
    +            
    795def improved_similarity_distance(v, y):
    +796    r"""
    +797    Improved Similarity Index:
    +798
    +799    Parameters
    +800    ----------
    +801    v : array_like
    +802        Vector 1
    +803    y : array_like
    +804        Vector 2
    +805
    +806    Returns
    +807    -------
    +808    float
    +809        Improved Similarity Index between v and y
    +810
    +811    Notes
    +812    -----
    +813    .. math::
    +814
    +815        \sqrt{\frac{1}{N}\sum\{\frac{y_i-v_i}{y_i+v_i}\}^2}
    +816    """
    +817    n = np.sum(v > 0)
    +818    return np.sqrt(1 / n * np.sum(np.power((y - v) / (y + v), 2)))
     
    @@ -3585,31 +3643,31 @@
    Notes
    -
    806def absolute_value_distance(v, y):
    -807    r"""
    -808    Absolute Value Distance:
    -809
    -810    Parameters
    -811    ----------
    -812    v : array_like
    -813        Vector 1
    -814    y : array_like
    -815        Vector 2
    -816    
    -817    Returns
    -818    -------
    -819    float
    -820        Absolute Value Distance between v and y
    -821
    -822    Notes
    -823    -----
    -824    .. math::
    -825
    -826        \frac { \sum(|y_i-v_i|)}{\sum v_i}
    -827
    -828    """
    -829    dist = np.sum(np.abs(y - v)) / np.sum(v)
    -830    return dist
    +            
    821def absolute_value_distance(v, y):
    +822    r"""
    +823    Absolute Value Distance:
    +824
    +825    Parameters
    +826    ----------
    +827    v : array_like
    +828        Vector 1
    +829    y : array_like
    +830        Vector 2
    +831
    +832    Returns
    +833    -------
    +834    float
    +835        Absolute Value Distance between v and y
    +836
    +837    Notes
    +838    -----
    +839    .. math::
    +840
    +841        \frac { \sum(|y_i-v_i|)}{\sum v_i}
    +842
    +843    """
    +844    dist = np.sum(np.abs(y - v)) / np.sum(v)
    +845    return dist
     
    @@ -3648,33 +3706,35 @@
    Notes
    -
    832def spectral_contrast_angle_distance(v, y):
    -833    r"""
    -834    Spectral Contrast Angle:
    -835
    -836    Parameters
    -837    ----------
    -838    v : array_like
    -839        Vector 1
    -840    y : array_like
    -841        Vector 2
    -842    
    -843    Returns
    -844    -------
    -845    float
    -846        Spectral Contrast Angle between v and y
    -847
    -848    Notes
    -849    -----
    -850    .. math::
    +            
    848def spectral_contrast_angle_distance(v, y):
    +849    r"""
    +850    Spectral Contrast Angle:
     851
    -852        1 - \frac{\sum{y_iv_i}}{\sqrt{\sum y_i^2\sum v_i^2}}
    -853        \arccos(\frac{\sum_{P}y_{p}^* v_{p}^*}{\sqrt{\sum_{P}y_{p}^{*2} \sum_{P}v_{p}^{*2}}})
    -854    """
    -855    #return 1 - np.sum(y * v) / \
    -856    #       np.sqrt(np.sum(np.power(y, 2)) * np.sum(np.power(v, 2)))
    -857
    -858    return np.arccos(np.sum(y * v) / (np.sqrt(np.sum(np.power(y, 2)) * np.sum(np.power(v, 2)))))
    +852    Parameters
    +853    ----------
    +854    v : array_like
    +855        Vector 1
    +856    y : array_like
    +857        Vector 2
    +858
    +859    Returns
    +860    -------
    +861    float
    +862        Spectral Contrast Angle between v and y
    +863
    +864    Notes
    +865    -----
    +866    .. math::
    +867
    +868        1 - \frac{\sum{y_iv_i}}{\sqrt{\sum y_i^2\sum v_i^2}}
    +869        \arccos(\frac{\sum_{P}y_{p}^* v_{p}^*}{\sqrt{\sum_{P}y_{p}^{*2} \sum_{P}v_{p}^{*2}}})
    +870    """
    +871    # return 1 - np.sum(y * v) / \
    +872    #       np.sqrt(np.sum(np.power(y, 2)) * np.sum(np.power(v, 2)))
    +873
    +874    return np.arccos(
    +875        np.sum(y * v) / (np.sqrt(np.sum(np.power(y, 2)) * np.sum(np.power(v, 2))))
    +876    )
     
    @@ -3714,29 +3774,29 @@
    Notes
    -
    861def wave_hedges_distance(v, y):
    -862    r"""
    -863    Wave Hedges distance:
    -864
    -865    Parameters
    -866    ----------
    -867    v : array_like
    -868        Vector 1
    -869    y : array_like
    -870        Vector 2
    -871    
    -872    Returns
    -873    -------
    -874    float
    -875        Wave Hedges distance between v and y
    -876
    -877    Notes
    -878    -----
    -879    .. math::
    -880
    -881        \sum\frac{|v_i-y_i|}{\max{(v_i,y_i)}}
    -882    """
    -883    return np.sum(np.abs(v - y) / np.maximum(v, y))
    +            
    879def wave_hedges_distance(v, y):
    +880    r"""
    +881    Wave Hedges distance:
    +882
    +883    Parameters
    +884    ----------
    +885    v : array_like
    +886        Vector 1
    +887    y : array_like
    +888        Vector 2
    +889
    +890    Returns
    +891    -------
    +892    float
    +893        Wave Hedges distance between v and y
    +894
    +895    Notes
    +896    -----
    +897    .. math::
    +898
    +899        \sum\frac{|v_i-y_i|}{\max{(v_i,y_i)}}
    +900    """
    +901    return np.sum(np.abs(v - y) / np.maximum(v, y))
     
    @@ -3775,30 +3835,30 @@
    Notes
    -
    885def dice_similarity(v, y):
    -886    r"""
    -887    Dice similarity:
    -888
    -889    Parameters
    -890    ----------
    -891    v : array_like
    -892        Vector 1
    -893    y : array_like
    -894        Vector 2
    -895    
    -896    Returns
    -897    -------
    -898    float
    -899        Dice similarity between v and y
    -900
    -901    Notes
    -902    -----
    -903    .. math::
    -904
    -905        \frac{\sum(v_i-y_i)^2}{\sum v_i^2+\sum y_i^2}
    -906        \frac{2 * \sum_{i}v_{i}y_{i}}{\sum_{i}y_{i}^2 + \sum_{i}v_{i}^2}
    -907    """
    -908    return 2 * np.sum(v * y) / (np.sum(np.power(v, 2)) + np.sum(np.power(y, 2)))
    +            
    904def dice_similarity(v, y):
    +905    r"""
    +906    Dice similarity:
    +907
    +908    Parameters
    +909    ----------
    +910    v : array_like
    +911        Vector 1
    +912    y : array_like
    +913        Vector 2
    +914
    +915    Returns
    +916    -------
    +917    float
    +918        Dice similarity between v and y
    +919
    +920    Notes
    +921    -----
    +922    .. math::
    +923
    +924        \frac{\sum(v_i-y_i)^2}{\sum v_i^2+\sum y_i^2}
    +925        \frac{2 * \sum_{i}v_{i}y_{i}}{\sum_{i}y_{i}^2 + \sum_{i}v_{i}^2}
    +926    """
    +927    return 2 * np.sum(v * y) / (np.sum(np.power(v, 2)) + np.sum(np.power(y, 2)))
     
    @@ -3838,29 +3898,29 @@
    Notes
    -
    911def inner_product_distance(v, y):
    -912    r"""
    -913    Inner Product distance:
    -914
    -915    Parameters
    -916    ----------
    -917    v : array_like
    -918        Vector 1
    -919    y : array_like
    -920        Vector 2
    -921    
    -922    Returns
    -923    -------
    -924    float
    -925        Inner product distance between v and y
    -926
    -927    Notes
    -928    -----
    -929    .. math::
    -930
    -931        1-\sum{v_iy_i}
    -932    """
    -933    return 1 - np.sum(v * y)
    +            
    930def inner_product_distance(v, y):
    +931    r"""
    +932    Inner Product distance:
    +933
    +934    Parameters
    +935    ----------
    +936    v : array_like
    +937        Vector 1
    +938    y : array_like
    +939        Vector 2
    +940
    +941    Returns
    +942    -------
    +943    float
    +944        Inner product distance between v and y
    +945
    +946    Notes
    +947    -----
    +948    .. math::
    +949
    +950        1-\sum{v_iy_i}
    +951    """
    +952    return 1 - np.sum(v * y)
     
    @@ -3899,29 +3959,29 @@
    Notes
    -
    936def divergence_distance(v, y):
    -937    r"""
    -938    Divergence distance:
    -939
    -940    Parameters
    -941    ----------
    -942    v : array_like
    -943        Vector 1
    -944    y : array_like
    -945        Vector 2
    -946    
    -947    Returns
    -948    -------
    -949    float
    -950        Divergence distance between v and y
    -951
    -952    Notes
    -953    -----
    -954    .. math::
    -955
    -956        2\sum\frac{(v_i-y_i)^2}{(v_i+y_i)^2}
    -957    """
    -958    return 2 * np.sum((np.power(v - y, 2)) / np.power(v + y, 2))
    +            
    955def divergence_distance(v, y):
    +956    r"""
    +957    Divergence distance:
    +958
    +959    Parameters
    +960    ----------
    +961    v : array_like
    +962        Vector 1
    +963    y : array_like
    +964        Vector 2
    +965
    +966    Returns
    +967    -------
    +968    float
    +969        Divergence distance between v and y
    +970
    +971    Notes
    +972    -----
    +973    .. math::
    +974
    +975        2\sum\frac{(v_i-y_i)^2}{(v_i+y_i)^2}
    +976    """
    +977    return 2 * np.sum((np.power(v - y, 2)) / np.power(v + y, 2))
     
    @@ -3960,33 +4020,30 @@
    Notes
    -
     987def jensen_difference_distance(v, y):
    - 988    r"""
    - 989    Jensen difference:
    - 990    
    - 991    Parameters
    - 992    ----------
    - 993    v : array_like
    - 994        Vector 1
    - 995    y : array_like
    - 996        Vector 2
    - 997    
    - 998    Returns
    - 999    -------
    -1000    float
    -1001        Jensen difference distance between v and y
    -1002
    -1003    Notes
    -1004    -----
    -1005    .. math::
    -1006
    -1007        \sum[\frac{1}{2}(v_i\ln{v_i}+y_i\ln{y_i})-(\frac{v_i+y_i}{2})\ln{(\frac{v_i+y_i}{2})}]
    -1008    """
    -1009    y_v_avg = (y + v) / 2
    -1010    return np.sum(
    -1011        1 / 2 * (y * np.log(y) + v * np.log(v)) -
    -1012        y_v_avg * np.log(y_v_avg)
    -1013    )
    +            
    1006def jensen_difference_distance(v, y):
    +1007    r"""
    +1008    Jensen difference:
    +1009
    +1010    Parameters
    +1011    ----------
    +1012    v : array_like
    +1013        Vector 1
    +1014    y : array_like
    +1015        Vector 2
    +1016
    +1017    Returns
    +1018    -------
    +1019    float
    +1020        Jensen difference distance between v and y
    +1021
    +1022    Notes
    +1023    -----
    +1024    .. math::
    +1025
    +1026        \sum[\frac{1}{2}(v_i\ln{v_i}+y_i\ln{y_i})-(\frac{v_i+y_i}{2})\ln{(\frac{v_i+y_i}{2})}]
    +1027    """
    +1028    y_v_avg = (y + v) / 2
    +1029    return np.sum(1 / 2 * (y * np.log(y) + v * np.log(v)) - y_v_avg * np.log(y_v_avg))
     
    @@ -4025,32 +4082,31 @@
    Notes
    -
    1016def kumar_johnson_distance(v, y):
    -1017    r"""
    -1018    Kumar-Johnson distance:
    -1019
    -1020    Parameters
    -1021    ----------
    -1022    v : array_like
    -1023        Vector 1
    -1024    y : array_like
    -1025        Vector 2
    -1026    
    -1027    Returns
    -1028    -------
    -1029    float
    -1030        Kumar Johnson distance between v and y
    -1031
    -1032    Notes
    -1033    -----
    -1034    .. math::
    +            
    1032def kumar_johnson_distance(v, y):
    +1033    r"""
    +1034    Kumar-Johnson distance:
     1035
    -1036        \sum\frac{(v_i^2-y_i^2)^2}{2(v_iy_i)^\frac{3}{2}}
    -1037    """
    -1038    return np.sum(
    -1039        np.power(np.power(v, 2) - np.power(y, 2), 2) / \
    -1040        (2 * np.power(v * y, 3 / 2))
    -1041    )
    +1036    Parameters
    +1037    ----------
    +1038    v : array_like
    +1039        Vector 1
    +1040    y : array_like
    +1041        Vector 2
    +1042
    +1043    Returns
    +1044    -------
    +1045    float
    +1046        Kumar Johnson distance between v and y
    +1047
    +1048    Notes
    +1049    -----
    +1050    .. math::
    +1051
    +1052        \sum\frac{(v_i^2-y_i^2)^2}{2(v_iy_i)^\frac{3}{2}}
    +1053    """
    +1054    return np.sum(
    +1055        np.power(np.power(v, 2) - np.power(y, 2), 2) / (2 * np.power(v * y, 3 / 2))
    +1056    )
     
    @@ -4089,29 +4145,29 @@
    Notes
    -
    1044def avg_l_distance(v, y):
    -1045    r"""
    -1046    Avg (L1, L∞) distance:
    -1047
    -1048    Parameters
    -1049    ----------
    -1050    v : array_like
    -1051        Vector 1
    -1052    y : array_like
    -1053        Vector 2
    -1054    
    -1055    Returns
    -1056    -------
    -1057    float
    -1058        Average L distance between v and y
    -1059
    -1060    Notes
    -1061    -----
    -1062    .. math::
    -1063
    -1064        \frac{1}{2}(\sum|v_i-y_i|+\underset{i}{\max}{|v_i-y_i|})
    -1065    """
    -1066    return 1 / 2 * (np.sum(np.abs(v - y)) + max(np.abs(v - y)))
    +            
    1059def avg_l_distance(v, y):
    +1060    r"""
    +1061    Avg (L1, L∞) distance:
    +1062
    +1063    Parameters
    +1064    ----------
    +1065    v : array_like
    +1066        Vector 1
    +1067    y : array_like
    +1068        Vector 2
    +1069
    +1070    Returns
    +1071    -------
    +1072    float
    +1073        Average L distance between v and y
    +1074
    +1075    Notes
    +1076    -----
    +1077    .. math::
    +1078
    +1079        \frac{1}{2}(\sum|v_i-y_i|+\underset{i}{\max}{|v_i-y_i|})
    +1080    """
    +1081    return 1 / 2 * (np.sum(np.abs(v - y)) + max(np.abs(v - y)))
     
    @@ -4150,29 +4206,29 @@
    Notes
    -
    1069def vicis_wave_hadges_distance(v, y):
    -1070    r"""
    -1071    Vicis-Wave Hadges distance:
    -1072
    -1073    Parameters
    -1074    ----------
    -1075    v : array_like
    -1076        Vector 1
    -1077    y : array_like
    -1078        Vector 2
    -1079    
    -1080    Returns
    -1081    -------
    -1082    float
    -1083        Vicis Wave Hadges distance between v and y
    -1084
    -1085    Notes
    -1086    -----
    -1087    .. math::
    -1088
    -1089        \sum\frac{|v_i-y_i|}{\min{(v_i,\ y_i)}}
    -1090    """
    -1091    return np.sum(np.abs(v - y) / np.minimum(v, y))
    +            
    1084def vicis_wave_hadges_distance(v, y):
    +1085    r"""
    +1086    Vicis-Wave Hedges distance:
    +1087
    +1088    Parameters
    +1089    ----------
    +1090    v : array_like
    +1091        Vector 1
    +1092    y : array_like
    +1093        Vector 2
    +1094
    +1095    Returns
    +1096    -------
    +1097    float
    +1098        Vicis Wave Hedges distance between v and y
    +1099
    +1100    Notes
    +1101    -----
    +1102    .. math::
    +1103
    +1104        \sum\frac{|v_i-y_i|}{\min{(v_i,\ y_i)}}
    +1105    """
    +1106    return np.sum(np.abs(v - y) / np.minimum(v, y))
     
    @@ -4211,29 +4267,29 @@
    Notes
    -
    1094def vicis_symmetric_chi_squared_1_distance(v, y):
    -1095    r"""
    -1096    Vicis-Symmetric χ2 1 distance:
    -1097
    -1098    Parameters
    -1099    ----------
    -1100    v : array_like
    -1101        Vector 1
    -1102    y : array_like
    -1103        Vector 2
    -1104    
    -1105    Returns
    -1106    -------
    -1107    float
    -1108        Vici Symmetric χ2 1 distance between v and y
    -1109
    -1110    Notes
    -1111    -----
    -1112    .. math::
    -1113
    -1114        \sum\frac{(v_i-y_i)^2}{\min{(v_i,y_i)^2}}
    -1115    """
    -1116    return np.sum(np.power(v - y, 2) / np.power(np.minimum(v, y), 2))
    +            
    1109def vicis_symmetric_chi_squared_1_distance(v, y):
    +1110    r"""
    +1111    Vicis-Symmetric χ2 1 distance:
    +1112
    +1113    Parameters
    +1114    ----------
    +1115    v : array_like
    +1116        Vector 1
    +1117    y : array_like
    +1118        Vector 2
    +1119
    +1120    Returns
    +1121    -------
    +1122    float
    +1123        Vicis Symmetric χ2 1 distance between v and y
    +1124
    +1125    Notes
    +1126    -----
    +1127    .. math::
    +1128
    +1129        \sum\frac{(v_i-y_i)^2}{\min{(v_i,y_i)^2}}
    +1130    """
    +1131    return np.sum(np.power(v - y, 2) / np.power(np.minimum(v, y), 2))
     
    @@ -4272,30 +4328,30 @@
    Notes
    -
    1119def vicis_symmetric_chi_squared_2_distance(v, y):
    -1120    r"""
    -1121    Vicis-Symmetric χ2 2 distance:
    -1122    
    -1123    Parameters
    -1124    ----------
    -1125    v : array_like
    -1126        Vector 1
    -1127    y : array_like
    -1128        Vector 2
    -1129    
    -1130    Returns
    -1131    -------
    -1132    float
    -1133        Vicis Symmetric χ2 2 distance between v and y
    -1134
    -1135    Notes
    -1136    -----
    +            
    1134def vicis_symmetric_chi_squared_2_distance(v, y):
    +1135    r"""
    +1136    Vicis-Symmetric χ2 2 distance:
     1137
    -1138    .. math::
    -1139
    -1140        \sum\frac{(v_i-y_i)^2}{\min{(v_i,y_i)}}
    -1141    """
    -1142    return np.sum(np.power(v - y, 2) / np.minimum(v, y))
    +1138    Parameters
    +1139    ----------
    +1140    v : array_like
    +1141        Vector 1
    +1142    y : array_like
    +1143        Vector 2
    +1144
    +1145    Returns
    +1146    -------
    +1147    float
    +1148        Vicis Symmetric χ2 2 distance between v and y
    +1149
    +1150    Notes
    +1151    -----
    +1152
    +1153    .. math::
    +1154
    +1155        \sum\frac{(v_i-y_i)^2}{\min{(v_i,y_i)}}
    +1156    """
    +1157    return np.sum(np.power(v - y, 2) / np.minimum(v, y))
     
    @@ -4334,30 +4390,30 @@
    Notes
    -
    1145def vicis_symmetric_chi_squared_3_distance(v, y):
    -1146    r"""
    -1147    Vicis-Symmetric χ2 3 distance:
    -1148    
    -1149    Parameters
    -1150    ----------
    -1151    v : array_like
    -1152        Vector 1
    -1153    y : array_like
    -1154        Vector 2
    -1155    
    -1156    Returns
    -1157    -------
    -1158    float
    -1159        Vici Symmetric χ2 3 distance between v and y
    -1160
    -1161    Notes
    -1162    -----
    +            
    1160def vicis_symmetric_chi_squared_3_distance(v, y):
    +1161    r"""
    +1162    Vicis-Symmetric χ2 3 distance:
     1163
    -1164    .. math::
    -1165
    -1166        \sum\frac{(v_i-y_i)^2}{\max{(v_i,y_i)}}
    -1167    """
    -1168    return np.sum(np.power(v - y, 2) / np.maximum(v, y))
    +1164    Parameters
    +1165    ----------
    +1166    v : array_like
    +1167        Vector 1
    +1168    y : array_like
    +1169        Vector 2
    +1170
    +1171    Returns
    +1172    -------
    +1173    float
    +1174        Vicis Symmetric χ2 3 distance between v and y
    +1175
    +1176    Notes
    +1177    -----
    +1178
    +1179    .. math::
    +1180
    +1181        \sum\frac{(v_i-y_i)^2}{\max{(v_i,y_i)}}
    +1182    """
    +1183    return np.sum(np.power(v - y, 2) / np.maximum(v, y))
     
    @@ -4396,29 +4452,29 @@
    Notes
    -
    1171def max_symmetric_chi_squared_distance(v, y):
    -1172    r"""
    -1173    Max-Symmetric χ2 distance:
    -1174
    -1175    Parameters
    -1176    ----------
    -1177    v : array_like
    -1178        Vector 1
    -1179    y : array_like
    -1180        Vector 2
    -1181    
    -1182    Returns
    -1183    -------
    -1184    float
    -1185        Max-Symmetric χ2 distance between v and y
    -1186
    -1187    Notes
    -1188    -----
    -1189    .. math::
    -1190
    -1191        \max{(\sum\frac{(v_i-y_i)^2}{v_i},\sum\frac{(v_i-y_i)^2}{y_i})}
    -1192    """
    -1193    return max(np.sum(np.power(v - y, 2) / v), np.sum(np.power(v - y, 2) / y))
    +            
    1186def max_symmetric_chi_squared_distance(v, y):
    +1187    r"""
    +1188    Max-Symmetric χ2 distance:
    +1189
    +1190    Parameters
    +1191    ----------
    +1192    v : array_like
    +1193        Vector 1
    +1194    y : array_like
    +1195        Vector 2
    +1196
    +1197    Returns
    +1198    -------
    +1199    float
    +1200        Max-Symmetric χ2 distance between v and y
    +1201
    +1202    Notes
    +1203    -----
    +1204    .. math::
    +1205
    +1206        \max{(\sum\frac{(v_i-y_i)^2}{v_i},\sum\frac{(v_i-y_i)^2}{y_i})}
    +1207    """
    +1208    return max(np.sum(np.power(v - y, 2) / v), np.sum(np.power(v - y, 2) / y))
     
    @@ -4457,29 +4513,29 @@
    Notes
    -
    1196def min_symmetric_chi_squared_distance(v, y):
    -1197    r"""
    -1198    Min-Symmetric χ2 distance:
    -1199    
    -1200    Parameters
    -1201    ----------
    -1202    v : array_like
    -1203        Vector 1
    -1204    y : array_like
    -1205        Vector 2
    -1206    
    -1207    Returns
    -1208    -------
    -1209    float
    -1210        Min-Symmetric χ2 distance between v and y
    -1211
    -1212    Notes
    -1213    -----
    -1214    .. math::
    -1215
    -1216        \min{(\sum\frac{(v_i-y_i)^2}{v_i},\sum\frac{(v_i-y_i)^2}{y_i})}
    -1217    """
    -1218    return min(np.sum(np.power(v - y, 2) / v), np.sum(np.power(v - y, 2) / y))
    +            
    1211def min_symmetric_chi_squared_distance(v, y):
    +1212    r"""
    +1213    Min-Symmetric χ2 distance:
    +1214
    +1215    Parameters
    +1216    ----------
    +1217    v : array_like
    +1218        Vector 1
    +1219    y : array_like
    +1220        Vector 2
    +1221
    +1222    Returns
    +1223    -------
    +1224    float
    +1225        Min-Symmetric χ2 distance between v and y
    +1226
    +1227    Notes
    +1228    -----
    +1229    .. math::
    +1230
    +1231        \min{(\sum\frac{(v_i-y_i)^2}{v_i},\sum\frac{(v_i-y_i)^2}{y_i})}
    +1232    """
    +1233    return min(np.sum(np.power(v - y, 2) / v), np.sum(np.power(v - y, 2) / y))
     
    @@ -4518,29 +4574,29 @@
    Notes
    -
    1221def additive_sym_chi_sq(v, y):
    -1222    r"""
    -1223    Additive Symmetric χ2 distance:
    -1224    
    -1225    Parameters
    -1226    ----------
    -1227    v : array_like
    -1228        Vector 1
    -1229    y : array_like
    -1230        Vector 2
    -1231    
    -1232    Returns
    -1233    -------
    -1234    float
    -1235        Additive Symmetric χ2 distance between v and y
    -1236
    -1237    Notes
    -1238    -----
    -1239    .. math::
    -1240
    -1241        \sum_{i}\frac{(y_{i} - v_{i})^2(y_{i}+v_{i})}{y_{i}v_{i}}
    -1242    """
    -1243    return np.sum((np.power(y - v, 2) * (y + v))/(y * v))
    +            
    1236def additive_sym_chi_sq(v, y):
    +1237    r"""
    +1238    Additive Symmetric χ2 distance:
    +1239
    +1240    Parameters
    +1241    ----------
    +1242    v : array_like
    +1243        Vector 1
    +1244    y : array_like
    +1245        Vector 2
    +1246
    +1247    Returns
    +1248    -------
    +1249    float
    +1250        Additive Symmetric χ2 distance between v and y
    +1251
    +1252    Notes
    +1253    -----
    +1254    .. math::
    +1255
    +1256        \sum_{i}\frac{(y_{i} - v_{i})^2(y_{i}+v_{i})}{y_{i}v_{i}}
    +1257    """
    +1258    return np.sum((np.power(y - v, 2) * (y + v)) / (y * v))
     
    @@ -4579,29 +4635,29 @@
    Notes
    -
    1245def bhattacharya_distance(v, y):
    -1246    r"""
    -1247    Bhattacharya Distance:
    -1248
    -1249    Parameters
    -1250    ----------
    -1251    v : array_like
    -1252        Vector 1
    -1253    y : array_like
    -1254        Vector 2
    -1255    
    -1256    Returns
    -1257    -------
    -1258    float
    -1259        Bhattcharya distance between v and y
    -1260
    -1261    Notes
    -1262    -----
    -1263    .. math::
    +            
    1261def bhattacharya_distance(v, y):
    +1262    r"""
    +1263    Bhattacharya Distance:
     1264
    -1265        -ln(\sum_{i}\sqrt{y_{i}v_{i}})
    -1266    """
    -1267    return -1 * np.log(np.sum(np.sqrt(y * v)))
    +1265    Parameters
    +1266    ----------
    +1267    v : array_like
    +1268        Vector 1
    +1269    y : array_like
    +1270        Vector 2
    +1271
    +1272    Returns
    +1273    -------
    +1274    float
    +1275        Bhattcharya distance between v and y
    +1276
    +1277    Notes
    +1278    -----
    +1279    .. math::
    +1280
    +1281        -ln(\sum_{i}\sqrt{y_{i}v_{i}})
    +1282    """
    +1283    return -1 * np.log(np.sum(np.sqrt(y * v)))
     
    @@ -4640,31 +4696,31 @@
    Notes
    -
    1269def generalized_ochiai_index(v, y):
    -1270    r"""
    -1271    Generalized Ochiai Index
    -1272    
    -1273    Parameters
    -1274    ----------
    -1275    v : array_like
    -1276        Vector 1
    -1277    y : array_like
    -1278        Vector 2
    -1279    
    -1280    Returns
    -1281    -------
    -1282    float
    -1283        Generalized Ochiai Index between v and y
    -1284
    -1285    Notes
    -1286    -----
    -1287    .. math::
    -1288
    -1289        1 - \frac{\sum_{i}min(y_{i}, v_{i})}{\sqrt{\sum_{i}y_{i} \sum_{i}v_{i}}}
    -1290    """
    -1291
    -1292    ind = np.sum(np.minimum(y, v)) / np.sqrt(np.sum(y) * np.sum(v))
    -1293    return 1 - ind 
    +            
    1286def generalized_ochiai_index(v, y):
    +1287    r"""
    +1288    Generalized Ochiai Index
    +1289
    +1290    Parameters
    +1291    ----------
    +1292    v : array_like
    +1293        Vector 1
    +1294    y : array_like
    +1295        Vector 2
    +1296
    +1297    Returns
    +1298    -------
    +1299    float
    +1300        Generalized Ochiai Index between v and y
    +1301
    +1302    Notes
    +1303    -----
    +1304    .. math::
    +1305
    +1306        1 - \frac{\sum_{i}min(y_{i}, v_{i})}{\sqrt{\sum_{i}y_{i} \sum_{i}v_{i}}}
    +1307    """
    +1308
    +1309    ind = np.sum(np.minimum(y, v)) / np.sqrt(np.sum(y) * np.sum(v))
    +1310    return 1 - ind
     
    @@ -4703,32 +4759,32 @@
    Notes
    -
    1295def gower_distance(v, y):
    -1296    r"""
    -1297    Gower Distance
    -1298    
    -1299    Parameters
    -1300    ----------
    -1301    v : array_like
    -1302        Vector 1
    -1303    y : array_like
    -1304        Vector 2
    -1305    
    -1306    Returns
    -1307    -------
    -1308    float
    -1309        Gower distance between v and y
    -1310
    -1311    Notes
    -1312    -----
    -1313
    -1314    .. math::
    -1315
    -1316        \frac{1}{N}\sum_{i}|y_{i} - v_{i}|
    -1317    """
    -1318
    -1319    n = np.sum(y > 0)
    -1320    return (1 / n) * np.sum(np.abs(y - v))
    +            
    1313def gower_distance(v, y):
    +1314    r"""
    +1315    Gower Distance
    +1316
    +1317    Parameters
    +1318    ----------
    +1319    v : array_like
    +1320        Vector 1
    +1321    y : array_like
    +1322        Vector 2
    +1323
    +1324    Returns
    +1325    -------
    +1326    float
    +1327        Gower distance between v and y
    +1328
    +1329    Notes
    +1330    -----
    +1331
    +1332    .. math::
    +1333
    +1334        \frac{1}{N}\sum_{i}|y_{i} - v_{i}|
    +1335    """
    +1336
    +1337    n = np.sum(y > 0)
    +1338    return (1 / n) * np.sum(np.abs(y - v))
     
    @@ -4767,30 +4823,30 @@
    Notes
    -
    1322def impr_sqrt_cosine_sim(v, y):
    -1323    r"""
    -1324    Improved Square Root Cosine Similarity
    -1325    
    -1326    Parameters
    -1327    ----------
    -1328    v : array_like
    -1329        Vector 1
    -1330    y : array_like
    -1331        Vector 2
    -1332    
    -1333    Returns
    -1334    -------
    -1335    float
    -1336        Improved Square Root Cosine Similarity between v and y
    -1337
    -1338    Notes
    -1339    -----
    -1340    .. math::
    -1341
    -1342        \frac{\sum_{i}\sqrt{y_{i}v_{i}}}{\sum_{i}\sqrt{y_{i}}\sum_{i}\sqrt{v_{i}}}
    -1343    """
    +            
    1341def impr_sqrt_cosine_sim(v, y):
    +1342    r"""
    +1343    Improved Square Root Cosine Similarity
     1344
    -1345    return np.sum(np.sqrt(y * v)) / (np.sum(np.sqrt(y)) * np.sum(np.sqrt(v)))
    +1345    Parameters
    +1346    ----------
    +1347    v : array_like
    +1348        Vector 1
    +1349    y : array_like
    +1350        Vector 2
    +1351
    +1352    Returns
    +1353    -------
    +1354    float
    +1355        Improved Square Root Cosine Similarity between v and y
    +1356
    +1357    Notes
    +1358    -----
    +1359    .. math::
    +1360
    +1361        \frac{\sum_{i}\sqrt{y_{i}v_{i}}}{\sum_{i}\sqrt{y_{i}}\sum_{i}\sqrt{v_{i}}}
    +1362    """
    +1363
    +1364    return np.sum(np.sqrt(y * v)) / (np.sum(np.sqrt(y)) * np.sum(np.sqrt(v)))
     
    @@ -4829,30 +4885,30 @@
    Notes
    -
    1347def intersection_sim(v, y):
    -1348    r"""
    -1349    Intersection Similarity
    -1350    
    -1351    Parameters
    -1352    ----------
    -1353    v : array_like
    -1354        Vector 1
    -1355    y : array_like
    -1356        Vector 2
    -1357    
    -1358    Returns
    -1359    -------
    -1360    float
    -1361        Intersection Similarity between v and y
    -1362
    -1363    Notes
    -1364    -----
    -1365    .. math::
    -1366
    -1367        \sum_{i}min(y_{i}, v_{i})
    -1368    """
    -1369
    -1370    return np.sum(np.minimum(y, v))
    +            
    1367def intersection_sim(v, y):
    +1368    r"""
    +1369    Intersection Similarity
    +1370
    +1371    Parameters
    +1372    ----------
    +1373    v : array_like
    +1374        Vector 1
    +1375    y : array_like
    +1376        Vector 2
    +1377
    +1378    Returns
    +1379    -------
    +1380    float
    +1381        Intersection Similarity between v and y
    +1382
    +1383    Notes
    +1384    -----
    +1385    .. math::
    +1386
    +1387        \sum_{i}min(y_{i}, v_{i})
    +1388    """
    +1389
    +1390    return np.sum(np.minimum(y, v))
     
    @@ -4891,30 +4947,30 @@
    Notes
    -
    1372def j_divergence(v, y):
    -1373    r"""
    -1374    J Divergence
    -1375    
    -1376    Parameters
    -1377    ----------
    -1378    v : array_like
    -1379        Vector 1
    -1380    y : array_like
    -1381        Vector 2
    -1382    
    -1383    Returns
    -1384    -------
    -1385    float
    -1386        J Divergence between v and y
    -1387
    -1388    Notes
    -1389    -----
    -1390    .. math::
    -1391        
    -1392        \sum_{i}(y_{i} - v_{i}) ln(\frac{y_{i}}{v_{i}})
    -1393    """
    -1394
    -1395    return np.sum((v - y) * np.log(v / y))
    +            
    1393def j_divergence(v, y):
    +1394    r"""
    +1395    J Divergence
    +1396
    +1397    Parameters
    +1398    ----------
    +1399    v : array_like
    +1400        Vector 1
    +1401    y : array_like
    +1402        Vector 2
    +1403
    +1404    Returns
    +1405    -------
    +1406    float
    +1407        J Divergence between v and y
    +1408
    +1409    Notes
    +1410    -----
    +1411    .. math::
    +1412
    +1413        \sum_{i}(y_{i} - v_{i}) ln(\frac{y_{i}}{v_{i}})
    +1414    """
    +1415
    +1416    return np.sum((v - y) * np.log(v / y))
     
    @@ -4953,30 +5009,32 @@
    Notes
    -
    1397def jensen_shannon_index(v, y):
    -1398    r"""
    -1399    Jensen-Shannon Index
    -1400
    -1401    Parameters
    -1402    ----------
    -1403    v : array_like
    -1404        Vector 1
    -1405    y : array_like
    -1406        Vector 2
    -1407    
    -1408    Returns
    -1409    -------
    -1410    float
    -1411        Jensen Shannon Index between v and y
    -1412
    -1413    Notes
    -1414    -----
    -1415    .. math::
    -1416
    -1417        \frac{1}{2}[\sum_{i}y_{i}ln(\frac{2y_{i}}{y_{i} + v_{i}}) + \sum_{i}v_{i}ln(\frac{2v_{i}}{y_{i}+v_{i}})]
    -1418    """
    -1419
    -1420    return (1 / 2) * (np.sum(y * np.log(2 * y / (y + v))) + np.sum(v * np.log(2 * v / (y + v))))
    +            
    1419def jensen_shannon_index(v, y):
    +1420    r"""
    +1421    Jensen-Shannon Index
    +1422
    +1423    Parameters
    +1424    ----------
    +1425    v : array_like
    +1426        Vector 1
    +1427    y : array_like
    +1428        Vector 2
    +1429
    +1430    Returns
    +1431    -------
    +1432    float
    +1433        Jensen Shannon Index between v and y
    +1434
    +1435    Notes
    +1436    -----
    +1437    .. math::
    +1438
    +1439        \frac{1}{2}[\sum_{i}y_{i}ln(\frac{2y_{i}}{y_{i} + v_{i}}) + \sum_{i}v_{i}ln(\frac{2v_{i}}{y_{i}+v_{i}})]
    +1440    """
    +1441
    +1442    return (1 / 2) * (
    +1443        np.sum(y * np.log(2 * y / (y + v))) + np.sum(v * np.log(2 * v / (y + v)))
    +1444    )
     
    @@ -5015,30 +5073,30 @@
    Notes
    -
    1422def k_divergence(v, y):
    -1423    r"""
    -1424    K-Divergence
    -1425
    -1426    Parameters
    -1427    ----------
    -1428    v : array_like
    -1429        Vector 1
    -1430    y : array_like
    -1431        Vector 2
    -1432    
    -1433    Returns
    -1434    -------
    -1435    float
    -1436        K-Divergence between v and y
    -1437
    -1438    Notes
    -1439    -----
    -1440    .. math::
    -1441
    -1442        \sum_{i}y_{i}ln(\frac{2y_{i}}{y_{i} + v_{i}})
    -1443    """
    -1444
    -1445    return np.sum(v * np.log((2 * v) / (y + v)))
    +            
    1447def k_divergence(v, y):
    +1448    r"""
    +1449    K-Divergence
    +1450
    +1451    Parameters
    +1452    ----------
    +1453    v : array_like
    +1454        Vector 1
    +1455    y : array_like
    +1456        Vector 2
    +1457
    +1458    Returns
    +1459    -------
    +1460    float
    +1461        K-Divergence between v and y
    +1462
    +1463    Notes
    +1464    -----
    +1465    .. math::
    +1466
    +1467        \sum_{i}y_{i}ln(\frac{2y_{i}}{y_{i} + v_{i}})
    +1468    """
    +1469
    +1470    return np.sum(v * np.log((2 * v) / (y + v)))
     
    @@ -5077,28 +5135,28 @@
    Notes
    -
    1448def topsoe_distance(v, y):
    -1449    r""" Topsoe distance
    -1450    
    -1451    Parameters
    -1452    ----------
    -1453    v : array_like
    -1454        Vector 1
    -1455    y : array_like
    -1456        Vector 2
    -1457    
    -1458    Returns
    -1459    -------
    -1460    float
    -1461        Topsoe distance between v and y
    -1462        
    -1463    Notes
    -1464    -----
    -1465    """
    -1466    #[Chae] commented out the previous one; please review
    -1467    #v[v==0] = 1 #added by amt
    -1468    #y[y==0] = 1 #added by amt
    -1469    return np.sum((y * np.log((2 * y)/(y + v))) + (v * np.log((2 * v)/(y + v))))
    +            
    1473def topsoe_distance(v, y):
    +1474    r"""Topsoe distance
    +1475
    +1476    Parameters
    +1477    ----------
    +1478    v : array_like
    +1479        Vector 1
    +1480    y : array_like
    +1481        Vector 2
    +1482
    +1483    Returns
    +1484    -------
    +1485    float
    +1486        Topsoe distance between v and y
    +1487
    +1488    Notes
    +1489    -----
    +1490    """
    +1491    # [Chae] commented out the previous one; please review
    +1492    # v[v==0] = 1 #added by amt
    +1493    # y[y==0] = 1 #added by amt
    +1494    return np.sum((y * np.log((2 * y) / (y + v))) + (v * np.log((2 * v) / (y + v))))
     
    @@ -5135,11 +5193,11 @@
    Returns
    -
    1471def probabilistic_symmetric_chi_squared_distance(v, y):
    -1472    r""" Fixed
    -1473    "I commented out the previous one; please review"
    -1474    """
    -1475    return 2 * np.sum(np.sum(np.power(y - v, 2) / (y + v)))
    +            
    1497def probabilistic_symmetric_chi_squared_distance(v, y):
    +1498    r"""Fixed
    +1499    "I commented out the previous one; please review"
    +1500    """
    +1501    return 2 * np.sum(np.sum(np.power(y - v, 2) / (y + v)))
     
    @@ -5160,11 +5218,11 @@
    Returns
    -
    1477def VW6(v, y):
    -1478    r"""
    -1479    "appears to be the same as max_symmetric_chi_squared_distance"
    -1480    """
    -1481    return min(np.sum(np.power(y - v, 2) / y), np.sum(np.power(y - v, 2) / v))
    +            
    1504def VW6(v, y):
    +1505    r"""
    +1506    "appears to be the same as max_symmetric_chi_squared_distance"
    +1507    """
    +1508    return min(np.sum(np.power(y - v, 2) / y), np.sum(np.power(y - v, 2) / v))
     
    @@ -5184,11 +5242,11 @@
    Returns
    -
    1483def VW5(v, y):
    -1484    r"""
    -1485    "appears to be the same as max_symmetric_chi_squared_distance"
    -1486    """
    -1487    return max(np.sum(np.power(y - v, 2) / y), np.sum(np.power(y - v, 2) / v))
    +            
    1511def VW5(v, y):
    +1512    r"""
    +1513    "appears to be the same as max_symmetric_chi_squared_distance"
    +1514    """
    +1515    return max(np.sum(np.power(y - v, 2) / y), np.sum(np.power(y - v, 2) / v))
     
    @@ -5208,11 +5266,11 @@
    Returns
    -
    1489def VW4(v, y):
    -1490    r"""
    -1491    "Tecnically the Symmetric chi2 eq63"
    -1492    """
    -1493    return np.sum(np.power(y - v, 2) / np.maximum(y, v))
    +            
    1518def VW4(v, y):
    +1519    r"""
    +1520    "Tecnically the Symmetric chi2 eq63"
    +1521    """
    +1522    return np.sum(np.power(y - v, 2) / np.maximum(y, v))
     
    @@ -5232,11 +5290,11 @@
    Returns
    -
    1495def VW3(v, y):
    -1496    r"""
    -1497    "New"
    -1498    """
    -1499    return np.sum(np.power(y - v, 2) / np.minimum(y, v))
    +            
    1525def VW3(v, y):
    +1526    r"""
    +1527    "New"
    +1528    """
    +1529    return np.sum(np.power(y - v, 2) / np.minimum(y, v))
     
    @@ -5256,11 +5314,11 @@
    Returns
    -
    1501def VW2(v, y):
    -1502    r"""
    -1503    "New"
    -1504    """
    -1505    return np.sum(np.power(y - v, 2) / np.power(np.minimum(y, v), 2))
    +            
    1532def VW2(v, y):
    +1533    r"""
    +1534    "New"
    +1535    """
    +1536    return np.sum(np.power(y - v, 2) / np.power(np.minimum(y, v), 2))
     
    @@ -5280,11 +5338,11 @@
    Returns
    -
    1507def VW1(v, y):
    -1508    r"""
    -1509    "New"
    -1510    """
    -1511    return np.sum(np.abs(y - v) / np.minimum(y, v))
    +            
    1539def VW1(v, y):
    +1540    r"""
    +1541    "New"
    +1542    """
    +1543    return np.sum(np.abs(y - v) / np.minimum(y, v))
     
    @@ -5304,11 +5362,11 @@
    Returns
    -
    1513def taneja_divergence(v, y):
    -1514    r"""
    -1515    "New"
    -1516    """
    -1517    return np.sum(((y + v) / 2) * np.log((y + v)/(2 * np.sqrt(y * v))))
    +            
    1546def taneja_divergence(v, y):
    +1547    r"""
    +1548    "New"
    +1549    """
    +1550    return np.sum(((y + v) / 2) * np.log((y + v) / (2 * np.sqrt(y * v))))
     
    @@ -5328,11 +5386,11 @@
    Returns
    -
    1519def symmetric_chi_squared_distance (v, y):
    -1520    r"""
    -1521    "New"
    -1522    """
    -1523    return np.sum(np.power(y - v, 2) / (y * v))
    +            
    1553def symmetric_chi_squared_distance(v, y):
    +1554    r"""
    +1555    "New"
    +1556    """
    +1557    return np.sum(np.power(y - v, 2) / (y * v))
     
    @@ -5352,11 +5410,11 @@
    Returns
    -
    1525def squared_chi_squared_distance(v, y):
    -1526    r"""
    -1527    "New"
    -1528    """
    -1529    return np.sum(np.power(y - v, 2) / (y + v))
    +            
    1560def squared_chi_squared_distance(v, y):
    +1561    r"""
    +1562    "New"
    +1563    """
    +1564    return np.sum(np.power(y - v, 2) / (y + v))
     
    @@ -5376,11 +5434,11 @@
    Returns
    -
    1531def square_root_cosine_correlation(v, y):
    -1532    r"""
    -1533    "New"
    -1534    """
    -1535    return np.sum(np.sqrt(y * v)) / (np.sum(y) * np.sum(v))
    +            
    1567def square_root_cosine_correlation(v, y):
    +1568    r"""
    +1569    "New"
    +1570    """
    +1571    return np.sum(np.sqrt(y * v)) / (np.sum(y) * np.sum(v))
     
    @@ -5400,11 +5458,11 @@
    Returns
    -
    1537def sorensen_distance(v, y):
    -1538    r"""
    -1539    "New"
    -1540    """
    -1541    return np.sum(np.abs(y - v)) / (np.sum(y + v))
    +            
    1574def sorensen_distance(v, y):
    +1575    r"""
    +1576    "New"
    +1577    """
    +1578    return np.sum(np.abs(y - v)) / (np.sum(y + v))
     
    @@ -5424,11 +5482,11 @@
    Returns
    -
    1543def Pearson_chi_squared_distance(v, y):
    -1544    r"""
    -1545    "New"
    -1546    """
    -1547    return np.sum(np.power(y - v, 2) / v)
    +            
    1581def Pearson_chi_squared_distance(v, y):
    +1582    r"""
    +1583    "New"
    +1584    """
    +1585    return np.sum(np.power(y - v, 2) / v)
     
    @@ -5448,11 +5506,11 @@
    Returns
    -
    1549def Neyman_chi_squared_distance(v, y):
    -1550    r"""
    -1551    "New"
    -1552    """
    -1553    return np.sum(np.power(y - v, 2) / y)
    +            
    1588def Neyman_chi_squared_distance(v, y):
    +1589    r"""
    +1590    "New"
    +1591    """
    +1592    return np.sum(np.power(y - v, 2) / y)
     
    @@ -5472,11 +5530,11 @@
    Returns
    -
    1555def Minokowski_3(v, y):
    -1556    r"""
    -1557    "New"
    -1558    """
    -1559    return np.power(np.sum(np.power(np.abs(y - v), 3)), 1/3)
    +            
    1595def Minokowski_3(v, y):
    +1596    r"""
    +1597    "New"
    +1598    """
    +1599    return np.power(np.sum(np.power(np.abs(y - v), 3)), 1 / 3)
     
    @@ -5496,11 +5554,11 @@
    Returns
    -
    1561def Minokowski_4(v, y):
    -1562    r"""
    -1563    "New"
    -1564    """
    -1565    return np.power(np.sum(np.power(np.abs(y - v), 4)), 1/4)
    +            
    1602def Minokowski_4(v, y):
    +1603    r"""
    +1604    "New"
    +1605    """
    +1606    return np.power(np.sum(np.power(np.abs(y - v), 4)), 1 / 4)
     
    @@ -5520,11 +5578,13 @@
    Returns
    -
    1567def kumarjohnson_divergence(v, y):
    -1568    r"""
    -1569    "New"
    -1570    """
    -1571    return np.sum(np.power(np.power(y, 2) + np.power(v, 2), 2) / (2* np.power(y * v, 3/2)))
    +            
    1609def kumarjohnson_divergence(v, y):
    +1610    r"""
    +1611    "New"
    +1612    """
    +1613    return np.sum(
    +1614        np.power(np.power(y, 2) + np.power(v, 2), 2) / (2 * np.power(y * v, 3 / 2))
    +1615    )
     
    @@ -5544,11 +5604,13 @@
    Returns
    -
    1573def kumarhassebrook_similarity(v, y):
    -1574    r"""
    -1575    "New"
    -1576    """
    -1577    return np.sum(y * v) / (np.sum(np.power(y, 2)) + np.sum(np.power(v, 2)) - np.sum(y * v))
    +            
    1618def kumarhassebrook_similarity(v, y):
    +1619    r"""
    +1620    "New"
    +1621    """
    +1622    return np.sum(y * v) / (
    +1623        np.sum(np.power(y, 2)) + np.sum(np.power(v, 2)) - np.sum(y * v)
    +1624    )
     
    @@ -5568,11 +5630,11 @@
    Returns
    -
    1579def kullbackleibler_divergence (v, y):
    -1580    r"""
    -1581    "New"
    -1582    """
    -1583    return np.sum(v * np.log(v / y))
    +            
    1627def kullbackleibler_divergence(v, y):
    +1628    r"""
    +1629    "New"
    +1630    """
    +1631    return np.sum(v * np.log(v / y))
     
    @@ -5592,11 +5654,11 @@
    Returns
    -
    1585def soergel_distance(v, y):
    -1586    r"""
    -1587    "New"
    -1588    """
    -1589    return np.sum(np.abs(y - v))/np.sum(np.maximum(y, v))
    +            
    1634def soergel_distance(v, y):
    +1635    r"""
    +1636    "New"
    +1637    """
    +1638    return np.sum(np.abs(y - v)) / np.sum(np.maximum(y, v))
     
    diff --git a/docs/corems/molecular_id/factory/EI_SQL.html b/docs/corems/molecular_id/factory/EI_SQL.html index b5a2094c..7baaa446 100644 --- a/docs/corems/molecular_id/factory/EI_SQL.html +++ b/docs/corems/molecular_id/factory/EI_SQL.html @@ -339,603 +339,640 @@

      1__author__ = "Yuri E. Corilo"
       2__date__ = "Feb 12, 2020"
       3
    -  4import os 
    +  4import os
       5from dataclasses import dataclass
       6
    -  7from sqlalchemy import create_engine, Column, Integer, String, Float, LargeBinary, ForeignKey
    -  8from sqlalchemy.ext.declarative import declarative_base
    -  9from sqlalchemy.exc import SQLAlchemyError
    - 10from sqlalchemy.orm import sessionmaker, relationship
    - 11from sqlalchemy.pool import QueuePool
    - 12from sqlalchemy import between
    - 13
    - 14from numpy import array, frombuffer
    - 15
    - 16Base = declarative_base()
    - 17
    - 18class Metadatar(Base):
    - 19    """ This class is used to store the metadata of the compounds in the database
    - 20
    - 21    Attributes
    - 22    -----------
    - 23    id : int
    - 24        The id of the compound.
    - 25    cas : str
    - 26        The CAS number of the compound.
    - 27    inchikey : str
    - 28        The InChiKey of the compound.
    - 29    inchi : str
    - 30        The InChi of the compound.
    - 31    chebi : str
    - 32        The ChEBI ID of the compound.
    - 33    smiles : str
    - 34        The SMILES of the compound.
    - 35    kegg : str
    - 36        The KEGG ID of the compound.
    - 37    iupac_name : str
    - 38        The IUPAC name of the compound.
    - 39    traditional_name : str
    - 40        The traditional name of the compound.
    - 41    common_name : str
    - 42        The common name of the compound.
    - 43    data_id : int
    - 44        The id of the compound in the molecularData table.
    - 45    data : LowResolutionEICompound
    - 46        The compound object.
    - 47    """
    - 48    __tablename__ = 'metaDataR'
    - 49
    - 50    id = Column(Integer, primary_key=True)
    - 51    cas = Column(String, nullable=True)
    - 52    inchikey = Column(String, nullable=False)
    - 53    inchi = Column(String, nullable=False)
    - 54    chebi = Column(String, nullable=True)
    - 55    smiles = Column(String, nullable=True)
    - 56    kegg = Column(String, nullable=True)
    - 57    iupac_name = Column(String, nullable=True)
    - 58    traditional_name = Column(String, nullable=True)
    - 59    common_name = Column(String, nullable=True)
    - 60
    - 61    data_id = Column(Integer, ForeignKey('molecularData.id'))
    - 62    data = relationship("LowResolutionEICompound", back_populates="metadatar")
    - 63
    - 64class LowResolutionEICompound(Base):
    - 65    """ This class is used to store the molecular and spectral data of the compounds in the low res EI database
    - 66
    - 67    Attributes
    - 68    -----------
    - 69    id : int
    - 70        The id of the compound.
    - 71    name : str
    - 72        The name of the compound.
    - 73    classify : str
    - 74        The classification of the compound.
    - 75    formula : str
    - 76        The formula of the compound.
    - 77    ri : float
    - 78        The retention index of the compound.
    - 79    retention_time : float
    - 80        The retention time of the compound.
    - 81    source : str
    - 82        The source of the compound.
    - 83    casno : str
    - 84        The CAS number of the compound.
    - 85    comment : str
    - 86        The comment of the compound.
    - 87    source_temp_c : float
    - 88        The source temperature of the spectra.
    - 89    ev : float
    - 90        The electron volts of the spectra.
    - 91    peaks_count : int
    - 92        The number of peaks in the spectra.
    - 93    mz : numpy.ndarray
    - 94        The m/z values of the spectra.
    - 95    abundance : numpy.ndarray
    - 96        The abundance values of the spectra.
    - 97    metadatar : Metadatar
    - 98        The metadata object.
    - 99    """
    -100    __tablename__ = 'molecularData'
    -101
    -102    id = Column(Integer, primary_key=True)
    -103
    -104    name = Column(String, nullable=False)
    -105    classify = Column(String, nullable=True)
    -106    formula = Column(String, nullable=True)
    -107    ri = Column(Float, nullable=False)
    -108    retention_time = Column(Float, nullable=False)
    +  7from numpy import array, frombuffer
    +  8from sqlalchemy import (
    +  9    Column,
    + 10    Float,
    + 11    ForeignKey,
    + 12    Integer,
    + 13    LargeBinary,
    + 14    String,
    + 15    create_engine,
    + 16)
    + 17from sqlalchemy.exc import SQLAlchemyError
    + 18from sqlalchemy.ext.declarative import declarative_base
    + 19from sqlalchemy.orm import relationship, sessionmaker
    + 20from sqlalchemy.pool import QueuePool
    + 21
    + 22Base = declarative_base()
    + 23
    + 24
    + 25class Metadatar(Base):
    + 26    """This class is used to store the metadata of the compounds in the database
    + 27
    + 28    Attributes
    + 29    -----------
    + 30    id : int
    + 31        The id of the compound.
    + 32    cas : str
    + 33        The CAS number of the compound.
    + 34    inchikey : str
    + 35        The InChiKey of the compound.
    + 36    inchi : str
    + 37        The InChi of the compound.
    + 38    chebi : str
    + 39        The ChEBI ID of the compound.
    + 40    smiles : str
    + 41        The SMILES of the compound.
    + 42    kegg : str
    + 43        The KEGG ID of the compound.
    + 44    iupac_name : str
    + 45        The IUPAC name of the compound.
    + 46    traditional_name : str
    + 47        The traditional name of the compound.
    + 48    common_name : str
    + 49        The common name of the compound.
    + 50    data_id : int
    + 51        The id of the compound in the molecularData table.
    + 52    data : LowResolutionEICompound
    + 53        The compound object.
    + 54    """
    + 55
    + 56    __tablename__ = "metaDataR"
    + 57
    + 58    id = Column(Integer, primary_key=True)
    + 59    cas = Column(String, nullable=True)
    + 60    inchikey = Column(String, nullable=False)
    + 61    inchi = Column(String, nullable=False)
    + 62    chebi = Column(String, nullable=True)
    + 63    smiles = Column(String, nullable=True)
    + 64    kegg = Column(String, nullable=True)
    + 65    iupac_name = Column(String, nullable=True)
    + 66    traditional_name = Column(String, nullable=True)
    + 67    common_name = Column(String, nullable=True)
    + 68
    + 69    data_id = Column(Integer, ForeignKey("molecularData.id"))
    + 70    data = relationship("LowResolutionEICompound", back_populates="metadatar")
    + 71
    + 72
    + 73class LowResolutionEICompound(Base):
    + 74    """This class is used to store the molecular and spectral data of the compounds in the low res EI database
    + 75
    + 76    Attributes
    + 77    -----------
    + 78    id : int
    + 79        The id of the compound.
    + 80    name : str
    + 81        The name of the compound.
    + 82    classify : str
    + 83        The classification of the compound.
    + 84    formula : str
    + 85        The formula of the compound.
    + 86    ri : float
    + 87        The retention index of the compound.
    + 88    retention_time : float
    + 89        The retention time of the compound.
    + 90    source : str
    + 91        The source of the compound.
    + 92    casno : str
    + 93        The CAS number of the compound.
    + 94    comment : str
    + 95        The comment of the compound.
    + 96    source_temp_c : float
    + 97        The source temperature of the spectra.
    + 98    ev : float
    + 99        The electron volts of the spectra.
    +100    peaks_count : int
    +101        The number of peaks in the spectra.
    +102    mz : numpy.ndarray
    +103        The m/z values of the spectra.
    +104    abundance : numpy.ndarray
    +105        The abundance values of the spectra.
    +106    metadatar : Metadatar
    +107        The metadata object.
    +108    """
     109
    -110    source = Column(String, nullable=True)
    -111    casno = Column(String, nullable=False)
    -112    comment = Column(String, nullable=True)
    +110    __tablename__ = "molecularData"
    +111
    +112    id = Column(Integer, primary_key=True)
     113
    -114    derivativenum = Column(String, nullable=True)
    -115    derivatization = Column(String, nullable=True)
    -116
    -117    source_temp_c = Column(Float, nullable=True)
    -118    ev = Column(Float, nullable=True)
    +114    name = Column(String, nullable=False)
    +115    classify = Column(String, nullable=True)
    +116    formula = Column(String, nullable=True)
    +117    ri = Column(Float, nullable=False)
    +118    retention_time = Column(Float, nullable=False)
     119
    -120    peaks_count = Column(Integer, nullable=False)
    -121
    -122    mz = Column(LargeBinary, nullable=False)
    -123    abundance = Column(LargeBinary, nullable=False)
    -124
    -125    metadatar = relationship("Metadatar", uselist=False, back_populates="data")
    +120    source = Column(String, nullable=True)
    +121    casno = Column(String, nullable=False)
    +122    comment = Column(String, nullable=True)
    +123
    +124    derivativenum = Column(String, nullable=True)
    +125    derivatization = Column(String, nullable=True)
     126
    -127    # metadatar = relationship('Metadatar', backref='smile', lazy='dynamic')
    -128
    -129    def __init__(self, **dict_data):
    -130
    -131        self.id = dict_data.get('id')
    -132
    -133        self.name = dict_data.get('NAME')
    -134        self.classify = dict_data.get('classify')
    -135        self.formula = dict_data.get('FORM')
    -136        self.ri = dict_data.get('RI')
    -137        self.retention_time = dict_data.get('RT')
    +127    source_temp_c = Column(Float, nullable=True)
    +128    ev = Column(Float, nullable=True)
    +129
    +130    peaks_count = Column(Integer, nullable=False)
    +131
    +132    mz = Column(LargeBinary, nullable=False)
    +133    abundance = Column(LargeBinary, nullable=False)
    +134
    +135    metadatar = relationship("Metadatar", uselist=False, back_populates="data")
    +136
    +137    # metadatar = relationship('Metadatar', backref='smile', lazy='dynamic')
     138
    -139        self.source = dict_data.get('SOURCE')
    -140        self.casno = dict_data.get('CASNO')
    -141        self.comment = dict_data.get('COMMENT')
    -142
    -143        self.derivativenum = dict_data.get('derivativenum')
    -144        self.derivatization = dict_data.get('derivatization')
    -145
    -146        self.peaks_count = dict_data.get('NUM PEAKS')
    +139    def __init__(self, **dict_data):
    +140        self.id = dict_data.get("id")
    +141
    +142        self.name = dict_data.get("NAME")
    +143        self.classify = dict_data.get("classify")
    +144        self.formula = dict_data.get("FORM")
    +145        self.ri = dict_data.get("RI")
    +146        self.retention_time = dict_data.get("RT")
     147
    -148        # mz and abun are numpy arrays of 64 bits integer
    -149        # when using postgres array might be a better option
    -150
    -151        self.mz = array(dict_data.get('mz'), dtype='int32').tobytes()
    -152        self.abundance = array(dict_data.get('abundance'), dtype="int32").tobytes()
    -153
    -154        self.metadatar = dict_data.get('metadatar', None)
    -155
    -156    def __repr__(self):
    -157        return "<LowResolutionEICompound(name= %s , cas number = %s, formula = %s, Retention index= %.1f, Retention time= %.1f comment='%s')>" % (
    -158                                    self.name, self.casno, self.formula, self.ri, self.retention_time, self.comment)
    +148        self.source = dict_data.get("SOURCE")
    +149        self.casno = dict_data.get("CASNO")
    +150        self.comment = dict_data.get("COMMENT")
    +151
    +152        self.derivativenum = dict_data.get("derivativenum")
    +153        self.derivatization = dict_data.get("derivatization")
    +154
    +155        self.peaks_count = dict_data.get("NUM PEAKS")
    +156
    +157        # mz and abun are numpy arrays of 64 bits integer
    +158        # when using postgres array might be a better option
     159
    -160
    -161@dataclass
    -162class MetaboliteMetadata:
    -163    """ Dataclass for the Metabolite Metadata
    -164    
    -165    Attributes
    -166    -----------
    -167    id : int
    -168        The id of the compound.
    -169    cas : str
    -170        The CAS number of the compound.
    -171    inchikey : str
    -172        The InChiKey of the compound.
    -173    inchi : str
    -174        The InChi of the compound.
    -175    chebi : str
    -176        The ChEBI ID of the compound.
    -177    smiles : str
    -178        The SMILES of the compound.
    -179    kegg : str
    -180        The KEGG ID of the compound.
    -181    iupac_name : str
    -182        The IUPAC name of the compound.
    -183    traditional_name : str
    -184        The traditional name of the compound.
    -185    common_name : str
    -186        The common name of the compound.
    -187    data_id : int
    -188        The id of the compound in the molecularData table.
    -189                
    -190    """
    -191
    -192    id: int
    -193    cas: str
    -194    inchikey: str
    -195    inchi: str
    -196    chebi: str
    -197    smiles: str
    -198    kegg: str
    -199    data_id: int
    -200    iupac_name: str
    -201    traditional_name: str
    -202    common_name: str
    -203    
    -204@dataclass
    -205class LowResCompoundRef:
    -206    """ Dataclass for the Low Resolution Compound Reference
    -207    
    -208    This class is used to store the molecular and spectral data of the compounds in the low res EI database
    +160        self.mz = array(dict_data.get("mz"), dtype="int32").tobytes()
    +161        self.abundance = array(dict_data.get("abundance"), dtype="int32").tobytes()
    +162
    +163        self.metadatar = dict_data.get("metadatar", None)
    +164
    +165    def __repr__(self):
    +166        return (
    +167            "<LowResolutionEICompound(name= %s , cas number = %s, formula = %s, Retention index= %.1f, Retention time= %.1f comment='%s')>"
    +168            % (
    +169                self.name,
    +170                self.casno,
    +171                self.formula,
    +172                self.ri,
    +173                self.retention_time,
    +174                self.comment,
    +175            )
    +176        )
    +177
    +178
    +179@dataclass
    +180class MetaboliteMetadata:
    +181    """Dataclass for the Metabolite Metadata
    +182
    +183    Attributes
    +184    -----------
    +185    id : int
    +186        The id of the compound.
    +187    cas : str
    +188        The CAS number of the compound.
    +189    inchikey : str
    +190        The InChiKey of the compound.
    +191    inchi : str
    +192        The InChi of the compound.
    +193    chebi : str
    +194        The ChEBI ID of the compound.
    +195    smiles : str
    +196        The SMILES of the compound.
    +197    kegg : str
    +198        The KEGG ID of the compound.
    +199    iupac_name : str
    +200        The IUPAC name of the compound.
    +201    traditional_name : str
    +202        The traditional name of the compound.
    +203    common_name : str
    +204        The common name of the compound.
    +205    data_id : int
    +206        The id of the compound in the molecularData table.
    +207
    +208    """
     209
    -210    Parameters
    -211    -----------
    -212    compounds_dict : dict
    -213        A dictionary representing the compound.
    -214    
    -215    Attributes
    -216    -----------
    -217    id : int
    -218        The id of the compound.
    -219    name : str
    -220        The name of the compound.
    -221    ri : str
    -222        The retention index of the compound.
    -223    retention_time : str
    -224        The retention time of the compound.
    -225    casno : str
    -226        The CAS number of the compound.
    -227    comment : str
    -228        The comment of the compound.
    -229    peaks_count : int
    -230        The number of peaks in the spectra.
    -231    classify : str
    -232        The classification of the compound.
    -233    derivativenum : str
    -234        The derivative number of the compound.
    -235    derivatization : str
    -236        The derivatization applied to the compound.
    -237    mz : numpy.ndarray
    -238        The m/z values of the spectra.
    -239    abundance : numpy.ndarray
    -240        The abundance values of the spectra.
    -241    source_temp_c : float
    -242        The source temperature of the spectra.
    -243    ev : float
    -244        The electron volts of the spectra.
    -245    formula : str
    -246        The formula of the compound.
    -247    source : str
    -248        The source of the spectra data.
    -249    classify : str
    -250        The classification of the compound.
    -251    metadata : MetaboliteMetadata
    -252        The metadata object.
    -253    similarity_score : float
    -254        The similarity score of the compound.
    -255    ri_score : float
    -256        The RI score of the compound.
    -257    spectral_similarity_score : float
    -258        The spectral similarity score of the compound.
    -259    spectral_similarity_scores : dict
    -260        The spectral similarity scores of the compound.
    -261   
    -262    """
    -263    #this class is use to store the results inside the GCPeak class
    -264    def __init__(self, compounds_dict):
    -265
    -266        self.id = compounds_dict.get("id")
    -267        self.name = compounds_dict.get("name")
    -268        self.ri = compounds_dict.get("ri")
    -269        self.retention_time = compounds_dict.get("rt")
    -270        self.casno = compounds_dict.get("casno")
    -271        self.comment = compounds_dict.get("comment")
    -272        self.peaks_count = compounds_dict.get("peaks_count")
    -273
    -274        self.classify = compounds_dict.get('classify')
    -275        self.derivativenum = compounds_dict.get('derivativenum')
    -276        self.derivatization = compounds_dict.get('derivatization')
    -277
    -278        self.mz = compounds_dict.get('mz')
    -279        self.abundance = compounds_dict.get("abundance")
    +210    id: int
    +211    cas: str
    +212    inchikey: str
    +213    inchi: str
    +214    chebi: str
    +215    smiles: str
    +216    kegg: str
    +217    data_id: int
    +218    iupac_name: str
    +219    traditional_name: str
    +220    common_name: str
    +221
    +222
    +223@dataclass
    +224class LowResCompoundRef:
    +225    """Dataclass for the Low Resolution Compound Reference
    +226
    +227    This class is used to store the molecular and spectral data of the compounds in the low res EI database
    +228
    +229    Parameters
    +230    -----------
    +231    compounds_dict : dict
    +232        A dictionary representing the compound.
    +233
    +234    Attributes
    +235    -----------
    +236    id : int
    +237        The id of the compound.
    +238    name : str
    +239        The name of the compound.
    +240    ri : str
    +241        The retention index of the compound.
    +242    retention_time : str
    +243        The retention time of the compound.
    +244    casno : str
    +245        The CAS number of the compound.
    +246    comment : str
    +247        The comment of the compound.
    +248    peaks_count : int
    +249        The number of peaks in the spectra.
    +250    classify : str
    +251        The classification of the compound.
    +252    derivativenum : str
    +253        The derivative number of the compound.
    +254    derivatization : str
    +255        The derivatization applied to the compound.
    +256    mz : numpy.ndarray
    +257        The m/z values of the spectra.
    +258    abundance : numpy.ndarray
    +259        The abundance values of the spectra.
    +260    source_temp_c : float
    +261        The source temperature of the spectra.
    +262    ev : float
    +263        The electron volts of the spectra.
    +264    formula : str
    +265        The formula of the compound.
    +266    source : str
    +267        The source of the spectra data.
    +268    classify : str
    +269        The classification of the compound.
    +270    metadata : MetaboliteMetadata
    +271        The metadata object.
    +272    similarity_score : float
    +273        The similarity score of the compound.
    +274    ri_score : float
    +275        The RI score of the compound.
    +276    spectral_similarity_score : float
    +277        The spectral similarity score of the compound.
    +278    spectral_similarity_scores : dict
    +279        The spectral similarity scores of the compound.
     280
    -281        self.source_temp_c = compounds_dict.get("source_temp_c")
    -282        self.ev = compounds_dict.get("ev")
    -283        self.formula = compounds_dict.get("formula")
    -284        self.source = compounds_dict.get("source")
    -285
    -286        self.classify = compounds_dict.get("classify")
    -287
    -288        if compounds_dict.get("metadata"):
    -289            
    -290            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
    -291
    -292        else:
    -293
    -294            self.metadata = None
    -295
    -296        self.similarity_score = None
    -297        self.ri_score = None
    -298        self.spectral_similarity_score = None
    -299        self.spectral_similarity_scores = {}
    -300
    -301class EI_LowRes_SQLite:
    -302    """
    -303    A class for interacting with a SQLite database for low-resolution EI compounds.
    +281    """
    +282
    +283    # this class is use to store the results inside the GCPeak class
    +284    def __init__(self, compounds_dict):
    +285        self.id = compounds_dict.get("id")
    +286        self.name = compounds_dict.get("name")
    +287        self.ri = compounds_dict.get("ri")
    +288        self.retention_time = compounds_dict.get("rt")
    +289        self.casno = compounds_dict.get("casno")
    +290        self.comment = compounds_dict.get("comment")
    +291        self.peaks_count = compounds_dict.get("peaks_count")
    +292
    +293        self.classify = compounds_dict.get("classify")
    +294        self.derivativenum = compounds_dict.get("derivativenum")
    +295        self.derivatization = compounds_dict.get("derivatization")
    +296
    +297        self.mz = compounds_dict.get("mz")
    +298        self.abundance = compounds_dict.get("abundance")
    +299
    +300        self.source_temp_c = compounds_dict.get("source_temp_c")
    +301        self.ev = compounds_dict.get("ev")
    +302        self.formula = compounds_dict.get("formula")
    +303        self.source = compounds_dict.get("source")
     304
    -305    Parameters
    -306    -----------
    -307    url : str, optional
    -308        The URL of the SQLite database. Default is 'sqlite://'.
    +305        self.classify = compounds_dict.get("classify")
    +306
    +307        if compounds_dict.get("metadata"):
    +308            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
     309
    -310    Attributes
    -311    -----------
    -312    engine : sqlalchemy.engine.Engine
    -313        The SQLAlchemy engine for connecting to the database.
    -314    session : sqlalchemy.orm.Session
    -315        The SQLAlchemy session for executing database operations.
    -316
    -317    Methods
    -318    --------
    -319    * __init__(self, url='sqlite://').
    -320        Initializes the EI_LowRes_SQLite object.
    -321    * __exit__(self, exc_type, exc_val, exc_tb).
    -322        Closes the database connection.
    -323    * init_engine(self, url).
    -324        Initializes the SQLAlchemy engine.
    -325    * __enter__(self).
    -326        Returns the EI_LowRes_SQLite object.
    -327    * add_compound_list(self, data_dict_list).
    -328        Adds a list of compounds to the database.
    -329    * add_compound(self, data_dict).
    -330        Adds a single compound to the database.
    -331    * commit(self).
    -332        Commits the changes to the database.
    -333    * row_to_dict(self, row).
    -334        Converts a database row to a dictionary.
    -335    * get_all(self).
    -336        Retrieves all compounds from the database.
    -337    * query_min_max_rt(self, min_max_rt).
    -338        Queries compounds based on retention time range.
    -339    * query_min_max_ri(self, min_max_ri).
    -340        Queries compounds based on RI range.
    -341    * query_names_and_rt(self, min_max_rt, compound_names).
    -342        Queries compounds based on compound names and retention time range.
    -343    * query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).
    -344        Queries compounds based on RI range and retention time range.
    -345    * delete_compound(self, compound).
    -346        Deletes a compound from the database.
    -347    * purge(self).
    -348        Deletes all compounds from the database table.
    -349    * clear_data(self).
    -350        Clears all tables in the database.
    -351    """
    -352
    -353    def __init__(self, url='sqlite://'):
    -354        
    -355        self.engine = self.init_engine(url)
    -356
    -357        Base.metadata.create_all(self.engine)
    -358
    -359        Session = sessionmaker(bind=self.engine)
    -360
    -361        self.session = Session()
    -362
    -363    def __exit__(self, exc_type, exc_val, exc_tb):
    -364        """ Closes the database connection.
    -365        """
    -366        self.commit()
    -367        self.session.close()
    -368        self.engine.dispose()
    -369
    -370    def init_engine(self, url):
    -371        """ Initializes the SQLAlchemy engine.
    -372
    -373        Parameters
    -374        -----------
    -375        url : str
    -376            The URL of the SQLite database.
    +310        else:
    +311            self.metadata = None
    +312
    +313        self.similarity_score = None
    +314        self.ri_score = None
    +315        self.spectral_similarity_score = None
    +316        self.spectral_similarity_scores = {}
    +317
    +318
    +319class EI_LowRes_SQLite:
    +320    """
    +321    A class for interacting with a SQLite database for low-resolution EI compounds.
    +322
    +323    Parameters
    +324    -----------
    +325    url : str, optional
    +326        The URL of the SQLite database. Default is 'sqlite://'.
    +327
    +328    Attributes
    +329    -----------
    +330    engine : sqlalchemy.engine.Engine
    +331        The SQLAlchemy engine for connecting to the database.
    +332    session : sqlalchemy.orm.Session
    +333        The SQLAlchemy session for executing database operations.
    +334
    +335    Methods
    +336    --------
    +337    * __init__(self, url='sqlite://').
    +338        Initializes the EI_LowRes_SQLite object.
    +339    * __exit__(self, exc_type, exc_val, exc_tb).
    +340        Closes the database connection.
    +341    * init_engine(self, url).
    +342        Initializes the SQLAlchemy engine.
    +343    * __enter__(self).
    +344        Returns the EI_LowRes_SQLite object.
    +345    * add_compound_list(self, data_dict_list).
    +346        Adds a list of compounds to the database.
    +347    * add_compound(self, data_dict).
    +348        Adds a single compound to the database.
    +349    * commit(self).
    +350        Commits the changes to the database.
    +351    * row_to_dict(self, row).
    +352        Converts a database row to a dictionary.
    +353    * get_all(self).
    +354        Retrieves all compounds from the database.
    +355    * query_min_max_rt(self, min_max_rt).
    +356        Queries compounds based on retention time range.
    +357    * query_min_max_ri(self, min_max_ri).
    +358        Queries compounds based on RI range.
    +359    * query_names_and_rt(self, min_max_rt, compound_names).
    +360        Queries compounds based on compound names and retention time range.
    +361    * query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).
    +362        Queries compounds based on RI range and retention time range.
    +363    * delete_compound(self, compound).
    +364        Deletes a compound from the database.
    +365    * purge(self).
    +366        Deletes all compounds from the database table.
    +367    * clear_data(self).
    +368        Clears all tables in the database.
    +369    """
    +370
    +371    def __init__(self, url="sqlite://"):
    +372        self.engine = self.init_engine(url)
    +373
    +374        Base.metadata.create_all(self.engine)
    +375
    +376        Session = sessionmaker(bind=self.engine)
     377
    -378        Returns
    -379        --------
    -380        sqlalchemy.engine.Engine
    -381            The SQLAlchemy engine for connecting to the database.
    -382        """
    -383        directory = os.getcwd()
    -384        if not url:
    -385            if not os.path.isdir(directory + '/db'):
    -386                os.mkdir(directory + '/db')
    -387            url = 'sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite'.format(DB=directory)
    -388        return create_engine(url, poolclass=QueuePool)
    -389
    -390    def __enter__(self):
    -391        """ Returns the EI_LowRes_SQLite object.
    -392        """
    -393        return self
    -394
    -395    def add_compound_list(self, data_dict_list):
    -396        """ Adds a list of compounds to the database.
    -397
    -398        Parameters
    -399        -----------
    -400        data_dict_list : list of dict
    -401            A list of dictionaries representing the compounds.
    -402        """
    -403        for data_dict in data_dict_list:
    -404            # print(data_dict.get('NUM PEAKS'))
    -405            if not data_dict.get('NUM PEAKS'):
    -406                data_dict['NUM PEAKS'] = len(data_dict.get('mz'))
    -407            if not data_dict.get('CASNO'):
    -408                data_dict['CASNO'] = data_dict.get('CAS')
    -409        
    -410        self.session.add_all([LowResolutionEICompound(**data_dict) for data_dict in data_dict_list] )
    +378        self.session = Session()
    +379
    +380    def __exit__(self, exc_type, exc_val, exc_tb):
    +381        """Closes the database connection."""
    +382        self.commit()
    +383        self.session.close()
    +384        self.engine.dispose()
    +385
    +386    def init_engine(self, url):
    +387        """Initializes the SQLAlchemy engine.
    +388
    +389        Parameters
    +390        -----------
    +391        url : str
    +392            The URL of the SQLite database.
    +393
    +394        Returns
    +395        --------
    +396        sqlalchemy.engine.Engine
    +397            The SQLAlchemy engine for connecting to the database.
    +398        """
    +399        directory = os.getcwd()
    +400        if not url:
    +401            if not os.path.isdir(directory + "/db"):
    +402                os.mkdir(directory + "/db")
    +403            url = "sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite".format(
    +404                DB=directory
    +405            )
    +406        return create_engine(url, poolclass=QueuePool)
    +407
    +408    def __enter__(self):
    +409        """Returns the EI_LowRes_SQLite object."""
    +410        return self
     411
    -412    def add_compound(self, data_dict):
    -413        """ Adds a single compound to the database. 
    -414        
    +412    def add_compound_list(self, data_dict_list):
    +413        """Adds a list of compounds to the database.
    +414
     415        Parameters
     416        -----------
    -417        data_dict : dict
    -418            A dictionary representing the compound.
    -419        
    -420        """
    -421        one_compound = LowResolutionEICompound(**data_dict)
    -422        self.session.add(one_compound)
    -423        self.commit()
    -424
    -425    def commit(self):
    -426        """ Commits the changes to the database.
    -427        """
    -428        try:
    -429            self.session.commit()
    -430        except SQLAlchemyError as e:
    -431            self.session.rollback()
    -432            print(str(e))
    +417        data_dict_list : list of dict
    +418            A list of dictionaries representing the compounds.
    +419        """
    +420        for data_dict in data_dict_list:
    +421            # print(data_dict.get('NUM PEAKS'))
    +422            if not data_dict.get("NUM PEAKS"):
    +423                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
    +424            if not data_dict.get("CASNO"):
    +425                data_dict["CASNO"] = data_dict.get("CAS")
    +426
    +427        self.session.add_all(
    +428            [LowResolutionEICompound(**data_dict) for data_dict in data_dict_list]
    +429        )
    +430
    +431    def add_compound(self, data_dict):
    +432        """Adds a single compound to the database.
     433
    -434    def row_to_dict(self, row):
    -435        """ Converts a database row to a dictionary.
    -436        
    -437        Parameters
    -438        -----------
    -439        row : sqlalchemy.engine.row.Row
    -440            A row from the database.
    -441        
    -442        Returns
    -443        --------
    -444        dict
    -445            A dictionary representing the compound.
    -446        """
    -447        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}        
    -448
    -449        data_dict['mz'] = frombuffer(data_dict.get('mz'), dtype="int32")
    -450        data_dict['abundance'] = frombuffer(data_dict.get('abundance'), dtype="int32")
    +434        Parameters
    +435        -----------
    +436        data_dict : dict
    +437            A dictionary representing the compound.
    +438
    +439        """
    +440        one_compound = LowResolutionEICompound(**data_dict)
    +441        self.session.add(one_compound)
    +442        self.commit()
    +443
    +444    def commit(self):
    +445        """Commits the changes to the database."""
    +446        try:
    +447            self.session.commit()
    +448        except SQLAlchemyError as e:
    +449            self.session.rollback()
    +450            print(str(e))
     451
    -452        if row.metadatar:
    -453            data_dict['metadata'] = {c.name: getattr(row.metadatar, c.name) for c in row.metadatar.__table__.columns}
    +452    def row_to_dict(self, row):
    +453        """Converts a database row to a dictionary.
     454
    -455        else:
    -456            data_dict['metadata'] = None
    -457
    -458        return data_dict
    +455        Parameters
    +456        -----------
    +457        row : sqlalchemy.engine.row.Row
    +458            A row from the database.
     459
    -460    def get_all(self,):
    -461        """ Retrieves all compounds from the database.
    -462        
    -463        Returns
    -464        --------
    -465        list
    -466            A list of dictionaries representing the compounds.
    -467        """
    -468        compounds = self.session.query(LowResolutionEICompound).all()
    +460        Returns
    +461        --------
    +462        dict
    +463            A dictionary representing the compound.
    +464        """
    +465        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}
    +466
    +467        data_dict["mz"] = frombuffer(data_dict.get("mz"), dtype="int32")
    +468        data_dict["abundance"] = frombuffer(data_dict.get("abundance"), dtype="int32")
     469
    -470        return [self.row_to_dict(compound) for compound in compounds]
    -471
    -472    def query_min_max_rt(self, min_max_rt, ):
    -473        """ Queries compounds based on retention time range.
    -474        
    -475        Parameters
    -476        -----------
    -477        min_max_rt : tuple
    -478            A tuple containing the minimum and maximum retention time values.
    -479        
    -480        Returns
    -481        --------
    -482        list
    -483            A list of dictionaries representing the compounds.
    -484        """
    -485        min_rt, max_rt = min_max_rt
    -486
    -487        compounds = self.session.query(LowResolutionEICompound).filter(LowResolutionEICompound.retention_time.between(min_rt, max_rt))    
    -488
    -489        return [self.row_to_dict(compound) for compound in compounds]
    -490
    -491    def query_min_max_ri(self, min_max_ri):
    -492        """ Queries compounds based on RI range.
    -493        
    -494        Parameters
    -495        -----------
    -496        min_max_ri : tuple
    -497            A tuple containing the minimum and maximum RI values.
    -498        """
    -499        min_ri, max_ri = min_max_ri
    +470        if row.metadatar:
    +471            data_dict["metadata"] = {
    +472                c.name: getattr(row.metadatar, c.name)
    +473                for c in row.metadatar.__table__.columns
    +474            }
    +475
    +476        else:
    +477            data_dict["metadata"] = None
    +478
    +479        return data_dict
    +480
    +481    def get_all(
    +482        self,
    +483    ):
    +484        """Retrieves all compounds from the database.
    +485
    +486        Returns
    +487        --------
    +488        list
    +489            A list of dictionaries representing the compounds.
    +490        """
    +491        compounds = self.session.query(LowResolutionEICompound).all()
    +492
    +493        return [self.row_to_dict(compound) for compound in compounds]
    +494
    +495    def query_min_max_rt(
    +496        self,
    +497        min_max_rt,
    +498    ):
    +499        """Queries compounds based on retention time range.
     500
    -501        compounds = self.session.query(LowResolutionEICompound).filter(LowResolutionEICompound.ri.between(min_ri, max_ri)).all()
    -502
    -503        return [self.row_to_dict(compound) for compound in compounds]
    -504
    -505    def query_names_and_rt(self, min_max_rt, compound_names):
    -506        """ Queries compounds based on compound names and retention time range.
    -507        
    -508        Parameters
    -509        -----------
    -510        min_max_rt : tuple
    -511            A tuple containing the minimum and maximum retention time values.
    -512        compound_names : list
    -513            A list of compound names.
    -514        
    -515        Returns
    -516        --------
    -517        list
    -518            A list of dictionaries representing the compounds.
    -519        
    -520        """
    -521        min_rt, max_rt = min_max_rt
    -522
    -523        compounds = self.session.query(LowResolutionEICompound).filter(LowResolutionEICompound.name.in_(compound_names)).filter(
    -524                                        LowResolutionEICompound.retention_time >= min_rt,
    -525                                        LowResolutionEICompound.retention_time <= max_rt,
    -526                                        )
    -527        
    -528        #self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))    
    -529        # x = [self.row_to_dict(compound) for compound in compounds]
    -530        
    -531        return [self.row_to_dict(compound) for compound in compounds]
    -532
    -533    def query_min_max_ri_and_rt(self, min_max_ri, min_max_rt, ):
    -534        """ Queries compounds based on RI range and retention time range.
    -535        
    -536        Parameters
    -537        -----------
    -538        min_max_ri : tuple
    -539            A tuple containing the minimum and maximum RI values.
    -540        min_max_rt : tuple
    -541            A tuple containing the minimum and maximum retention time values.
    -542        
    -543        Returns
    -544        --------
    -545        list
    -546            A list of dictionaries representing the compounds.
    -547            
    -548        """
    -549        min_ri, max_ri = min_max_ri
    -550        
    -551        min_rt, max_rt = min_max_rt
    -552
    -553        compounds = self.session.query(LowResolutionEICompound).filter(
    -554            LowResolutionEICompound.ri <= max_ri,
    -555            LowResolutionEICompound.ri >= min_ri,
    -556            LowResolutionEICompound.ri >= min_rt,
    -557            LowResolutionEICompound.ri >= max_rt,
    -558            )
    -559        
    -560        
    -561        #self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))    
    -562        
    -563        return [self.row_to_dict(compound) for compound in compounds]
    -564
    -565    def delete_compound(self, compound):
    -566        """ Deletes a compound from the database.
    -567        
    -568        Parameters
    -569        -----------
    -570        compound : LowResolutionEICompound
    -571            A compound object.
    -572        
    -573        """
    -574        try:
    -575            self.session.delete(compound)  
    -576            self.session.commit()  
    -577        
    -578        except SQLAlchemyError as e:
    -579            self.session.rollback()
    -580            print(str(e))
    -581
    -582    def purge(self):
    -583        """ Deletes all compounds from the database table.
    -584        
    -585        Notes
    -586        ------
    -587        Careful, this will delete the entire database table.
    +501        Parameters
    +502        -----------
    +503        min_max_rt : tuple
    +504            A tuple containing the minimum and maximum retention time values.
    +505
    +506        Returns
    +507        --------
    +508        list
    +509            A list of dictionaries representing the compounds.
    +510        """
    +511        min_rt, max_rt = min_max_rt
    +512
    +513        compounds = self.session.query(LowResolutionEICompound).filter(
    +514            LowResolutionEICompound.retention_time.between(min_rt, max_rt)
    +515        )
    +516
    +517        return [self.row_to_dict(compound) for compound in compounds]
    +518
    +519    def query_min_max_ri(self, min_max_ri):
    +520        """Queries compounds based on RI range.
    +521
    +522        Parameters
    +523        -----------
    +524        min_max_ri : tuple
    +525            A tuple containing the minimum and maximum RI values.
    +526        """
    +527        min_ri, max_ri = min_max_ri
    +528
    +529        compounds = (
    +530            self.session.query(LowResolutionEICompound)
    +531            .filter(LowResolutionEICompound.ri.between(min_ri, max_ri))
    +532            .all()
    +533        )
    +534
    +535        return [self.row_to_dict(compound) for compound in compounds]
    +536
    +537    def query_names_and_rt(self, min_max_rt, compound_names):
    +538        """Queries compounds based on compound names and retention time range.
    +539
    +540        Parameters
    +541        -----------
    +542        min_max_rt : tuple
    +543            A tuple containing the minimum and maximum retention time values.
    +544        compound_names : list
    +545            A list of compound names.
    +546
    +547        Returns
    +548        --------
    +549        list
    +550            A list of dictionaries representing the compounds.
    +551
    +552        """
    +553        min_rt, max_rt = min_max_rt
    +554
    +555        compounds = (
    +556            self.session.query(LowResolutionEICompound)
    +557            .filter(LowResolutionEICompound.name.in_(compound_names))
    +558            .filter(
    +559                LowResolutionEICompound.retention_time >= min_rt,
    +560                LowResolutionEICompound.retention_time <= max_rt,
    +561            )
    +562        )
    +563
    +564        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
    +565        # x = [self.row_to_dict(compound) for compound in compounds]
    +566
    +567        return [self.row_to_dict(compound) for compound in compounds]
    +568
    +569    def query_min_max_ri_and_rt(
    +570        self,
    +571        min_max_ri,
    +572        min_max_rt,
    +573    ):
    +574        """Queries compounds based on RI range and retention time range.
    +575
    +576        Parameters
    +577        -----------
    +578        min_max_ri : tuple
    +579            A tuple containing the minimum and maximum RI values.
    +580        min_max_rt : tuple
    +581            A tuple containing the minimum and maximum retention time values.
    +582
    +583        Returns
    +584        --------
    +585        list
    +586            A list of dictionaries representing the compounds.
    +587
     588        """
    -589        self.session.query(LowResolutionEICompound).delete()
    -590        self.session.commit()  
    -591
    -592    def clear_data(self):
    -593        """ Clears all tables in the database.
    -594        """
    -595        meta = Base.metadata
    -596        for table in reversed(meta.sorted_tables):
    -597            print ('Clear table %s' % table)
    -598            self.session.execute(table.delete())
    -599        self.session.commit()
    -600   
    +589        min_ri, max_ri = min_max_ri
    +590
    +591        min_rt, max_rt = min_max_rt
    +592
    +593        compounds = self.session.query(LowResolutionEICompound).filter(
    +594            LowResolutionEICompound.ri <= max_ri,
    +595            LowResolutionEICompound.ri >= min_ri,
    +596            LowResolutionEICompound.ri >= min_rt,
    +597            LowResolutionEICompound.ri >= max_rt,
    +598        )
    +599
    +600        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
    +601
    +602        return [self.row_to_dict(compound) for compound in compounds]
    +603
    +604    def delete_compound(self, compound):
    +605        """Deletes a compound from the database.
    +606
    +607        Parameters
    +608        -----------
    +609        compound : LowResolutionEICompound
    +610            A compound object.
    +611
    +612        """
    +613        try:
    +614            self.session.delete(compound)
    +615            self.session.commit()
    +616
    +617        except SQLAlchemyError as e:
    +618            self.session.rollback()
    +619            print(str(e))
    +620
    +621    def purge(self):
    +622        """Deletes all compounds from the database table.
    +623
    +624        Notes
    +625        ------
    +626        Careful, this will delete the entire database table.
    +627        """
    +628        self.session.query(LowResolutionEICompound).delete()
    +629        self.session.commit()
    +630
    +631    def clear_data(self):
    +632        """Clears all tables in the database."""
    +633        meta = Base.metadata
    +634        for table in reversed(meta.sorted_tables):
    +635            print("Clear table %s" % table)
    +636            self.session.execute(table.delete())
    +637        self.session.commit()
     
    @@ -1035,51 +1072,52 @@

    -
    21class Metadatar(Base):
    -22    """ This class is used to store the metadata of the compounds in the database
    -23
    -24    Attributes
    -25    -----------
    -26    id : int
    -27        The id of the compound.
    -28    cas : str
    -29        The CAS number of the compound.
    -30    inchikey : str
    -31        The InChiKey of the compound.
    -32    inchi : str
    -33        The InChi of the compound.
    -34    chebi : str
    -35        The ChEBI ID of the compound.
    -36    smiles : str
    -37        The SMILES of the compound.
    -38    kegg : str
    -39        The KEGG ID of the compound.
    -40    iupac_name : str
    -41        The IUPAC name of the compound.
    -42    traditional_name : str
    -43        The traditional name of the compound.
    -44    common_name : str
    -45        The common name of the compound.
    -46    data_id : int
    -47        The id of the compound in the molecularData table.
    -48    data : LowResolutionEICompound
    -49        The compound object.
    -50    """
    -51    __tablename__ = 'metaDataR'
    -52
    -53    id = Column(Integer, primary_key=True)
    -54    cas = Column(String, nullable=True)
    -55    inchikey = Column(String, nullable=False)
    -56    inchi = Column(String, nullable=False)
    -57    chebi = Column(String, nullable=True)
    -58    smiles = Column(String, nullable=True)
    -59    kegg = Column(String, nullable=True)
    -60    iupac_name = Column(String, nullable=True)
    -61    traditional_name = Column(String, nullable=True)
    -62    common_name = Column(String, nullable=True)
    -63
    -64    data_id = Column(Integer, ForeignKey('molecularData.id'))
    -65    data = relationship("LowResolutionEICompound", back_populates="metadatar")
    +            
    26class Metadatar(Base):
    +27    """This class is used to store the metadata of the compounds in the database
    +28
    +29    Attributes
    +30    -----------
    +31    id : int
    +32        The id of the compound.
    +33    cas : str
    +34        The CAS number of the compound.
    +35    inchikey : str
    +36        The InChiKey of the compound.
    +37    inchi : str
    +38        The InChi of the compound.
    +39    chebi : str
    +40        The ChEBI ID of the compound.
    +41    smiles : str
    +42        The SMILES of the compound.
    +43    kegg : str
    +44        The KEGG ID of the compound.
    +45    iupac_name : str
    +46        The IUPAC name of the compound.
    +47    traditional_name : str
    +48        The traditional name of the compound.
    +49    common_name : str
    +50        The common name of the compound.
    +51    data_id : int
    +52        The id of the compound in the molecularData table.
    +53    data : LowResolutionEICompound
    +54        The compound object.
    +55    """
    +56
    +57    __tablename__ = "metaDataR"
    +58
    +59    id = Column(Integer, primary_key=True)
    +60    cas = Column(String, nullable=True)
    +61    inchikey = Column(String, nullable=False)
    +62    inchi = Column(String, nullable=False)
    +63    chebi = Column(String, nullable=True)
    +64    smiles = Column(String, nullable=True)
    +65    kegg = Column(String, nullable=True)
    +66    iupac_name = Column(String, nullable=True)
    +67    traditional_name = Column(String, nullable=True)
    +68    common_name = Column(String, nullable=True)
    +69
    +70    data_id = Column(Integer, ForeignKey("molecularData.id"))
    +71    data = relationship("LowResolutionEICompound", back_populates="metadatar")
     
    @@ -1291,101 +1329,110 @@
    Inherited Members
    -
     67class LowResolutionEICompound(Base):
    - 68    """ This class is used to store the molecular and spectral data of the compounds in the low res EI database
    - 69
    - 70    Attributes
    - 71    -----------
    - 72    id : int
    - 73        The id of the compound.
    - 74    name : str
    - 75        The name of the compound.
    - 76    classify : str
    - 77        The classification of the compound.
    - 78    formula : str
    - 79        The formula of the compound.
    - 80    ri : float
    - 81        The retention index of the compound.
    - 82    retention_time : float
    - 83        The retention time of the compound.
    - 84    source : str
    - 85        The source of the compound.
    - 86    casno : str
    - 87        The CAS number of the compound.
    - 88    comment : str
    - 89        The comment of the compound.
    - 90    source_temp_c : float
    - 91        The source temperature of the spectra.
    - 92    ev : float
    - 93        The electron volts of the spectra.
    - 94    peaks_count : int
    - 95        The number of peaks in the spectra.
    - 96    mz : numpy.ndarray
    - 97        The m/z values of the spectra.
    - 98    abundance : numpy.ndarray
    - 99        The abundance values of the spectra.
    -100    metadatar : Metadatar
    -101        The metadata object.
    -102    """
    -103    __tablename__ = 'molecularData'
    -104
    -105    id = Column(Integer, primary_key=True)
    -106
    -107    name = Column(String, nullable=False)
    -108    classify = Column(String, nullable=True)
    -109    formula = Column(String, nullable=True)
    -110    ri = Column(Float, nullable=False)
    -111    retention_time = Column(Float, nullable=False)
    +            
     74class LowResolutionEICompound(Base):
    + 75    """This class is used to store the molecular and spectral data of the compounds in the low res EI database
    + 76
    + 77    Attributes
    + 78    -----------
    + 79    id : int
    + 80        The id of the compound.
    + 81    name : str
    + 82        The name of the compound.
    + 83    classify : str
    + 84        The classification of the compound.
    + 85    formula : str
    + 86        The formula of the compound.
    + 87    ri : float
    + 88        The retention index of the compound.
    + 89    retention_time : float
    + 90        The retention time of the compound.
    + 91    source : str
    + 92        The source of the compound.
    + 93    casno : str
    + 94        The CAS number of the compound.
    + 95    comment : str
    + 96        The comment of the compound.
    + 97    source_temp_c : float
    + 98        The source temperature of the spectra.
    + 99    ev : float
    +100        The electron volts of the spectra.
    +101    peaks_count : int
    +102        The number of peaks in the spectra.
    +103    mz : numpy.ndarray
    +104        The m/z values of the spectra.
    +105    abundance : numpy.ndarray
    +106        The abundance values of the spectra.
    +107    metadatar : Metadatar
    +108        The metadata object.
    +109    """
    +110
    +111    __tablename__ = "molecularData"
     112
    -113    source = Column(String, nullable=True)
    -114    casno = Column(String, nullable=False)
    -115    comment = Column(String, nullable=True)
    -116
    -117    derivativenum = Column(String, nullable=True)
    -118    derivatization = Column(String, nullable=True)
    -119
    -120    source_temp_c = Column(Float, nullable=True)
    -121    ev = Column(Float, nullable=True)
    -122
    -123    peaks_count = Column(Integer, nullable=False)
    +113    id = Column(Integer, primary_key=True)
    +114
    +115    name = Column(String, nullable=False)
    +116    classify = Column(String, nullable=True)
    +117    formula = Column(String, nullable=True)
    +118    ri = Column(Float, nullable=False)
    +119    retention_time = Column(Float, nullable=False)
    +120
    +121    source = Column(String, nullable=True)
    +122    casno = Column(String, nullable=False)
    +123    comment = Column(String, nullable=True)
     124
    -125    mz = Column(LargeBinary, nullable=False)
    -126    abundance = Column(LargeBinary, nullable=False)
    +125    derivativenum = Column(String, nullable=True)
    +126    derivatization = Column(String, nullable=True)
     127
    -128    metadatar = relationship("Metadatar", uselist=False, back_populates="data")
    -129
    -130    # metadatar = relationship('Metadatar', backref='smile', lazy='dynamic')
    -131
    -132    def __init__(self, **dict_data):
    -133
    -134        self.id = dict_data.get('id')
    +128    source_temp_c = Column(Float, nullable=True)
    +129    ev = Column(Float, nullable=True)
    +130
    +131    peaks_count = Column(Integer, nullable=False)
    +132
    +133    mz = Column(LargeBinary, nullable=False)
    +134    abundance = Column(LargeBinary, nullable=False)
     135
    -136        self.name = dict_data.get('NAME')
    -137        self.classify = dict_data.get('classify')
    -138        self.formula = dict_data.get('FORM')
    -139        self.ri = dict_data.get('RI')
    -140        self.retention_time = dict_data.get('RT')
    -141
    -142        self.source = dict_data.get('SOURCE')
    -143        self.casno = dict_data.get('CASNO')
    -144        self.comment = dict_data.get('COMMENT')
    -145
    -146        self.derivativenum = dict_data.get('derivativenum')
    -147        self.derivatization = dict_data.get('derivatization')
    +136    metadatar = relationship("Metadatar", uselist=False, back_populates="data")
    +137
    +138    # metadatar = relationship('Metadatar', backref='smile', lazy='dynamic')
    +139
    +140    def __init__(self, **dict_data):
    +141        self.id = dict_data.get("id")
    +142
    +143        self.name = dict_data.get("NAME")
    +144        self.classify = dict_data.get("classify")
    +145        self.formula = dict_data.get("FORM")
    +146        self.ri = dict_data.get("RI")
    +147        self.retention_time = dict_data.get("RT")
     148
    -149        self.peaks_count = dict_data.get('NUM PEAKS')
    -150
    -151        # mz and abun are numpy arrays of 64 bits integer
    -152        # when using postgres array might be a better option
    -153
    -154        self.mz = array(dict_data.get('mz'), dtype='int32').tobytes()
    -155        self.abundance = array(dict_data.get('abundance'), dtype="int32").tobytes()
    -156
    -157        self.metadatar = dict_data.get('metadatar', None)
    -158
    -159    def __repr__(self):
    -160        return "<LowResolutionEICompound(name= %s , cas number = %s, formula = %s, Retention index= %.1f, Retention time= %.1f comment='%s')>" % (
    -161                                    self.name, self.casno, self.formula, self.ri, self.retention_time, self.comment)
    +149        self.source = dict_data.get("SOURCE")
    +150        self.casno = dict_data.get("CASNO")
    +151        self.comment = dict_data.get("COMMENT")
    +152
    +153        self.derivativenum = dict_data.get("derivativenum")
    +154        self.derivatization = dict_data.get("derivatization")
    +155
    +156        self.peaks_count = dict_data.get("NUM PEAKS")
    +157
    +158        # mz and abun are numpy arrays of 64 bits integer
    +159        # when using postgres array might be a better option
    +160
    +161        self.mz = array(dict_data.get("mz"), dtype="int32").tobytes()
    +162        self.abundance = array(dict_data.get("abundance"), dtype="int32").tobytes()
    +163
    +164        self.metadatar = dict_data.get("metadatar", None)
    +165
    +166    def __repr__(self):
    +167        return (
    +168            "<LowResolutionEICompound(name= %s , cas number = %s, formula = %s, Retention index= %.1f, Retention time= %.1f comment='%s')>"
    +169            % (
    +170                self.name,
    +171                self.casno,
    +172                self.formula,
    +173                self.ri,
    +174                self.retention_time,
    +175                self.comment,
    +176            )
    +177        )
     
    @@ -1650,48 +1697,48 @@
    Inherited Members
    -
    164@dataclass
    -165class MetaboliteMetadata:
    -166    """ Dataclass for the Metabolite Metadata
    -167    
    -168    Attributes
    -169    -----------
    -170    id : int
    -171        The id of the compound.
    -172    cas : str
    -173        The CAS number of the compound.
    -174    inchikey : str
    -175        The InChiKey of the compound.
    -176    inchi : str
    -177        The InChi of the compound.
    -178    chebi : str
    -179        The ChEBI ID of the compound.
    -180    smiles : str
    -181        The SMILES of the compound.
    -182    kegg : str
    -183        The KEGG ID of the compound.
    -184    iupac_name : str
    -185        The IUPAC name of the compound.
    -186    traditional_name : str
    -187        The traditional name of the compound.
    -188    common_name : str
    -189        The common name of the compound.
    -190    data_id : int
    -191        The id of the compound in the molecularData table.
    -192                
    -193    """
    -194
    -195    id: int
    -196    cas: str
    -197    inchikey: str
    -198    inchi: str
    -199    chebi: str
    -200    smiles: str
    -201    kegg: str
    -202    data_id: int
    -203    iupac_name: str
    -204    traditional_name: str
    -205    common_name: str
    +            
    180@dataclass
    +181class MetaboliteMetadata:
    +182    """Dataclass for the Metabolite Metadata
    +183
    +184    Attributes
    +185    -----------
    +186    id : int
    +187        The id of the compound.
    +188    cas : str
    +189        The CAS number of the compound.
    +190    inchikey : str
    +191        The InChiKey of the compound.
    +192    inchi : str
    +193        The InChi of the compound.
    +194    chebi : str
    +195        The ChEBI ID of the compound.
    +196    smiles : str
    +197        The SMILES of the compound.
    +198    kegg : str
    +199        The KEGG ID of the compound.
    +200    iupac_name : str
    +201        The IUPAC name of the compound.
    +202    traditional_name : str
    +203        The traditional name of the compound.
    +204    common_name : str
    +205        The common name of the compound.
    +206    data_id : int
    +207        The id of the compound in the molecularData table.
    +208
    +209    """
    +210
    +211    id: int
    +212    cas: str
    +213    inchikey: str
    +214    inchi: str
    +215    chebi: str
    +216    smiles: str
    +217    kegg: str
    +218    data_id: int
    +219    iupac_name: str
    +220    traditional_name: str
    +221    common_name: str
     
    @@ -1872,102 +1919,100 @@
    Attributes
    -
    207@dataclass
    -208class LowResCompoundRef:
    -209    """ Dataclass for the Low Resolution Compound Reference
    -210    
    -211    This class is used to store the molecular and spectral data of the compounds in the low res EI database
    -212
    -213    Parameters
    -214    -----------
    -215    compounds_dict : dict
    -216        A dictionary representing the compound.
    -217    
    -218    Attributes
    -219    -----------
    -220    id : int
    -221        The id of the compound.
    -222    name : str
    -223        The name of the compound.
    -224    ri : str
    -225        The retention index of the compound.
    -226    retention_time : str
    -227        The retention time of the compound.
    -228    casno : str
    -229        The CAS number of the compound.
    -230    comment : str
    -231        The comment of the compound.
    -232    peaks_count : int
    -233        The number of peaks in the spectra.
    -234    classify : str
    -235        The classification of the compound.
    -236    derivativenum : str
    -237        The derivative number of the compound.
    -238    derivatization : str
    -239        The derivatization applied to the compound.
    -240    mz : numpy.ndarray
    -241        The m/z values of the spectra.
    -242    abundance : numpy.ndarray
    -243        The abundance values of the spectra.
    -244    source_temp_c : float
    -245        The source temperature of the spectra.
    -246    ev : float
    -247        The electron volts of the spectra.
    -248    formula : str
    -249        The formula of the compound.
    -250    source : str
    -251        The source of the spectra data.
    -252    classify : str
    -253        The classification of the compound.
    -254    metadata : MetaboliteMetadata
    -255        The metadata object.
    -256    similarity_score : float
    -257        The similarity score of the compound.
    -258    ri_score : float
    -259        The RI score of the compound.
    -260    spectral_similarity_score : float
    -261        The spectral similarity score of the compound.
    -262    spectral_similarity_scores : dict
    -263        The spectral similarity scores of the compound.
    -264   
    -265    """
    -266    #this class is use to store the results inside the GCPeak class
    -267    def __init__(self, compounds_dict):
    -268
    -269        self.id = compounds_dict.get("id")
    -270        self.name = compounds_dict.get("name")
    -271        self.ri = compounds_dict.get("ri")
    -272        self.retention_time = compounds_dict.get("rt")
    -273        self.casno = compounds_dict.get("casno")
    -274        self.comment = compounds_dict.get("comment")
    -275        self.peaks_count = compounds_dict.get("peaks_count")
    -276
    -277        self.classify = compounds_dict.get('classify')
    -278        self.derivativenum = compounds_dict.get('derivativenum')
    -279        self.derivatization = compounds_dict.get('derivatization')
    -280
    -281        self.mz = compounds_dict.get('mz')
    -282        self.abundance = compounds_dict.get("abundance")
    +            
    224@dataclass
    +225class LowResCompoundRef:
    +226    """Dataclass for the Low Resolution Compound Reference
    +227
    +228    This class is used to store the molecular and spectral data of the compounds in the low res EI database
    +229
    +230    Parameters
    +231    -----------
    +232    compounds_dict : dict
    +233        A dictionary representing the compound.
    +234
    +235    Attributes
    +236    -----------
    +237    id : int
    +238        The id of the compound.
    +239    name : str
    +240        The name of the compound.
    +241    ri : str
    +242        The retention index of the compound.
    +243    retention_time : str
    +244        The retention time of the compound.
    +245    casno : str
    +246        The CAS number of the compound.
    +247    comment : str
    +248        The comment of the compound.
    +249    peaks_count : int
    +250        The number of peaks in the spectra.
    +251    classify : str
    +252        The classification of the compound.
    +253    derivativenum : str
    +254        The derivative number of the compound.
    +255    derivatization : str
    +256        The derivatization applied to the compound.
    +257    mz : numpy.ndarray
    +258        The m/z values of the spectra.
    +259    abundance : numpy.ndarray
    +260        The abundance values of the spectra.
    +261    source_temp_c : float
    +262        The source temperature of the spectra.
    +263    ev : float
    +264        The electron volts of the spectra.
    +265    formula : str
    +266        The formula of the compound.
    +267    source : str
    +268        The source of the spectra data.
    +269    classify : str
    +270        The classification of the compound.
    +271    metadata : MetaboliteMetadata
    +272        The metadata object.
    +273    similarity_score : float
    +274        The similarity score of the compound.
    +275    ri_score : float
    +276        The RI score of the compound.
    +277    spectral_similarity_score : float
    +278        The spectral similarity score of the compound.
    +279    spectral_similarity_scores : dict
    +280        The spectral similarity scores of the compound.
    +281
    +282    """
     283
    -284        self.source_temp_c = compounds_dict.get("source_temp_c")
    -285        self.ev = compounds_dict.get("ev")
    -286        self.formula = compounds_dict.get("formula")
    -287        self.source = compounds_dict.get("source")
    -288
    -289        self.classify = compounds_dict.get("classify")
    -290
    -291        if compounds_dict.get("metadata"):
    -292            
    -293            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
    -294
    -295        else:
    -296
    -297            self.metadata = None
    -298
    -299        self.similarity_score = None
    -300        self.ri_score = None
    -301        self.spectral_similarity_score = None
    -302        self.spectral_similarity_scores = {}
    +284    # this class is use to store the results inside the GCPeak class
    +285    def __init__(self, compounds_dict):
    +286        self.id = compounds_dict.get("id")
    +287        self.name = compounds_dict.get("name")
    +288        self.ri = compounds_dict.get("ri")
    +289        self.retention_time = compounds_dict.get("rt")
    +290        self.casno = compounds_dict.get("casno")
    +291        self.comment = compounds_dict.get("comment")
    +292        self.peaks_count = compounds_dict.get("peaks_count")
    +293
    +294        self.classify = compounds_dict.get("classify")
    +295        self.derivativenum = compounds_dict.get("derivativenum")
    +296        self.derivatization = compounds_dict.get("derivatization")
    +297
    +298        self.mz = compounds_dict.get("mz")
    +299        self.abundance = compounds_dict.get("abundance")
    +300
    +301        self.source_temp_c = compounds_dict.get("source_temp_c")
    +302        self.ev = compounds_dict.get("ev")
    +303        self.formula = compounds_dict.get("formula")
    +304        self.source = compounds_dict.get("source")
    +305
    +306        self.classify = compounds_dict.get("classify")
    +307
    +308        if compounds_dict.get("metadata"):
    +309            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
    +310
    +311        else:
    +312            self.metadata = None
    +313
    +314        self.similarity_score = None
    +315        self.ri_score = None
    +316        self.spectral_similarity_score = None
    +317        self.spectral_similarity_scores = {}
     
    @@ -2043,42 +2088,39 @@
    Attributes
    -
    267    def __init__(self, compounds_dict):
    -268
    -269        self.id = compounds_dict.get("id")
    -270        self.name = compounds_dict.get("name")
    -271        self.ri = compounds_dict.get("ri")
    -272        self.retention_time = compounds_dict.get("rt")
    -273        self.casno = compounds_dict.get("casno")
    -274        self.comment = compounds_dict.get("comment")
    -275        self.peaks_count = compounds_dict.get("peaks_count")
    -276
    -277        self.classify = compounds_dict.get('classify')
    -278        self.derivativenum = compounds_dict.get('derivativenum')
    -279        self.derivatization = compounds_dict.get('derivatization')
    -280
    -281        self.mz = compounds_dict.get('mz')
    -282        self.abundance = compounds_dict.get("abundance")
    -283
    -284        self.source_temp_c = compounds_dict.get("source_temp_c")
    -285        self.ev = compounds_dict.get("ev")
    -286        self.formula = compounds_dict.get("formula")
    -287        self.source = compounds_dict.get("source")
    -288
    -289        self.classify = compounds_dict.get("classify")
    -290
    -291        if compounds_dict.get("metadata"):
    -292            
    -293            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
    -294
    -295        else:
    -296
    -297            self.metadata = None
    -298
    -299        self.similarity_score = None
    -300        self.ri_score = None
    -301        self.spectral_similarity_score = None
    -302        self.spectral_similarity_scores = {}
    +            
    285    def __init__(self, compounds_dict):
    +286        self.id = compounds_dict.get("id")
    +287        self.name = compounds_dict.get("name")
    +288        self.ri = compounds_dict.get("ri")
    +289        self.retention_time = compounds_dict.get("rt")
    +290        self.casno = compounds_dict.get("casno")
    +291        self.comment = compounds_dict.get("comment")
    +292        self.peaks_count = compounds_dict.get("peaks_count")
    +293
    +294        self.classify = compounds_dict.get("classify")
    +295        self.derivativenum = compounds_dict.get("derivativenum")
    +296        self.derivatization = compounds_dict.get("derivatization")
    +297
    +298        self.mz = compounds_dict.get("mz")
    +299        self.abundance = compounds_dict.get("abundance")
    +300
    +301        self.source_temp_c = compounds_dict.get("source_temp_c")
    +302        self.ev = compounds_dict.get("ev")
    +303        self.formula = compounds_dict.get("formula")
    +304        self.source = compounds_dict.get("source")
    +305
    +306        self.classify = compounds_dict.get("classify")
    +307
    +308        if compounds_dict.get("metadata"):
    +309            self.metadata = MetaboliteMetadata(**compounds_dict.get("metadata"))
    +310
    +311        else:
    +312            self.metadata = None
    +313
    +314        self.similarity_score = None
    +315        self.ri_score = None
    +316        self.spectral_similarity_score = None
    +317        self.spectral_similarity_scores = {}
     
    @@ -2317,305 +2359,325 @@
    Attributes
    -
    304class EI_LowRes_SQLite:
    -305    """
    -306    A class for interacting with a SQLite database for low-resolution EI compounds.
    -307
    -308    Parameters
    -309    -----------
    -310    url : str, optional
    -311        The URL of the SQLite database. Default is 'sqlite://'.
    -312
    -313    Attributes
    -314    -----------
    -315    engine : sqlalchemy.engine.Engine
    -316        The SQLAlchemy engine for connecting to the database.
    -317    session : sqlalchemy.orm.Session
    -318        The SQLAlchemy session for executing database operations.
    -319
    -320    Methods
    -321    --------
    -322    * __init__(self, url='sqlite://').
    -323        Initializes the EI_LowRes_SQLite object.
    -324    * __exit__(self, exc_type, exc_val, exc_tb).
    -325        Closes the database connection.
    -326    * init_engine(self, url).
    -327        Initializes the SQLAlchemy engine.
    -328    * __enter__(self).
    -329        Returns the EI_LowRes_SQLite object.
    -330    * add_compound_list(self, data_dict_list).
    -331        Adds a list of compounds to the database.
    -332    * add_compound(self, data_dict).
    -333        Adds a single compound to the database.
    -334    * commit(self).
    -335        Commits the changes to the database.
    -336    * row_to_dict(self, row).
    -337        Converts a database row to a dictionary.
    -338    * get_all(self).
    -339        Retrieves all compounds from the database.
    -340    * query_min_max_rt(self, min_max_rt).
    -341        Queries compounds based on retention time range.
    -342    * query_min_max_ri(self, min_max_ri).
    -343        Queries compounds based on RI range.
    -344    * query_names_and_rt(self, min_max_rt, compound_names).
    -345        Queries compounds based on compound names and retention time range.
    -346    * query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).
    -347        Queries compounds based on RI range and retention time range.
    -348    * delete_compound(self, compound).
    -349        Deletes a compound from the database.
    -350    * purge(self).
    -351        Deletes all compounds from the database table.
    -352    * clear_data(self).
    -353        Clears all tables in the database.
    -354    """
    -355
    -356    def __init__(self, url='sqlite://'):
    -357        
    -358        self.engine = self.init_engine(url)
    -359
    -360        Base.metadata.create_all(self.engine)
    -361
    -362        Session = sessionmaker(bind=self.engine)
    -363
    -364        self.session = Session()
    -365
    -366    def __exit__(self, exc_type, exc_val, exc_tb):
    -367        """ Closes the database connection.
    -368        """
    -369        self.commit()
    -370        self.session.close()
    -371        self.engine.dispose()
    -372
    -373    def init_engine(self, url):
    -374        """ Initializes the SQLAlchemy engine.
    -375
    -376        Parameters
    -377        -----------
    -378        url : str
    -379            The URL of the SQLite database.
    +            
    320class EI_LowRes_SQLite:
    +321    """
    +322    A class for interacting with a SQLite database for low-resolution EI compounds.
    +323
    +324    Parameters
    +325    -----------
    +326    url : str, optional
    +327        The URL of the SQLite database. Default is 'sqlite://'.
    +328
    +329    Attributes
    +330    -----------
    +331    engine : sqlalchemy.engine.Engine
    +332        The SQLAlchemy engine for connecting to the database.
    +333    session : sqlalchemy.orm.Session
    +334        The SQLAlchemy session for executing database operations.
    +335
    +336    Methods
    +337    --------
    +338    * __init__(self, url='sqlite://').
    +339        Initializes the EI_LowRes_SQLite object.
    +340    * __exit__(self, exc_type, exc_val, exc_tb).
    +341        Closes the database connection.
    +342    * init_engine(self, url).
    +343        Initializes the SQLAlchemy engine.
    +344    * __enter__(self).
    +345        Returns the EI_LowRes_SQLite object.
    +346    * add_compound_list(self, data_dict_list).
    +347        Adds a list of compounds to the database.
    +348    * add_compound(self, data_dict).
    +349        Adds a single compound to the database.
    +350    * commit(self).
    +351        Commits the changes to the database.
    +352    * row_to_dict(self, row).
    +353        Converts a database row to a dictionary.
    +354    * get_all(self).
    +355        Retrieves all compounds from the database.
    +356    * query_min_max_rt(self, min_max_rt).
    +357        Queries compounds based on retention time range.
    +358    * query_min_max_ri(self, min_max_ri).
    +359        Queries compounds based on RI range.
    +360    * query_names_and_rt(self, min_max_rt, compound_names).
    +361        Queries compounds based on compound names and retention time range.
    +362    * query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).
    +363        Queries compounds based on RI range and retention time range.
    +364    * delete_compound(self, compound).
    +365        Deletes a compound from the database.
    +366    * purge(self).
    +367        Deletes all compounds from the database table.
    +368    * clear_data(self).
    +369        Clears all tables in the database.
    +370    """
    +371
    +372    def __init__(self, url="sqlite://"):
    +373        self.engine = self.init_engine(url)
    +374
    +375        Base.metadata.create_all(self.engine)
    +376
    +377        Session = sessionmaker(bind=self.engine)
    +378
    +379        self.session = Session()
     380
    -381        Returns
    -382        --------
    -383        sqlalchemy.engine.Engine
    -384            The SQLAlchemy engine for connecting to the database.
    -385        """
    -386        directory = os.getcwd()
    -387        if not url:
    -388            if not os.path.isdir(directory + '/db'):
    -389                os.mkdir(directory + '/db')
    -390            url = 'sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite'.format(DB=directory)
    -391        return create_engine(url, poolclass=QueuePool)
    -392
    -393    def __enter__(self):
    -394        """ Returns the EI_LowRes_SQLite object.
    -395        """
    -396        return self
    -397
    -398    def add_compound_list(self, data_dict_list):
    -399        """ Adds a list of compounds to the database.
    -400
    -401        Parameters
    -402        -----------
    -403        data_dict_list : list of dict
    -404            A list of dictionaries representing the compounds.
    -405        """
    -406        for data_dict in data_dict_list:
    -407            # print(data_dict.get('NUM PEAKS'))
    -408            if not data_dict.get('NUM PEAKS'):
    -409                data_dict['NUM PEAKS'] = len(data_dict.get('mz'))
    -410            if not data_dict.get('CASNO'):
    -411                data_dict['CASNO'] = data_dict.get('CAS')
    -412        
    -413        self.session.add_all([LowResolutionEICompound(**data_dict) for data_dict in data_dict_list] )
    -414
    -415    def add_compound(self, data_dict):
    -416        """ Adds a single compound to the database. 
    -417        
    -418        Parameters
    -419        -----------
    -420        data_dict : dict
    -421            A dictionary representing the compound.
    -422        
    -423        """
    -424        one_compound = LowResolutionEICompound(**data_dict)
    -425        self.session.add(one_compound)
    -426        self.commit()
    +381    def __exit__(self, exc_type, exc_val, exc_tb):
    +382        """Closes the database connection."""
    +383        self.commit()
    +384        self.session.close()
    +385        self.engine.dispose()
    +386
    +387    def init_engine(self, url):
    +388        """Initializes the SQLAlchemy engine.
    +389
    +390        Parameters
    +391        -----------
    +392        url : str
    +393            The URL of the SQLite database.
    +394
    +395        Returns
    +396        --------
    +397        sqlalchemy.engine.Engine
    +398            The SQLAlchemy engine for connecting to the database.
    +399        """
    +400        directory = os.getcwd()
    +401        if not url:
    +402            if not os.path.isdir(directory + "/db"):
    +403                os.mkdir(directory + "/db")
    +404            url = "sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite".format(
    +405                DB=directory
    +406            )
    +407        return create_engine(url, poolclass=QueuePool)
    +408
    +409    def __enter__(self):
    +410        """Returns the EI_LowRes_SQLite object."""
    +411        return self
    +412
    +413    def add_compound_list(self, data_dict_list):
    +414        """Adds a list of compounds to the database.
    +415
    +416        Parameters
    +417        -----------
    +418        data_dict_list : list of dict
    +419            A list of dictionaries representing the compounds.
    +420        """
    +421        for data_dict in data_dict_list:
    +422            # print(data_dict.get('NUM PEAKS'))
    +423            if not data_dict.get("NUM PEAKS"):
    +424                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
    +425            if not data_dict.get("CASNO"):
    +426                data_dict["CASNO"] = data_dict.get("CAS")
     427
    -428    def commit(self):
    -429        """ Commits the changes to the database.
    -430        """
    -431        try:
    -432            self.session.commit()
    -433        except SQLAlchemyError as e:
    -434            self.session.rollback()
    -435            print(str(e))
    -436
    -437    def row_to_dict(self, row):
    -438        """ Converts a database row to a dictionary.
    -439        
    -440        Parameters
    -441        -----------
    -442        row : sqlalchemy.engine.row.Row
    -443            A row from the database.
    -444        
    -445        Returns
    -446        --------
    -447        dict
    -448            A dictionary representing the compound.
    -449        """
    -450        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}        
    -451
    -452        data_dict['mz'] = frombuffer(data_dict.get('mz'), dtype="int32")
    -453        data_dict['abundance'] = frombuffer(data_dict.get('abundance'), dtype="int32")
    -454
    -455        if row.metadatar:
    -456            data_dict['metadata'] = {c.name: getattr(row.metadatar, c.name) for c in row.metadatar.__table__.columns}
    -457
    -458        else:
    -459            data_dict['metadata'] = None
    +428        self.session.add_all(
    +429            [LowResolutionEICompound(**data_dict) for data_dict in data_dict_list]
    +430        )
    +431
    +432    def add_compound(self, data_dict):
    +433        """Adds a single compound to the database.
    +434
    +435        Parameters
    +436        -----------
    +437        data_dict : dict
    +438            A dictionary representing the compound.
    +439
    +440        """
    +441        one_compound = LowResolutionEICompound(**data_dict)
    +442        self.session.add(one_compound)
    +443        self.commit()
    +444
    +445    def commit(self):
    +446        """Commits the changes to the database."""
    +447        try:
    +448            self.session.commit()
    +449        except SQLAlchemyError as e:
    +450            self.session.rollback()
    +451            print(str(e))
    +452
    +453    def row_to_dict(self, row):
    +454        """Converts a database row to a dictionary.
    +455
    +456        Parameters
    +457        -----------
    +458        row : sqlalchemy.engine.row.Row
    +459            A row from the database.
     460
    -461        return data_dict
    -462
    -463    def get_all(self,):
    -464        """ Retrieves all compounds from the database.
    -465        
    -466        Returns
    -467        --------
    -468        list
    -469            A list of dictionaries representing the compounds.
    -470        """
    -471        compounds = self.session.query(LowResolutionEICompound).all()
    -472
    -473        return [self.row_to_dict(compound) for compound in compounds]
    -474
    -475    def query_min_max_rt(self, min_max_rt, ):
    -476        """ Queries compounds based on retention time range.
    -477        
    -478        Parameters
    -479        -----------
    -480        min_max_rt : tuple
    -481            A tuple containing the minimum and maximum retention time values.
    -482        
    -483        Returns
    -484        --------
    -485        list
    -486            A list of dictionaries representing the compounds.
    -487        """
    -488        min_rt, max_rt = min_max_rt
    -489
    -490        compounds = self.session.query(LowResolutionEICompound).filter(LowResolutionEICompound.retention_time.between(min_rt, max_rt))    
    -491
    -492        return [self.row_to_dict(compound) for compound in compounds]
    +461        Returns
    +462        --------
    +463        dict
    +464            A dictionary representing the compound.
    +465        """
    +466        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}
    +467
    +468        data_dict["mz"] = frombuffer(data_dict.get("mz"), dtype="int32")
    +469        data_dict["abundance"] = frombuffer(data_dict.get("abundance"), dtype="int32")
    +470
    +471        if row.metadatar:
    +472            data_dict["metadata"] = {
    +473                c.name: getattr(row.metadatar, c.name)
    +474                for c in row.metadatar.__table__.columns
    +475            }
    +476
    +477        else:
    +478            data_dict["metadata"] = None
    +479
    +480        return data_dict
    +481
    +482    def get_all(
    +483        self,
    +484    ):
    +485        """Retrieves all compounds from the database.
    +486
    +487        Returns
    +488        --------
    +489        list
    +490            A list of dictionaries representing the compounds.
    +491        """
    +492        compounds = self.session.query(LowResolutionEICompound).all()
     493
    -494    def query_min_max_ri(self, min_max_ri):
    -495        """ Queries compounds based on RI range.
    -496        
    -497        Parameters
    -498        -----------
    -499        min_max_ri : tuple
    -500            A tuple containing the minimum and maximum RI values.
    -501        """
    -502        min_ri, max_ri = min_max_ri
    -503
    -504        compounds = self.session.query(LowResolutionEICompound).filter(LowResolutionEICompound.ri.between(min_ri, max_ri)).all()
    -505
    -506        return [self.row_to_dict(compound) for compound in compounds]
    -507
    -508    def query_names_and_rt(self, min_max_rt, compound_names):
    -509        """ Queries compounds based on compound names and retention time range.
    -510        
    -511        Parameters
    -512        -----------
    -513        min_max_rt : tuple
    -514            A tuple containing the minimum and maximum retention time values.
    -515        compound_names : list
    -516            A list of compound names.
    -517        
    -518        Returns
    -519        --------
    -520        list
    -521            A list of dictionaries representing the compounds.
    -522        
    -523        """
    -524        min_rt, max_rt = min_max_rt
    -525
    -526        compounds = self.session.query(LowResolutionEICompound).filter(LowResolutionEICompound.name.in_(compound_names)).filter(
    -527                                        LowResolutionEICompound.retention_time >= min_rt,
    -528                                        LowResolutionEICompound.retention_time <= max_rt,
    -529                                        )
    -530        
    -531        #self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))    
    -532        # x = [self.row_to_dict(compound) for compound in compounds]
    -533        
    -534        return [self.row_to_dict(compound) for compound in compounds]
    +494        return [self.row_to_dict(compound) for compound in compounds]
    +495
    +496    def query_min_max_rt(
    +497        self,
    +498        min_max_rt,
    +499    ):
    +500        """Queries compounds based on retention time range.
    +501
    +502        Parameters
    +503        -----------
    +504        min_max_rt : tuple
    +505            A tuple containing the minimum and maximum retention time values.
    +506
    +507        Returns
    +508        --------
    +509        list
    +510            A list of dictionaries representing the compounds.
    +511        """
    +512        min_rt, max_rt = min_max_rt
    +513
    +514        compounds = self.session.query(LowResolutionEICompound).filter(
    +515            LowResolutionEICompound.retention_time.between(min_rt, max_rt)
    +516        )
    +517
    +518        return [self.row_to_dict(compound) for compound in compounds]
    +519
    +520    def query_min_max_ri(self, min_max_ri):
    +521        """Queries compounds based on RI range.
    +522
    +523        Parameters
    +524        -----------
    +525        min_max_ri : tuple
    +526            A tuple containing the minimum and maximum RI values.
    +527        """
    +528        min_ri, max_ri = min_max_ri
    +529
    +530        compounds = (
    +531            self.session.query(LowResolutionEICompound)
    +532            .filter(LowResolutionEICompound.ri.between(min_ri, max_ri))
    +533            .all()
    +534        )
     535
    -536    def query_min_max_ri_and_rt(self, min_max_ri, min_max_rt, ):
    -537        """ Queries compounds based on RI range and retention time range.
    -538        
    -539        Parameters
    -540        -----------
    -541        min_max_ri : tuple
    -542            A tuple containing the minimum and maximum RI values.
    +536        return [self.row_to_dict(compound) for compound in compounds]
    +537
    +538    def query_names_and_rt(self, min_max_rt, compound_names):
    +539        """Queries compounds based on compound names and retention time range.
    +540
    +541        Parameters
    +542        -----------
     543        min_max_rt : tuple
     544            A tuple containing the minimum and maximum retention time values.
    -545        
    -546        Returns
    -547        --------
    -548        list
    -549            A list of dictionaries representing the compounds.
    -550            
    -551        """
    -552        min_ri, max_ri = min_max_ri
    -553        
    +545        compound_names : list
    +546            A list of compound names.
    +547
    +548        Returns
    +549        --------
    +550        list
    +551            A list of dictionaries representing the compounds.
    +552
    +553        """
     554        min_rt, max_rt = min_max_rt
     555
    -556        compounds = self.session.query(LowResolutionEICompound).filter(
    -557            LowResolutionEICompound.ri <= max_ri,
    -558            LowResolutionEICompound.ri >= min_ri,
    -559            LowResolutionEICompound.ri >= min_rt,
    -560            LowResolutionEICompound.ri >= max_rt,
    -561            )
    -562        
    -563        
    -564        #self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))    
    -565        
    -566        return [self.row_to_dict(compound) for compound in compounds]
    +556        compounds = (
    +557            self.session.query(LowResolutionEICompound)
    +558            .filter(LowResolutionEICompound.name.in_(compound_names))
    +559            .filter(
    +560                LowResolutionEICompound.retention_time >= min_rt,
    +561                LowResolutionEICompound.retention_time <= max_rt,
    +562            )
    +563        )
    +564
    +565        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
    +566        # x = [self.row_to_dict(compound) for compound in compounds]
     567
    -568    def delete_compound(self, compound):
    -569        """ Deletes a compound from the database.
    -570        
    -571        Parameters
    -572        -----------
    -573        compound : LowResolutionEICompound
    -574            A compound object.
    -575        
    -576        """
    -577        try:
    -578            self.session.delete(compound)  
    -579            self.session.commit()  
    -580        
    -581        except SQLAlchemyError as e:
    -582            self.session.rollback()
    -583            print(str(e))
    -584
    -585    def purge(self):
    -586        """ Deletes all compounds from the database table.
    -587        
    -588        Notes
    -589        ------
    -590        Careful, this will delete the entire database table.
    -591        """
    -592        self.session.query(LowResolutionEICompound).delete()
    -593        self.session.commit()  
    -594
    -595    def clear_data(self):
    -596        """ Clears all tables in the database.
    -597        """
    -598        meta = Base.metadata
    -599        for table in reversed(meta.sorted_tables):
    -600            print ('Clear table %s' % table)
    -601            self.session.execute(table.delete())
    -602        self.session.commit()
    +568        return [self.row_to_dict(compound) for compound in compounds]
    +569
    +570    def query_min_max_ri_and_rt(
    +571        self,
    +572        min_max_ri,
    +573        min_max_rt,
    +574    ):
    +575        """Queries compounds based on RI range and retention time range.
    +576
    +577        Parameters
    +578        -----------
    +579        min_max_ri : tuple
    +580            A tuple containing the minimum and maximum RI values.
    +581        min_max_rt : tuple
    +582            A tuple containing the minimum and maximum retention time values.
    +583
    +584        Returns
    +585        --------
    +586        list
    +587            A list of dictionaries representing the compounds.
    +588
    +589        """
    +590        min_ri, max_ri = min_max_ri
    +591
    +592        min_rt, max_rt = min_max_rt
    +593
    +594        compounds = self.session.query(LowResolutionEICompound).filter(
    +595            LowResolutionEICompound.ri <= max_ri,
    +596            LowResolutionEICompound.ri >= min_ri,
    +597            LowResolutionEICompound.ri >= min_rt,
    +598            LowResolutionEICompound.ri >= max_rt,
    +599        )
    +600
    +601        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
    +602
    +603        return [self.row_to_dict(compound) for compound in compounds]
    +604
    +605    def delete_compound(self, compound):
    +606        """Deletes a compound from the database.
    +607
    +608        Parameters
    +609        -----------
    +610        compound : LowResolutionEICompound
    +611            A compound object.
    +612
    +613        """
    +614        try:
    +615            self.session.delete(compound)
    +616            self.session.commit()
    +617
    +618        except SQLAlchemyError as e:
    +619            self.session.rollback()
    +620            print(str(e))
    +621
    +622    def purge(self):
    +623        """Deletes all compounds from the database table.
    +624
    +625        Notes
    +626        ------
    +627        Careful, this will delete the entire database table.
    +628        """
    +629        self.session.query(LowResolutionEICompound).delete()
    +630        self.session.commit()
    +631
    +632    def clear_data(self):
    +633        """Clears all tables in the database."""
    +634        meta = Base.metadata
    +635        for table in reversed(meta.sorted_tables):
    +636            print("Clear table %s" % table)
    +637            self.session.execute(table.delete())
    +638        self.session.commit()
     
    @@ -2686,15 +2748,14 @@
    Methods
    -
    356    def __init__(self, url='sqlite://'):
    -357        
    -358        self.engine = self.init_engine(url)
    -359
    -360        Base.metadata.create_all(self.engine)
    -361
    -362        Session = sessionmaker(bind=self.engine)
    -363
    -364        self.session = Session()
    +            
    372    def __init__(self, url="sqlite://"):
    +373        self.engine = self.init_engine(url)
    +374
    +375        Base.metadata.create_all(self.engine)
    +376
    +377        Session = sessionmaker(bind=self.engine)
    +378
    +379        self.session = Session()
     
    @@ -2734,25 +2795,27 @@
    Methods
    -
    373    def init_engine(self, url):
    -374        """ Initializes the SQLAlchemy engine.
    -375
    -376        Parameters
    -377        -----------
    -378        url : str
    -379            The URL of the SQLite database.
    -380
    -381        Returns
    -382        --------
    -383        sqlalchemy.engine.Engine
    -384            The SQLAlchemy engine for connecting to the database.
    -385        """
    -386        directory = os.getcwd()
    -387        if not url:
    -388            if not os.path.isdir(directory + '/db'):
    -389                os.mkdir(directory + '/db')
    -390            url = 'sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite'.format(DB=directory)
    -391        return create_engine(url, poolclass=QueuePool)
    +            
    387    def init_engine(self, url):
    +388        """Initializes the SQLAlchemy engine.
    +389
    +390        Parameters
    +391        -----------
    +392        url : str
    +393            The URL of the SQLite database.
    +394
    +395        Returns
    +396        --------
    +397        sqlalchemy.engine.Engine
    +398            The SQLAlchemy engine for connecting to the database.
    +399        """
    +400        directory = os.getcwd()
    +401        if not url:
    +402            if not os.path.isdir(directory + "/db"):
    +403                os.mkdir(directory + "/db")
    +404            url = "sqlite:///{DB}/db/pnnl_lowres_gcms_compounds.sqlite".format(
    +405                DB=directory
    +406            )
    +407        return create_engine(url, poolclass=QueuePool)
     
    @@ -2785,22 +2848,24 @@
    Returns
    -
    398    def add_compound_list(self, data_dict_list):
    -399        """ Adds a list of compounds to the database.
    -400
    -401        Parameters
    -402        -----------
    -403        data_dict_list : list of dict
    -404            A list of dictionaries representing the compounds.
    -405        """
    -406        for data_dict in data_dict_list:
    -407            # print(data_dict.get('NUM PEAKS'))
    -408            if not data_dict.get('NUM PEAKS'):
    -409                data_dict['NUM PEAKS'] = len(data_dict.get('mz'))
    -410            if not data_dict.get('CASNO'):
    -411                data_dict['CASNO'] = data_dict.get('CAS')
    -412        
    -413        self.session.add_all([LowResolutionEICompound(**data_dict) for data_dict in data_dict_list] )
    +            
    413    def add_compound_list(self, data_dict_list):
    +414        """Adds a list of compounds to the database.
    +415
    +416        Parameters
    +417        -----------
    +418        data_dict_list : list of dict
    +419            A list of dictionaries representing the compounds.
    +420        """
    +421        for data_dict in data_dict_list:
    +422            # print(data_dict.get('NUM PEAKS'))
    +423            if not data_dict.get("NUM PEAKS"):
    +424                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
    +425            if not data_dict.get("CASNO"):
    +426                data_dict["CASNO"] = data_dict.get("CAS")
    +427
    +428        self.session.add_all(
    +429            [LowResolutionEICompound(**data_dict) for data_dict in data_dict_list]
    +430        )
     
    @@ -2827,22 +2892,22 @@
    Parameters
    -
    415    def add_compound(self, data_dict):
    -416        """ Adds a single compound to the database. 
    -417        
    -418        Parameters
    -419        -----------
    -420        data_dict : dict
    -421            A dictionary representing the compound.
    -422        
    -423        """
    -424        one_compound = LowResolutionEICompound(**data_dict)
    -425        self.session.add(one_compound)
    -426        self.commit()
    +            
    432    def add_compound(self, data_dict):
    +433        """Adds a single compound to the database.
    +434
    +435        Parameters
    +436        -----------
    +437        data_dict : dict
    +438            A dictionary representing the compound.
    +439
    +440        """
    +441        one_compound = LowResolutionEICompound(**data_dict)
    +442        self.session.add(one_compound)
    +443        self.commit()
     
    -

    Adds a single compound to the database.

    +

    Adds a single compound to the database.

    Parameters
    @@ -2865,14 +2930,13 @@
    Parameters
    -
    428    def commit(self):
    -429        """ Commits the changes to the database.
    -430        """
    -431        try:
    -432            self.session.commit()
    -433        except SQLAlchemyError as e:
    -434            self.session.rollback()
    -435            print(str(e))
    +            
    445    def commit(self):
    +446        """Commits the changes to the database."""
    +447        try:
    +448            self.session.commit()
    +449        except SQLAlchemyError as e:
    +450            self.session.rollback()
    +451            print(str(e))
     
    @@ -2892,31 +2956,34 @@
    Parameters
    -
    437    def row_to_dict(self, row):
    -438        """ Converts a database row to a dictionary.
    -439        
    -440        Parameters
    -441        -----------
    -442        row : sqlalchemy.engine.row.Row
    -443            A row from the database.
    -444        
    -445        Returns
    -446        --------
    -447        dict
    -448            A dictionary representing the compound.
    -449        """
    -450        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}        
    -451
    -452        data_dict['mz'] = frombuffer(data_dict.get('mz'), dtype="int32")
    -453        data_dict['abundance'] = frombuffer(data_dict.get('abundance'), dtype="int32")
    -454
    -455        if row.metadatar:
    -456            data_dict['metadata'] = {c.name: getattr(row.metadatar, c.name) for c in row.metadatar.__table__.columns}
    -457
    -458        else:
    -459            data_dict['metadata'] = None
    +            
    453    def row_to_dict(self, row):
    +454        """Converts a database row to a dictionary.
    +455
    +456        Parameters
    +457        -----------
    +458        row : sqlalchemy.engine.row.Row
    +459            A row from the database.
     460
    -461        return data_dict
    +461        Returns
    +462        --------
    +463        dict
    +464            A dictionary representing the compound.
    +465        """
    +466        data_dict = {c.name: getattr(row, c.name) for c in row.__table__.columns}
    +467
    +468        data_dict["mz"] = frombuffer(data_dict.get("mz"), dtype="int32")
    +469        data_dict["abundance"] = frombuffer(data_dict.get("abundance"), dtype="int32")
    +470
    +471        if row.metadatar:
    +472            data_dict["metadata"] = {
    +473                c.name: getattr(row.metadatar, c.name)
    +474                for c in row.metadatar.__table__.columns
    +475            }
    +476
    +477        else:
    +478            data_dict["metadata"] = None
    +479
    +480        return data_dict
     
    @@ -2949,17 +3016,19 @@
    Returns
    -
    463    def get_all(self,):
    -464        """ Retrieves all compounds from the database.
    -465        
    -466        Returns
    -467        --------
    -468        list
    -469            A list of dictionaries representing the compounds.
    -470        """
    -471        compounds = self.session.query(LowResolutionEICompound).all()
    -472
    -473        return [self.row_to_dict(compound) for compound in compounds]
    +            
    482    def get_all(
    +483        self,
    +484    ):
    +485        """Retrieves all compounds from the database.
    +486
    +487        Returns
    +488        --------
    +489        list
    +490            A list of dictionaries representing the compounds.
    +491        """
    +492        compounds = self.session.query(LowResolutionEICompound).all()
    +493
    +494        return [self.row_to_dict(compound) for compound in compounds]
     
    @@ -2985,24 +3054,29 @@
    Returns
    -
    475    def query_min_max_rt(self, min_max_rt, ):
    -476        """ Queries compounds based on retention time range.
    -477        
    -478        Parameters
    -479        -----------
    -480        min_max_rt : tuple
    -481            A tuple containing the minimum and maximum retention time values.
    -482        
    -483        Returns
    -484        --------
    -485        list
    -486            A list of dictionaries representing the compounds.
    -487        """
    -488        min_rt, max_rt = min_max_rt
    -489
    -490        compounds = self.session.query(LowResolutionEICompound).filter(LowResolutionEICompound.retention_time.between(min_rt, max_rt))    
    -491
    -492        return [self.row_to_dict(compound) for compound in compounds]
    +            
    496    def query_min_max_rt(
    +497        self,
    +498        min_max_rt,
    +499    ):
    +500        """Queries compounds based on retention time range.
    +501
    +502        Parameters
    +503        -----------
    +504        min_max_rt : tuple
    +505            A tuple containing the minimum and maximum retention time values.
    +506
    +507        Returns
    +508        --------
    +509        list
    +510            A list of dictionaries representing the compounds.
    +511        """
    +512        min_rt, max_rt = min_max_rt
    +513
    +514        compounds = self.session.query(LowResolutionEICompound).filter(
    +515            LowResolutionEICompound.retention_time.between(min_rt, max_rt)
    +516        )
    +517
    +518        return [self.row_to_dict(compound) for compound in compounds]
     
    @@ -3035,19 +3109,23 @@
    Returns
    -
    494    def query_min_max_ri(self, min_max_ri):
    -495        """ Queries compounds based on RI range.
    -496        
    -497        Parameters
    -498        -----------
    -499        min_max_ri : tuple
    -500            A tuple containing the minimum and maximum RI values.
    -501        """
    -502        min_ri, max_ri = min_max_ri
    -503
    -504        compounds = self.session.query(LowResolutionEICompound).filter(LowResolutionEICompound.ri.between(min_ri, max_ri)).all()
    -505
    -506        return [self.row_to_dict(compound) for compound in compounds]
    +            
    520    def query_min_max_ri(self, min_max_ri):
    +521        """Queries compounds based on RI range.
    +522
    +523        Parameters
    +524        -----------
    +525        min_max_ri : tuple
    +526            A tuple containing the minimum and maximum RI values.
    +527        """
    +528        min_ri, max_ri = min_max_ri
    +529
    +530        compounds = (
    +531            self.session.query(LowResolutionEICompound)
    +532            .filter(LowResolutionEICompound.ri.between(min_ri, max_ri))
    +533            .all()
    +534        )
    +535
    +536        return [self.row_to_dict(compound) for compound in compounds]
     
    @@ -3074,33 +3152,37 @@
    Parameters
    -
    508    def query_names_and_rt(self, min_max_rt, compound_names):
    -509        """ Queries compounds based on compound names and retention time range.
    -510        
    -511        Parameters
    -512        -----------
    -513        min_max_rt : tuple
    -514            A tuple containing the minimum and maximum retention time values.
    -515        compound_names : list
    -516            A list of compound names.
    -517        
    -518        Returns
    -519        --------
    -520        list
    -521            A list of dictionaries representing the compounds.
    -522        
    -523        """
    -524        min_rt, max_rt = min_max_rt
    -525
    -526        compounds = self.session.query(LowResolutionEICompound).filter(LowResolutionEICompound.name.in_(compound_names)).filter(
    -527                                        LowResolutionEICompound.retention_time >= min_rt,
    -528                                        LowResolutionEICompound.retention_time <= max_rt,
    -529                                        )
    -530        
    -531        #self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))    
    -532        # x = [self.row_to_dict(compound) for compound in compounds]
    -533        
    -534        return [self.row_to_dict(compound) for compound in compounds]
    +            
    538    def query_names_and_rt(self, min_max_rt, compound_names):
    +539        """Queries compounds based on compound names and retention time range.
    +540
    +541        Parameters
    +542        -----------
    +543        min_max_rt : tuple
    +544            A tuple containing the minimum and maximum retention time values.
    +545        compound_names : list
    +546            A list of compound names.
    +547
    +548        Returns
    +549        --------
    +550        list
    +551            A list of dictionaries representing the compounds.
    +552
    +553        """
    +554        min_rt, max_rt = min_max_rt
    +555
    +556        compounds = (
    +557            self.session.query(LowResolutionEICompound)
    +558            .filter(LowResolutionEICompound.name.in_(compound_names))
    +559            .filter(
    +560                LowResolutionEICompound.retention_time >= min_rt,
    +561                LowResolutionEICompound.retention_time <= max_rt,
    +562            )
    +563        )
    +564
    +565        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
    +566        # x = [self.row_to_dict(compound) for compound in compounds]
    +567
    +568        return [self.row_to_dict(compound) for compound in compounds]
     
    @@ -3135,37 +3217,40 @@
    Returns
    -
    536    def query_min_max_ri_and_rt(self, min_max_ri, min_max_rt, ):
    -537        """ Queries compounds based on RI range and retention time range.
    -538        
    -539        Parameters
    -540        -----------
    -541        min_max_ri : tuple
    -542            A tuple containing the minimum and maximum RI values.
    -543        min_max_rt : tuple
    -544            A tuple containing the minimum and maximum retention time values.
    -545        
    -546        Returns
    -547        --------
    -548        list
    -549            A list of dictionaries representing the compounds.
    -550            
    -551        """
    -552        min_ri, max_ri = min_max_ri
    -553        
    -554        min_rt, max_rt = min_max_rt
    -555
    -556        compounds = self.session.query(LowResolutionEICompound).filter(
    -557            LowResolutionEICompound.ri <= max_ri,
    -558            LowResolutionEICompound.ri >= min_ri,
    -559            LowResolutionEICompound.ri >= min_rt,
    -560            LowResolutionEICompound.ri >= max_rt,
    -561            )
    -562        
    -563        
    -564        #self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))    
    -565        
    -566        return [self.row_to_dict(compound) for compound in compounds]
    +            
    570    def query_min_max_ri_and_rt(
    +571        self,
    +572        min_max_ri,
    +573        min_max_rt,
    +574    ):
    +575        """Queries compounds based on RI range and retention time range.
    +576
    +577        Parameters
    +578        -----------
    +579        min_max_ri : tuple
    +580            A tuple containing the minimum and maximum RI values.
    +581        min_max_rt : tuple
    +582            A tuple containing the minimum and maximum retention time values.
    +583
    +584        Returns
    +585        --------
    +586        list
    +587            A list of dictionaries representing the compounds.
    +588
    +589        """
    +590        min_ri, max_ri = min_max_ri
    +591
    +592        min_rt, max_rt = min_max_rt
    +593
    +594        compounds = self.session.query(LowResolutionEICompound).filter(
    +595            LowResolutionEICompound.ri <= max_ri,
    +596            LowResolutionEICompound.ri >= min_ri,
    +597            LowResolutionEICompound.ri >= min_rt,
    +598            LowResolutionEICompound.ri >= max_rt,
    +599        )
    +600
    +601        # self.session.query.select(LowResolutionEICompound).where(between(LowResolutionEICompound.ri, min_ri, max_ri))
    +602
    +603        return [self.row_to_dict(compound) for compound in compounds]
     
    @@ -3200,22 +3285,22 @@
    Returns
    -
    568    def delete_compound(self, compound):
    -569        """ Deletes a compound from the database.
    -570        
    -571        Parameters
    -572        -----------
    -573        compound : LowResolutionEICompound
    -574            A compound object.
    -575        
    -576        """
    -577        try:
    -578            self.session.delete(compound)  
    -579            self.session.commit()  
    -580        
    -581        except SQLAlchemyError as e:
    -582            self.session.rollback()
    -583            print(str(e))
    +            
    605    def delete_compound(self, compound):
    +606        """Deletes a compound from the database.
    +607
    +608        Parameters
    +609        -----------
    +610        compound : LowResolutionEICompound
    +611            A compound object.
    +612
    +613        """
    +614        try:
    +615            self.session.delete(compound)
    +616            self.session.commit()
    +617
    +618        except SQLAlchemyError as e:
    +619            self.session.rollback()
    +620            print(str(e))
     
    @@ -3242,15 +3327,15 @@
    Parameters
    -
    585    def purge(self):
    -586        """ Deletes all compounds from the database table.
    -587        
    -588        Notes
    -589        ------
    -590        Careful, this will delete the entire database table.
    -591        """
    -592        self.session.query(LowResolutionEICompound).delete()
    -593        self.session.commit()  
    +            
    622    def purge(self):
    +623        """Deletes all compounds from the database table.
    +624
    +625        Notes
    +626        ------
    +627        Careful, this will delete the entire database table.
    +628        """
    +629        self.session.query(LowResolutionEICompound).delete()
    +630        self.session.commit()
     
    @@ -3274,14 +3359,13 @@
    Notes
    -
    595    def clear_data(self):
    -596        """ Clears all tables in the database.
    -597        """
    -598        meta = Base.metadata
    -599        for table in reversed(meta.sorted_tables):
    -600            print ('Clear table %s' % table)
    -601            self.session.execute(table.delete())
    -602        self.session.commit()
    +            
    632    def clear_data(self):
    +633        """Clears all tables in the database."""
    +634        meta = Base.metadata
    +635        for table in reversed(meta.sorted_tables):
    +636            print("Clear table %s" % table)
    +637            self.session.execute(table.delete())
    +638        self.session.commit()
     
    diff --git a/docs/corems/molecular_id/factory/MolecularLookupTable.html b/docs/corems/molecular_id/factory/MolecularLookupTable.html index c6b76f65..570fd1be 100644 --- a/docs/corems/molecular_id/factory/MolecularLookupTable.html +++ b/docs/corems/molecular_id/factory/MolecularLookupTable.html @@ -120,59 +120,59 @@

      1__author__ = "Yuri E. Corilo"
       2__date__ = "Jul 02, 2019"
       3
    -  4from copy import deepcopy
    -  5import itertools
    -  6import multiprocessing
    -  7import json
    -  8import cProfile
    -  9import io
    +  4import contextlib
    +  5import cProfile
    +  6import io
    +  7import itertools
    +  8import json
    +  9import multiprocessing
      10import pstats
    - 11import contextlib
    + 11from copy import deepcopy
      12from typing import Dict
      13
    - 14from sqlalchemy.orm import scoped_session
    + 14from sqlalchemy import create_engine, func
      15from sqlalchemy.orm import sessionmaker
    - 16from sqlalchemy.orm import load_only
    - 17from sqlalchemy.ext.declarative import declarative_base
    - 18from sqlalchemy import create_engine, func
    - 19from tqdm import tqdm
    - 20
    + 16from tqdm import tqdm
    + 17
    + 18from corems import chunks, timeit
    + 19from corems.encapsulation.constant import Atoms
    + 20from corems.encapsulation.factory.parameters import MSParameters
      21from corems.encapsulation.factory.processingSetting import MolecularLookupDictSettings
    - 22from corems.encapsulation.constant import Atoms
    - 23from corems.molecular_id.factory.molecularSQL import CarbonHydrogen, HeteroAtoms, MolecularFormulaLink
    - 24from corems.encapsulation.factory.parameters import MSParameters
    - 25from corems import chunks, timeit
    - 26from corems.molecular_id.factory.molecularSQL import MolForm_SQL
    - 27import os
    + 22from corems.molecular_id.factory.molecularSQL import (
    + 23    CarbonHydrogen,
    + 24    HeteroAtoms,
    + 25    MolecularFormulaLink,
    + 26    MolForm_SQL,
    + 27)
      28
    - 29@contextlib.contextmanager
    - 30def profiled():
    - 31    """ A context manager for profiling."""
    - 32    pr = cProfile.Profile()
    - 33    pr.enable()
    - 34    yield
    - 35    pr.disable()
    - 36    s = io.StringIO()
    - 37    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
    - 38    ps.print_stats()
    - 39    # uncomment this to see who's calling what
    - 40    # ps.print_callers()
    - 41    print(s.getvalue())
    - 42
    - 43def insert_database_worker(args):
    - 44    """ Inserts data into the database.
    - 45    """
    - 46    results, url = args
    - 47    
    - 48    if not url:
    - 49        
    - 50        url = 'sqlite:///db/molformulas.sqlite'
    + 29
    + 30@contextlib.contextmanager
    + 31def profiled():
    + 32    """A context manager for profiling."""
    + 33    pr = cProfile.Profile()
    + 34    pr.enable()
    + 35    yield
    + 36    pr.disable()
    + 37    s = io.StringIO()
    + 38    ps = pstats.Stats(pr, stream=s).sort_stats("cumulative")
    + 39    ps.print_stats()
    + 40    # uncomment this to see who's calling what
    + 41    # ps.print_callers()
    + 42    print(s.getvalue())
    + 43
    + 44
    + 45def insert_database_worker(args):
    + 46    """Inserts data into the database."""
    + 47    results, url = args
    + 48
    + 49    if not url:
    + 50        url = "sqlite:///db/molformulas.sqlite"
      51
    - 52    if url[0:6] == 'sqlite':
    - 53        engine = create_engine(url, echo = False)
    + 52    if url[0:6] == "sqlite":
    + 53        engine = create_engine(url, echo=False)
      54    else:
    - 55        engine = create_engine(url, echo = False, isolation_level="AUTOCOMMIT")
    - 56    
    + 55        engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT")
    + 56
      57    session_factory = sessionmaker(bind=engine)
      58    session = session_factory()
      59    insert_query = MolecularFormulaLink.__table__.insert().values(results)
    @@ -181,836 +181,851 @@ 

    62 session.close() 63 engine.dispose() 64 - 65class MolecularCombinations: - 66 """ A class for generating molecular formula combinations. - 67 - 68 Parameters - 69 ---------- - 70 molecular_search_settings : object - 71 An object containing user-defined settings. - 72 - 73 Attributes - 74 ---------- - 75 sql_db : MolForm_SQL - 76 The SQLite database object. - 77 len_existing_classes : int - 78 The number of existing classes in the SQLite database. - 79 odd_ch_id : list - 80 A list of odd carbon and hydrogen atom IDs. - 81 odd_ch_dict : list - 82 A list of odd carbon and hydrogen atom dictionaries. - 83 odd_ch_mass : list - 84 A list of odd carbon and hydrogen atom masses. - 85 odd_ch_dbe : list - 86 A list of odd carbon and hydrogen atom double bond equivalents. - 87 even_ch_id : list - 88 A list of even carbon and hydrogen atom IDs. - 89 even_ch_dict : list - 90 A list of even carbon and hydrogen atom dictionaries. - 91 even_ch_mass : list - 92 A list of even carbon and hydrogen atom masses. - 93 even_ch_dbe : list - 94 A list of even carbon and hydrogen atom double bond equivalents. - 95 - 96 Methods - 97 ------- - 98 * cProfile_worker(args) - 99 A cProfile worker for the get_mol_formulas function. -100 * check_database_get_class_list(molecular_search_settings) -101 Checks if the database has all the classes, if not create the missing classes. -102 * get_carbonsHydrogens(settings, odd_even) -103 Retrieves carbon and hydrogen atoms from the molecular lookup table based on user-defined settings. -104 * add_carbonsHydrogens(settings, existing_classes_objs) -105 Adds carbon and hydrogen atoms to the molecular lookup table based on user-defined settings. -106 * runworker(molecular_search_settings) -107 Runs the molecular formula lookup table worker. -108 * get_classes_in_order(molecular_search_settings) -109 Gets the classes in order. -110 * sort_classes(atoms_in_order, combination_dict) -111 Sorts the classes in order. 
-112 * get_fixed_initial_number_of_hydrogen(min_h, odd_even) -113 Gets the fixed initial number of hydrogen atoms. -114 * calc_mz(datadict, class_mass=0) -115 Calculates the mass-to-charge ratio (m/z) of a molecular formula. -116 * calc_dbe_class(datadict) -117 Calculates the double bond equivalent (DBE) of a molecular formula. -118 * populate_combinations(classe_tuple, settings) -119 Populates the combinations. -120 * get_or_add(SomeClass, kw) -121 Gets or adds a class. -122 * get_mol_formulas(odd_even_tag, classe_tuple, settings) -123 Gets the molecular formulas. -124 * get_h_odd_or_even(class_dict) -125 Gets the hydrogen odd or even. -126 * get_total_halogen_atoms(class_dict) -127 Gets the total number of halogen atoms. -128 * get_total_hetero_valence(class_dict) -129 Gets the total valence of heteroatoms other than N, F, Cl, and Br -130 """ -131 -132 def __init__(self, sql_db = None): -133 + 65 + 66class MolecularCombinations: + 67 """A class for generating molecular formula combinations. + 68 + 69 Parameters + 70 ---------- + 71 molecular_search_settings : object + 72 An object containing user-defined settings. + 73 + 74 Attributes + 75 ---------- + 76 sql_db : MolForm_SQL + 77 The SQLite database object. + 78 len_existing_classes : int + 79 The number of existing classes in the SQLite database. + 80 odd_ch_id : list + 81 A list of odd carbon and hydrogen atom IDs. + 82 odd_ch_dict : list + 83 A list of odd carbon and hydrogen atom dictionaries. + 84 odd_ch_mass : list + 85 A list of odd carbon and hydrogen atom masses. + 86 odd_ch_dbe : list + 87 A list of odd carbon and hydrogen atom double bond equivalents. + 88 even_ch_id : list + 89 A list of even carbon and hydrogen atom IDs. + 90 even_ch_dict : list + 91 A list of even carbon and hydrogen atom dictionaries. + 92 even_ch_mass : list + 93 A list of even carbon and hydrogen atom masses. + 94 even_ch_dbe : list + 95 A list of even carbon and hydrogen atom double bond equivalents. 
+ 96 + 97 Methods + 98 ------- + 99 * cProfile_worker(args) +100 A cProfile worker for the get_mol_formulas function. +101 * check_database_get_class_list(molecular_search_settings) +102 Checks if the database has all the classes, if not create the missing classes. +103 * get_carbonsHydrogens(settings, odd_even) +104 Retrieves carbon and hydrogen atoms from the molecular lookup table based on user-defined settings. +105 * add_carbonsHydrogens(settings, existing_classes_objs) +106 Adds carbon and hydrogen atoms to the molecular lookup table based on user-defined settings. +107 * runworker(molecular_search_settings) +108 Runs the molecular formula lookup table worker. +109 * get_classes_in_order(molecular_search_settings) +110 Gets the classes in order. +111 * sort_classes(atoms_in_order, combination_dict) +112 Sorts the classes in order. +113 * get_fixed_initial_number_of_hydrogen(min_h, odd_even) +114 Gets the fixed initial number of hydrogen atoms. +115 * calc_mz(datadict, class_mass=0) +116 Calculates the mass-to-charge ratio (m/z) of a molecular formula. +117 * calc_dbe_class(datadict) +118 Calculates the double bond equivalent (DBE) of a molecular formula. +119 * populate_combinations(classe_tuple, settings) +120 Populates the combinations. +121 * get_or_add(SomeClass, kw) +122 Gets or adds a class. +123 * get_mol_formulas(odd_even_tag, classe_tuple, settings) +124 Gets the molecular formulas. +125 * get_h_odd_or_even(class_dict) +126 Gets the hydrogen odd or even. +127 * get_total_halogen_atoms(class_dict) +128 Gets the total number of halogen atoms. 
+129 * get_total_hetero_valence(class_dict) +130 Gets the total valence of heteroatoms other than N, F, Cl, and Br +131 """ +132 +133 def __init__(self, sql_db=None): 134 if not sql_db: -135 -136 self.sql_db = MolForm_SQL() -137 else: -138 -139 self.sql_db = sql_db -140 -141 def cProfile_worker(self, args): -142 """ cProfile worker for the get_mol_formulas function""" -143 cProfile.runctx('self.get_mol_formulas(*args)', globals(), locals(), 'mf_database_cprofile.prof') -144 -145 def check_database_get_class_list(self, molecular_search_settings): -146 """ check if the database has all the classes, if not create the missing classes -147 -148 Parameters -149 ---------- -150 molecular_search_settings : object -151 An object containing user-defined settings. -152 -153 Returns -154 ------- -155 list -156 list of tuples with the class name and the class dictionary -157 """ -158 all_class_to_create = [] -159 -160 classes_dict = self.get_classes_in_order(molecular_search_settings) -161 -162 class_str_set = set(classes_dict.keys()) -163 -164 existing_classes_objs = self.sql_db.session.query(HeteroAtoms).distinct().all() -165 -166 existing_classes_str = set([classe.name for classe in existing_classes_objs]) -167 -168 self.len_existing_classes = len(existing_classes_str) -169 -170 class_to_create = class_str_set - existing_classes_str -171 -172 class_count= len(existing_classes_objs) -173 -174 data_classes = list() -175 for index, class_str in enumerate(class_to_create): -176 -177 class_dict = classes_dict.get(class_str) -178 halogen_count = self.get_total_halogen_atoms(class_dict) -179 data_classes.append({"name":class_str, "id":class_count+ index + 1, "halogensCount": halogen_count}) -180 -181 #data_classes = [{"name":class_str, "id":class_count+ index + 1} for index, class_str in enumerate(class_to_create)] -182 -183 if data_classes: -184 -185 list_insert_chunks = chunks(data_classes, self.sql_db.chunks_count) -186 for insert_chunk in list_insert_chunks: -187 insert_query = 
HeteroAtoms.__table__.insert().values(insert_chunk) -188 self.sql_db.session.execute(insert_query) -189 -190 for index, class_str in enumerate(class_to_create): -191 -192 class_tuple = (class_str, classes_dict.get(class_str), class_count+ index + 1) -193 -194 all_class_to_create.append(class_tuple) -195 -196 return [(c_s, c_d) for c_s, c_d in classes_dict.items()], all_class_to_create, existing_classes_objs -197 -198 def get_carbonsHydrogens(self, settings, odd_even): -199 """ Retrieve carbon and hydrogen atoms from the molecular lookup table based on user-defined settings. -200 -201 Parameters -202 ---------- -203 settings : object -204 An object containing user-defined settings. -205 odd_even : str -206 A string indicating whether to retrieve even or odd hydrogen atoms. -207 -208 Returns -209 ------- -210 list -211 A list of CarbonHydrogen objects that satisfy the specified conditions. -212 """ -213 operator = '==' if odd_even == 'even' else '!=' -214 usedAtoms = settings.usedAtoms -215 user_min_c, user_max_c = usedAtoms.get('C') -216 user_min_h, user_max_h = usedAtoms.get('H') -217 -218 return eval("self.sql_db.session.query(CarbonHydrogen).filter(" -219 "CarbonHydrogen.C >= user_min_c," -220 "CarbonHydrogen.H >= user_min_h," -221 "CarbonHydrogen.C <= user_max_c," -222 "CarbonHydrogen.H <= user_max_h," -223 "CarbonHydrogen.H % 2" + operator+ "0).all()") -224 -225 def add_carbonsHydrogens(self, settings, existing_classes_objs): -226 """ Add carbon and hydrogen atoms to the molecular lookup table based on user-defined settings. -227 -228 Parameters -229 ---------- -230 settings : object -231 An object containing user-defined settings. -232 existing_classes_objs : list -233 A list of HeteroAtoms objects. 
-234 """ -235 usedAtoms = settings.usedAtoms -236 -237 user_min_c, user_max_c = usedAtoms.get('C') -238 user_min_h, user_max_h = usedAtoms.get('H') -239 -240 query_obj = self.sql_db.session.query(func.max(CarbonHydrogen.C).label("max_c"), -241 func.min(CarbonHydrogen.C).label("min_c"), -242 func.max(CarbonHydrogen.H).label("max_h"), -243 func.min(CarbonHydrogen.H).label("min_h"), -244 ) -245 -246 -247 database = query_obj.first() -248 if database.max_c == user_max_c and database.min_c == user_min_c and database.max_h == user_max_h and database.min_h == user_min_h: -249 #all data is already available at the database -250 pass -251 -252 else: -253 -254 current_count = self.sql_db.session.query(CarbonHydrogen.C).count() -255 -256 databaseCarbonHydrogen = self.sql_db.session.query(CarbonHydrogen).all() -257 -258 userCarbon = set(range(user_min_c, user_max_c + 1)) -259 userHydrogen = set(range(user_min_h, user_max_h + 1)) -260 -261 carbon_hydrogen_objs_database = {} -262 for obj in databaseCarbonHydrogen: -263 -264 str_data = "C:{},H:{}".format(obj.C, obj.H) -265 carbon_hydrogen_objs_database[str_data] = str_data -266 -267 carbon_hydrogen_objs_to_create = {'even': {}, 'odd': {}} -268 -269 list_ch_obj_to_add = list() -270 i = 0 -271 for comb in itertools.product(userCarbon, userHydrogen): -272 -273 C = comb[0] -274 H = comb[1] -275 data = {"C":C, -276 "H":H, -277 } -278 -279 data_insert = {"C":C, -280 "H":H, -281 } -282 -283 str_data = "C:{},H:{}".format(C,H) -284 -285 if not str_data in carbon_hydrogen_objs_database.keys(): -286 -287 label = 'even' if comb[1]%2 == 0 else 'odd' -288 data["mass"] = (C * Atoms.atomic_masses.get('C')) + (H * Atoms.atomic_masses.get('H')) -289 data["dbe"] = C - (H/2) + 1 -290 data["id"] = i + current_count + 1 -291 data_insert["id"] = i + current_count + 1 -292 i = i + 1 -293 carbon_hydrogen_objs_to_create[label][str_data] = data -294 -295 list_ch_obj_to_add.append(data_insert) -296 -297 if list_ch_obj_to_add: -298 # insert carbon hydrogen 
objs -299 list_insert_chunks = chunks(list_ch_obj_to_add, self.sql_db.chunks_count) -300 for insert_chunk in list_insert_chunks: -301 insert_query = CarbonHydrogen.__table__.insert().values(insert_chunk) -302 self.sql_db.session.execute(insert_query) -303 self.sql_db.session.commit() -304 -305 -306 list_molecular_form= list() -307 for classe_obj in existing_classes_objs: -308 -309 classe_dict = classe_obj.to_dict() -310 classe_mass = self.calc_mz(classe_dict) -311 classe_dbe = self.calc_dbe_class(classe_dict) -312 -313 odd_even_label = self.get_h_odd_or_even(classe_dict) -314 -315 ch_datalist = carbon_hydrogen_objs_to_create.get(odd_even_label).values() -316 -317 for ch_dict in ch_datalist: -318 mass = ch_dict.get('mass') + classe_mass -319 dbe = ch_dict.get('dbe') + classe_dbe -320 -321 if settings.min_mz <= mass <= settings.max_mz: -322 -323 if settings.min_dbe <= dbe <= settings.max_dbe: -324 -325 list_molecular_form.append( {"heteroAtoms_id":classe_obj.id, -326 "carbonHydrogen_id":ch_dict.get('id'), -327 "mass":mass, "DBE":dbe}) -328 -329 list_insert_chunks = chunks(list_molecular_form, self.sql_db.chunks_count) -330 for insert_chunk in list_insert_chunks: -331 insert_query = MolecularFormulaLink.__table__.insert().values(insert_chunk) -332 self.sql_db.session.execute(insert_query) -333 self.sql_db.session.commit() -334 -335 -336 @timeit -337 def runworker(self, molecular_search_settings): -338 """ Run the molecular formula lookup table worker. -339 -340 Parameters -341 ---------- -342 molecular_search_settings : object -343 An object containing user-defined settings. -344 -345 Returns -346 ------- -347 list -348 A list of tuples with the class name and the class dictionary. 
-349 -350 -351 """ -352 -353 classes_list, class_to_create, existing_classes_objs = self.check_database_get_class_list(molecular_search_settings) -354 -355 settings = MolecularLookupDictSettings() -356 settings.usedAtoms = deepcopy(molecular_search_settings.usedAtoms) -357 settings.url_database = molecular_search_settings.url_database -358 settings.db_jobs = molecular_search_settings.db_jobs -359 -360 self.add_carbonsHydrogens(settings, existing_classes_objs) -361 -362 if class_to_create: -363 -364 settings = MolecularLookupDictSettings() -365 settings.usedAtoms = deepcopy(molecular_search_settings.usedAtoms) -366 settings.url_database = molecular_search_settings.url_database -367 settings.db_jobs = molecular_search_settings.db_jobs -368 -369 self.sql_db.session.commit() -370 odd_ch_obj = self.get_carbonsHydrogens(settings,'odd') -371 self.odd_ch_id = [obj.id for obj in odd_ch_obj] -372 self.odd_ch_dict = [{'C':obj.C, 'H':obj.H} for obj in odd_ch_obj] -373 self.odd_ch_mass = [obj.mass for obj in odd_ch_obj] -374 self.odd_ch_dbe = [obj.dbe for obj in odd_ch_obj] -375 -376 even_ch_obj = self.get_carbonsHydrogens(settings, 'even') -377 self.even_ch_id = [obj.id for obj in even_ch_obj] -378 self.even_ch_dict = [{'C':obj.C, 'H':obj.H} for obj in even_ch_obj] -379 self.even_ch_mass = [obj.mass for obj in even_ch_obj] -380 self.even_ch_dbe = [obj.dbe for obj in even_ch_obj] +135 self.sql_db = MolForm_SQL() +136 else: +137 self.sql_db = sql_db +138 +139 def cProfile_worker(self, args): +140 """cProfile worker for the get_mol_formulas function""" +141 cProfile.runctx( +142 "self.get_mol_formulas(*args)", +143 globals(), +144 locals(), +145 "mf_database_cprofile.prof", +146 ) +147 +148 def check_database_get_class_list(self, molecular_search_settings): +149 """check if the database has all the classes, if not create the missing classes +150 +151 Parameters +152 ---------- +153 molecular_search_settings : object +154 An object containing user-defined settings. 
+155 +156 Returns +157 ------- +158 list +159 list of tuples with the class name and the class dictionary +160 """ +161 all_class_to_create = [] +162 +163 classes_dict = self.get_classes_in_order(molecular_search_settings) +164 +165 class_str_set = set(classes_dict.keys()) +166 +167 existing_classes_objs = self.sql_db.session.query(HeteroAtoms).distinct().all() +168 +169 existing_classes_str = set([classe.name for classe in existing_classes_objs]) +170 +171 self.len_existing_classes = len(existing_classes_str) +172 +173 class_to_create = class_str_set - existing_classes_str +174 +175 class_count = len(existing_classes_objs) +176 +177 data_classes = list() +178 for index, class_str in enumerate(class_to_create): +179 class_dict = classes_dict.get(class_str) +180 halogen_count = self.get_total_halogen_atoms(class_dict) +181 data_classes.append( +182 { +183 "name": class_str, +184 "id": class_count + index + 1, +185 "halogensCount": halogen_count, +186 } +187 ) +188 +189 # data_classes = [{"name":class_str, "id":class_count+ index + 1} for index, class_str in enumerate(class_to_create)] +190 +191 if data_classes: +192 list_insert_chunks = chunks(data_classes, self.sql_db.chunks_count) +193 for insert_chunk in list_insert_chunks: +194 insert_query = HeteroAtoms.__table__.insert().values(insert_chunk) +195 self.sql_db.session.execute(insert_query) +196 +197 for index, class_str in enumerate(class_to_create): +198 class_tuple = ( +199 class_str, +200 classes_dict.get(class_str), +201 class_count + index + 1, +202 ) +203 +204 all_class_to_create.append(class_tuple) +205 +206 return ( +207 [(c_s, c_d) for c_s, c_d in classes_dict.items()], +208 all_class_to_create, +209 existing_classes_objs, +210 ) +211 +212 def get_carbonsHydrogens(self, settings, odd_even): +213 """Retrieve carbon and hydrogen atoms from the molecular lookup table based on user-defined settings. +214 +215 Parameters +216 ---------- +217 settings : object +218 An object containing user-defined settings. 
+219 odd_even : str +220 A string indicating whether to retrieve even or odd hydrogen atoms. +221 +222 Returns +223 ------- +224 list +225 A list of CarbonHydrogen objects that satisfy the specified conditions. +226 """ +227 operator = "==" if odd_even == "even" else "!=" +228 usedAtoms = settings.usedAtoms +229 user_min_c, user_max_c = usedAtoms.get("C") +230 user_min_h, user_max_h = usedAtoms.get("H") +231 +232 return eval( +233 "self.sql_db.session.query(CarbonHydrogen).filter(" +234 "CarbonHydrogen.C >= user_min_c," +235 "CarbonHydrogen.H >= user_min_h," +236 "CarbonHydrogen.C <= user_max_c," +237 "CarbonHydrogen.H <= user_max_h," +238 "CarbonHydrogen.H % 2" + operator + "0).all()" +239 ) +240 +241 def add_carbonsHydrogens(self, settings, existing_classes_objs): +242 """Add carbon and hydrogen atoms to the molecular lookup table based on user-defined settings. +243 +244 Parameters +245 ---------- +246 settings : object +247 An object containing user-defined settings. +248 existing_classes_objs : list +249 A list of HeteroAtoms objects. 
+250 """ +251 usedAtoms = settings.usedAtoms +252 +253 user_min_c, user_max_c = usedAtoms.get("C") +254 user_min_h, user_max_h = usedAtoms.get("H") +255 +256 query_obj = self.sql_db.session.query( +257 func.max(CarbonHydrogen.C).label("max_c"), +258 func.min(CarbonHydrogen.C).label("min_c"), +259 func.max(CarbonHydrogen.H).label("max_h"), +260 func.min(CarbonHydrogen.H).label("min_h"), +261 ) +262 +263 database = query_obj.first() +264 if ( +265 database.max_c == user_max_c +266 and database.min_c == user_min_c +267 and database.max_h == user_max_h +268 and database.min_h == user_min_h +269 ): +270 # all data is already available at the database +271 pass +272 +273 else: +274 current_count = self.sql_db.session.query(CarbonHydrogen.C).count() +275 +276 databaseCarbonHydrogen = self.sql_db.session.query(CarbonHydrogen).all() +277 +278 userCarbon = set(range(user_min_c, user_max_c + 1)) +279 userHydrogen = set(range(user_min_h, user_max_h + 1)) +280 +281 carbon_hydrogen_objs_database = {} +282 for obj in databaseCarbonHydrogen: +283 str_data = "C:{},H:{}".format(obj.C, obj.H) +284 carbon_hydrogen_objs_database[str_data] = str_data +285 +286 carbon_hydrogen_objs_to_create = {"even": {}, "odd": {}} +287 +288 list_ch_obj_to_add = list() +289 i = 0 +290 for comb in itertools.product(userCarbon, userHydrogen): +291 C = comb[0] +292 H = comb[1] +293 data = { +294 "C": C, +295 "H": H, +296 } +297 +298 data_insert = { +299 "C": C, +300 "H": H, +301 } +302 +303 str_data = "C:{},H:{}".format(C, H) +304 +305 if not str_data in carbon_hydrogen_objs_database.keys(): +306 label = "even" if comb[1] % 2 == 0 else "odd" +307 data["mass"] = (C * Atoms.atomic_masses.get("C")) + ( +308 H * Atoms.atomic_masses.get("H") +309 ) +310 data["dbe"] = C - (H / 2) + 1 +311 data["id"] = i + current_count + 1 +312 data_insert["id"] = i + current_count + 1 +313 i = i + 1 +314 carbon_hydrogen_objs_to_create[label][str_data] = data +315 +316 list_ch_obj_to_add.append(data_insert) +317 +318 if 
list_ch_obj_to_add: +319 # insert carbon hydrogen objs +320 list_insert_chunks = chunks( +321 list_ch_obj_to_add, self.sql_db.chunks_count +322 ) +323 for insert_chunk in list_insert_chunks: +324 insert_query = CarbonHydrogen.__table__.insert().values( +325 insert_chunk +326 ) +327 self.sql_db.session.execute(insert_query) +328 self.sql_db.session.commit() +329 +330 list_molecular_form = list() +331 for classe_obj in existing_classes_objs: +332 classe_dict = classe_obj.to_dict() +333 classe_mass = self.calc_mz(classe_dict) +334 classe_dbe = self.calc_dbe_class(classe_dict) +335 +336 odd_even_label = self.get_h_odd_or_even(classe_dict) +337 +338 ch_datalist = carbon_hydrogen_objs_to_create.get( +339 odd_even_label +340 ).values() +341 +342 for ch_dict in ch_datalist: +343 mass = ch_dict.get("mass") + classe_mass +344 dbe = ch_dict.get("dbe") + classe_dbe +345 +346 if settings.min_mz <= mass <= settings.max_mz: +347 if settings.min_dbe <= dbe <= settings.max_dbe: +348 list_molecular_form.append( +349 { +350 "heteroAtoms_id": classe_obj.id, +351 "carbonHydrogen_id": ch_dict.get("id"), +352 "mass": mass, +353 "DBE": dbe, +354 } +355 ) +356 +357 list_insert_chunks = chunks( +358 list_molecular_form, self.sql_db.chunks_count +359 ) +360 for insert_chunk in list_insert_chunks: +361 insert_query = MolecularFormulaLink.__table__.insert().values( +362 insert_chunk +363 ) +364 self.sql_db.session.execute(insert_query) +365 self.sql_db.session.commit() +366 +367 @timeit +368 def runworker(self, molecular_search_settings): +369 """Run the molecular formula lookup table worker. +370 +371 Parameters +372 ---------- +373 molecular_search_settings : object +374 An object containing user-defined settings. +375 +376 Returns +377 ------- +378 list +379 A list of tuples with the class name and the class dictionary. 
+380 381 -382 all_results= list() -383 for class_tuple in tqdm(class_to_create): -384 -385 results = self.populate_combinations(class_tuple, settings) -386 all_results.extend(results) -387 if settings.db_jobs == 1: -388 #if len(all_results) >= self.sql_db.chunks_count: -389 list_insert_chunks = list(chunks(results, self.sql_db.chunks_count)) -390 for chunk in list_insert_chunks: -391 insert_query = MolecularFormulaLink.__table__.insert().values(chunk) -392 self.sql_db.session.execute(insert_query) -393 #all_results = list() -394 self.sql_db.session.commit() -395 # each chunk takes ~600Mb of memory, so if using 8 processes the total free memory needs to be 5GB -396 if settings.db_jobs > 1: -397 list_insert_chunks = list(chunks(all_results, self.sql_db.chunks_count)) -398 print( "Started database insert using {} iterations for a total of {} rows".format(len(list_insert_chunks), len(all_results))) -399 worker_args = [(chunk, settings.url_database) for chunk in list_insert_chunks] -400 p = multiprocessing.Pool(settings.db_jobs) -401 for class_list in tqdm(p.imap_unordered(insert_database_worker, worker_args)): -402 pass -403 p.close() -404 p.join() -405 -406 return classes_list -407 -408 def get_classes_in_order(self, molecular_search_settings): -409 """ Get the classes in order -410 -411 Parameters -412 ---------- -413 molecular_search_settings : object -414 An object containing user-defined settings. -415 -416 Returns -417 ------- -418 dict -419 A dictionary of classes in order. 
-420 structure is ('HC', {'HC': 1}) -421 """ -422 -423 usedAtoms = deepcopy(molecular_search_settings.usedAtoms) -424 -425 usedAtoms.pop("C") -426 usedAtoms.pop("H") -427 -428 min_n, max_n = usedAtoms.get('N') if usedAtoms.get('N') else (0,0) -429 min_o, max_o = usedAtoms.get('O') if usedAtoms.get('O') else (0,0) -430 min_s, max_s = usedAtoms.get('S') if usedAtoms.get('S') else (0,0) -431 min_p, max_p = usedAtoms.get('P') if usedAtoms.get('P') else (0,0) -432 -433 possible_n = [n for n in range(min_n, max_n + 1)] -434 possible_o = [o for o in range(min_o, max_o + 1)] -435 possible_s = [s for s in range(min_s, max_s + 1)] -436 possible_p = [p for p in range(min_p, max_p + 1)] -437 -438 atoms_in_order = ['N', 'O', 'S', 'P'] -439 -440 classe_in_order = {} -441 -442 all_atoms_tuples = itertools.product(possible_n, possible_o, -443 possible_s, possible_p) -444 -445 for atom in atoms_in_order: -446 usedAtoms.pop(atom, None) -447 -448 for selected_atom, min_max_tuple in usedAtoms.items(): -449 -450 min_x = min_max_tuple[0] -451 max_x = min_max_tuple[1] -452 -453 -454 possible_x = [x for x in range(min_x, max_x + 1)] -455 -456 all_atoms_tuples = itertools.product(all_atoms_tuples, possible_x) -457 all_atoms_tuples = [all_atoms_combined[0] + (all_atoms_combined[1],) for all_atoms_combined in -458 all_atoms_tuples] -459 atoms_in_order.append(selected_atom) -460 -461 for all_atoms_tuple in all_atoms_tuples: -462 -463 classe_str = '' -464 classe_dict = {} -465 -466 for each_atoms_index, atom_number in enumerate(all_atoms_tuple): -467 -468 if atom_number != 0: -469 classe_dict[atoms_in_order[each_atoms_index]] = atom_number -470 -471 if not classe_dict: -472 classe_in_order['HC'] = {"HC": ""} -473 continue -474 -475 classe_str =json.dumps(classe_dict) -476 -477 if len(classe_str) > 0: -478 -479 classe_in_order[classe_str] = classe_dict -480 -481 classe_in_order_dict = self.sort_classes(atoms_in_order, classe_in_order) -482 -483 return classe_in_order_dict -484 -485 
@staticmethod -486 def sort_classes( atoms_in_order, combination_dict) -> Dict[str, Dict[str, int]]: -487 """ Sort the classes in order -488 -489 Parameters -490 ---------- -491 atoms_in_order : list -492 A list of atoms in order. -493 combination_dict : dict -494 A dictionary of classes. -495 -496 Returns -497 ------- -498 dict -499 A dictionary of classes in order. -500 """ -501 #ensures atoms are always in the order defined at atoms_in_order list -502 join_dict_classes = dict() -503 atoms_in_order = ['N','S','P','O'] + atoms_in_order[4:] + ['HC'] -504 -505 sort_method = lambda atoms_keys: [atoms_in_order.index(atoms_keys)] -506 for class_str, class_dict in combination_dict.items(): -507 -508 sorted_dict_keys = sorted(class_dict, key = sort_method) -509 class_dict = { atom: class_dict[atom] for atom in sorted_dict_keys} -510 class_str = json.dumps(class_dict) -511 # using json for the new database, class -512 # class_str = ' '.join([atom + str(class_dict[atom]) for atom in sorted_dict_keys]) -513 join_dict_classes[class_str] = class_dict -514 -515 return join_dict_classes +382 """ +383 +384 classes_list, class_to_create, existing_classes_objs = ( +385 self.check_database_get_class_list(molecular_search_settings) +386 ) +387 +388 settings = MolecularLookupDictSettings() +389 settings.usedAtoms = deepcopy(molecular_search_settings.usedAtoms) +390 settings.url_database = molecular_search_settings.url_database +391 settings.db_jobs = molecular_search_settings.db_jobs +392 +393 self.add_carbonsHydrogens(settings, existing_classes_objs) +394 +395 if class_to_create: +396 settings = MolecularLookupDictSettings() +397 settings.usedAtoms = deepcopy(molecular_search_settings.usedAtoms) +398 settings.url_database = molecular_search_settings.url_database +399 settings.db_jobs = molecular_search_settings.db_jobs +400 +401 self.sql_db.session.commit() +402 odd_ch_obj = self.get_carbonsHydrogens(settings, "odd") +403 self.odd_ch_id = [obj.id for obj in odd_ch_obj] +404 
self.odd_ch_dict = [{"C": obj.C, "H": obj.H} for obj in odd_ch_obj] +405 self.odd_ch_mass = [obj.mass for obj in odd_ch_obj] +406 self.odd_ch_dbe = [obj.dbe for obj in odd_ch_obj] +407 +408 even_ch_obj = self.get_carbonsHydrogens(settings, "even") +409 self.even_ch_id = [obj.id for obj in even_ch_obj] +410 self.even_ch_dict = [{"C": obj.C, "H": obj.H} for obj in even_ch_obj] +411 self.even_ch_mass = [obj.mass for obj in even_ch_obj] +412 self.even_ch_dbe = [obj.dbe for obj in even_ch_obj] +413 +414 all_results = list() +415 for class_tuple in tqdm(class_to_create): +416 results = self.populate_combinations(class_tuple, settings) +417 all_results.extend(results) +418 if settings.db_jobs == 1: +419 # if len(all_results) >= self.sql_db.chunks_count: +420 list_insert_chunks = list(chunks(results, self.sql_db.chunks_count)) +421 for chunk in list_insert_chunks: +422 insert_query = MolecularFormulaLink.__table__.insert().values( +423 chunk +424 ) +425 self.sql_db.session.execute(insert_query) +426 # all_results = list() +427 self.sql_db.session.commit() +428 # each chunk takes ~600Mb of memory, so if using 8 processes the total free memory needs to be 5GB +429 if settings.db_jobs > 1: +430 list_insert_chunks = list(chunks(all_results, self.sql_db.chunks_count)) +431 print( +432 "Started database insert using {} iterations for a total of {} rows".format( +433 len(list_insert_chunks), len(all_results) +434 ) +435 ) +436 worker_args = [ +437 (chunk, settings.url_database) for chunk in list_insert_chunks +438 ] +439 p = multiprocessing.Pool(settings.db_jobs) +440 for class_list in tqdm( +441 p.imap_unordered(insert_database_worker, worker_args) +442 ): +443 pass +444 p.close() +445 p.join() +446 +447 return classes_list +448 +449 def get_classes_in_order(self, molecular_search_settings): +450 """Get the classes in order +451 +452 Parameters +453 ---------- +454 molecular_search_settings : object +455 An object containing user-defined settings. 
+456 +457 Returns +458 ------- +459 dict +460 A dictionary of classes in order. +461 structure is ('HC', {'HC': 1}) +462 """ +463 +464 usedAtoms = deepcopy(molecular_search_settings.usedAtoms) +465 +466 usedAtoms.pop("C") +467 usedAtoms.pop("H") +468 +469 min_n, max_n = usedAtoms.get("N") if usedAtoms.get("N") else (0, 0) +470 min_o, max_o = usedAtoms.get("O") if usedAtoms.get("O") else (0, 0) +471 min_s, max_s = usedAtoms.get("S") if usedAtoms.get("S") else (0, 0) +472 min_p, max_p = usedAtoms.get("P") if usedAtoms.get("P") else (0, 0) +473 +474 possible_n = [n for n in range(min_n, max_n + 1)] +475 possible_o = [o for o in range(min_o, max_o + 1)] +476 possible_s = [s for s in range(min_s, max_s + 1)] +477 possible_p = [p for p in range(min_p, max_p + 1)] +478 +479 atoms_in_order = ["N", "O", "S", "P"] +480 +481 classe_in_order = {} +482 +483 all_atoms_tuples = itertools.product( +484 possible_n, possible_o, possible_s, possible_p +485 ) +486 +487 for atom in atoms_in_order: +488 usedAtoms.pop(atom, None) +489 +490 for selected_atom, min_max_tuple in usedAtoms.items(): +491 min_x = min_max_tuple[0] +492 max_x = min_max_tuple[1] +493 +494 possible_x = [x for x in range(min_x, max_x + 1)] +495 +496 all_atoms_tuples = itertools.product(all_atoms_tuples, possible_x) +497 all_atoms_tuples = [ +498 all_atoms_combined[0] + (all_atoms_combined[1],) +499 for all_atoms_combined in all_atoms_tuples +500 ] +501 atoms_in_order.append(selected_atom) +502 +503 for all_atoms_tuple in all_atoms_tuples: +504 classe_str = "" +505 classe_dict = {} +506 +507 for each_atoms_index, atom_number in enumerate(all_atoms_tuple): +508 if atom_number != 0: +509 classe_dict[atoms_in_order[each_atoms_index]] = atom_number +510 +511 if not classe_dict: +512 classe_in_order["HC"] = {"HC": ""} +513 continue +514 +515 classe_str = json.dumps(classe_dict) 516 -517 @staticmethod -518 def get_fixed_initial_number_of_hydrogen( min_h, odd_even): -519 """ Get the fixed initial number of hydrogen atoms 
-520 -521 Parameters -522 ---------- -523 min_h : int -524 The minimum number of hydrogen atoms. -525 odd_even : str -526 A string indicating whether to retrieve even or odd hydrogen atoms. -527 """ -528 remaining_h = min_h % 2 -529 -530 if odd_even == 'even': -531 -532 if remaining_h == 0: return remaining_h -533 -534 else: return remaining_h + 1 -535 -536 else: -537 -538 if remaining_h == 0: return remaining_h + 1 -539 -540 else: return remaining_h -541 -542 def calc_mz(self, datadict, class_mass=0): -543 """ Calculate the mass-to-charge ratio (m/z) of a molecular formula. -544 -545 Parameters -546 ---------- -547 datadict : dict -548 A dictionary of classes. -549 class_mass : int -550 The mass of the class. -551 -552 Returns -553 ------- -554 float -555 The mass-to-charge ratio (m/z) of a molecular formula. -556 """ -557 mass = class_mass -558 -559 for atom in datadict.keys(): -560 -561 if atom != 'HC': -562 -563 mass = mass + Atoms.atomic_masses[atom] * datadict.get(atom) -564 -565 return mass -566 -567 def calc_dbe_class(self, datadict): -568 """ Calculate the double bond equivalent (DBE) of a molecular formula. -569 -570 Parameters -571 ---------- -572 datadict : dict -573 A dictionary of classes. -574 -575 Returns -576 ------- -577 float -578 The double bond equivalent (DBE) of a molecular formula. 
-579 """ -580 init_dbe = 0 -581 for atom in datadict.keys(): -582 -583 if atom == 'HC': -584 continue -585 -586 n_atom = int(datadict.get(atom)) -587 -588 clean_atom = ''.join([i for i in atom if not i.isdigit()]) -589 -590 valencia = MSParameters.molecular_search.used_atom_valences.get(clean_atom) -591 -592 if type(valencia) is tuple: -593 valencia = valencia[0] -594 if valencia > 0: -595 #print atom, valencia, n_atom, init_dbe -596 init_dbe = init_dbe + (n_atom * (valencia - 2)) -597 else: -598 continue -599 -600 return (0.5 * init_dbe) -601 -602 def populate_combinations(self, classe_tuple, settings): -603 """ Populate the combinations -604 -605 Parameters -606 ---------- -607 classe_tuple : tuple -608 A tuple containing the class name, the class dictionary, and the class ID. -609 settings : object -610 An object containing user-defined settings. -611 -612 Returns -613 ------- -614 list -615 A list of molecular formula data dictionaries. -616 """ -617 ion_charge = 0 -618 -619 class_dict = classe_tuple[1] -620 odd_or_even = self.get_h_odd_or_even(class_dict) -621 -622 return self.get_mol_formulas(odd_or_even, classe_tuple, settings) -623 -624 def get_or_add(self, SomeClass, kw): -625 """ Get or add a class -626 -627 Parameters -628 ---------- -629 SomeClass : object -630 A class object. -631 kw : dict -632 A dictionary of classes. -633 -634 Returns -635 ------- -636 object -637 A class object. -638 """ -639 obj = self.sql_db.session.query(SomeClass).filter_by(**kw).first() -640 if not obj: -641 obj = SomeClass(**kw) -642 return obj -643 -644 -645 def get_mol_formulas(self, odd_even_tag, classe_tuple, settings): -646 """ Get the molecular formulas -647 -648 Parameters -649 ---------- -650 odd_even_tag : str -651 A string indicating whether to retrieve even or odd hydrogen atoms. -652 classe_tuple : tuple -653 -654 settings : object -655 An object containing user-defined settings. 
-656 -657 Returns -658 ------- -659 list -660 A list of molecular formula data dictionaries. -661 -662 """ -663 class_str = classe_tuple[0] -664 class_dict = classe_tuple[1] -665 classe_id = classe_tuple[2] -666 -667 results = list() -668 -669 if 'HC' in class_dict: -670 del class_dict['HC'] -671 -672 class_dbe = self.calc_dbe_class(class_dict) -673 class_mass = self.calc_mz(class_dict) -674 -675 carbonHydrogen_mass = self.odd_ch_mass if odd_even_tag == 'odd' else self.even_ch_mass -676 carbonHydrogen_dbe = self.odd_ch_dbe if odd_even_tag == 'odd' else self.even_ch_dbe -677 carbonHydrogen_id = self.odd_ch_id if odd_even_tag == 'odd' else self.even_ch_id -678 -679 for index, carbonHydrogen_obj in enumerate(carbonHydrogen_id): -680 -681 mass = carbonHydrogen_mass[index] + class_mass -682 dbe = carbonHydrogen_dbe[index] + class_dbe -683 -684 if settings.min_mz <= mass <= settings.max_mz: -685 -686 if settings.min_dbe <= dbe <= settings.max_dbe: -687 -688 molecularFormula= {"heteroAtoms_id":classe_id, -689 "carbonHydrogen_id":carbonHydrogen_id[index], -690 "mass":mass, "DBE":dbe} -691 -692 results.append(molecularFormula) -693 -694 return results -695 -696 -697 def get_h_odd_or_even(self, class_dict): -698 """ Get the hydrogen odd or even -699 -700 Parameters -701 ---------- -702 class_dict : dict -703 A dictionary of classes. -704 -705 Returns -706 ------- -707 str -708 A string indicating whether to retrieve even or odd hydrogen atoms. 
-709 """ -710 -711 HAS_NITROGEN = 'N' in class_dict.keys() -712 -713 number_of_halogen = self.get_total_halogen_atoms(class_dict) -714 number_of_hetero = self.get_total_heteroatoms(class_dict) -715 -716 if number_of_halogen > 0: -717 -718 HAS_HALOGEN = True -719 -720 else: -721 -722 HAS_HALOGEN = False -723 -724 if HAS_HALOGEN: -725 -726 remaining_halogen = number_of_halogen % 2 -727 -728 else: -729 -730 remaining_halogen = 0 +517 if len(classe_str) > 0: +518 classe_in_order[classe_str] = classe_dict +519 +520 classe_in_order_dict = self.sort_classes(atoms_in_order, classe_in_order) +521 +522 return classe_in_order_dict +523 +524 @staticmethod +525 def sort_classes(atoms_in_order, combination_dict) -> Dict[str, Dict[str, int]]: +526 """Sort the classes in order +527 +528 Parameters +529 ---------- +530 atoms_in_order : list +531 A list of atoms in order. +532 combination_dict : dict +533 A dictionary of classes. +534 +535 Returns +536 ------- +537 dict +538 A dictionary of classes in order. +539 """ +540 # ensures atoms are always in the order defined at atoms_in_order list +541 join_dict_classes = dict() +542 atoms_in_order = ["N", "S", "P", "O"] + atoms_in_order[4:] + ["HC"] +543 +544 sort_method = lambda atoms_keys: [atoms_in_order.index(atoms_keys)] +545 for class_str, class_dict in combination_dict.items(): +546 sorted_dict_keys = sorted(class_dict, key=sort_method) +547 class_dict = {atom: class_dict[atom] for atom in sorted_dict_keys} +548 class_str = json.dumps(class_dict) +549 # using json for the new database, class +550 # class_str = ' '.join([atom + str(class_dict[atom]) for atom in sorted_dict_keys]) +551 join_dict_classes[class_str] = class_dict +552 +553 return join_dict_classes +554 +555 @staticmethod +556 def get_fixed_initial_number_of_hydrogen(min_h, odd_even): +557 """Get the fixed initial number of hydrogen atoms +558 +559 Parameters +560 ---------- +561 min_h : int +562 The minimum number of hydrogen atoms. 
+563 odd_even : str +564 A string indicating whether to retrieve even or odd hydrogen atoms. +565 """ +566 remaining_h = min_h % 2 +567 +568 if odd_even == "even": +569 if remaining_h == 0: +570 return remaining_h +571 +572 else: +573 return remaining_h + 1 +574 +575 else: +576 if remaining_h == 0: +577 return remaining_h + 1 +578 +579 else: +580 return remaining_h +581 +582 def calc_mz(self, datadict, class_mass=0): +583 """Calculate the mass-to-charge ratio (m/z) of a molecular formula. +584 +585 Parameters +586 ---------- +587 datadict : dict +588 A dictionary of classes. +589 class_mass : int +590 The mass of the class. +591 +592 Returns +593 ------- +594 float +595 The mass-to-charge ratio (m/z) of a molecular formula. +596 """ +597 mass = class_mass +598 +599 for atom in datadict.keys(): +600 if atom != "HC": +601 mass = mass + Atoms.atomic_masses[atom] * datadict.get(atom) +602 +603 return mass +604 +605 def calc_dbe_class(self, datadict): +606 """Calculate the double bond equivalent (DBE) of a molecular formula. +607 +608 Parameters +609 ---------- +610 datadict : dict +611 A dictionary of classes. +612 +613 Returns +614 ------- +615 float +616 The double bond equivalent (DBE) of a molecular formula. 
+617 """ +618 init_dbe = 0 +619 for atom in datadict.keys(): +620 if atom == "HC": +621 continue +622 +623 n_atom = int(datadict.get(atom)) +624 +625 clean_atom = "".join([i for i in atom if not i.isdigit()]) +626 +627 valencia = MSParameters.molecular_search.used_atom_valences.get(clean_atom) +628 +629 if type(valencia) is tuple: +630 valencia = valencia[0] +631 if valencia > 0: +632 # print atom, valencia, n_atom, init_dbe +633 init_dbe = init_dbe + (n_atom * (valencia - 2)) +634 else: +635 continue +636 +637 return 0.5 * init_dbe +638 +639 def populate_combinations(self, classe_tuple, settings): +640 """Populate the combinations +641 +642 Parameters +643 ---------- +644 classe_tuple : tuple +645 A tuple containing the class name, the class dictionary, and the class ID. +646 settings : object +647 An object containing user-defined settings. +648 +649 Returns +650 ------- +651 list +652 A list of molecular formula data dictionaries. +653 """ +654 ion_charge = 0 +655 +656 class_dict = classe_tuple[1] +657 odd_or_even = self.get_h_odd_or_even(class_dict) +658 +659 return self.get_mol_formulas(odd_or_even, classe_tuple, settings) +660 +661 def get_or_add(self, SomeClass, kw): +662 """Get or add a class +663 +664 Parameters +665 ---------- +666 SomeClass : object +667 A class object. +668 kw : dict +669 A dictionary of classes. +670 +671 Returns +672 ------- +673 object +674 A class object. +675 """ +676 obj = self.sql_db.session.query(SomeClass).filter_by(**kw).first() +677 if not obj: +678 obj = SomeClass(**kw) +679 return obj +680 +681 def get_mol_formulas(self, odd_even_tag, classe_tuple, settings): +682 """Get the molecular formulas +683 +684 Parameters +685 ---------- +686 odd_even_tag : str +687 A string indicating whether to retrieve even or odd hydrogen atoms. +688 classe_tuple : tuple +689 +690 settings : object +691 An object containing user-defined settings. +692 +693 Returns +694 ------- +695 list +696 A list of molecular formula data dictionaries. 
+697 +698 """ +699 class_str = classe_tuple[0] +700 class_dict = classe_tuple[1] +701 classe_id = classe_tuple[2] +702 +703 results = list() +704 +705 if "HC" in class_dict: +706 del class_dict["HC"] +707 +708 class_dbe = self.calc_dbe_class(class_dict) +709 class_mass = self.calc_mz(class_dict) +710 +711 carbonHydrogen_mass = ( +712 self.odd_ch_mass if odd_even_tag == "odd" else self.even_ch_mass +713 ) +714 carbonHydrogen_dbe = ( +715 self.odd_ch_dbe if odd_even_tag == "odd" else self.even_ch_dbe +716 ) +717 carbonHydrogen_id = self.odd_ch_id if odd_even_tag == "odd" else self.even_ch_id +718 +719 for index, carbonHydrogen_obj in enumerate(carbonHydrogen_id): +720 mass = carbonHydrogen_mass[index] + class_mass +721 dbe = carbonHydrogen_dbe[index] + class_dbe +722 +723 if settings.min_mz <= mass <= settings.max_mz: +724 if settings.min_dbe <= dbe <= settings.max_dbe: +725 molecularFormula = { +726 "heteroAtoms_id": classe_id, +727 "carbonHydrogen_id": carbonHydrogen_id[index], +728 "mass": mass, +729 "DBE": dbe, +730 } 731 -732 -733 if number_of_hetero > 0: -734 -735 HAS_OTHER_HETERO = True -736 -737 total_hetero_valence = self.get_total_hetero_valence(class_dict) -738 -739 else: -740 -741 HAS_OTHER_HETERO = False -742 -743 total_hetero_valence = 0 -744 -745 if HAS_OTHER_HETERO: -746 -747 remaining_hetero_valence = total_hetero_valence % 2 -748 -749 else: -750 -751 remaining_hetero_valence = 0 -752 -753 -754 if HAS_NITROGEN and not HAS_OTHER_HETERO: -755 -756 number_of_n = class_dict.get('N') -757 remaining_n = number_of_n % 2 -758 -759 elif HAS_NITROGEN and HAS_OTHER_HETERO: +732 results.append(molecularFormula) +733 +734 return results +735 +736 def get_h_odd_or_even(self, class_dict): +737 """Get the hydrogen odd or even +738 +739 Parameters +740 ---------- +741 class_dict : dict +742 A dictionary of classes. +743 +744 Returns +745 ------- +746 str +747 A string indicating whether to retrieve even or odd hydrogen atoms. 
+748 """ +749 +750 HAS_NITROGEN = "N" in class_dict.keys() +751 +752 number_of_halogen = self.get_total_halogen_atoms(class_dict) +753 number_of_hetero = self.get_total_heteroatoms(class_dict) +754 +755 if number_of_halogen > 0: +756 HAS_HALOGEN = True +757 +758 else: +759 HAS_HALOGEN = False 760 -761 number_of_n = class_dict.get('N') -762 remaining_n = (number_of_n + remaining_hetero_valence )% 2 +761 if HAS_HALOGEN: +762 remaining_halogen = number_of_halogen % 2 763 -764 elif HAS_OTHER_HETERO and not HAS_NITROGEN: -765 -766 remaining_n = remaining_hetero_valence -767 -768 else: +764 else: +765 remaining_halogen = 0 +766 +767 if number_of_hetero > 0: +768 HAS_OTHER_HETERO = True 769 -770 remaining_n = -1 +770 total_hetero_valence = self.get_total_hetero_valence(class_dict) 771 -772 if remaining_n > 0.0: -773 -774 if HAS_NITROGEN or HAS_OTHER_HETERO: -775 -776 if HAS_HALOGEN: -777 if remaining_halogen == 0: -778 return 'odd' -779 else: -780 return 'even' -781 -782 else: -783 return 'odd' -784 -785 elif remaining_n == 0.0: +772 else: +773 HAS_OTHER_HETERO = False +774 +775 total_hetero_valence = 0 +776 +777 if HAS_OTHER_HETERO: +778 remaining_hetero_valence = total_hetero_valence % 2 +779 +780 else: +781 remaining_hetero_valence = 0 +782 +783 if HAS_NITROGEN and not HAS_OTHER_HETERO: +784 number_of_n = class_dict.get("N") +785 remaining_n = number_of_n % 2 786 -787 if HAS_NITROGEN or HAS_OTHER_HETERO: -788 -789 if HAS_HALOGEN: -790 if remaining_halogen == 0: -791 return 'even' -792 else: -793 return 'odd' -794 -795 else: -796 return 'even' -797 -798 else: -799 -800 if HAS_HALOGEN: -801 if remaining_halogen == 0: -802 return 'even' -803 else: -804 return 'odd' -805 -806 else: -807 return 'even' -808 -809 -810 -811 @staticmethod -812 def get_total_heteroatoms(class_dict): -813 """ Get the total number of heteroatoms other than N, F, Cl, Br -814 -815 Parameters -816 ---------- -817 class_dict : dict -818 A dictionary of classes. 
-819 -820 Returns -821 ------- -822 int -823 The total number of heteroatoms. -824 """ +787 elif HAS_NITROGEN and HAS_OTHER_HETERO: +788 number_of_n = class_dict.get("N") +789 remaining_n = (number_of_n + remaining_hetero_valence) % 2 +790 +791 elif HAS_OTHER_HETERO and not HAS_NITROGEN: +792 remaining_n = remaining_hetero_valence +793 +794 else: +795 remaining_n = -1 +796 +797 if remaining_n > 0.0: +798 if HAS_NITROGEN or HAS_OTHER_HETERO: +799 if HAS_HALOGEN: +800 if remaining_halogen == 0: +801 return "odd" +802 else: +803 return "even" +804 +805 else: +806 return "odd" +807 +808 elif remaining_n == 0.0: +809 if HAS_NITROGEN or HAS_OTHER_HETERO: +810 if HAS_HALOGEN: +811 if remaining_halogen == 0: +812 return "even" +813 else: +814 return "odd" +815 +816 else: +817 return "even" +818 +819 else: +820 if HAS_HALOGEN: +821 if remaining_halogen == 0: +822 return "even" +823 else: +824 return "odd" 825 -826 total_number = 0 -827 -828 for atom in class_dict.keys(): -829 -830 if atom not in ['HC','C','H','O','N', 'F', 'Cl', 'Br']: -831 total_number = total_number + class_dict.get(atom) -832 -833 return total_number -834 -835 @staticmethod -836 def get_total_hetero_valence(class_dict): -837 """ Get the total valence of heteroatoms other than N, F, Cl, Br -838 -839 Parameters -840 ---------- -841 class_dict : dict -842 A dictionary of classes. -843 -844 Returns -845 ------- -846 int -847 The total heteroatom valence. 
-848 """ -849 total_valence = 0 -850 -851 for atom in class_dict.keys(): -852 -853 if atom not in ['HC','C','H','O','N', 'F', 'Cl', 'Br']: -854 -855 clean_atom = ''.join([i for i in atom if not i.isdigit()]) -856 -857 atom_valence = MSParameters.molecular_search.used_atom_valences.get(clean_atom) -858 -859 if type(atom_valence) is tuple: -860 atom_valence = atom_valence[0] -861 -862 n_atom =int(class_dict.get(atom)) -863 -864 n_atom_valence = atom_valence * n_atom -865 -866 total_valence = total_valence + n_atom_valence +826 else: +827 return "even" +828 +829 @staticmethod +830 def get_total_heteroatoms(class_dict): +831 """Get the total number of heteroatoms other than N, F, Cl, Br +832 +833 Parameters +834 ---------- +835 class_dict : dict +836 A dictionary of classes. +837 +838 Returns +839 ------- +840 int +841 The total number of heteroatoms. +842 """ +843 +844 total_number = 0 +845 +846 for atom in class_dict.keys(): +847 if atom not in ["HC", "C", "H", "O", "N", "F", "Cl", "Br"]: +848 total_number = total_number + class_dict.get(atom) +849 +850 return total_number +851 +852 @staticmethod +853 def get_total_hetero_valence(class_dict): +854 """Get the total valence of heteroatoms other than N, F, Cl, Br +855 +856 Parameters +857 ---------- +858 class_dict : dict +859 A dictionary of classes. +860 +861 Returns +862 ------- +863 int +864 The total heteroatom valence. +865 """ +866 total_valence = 0 867 -868 return total_valence -869 -870 @staticmethod -871 def get_total_halogen_atoms(class_dict): -872 """ Get the total number of halogen atoms -873 -874 Parameters -875 ---------- -876 class_dict : dict -877 A dictionary of classes. -878 -879 Returns -880 ------- -881 int -882 The total number of halogen atoms. 
-883 """ -884 atoms = ['F', 'Cl', 'Br'] -885 -886 total_number = 0 -887 -888 for atom in atoms: -889 -890 if atom in class_dict.keys(): -891 -892 total_number = total_number + class_dict.get(atom) -893 -894 return total_number +868 for atom in class_dict.keys(): +869 if atom not in ["HC", "C", "H", "O", "N", "F", "Cl", "Br"]: +870 clean_atom = "".join([i for i in atom if not i.isdigit()]) +871 +872 atom_valence = MSParameters.molecular_search.used_atom_valences.get( +873 clean_atom +874 ) +875 +876 if type(atom_valence) is tuple: +877 atom_valence = atom_valence[0] +878 +879 n_atom = int(class_dict.get(atom)) +880 +881 n_atom_valence = atom_valence * n_atom +882 +883 total_valence = total_valence + n_atom_valence +884 +885 return total_valence +886 +887 @staticmethod +888 def get_total_halogen_atoms(class_dict): +889 """Get the total number of halogen atoms +890 +891 Parameters +892 ---------- +893 class_dict : dict +894 A dictionary of classes. +895 +896 Returns +897 ------- +898 int +899 The total number of halogen atoms. +900 """ +901 atoms = ["F", "Cl", "Br"] +902 +903 total_number = 0 +904 +905 for atom in atoms: +906 if atom in class_dict.keys(): +907 total_number = total_number + class_dict.get(atom) +908 +909 return total_number

    @@ -1027,19 +1042,19 @@

    -
    30@contextlib.contextmanager
    -31def profiled():
    -32    """ A context manager for profiling."""
    -33    pr = cProfile.Profile()
    -34    pr.enable()
    -35    yield
    -36    pr.disable()
    -37    s = io.StringIO()
    -38    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
    -39    ps.print_stats()
    -40    # uncomment this to see who's calling what
    -41    # ps.print_callers()
    -42    print(s.getvalue())
    +            
    31@contextlib.contextmanager
    +32def profiled():
    +33    """A context manager for profiling."""
    +34    pr = cProfile.Profile()
    +35    pr.enable()
    +36    yield
    +37    pr.disable()
    +38    s = io.StringIO()
    +39    ps = pstats.Stats(pr, stream=s).sort_stats("cumulative")
    +40    ps.print_stats()
    +41    # uncomment this to see who's calling what
    +42    # ps.print_callers()
    +43    print(s.getvalue())
     
    @@ -1059,20 +1074,18 @@

    -
    44def insert_database_worker(args):
    -45    """ Inserts data into the database.
    -46    """
    -47    results, url = args
    -48    
    -49    if not url:
    -50        
    -51        url = 'sqlite:///db/molformulas.sqlite'
    +            
    46def insert_database_worker(args):
    +47    """Inserts data into the database."""
    +48    results, url = args
    +49
    +50    if not url:
    +51        url = "sqlite:///db/molformulas.sqlite"
     52
    -53    if url[0:6] == 'sqlite':
    -54        engine = create_engine(url, echo = False)
    +53    if url[0:6] == "sqlite":
    +54        engine = create_engine(url, echo=False)
     55    else:
    -56        engine = create_engine(url, echo = False, isolation_level="AUTOCOMMIT")
    -57    
    +56        engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT")
    +57
     58    session_factory = sessionmaker(bind=engine)
     59    session = session_factory()
     60    insert_query = MolecularFormulaLink.__table__.insert().values(results)
    @@ -1099,836 +1112,850 @@ 

    -
     66class MolecularCombinations:
    - 67    """ A class for generating molecular formula combinations.
    - 68    
    - 69    Parameters
    - 70    ----------
    - 71    molecular_search_settings : object
    - 72        An object containing user-defined settings.
    - 73    
    - 74    Attributes
    - 75    ----------
    - 76    sql_db : MolForm_SQL
    - 77        The SQLite database object.
    - 78    len_existing_classes : int
    - 79        The number of existing classes in the SQLite database.
    - 80    odd_ch_id : list
    - 81        A list of odd carbon and hydrogen atom IDs.
    - 82    odd_ch_dict : list
    - 83        A list of odd carbon and hydrogen atom dictionaries.
    - 84    odd_ch_mass : list
    - 85        A list of odd carbon and hydrogen atom masses.
    - 86    odd_ch_dbe : list
    - 87        A list of odd carbon and hydrogen atom double bond equivalents.
    - 88    even_ch_id : list
    - 89        A list of even carbon and hydrogen atom IDs.
    - 90    even_ch_dict : list
    - 91        A list of even carbon and hydrogen atom dictionaries.
    - 92    even_ch_mass : list
    - 93        A list of even carbon and hydrogen atom masses.
    - 94    even_ch_dbe : list
    - 95        A list of even carbon and hydrogen atom double bond equivalents.
    - 96        
    - 97    Methods
    - 98    -------
    - 99    * cProfile_worker(args)  
    -100        A cProfile worker for the get_mol_formulas function.      
    -101    * check_database_get_class_list(molecular_search_settings)  
    -102        Checks if the database has all the classes, if not create the missing classes.  
    -103    * get_carbonsHydrogens(settings, odd_even)  
    -104        Retrieves carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.  
    -105    * add_carbonsHydrogens(settings, existing_classes_objs)  
    -106        Adds carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.  
    -107    * runworker(molecular_search_settings)  
    -108        Runs the molecular formula lookup table worker.  
    -109    * get_classes_in_order(molecular_search_settings)  
    -110        Gets the classes in order.  
    -111    * sort_classes(atoms_in_order, combination_dict)  
    -112        Sorts the classes in order.  
    -113    * get_fixed_initial_number_of_hydrogen(min_h, odd_even)   
    -114        Gets the fixed initial number of hydrogen atoms.  
    -115    * calc_mz(datadict, class_mass=0)  
    -116        Calculates the mass-to-charge ratio (m/z) of a molecular formula.  
    -117    * calc_dbe_class(datadict)  
    -118        Calculates the double bond equivalent (DBE) of a molecular formula.  
    -119    * populate_combinations(classe_tuple, settings)  
    -120        Populates the combinations.  
    -121    * get_or_add(SomeClass, kw)  
    -122        Gets or adds a class.  
    -123    * get_mol_formulas(odd_even_tag, classe_tuple, settings)  
    -124        Gets the molecular formulas.  
    -125    * get_h_odd_or_even(class_dict)   
    -126        Gets the hydrogen odd or even.  
    -127    * get_total_halogen_atoms(class_dict)  
    -128        Gets the total number of halogen atoms.  
    -129    * get_total_hetero_valence(class_dict)
    -130        Gets the total valence of heteroatoms other than N, F, Cl, and Br
    -131    """
    -132
    -133    def __init__(self, sql_db = None):
    -134
    +            
     67class MolecularCombinations:
    + 68    """A class for generating molecular formula combinations.
    + 69
    + 70    Parameters
    + 71    ----------
    + 72    molecular_search_settings : object
    + 73        An object containing user-defined settings.
    + 74
    + 75    Attributes
    + 76    ----------
    + 77    sql_db : MolForm_SQL
    + 78        The SQLite database object.
    + 79    len_existing_classes : int
    + 80        The number of existing classes in the SQLite database.
    + 81    odd_ch_id : list
    + 82        A list of odd carbon and hydrogen atom IDs.
    + 83    odd_ch_dict : list
    + 84        A list of odd carbon and hydrogen atom dictionaries.
    + 85    odd_ch_mass : list
    + 86        A list of odd carbon and hydrogen atom masses.
    + 87    odd_ch_dbe : list
    + 88        A list of odd carbon and hydrogen atom double bond equivalents.
    + 89    even_ch_id : list
    + 90        A list of even carbon and hydrogen atom IDs.
    + 91    even_ch_dict : list
    + 92        A list of even carbon and hydrogen atom dictionaries.
    + 93    even_ch_mass : list
    + 94        A list of even carbon and hydrogen atom masses.
    + 95    even_ch_dbe : list
    + 96        A list of even carbon and hydrogen atom double bond equivalents.
    + 97
    + 98    Methods
    + 99    -------
    +100    * cProfile_worker(args)
    +101        A cProfile worker for the get_mol_formulas function.
    +102    * check_database_get_class_list(molecular_search_settings)
    +103        Checks if the database has all the classes, if not create the missing classes.
    +104    * get_carbonsHydrogens(settings, odd_even)
    +105        Retrieves carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.
    +106    * add_carbonsHydrogens(settings, existing_classes_objs)
    +107        Adds carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.
    +108    * runworker(molecular_search_settings)
    +109        Runs the molecular formula lookup table worker.
    +110    * get_classes_in_order(molecular_search_settings)
    +111        Gets the classes in order.
    +112    * sort_classes(atoms_in_order, combination_dict)
    +113        Sorts the classes in order.
    +114    * get_fixed_initial_number_of_hydrogen(min_h, odd_even)
    +115        Gets the fixed initial number of hydrogen atoms.
    +116    * calc_mz(datadict, class_mass=0)
    +117        Calculates the mass-to-charge ratio (m/z) of a molecular formula.
    +118    * calc_dbe_class(datadict)
    +119        Calculates the double bond equivalent (DBE) of a molecular formula.
    +120    * populate_combinations(classe_tuple, settings)
    +121        Populates the combinations.
    +122    * get_or_add(SomeClass, kw)
    +123        Gets or adds a class.
    +124    * get_mol_formulas(odd_even_tag, classe_tuple, settings)
    +125        Gets the molecular formulas.
    +126    * get_h_odd_or_even(class_dict)
    +127        Gets the hydrogen odd or even.
    +128    * get_total_halogen_atoms(class_dict)
    +129        Gets the total number of halogen atoms.
    +130    * get_total_hetero_valence(class_dict)
    +131        Gets the total valence of heteroatoms other than N, F, Cl, and Br
    +132    """
    +133
    +134    def __init__(self, sql_db=None):
     135        if not sql_db:
    -136            
    -137            self.sql_db = MolForm_SQL()
    -138        else:
    -139            
    -140            self.sql_db = sql_db
    -141
    -142    def cProfile_worker(self, args):
    -143        """ cProfile worker for the get_mol_formulas function"""
    -144        cProfile.runctx('self.get_mol_formulas(*args)', globals(), locals(), 'mf_database_cprofile.prof')
    -145
    -146    def check_database_get_class_list(self, molecular_search_settings):
    -147        """ check if the database has all the classes, if not create the missing classes
    -148        
    -149        Parameters
    -150        ----------
    -151        molecular_search_settings : object
    -152            An object containing user-defined settings.
    -153        
    -154        Returns
    -155        -------
    -156        list
    -157            list of tuples with the class name and the class dictionary
    -158        """
    -159        all_class_to_create = []
    -160        
    -161        classes_dict = self.get_classes_in_order(molecular_search_settings)
    -162        
    -163        class_str_set = set(classes_dict.keys())
    -164        
    -165        existing_classes_objs = self.sql_db.session.query(HeteroAtoms).distinct().all()
    -166        
    -167        existing_classes_str = set([classe.name for classe in existing_classes_objs])
    -168
    -169        self.len_existing_classes = len(existing_classes_str)
    -170
    -171        class_to_create = class_str_set - existing_classes_str
    -172        
    -173        class_count= len(existing_classes_objs)
    -174
    -175        data_classes = list()    
    -176        for index, class_str in enumerate(class_to_create):
    -177            
    -178            class_dict = classes_dict.get(class_str)
    -179            halogen_count = self.get_total_halogen_atoms(class_dict)
    -180            data_classes.append({"name":class_str, "id":class_count+ index + 1, "halogensCount": halogen_count})
    -181
    -182        #data_classes = [{"name":class_str, "id":class_count+ index + 1} for index, class_str in enumerate(class_to_create)]
    -183
    -184        if data_classes:
    -185
    -186            list_insert_chunks = chunks(data_classes, self.sql_db.chunks_count)
    -187            for insert_chunk in  list_insert_chunks:   
    -188                insert_query = HeteroAtoms.__table__.insert().values(insert_chunk)
    -189                self.sql_db.session.execute(insert_query)
    -190
    -191        for index, class_str in enumerate(class_to_create):
    -192
    -193            class_tuple = (class_str, classes_dict.get(class_str), class_count+ index + 1)
    -194
    -195            all_class_to_create.append(class_tuple)
    -196
    -197        return [(c_s, c_d) for c_s, c_d in classes_dict.items()], all_class_to_create, existing_classes_objs       
    -198
    -199    def get_carbonsHydrogens(self, settings, odd_even):
    -200            """ Retrieve carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.
    -201
    -202            Parameters
    -203            ----------
    -204            settings : object
    -205                 An object containing user-defined settings.
    -206            odd_even : str
    -207                A string indicating whether to retrieve even or odd hydrogen atoms.
    -208
    -209            Returns
    -210            -------
    -211            list
    -212                A list of CarbonHydrogen objects that satisfy the specified conditions.
    -213            """
    -214            operator = '==' if odd_even == 'even' else '!=' 
    -215            usedAtoms = settings.usedAtoms
    -216            user_min_c, user_max_c = usedAtoms.get('C')
    -217            user_min_h, user_max_h = usedAtoms.get('H')
    -218
    -219            return eval("self.sql_db.session.query(CarbonHydrogen).filter(" 
    -220                                           "CarbonHydrogen.C >= user_min_c,"
    -221                                            "CarbonHydrogen.H >= user_min_h,"
    -222                                            "CarbonHydrogen.C <= user_max_c,"
    -223                                            "CarbonHydrogen.H <= user_max_h,"
    -224                                            "CarbonHydrogen.H % 2" + operator+ "0).all()")
    -225
    -226    def add_carbonsHydrogens(self, settings, existing_classes_objs):
    -227        """ Add carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.
    -228
    -229        Parameters
    -230        ----------
    -231        settings : object
    -232            An object containing user-defined settings.
    -233        existing_classes_objs : list
    -234            A list of HeteroAtoms objects.
    -235        """
    -236        usedAtoms = settings.usedAtoms
    -237
    -238        user_min_c, user_max_c = usedAtoms.get('C')
    -239        user_min_h, user_max_h = usedAtoms.get('H')
    -240
    -241        query_obj = self.sql_db.session.query(func.max(CarbonHydrogen.C).label("max_c"), 
    -242                        func.min(CarbonHydrogen.C).label("min_c"),
    -243                        func.max(CarbonHydrogen.H).label("max_h"),
    -244                        func.min(CarbonHydrogen.H).label("min_h"),
    -245                        )
    -246
    -247
    -248        database = query_obj.first()
    -249        if database.max_c == user_max_c and database.min_c == user_min_c and database.max_h == user_max_h and database.min_h == user_min_h:   
    -250            #all data is already available at the database
    -251            pass
    -252
    -253        else:
    -254
    -255            current_count = self.sql_db.session.query(CarbonHydrogen.C).count()
    -256            
    -257            databaseCarbonHydrogen = self.sql_db.session.query(CarbonHydrogen).all()
    -258            
    -259            userCarbon = set(range(user_min_c, user_max_c + 1))
    -260            userHydrogen = set(range(user_min_h, user_max_h + 1))
    -261            
    -262            carbon_hydrogen_objs_database = {}
    -263            for obj in databaseCarbonHydrogen:
    -264                
    -265                str_data = "C:{},H:{}".format(obj.C, obj.H)
    -266                carbon_hydrogen_objs_database[str_data] = str_data
    -267
    -268            carbon_hydrogen_objs_to_create = {'even': {}, 'odd': {}}
    -269            
    -270            list_ch_obj_to_add = list()
    -271            i = 0
    -272            for comb in itertools.product(userCarbon, userHydrogen):
    -273                
    -274                C  = comb[0]
    -275                H =  comb[1]
    -276                data = {"C":C,
    -277                       "H":H,
    -278                }
    -279
    -280                data_insert = {"C":C,
    -281                       "H":H,
    -282                }
    -283
    -284                str_data = "C:{},H:{}".format(C,H)
    -285                
    -286                if not str_data in carbon_hydrogen_objs_database.keys():
    -287                    
    -288                    label = 'even' if comb[1]%2 == 0 else 'odd'
    -289                    data["mass"] = (C * Atoms.atomic_masses.get('C')) + (H * Atoms.atomic_masses.get('H'))
    -290                    data["dbe"] = C - (H/2) + 1
    -291                    data["id"] = i + current_count + 1
    -292                    data_insert["id"] = i + current_count + 1
    -293                    i = i + 1 
    -294                    carbon_hydrogen_objs_to_create[label][str_data] = data
    -295                    
    -296                    list_ch_obj_to_add.append(data_insert)
    -297
    -298            if list_ch_obj_to_add:
    -299                # insert carbon hydrogen objs
    -300                list_insert_chunks = chunks(list_ch_obj_to_add, self.sql_db.chunks_count)
    -301                for insert_chunk in  list_insert_chunks:   
    -302                    insert_query = CarbonHydrogen.__table__.insert().values(insert_chunk)
    -303                    self.sql_db.session.execute(insert_query)
    -304                self.sql_db.session.commit()    
    -305            
    -306                
    -307                list_molecular_form= list()
    -308                for classe_obj in existing_classes_objs:
    -309
    -310                    classe_dict = classe_obj.to_dict()  
    -311                    classe_mass = self.calc_mz(classe_dict)
    -312                    classe_dbe = self.calc_dbe_class(classe_dict)
    -313
    -314                    odd_even_label = self.get_h_odd_or_even(classe_dict)
    -315
    -316                    ch_datalist = carbon_hydrogen_objs_to_create.get(odd_even_label).values()
    -317
    -318                    for ch_dict in ch_datalist:
    -319                        mass = ch_dict.get('mass') + classe_mass
    -320                        dbe = ch_dict.get('dbe') + classe_dbe
    -321
    -322                        if settings.min_mz <= mass <= settings.max_mz:
    -323                
    -324                            if settings.min_dbe <= dbe <= settings.max_dbe:
    -325                                
    -326                                list_molecular_form.append( {"heteroAtoms_id":classe_obj.id, 
    -327                                        "carbonHydrogen_id":ch_dict.get('id'), 
    -328                                        "mass":mass, "DBE":dbe})
    -329
    -330                list_insert_chunks = chunks(list_molecular_form, self.sql_db.chunks_count)
    -331                for insert_chunk in  list_insert_chunks:   
    -332                    insert_query = MolecularFormulaLink.__table__.insert().values(insert_chunk)
    -333                    self.sql_db.session.execute(insert_query)
    -334                self.sql_db.session.commit()    
    -335            
    -336                
    -337    @timeit
    -338    def runworker(self, molecular_search_settings):
    -339        """ Run the molecular formula lookup table worker.
    -340
    -341        Parameters
    -342        ----------
    -343        molecular_search_settings : object
    -344            An object containing user-defined settings.
    -345        
    -346        Returns
    -347        -------
    -348        list
    -349            A list of tuples with the class name and the class dictionary.
    -350        
    -351
    -352        """
    -353        
    -354        classes_list, class_to_create, existing_classes_objs = self.check_database_get_class_list(molecular_search_settings)
    -355        
    -356        settings = MolecularLookupDictSettings()
    -357        settings.usedAtoms = deepcopy(molecular_search_settings.usedAtoms)
    -358        settings.url_database = molecular_search_settings.url_database
    -359        settings.db_jobs = molecular_search_settings.db_jobs
    -360
    -361        self.add_carbonsHydrogens(settings, existing_classes_objs)
    -362        
    -363        if class_to_create:
    -364            
    -365            settings = MolecularLookupDictSettings()
    -366            settings.usedAtoms = deepcopy(molecular_search_settings.usedAtoms)
    -367            settings.url_database = molecular_search_settings.url_database
    -368            settings.db_jobs = molecular_search_settings.db_jobs
    -369            
    -370            self.sql_db.session.commit()
    -371            odd_ch_obj = self.get_carbonsHydrogens(settings,'odd')
    -372            self.odd_ch_id = [obj.id for obj in odd_ch_obj]
    -373            self.odd_ch_dict = [{'C':obj.C, 'H':obj.H} for obj in odd_ch_obj]
    -374            self.odd_ch_mass = [obj.mass for obj in odd_ch_obj]
    -375            self.odd_ch_dbe = [obj.dbe for obj in odd_ch_obj]
    -376            
    -377            even_ch_obj = self.get_carbonsHydrogens(settings, 'even')
    -378            self.even_ch_id = [obj.id for obj in even_ch_obj]
    -379            self.even_ch_dict = [{'C':obj.C, 'H':obj.H} for obj in even_ch_obj]
    -380            self.even_ch_mass = [obj.mass for obj in even_ch_obj]
    -381            self.even_ch_dbe = [obj.dbe for obj in even_ch_obj]
    +136            self.sql_db = MolForm_SQL()
    +137        else:
    +138            self.sql_db = sql_db
    +139
    +140    def cProfile_worker(self, args):
    +141        """cProfile worker for the get_mol_formulas function"""
    +142        cProfile.runctx(
    +143            "self.get_mol_formulas(*args)",
    +144            globals(),
    +145            locals(),
    +146            "mf_database_cprofile.prof",
    +147        )
    +148
    +149    def check_database_get_class_list(self, molecular_search_settings):
    +150        """check if the database has all the classes, if not create the missing classes
    +151
    +152        Parameters
    +153        ----------
    +154        molecular_search_settings : object
    +155            An object containing user-defined settings.
    +156
    +157        Returns
    +158        -------
    +159        list
    +160            list of tuples with the class name and the class dictionary
    +161        """
    +162        all_class_to_create = []
    +163
    +164        classes_dict = self.get_classes_in_order(molecular_search_settings)
    +165
    +166        class_str_set = set(classes_dict.keys())
    +167
    +168        existing_classes_objs = self.sql_db.session.query(HeteroAtoms).distinct().all()
    +169
    +170        existing_classes_str = set([classe.name for classe in existing_classes_objs])
    +171
    +172        self.len_existing_classes = len(existing_classes_str)
    +173
    +174        class_to_create = class_str_set - existing_classes_str
    +175
    +176        class_count = len(existing_classes_objs)
    +177
    +178        data_classes = list()
    +179        for index, class_str in enumerate(class_to_create):
    +180            class_dict = classes_dict.get(class_str)
    +181            halogen_count = self.get_total_halogen_atoms(class_dict)
    +182            data_classes.append(
    +183                {
    +184                    "name": class_str,
    +185                    "id": class_count + index + 1,
    +186                    "halogensCount": halogen_count,
    +187                }
    +188            )
    +189
    +190        # data_classes = [{"name":class_str, "id":class_count+ index + 1} for index, class_str in enumerate(class_to_create)]
    +191
    +192        if data_classes:
    +193            list_insert_chunks = chunks(data_classes, self.sql_db.chunks_count)
    +194            for insert_chunk in list_insert_chunks:
    +195                insert_query = HeteroAtoms.__table__.insert().values(insert_chunk)
    +196                self.sql_db.session.execute(insert_query)
    +197
    +198        for index, class_str in enumerate(class_to_create):
    +199            class_tuple = (
    +200                class_str,
    +201                classes_dict.get(class_str),
    +202                class_count + index + 1,
    +203            )
    +204
    +205            all_class_to_create.append(class_tuple)
    +206
    +207        return (
    +208            [(c_s, c_d) for c_s, c_d in classes_dict.items()],
    +209            all_class_to_create,
    +210            existing_classes_objs,
    +211        )
    +212
    +213    def get_carbonsHydrogens(self, settings, odd_even):
    +214        """Retrieve carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.
    +215
    +216        Parameters
    +217        ----------
    +218        settings : object
    +219             An object containing user-defined settings.
    +220        odd_even : str
    +221            A string indicating whether to retrieve even or odd hydrogen atoms.
    +222
    +223        Returns
    +224        -------
    +225        list
    +226            A list of CarbonHydrogen objects that satisfy the specified conditions.
    +227        """
    +228        operator = "==" if odd_even == "even" else "!="
    +229        usedAtoms = settings.usedAtoms
    +230        user_min_c, user_max_c = usedAtoms.get("C")
    +231        user_min_h, user_max_h = usedAtoms.get("H")
    +232
    +233        return eval(
    +234            "self.sql_db.session.query(CarbonHydrogen).filter("
    +235            "CarbonHydrogen.C >= user_min_c,"
    +236            "CarbonHydrogen.H >= user_min_h,"
    +237            "CarbonHydrogen.C <= user_max_c,"
    +238            "CarbonHydrogen.H <= user_max_h,"
    +239            "CarbonHydrogen.H % 2" + operator + "0).all()"
    +240        )
    +241
    +242    def add_carbonsHydrogens(self, settings, existing_classes_objs):
    +243        """Add carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.
    +244
    +245        Parameters
    +246        ----------
    +247        settings : object
    +248            An object containing user-defined settings.
    +249        existing_classes_objs : list
    +250            A list of HeteroAtoms objects.
    +251        """
    +252        usedAtoms = settings.usedAtoms
    +253
    +254        user_min_c, user_max_c = usedAtoms.get("C")
    +255        user_min_h, user_max_h = usedAtoms.get("H")
    +256
    +257        query_obj = self.sql_db.session.query(
    +258            func.max(CarbonHydrogen.C).label("max_c"),
    +259            func.min(CarbonHydrogen.C).label("min_c"),
    +260            func.max(CarbonHydrogen.H).label("max_h"),
    +261            func.min(CarbonHydrogen.H).label("min_h"),
    +262        )
    +263
    +264        database = query_obj.first()
    +265        if (
    +266            database.max_c == user_max_c
    +267            and database.min_c == user_min_c
    +268            and database.max_h == user_max_h
    +269            and database.min_h == user_min_h
    +270        ):
    +271            # all data is already available at the database
    +272            pass
    +273
    +274        else:
    +275            current_count = self.sql_db.session.query(CarbonHydrogen.C).count()
    +276
    +277            databaseCarbonHydrogen = self.sql_db.session.query(CarbonHydrogen).all()
    +278
    +279            userCarbon = set(range(user_min_c, user_max_c + 1))
    +280            userHydrogen = set(range(user_min_h, user_max_h + 1))
    +281
    +282            carbon_hydrogen_objs_database = {}
    +283            for obj in databaseCarbonHydrogen:
    +284                str_data = "C:{},H:{}".format(obj.C, obj.H)
    +285                carbon_hydrogen_objs_database[str_data] = str_data
    +286
    +287            carbon_hydrogen_objs_to_create = {"even": {}, "odd": {}}
    +288
    +289            list_ch_obj_to_add = list()
    +290            i = 0
    +291            for comb in itertools.product(userCarbon, userHydrogen):
    +292                C = comb[0]
    +293                H = comb[1]
    +294                data = {
    +295                    "C": C,
    +296                    "H": H,
    +297                }
    +298
    +299                data_insert = {
    +300                    "C": C,
    +301                    "H": H,
    +302                }
    +303
    +304                str_data = "C:{},H:{}".format(C, H)
    +305
    +306                if not str_data in carbon_hydrogen_objs_database.keys():
    +307                    label = "even" if comb[1] % 2 == 0 else "odd"
    +308                    data["mass"] = (C * Atoms.atomic_masses.get("C")) + (
    +309                        H * Atoms.atomic_masses.get("H")
    +310                    )
    +311                    data["dbe"] = C - (H / 2) + 1
    +312                    data["id"] = i + current_count + 1
    +313                    data_insert["id"] = i + current_count + 1
    +314                    i = i + 1
    +315                    carbon_hydrogen_objs_to_create[label][str_data] = data
    +316
    +317                    list_ch_obj_to_add.append(data_insert)
    +318
    +319            if list_ch_obj_to_add:
    +320                # insert carbon hydrogen objs
    +321                list_insert_chunks = chunks(
    +322                    list_ch_obj_to_add, self.sql_db.chunks_count
    +323                )
    +324                for insert_chunk in list_insert_chunks:
    +325                    insert_query = CarbonHydrogen.__table__.insert().values(
    +326                        insert_chunk
    +327                    )
    +328                    self.sql_db.session.execute(insert_query)
    +329                self.sql_db.session.commit()
    +330
    +331                list_molecular_form = list()
    +332                for classe_obj in existing_classes_objs:
    +333                    classe_dict = classe_obj.to_dict()
    +334                    classe_mass = self.calc_mz(classe_dict)
    +335                    classe_dbe = self.calc_dbe_class(classe_dict)
    +336
    +337                    odd_even_label = self.get_h_odd_or_even(classe_dict)
    +338
    +339                    ch_datalist = carbon_hydrogen_objs_to_create.get(
    +340                        odd_even_label
    +341                    ).values()
    +342
    +343                    for ch_dict in ch_datalist:
    +344                        mass = ch_dict.get("mass") + classe_mass
    +345                        dbe = ch_dict.get("dbe") + classe_dbe
    +346
    +347                        if settings.min_mz <= mass <= settings.max_mz:
    +348                            if settings.min_dbe <= dbe <= settings.max_dbe:
    +349                                list_molecular_form.append(
    +350                                    {
    +351                                        "heteroAtoms_id": classe_obj.id,
    +352                                        "carbonHydrogen_id": ch_dict.get("id"),
    +353                                        "mass": mass,
    +354                                        "DBE": dbe,
    +355                                    }
    +356                                )
    +357
    +358                list_insert_chunks = chunks(
    +359                    list_molecular_form, self.sql_db.chunks_count
    +360                )
    +361                for insert_chunk in list_insert_chunks:
    +362                    insert_query = MolecularFormulaLink.__table__.insert().values(
    +363                        insert_chunk
    +364                    )
    +365                    self.sql_db.session.execute(insert_query)
    +366                self.sql_db.session.commit()
    +367
    +368    @timeit
    +369    def runworker(self, molecular_search_settings):
    +370        """Run the molecular formula lookup table worker.
    +371
    +372        Parameters
    +373        ----------
    +374        molecular_search_settings : object
    +375            An object containing user-defined settings.
    +376
    +377        Returns
    +378        -------
    +379        list
    +380            A list of tuples with the class name and the class dictionary.
    +381
     382
    -383            all_results= list()
    -384            for class_tuple in tqdm(class_to_create):
    -385                
    -386                results = self.populate_combinations(class_tuple, settings)
    -387                all_results.extend(results)
    -388                if settings.db_jobs == 1: 
    -389                    #if len(all_results) >= self.sql_db.chunks_count:
    -390                        list_insert_chunks = list(chunks(results, self.sql_db.chunks_count))
    -391                        for chunk in list_insert_chunks:
    -392                            insert_query = MolecularFormulaLink.__table__.insert().values(chunk)
    -393                            self.sql_db.session.execute(insert_query)
    -394                        #all_results = list()
    -395            self.sql_db.session.commit()
    -396            # each chunk takes ~600Mb of memory, so if using 8 processes the total free memory needs to be 5GB
    -397            if settings.db_jobs > 1: 
    -398                list_insert_chunks = list(chunks(all_results, self.sql_db.chunks_count))
    -399                print( "Started database insert using {} iterations for a total of {} rows".format(len(list_insert_chunks), len(all_results)))
    -400                worker_args = [(chunk, settings.url_database) for chunk in list_insert_chunks]
    -401                p = multiprocessing.Pool(settings.db_jobs)
    -402                for class_list in tqdm(p.imap_unordered(insert_database_worker, worker_args)):
    -403                    pass
    -404                p.close()
    -405                p.join()
    -406        
    -407        return classes_list
    -408    
    -409    def get_classes_in_order(self, molecular_search_settings):
    -410        """ Get the classes in order
    -411
    -412        Parameters
    -413        ----------
    -414        molecular_search_settings : object
    -415            An object containing user-defined settings.
    -416        
    -417        Returns
    -418        -------
    -419        dict
    -420            A dictionary of classes in order.
    -421            structure is  ('HC', {'HC': 1})
    -422        """
    -423        
    -424        usedAtoms = deepcopy(molecular_search_settings.usedAtoms)
    -425        
    -426        usedAtoms.pop("C")
    -427        usedAtoms.pop("H")
    -428
    -429        min_n, max_n = usedAtoms.get('N') if usedAtoms.get('N') else (0,0)
    -430        min_o, max_o = usedAtoms.get('O') if usedAtoms.get('O') else (0,0)
    -431        min_s, max_s = usedAtoms.get('S') if usedAtoms.get('S') else (0,0)
    -432        min_p, max_p = usedAtoms.get('P') if usedAtoms.get('P') else (0,0)
    -433
    -434        possible_n = [n for n in range(min_n, max_n + 1)]
    -435        possible_o = [o for o in range(min_o, max_o + 1)]
    -436        possible_s = [s for s in range(min_s, max_s + 1)]
    -437        possible_p = [p for p in range(min_p, max_p + 1)]
    -438        
    -439        atoms_in_order = ['N', 'O', 'S', 'P']
    -440
    -441        classe_in_order = {}
    -442
    -443        all_atoms_tuples = itertools.product(possible_n, possible_o,
    -444                                            possible_s, possible_p)
    -445        
    -446        for atom in atoms_in_order:
    -447            usedAtoms.pop(atom, None)
    -448        
    -449        for selected_atom, min_max_tuple in usedAtoms.items():
    -450            
    -451            min_x = min_max_tuple[0]
    -452            max_x = min_max_tuple[1]
    -453            
    -454
    -455            possible_x = [x for x in range(min_x, max_x + 1)]
    -456
    -457            all_atoms_tuples = itertools.product(all_atoms_tuples, possible_x)
    -458            all_atoms_tuples = [all_atoms_combined[0] + (all_atoms_combined[1],) for all_atoms_combined in
    -459                                all_atoms_tuples]
    -460            atoms_in_order.append(selected_atom)
    -461        
    -462        for all_atoms_tuple in all_atoms_tuples:
    -463
    -464            classe_str = ''
    -465            classe_dict = {}
    -466            
    -467            for each_atoms_index, atom_number in enumerate(all_atoms_tuple):
    -468                
    -469                if atom_number != 0:
    -470                    classe_dict[atoms_in_order[each_atoms_index]] = atom_number
    -471            
    -472            if not classe_dict:
    -473                classe_in_order['HC'] = {"HC": ""}
    -474                continue
    -475
    -476            classe_str =json.dumps(classe_dict)
    -477            
    -478            if len(classe_str) > 0:
    -479                
    -480                classe_in_order[classe_str] =  classe_dict
    -481        
    -482        classe_in_order_dict = self.sort_classes(atoms_in_order, classe_in_order)
    -483        
    -484        return classe_in_order_dict
    -485
    -486    @staticmethod
    -487    def sort_classes( atoms_in_order, combination_dict) -> Dict[str, Dict[str, int]]: 
    -488        """ Sort the classes in order
    -489        
    -490        Parameters
    -491        ----------
    -492        atoms_in_order : list
    -493            A list of atoms in order.
    -494        combination_dict : dict
    -495            A dictionary of classes.
    -496        
    -497        Returns
    -498        -------
    -499        dict
    -500            A dictionary of classes in order.
    -501        """
    -502        #ensures atoms are always in the order defined at atoms_in_order list
    -503        join_dict_classes = dict()
    -504        atoms_in_order =  ['N','S','P','O'] + atoms_in_order[4:] + ['HC']
    -505        
    -506        sort_method = lambda atoms_keys: [atoms_in_order.index(atoms_keys)] 
    -507        for class_str, class_dict in combination_dict.items():
    -508            
    -509            sorted_dict_keys = sorted(class_dict, key = sort_method)
    -510            class_dict = { atom: class_dict[atom] for atom in sorted_dict_keys}
    -511            class_str = json.dumps(class_dict)
    -512            # using json for the new database, class 
    -513            # class_str = ' '.join([atom + str(class_dict[atom]) for atom in sorted_dict_keys])
    -514            join_dict_classes[class_str] =  class_dict
    -515        
    -516        return join_dict_classes
    +383        """
    +384
    +385        classes_list, class_to_create, existing_classes_objs = (
    +386            self.check_database_get_class_list(molecular_search_settings)
    +387        )
    +388
    +389        settings = MolecularLookupDictSettings()
    +390        settings.usedAtoms = deepcopy(molecular_search_settings.usedAtoms)
    +391        settings.url_database = molecular_search_settings.url_database
    +392        settings.db_jobs = molecular_search_settings.db_jobs
    +393
    +394        self.add_carbonsHydrogens(settings, existing_classes_objs)
    +395
    +396        if class_to_create:
    +397            settings = MolecularLookupDictSettings()
    +398            settings.usedAtoms = deepcopy(molecular_search_settings.usedAtoms)
    +399            settings.url_database = molecular_search_settings.url_database
    +400            settings.db_jobs = molecular_search_settings.db_jobs
    +401
    +402            self.sql_db.session.commit()
    +403            odd_ch_obj = self.get_carbonsHydrogens(settings, "odd")
    +404            self.odd_ch_id = [obj.id for obj in odd_ch_obj]
    +405            self.odd_ch_dict = [{"C": obj.C, "H": obj.H} for obj in odd_ch_obj]
    +406            self.odd_ch_mass = [obj.mass for obj in odd_ch_obj]
    +407            self.odd_ch_dbe = [obj.dbe for obj in odd_ch_obj]
    +408
    +409            even_ch_obj = self.get_carbonsHydrogens(settings, "even")
    +410            self.even_ch_id = [obj.id for obj in even_ch_obj]
    +411            self.even_ch_dict = [{"C": obj.C, "H": obj.H} for obj in even_ch_obj]
    +412            self.even_ch_mass = [obj.mass for obj in even_ch_obj]
    +413            self.even_ch_dbe = [obj.dbe for obj in even_ch_obj]
    +414
    +415            all_results = list()
    +416            for class_tuple in tqdm(class_to_create):
    +417                results = self.populate_combinations(class_tuple, settings)
    +418                all_results.extend(results)
    +419                if settings.db_jobs == 1:
    +420                    # if len(all_results) >= self.sql_db.chunks_count:
    +421                    list_insert_chunks = list(chunks(results, self.sql_db.chunks_count))
    +422                    for chunk in list_insert_chunks:
    +423                        insert_query = MolecularFormulaLink.__table__.insert().values(
    +424                            chunk
    +425                        )
    +426                        self.sql_db.session.execute(insert_query)
    +427                    # all_results = list()
    +428            self.sql_db.session.commit()
    +429            # each chunk takes ~600Mb of memory, so if using 8 processes the total free memory needs to be 5GB
    +430            if settings.db_jobs > 1:
    +431                list_insert_chunks = list(chunks(all_results, self.sql_db.chunks_count))
    +432                print(
    +433                    "Started database insert using {} iterations for a total of {} rows".format(
    +434                        len(list_insert_chunks), len(all_results)
    +435                    )
    +436                )
    +437                worker_args = [
    +438                    (chunk, settings.url_database) for chunk in list_insert_chunks
    +439                ]
    +440                p = multiprocessing.Pool(settings.db_jobs)
    +441                for class_list in tqdm(
    +442                    p.imap_unordered(insert_database_worker, worker_args)
    +443                ):
    +444                    pass
    +445                p.close()
    +446                p.join()
    +447
    +448        return classes_list
    +449
    +450    def get_classes_in_order(self, molecular_search_settings):
    +451        """Get the classes in order
    +452
    +453        Parameters
    +454        ----------
    +455        molecular_search_settings : object
    +456            An object containing user-defined settings.
    +457
    +458        Returns
    +459        -------
    +460        dict
    +461            A dictionary of classes in order.
    +462            structure is  ('HC', {'HC': 1})
    +463        """
    +464
    +465        usedAtoms = deepcopy(molecular_search_settings.usedAtoms)
    +466
    +467        usedAtoms.pop("C")
    +468        usedAtoms.pop("H")
    +469
    +470        min_n, max_n = usedAtoms.get("N") if usedAtoms.get("N") else (0, 0)
    +471        min_o, max_o = usedAtoms.get("O") if usedAtoms.get("O") else (0, 0)
    +472        min_s, max_s = usedAtoms.get("S") if usedAtoms.get("S") else (0, 0)
    +473        min_p, max_p = usedAtoms.get("P") if usedAtoms.get("P") else (0, 0)
    +474
    +475        possible_n = [n for n in range(min_n, max_n + 1)]
    +476        possible_o = [o for o in range(min_o, max_o + 1)]
    +477        possible_s = [s for s in range(min_s, max_s + 1)]
    +478        possible_p = [p for p in range(min_p, max_p + 1)]
    +479
    +480        atoms_in_order = ["N", "O", "S", "P"]
    +481
    +482        classe_in_order = {}
    +483
    +484        all_atoms_tuples = itertools.product(
    +485            possible_n, possible_o, possible_s, possible_p
    +486        )
    +487
    +488        for atom in atoms_in_order:
    +489            usedAtoms.pop(atom, None)
    +490
    +491        for selected_atom, min_max_tuple in usedAtoms.items():
    +492            min_x = min_max_tuple[0]
    +493            max_x = min_max_tuple[1]
    +494
    +495            possible_x = [x for x in range(min_x, max_x + 1)]
    +496
    +497            all_atoms_tuples = itertools.product(all_atoms_tuples, possible_x)
    +498            all_atoms_tuples = [
    +499                all_atoms_combined[0] + (all_atoms_combined[1],)
    +500                for all_atoms_combined in all_atoms_tuples
    +501            ]
    +502            atoms_in_order.append(selected_atom)
    +503
    +504        for all_atoms_tuple in all_atoms_tuples:
    +505            classe_str = ""
    +506            classe_dict = {}
    +507
    +508            for each_atoms_index, atom_number in enumerate(all_atoms_tuple):
    +509                if atom_number != 0:
    +510                    classe_dict[atoms_in_order[each_atoms_index]] = atom_number
    +511
    +512            if not classe_dict:
    +513                classe_in_order["HC"] = {"HC": ""}
    +514                continue
    +515
    +516            classe_str = json.dumps(classe_dict)
     517
    -518    @staticmethod
    -519    def get_fixed_initial_number_of_hydrogen( min_h, odd_even):
    -520        """ Get the fixed initial number of hydrogen atoms
    -521        
    -522        Parameters
    -523        ----------
    -524        min_h : int
    -525            The minimum number of hydrogen atoms.
    -526        odd_even : str
    -527            A string indicating whether to retrieve even or odd hydrogen atoms.
    -528        """
    -529        remaining_h = min_h % 2
    -530        
    -531        if odd_even == 'even':
    -532            
    -533            if remaining_h == 0: return remaining_h
    -534            
    -535            else: return remaining_h + 1    
    -536        
    -537        else:
    -538            
    -539            if remaining_h == 0: return remaining_h + 1
    -540            
    -541            else: return remaining_h    
    -542
    -543    def calc_mz(self, datadict, class_mass=0):
    -544        """ Calculate the mass-to-charge ratio (m/z) of a molecular formula.
    -545        
    -546        Parameters
    -547        ----------
    -548        datadict : dict
    -549            A dictionary of classes.
    -550        class_mass : int
    -551            The mass of the class.
    -552        
    -553        Returns
    -554        -------
    -555        float
    -556            The mass-to-charge ratio (m/z) of a molecular formula.
    -557        """
    -558        mass = class_mass
    -559        
    -560        for atom in datadict.keys():
    -561            
    -562            if atom != 'HC':    
    -563                
    -564                mass = mass + Atoms.atomic_masses[atom]  *  datadict.get(atom)
    -565            
    -566        return mass 
    -567        
    -568    def calc_dbe_class(self, datadict):
    -569        """ Calculate the double bond equivalent (DBE) of a molecular formula.
    -570        
    -571        Parameters
    -572        ----------
    -573        datadict : dict
    -574            A dictionary of classes.
    -575        
    -576        Returns
    -577        -------
    -578        float
    -579            The double bond equivalent (DBE) of a molecular formula.
    -580        """
    -581        init_dbe = 0
    -582        for atom in datadict.keys():
    -583
    -584            if atom == 'HC':
    -585                continue  
    -586            
    -587            n_atom = int(datadict.get(atom))
    -588            
    -589            clean_atom = ''.join([i for i in atom if not i.isdigit()]) 
    -590            
    -591            valencia = MSParameters.molecular_search.used_atom_valences.get(clean_atom)
    -592            
    -593            if type(valencia) is tuple:
    -594                valencia = valencia[0]
    -595            if valencia > 0:
    -596                #print atom, valencia, n_atom, init_dbe
    -597                init_dbe = init_dbe + (n_atom * (valencia - 2))
    -598            else:
    -599                continue
    -600            
    -601        return (0.5 * init_dbe)
    -602            
    -603    def populate_combinations(self, classe_tuple, settings):
    -604        """ Populate the combinations
    -605        
    -606        Parameters
    -607        ----------
    -608        classe_tuple : tuple
    -609            A tuple containing the class name, the class dictionary, and the class ID.
    -610        settings : object
    -611            An object containing user-defined settings.
    -612        
    -613        Returns
    -614        -------
    -615        list
    -616            A list of molecular formula data dictionaries.
    -617        """
    -618        ion_charge =  0
    -619        
    -620        class_dict = classe_tuple[1]
    -621        odd_or_even = self.get_h_odd_or_even(class_dict)
    -622        
    -623        return self.get_mol_formulas(odd_or_even, classe_tuple, settings)
    -624        
    -625    def get_or_add(self, SomeClass, kw):
    -626        """ Get or add a class
    -627        
    -628        Parameters
    -629        ----------
    -630        SomeClass : object
    -631            A class object.
    -632        kw : dict
    -633            A dictionary of classes.
    -634        
    -635        Returns
    -636        -------
    -637        object
    -638            A class object.
    -639        """
    -640        obj = self.sql_db.session.query(SomeClass).filter_by(**kw).first()
    -641        if not obj:
    -642            obj = SomeClass(**kw)
    -643        return obj
    -644    
    -645    
    -646    def get_mol_formulas(self, odd_even_tag, classe_tuple, settings):
    -647        """ Get the molecular formulas
    -648        
    -649        Parameters
    -650        ----------
    -651        odd_even_tag : str
    -652            A string indicating whether to retrieve even or odd hydrogen atoms.
    -653        classe_tuple : tuple
    -654            
    -655        settings : object
    -656            An object containing user-defined settings.
    -657        
    -658        Returns
    -659        -------
    -660        list
    -661            A list of molecular formula data dictionaries.
    -662              
    -663        """
    -664        class_str = classe_tuple[0]
    -665        class_dict = classe_tuple[1]
    -666        classe_id = classe_tuple[2]
    -667        
    -668        results = list()
    -669        
    -670        if 'HC' in class_dict:
    -671            del class_dict['HC']
    -672            
    -673        class_dbe = self.calc_dbe_class(class_dict)    
    -674        class_mass = self.calc_mz(class_dict)
    -675        
    -676        carbonHydrogen_mass = self.odd_ch_mass if odd_even_tag == 'odd' else self.even_ch_mass 
    -677        carbonHydrogen_dbe = self.odd_ch_dbe if odd_even_tag == 'odd' else self.even_ch_dbe 
    -678        carbonHydrogen_id = self.odd_ch_id if odd_even_tag == 'odd' else self.even_ch_id 
    -679        
    -680        for index, carbonHydrogen_obj in enumerate(carbonHydrogen_id):
    -681            
    -682            mass = carbonHydrogen_mass[index] + class_mass
    -683            dbe =  carbonHydrogen_dbe[index] + class_dbe
    -684    
    -685            if settings.min_mz <= mass <= settings.max_mz:
    -686                
    -687                if settings.min_dbe <= dbe <= settings.max_dbe:
    -688                    
    -689                    molecularFormula=  {"heteroAtoms_id":classe_id, 
    -690                            "carbonHydrogen_id":carbonHydrogen_id[index], 
    -691                            "mass":mass, "DBE":dbe}
    -692                    
    -693                    results.append(molecularFormula)
    -694        
    -695        return results
    -696        
    -697        
    -698    def get_h_odd_or_even(self, class_dict):
    -699        """ Get the hydrogen odd or even
    -700        
    -701        Parameters
    -702        ----------
    -703        class_dict : dict
    -704            A dictionary of classes.
    -705        
    -706        Returns
    -707        -------
    -708        str
    -709            A string indicating whether to retrieve even or odd hydrogen atoms.
    -710        """
    -711        
    -712        HAS_NITROGEN = 'N' in class_dict.keys()
    -713
    -714        number_of_halogen = self.get_total_halogen_atoms(class_dict)
    -715        number_of_hetero = self.get_total_heteroatoms(class_dict)
    -716        
    -717        if number_of_halogen > 0:
    -718
    -719            HAS_HALOGEN = True
    -720
    -721        else:
    -722
    -723            HAS_HALOGEN = False
    -724
    -725        if HAS_HALOGEN:
    -726
    -727            remaining_halogen = number_of_halogen % 2
    -728
    -729        else:
    -730
    -731            remaining_halogen = 0
    +518            if len(classe_str) > 0:
    +519                classe_in_order[classe_str] = classe_dict
    +520
    +521        classe_in_order_dict = self.sort_classes(atoms_in_order, classe_in_order)
    +522
    +523        return classe_in_order_dict
    +524
    +525    @staticmethod
    +526    def sort_classes(atoms_in_order, combination_dict) -> Dict[str, Dict[str, int]]:
    +527        """Sort the classes in order
    +528
    +529        Parameters
    +530        ----------
    +531        atoms_in_order : list
    +532            A list of atoms in order.
    +533        combination_dict : dict
    +534            A dictionary of classes.
    +535
    +536        Returns
    +537        -------
    +538        dict
    +539            A dictionary of classes in order.
    +540        """
    +541        # ensures atoms are always in the order defined at atoms_in_order list
    +542        join_dict_classes = dict()
    +543        atoms_in_order = ["N", "S", "P", "O"] + atoms_in_order[4:] + ["HC"]
    +544
    +545        sort_method = lambda atoms_keys: [atoms_in_order.index(atoms_keys)]
    +546        for class_str, class_dict in combination_dict.items():
    +547            sorted_dict_keys = sorted(class_dict, key=sort_method)
    +548            class_dict = {atom: class_dict[atom] for atom in sorted_dict_keys}
    +549            class_str = json.dumps(class_dict)
    +550            # using json for the new database, class
    +551            # class_str = ' '.join([atom + str(class_dict[atom]) for atom in sorted_dict_keys])
    +552            join_dict_classes[class_str] = class_dict
    +553
    +554        return join_dict_classes
    +555
    +556    @staticmethod
    +557    def get_fixed_initial_number_of_hydrogen(min_h, odd_even):
    +558        """Get the fixed initial number of hydrogen atoms
    +559
    +560        Parameters
    +561        ----------
    +562        min_h : int
    +563            The minimum number of hydrogen atoms.
    +564        odd_even : str
    +565            A string indicating whether to retrieve even or odd hydrogen atoms.
    +566        """
    +567        remaining_h = min_h % 2
    +568
    +569        if odd_even == "even":
    +570            if remaining_h == 0:
    +571                return remaining_h
    +572
    +573            else:
    +574                return remaining_h + 1
    +575
    +576        else:
    +577            if remaining_h == 0:
    +578                return remaining_h + 1
    +579
    +580            else:
    +581                return remaining_h
    +582
    +583    def calc_mz(self, datadict, class_mass=0):
    +584        """Calculate the mass-to-charge ratio (m/z) of a molecular formula.
    +585
    +586        Parameters
    +587        ----------
    +588        datadict : dict
    +589            A dictionary of classes.
    +590        class_mass : int
    +591            The mass of the class.
    +592
    +593        Returns
    +594        -------
    +595        float
    +596            The mass-to-charge ratio (m/z) of a molecular formula.
    +597        """
    +598        mass = class_mass
    +599
    +600        for atom in datadict.keys():
    +601            if atom != "HC":
    +602                mass = mass + Atoms.atomic_masses[atom] * datadict.get(atom)
    +603
    +604        return mass
    +605
    +606    def calc_dbe_class(self, datadict):
    +607        """Calculate the double bond equivalent (DBE) of a molecular formula.
    +608
    +609        Parameters
    +610        ----------
    +611        datadict : dict
    +612            A dictionary of classes.
    +613
    +614        Returns
    +615        -------
    +616        float
    +617            The double bond equivalent (DBE) of a molecular formula.
    +618        """
    +619        init_dbe = 0
    +620        for atom in datadict.keys():
    +621            if atom == "HC":
    +622                continue
    +623
    +624            n_atom = int(datadict.get(atom))
    +625
    +626            clean_atom = "".join([i for i in atom if not i.isdigit()])
    +627
    +628            valencia = MSParameters.molecular_search.used_atom_valences.get(clean_atom)
    +629
    +630            if type(valencia) is tuple:
    +631                valencia = valencia[0]
    +632            if valencia > 0:
    +633                # print atom, valencia, n_atom, init_dbe
    +634                init_dbe = init_dbe + (n_atom * (valencia - 2))
    +635            else:
    +636                continue
    +637
    +638        return 0.5 * init_dbe
    +639
    +640    def populate_combinations(self, classe_tuple, settings):
    +641        """Populate the combinations
    +642
    +643        Parameters
    +644        ----------
    +645        classe_tuple : tuple
    +646            A tuple containing the class name, the class dictionary, and the class ID.
    +647        settings : object
    +648            An object containing user-defined settings.
    +649
    +650        Returns
    +651        -------
    +652        list
    +653            A list of molecular formula data dictionaries.
    +654        """
    +655        ion_charge = 0
    +656
    +657        class_dict = classe_tuple[1]
    +658        odd_or_even = self.get_h_odd_or_even(class_dict)
    +659
    +660        return self.get_mol_formulas(odd_or_even, classe_tuple, settings)
    +661
    +662    def get_or_add(self, SomeClass, kw):
    +663        """Get or add a class
    +664
    +665        Parameters
    +666        ----------
    +667        SomeClass : object
    +668            A class object.
    +669        kw : dict
    +670            A dictionary of classes.
    +671
    +672        Returns
    +673        -------
    +674        object
    +675            A class object.
    +676        """
    +677        obj = self.sql_db.session.query(SomeClass).filter_by(**kw).first()
    +678        if not obj:
    +679            obj = SomeClass(**kw)
    +680        return obj
    +681
    +682    def get_mol_formulas(self, odd_even_tag, classe_tuple, settings):
    +683        """Get the molecular formulas
    +684
    +685        Parameters
    +686        ----------
    +687        odd_even_tag : str
    +688            A string indicating whether to retrieve even or odd hydrogen atoms.
    +689        classe_tuple : tuple
    +690
    +691        settings : object
    +692            An object containing user-defined settings.
    +693
    +694        Returns
    +695        -------
    +696        list
    +697            A list of molecular formula data dictionaries.
    +698
    +699        """
    +700        class_str = classe_tuple[0]
    +701        class_dict = classe_tuple[1]
    +702        classe_id = classe_tuple[2]
    +703
    +704        results = list()
    +705
    +706        if "HC" in class_dict:
    +707            del class_dict["HC"]
    +708
    +709        class_dbe = self.calc_dbe_class(class_dict)
    +710        class_mass = self.calc_mz(class_dict)
    +711
    +712        carbonHydrogen_mass = (
    +713            self.odd_ch_mass if odd_even_tag == "odd" else self.even_ch_mass
    +714        )
    +715        carbonHydrogen_dbe = (
    +716            self.odd_ch_dbe if odd_even_tag == "odd" else self.even_ch_dbe
    +717        )
    +718        carbonHydrogen_id = self.odd_ch_id if odd_even_tag == "odd" else self.even_ch_id
    +719
    +720        for index, carbonHydrogen_obj in enumerate(carbonHydrogen_id):
    +721            mass = carbonHydrogen_mass[index] + class_mass
    +722            dbe = carbonHydrogen_dbe[index] + class_dbe
    +723
    +724            if settings.min_mz <= mass <= settings.max_mz:
    +725                if settings.min_dbe <= dbe <= settings.max_dbe:
    +726                    molecularFormula = {
    +727                        "heteroAtoms_id": classe_id,
    +728                        "carbonHydrogen_id": carbonHydrogen_id[index],
    +729                        "mass": mass,
    +730                        "DBE": dbe,
    +731                    }
     732
    -733
    -734        if number_of_hetero > 0:
    -735
    -736            HAS_OTHER_HETERO = True
    -737
    -738            total_hetero_valence = self.get_total_hetero_valence(class_dict)
    -739        
    -740        else: 
    -741
    -742            HAS_OTHER_HETERO = False
    -743
    -744            total_hetero_valence = 0
    -745
    -746        if HAS_OTHER_HETERO:
    -747
    -748            remaining_hetero_valence = total_hetero_valence % 2
    -749
    -750        else:
    -751
    -752            remaining_hetero_valence = 0
    -753
    -754            
    -755        if HAS_NITROGEN and not HAS_OTHER_HETERO:
    -756
    -757            number_of_n = class_dict.get('N')
    -758            remaining_n = number_of_n % 2
    -759
    -760        elif HAS_NITROGEN and HAS_OTHER_HETERO:
    +733                    results.append(molecularFormula)
    +734
    +735        return results
    +736
    +737    def get_h_odd_or_even(self, class_dict):
    +738        """Get the hydrogen odd or even
    +739
    +740        Parameters
    +741        ----------
    +742        class_dict : dict
    +743            A dictionary of classes.
    +744
    +745        Returns
    +746        -------
    +747        str
    +748            A string indicating whether to retrieve even or odd hydrogen atoms.
    +749        """
    +750
    +751        HAS_NITROGEN = "N" in class_dict.keys()
    +752
    +753        number_of_halogen = self.get_total_halogen_atoms(class_dict)
    +754        number_of_hetero = self.get_total_heteroatoms(class_dict)
    +755
    +756        if number_of_halogen > 0:
    +757            HAS_HALOGEN = True
    +758
    +759        else:
    +760            HAS_HALOGEN = False
     761
    -762            number_of_n = class_dict.get('N') 
    -763            remaining_n = (number_of_n + remaining_hetero_valence )% 2 
    +762        if HAS_HALOGEN:
    +763            remaining_halogen = number_of_halogen % 2
     764
    -765        elif HAS_OTHER_HETERO and not HAS_NITROGEN:
    -766
    -767            remaining_n = remaining_hetero_valence
    -768
    -769        else:
    +765        else:
    +766            remaining_halogen = 0
    +767
    +768        if number_of_hetero > 0:
    +769            HAS_OTHER_HETERO = True
     770
    -771            remaining_n = -1
    +771            total_hetero_valence = self.get_total_hetero_valence(class_dict)
     772
    -773        if remaining_n > 0.0:
    -774
    -775            if HAS_NITROGEN or HAS_OTHER_HETERO:
    -776
    -777                if HAS_HALOGEN:
    -778                    if remaining_halogen == 0:
    -779                        return 'odd'
    -780                    else:
    -781                        return 'even'
    -782                
    -783                else:
    -784                    return 'odd'
    -785
    -786        elif remaining_n == 0.0:
    +773        else:
    +774            HAS_OTHER_HETERO = False
    +775
    +776            total_hetero_valence = 0
    +777
    +778        if HAS_OTHER_HETERO:
    +779            remaining_hetero_valence = total_hetero_valence % 2
    +780
    +781        else:
    +782            remaining_hetero_valence = 0
    +783
    +784        if HAS_NITROGEN and not HAS_OTHER_HETERO:
    +785            number_of_n = class_dict.get("N")
    +786            remaining_n = number_of_n % 2
     787
    -788            if HAS_NITROGEN or HAS_OTHER_HETERO:
    -789
    -790                if HAS_HALOGEN:
    -791                    if remaining_halogen == 0:
    -792                        return 'even'
    -793                    else:
    -794                        return 'odd'
    -795                
    -796                else:
    -797                    return 'even'
    -798                
    -799        else:
    -800
    -801            if HAS_HALOGEN:
    -802                if remaining_halogen == 0:
    -803                    return 'even'
    -804                else:
    -805                    return 'odd'
    -806            
    -807            else:
    -808                return 'even'
    -809            
    -810            
    -811
    -812    @staticmethod
    -813    def get_total_heteroatoms(class_dict):
    -814        """ Get the total number of heteroatoms other than N, F, Cl, Br
    -815        
    -816        Parameters
    -817        ----------
    -818        class_dict : dict
    -819            A dictionary of classes.
    -820        
    -821        Returns
    -822        -------
    -823        int
    -824            The total number of heteroatoms.
    -825        """
    +788        elif HAS_NITROGEN and HAS_OTHER_HETERO:
    +789            number_of_n = class_dict.get("N")
    +790            remaining_n = (number_of_n + remaining_hetero_valence) % 2
    +791
    +792        elif HAS_OTHER_HETERO and not HAS_NITROGEN:
    +793            remaining_n = remaining_hetero_valence
    +794
    +795        else:
    +796            remaining_n = -1
    +797
    +798        if remaining_n > 0.0:
    +799            if HAS_NITROGEN or HAS_OTHER_HETERO:
    +800                if HAS_HALOGEN:
    +801                    if remaining_halogen == 0:
    +802                        return "odd"
    +803                    else:
    +804                        return "even"
    +805
    +806                else:
    +807                    return "odd"
    +808
    +809        elif remaining_n == 0.0:
    +810            if HAS_NITROGEN or HAS_OTHER_HETERO:
    +811                if HAS_HALOGEN:
    +812                    if remaining_halogen == 0:
    +813                        return "even"
    +814                    else:
    +815                        return "odd"
    +816
    +817                else:
    +818                    return "even"
    +819
    +820        else:
    +821            if HAS_HALOGEN:
    +822                if remaining_halogen == 0:
    +823                    return "even"
    +824                else:
    +825                    return "odd"
     826
    -827        total_number = 0
    -828        
    -829        for atom in class_dict.keys():
    -830
    -831            if atom not in ['HC','C','H','O','N', 'F', 'Cl', 'Br']:
    -832                total_number = total_number + class_dict.get(atom)
    -833        
    -834        return total_number                
    -835
    -836    @staticmethod
    -837    def get_total_hetero_valence(class_dict):
    -838        """ Get the total valence of heteroatoms other than N, F, Cl, Br
    -839        
    -840        Parameters
    -841        ----------
    -842        class_dict : dict
    -843            A dictionary of classes.
    -844        
    -845        Returns
    -846        -------
    -847        int
    -848            The total heteroatom valence.
    -849        """
    -850        total_valence = 0
    -851        
    -852        for atom in class_dict.keys():
    -853
    -854            if atom not in ['HC','C','H','O','N', 'F', 'Cl', 'Br']:
    -855
    -856                clean_atom = ''.join([i for i in atom if not i.isdigit()]) 
    -857
    -858                atom_valence = MSParameters.molecular_search.used_atom_valences.get(clean_atom)
    -859
    -860                if type(atom_valence) is tuple:
    -861                    atom_valence = atom_valence[0]
    -862
    -863                n_atom =int(class_dict.get(atom))
    -864
    -865                n_atom_valence = atom_valence * n_atom
    -866                
    -867                total_valence = total_valence + n_atom_valence
    +827            else:
    +828                return "even"
    +829
    +830    @staticmethod
    +831    def get_total_heteroatoms(class_dict):
    +832        """Get the total number of heteroatoms other than N, F, Cl, Br
    +833
    +834        Parameters
    +835        ----------
    +836        class_dict : dict
    +837            A dictionary of classes.
    +838
    +839        Returns
    +840        -------
    +841        int
    +842            The total number of heteroatoms.
    +843        """
    +844
    +845        total_number = 0
    +846
    +847        for atom in class_dict.keys():
    +848            if atom not in ["HC", "C", "H", "O", "N", "F", "Cl", "Br"]:
    +849                total_number = total_number + class_dict.get(atom)
    +850
    +851        return total_number
    +852
    +853    @staticmethod
    +854    def get_total_hetero_valence(class_dict):
    +855        """Get the total valence of heteroatoms other than N, F, Cl, Br
    +856
    +857        Parameters
    +858        ----------
    +859        class_dict : dict
    +860            A dictionary of classes.
    +861
    +862        Returns
    +863        -------
    +864        int
    +865            The total heteroatom valence.
    +866        """
    +867        total_valence = 0
     868
    -869        return total_valence  
    -870    
    -871    @staticmethod
    -872    def get_total_halogen_atoms(class_dict):
    -873        """ Get the total number of halogen atoms
    -874        
    -875        Parameters
    -876        ----------
    -877        class_dict : dict
    -878            A dictionary of classes.
    -879        
    -880        Returns
    -881        -------
    -882        int
    -883            The total number of halogen atoms.
    -884        """
    -885        atoms = ['F', 'Cl', 'Br']
    -886
    -887        total_number = 0
    -888        
    -889        for atom in atoms:
    -890
    -891            if atom in class_dict.keys():
    -892
    -893                total_number = total_number + class_dict.get(atom)
    -894        
    -895        return total_number    
    +869        for atom in class_dict.keys():
    +870            if atom not in ["HC", "C", "H", "O", "N", "F", "Cl", "Br"]:
    +871                clean_atom = "".join([i for i in atom if not i.isdigit()])
    +872
    +873                atom_valence = MSParameters.molecular_search.used_atom_valences.get(
    +874                    clean_atom
    +875                )
    +876
    +877                if type(atom_valence) is tuple:
    +878                    atom_valence = atom_valence[0]
    +879
    +880                n_atom = int(class_dict.get(atom))
    +881
    +882                n_atom_valence = atom_valence * n_atom
    +883
    +884                total_valence = total_valence + n_atom_valence
    +885
    +886        return total_valence
    +887
    +888    @staticmethod
    +889    def get_total_halogen_atoms(class_dict):
    +890        """Get the total number of halogen atoms
    +891
    +892        Parameters
    +893        ----------
    +894        class_dict : dict
    +895            A dictionary of classes.
    +896
    +897        Returns
    +898        -------
    +899        int
    +900            The total number of halogen atoms.
    +901        """
    +902        atoms = ["F", "Cl", "Br"]
    +903
    +904        total_number = 0
    +905
    +906        for atom in atoms:
    +907            if atom in class_dict.keys():
    +908                total_number = total_number + class_dict.get(atom)
    +909
    +910        return total_number
     
    @@ -1969,36 +1996,36 @@
    Attributes
    Methods
      -
    • cProfile_worker(args)
      -A cProfile worker for the get_mol_formulas function.
    • -
    • check_database_get_class_list(molecular_search_settings)
      -Checks if the database has all the classes, if not create the missing classes.
    • -
    • get_carbonsHydrogens(settings, odd_even)
      -Retrieves carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.
    • -
    • add_carbonsHydrogens(settings, existing_classes_objs)
      -Adds carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.
    • -
    • runworker(molecular_search_settings)
      -Runs the molecular formula lookup table worker.
    • -
    • get_classes_in_order(molecular_search_settings)
      -Gets the classes in order.
    • -
    • sort_classes(atoms_in_order, combination_dict)
      -Sorts the classes in order.
    • -
    • get_fixed_initial_number_of_hydrogen(min_h, odd_even)
      -Gets the fixed initial number of hydrogen atoms.
    • -
    • calc_mz(datadict, class_mass=0)
      -Calculates the mass-to-charge ratio (m/z) of a molecular formula.
    • -
    • calc_dbe_class(datadict)
      -Calculates the double bond equivalent (DBE) of a molecular formula.
    • -
    • populate_combinations(classe_tuple, settings)
      -Populates the combinations.
    • -
    • get_or_add(SomeClass, kw)
      -Gets or adds a class.
    • -
    • get_mol_formulas(odd_even_tag, classe_tuple, settings)
      -Gets the molecular formulas.
    • -
    • get_h_odd_or_even(class_dict)
      -Gets the hydrogen odd or even.
    • -
    • get_total_halogen_atoms(class_dict)
      -Gets the total number of halogen atoms.
    • +
    • cProfile_worker(args) +A cProfile worker for the get_mol_formulas function.
    • +
    • check_database_get_class_list(molecular_search_settings) +Checks if the database has all the classes, if not create the missing classes.
    • +
    • get_carbonsHydrogens(settings, odd_even) +Retrieves carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.
    • +
    • add_carbonsHydrogens(settings, existing_classes_objs) +Adds carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.
    • +
    • runworker(molecular_search_settings) +Runs the molecular formula lookup table worker.
    • +
    • get_classes_in_order(molecular_search_settings) +Gets the classes in order.
    • +
    • sort_classes(atoms_in_order, combination_dict) +Sorts the classes in order.
    • +
    • get_fixed_initial_number_of_hydrogen(min_h, odd_even) +Gets the fixed initial number of hydrogen atoms.
    • +
    • calc_mz(datadict, class_mass=0) +Calculates the mass-to-charge ratio (m/z) of a molecular formula.
    • +
    • calc_dbe_class(datadict) +Calculates the double bond equivalent (DBE) of a molecular formula.
    • +
    • populate_combinations(classe_tuple, settings) +Populates the combinations.
    • +
    • get_or_add(SomeClass, kw) +Gets or adds a class.
    • +
    • get_mol_formulas(odd_even_tag, classe_tuple, settings) +Gets the molecular formulas.
    • +
    • get_h_odd_or_even(class_dict) +Gets the hydrogen odd or even.
    • +
    • get_total_halogen_atoms(class_dict) +Gets the total number of halogen atoms.
    • get_total_hetero_valence(class_dict) Gets the total valence of heteroatoms other than N, F, Cl, and Br
    @@ -2015,14 +2042,11 @@
    Methods
    -
    133    def __init__(self, sql_db = None):
    -134
    +            
    134    def __init__(self, sql_db=None):
     135        if not sql_db:
    -136            
    -137            self.sql_db = MolForm_SQL()
    -138        else:
    -139            
    -140            self.sql_db = sql_db
    +136            self.sql_db = MolForm_SQL()
    +137        else:
    +138            self.sql_db = sql_db
     
    @@ -2040,9 +2064,14 @@
    Methods
    -
    142    def cProfile_worker(self, args):
    -143        """ cProfile worker for the get_mol_formulas function"""
    -144        cProfile.runctx('self.get_mol_formulas(*args)', globals(), locals(), 'mf_database_cprofile.prof')
    +            
    140    def cProfile_worker(self, args):
    +141        """cProfile worker for the get_mol_formulas function"""
    +142        cProfile.runctx(
    +143            "self.get_mol_formulas(*args)",
    +144            globals(),
    +145            locals(),
    +146            "mf_database_cprofile.prof",
    +147        )
     
    @@ -2062,58 +2091,69 @@
    Methods
    -
    146    def check_database_get_class_list(self, molecular_search_settings):
    -147        """ check if the database has all the classes, if not create the missing classes
    -148        
    -149        Parameters
    -150        ----------
    -151        molecular_search_settings : object
    -152            An object containing user-defined settings.
    -153        
    -154        Returns
    -155        -------
    -156        list
    -157            list of tuples with the class name and the class dictionary
    -158        """
    -159        all_class_to_create = []
    -160        
    -161        classes_dict = self.get_classes_in_order(molecular_search_settings)
    -162        
    -163        class_str_set = set(classes_dict.keys())
    -164        
    -165        existing_classes_objs = self.sql_db.session.query(HeteroAtoms).distinct().all()
    -166        
    -167        existing_classes_str = set([classe.name for classe in existing_classes_objs])
    -168
    -169        self.len_existing_classes = len(existing_classes_str)
    -170
    -171        class_to_create = class_str_set - existing_classes_str
    -172        
    -173        class_count= len(existing_classes_objs)
    -174
    -175        data_classes = list()    
    -176        for index, class_str in enumerate(class_to_create):
    -177            
    -178            class_dict = classes_dict.get(class_str)
    -179            halogen_count = self.get_total_halogen_atoms(class_dict)
    -180            data_classes.append({"name":class_str, "id":class_count+ index + 1, "halogensCount": halogen_count})
    -181
    -182        #data_classes = [{"name":class_str, "id":class_count+ index + 1} for index, class_str in enumerate(class_to_create)]
    -183
    -184        if data_classes:
    -185
    -186            list_insert_chunks = chunks(data_classes, self.sql_db.chunks_count)
    -187            for insert_chunk in  list_insert_chunks:   
    -188                insert_query = HeteroAtoms.__table__.insert().values(insert_chunk)
    -189                self.sql_db.session.execute(insert_query)
    -190
    -191        for index, class_str in enumerate(class_to_create):
    -192
    -193            class_tuple = (class_str, classes_dict.get(class_str), class_count+ index + 1)
    -194
    -195            all_class_to_create.append(class_tuple)
    -196
    -197        return [(c_s, c_d) for c_s, c_d in classes_dict.items()], all_class_to_create, existing_classes_objs       
    +            
    149    def check_database_get_class_list(self, molecular_search_settings):
    +150        """check if the database has all the classes, if not create the missing classes
    +151
    +152        Parameters
    +153        ----------
    +154        molecular_search_settings : object
    +155            An object containing user-defined settings.
    +156
    +157        Returns
    +158        -------
    +159        list
    +160            list of tuples with the class name and the class dictionary
    +161        """
    +162        all_class_to_create = []
    +163
    +164        classes_dict = self.get_classes_in_order(molecular_search_settings)
    +165
    +166        class_str_set = set(classes_dict.keys())
    +167
    +168        existing_classes_objs = self.sql_db.session.query(HeteroAtoms).distinct().all()
    +169
    +170        existing_classes_str = set([classe.name for classe in existing_classes_objs])
    +171
    +172        self.len_existing_classes = len(existing_classes_str)
    +173
    +174        class_to_create = class_str_set - existing_classes_str
    +175
    +176        class_count = len(existing_classes_objs)
    +177
    +178        data_classes = list()
    +179        for index, class_str in enumerate(class_to_create):
    +180            class_dict = classes_dict.get(class_str)
    +181            halogen_count = self.get_total_halogen_atoms(class_dict)
    +182            data_classes.append(
    +183                {
    +184                    "name": class_str,
    +185                    "id": class_count + index + 1,
    +186                    "halogensCount": halogen_count,
    +187                }
    +188            )
    +189
    +190        # data_classes = [{"name":class_str, "id":class_count+ index + 1} for index, class_str in enumerate(class_to_create)]
    +191
    +192        if data_classes:
    +193            list_insert_chunks = chunks(data_classes, self.sql_db.chunks_count)
    +194            for insert_chunk in list_insert_chunks:
    +195                insert_query = HeteroAtoms.__table__.insert().values(insert_chunk)
    +196                self.sql_db.session.execute(insert_query)
    +197
    +198        for index, class_str in enumerate(class_to_create):
    +199            class_tuple = (
    +200                class_str,
    +201                classes_dict.get(class_str),
    +202                class_count + index + 1,
    +203            )
    +204
    +205            all_class_to_create.append(class_tuple)
    +206
    +207        return (
    +208            [(c_s, c_d) for c_s, c_d in classes_dict.items()],
    +209            all_class_to_create,
    +210            existing_classes_objs,
    +211        )
     
    @@ -2146,32 +2186,34 @@
    Returns
    -
    199    def get_carbonsHydrogens(self, settings, odd_even):
    -200            """ Retrieve carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.
    -201
    -202            Parameters
    -203            ----------
    -204            settings : object
    -205                 An object containing user-defined settings.
    -206            odd_even : str
    -207                A string indicating whether to retrieve even or odd hydrogen atoms.
    -208
    -209            Returns
    -210            -------
    -211            list
    -212                A list of CarbonHydrogen objects that satisfy the specified conditions.
    -213            """
    -214            operator = '==' if odd_even == 'even' else '!=' 
    -215            usedAtoms = settings.usedAtoms
    -216            user_min_c, user_max_c = usedAtoms.get('C')
    -217            user_min_h, user_max_h = usedAtoms.get('H')
    -218
    -219            return eval("self.sql_db.session.query(CarbonHydrogen).filter(" 
    -220                                           "CarbonHydrogen.C >= user_min_c,"
    -221                                            "CarbonHydrogen.H >= user_min_h,"
    -222                                            "CarbonHydrogen.C <= user_max_c,"
    -223                                            "CarbonHydrogen.H <= user_max_h,"
    -224                                            "CarbonHydrogen.H % 2" + operator+ "0).all()")
    +            
    213    def get_carbonsHydrogens(self, settings, odd_even):
    +214        """Retrieve carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.
    +215
    +216        Parameters
    +217        ----------
    +218        settings : object
    +219             An object containing user-defined settings.
    +220        odd_even : str
    +221            A string indicating whether to retrieve even or odd hydrogen atoms.
    +222
    +223        Returns
    +224        -------
    +225        list
    +226            A list of CarbonHydrogen objects that satisfy the specified conditions.
    +227        """
    +228        operator = "==" if odd_even == "even" else "!="
    +229        usedAtoms = settings.usedAtoms
    +230        user_min_c, user_max_c = usedAtoms.get("C")
    +231        user_min_h, user_max_h = usedAtoms.get("H")
    +232
    +233        return eval(
    +234            "self.sql_db.session.query(CarbonHydrogen).filter("
    +235            "CarbonHydrogen.C >= user_min_c,"
    +236            "CarbonHydrogen.H >= user_min_h,"
    +237            "CarbonHydrogen.C <= user_max_c,"
    +238            "CarbonHydrogen.H <= user_max_h,"
    +239            "CarbonHydrogen.H % 2" + operator + "0).all()"
    +240        )
     
    @@ -2206,115 +2248,131 @@
    Returns
    -
    226    def add_carbonsHydrogens(self, settings, existing_classes_objs):
    -227        """ Add carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.
    -228
    -229        Parameters
    -230        ----------
    -231        settings : object
    -232            An object containing user-defined settings.
    -233        existing_classes_objs : list
    -234            A list of HeteroAtoms objects.
    -235        """
    -236        usedAtoms = settings.usedAtoms
    -237
    -238        user_min_c, user_max_c = usedAtoms.get('C')
    -239        user_min_h, user_max_h = usedAtoms.get('H')
    -240
    -241        query_obj = self.sql_db.session.query(func.max(CarbonHydrogen.C).label("max_c"), 
    -242                        func.min(CarbonHydrogen.C).label("min_c"),
    -243                        func.max(CarbonHydrogen.H).label("max_h"),
    -244                        func.min(CarbonHydrogen.H).label("min_h"),
    -245                        )
    -246
    -247
    -248        database = query_obj.first()
    -249        if database.max_c == user_max_c and database.min_c == user_min_c and database.max_h == user_max_h and database.min_h == user_min_h:   
    -250            #all data is already available at the database
    -251            pass
    -252
    -253        else:
    -254
    -255            current_count = self.sql_db.session.query(CarbonHydrogen.C).count()
    -256            
    -257            databaseCarbonHydrogen = self.sql_db.session.query(CarbonHydrogen).all()
    -258            
    -259            userCarbon = set(range(user_min_c, user_max_c + 1))
    -260            userHydrogen = set(range(user_min_h, user_max_h + 1))
    -261            
    -262            carbon_hydrogen_objs_database = {}
    -263            for obj in databaseCarbonHydrogen:
    -264                
    -265                str_data = "C:{},H:{}".format(obj.C, obj.H)
    -266                carbon_hydrogen_objs_database[str_data] = str_data
    -267
    -268            carbon_hydrogen_objs_to_create = {'even': {}, 'odd': {}}
    -269            
    -270            list_ch_obj_to_add = list()
    -271            i = 0
    -272            for comb in itertools.product(userCarbon, userHydrogen):
    -273                
    -274                C  = comb[0]
    -275                H =  comb[1]
    -276                data = {"C":C,
    -277                       "H":H,
    -278                }
    -279
    -280                data_insert = {"C":C,
    -281                       "H":H,
    -282                }
    -283
    -284                str_data = "C:{},H:{}".format(C,H)
    -285                
    -286                if not str_data in carbon_hydrogen_objs_database.keys():
    -287                    
    -288                    label = 'even' if comb[1]%2 == 0 else 'odd'
    -289                    data["mass"] = (C * Atoms.atomic_masses.get('C')) + (H * Atoms.atomic_masses.get('H'))
    -290                    data["dbe"] = C - (H/2) + 1
    -291                    data["id"] = i + current_count + 1
    -292                    data_insert["id"] = i + current_count + 1
    -293                    i = i + 1 
    -294                    carbon_hydrogen_objs_to_create[label][str_data] = data
    -295                    
    -296                    list_ch_obj_to_add.append(data_insert)
    -297
    -298            if list_ch_obj_to_add:
    -299                # insert carbon hydrogen objs
    -300                list_insert_chunks = chunks(list_ch_obj_to_add, self.sql_db.chunks_count)
    -301                for insert_chunk in  list_insert_chunks:   
    -302                    insert_query = CarbonHydrogen.__table__.insert().values(insert_chunk)
    -303                    self.sql_db.session.execute(insert_query)
    -304                self.sql_db.session.commit()    
    -305            
    -306                
    -307                list_molecular_form= list()
    -308                for classe_obj in existing_classes_objs:
    -309
    -310                    classe_dict = classe_obj.to_dict()  
    -311                    classe_mass = self.calc_mz(classe_dict)
    -312                    classe_dbe = self.calc_dbe_class(classe_dict)
    -313
    -314                    odd_even_label = self.get_h_odd_or_even(classe_dict)
    -315
    -316                    ch_datalist = carbon_hydrogen_objs_to_create.get(odd_even_label).values()
    -317
    -318                    for ch_dict in ch_datalist:
    -319                        mass = ch_dict.get('mass') + classe_mass
    -320                        dbe = ch_dict.get('dbe') + classe_dbe
    -321
    -322                        if settings.min_mz <= mass <= settings.max_mz:
    -323                
    -324                            if settings.min_dbe <= dbe <= settings.max_dbe:
    -325                                
    -326                                list_molecular_form.append( {"heteroAtoms_id":classe_obj.id, 
    -327                                        "carbonHydrogen_id":ch_dict.get('id'), 
    -328                                        "mass":mass, "DBE":dbe})
    -329
    -330                list_insert_chunks = chunks(list_molecular_form, self.sql_db.chunks_count)
    -331                for insert_chunk in  list_insert_chunks:   
    -332                    insert_query = MolecularFormulaLink.__table__.insert().values(insert_chunk)
    -333                    self.sql_db.session.execute(insert_query)
    -334                self.sql_db.session.commit()    
    +            
    242    def add_carbonsHydrogens(self, settings, existing_classes_objs):
    +243        """Add carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.
    +244
    +245        Parameters
    +246        ----------
    +247        settings : object
    +248            An object containing user-defined settings.
    +249        existing_classes_objs : list
    +250            A list of HeteroAtoms objects.
    +251        """
    +252        usedAtoms = settings.usedAtoms
    +253
    +254        user_min_c, user_max_c = usedAtoms.get("C")
    +255        user_min_h, user_max_h = usedAtoms.get("H")
    +256
    +257        query_obj = self.sql_db.session.query(
    +258            func.max(CarbonHydrogen.C).label("max_c"),
    +259            func.min(CarbonHydrogen.C).label("min_c"),
    +260            func.max(CarbonHydrogen.H).label("max_h"),
    +261            func.min(CarbonHydrogen.H).label("min_h"),
    +262        )
    +263
    +264        database = query_obj.first()
    +265        if (
    +266            database.max_c == user_max_c
    +267            and database.min_c == user_min_c
    +268            and database.max_h == user_max_h
    +269            and database.min_h == user_min_h
    +270        ):
    +271            # all data is already available at the database
    +272            pass
    +273
    +274        else:
    +275            current_count = self.sql_db.session.query(CarbonHydrogen.C).count()
    +276
    +277            databaseCarbonHydrogen = self.sql_db.session.query(CarbonHydrogen).all()
    +278
    +279            userCarbon = set(range(user_min_c, user_max_c + 1))
    +280            userHydrogen = set(range(user_min_h, user_max_h + 1))
    +281
    +282            carbon_hydrogen_objs_database = {}
    +283            for obj in databaseCarbonHydrogen:
    +284                str_data = "C:{},H:{}".format(obj.C, obj.H)
    +285                carbon_hydrogen_objs_database[str_data] = str_data
    +286
    +287            carbon_hydrogen_objs_to_create = {"even": {}, "odd": {}}
    +288
    +289            list_ch_obj_to_add = list()
    +290            i = 0
    +291            for comb in itertools.product(userCarbon, userHydrogen):
    +292                C = comb[0]
    +293                H = comb[1]
    +294                data = {
    +295                    "C": C,
    +296                    "H": H,
    +297                }
    +298
    +299                data_insert = {
    +300                    "C": C,
    +301                    "H": H,
    +302                }
    +303
    +304                str_data = "C:{},H:{}".format(C, H)
    +305
    +306                if not str_data in carbon_hydrogen_objs_database.keys():
    +307                    label = "even" if comb[1] % 2 == 0 else "odd"
    +308                    data["mass"] = (C * Atoms.atomic_masses.get("C")) + (
    +309                        H * Atoms.atomic_masses.get("H")
    +310                    )
    +311                    data["dbe"] = C - (H / 2) + 1
    +312                    data["id"] = i + current_count + 1
    +313                    data_insert["id"] = i + current_count + 1
    +314                    i = i + 1
    +315                    carbon_hydrogen_objs_to_create[label][str_data] = data
    +316
    +317                    list_ch_obj_to_add.append(data_insert)
    +318
    +319            if list_ch_obj_to_add:
    +320                # insert carbon hydrogen objs
    +321                list_insert_chunks = chunks(
    +322                    list_ch_obj_to_add, self.sql_db.chunks_count
    +323                )
    +324                for insert_chunk in list_insert_chunks:
    +325                    insert_query = CarbonHydrogen.__table__.insert().values(
    +326                        insert_chunk
    +327                    )
    +328                    self.sql_db.session.execute(insert_query)
    +329                self.sql_db.session.commit()
    +330
    +331                list_molecular_form = list()
    +332                for classe_obj in existing_classes_objs:
    +333                    classe_dict = classe_obj.to_dict()
    +334                    classe_mass = self.calc_mz(classe_dict)
    +335                    classe_dbe = self.calc_dbe_class(classe_dict)
    +336
    +337                    odd_even_label = self.get_h_odd_or_even(classe_dict)
    +338
    +339                    ch_datalist = carbon_hydrogen_objs_to_create.get(
    +340                        odd_even_label
    +341                    ).values()
    +342
    +343                    for ch_dict in ch_datalist:
    +344                        mass = ch_dict.get("mass") + classe_mass
    +345                        dbe = ch_dict.get("dbe") + classe_dbe
    +346
    +347                        if settings.min_mz <= mass <= settings.max_mz:
    +348                            if settings.min_dbe <= dbe <= settings.max_dbe:
    +349                                list_molecular_form.append(
    +350                                    {
    +351                                        "heteroAtoms_id": classe_obj.id,
    +352                                        "carbonHydrogen_id": ch_dict.get("id"),
    +353                                        "mass": mass,
    +354                                        "DBE": dbe,
    +355                                    }
    +356                                )
    +357
    +358                list_insert_chunks = chunks(
    +359                    list_molecular_form, self.sql_db.chunks_count
    +360                )
    +361                for insert_chunk in list_insert_chunks:
    +362                    insert_query = MolecularFormulaLink.__table__.insert().values(
    +363                        insert_chunk
    +364                    )
    +365                    self.sql_db.session.execute(insert_query)
    +366                self.sql_db.session.commit()
     
    @@ -2343,16 +2401,16 @@
    Parameters
    -
    17    def timed(*args, **kw):
    -18        ts = time.time()
    -19        result = method(*args, **kw)
    -20        te = time.time()
    -21        if 'log_time' in kw:
    -22            name = kw.get('log_name', method.__name__.upper())
    -23            kw['log_time'][name] = int((te - ts) * 1000)
    -24        else:
    -25            print("%r  %2.2f ms" % (method.__name__, (te - ts) * 1000))
    -26        return result
    +            
    18    def timed(*args, **kw):
    +19        ts = time.time()
    +20        result = method(*args, **kw)
    +21        te = time.time()
    +22        if "log_time" in kw:
    +23            name = kw.get("log_name", method.__name__.upper())
    +24            kw["log_time"][name] = int((te - ts) * 1000)
    +25        else:
    +26            print("%r  %2.2f ms" % (method.__name__, (te - ts) * 1000))
    +27        return result
     
    @@ -2385,82 +2443,80 @@
    Returns
    -
    409    def get_classes_in_order(self, molecular_search_settings):
    -410        """ Get the classes in order
    -411
    -412        Parameters
    -413        ----------
    -414        molecular_search_settings : object
    -415            An object containing user-defined settings.
    -416        
    -417        Returns
    -418        -------
    -419        dict
    -420            A dictionary of classes in order.
    -421            structure is  ('HC', {'HC': 1})
    -422        """
    -423        
    -424        usedAtoms = deepcopy(molecular_search_settings.usedAtoms)
    -425        
    -426        usedAtoms.pop("C")
    -427        usedAtoms.pop("H")
    -428
    -429        min_n, max_n = usedAtoms.get('N') if usedAtoms.get('N') else (0,0)
    -430        min_o, max_o = usedAtoms.get('O') if usedAtoms.get('O') else (0,0)
    -431        min_s, max_s = usedAtoms.get('S') if usedAtoms.get('S') else (0,0)
    -432        min_p, max_p = usedAtoms.get('P') if usedAtoms.get('P') else (0,0)
    -433
    -434        possible_n = [n for n in range(min_n, max_n + 1)]
    -435        possible_o = [o for o in range(min_o, max_o + 1)]
    -436        possible_s = [s for s in range(min_s, max_s + 1)]
    -437        possible_p = [p for p in range(min_p, max_p + 1)]
    -438        
    -439        atoms_in_order = ['N', 'O', 'S', 'P']
    -440
    -441        classe_in_order = {}
    -442
    -443        all_atoms_tuples = itertools.product(possible_n, possible_o,
    -444                                            possible_s, possible_p)
    -445        
    -446        for atom in atoms_in_order:
    -447            usedAtoms.pop(atom, None)
    -448        
    -449        for selected_atom, min_max_tuple in usedAtoms.items():
    -450            
    -451            min_x = min_max_tuple[0]
    -452            max_x = min_max_tuple[1]
    -453            
    -454
    -455            possible_x = [x for x in range(min_x, max_x + 1)]
    -456
    -457            all_atoms_tuples = itertools.product(all_atoms_tuples, possible_x)
    -458            all_atoms_tuples = [all_atoms_combined[0] + (all_atoms_combined[1],) for all_atoms_combined in
    -459                                all_atoms_tuples]
    -460            atoms_in_order.append(selected_atom)
    -461        
    -462        for all_atoms_tuple in all_atoms_tuples:
    -463
    -464            classe_str = ''
    -465            classe_dict = {}
    -466            
    -467            for each_atoms_index, atom_number in enumerate(all_atoms_tuple):
    -468                
    -469                if atom_number != 0:
    -470                    classe_dict[atoms_in_order[each_atoms_index]] = atom_number
    -471            
    -472            if not classe_dict:
    -473                classe_in_order['HC'] = {"HC": ""}
    -474                continue
    -475
    -476            classe_str =json.dumps(classe_dict)
    -477            
    -478            if len(classe_str) > 0:
    -479                
    -480                classe_in_order[classe_str] =  classe_dict
    -481        
    -482        classe_in_order_dict = self.sort_classes(atoms_in_order, classe_in_order)
    -483        
    -484        return classe_in_order_dict
    +            
    450    def get_classes_in_order(self, molecular_search_settings):
    +451        """Get the classes in order
    +452
    +453        Parameters
    +454        ----------
    +455        molecular_search_settings : object
    +456            An object containing user-defined settings.
    +457
    +458        Returns
    +459        -------
    +460        dict
    +461            A dictionary of classes in order.
    +462            structure is  ('HC', {'HC': 1})
    +463        """
    +464
    +465        usedAtoms = deepcopy(molecular_search_settings.usedAtoms)
    +466
    +467        usedAtoms.pop("C")
    +468        usedAtoms.pop("H")
    +469
    +470        min_n, max_n = usedAtoms.get("N") if usedAtoms.get("N") else (0, 0)
    +471        min_o, max_o = usedAtoms.get("O") if usedAtoms.get("O") else (0, 0)
    +472        min_s, max_s = usedAtoms.get("S") if usedAtoms.get("S") else (0, 0)
    +473        min_p, max_p = usedAtoms.get("P") if usedAtoms.get("P") else (0, 0)
    +474
    +475        possible_n = [n for n in range(min_n, max_n + 1)]
    +476        possible_o = [o for o in range(min_o, max_o + 1)]
    +477        possible_s = [s for s in range(min_s, max_s + 1)]
    +478        possible_p = [p for p in range(min_p, max_p + 1)]
    +479
    +480        atoms_in_order = ["N", "O", "S", "P"]
    +481
    +482        classe_in_order = {}
    +483
    +484        all_atoms_tuples = itertools.product(
    +485            possible_n, possible_o, possible_s, possible_p
    +486        )
    +487
    +488        for atom in atoms_in_order:
    +489            usedAtoms.pop(atom, None)
    +490
    +491        for selected_atom, min_max_tuple in usedAtoms.items():
    +492            min_x = min_max_tuple[0]
    +493            max_x = min_max_tuple[1]
    +494
    +495            possible_x = [x for x in range(min_x, max_x + 1)]
    +496
    +497            all_atoms_tuples = itertools.product(all_atoms_tuples, possible_x)
    +498            all_atoms_tuples = [
    +499                all_atoms_combined[0] + (all_atoms_combined[1],)
    +500                for all_atoms_combined in all_atoms_tuples
    +501            ]
    +502            atoms_in_order.append(selected_atom)
    +503
    +504        for all_atoms_tuple in all_atoms_tuples:
    +505            classe_str = ""
    +506            classe_dict = {}
    +507
    +508            for each_atoms_index, atom_number in enumerate(all_atoms_tuple):
    +509                if atom_number != 0:
    +510                    classe_dict[atoms_in_order[each_atoms_index]] = atom_number
    +511
    +512            if not classe_dict:
    +513                classe_in_order["HC"] = {"HC": ""}
    +514                continue
    +515
    +516            classe_str = json.dumps(classe_dict)
    +517
    +518            if len(classe_str) > 0:
    +519                classe_in_order[classe_str] = classe_dict
    +520
    +521        classe_in_order_dict = self.sort_classes(atoms_in_order, classe_in_order)
    +522
    +523        return classe_in_order_dict
     
    @@ -2495,37 +2551,36 @@
    Returns
    -
    486    @staticmethod
    -487    def sort_classes( atoms_in_order, combination_dict) -> Dict[str, Dict[str, int]]: 
    -488        """ Sort the classes in order
    -489        
    -490        Parameters
    -491        ----------
    -492        atoms_in_order : list
    -493            A list of atoms in order.
    -494        combination_dict : dict
    -495            A dictionary of classes.
    -496        
    -497        Returns
    -498        -------
    -499        dict
    -500            A dictionary of classes in order.
    -501        """
    -502        #ensures atoms are always in the order defined at atoms_in_order list
    -503        join_dict_classes = dict()
    -504        atoms_in_order =  ['N','S','P','O'] + atoms_in_order[4:] + ['HC']
    -505        
    -506        sort_method = lambda atoms_keys: [atoms_in_order.index(atoms_keys)] 
    -507        for class_str, class_dict in combination_dict.items():
    -508            
    -509            sorted_dict_keys = sorted(class_dict, key = sort_method)
    -510            class_dict = { atom: class_dict[atom] for atom in sorted_dict_keys}
    -511            class_str = json.dumps(class_dict)
    -512            # using json for the new database, class 
    -513            # class_str = ' '.join([atom + str(class_dict[atom]) for atom in sorted_dict_keys])
    -514            join_dict_classes[class_str] =  class_dict
    -515        
    -516        return join_dict_classes
    +            
    525    @staticmethod
    +526    def sort_classes(atoms_in_order, combination_dict) -> Dict[str, Dict[str, int]]:
    +527        """Sort the classes in order
    +528
    +529        Parameters
    +530        ----------
    +531        atoms_in_order : list
    +532            A list of atoms in order.
    +533        combination_dict : dict
    +534            A dictionary of classes.
    +535
    +536        Returns
    +537        -------
    +538        dict
    +539            A dictionary of classes in order.
    +540        """
    +541        # ensures atoms are always in the order defined at atoms_in_order list
    +542        join_dict_classes = dict()
    +543        atoms_in_order = ["N", "S", "P", "O"] + atoms_in_order[4:] + ["HC"]
    +544
    +545        sort_method = lambda atoms_keys: [atoms_in_order.index(atoms_keys)]
    +546        for class_str, class_dict in combination_dict.items():
    +547            sorted_dict_keys = sorted(class_dict, key=sort_method)
    +548            class_dict = {atom: class_dict[atom] for atom in sorted_dict_keys}
    +549            class_str = json.dumps(class_dict)
    +550            # using json for the new database, class
    +551            # class_str = ' '.join([atom + str(class_dict[atom]) for atom in sorted_dict_keys])
    +552            join_dict_classes[class_str] = class_dict
    +553
    +554        return join_dict_classes
     
    @@ -2561,30 +2616,32 @@
    Returns
    -
    518    @staticmethod
    -519    def get_fixed_initial_number_of_hydrogen( min_h, odd_even):
    -520        """ Get the fixed initial number of hydrogen atoms
    -521        
    -522        Parameters
    -523        ----------
    -524        min_h : int
    -525            The minimum number of hydrogen atoms.
    -526        odd_even : str
    -527            A string indicating whether to retrieve even or odd hydrogen atoms.
    -528        """
    -529        remaining_h = min_h % 2
    -530        
    -531        if odd_even == 'even':
    -532            
    -533            if remaining_h == 0: return remaining_h
    -534            
    -535            else: return remaining_h + 1    
    -536        
    -537        else:
    -538            
    -539            if remaining_h == 0: return remaining_h + 1
    -540            
    -541            else: return remaining_h    
    +            
    556    @staticmethod
    +557    def get_fixed_initial_number_of_hydrogen(min_h, odd_even):
    +558        """Get the fixed initial number of hydrogen atoms
    +559
    +560        Parameters
    +561        ----------
    +562        min_h : int
    +563            The minimum number of hydrogen atoms.
    +564        odd_even : str
    +565            A string indicating whether to retrieve even or odd hydrogen atoms.
    +566        """
    +567        remaining_h = min_h % 2
    +568
    +569        if odd_even == "even":
    +570            if remaining_h == 0:
    +571                return remaining_h
    +572
    +573            else:
    +574                return remaining_h + 1
    +575
    +576        else:
    +577            if remaining_h == 0:
    +578                return remaining_h + 1
    +579
    +580            else:
    +581                return remaining_h
     
    @@ -2613,30 +2670,28 @@
    Parameters
    -
    543    def calc_mz(self, datadict, class_mass=0):
    -544        """ Calculate the mass-to-charge ratio (m/z) of a molecular formula.
    -545        
    -546        Parameters
    -547        ----------
    -548        datadict : dict
    -549            A dictionary of classes.
    -550        class_mass : int
    -551            The mass of the class.
    -552        
    -553        Returns
    -554        -------
    -555        float
    -556            The mass-to-charge ratio (m/z) of a molecular formula.
    -557        """
    -558        mass = class_mass
    -559        
    -560        for atom in datadict.keys():
    -561            
    -562            if atom != 'HC':    
    -563                
    -564                mass = mass + Atoms.atomic_masses[atom]  *  datadict.get(atom)
    -565            
    -566        return mass 
    +            
    583    def calc_mz(self, datadict, class_mass=0):
    +584        """Calculate the mass-to-charge ratio (m/z) of a molecular formula.
    +585
    +586        Parameters
    +587        ----------
    +588        datadict : dict
    +589            A dictionary of classes.
    +590        class_mass : int
    +591            The mass of the class.
    +592
    +593        Returns
    +594        -------
    +595        float
    +596            The mass-to-charge ratio (m/z) of a molecular formula.
    +597        """
    +598        mass = class_mass
    +599
    +600        for atom in datadict.keys():
    +601            if atom != "HC":
    +602                mass = mass + Atoms.atomic_masses[atom] * datadict.get(atom)
    +603
    +604        return mass
     
    @@ -2671,40 +2726,39 @@
    Returns
    -
    568    def calc_dbe_class(self, datadict):
    -569        """ Calculate the double bond equivalent (DBE) of a molecular formula.
    -570        
    -571        Parameters
    -572        ----------
    -573        datadict : dict
    -574            A dictionary of classes.
    -575        
    -576        Returns
    -577        -------
    -578        float
    -579            The double bond equivalent (DBE) of a molecular formula.
    -580        """
    -581        init_dbe = 0
    -582        for atom in datadict.keys():
    -583
    -584            if atom == 'HC':
    -585                continue  
    -586            
    -587            n_atom = int(datadict.get(atom))
    -588            
    -589            clean_atom = ''.join([i for i in atom if not i.isdigit()]) 
    -590            
    -591            valencia = MSParameters.molecular_search.used_atom_valences.get(clean_atom)
    -592            
    -593            if type(valencia) is tuple:
    -594                valencia = valencia[0]
    -595            if valencia > 0:
    -596                #print atom, valencia, n_atom, init_dbe
    -597                init_dbe = init_dbe + (n_atom * (valencia - 2))
    -598            else:
    -599                continue
    -600            
    -601        return (0.5 * init_dbe)
    +            
    606    def calc_dbe_class(self, datadict):
    +607        """Calculate the double bond equivalent (DBE) of a molecular formula.
    +608
    +609        Parameters
    +610        ----------
    +611        datadict : dict
    +612            A dictionary of classes.
    +613
    +614        Returns
    +615        -------
    +616        float
    +617            The double bond equivalent (DBE) of a molecular formula.
    +618        """
    +619        init_dbe = 0
    +620        for atom in datadict.keys():
    +621            if atom == "HC":
    +622                continue
    +623
    +624            n_atom = int(datadict.get(atom))
    +625
    +626            clean_atom = "".join([i for i in atom if not i.isdigit()])
    +627
    +628            valencia = MSParameters.molecular_search.used_atom_valences.get(clean_atom)
    +629
    +630            if type(valencia) is tuple:
    +631                valencia = valencia[0]
    +632            if valencia > 0:
    +633                # print atom, valencia, n_atom, init_dbe
    +634                init_dbe = init_dbe + (n_atom * (valencia - 2))
    +635            else:
    +636                continue
    +637
    +638        return 0.5 * init_dbe
     
    @@ -2737,27 +2791,27 @@
    Returns
    -
    603    def populate_combinations(self, classe_tuple, settings):
    -604        """ Populate the combinations
    -605        
    -606        Parameters
    -607        ----------
    -608        classe_tuple : tuple
    -609            A tuple containing the class name, the class dictionary, and the class ID.
    -610        settings : object
    -611            An object containing user-defined settings.
    -612        
    -613        Returns
    -614        -------
    -615        list
    -616            A list of molecular formula data dictionaries.
    -617        """
    -618        ion_charge =  0
    -619        
    -620        class_dict = classe_tuple[1]
    -621        odd_or_even = self.get_h_odd_or_even(class_dict)
    -622        
    -623        return self.get_mol_formulas(odd_or_even, classe_tuple, settings)
    +            
    640    def populate_combinations(self, classe_tuple, settings):
    +641        """Populate the combinations
    +642
    +643        Parameters
    +644        ----------
    +645        classe_tuple : tuple
    +646            A tuple containing the class name, the class dictionary, and the class ID.
    +647        settings : object
    +648            An object containing user-defined settings.
    +649
    +650        Returns
    +651        -------
    +652        list
    +653            A list of molecular formula data dictionaries.
    +654        """
    +655        ion_charge = 0
    +656
    +657        class_dict = classe_tuple[1]
    +658        odd_or_even = self.get_h_odd_or_even(class_dict)
    +659
    +660        return self.get_mol_formulas(odd_or_even, classe_tuple, settings)
     
    @@ -2792,25 +2846,25 @@
    Returns
    -
    625    def get_or_add(self, SomeClass, kw):
    -626        """ Get or add a class
    -627        
    -628        Parameters
    -629        ----------
    -630        SomeClass : object
    -631            A class object.
    -632        kw : dict
    -633            A dictionary of classes.
    -634        
    -635        Returns
    -636        -------
    -637        object
    -638            A class object.
    -639        """
    -640        obj = self.sql_db.session.query(SomeClass).filter_by(**kw).first()
    -641        if not obj:
    -642            obj = SomeClass(**kw)
    -643        return obj
    +            
    662    def get_or_add(self, SomeClass, kw):
    +663        """Get or add a class
    +664
    +665        Parameters
    +666        ----------
    +667        SomeClass : object
    +668            A class object.
    +669        kw : dict
    +670            A dictionary of classes.
    +671
    +672        Returns
    +673        -------
    +674        object
    +675            A class object.
    +676        """
    +677        obj = self.sql_db.session.query(SomeClass).filter_by(**kw).first()
    +678        if not obj:
    +679            obj = SomeClass(**kw)
    +680        return obj
     
    @@ -2845,56 +2899,60 @@
    Returns
    -
    646    def get_mol_formulas(self, odd_even_tag, classe_tuple, settings):
    -647        """ Get the molecular formulas
    -648        
    -649        Parameters
    -650        ----------
    -651        odd_even_tag : str
    -652            A string indicating whether to retrieve even or odd hydrogen atoms.
    -653        classe_tuple : tuple
    -654            
    -655        settings : object
    -656            An object containing user-defined settings.
    -657        
    -658        Returns
    -659        -------
    -660        list
    -661            A list of molecular formula data dictionaries.
    -662              
    -663        """
    -664        class_str = classe_tuple[0]
    -665        class_dict = classe_tuple[1]
    -666        classe_id = classe_tuple[2]
    -667        
    -668        results = list()
    -669        
    -670        if 'HC' in class_dict:
    -671            del class_dict['HC']
    -672            
    -673        class_dbe = self.calc_dbe_class(class_dict)    
    -674        class_mass = self.calc_mz(class_dict)
    -675        
    -676        carbonHydrogen_mass = self.odd_ch_mass if odd_even_tag == 'odd' else self.even_ch_mass 
    -677        carbonHydrogen_dbe = self.odd_ch_dbe if odd_even_tag == 'odd' else self.even_ch_dbe 
    -678        carbonHydrogen_id = self.odd_ch_id if odd_even_tag == 'odd' else self.even_ch_id 
    -679        
    -680        for index, carbonHydrogen_obj in enumerate(carbonHydrogen_id):
    -681            
    -682            mass = carbonHydrogen_mass[index] + class_mass
    -683            dbe =  carbonHydrogen_dbe[index] + class_dbe
    -684    
    -685            if settings.min_mz <= mass <= settings.max_mz:
    -686                
    -687                if settings.min_dbe <= dbe <= settings.max_dbe:
    -688                    
    -689                    molecularFormula=  {"heteroAtoms_id":classe_id, 
    -690                            "carbonHydrogen_id":carbonHydrogen_id[index], 
    -691                            "mass":mass, "DBE":dbe}
    -692                    
    -693                    results.append(molecularFormula)
    -694        
    -695        return results
    +            
    682    def get_mol_formulas(self, odd_even_tag, classe_tuple, settings):
    +683        """Get the molecular formulas
    +684
    +685        Parameters
    +686        ----------
    +687        odd_even_tag : str
    +688            A string indicating whether to retrieve even or odd hydrogen atoms.
    +689        classe_tuple : tuple
    +690
    +691        settings : object
    +692            An object containing user-defined settings.
    +693
    +694        Returns
    +695        -------
    +696        list
    +697            A list of molecular formula data dictionaries.
    +698
    +699        """
    +700        class_str = classe_tuple[0]
    +701        class_dict = classe_tuple[1]
    +702        classe_id = classe_tuple[2]
    +703
    +704        results = list()
    +705
    +706        if "HC" in class_dict:
    +707            del class_dict["HC"]
    +708
    +709        class_dbe = self.calc_dbe_class(class_dict)
    +710        class_mass = self.calc_mz(class_dict)
    +711
    +712        carbonHydrogen_mass = (
    +713            self.odd_ch_mass if odd_even_tag == "odd" else self.even_ch_mass
    +714        )
    +715        carbonHydrogen_dbe = (
    +716            self.odd_ch_dbe if odd_even_tag == "odd" else self.even_ch_dbe
    +717        )
    +718        carbonHydrogen_id = self.odd_ch_id if odd_even_tag == "odd" else self.even_ch_id
    +719
    +720        for index, carbonHydrogen_obj in enumerate(carbonHydrogen_id):
    +721            mass = carbonHydrogen_mass[index] + class_mass
    +722            dbe = carbonHydrogen_dbe[index] + class_dbe
    +723
    +724            if settings.min_mz <= mass <= settings.max_mz:
    +725                if settings.min_dbe <= dbe <= settings.max_dbe:
    +726                    molecularFormula = {
    +727                        "heteroAtoms_id": classe_id,
    +728                        "carbonHydrogen_id": carbonHydrogen_id[index],
    +729                        "mass": mass,
    +730                        "DBE": dbe,
    +731                    }
    +732
    +733                    results.append(molecularFormula)
    +734
    +735        return results
     
    @@ -2930,117 +2988,98 @@
    Returns
    -
    698    def get_h_odd_or_even(self, class_dict):
    -699        """ Get the hydrogen odd or even
    -700        
    -701        Parameters
    -702        ----------
    -703        class_dict : dict
    -704            A dictionary of classes.
    -705        
    -706        Returns
    -707        -------
    -708        str
    -709            A string indicating whether to retrieve even or odd hydrogen atoms.
    -710        """
    -711        
    -712        HAS_NITROGEN = 'N' in class_dict.keys()
    -713
    -714        number_of_halogen = self.get_total_halogen_atoms(class_dict)
    -715        number_of_hetero = self.get_total_heteroatoms(class_dict)
    -716        
    -717        if number_of_halogen > 0:
    -718
    -719            HAS_HALOGEN = True
    -720
    -721        else:
    -722
    -723            HAS_HALOGEN = False
    -724
    -725        if HAS_HALOGEN:
    -726
    -727            remaining_halogen = number_of_halogen % 2
    -728
    -729        else:
    -730
    -731            remaining_halogen = 0
    -732
    -733
    -734        if number_of_hetero > 0:
    -735
    -736            HAS_OTHER_HETERO = True
    -737
    -738            total_hetero_valence = self.get_total_hetero_valence(class_dict)
    -739        
    -740        else: 
    -741
    -742            HAS_OTHER_HETERO = False
    -743
    -744            total_hetero_valence = 0
    -745
    -746        if HAS_OTHER_HETERO:
    -747
    -748            remaining_hetero_valence = total_hetero_valence % 2
    -749
    -750        else:
    -751
    -752            remaining_hetero_valence = 0
    -753
    -754            
    -755        if HAS_NITROGEN and not HAS_OTHER_HETERO:
    -756
    -757            number_of_n = class_dict.get('N')
    -758            remaining_n = number_of_n % 2
    -759
    -760        elif HAS_NITROGEN and HAS_OTHER_HETERO:
    +            
    737    def get_h_odd_or_even(self, class_dict):
    +738        """Get the hydrogen odd or even
    +739
    +740        Parameters
    +741        ----------
    +742        class_dict : dict
    +743            A dictionary of classes.
    +744
    +745        Returns
    +746        -------
    +747        str
    +748            A string indicating whether to retrieve even or odd hydrogen atoms.
    +749        """
    +750
    +751        HAS_NITROGEN = "N" in class_dict.keys()
    +752
    +753        number_of_halogen = self.get_total_halogen_atoms(class_dict)
    +754        number_of_hetero = self.get_total_heteroatoms(class_dict)
    +755
    +756        if number_of_halogen > 0:
    +757            HAS_HALOGEN = True
    +758
    +759        else:
    +760            HAS_HALOGEN = False
     761
    -762            number_of_n = class_dict.get('N') 
    -763            remaining_n = (number_of_n + remaining_hetero_valence )% 2 
    +762        if HAS_HALOGEN:
    +763            remaining_halogen = number_of_halogen % 2
     764
    -765        elif HAS_OTHER_HETERO and not HAS_NITROGEN:
    -766
    -767            remaining_n = remaining_hetero_valence
    -768
    -769        else:
    +765        else:
    +766            remaining_halogen = 0
    +767
    +768        if number_of_hetero > 0:
    +769            HAS_OTHER_HETERO = True
     770
    -771            remaining_n = -1
    +771            total_hetero_valence = self.get_total_hetero_valence(class_dict)
     772
    -773        if remaining_n > 0.0:
    -774
    -775            if HAS_NITROGEN or HAS_OTHER_HETERO:
    -776
    -777                if HAS_HALOGEN:
    -778                    if remaining_halogen == 0:
    -779                        return 'odd'
    -780                    else:
    -781                        return 'even'
    -782                
    -783                else:
    -784                    return 'odd'
    -785
    -786        elif remaining_n == 0.0:
    +773        else:
    +774            HAS_OTHER_HETERO = False
    +775
    +776            total_hetero_valence = 0
    +777
    +778        if HAS_OTHER_HETERO:
    +779            remaining_hetero_valence = total_hetero_valence % 2
    +780
    +781        else:
    +782            remaining_hetero_valence = 0
    +783
    +784        if HAS_NITROGEN and not HAS_OTHER_HETERO:
    +785            number_of_n = class_dict.get("N")
    +786            remaining_n = number_of_n % 2
     787
    -788            if HAS_NITROGEN or HAS_OTHER_HETERO:
    -789
    -790                if HAS_HALOGEN:
    -791                    if remaining_halogen == 0:
    -792                        return 'even'
    -793                    else:
    -794                        return 'odd'
    -795                
    -796                else:
    -797                    return 'even'
    -798                
    -799        else:
    -800
    -801            if HAS_HALOGEN:
    -802                if remaining_halogen == 0:
    -803                    return 'even'
    -804                else:
    -805                    return 'odd'
    -806            
    -807            else:
    -808                return 'even'
    +788        elif HAS_NITROGEN and HAS_OTHER_HETERO:
    +789            number_of_n = class_dict.get("N")
    +790            remaining_n = (number_of_n + remaining_hetero_valence) % 2
    +791
    +792        elif HAS_OTHER_HETERO and not HAS_NITROGEN:
    +793            remaining_n = remaining_hetero_valence
    +794
    +795        else:
    +796            remaining_n = -1
    +797
    +798        if remaining_n > 0.0:
    +799            if HAS_NITROGEN or HAS_OTHER_HETERO:
    +800                if HAS_HALOGEN:
    +801                    if remaining_halogen == 0:
    +802                        return "odd"
    +803                    else:
    +804                        return "even"
    +805
    +806                else:
    +807                    return "odd"
    +808
    +809        elif remaining_n == 0.0:
    +810            if HAS_NITROGEN or HAS_OTHER_HETERO:
    +811                if HAS_HALOGEN:
    +812                    if remaining_halogen == 0:
    +813                        return "even"
    +814                    else:
    +815                        return "odd"
    +816
    +817                else:
    +818                    return "even"
    +819
    +820        else:
    +821            if HAS_HALOGEN:
    +822                if remaining_halogen == 0:
    +823                    return "even"
    +824                else:
    +825                    return "odd"
    +826
    +827            else:
    +828                return "even"
     
    @@ -3074,29 +3113,28 @@
    Returns
    -
    812    @staticmethod
    -813    def get_total_heteroatoms(class_dict):
    -814        """ Get the total number of heteroatoms other than N, F, Cl, Br
    -815        
    -816        Parameters
    -817        ----------
    -818        class_dict : dict
    -819            A dictionary of classes.
    -820        
    -821        Returns
    -822        -------
    -823        int
    -824            The total number of heteroatoms.
    -825        """
    -826
    -827        total_number = 0
    -828        
    -829        for atom in class_dict.keys():
    -830
    -831            if atom not in ['HC','C','H','O','N', 'F', 'Cl', 'Br']:
    -832                total_number = total_number + class_dict.get(atom)
    -833        
    -834        return total_number                
    +            
    830    @staticmethod
    +831    def get_total_heteroatoms(class_dict):
    +832        """Get the total number of heteroatoms other than N, F, Cl, Br
    +833
    +834        Parameters
    +835        ----------
    +836        class_dict : dict
    +837            A dictionary of classes.
    +838
    +839        Returns
    +840        -------
    +841        int
    +842            The total number of heteroatoms.
    +843        """
    +844
    +845        total_number = 0
    +846
    +847        for atom in class_dict.keys():
    +848            if atom not in ["HC", "C", "H", "O", "N", "F", "Cl", "Br"]:
    +849                total_number = total_number + class_dict.get(atom)
    +850
    +851        return total_number
     
    @@ -3130,40 +3168,40 @@
    Returns
    -
    836    @staticmethod
    -837    def get_total_hetero_valence(class_dict):
    -838        """ Get the total valence of heteroatoms other than N, F, Cl, Br
    -839        
    -840        Parameters
    -841        ----------
    -842        class_dict : dict
    -843            A dictionary of classes.
    -844        
    -845        Returns
    -846        -------
    -847        int
    -848            The total heteroatom valence.
    -849        """
    -850        total_valence = 0
    -851        
    -852        for atom in class_dict.keys():
    -853
    -854            if atom not in ['HC','C','H','O','N', 'F', 'Cl', 'Br']:
    -855
    -856                clean_atom = ''.join([i for i in atom if not i.isdigit()]) 
    -857
    -858                atom_valence = MSParameters.molecular_search.used_atom_valences.get(clean_atom)
    -859
    -860                if type(atom_valence) is tuple:
    -861                    atom_valence = atom_valence[0]
    -862
    -863                n_atom =int(class_dict.get(atom))
    -864
    -865                n_atom_valence = atom_valence * n_atom
    -866                
    -867                total_valence = total_valence + n_atom_valence
    +            
    853    @staticmethod
    +854    def get_total_hetero_valence(class_dict):
    +855        """Get the total valence of heteroatoms other than N, F, Cl, Br
    +856
    +857        Parameters
    +858        ----------
    +859        class_dict : dict
    +860            A dictionary of classes.
    +861
    +862        Returns
    +863        -------
    +864        int
    +865            The total heteroatom valence.
    +866        """
    +867        total_valence = 0
     868
    -869        return total_valence  
    +869        for atom in class_dict.keys():
    +870            if atom not in ["HC", "C", "H", "O", "N", "F", "Cl", "Br"]:
    +871                clean_atom = "".join([i for i in atom if not i.isdigit()])
    +872
    +873                atom_valence = MSParameters.molecular_search.used_atom_valences.get(
    +874                    clean_atom
    +875                )
    +876
    +877                if type(atom_valence) is tuple:
    +878                    atom_valence = atom_valence[0]
    +879
    +880                n_atom = int(class_dict.get(atom))
    +881
    +882                n_atom_valence = atom_valence * n_atom
    +883
    +884                total_valence = total_valence + n_atom_valence
    +885
    +886        return total_valence
     
    @@ -3197,31 +3235,29 @@
    Returns
    -
    871    @staticmethod
    -872    def get_total_halogen_atoms(class_dict):
    -873        """ Get the total number of halogen atoms
    -874        
    -875        Parameters
    -876        ----------
    -877        class_dict : dict
    -878            A dictionary of classes.
    -879        
    -880        Returns
    -881        -------
    -882        int
    -883            The total number of halogen atoms.
    -884        """
    -885        atoms = ['F', 'Cl', 'Br']
    -886
    -887        total_number = 0
    -888        
    -889        for atom in atoms:
    -890
    -891            if atom in class_dict.keys():
    -892
    -893                total_number = total_number + class_dict.get(atom)
    -894        
    -895        return total_number    
    +            
    888    @staticmethod
    +889    def get_total_halogen_atoms(class_dict):
    +890        """Get the total number of halogen atoms
    +891
    +892        Parameters
    +893        ----------
    +894        class_dict : dict
    +895            A dictionary of classes.
    +896
    +897        Returns
    +898        -------
    +899        int
    +900            The total number of halogen atoms.
    +901        """
    +902        atoms = ["F", "Cl", "Br"]
    +903
    +904        total_number = 0
    +905
    +906        for atom in atoms:
    +907            if atom in class_dict.keys():
    +908                total_number = total_number + class_dict.get(atom)
    +909
    +910        return total_number
     
    diff --git a/docs/corems/molecular_id/factory/classification.html b/docs/corems/molecular_id/factory/classification.html index 6feaaaf5..a64123a4 100644 --- a/docs/corems/molecular_id/factory/classification.html +++ b/docs/corems/molecular_id/factory/classification.html @@ -176,26 +176,26 @@

    6from matplotlib import pyplot as plt 7from numpy import linspace 8 - 9from corems.encapsulation.constant import Labels - 10from corems.encapsulation.constant import Atoms - 11 - 12flatten_list = lambda l: [item for sublist in l for item in sublist] + 9from corems.encapsulation.constant import Atoms, Labels + 10 + 11flatten_list = lambda l: [item for sublist in l for item in sublist] + 12 13 14class HeteroatomsClassification(Mapping): - 15 """ Class for grouping mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..) - 16 + 15 """Class for grouping mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..) + 16 17 Parameters 18 ---------- 19 mass_spectrum : MassSpectrum 20 The mass spectrum object. 21 choose_molecular_formula : bool, optional 22 If True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered. Default is True. - 23 + 23 24 Raises 25 ------ 26 Exception 27 If no molecular formula is associated with any mspeak objects. - 28 + 28 29 Attributes 30 ---------- 31 _ms_grouped_class : dict @@ -216,652 +216,761 @@

    46 The maximum ppm error. 47 all_identified_atoms : list 48 A list of all identified atoms. - 49 + 49 50 Methods 51 ------- 52 * __len__(). - 53 Returns the number of classes. - 54 * __getitem__(classe) - 55 Returns the molecular formula list for specified class. + 53 Returns the number of classes. + 54 * __getitem__(classe) + 55 Returns the molecular formula list for specified class. 56 * __iter__(). - 57 Returns an iterator over the keys of the dictionary. + 57 Returns an iterator over the keys of the dictionary. 58 * get_classes(threshold_perc=1, isotopologue=True). - 59 Returns a list of classes with abundance percentile above threshold. + 59 Returns a list of classes with abundance percentile above threshold. 60 * molecular_formula_string(classe). - 61 Returns a list of molecular formula string for specified class. + 61 Returns a list of molecular formula string for specified class. 62 * molecular_formula(classe). - 63 Returns a list of molecular formula for specified class. + 63 Returns a list of molecular formula for specified class. 64 * carbon_number(classe). - 65 Returns a list of carbon number for specified class. + 65 Returns a list of carbon number for specified class. 66 * atom_count(atom, classe). - 67 Returns a list of atom count for specified class. + 67 Returns a list of atom count for specified class. 68 * dbe(classe). - 69 Returns a list of DBE for specified class. + 69 Returns a list of DBE for specified class. 70 * atoms_ratio(classe, numerator, denominator). - 71 Returns a list of atoms ratio for specified class. + 71 Returns a list of atoms ratio for specified class. 72 * mz_exp(classe). - 73 Returns a list of experimental mz for specified class. + 73 Returns a list of experimental mz for specified class. 74 * abundance(classe). - 75 Returns a list of abundance for specified class. + 75 Returns a list of abundance for specified class. 76 * mz_error(classe). - 77 Returns a list of mz error for specified class. 
+ 77 Returns a list of mz error for specified class. 78 * mz_calc(classe). - 79 Returns a list of calculated mz for specified class. + 79 Returns a list of calculated mz for specified class. 80 * peaks_count_percentile(classe). - 81 Returns the peaks count percentile of a specific class. + 81 Returns the peaks count percentile of a specific class. 82 * abundance_count_percentile(classe). - 83 Returns the abundance percentile of a specific class. + 83 Returns the abundance percentile of a specific class. 84 * mz_exp_assigned(). - 85 Returns a list of experimental mz for all assigned classes. + 85 Returns a list of experimental mz for all assigned classes. 86 * abundance_assigned(). - 87 Returns a list of abundance for all classes. + 87 Returns a list of abundance for all classes. 88 * mz_exp_all(). - 89 Returns a list of mz for all classes. - 90 + 89 Returns a list of mz for all classes. + 90 91 """ - 92 #Group mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..) - 93 - 94 # class obj behaves as a dictionary of classes and return a list of ms_peak obj - 95 - 96 + 92 + 93 # Group mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..) 
+ 94 + 95 # class obj behaves as a dictionary of classes and return a list of ms_peak obj + 96 97 def __init__(self, mass_spectrum, choose_molecular_formula=True): - 98 - 99 def sort_atoms_method( atom): -100 """ Sort atoms by order of appearance in the Atoms class""" -101 return [Atoms.atoms_order.index(atom)] -102 -103 self._ms_grouped_class = dict() -104 -105 self.choose_mf = choose_molecular_formula -106 -107 #mapping for ms peaks without any molecular formula associated -108 self._ms_grouped_class[Labels.unassigned] = list() -109 -110 self.total_peaks = 0 -111 -112 self.sum_abundance = 0 -113 -114 self.min_max_mz = (mass_spectrum.min_mz_exp, mass_spectrum.max_mz_exp) -115 -116 self.min_max_abundance = (mass_spectrum.min_abundance, mass_spectrum.max_abundance) -117 -118 self.min_ppm_error = mass_spectrum.molecular_search_settings.min_ppm_error + 98 def sort_atoms_method(atom): + 99 """Sort atoms by order of appearance in the Atoms class""" +100 return [Atoms.atoms_order.index(atom)] +101 +102 self._ms_grouped_class = dict() +103 +104 self.choose_mf = choose_molecular_formula +105 +106 # mapping for ms peaks without any molecular formula associated +107 self._ms_grouped_class[Labels.unassigned] = list() +108 +109 self.total_peaks = 0 +110 +111 self.sum_abundance = 0 +112 +113 self.min_max_mz = (mass_spectrum.min_mz_exp, mass_spectrum.max_mz_exp) +114 +115 self.min_max_abundance = ( +116 mass_spectrum.min_abundance, +117 mass_spectrum.max_abundance, +118 ) 119 -120 self.max_ppm_error = mass_spectrum.molecular_search_settings.max_ppm_error +120 self.min_ppm_error = mass_spectrum.molecular_search_settings.min_ppm_error 121 -122 check_assign = False +122 self.max_ppm_error = mass_spectrum.molecular_search_settings.max_ppm_error 123 -124 all_used_atoms = set() +124 check_assign = False 125 -126 for ms_peak in mass_spectrum: -127 -128 self.total_peaks += 1 -129 -130 self.sum_abundance += ms_peak.abundance -131 -132 if not ms_peak.is_assigned: -133 +126 all_used_atoms 
= set() +127 +128 for ms_peak in mass_spectrum: +129 self.total_peaks += 1 +130 +131 self.sum_abundance += ms_peak.abundance +132 +133 if not ms_peak.is_assigned: 134 self._ms_grouped_class.get(Labels.unassigned).append(ms_peak) -135 -136 else: -137 -138 check_assign = True -139 -140 if choose_molecular_formula: -141 -142 mf = ms_peak.best_molecular_formula_candidate -143 -144 classes = [mf.class_label] -145 -146 for atom in mf.atoms: -147 -148 all_used_atoms.add(atom) +135 +136 else: +137 check_assign = True +138 +139 if choose_molecular_formula: +140 mf = ms_peak.best_molecular_formula_candidate +141 +142 classes = [mf.class_label] +143 +144 for atom in mf.atoms: +145 all_used_atoms.add(atom) +146 +147 else: +148 classes = [] 149 -150 else: -151 -152 classes = [] -153 -154 for mf in ms_peak: -155 -156 classes.append(mf.class_label) -157 -158 for atom in mf.atoms: -159 -160 all_used_atoms.add(atom) -161 -162 for classe in classes: -163 -164 if classe in self._ms_grouped_class.keys(): -165 -166 self._ms_grouped_class.get(classe).append(ms_peak) -167 -168 else: -169 -170 self._ms_grouped_class[classe] = [ms_peak] +150 for mf in ms_peak: +151 classes.append(mf.class_label) +152 +153 for atom in mf.atoms: +154 all_used_atoms.add(atom) +155 +156 for classe in classes: +157 if classe in self._ms_grouped_class.keys(): +158 self._ms_grouped_class.get(classe).append(ms_peak) +159 +160 else: +161 self._ms_grouped_class[classe] = [ms_peak] +162 +163 self.all_identified_atoms = sorted(all_used_atoms, key=sort_atoms_method) +164 +165 if not check_assign: +166 raise Exception("No molecular formula associated with any mspeak objects") +167 +168 def __len__(self): +169 """Return the number of classes""" +170 return len(self._ms_grouped_class) 171 -172 self.all_identified_atoms = sorted(all_used_atoms, key=sort_atoms_method) -173 -174 if not check_assign: +172 def __getitem__(self, classe): +173 """Return the molecular formula list for specified class""" +174 return 
self._ms_grouped_class.get(classe) 175 -176 raise Exception("No molecular formula associated with any mspeak objects") -177 -178 -179 def __len__(self): -180 """ Return the number of classes""" -181 return len(self._ms_grouped_class) -182 -183 def __getitem__(self, classe): -184 """ Return the molecular formula list for specified class""" -185 return self._ms_grouped_class.get(classe) -186 -187 def __iter__(self): -188 """ Return an iterator over the keys of the dictionary.""" -189 return iter(self._ms_grouped_class) +176 def __iter__(self): +177 """Return an iterator over the keys of the dictionary.""" +178 return iter(self._ms_grouped_class) +179 +180 def get_classes(self, threshold_perc=1, isotopologue=True): +181 """Return a list of classes with abundance percentile above threshold""" +182 classes = list() +183 for classe in self.keys(): +184 if classe != Labels.unassigned: +185 if self.abundance_count_percentile(classe) > threshold_perc: +186 if classe != Labels.unassigned: +187 # access first molecular formula inside the first ms peak and check isotopologue +188 if not isotopologue and self.get(classe)[0][0].is_isotopologue: +189 continue 190 -191 def get_classes(self, threshold_perc=1, isotopologue=True): -192 """ Return a list of classes with abundance percentile above threshold""" -193 classes = list() -194 for classe in self.keys(): -195 if classe != Labels.unassigned: -196 if self.abundance_count_percentile(classe) > threshold_perc: -197 -198 if classe != Labels.unassigned: -199 # access first molecular formula inside the first ms peak and check isotopologue -200 if not isotopologue and self.get(classe)[0][0].is_isotopologue: continue -201 -202 classes.append(classe) -203 #TODO sort classes chemically here too -204 return classes -205 -206 def molecular_formula_string(self, classe,): -207 """ Return a list of molecular formula string for specified class""" -208 if self.choose_mf: -209 return [mspeak.best_molecular_formula_candidate for mspeak in 
self[classe]] -210 else: -211 return [mf for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -212 -213 def molecular_formula(self, classe,): -214 """ Return a list of molecular formula for specified class""" +191 classes.append(classe) +192 # TODO sort classes chemically here too +193 return classes +194 +195 def molecular_formula_string( +196 self, +197 classe, +198 ): +199 """Return a list of molecular formula string for specified class""" +200 if self.choose_mf: +201 return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]] +202 else: +203 return [ +204 mf +205 for mspeak in self[classe] +206 for mf in mspeak +207 if mf.class_label == classe +208 ] +209 +210 def molecular_formula( +211 self, +212 classe, +213 ): +214 """Return a list of molecular formula for specified class""" 215 if self.choose_mf: 216 return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]] 217 else: -218 return [mf for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -219 -220 def carbon_number(self, classe): -221 """ Return a list of carbon number for specified class""" -222 if self.choose_mf: -223 return [mspeak.best_molecular_formula_candidate.get("C") for mspeak in self[classe]] -224 else: -225 return [mf.get('C') for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -226 -227 def atom_count(self, atom, classe): -228 """ Return a list of atom count for specified class""" -229 -230 if self.choose_mf: -231 return [mspeak.best_molecular_formula_candidate.get(atom) for mspeak in self[classe]] -232 else: -233 return [mf.get(atom) for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -234 -235 def dbe(self, classe): -236 """ Return a list of DBE for specified class""" -237 if self.choose_mf: -238 return [mspeak.best_molecular_formula_candidate.dbe for mspeak in self[classe]] -239 else: -240 return [mf.dbe for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] 
-241 -242 def atoms_ratio(self, classe, numerator, denominator): -243 """ Return a list of atoms ratio for specified class""" -244 return [mf.get(numerator)/mf.get(denominator) for mf in self.molecular_formula(classe)] -245 -246 def mz_exp(self, classe): -247 """ Return a list of experimental mz for specified class""" -248 if self.choose_mf or classe == Labels.unassigned: -249 -250 return [mspeak.mz_exp for mspeak in self[classe]] -251 -252 else: -253 -254 return [mspeak.mz_exp for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -255 -256 def abundance(self, classe): -257 """ Return a list of abundance for specified class""" -258 if self.choose_mf or classe == Labels.unassigned: -259 -260 return [mspeak.abundance for mspeak in self[classe]] -261 +218 return [ +219 mf +220 for mspeak in self[classe] +221 for mf in mspeak +222 if mf.class_label == classe +223 ] +224 +225 def carbon_number(self, classe): +226 """Return a list of carbon number for specified class""" +227 if self.choose_mf: +228 return [ +229 mspeak.best_molecular_formula_candidate.get("C") +230 for mspeak in self[classe] +231 ] +232 else: +233 return [ +234 mf.get("C") +235 for mspeak in self[classe] +236 for mf in mspeak +237 if mf.class_label == classe +238 ] +239 +240 def atom_count(self, atom, classe): +241 """Return a list of atom count for specified class""" +242 +243 if self.choose_mf: +244 return [ +245 mspeak.best_molecular_formula_candidate.get(atom) +246 for mspeak in self[classe] +247 ] +248 else: +249 return [ +250 mf.get(atom) +251 for mspeak in self[classe] +252 for mf in mspeak +253 if mf.class_label == classe +254 ] +255 +256 def dbe(self, classe): +257 """Return a list of DBE for specified class""" +258 if self.choose_mf: +259 return [ +260 mspeak.best_molecular_formula_candidate.dbe for mspeak in self[classe] +261 ] 262 else: -263 -264 return [mspeak.abundance for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -265 -266 def mz_error(self, 
classe): -267 """ Return a list of mz error for specified class""" -268 if classe != Labels.unassigned: -269 -270 if self.choose_mf: -271 -272 return [mspeak.best_molecular_formula_candidate.mz_error for mspeak in self[classe]] -273 -274 else: -275 -276 return [mf.mz_error for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -277 -278 def mz_calc(self, classe): -279 """ Return a list of calculated mz for specified class""" -280 if self.choose_mf: -281 -282 return [mspeak.best_molecular_formula_candidate.mz_calc for mspeak in self[classe]] -283 -284 else: -285 -286 return [mf.mz_calc for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -287 -288 def peaks_count_percentile(self, classe): -289 """ Return the peaks count percentile of a specific class""" -290 return (len(self[classe])/self.total_peaks)*100 -291 -292 def abundance_count_percentile(self, classe): -293 """ Return the abundance percentile of a specific class""" -294 return (sum([mspeak.abundance for mspeak in self[classe]]) / self.sum_abundance)*100 -295 -296 def mz_exp_assigned(self): -297 """ Return a list of experimental mz for all assigned classes""" -298 classes = self.keys() -299 -300 return [mspeak.mz_exp for classe in classes for mspeak in self[classe] if classe != Labels.unassigned] -301 -302 def abundance_assigned(self): -303 """ Return a list of abundance for all classes """ -304 classes = self.keys() -305 -306 return [mspeak.abundance for classe in classes for mspeak in self[classe] if classe != Labels.unassigned] -307 -308 def mz_exp_all(self): -309 """ Return a list of mz for all classes""" -310 classes = self.keys() -311 -312 return flatten_list([self.mz_exp(classe) for classe in classes if classe != Labels.unassigned]) -313 -314 def mz_error_all(self): -315 """ Return a list of mz error for all classes""" -316 classes = self.keys() -317 -318 return flatten_list([self.mz_error(classe) for classe in classes if classe != Labels.unassigned]) -319 -320 def 
carbon_number_all(self): -321 """ Return a list of carbon number for all classes""" -322 classes = self.keys() -323 -324 return flatten_list([self.carbon_number(classe) for classe in classes if classe != Labels.unassigned]) -325 -326 def dbe_all(self): -327 """ Return a list of DBE for all classes""" -328 classes = self.keys() -329 -330 return flatten_list([self.dbe(classe) for classe in classes if classe != Labels.unassigned]) -331 -332 def atoms_ratio_all(self, numerator, denominator): -333 """ Return a list of atoms ratio for all classes""" -334 classes = self.keys() -335 -336 return flatten_list([self.atoms_ratio(classe, numerator, denominator) for classe in classes if classe != Labels.unassigned]) -337 -338 def to_dataframe(self, include_isotopologue=False, abundance_perc_threshold=5, include_unassigned=False): -339 """ Return a pandas dataframe with all the data from the class -340 -341 Parameters -342 ---------- -343 include_isotopologue : bool, optional -344 Include isotopologues, by default False -345 abundance_perc_threshold : int, optional -346 Abundance percentile threshold, by default 5 -347 include_unassigned : bool, optional -348 Include unassigned peaks, by default False -349 -350 Returns -351 ------- -352 DataFrame -353 Pandas dataframe with all the data from the class -354 """ -355 from pandas import DataFrame -356 -357 columns_labels = ['mz', 'calibrated_mz', 'calculated_m_z', 'abundance', -358 'resolving_power', 'sn', 'ion_charge', 'mass_error', -359 'DBE', 'class', 'HC', 'OC', 'ion_type','is_isotopologue', -360 'class_abundance', 'class_count'] -361 -362 dict_data_list = [] -363 -364 for classe, list_mspeaks in self.items(): -365 -366 percent_abundance = self.abundance_count_percentile(classe) -367 -368 #ignores low abundant classes -369 if abundance_perc_threshold < abundance_perc_threshold: continue -370 -371 peaks_count_percentile = self.peaks_count_percentile(classe) -372 -373 for ms_peak in list_mspeaks: -374 -375 if ms_peak.is_assigned: 
-376 -377 for m_formula in ms_peak: -378 -379 #ignores isotopologues -380 if not include_isotopologue and m_formula.is_isotopologue: continue -381 -382 formula_dict = m_formula.to_dict() +263 return [ +264 mf.dbe +265 for mspeak in self[classe] +266 for mf in mspeak +267 if mf.class_label == classe +268 ] +269 +270 def atoms_ratio(self, classe, numerator, denominator): +271 """Return a list of atoms ratio for specified class""" +272 return [ +273 mf.get(numerator) / mf.get(denominator) +274 for mf in self.molecular_formula(classe) +275 ] +276 +277 def mz_exp(self, classe): +278 """Return a list of experimental mz for specified class""" +279 if self.choose_mf or classe == Labels.unassigned: +280 return [mspeak.mz_exp for mspeak in self[classe]] +281 +282 else: +283 return [ +284 mspeak.mz_exp +285 for mspeak in self[classe] +286 for mf in mspeak +287 if mf.class_label == classe +288 ] +289 +290 def abundance(self, classe): +291 """Return a list of abundance for specified class""" +292 if self.choose_mf or classe == Labels.unassigned: +293 return [mspeak.abundance for mspeak in self[classe]] +294 +295 else: +296 return [ +297 mspeak.abundance +298 for mspeak in self[classe] +299 for mf in mspeak +300 if mf.class_label == classe +301 ] +302 +303 def mz_error(self, classe): +304 """Return a list of mz error for specified class""" +305 if classe != Labels.unassigned: +306 if self.choose_mf: +307 return [ +308 mspeak.best_molecular_formula_candidate.mz_error +309 for mspeak in self[classe] +310 ] +311 +312 else: +313 return [ +314 mf.mz_error +315 for mspeak in self[classe] +316 for mf in mspeak +317 if mf.class_label == classe +318 ] +319 +320 def mz_calc(self, classe): +321 """Return a list of calculated mz for specified class""" +322 if self.choose_mf: +323 return [ +324 mspeak.best_molecular_formula_candidate.mz_calc +325 for mspeak in self[classe] +326 ] +327 +328 else: +329 return [ +330 mf.mz_calc +331 for mspeak in self[classe] +332 for mf in mspeak +333 if 
mf.class_label == classe +334 ] +335 +336 def peaks_count_percentile(self, classe): +337 """Return the peaks count percentile of a specific class""" +338 return (len(self[classe]) / self.total_peaks) * 100 +339 +340 def abundance_count_percentile(self, classe): +341 """Return the abundance percentile of a specific class""" +342 return ( +343 sum([mspeak.abundance for mspeak in self[classe]]) / self.sum_abundance +344 ) * 100 +345 +346 def mz_exp_assigned(self): +347 """Return a list of experimental mz for all assigned classes""" +348 classes = self.keys() +349 +350 return [ +351 mspeak.mz_exp +352 for classe in classes +353 for mspeak in self[classe] +354 if classe != Labels.unassigned +355 ] +356 +357 def abundance_assigned(self): +358 """Return a list of abundance for all classes""" +359 classes = self.keys() +360 +361 return [ +362 mspeak.abundance +363 for classe in classes +364 for mspeak in self[classe] +365 if classe != Labels.unassigned +366 ] +367 +368 def mz_exp_all(self): +369 """Return a list of mz for all classes""" +370 classes = self.keys() +371 +372 return flatten_list( +373 [self.mz_exp(classe) for classe in classes if classe != Labels.unassigned] +374 ) +375 +376 def mz_error_all(self): +377 """Return a list of mz error for all classes""" +378 classes = self.keys() +379 +380 return flatten_list( +381 [self.mz_error(classe) for classe in classes if classe != Labels.unassigned] +382 ) 383 -384 dict_result = {'mz': ms_peak._mz_exp, -385 'calibrated_mz': ms_peak.mz_exp, -386 'calculated_mz': m_formula.mz_calc, -387 'abundance': ms_peak.abundance, -388 'resolving_power': ms_peak.resolving_power, -389 'sn': ms_peak.signal_to_noise, -390 'ion_charge': ms_peak.ion_charge, -391 'mass_error': m_formula.mz_error, -392 'DBE': m_formula.dbe, -393 'class': classe, -394 'HC': m_formula.H_C, -395 'OC': m_formula.O_C, -396 'ion_type': str(m_formula.ion_type.lower().encode('utf-8')), -397 'is_isotopologue': int(m_formula.is_isotopologue), -398 'class_abundance': 
percent_abundance, -399 'class_count': peaks_count_percentile -400 } -401 -402 for atom in formula_dict.keys(): -403 -404 dict_result[atom] = formula_dict.get(atom) -405 -406 dict_data_list.append(dict_result) +384 def carbon_number_all(self): +385 """Return a list of carbon number for all classes""" +386 classes = self.keys() +387 +388 return flatten_list( +389 [ +390 self.carbon_number(classe) +391 for classe in classes +392 if classe != Labels.unassigned +393 ] +394 ) +395 +396 def dbe_all(self): +397 """Return a list of DBE for all classes""" +398 classes = self.keys() +399 +400 return flatten_list( +401 [self.dbe(classe) for classe in classes if classe != Labels.unassigned] +402 ) +403 +404 def atoms_ratio_all(self, numerator, denominator): +405 """Return a list of atoms ratio for all classes""" +406 classes = self.keys() 407 -408 else: -409 -410 if not include_unassigned: continue -411 -412 dict_result = {'mz': ms_peak._mz_exp, -413 'calibrated_mz': ms_peak.mz_exp, -414 'abundance': ms_peak.abundance, -415 'resolving_power': ms_peak.resolving_power, -416 'sn': ms_peak.signal_to_noise, -417 'ion_charge': ms_peak.ion_charge, -418 'class': classe, -419 'class_abundance': percent_abundance, -420 'class_count': percent_abundance -421 } -422 -423 dict_data_list.append(dict_result) -424 -425 columns = columns_labels + self.all_identified_atoms -426 -427 return DataFrame(dict_data_list, columns=columns) -428 -429 def plot_ms_assigned_unassigned(self, assigned_color= 'b', unassigned_color = 'r'): -430 """ Plot stick mass spectrum of all classes -431 -432 Parameters -433 ---------- -434 assigned_color : str, optional -435 Matplotlib color for the assigned peaks, by default "b" -436 unassigned_color : str, optional -437 Matplotlib color for the unassigned peaks, by default "r" -438 -439 Returns -440 ------- -441 ax : matplotlib.axes -442 Matplotlib axes object -443 """ -444 mz_assigned = self.mz_exp_assigned() -445 abundance_assigned = self.abundance_assigned() -446 
-447 mz_not_assigned = self.mz_exp(Labels.unassigned) -448 abundance_not_assigned = self.abundance(Labels.unassigned) -449 -450 ax = plt.gca() -451 -452 for plot_obj in ax.stem(mz_assigned,abundance_assigned, linefmt='-', markerfmt=" ", label="Assigned"): -453 -454 plt.setp(plot_obj, 'color', assigned_color, 'linewidth', 2) -455 -456 for plot_obj in ax.stem(mz_not_assigned, abundance_not_assigned, linefmt='-', markerfmt=" ", label="Unassigned"): -457 -458 plt.setp(plot_obj, 'color', unassigned_color, 'linewidth', 2) -459 -460 ax.set_xlabel("$\t{m/z}$", fontsize=12) -461 ax.set_ylabel('Abundance', fontsize=12) -462 ax.tick_params(axis='both', which='major', labelsize=12) +408 return flatten_list( +409 [ +410 self.atoms_ratio(classe, numerator, denominator) +411 for classe in classes +412 if classe != Labels.unassigned +413 ] +414 ) +415 +416 def to_dataframe( +417 self, +418 include_isotopologue=False, +419 abundance_perc_threshold=5, +420 include_unassigned=False, +421 ): +422 """Return a pandas dataframe with all the data from the class +423 +424 Parameters +425 ---------- +426 include_isotopologue : bool, optional +427 Include isotopologues, by default False +428 abundance_perc_threshold : int, optional +429 Abundance percentile threshold, by default 5 +430 include_unassigned : bool, optional +431 Include unassigned peaks, by default False +432 +433 Returns +434 ------- +435 DataFrame +436 Pandas dataframe with all the data from the class +437 """ +438 from pandas import DataFrame +439 +440 columns_labels = [ +441 "mz", +442 "calibrated_mz", +443 "calculated_m_z", +444 "abundance", +445 "resolving_power", +446 "sn", +447 "ion_charge", +448 "mass_error", +449 "DBE", +450 "class", +451 "HC", +452 "OC", +453 "ion_type", +454 "is_isotopologue", +455 "class_abundance", +456 "class_count", +457 ] +458 +459 dict_data_list = [] +460 +461 for classe, list_mspeaks in self.items(): +462 percent_abundance = self.abundance_count_percentile(classe) 463 -464 
ax.axes.spines['top'].set_visible(False) -465 ax.axes.spines['right'].set_visible(False) -466 -467 ax.get_yaxis().set_visible(False) -468 ax.spines['left'].set_visible(False) -469 plt.legend() -470 -471 return ax -472 -473 def plot_mz_error(self, color= 'g'): -474 """ Plot mz error scatter plot of all classes -475 -476 Parameters -477 ---------- -478 color : str, optional -479 Matplotlib color, by default "g" -480 -481 Returns -482 ------- -483 ax : matplotlib.axes -484 Matplotlib axes object -485 """ -486 ax = plt.gca() -487 -488 mz_assigned = self.mz_exp_all() -489 mz_error= self.mz_error_all() -490 -491 ax.scatter( mz_assigned, mz_error, c=color) -492 -493 ax.set_xlabel("$\t{m/z}$", fontsize=12) -494 ax.set_ylabel('Error (ppm)', fontsize=12) -495 ax.tick_params(axis='both', which='major', labelsize=12) -496 -497 ax.axes.spines['top'].set_visible(True) -498 ax.axes.spines['right'].set_visible(True) -499 -500 ax.get_yaxis().set_visible(True) -501 ax.spines['left'].set_visible(True) +464 # ignores low abundant classes +465 if abundance_perc_threshold < abundance_perc_threshold: +466 continue +467 +468 peaks_count_percentile = self.peaks_count_percentile(classe) +469 +470 for ms_peak in list_mspeaks: +471 if ms_peak.is_assigned: +472 for m_formula in ms_peak: +473 # ignores isotopologues +474 if not include_isotopologue and m_formula.is_isotopologue: +475 continue +476 +477 formula_dict = m_formula.to_dict() +478 +479 dict_result = { +480 "mz": ms_peak._mz_exp, +481 "calibrated_mz": ms_peak.mz_exp, +482 "calculated_mz": m_formula.mz_calc, +483 "abundance": ms_peak.abundance, +484 "resolving_power": ms_peak.resolving_power, +485 "sn": ms_peak.signal_to_noise, +486 "ion_charge": ms_peak.ion_charge, +487 "mass_error": m_formula.mz_error, +488 "DBE": m_formula.dbe, +489 "class": classe, +490 "HC": m_formula.H_C, +491 "OC": m_formula.O_C, +492 "ion_type": str(m_formula.ion_type.lower().encode("utf-8")), +493 "is_isotopologue": int(m_formula.is_isotopologue), +494 
"class_abundance": percent_abundance, +495 "class_count": peaks_count_percentile, +496 } +497 +498 for atom in formula_dict.keys(): +499 dict_result[atom] = formula_dict.get(atom) +500 +501 dict_data_list.append(dict_result) 502 -503 ax.set_xlim(self.min_max_mz) -504 ax.set_ylim(self.min_ppm_error , self.max_ppm_error) -505 -506 return ax -507 -508 def plot_mz_error_class(self, classe, color= 'g'): -509 """ Plot mz error scatter plot of a specific class -510 -511 Parameters -512 ---------- -513 classe : str -514 Class name -515 color : str, optional -516 Matplotlib color, by default "g" -517 -518 Returns -519 ------- -520 ax : matplotlib.axes -521 Matplotlib axes object -522 -523 """ -524 if classe != Labels.unassigned: -525 ax = plt.gca() -526 -527 abun_perc = self.abundance_count_percentile(classe) -528 mz_assigned = self.mz_exp(classe) -529 mz_error= self.mz_error(classe) -530 -531 ax.scatter( mz_assigned, mz_error, c=color) -532 -533 title = "%s, %.2f %%" % (classe, abun_perc) -534 ax.set_title(title) -535 ax.set_xlabel("$\t{m/z}$", fontsize=12) -536 ax.set_ylabel('Error (ppm)', fontsize=12) -537 ax.tick_params(axis='both', which='major', labelsize=12) -538 -539 ax.axes.spines['top'].set_visible(True) -540 ax.axes.spines['right'].set_visible(True) -541 -542 ax.get_yaxis().set_visible(True) -543 ax.spines['left'].set_visible(True) -544 -545 ax.set_xlim(self.min_max_mz) -546 ax.set_ylim(self.min_ppm_error , self.max_ppm_error) -547 -548 return ax -549 -550 def plot_ms_class(self, classe, color= 'g'): -551 """ Plot stick mass spectrum of a specific class -552 -553 Parameters -554 ---------- -555 classe : str -556 Class name -557 color : str, optional -558 Matplotlib color, by default "g" -559 -560 Returns -561 ------- -562 ax : matplotlib.axes -563 Matplotlib axes object -564 -565 """ -566 if classe != Labels.unassigned: -567 ax = plt.gca() -568 -569 abun_perc = self.abundance_count_percentile(classe) -570 mz_assigned = self.mz_exp(classe) -571 abundance_assigned= 
self.abundance(classe) +503 else: +504 if not include_unassigned: +505 continue +506 +507 dict_result = { +508 "mz": ms_peak._mz_exp, +509 "calibrated_mz": ms_peak.mz_exp, +510 "abundance": ms_peak.abundance, +511 "resolving_power": ms_peak.resolving_power, +512 "sn": ms_peak.signal_to_noise, +513 "ion_charge": ms_peak.ion_charge, +514 "class": classe, +515 "class_abundance": percent_abundance, +516 "class_count": percent_abundance, +517 } +518 +519 dict_data_list.append(dict_result) +520 +521 columns = columns_labels + self.all_identified_atoms +522 +523 return DataFrame(dict_data_list, columns=columns) +524 +525 def plot_ms_assigned_unassigned(self, assigned_color="b", unassigned_color="r"): +526 """Plot stick mass spectrum of all classes +527 +528 Parameters +529 ---------- +530 assigned_color : str, optional +531 Matplotlib color for the assigned peaks, by default "b" +532 unassigned_color : str, optional +533 Matplotlib color for the unassigned peaks, by default "r" +534 +535 Returns +536 ------- +537 ax : matplotlib.axes +538 Matplotlib axes object +539 """ +540 mz_assigned = self.mz_exp_assigned() +541 abundance_assigned = self.abundance_assigned() +542 +543 mz_not_assigned = self.mz_exp(Labels.unassigned) +544 abundance_not_assigned = self.abundance(Labels.unassigned) +545 +546 ax = plt.gca() +547 +548 for plot_obj in ax.stem( +549 mz_assigned, +550 abundance_assigned, +551 linefmt="-", +552 markerfmt=" ", +553 label="Assigned", +554 ): +555 plt.setp(plot_obj, "color", assigned_color, "linewidth", 2) +556 +557 for plot_obj in ax.stem( +558 mz_not_assigned, +559 abundance_not_assigned, +560 linefmt="-", +561 markerfmt=" ", +562 label="Unassigned", +563 ): +564 plt.setp(plot_obj, "color", unassigned_color, "linewidth", 2) +565 +566 ax.set_xlabel("$\t{m/z}$", fontsize=12) +567 ax.set_ylabel("Abundance", fontsize=12) +568 ax.tick_params(axis="both", which="major", labelsize=12) +569 +570 ax.axes.spines["top"].set_visible(False) +571 
ax.axes.spines["right"].set_visible(False) 572 -573 for plot_obj in ax.stem( mz_assigned, abundance_assigned, linefmt='-', markerfmt=" "): -574 -575 plt.setp(plot_obj, 'color', color, 'linewidth', 2) -576 -577 title = "%s, %.2f %%" % (classe, abun_perc) -578 ax.set_title(title) -579 ax.set_xlabel("$\t{m/z}$", fontsize=12) -580 ax.set_ylabel('Abundance', fontsize=12) -581 ax.tick_params(axis='both', which='major', labelsize=12) -582 -583 ax.axes.spines['top'].set_visible(False) -584 ax.axes.spines['right'].set_visible(False) -585 -586 ax.get_yaxis().set_visible(False) -587 ax.spines['left'].set_visible(False) -588 -589 ax.set_xlim(self.min_max_mz) -590 ax.set_ylim(self.min_max_abundance) -591 -592 return ax +573 ax.get_yaxis().set_visible(False) +574 ax.spines["left"].set_visible(False) +575 plt.legend() +576 +577 return ax +578 +579 def plot_mz_error(self, color="g"): +580 """Plot mz error scatter plot of all classes +581 +582 Parameters +583 ---------- +584 color : str, optional +585 Matplotlib color, by default "g" +586 +587 Returns +588 ------- +589 ax : matplotlib.axes +590 Matplotlib axes object +591 """ +592 ax = plt.gca() 593 -594 def plot_van_krevelen(self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis"): -595 """ Plot Van Krevelen Diagram -596 -597 Parameters -598 ---------- -599 classe : str -600 Class name -601 max_hc : float, optional -602 Max H/C ratio, by default 2.5 -603 max_oc : float, optional -604 Max O/C ratio, by default 2 -605 ticks_number : int, optional -606 Number of ticks, by default 5 -607 color : str, optional -608 Matplotlib color, by default "viridis" -609 -610 Returns -611 ------- -612 ax : matplotlib.axes -613 Matplotlib axes object -614 abun_perc : float -615 Class percentile of the relative abundance -616 """ -617 if classe != Labels.unassigned: -618 -619 # get data -620 abun_perc = self.abundance_count_percentile(classe) -621 hc = self.atoms_ratio(classe, "H", "C") -622 oc = self.atoms_ratio(classe, "O", "C") -623 
abundance = self.abundance(classe) -624 -625 #plot data -626 ax = plt.gca() -627 -628 ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color) -629 -630 #ax.scatter(carbon_number, dbe, c=color, alpha=0.5) -631 -632 title = "%s, %.2f %%" % (classe, abun_perc) -633 ax.set_title(title) -634 ax.set_xlabel("O/C", fontsize=16) -635 ax.set_ylabel('H/C', fontsize=16) -636 ax.tick_params(axis='both', which='major', labelsize=18) -637 ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True)) -638 ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True)) -639 -640 # returns matplot axes obj and the class percentile of the relative abundance -641 -642 return ax, abun_perc -643 -644 def plot_dbe_vs_carbon_number(self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis"): -645 """ Plot DBE vs Carbon Number -646 -647 Parameters -648 ---------- -649 classe : str -650 Class name -651 max_c : int, optional -652 Max Carbon Number, by default 50 -653 max_dbe : int, optional -654 Max DBE, by default 40 -655 dbe_incr : int, optional -656 DBE increment, by default 5 -657 c_incr : int, optional -658 Carbon Number increment, by default 10 -659 color : str, optional -660 Matplotlib color, by default "viridis" -661 -662 Returns -663 ------- -664 ax : matplotlib.axes -665 Matplotlib axes object -666 abun_perc : float -667 Class percentile of the relative abundance -668 """ -669 if classe != Labels.unassigned: +594 mz_assigned = self.mz_exp_all() +595 mz_error = self.mz_error_all() +596 +597 ax.scatter(mz_assigned, mz_error, c=color) +598 +599 ax.set_xlabel("$\t{m/z}$", fontsize=12) +600 ax.set_ylabel("Error (ppm)", fontsize=12) +601 ax.tick_params(axis="both", which="major", labelsize=12) +602 +603 ax.axes.spines["top"].set_visible(True) +604 ax.axes.spines["right"].set_visible(True) +605 +606 ax.get_yaxis().set_visible(True) +607 ax.spines["left"].set_visible(True) +608 +609 ax.set_xlim(self.min_max_mz) +610 ax.set_ylim(self.min_ppm_error, self.max_ppm_error) +611 
+612 return ax +613 +614 def plot_mz_error_class(self, classe, color="g"): +615 """Plot mz error scatter plot of a specific class +616 +617 Parameters +618 ---------- +619 classe : str +620 Class name +621 color : str, optional +622 Matplotlib color, by default "g" +623 +624 Returns +625 ------- +626 ax : matplotlib.axes +627 Matplotlib axes object +628 +629 """ +630 if classe != Labels.unassigned: +631 ax = plt.gca() +632 +633 abun_perc = self.abundance_count_percentile(classe) +634 mz_assigned = self.mz_exp(classe) +635 mz_error = self.mz_error(classe) +636 +637 ax.scatter(mz_assigned, mz_error, c=color) +638 +639 title = "%s, %.2f %%" % (classe, abun_perc) +640 ax.set_title(title) +641 ax.set_xlabel("$\t{m/z}$", fontsize=12) +642 ax.set_ylabel("Error (ppm)", fontsize=12) +643 ax.tick_params(axis="both", which="major", labelsize=12) +644 +645 ax.axes.spines["top"].set_visible(True) +646 ax.axes.spines["right"].set_visible(True) +647 +648 ax.get_yaxis().set_visible(True) +649 ax.spines["left"].set_visible(True) +650 +651 ax.set_xlim(self.min_max_mz) +652 ax.set_ylim(self.min_ppm_error, self.max_ppm_error) +653 +654 return ax +655 +656 def plot_ms_class(self, classe, color="g"): +657 """Plot stick mass spectrum of a specific class +658 +659 Parameters +660 ---------- +661 classe : str +662 Class name +663 color : str, optional +664 Matplotlib color, by default "g" +665 +666 Returns +667 ------- +668 ax : matplotlib.axes +669 Matplotlib axes object 670 -671 # get data -672 abun_perc = self.abundance_count_percentile(classe) -673 carbon_number = self.carbon_number(classe) -674 dbe = self.dbe(classe) -675 abundance = self.abundance(classe) -676 -677 #plot data -678 ax = plt.gca() -679 -680 ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color) -681 -682 #ax.scatter(carbon_number, dbe, c=color, alpha=0.5) -683 +671 """ +672 if classe != Labels.unassigned: +673 ax = plt.gca() +674 +675 abun_perc = self.abundance_count_percentile(classe) +676 mz_assigned = 
self.mz_exp(classe) +677 abundance_assigned = self.abundance(classe) +678 +679 for plot_obj in ax.stem( +680 mz_assigned, abundance_assigned, linefmt="-", markerfmt=" " +681 ): +682 plt.setp(plot_obj, "color", color, "linewidth", 2) +683 684 title = "%s, %.2f %%" % (classe, abun_perc) 685 ax.set_title(title) -686 ax.set_xlabel("Carbon number", fontsize=16) -687 ax.set_ylabel('DBE', fontsize=16) -688 ax.tick_params(axis='both', which='major', labelsize=18) -689 ax.set_xticks(range(0, max_c, c_incr)) -690 ax.set_yticks(range(0, max_dbe, dbe_incr)) -691 -692 # returns matplot axes obj and the class percentile of the relative abundance -693 -694 return ax, abun_perc +686 ax.set_xlabel("$\t{m/z}$", fontsize=12) +687 ax.set_ylabel("Abundance", fontsize=12) +688 ax.tick_params(axis="both", which="major", labelsize=12) +689 +690 ax.axes.spines["top"].set_visible(False) +691 ax.axes.spines["right"].set_visible(False) +692 +693 ax.get_yaxis().set_visible(False) +694 ax.spines["left"].set_visible(False) +695 +696 ax.set_xlim(self.min_max_mz) +697 ax.set_ylim(self.min_max_abundance) +698 +699 return ax +700 +701 def plot_van_krevelen( +702 self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis" +703 ): +704 """Plot Van Krevelen Diagram +705 +706 Parameters +707 ---------- +708 classe : str +709 Class name +710 max_hc : float, optional +711 Max H/C ratio, by default 2.5 +712 max_oc : float, optional +713 Max O/C ratio, by default 2 +714 ticks_number : int, optional +715 Number of ticks, by default 5 +716 color : str, optional +717 Matplotlib color, by default "viridis" +718 +719 Returns +720 ------- +721 ax : matplotlib.axes +722 Matplotlib axes object +723 abun_perc : float +724 Class percentile of the relative abundance +725 """ +726 if classe != Labels.unassigned: +727 # get data +728 abun_perc = self.abundance_count_percentile(classe) +729 hc = self.atoms_ratio(classe, "H", "C") +730 oc = self.atoms_ratio(classe, "O", "C") +731 abundance = self.abundance(classe) 
+732 +733 # plot data +734 ax = plt.gca() +735 +736 ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color) +737 +738 # ax.scatter(carbon_number, dbe, c=color, alpha=0.5) +739 +740 title = "%s, %.2f %%" % (classe, abun_perc) +741 ax.set_title(title) +742 ax.set_xlabel("O/C", fontsize=16) +743 ax.set_ylabel("H/C", fontsize=16) +744 ax.tick_params(axis="both", which="major", labelsize=18) +745 ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True)) +746 ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True)) +747 +748 # returns matplot axes obj and the class percentile of the relative abundance +749 +750 return ax, abun_perc +751 +752 def plot_dbe_vs_carbon_number( +753 self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis" +754 ): +755 """Plot DBE vs Carbon Number +756 +757 Parameters +758 ---------- +759 classe : str +760 Class name +761 max_c : int, optional +762 Max Carbon Number, by default 50 +763 max_dbe : int, optional +764 Max DBE, by default 40 +765 dbe_incr : int, optional +766 DBE increment, by default 5 +767 c_incr : int, optional +768 Carbon Number increment, by default 10 +769 color : str, optional +770 Matplotlib color, by default "viridis" +771 +772 Returns +773 ------- +774 ax : matplotlib.axes +775 Matplotlib axes object +776 abun_perc : float +777 Class percentile of the relative abundance +778 """ +779 if classe != Labels.unassigned: +780 # get data +781 abun_perc = self.abundance_count_percentile(classe) +782 carbon_number = self.carbon_number(classe) +783 dbe = self.dbe(classe) +784 abundance = self.abundance(classe) +785 +786 # plot data +787 ax = plt.gca() +788 +789 ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color) +790 +791 # ax.scatter(carbon_number, dbe, c=color, alpha=0.5) +792 +793 title = "%s, %.2f %%" % (classe, abun_perc) +794 ax.set_title(title) +795 ax.set_xlabel("Carbon number", fontsize=16) +796 ax.set_ylabel("DBE", fontsize=16) +797 ax.tick_params(axis="both", which="major", 
labelsize=18) +798 ax.set_xticks(range(0, max_c, c_incr)) +799 ax.set_yticks(range(0, max_dbe, dbe_incr)) +800 +801 # returns matplot axes obj and the class percentile of the relative abundance +802 +803 return ax, abun_perc

    @@ -877,7 +986,7 @@

    -
    13flatten_list = lambda l: [item for sublist in l for item in sublist]
    +            
    12flatten_list = lambda l: [item for sublist in l for item in sublist]
     
    @@ -896,20 +1005,20 @@

     15class HeteroatomsClassification(Mapping):
    - 16    """ Class for grouping mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..)
    - 17    
    + 16    """Class for grouping mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..)
    + 17
      18    Parameters
      19    ----------
      20    mass_spectrum : MassSpectrum
      21        The mass spectrum object.
      22    choose_molecular_formula : bool, optional
      23        If True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered. Default is True.
    - 24    
    + 24
      25    Raises
      26    ------
      27    Exception
      28        If no molecular formula is associated with any mspeak objects.
    - 29    
    + 29
      30    Attributes
      31    ----------
      32    _ms_grouped_class : dict
    @@ -930,652 +1039,761 @@ 

    47 The maximum ppm error. 48 all_identified_atoms : list 49 A list of all identified atoms. - 50 + 50 51 Methods 52 ------- 53 * __len__(). - 54 Returns the number of classes. - 55 * __getitem__(classe) - 56 Returns the molecular formula list for specified class. + 54 Returns the number of classes. + 55 * __getitem__(classe) + 56 Returns the molecular formula list for specified class. 57 * __iter__(). - 58 Returns an iterator over the keys of the dictionary. + 58 Returns an iterator over the keys of the dictionary. 59 * get_classes(threshold_perc=1, isotopologue=True). - 60 Returns a list of classes with abundance percentile above threshold. + 60 Returns a list of classes with abundance percentile above threshold. 61 * molecular_formula_string(classe). - 62 Returns a list of molecular formula string for specified class. + 62 Returns a list of molecular formula string for specified class. 63 * molecular_formula(classe). - 64 Returns a list of molecular formula for specified class. + 64 Returns a list of molecular formula for specified class. 65 * carbon_number(classe). - 66 Returns a list of carbon number for specified class. + 66 Returns a list of carbon number for specified class. 67 * atom_count(atom, classe). - 68 Returns a list of atom count for specified class. + 68 Returns a list of atom count for specified class. 69 * dbe(classe). - 70 Returns a list of DBE for specified class. + 70 Returns a list of DBE for specified class. 71 * atoms_ratio(classe, numerator, denominator). - 72 Returns a list of atoms ratio for specified class. + 72 Returns a list of atoms ratio for specified class. 73 * mz_exp(classe). - 74 Returns a list of experimental mz for specified class. + 74 Returns a list of experimental mz for specified class. 75 * abundance(classe). - 76 Returns a list of abundance for specified class. + 76 Returns a list of abundance for specified class. 77 * mz_error(classe). - 78 Returns a list of mz error for specified class. 
+ 78 Returns a list of mz error for specified class. 79 * mz_calc(classe). - 80 Returns a list of calculated mz for specified class. + 80 Returns a list of calculated mz for specified class. 81 * peaks_count_percentile(classe). - 82 Returns the peaks count percentile of a specific class. + 82 Returns the peaks count percentile of a specific class. 83 * abundance_count_percentile(classe). - 84 Returns the abundance percentile of a specific class. + 84 Returns the abundance percentile of a specific class. 85 * mz_exp_assigned(). - 86 Returns a list of experimental mz for all assigned classes. + 86 Returns a list of experimental mz for all assigned classes. 87 * abundance_assigned(). - 88 Returns a list of abundance for all classes. + 88 Returns a list of abundance for all classes. 89 * mz_exp_all(). - 90 Returns a list of mz for all classes. - 91 + 90 Returns a list of mz for all classes. + 91 92 """ - 93 #Group mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..) - 94 - 95 # class obj behaves as a dictionary of classes and return a list of ms_peak obj - 96 - 97 + 93 + 94 # Group mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc..) 
+ 95 + 96 # class obj behaves as a dictionary of classes and return a list of ms_peak obj + 97 98 def __init__(self, mass_spectrum, choose_molecular_formula=True): - 99 -100 def sort_atoms_method( atom): -101 """ Sort atoms by order of appearance in the Atoms class""" -102 return [Atoms.atoms_order.index(atom)] -103 -104 self._ms_grouped_class = dict() -105 -106 self.choose_mf = choose_molecular_formula -107 -108 #mapping for ms peaks without any molecular formula associated -109 self._ms_grouped_class[Labels.unassigned] = list() -110 -111 self.total_peaks = 0 -112 -113 self.sum_abundance = 0 -114 -115 self.min_max_mz = (mass_spectrum.min_mz_exp, mass_spectrum.max_mz_exp) -116 -117 self.min_max_abundance = (mass_spectrum.min_abundance, mass_spectrum.max_abundance) -118 -119 self.min_ppm_error = mass_spectrum.molecular_search_settings.min_ppm_error + 99 def sort_atoms_method(atom): +100 """Sort atoms by order of appearance in the Atoms class""" +101 return [Atoms.atoms_order.index(atom)] +102 +103 self._ms_grouped_class = dict() +104 +105 self.choose_mf = choose_molecular_formula +106 +107 # mapping for ms peaks without any molecular formula associated +108 self._ms_grouped_class[Labels.unassigned] = list() +109 +110 self.total_peaks = 0 +111 +112 self.sum_abundance = 0 +113 +114 self.min_max_mz = (mass_spectrum.min_mz_exp, mass_spectrum.max_mz_exp) +115 +116 self.min_max_abundance = ( +117 mass_spectrum.min_abundance, +118 mass_spectrum.max_abundance, +119 ) 120 -121 self.max_ppm_error = mass_spectrum.molecular_search_settings.max_ppm_error +121 self.min_ppm_error = mass_spectrum.molecular_search_settings.min_ppm_error 122 -123 check_assign = False +123 self.max_ppm_error = mass_spectrum.molecular_search_settings.max_ppm_error 124 -125 all_used_atoms = set() +125 check_assign = False 126 -127 for ms_peak in mass_spectrum: -128 -129 self.total_peaks += 1 -130 -131 self.sum_abundance += ms_peak.abundance -132 -133 if not ms_peak.is_assigned: -134 +127 all_used_atoms 
= set() +128 +129 for ms_peak in mass_spectrum: +130 self.total_peaks += 1 +131 +132 self.sum_abundance += ms_peak.abundance +133 +134 if not ms_peak.is_assigned: 135 self._ms_grouped_class.get(Labels.unassigned).append(ms_peak) -136 -137 else: -138 -139 check_assign = True -140 -141 if choose_molecular_formula: -142 -143 mf = ms_peak.best_molecular_formula_candidate -144 -145 classes = [mf.class_label] -146 -147 for atom in mf.atoms: -148 -149 all_used_atoms.add(atom) +136 +137 else: +138 check_assign = True +139 +140 if choose_molecular_formula: +141 mf = ms_peak.best_molecular_formula_candidate +142 +143 classes = [mf.class_label] +144 +145 for atom in mf.atoms: +146 all_used_atoms.add(atom) +147 +148 else: +149 classes = [] 150 -151 else: -152 -153 classes = [] -154 -155 for mf in ms_peak: -156 -157 classes.append(mf.class_label) -158 -159 for atom in mf.atoms: -160 -161 all_used_atoms.add(atom) -162 -163 for classe in classes: -164 -165 if classe in self._ms_grouped_class.keys(): -166 -167 self._ms_grouped_class.get(classe).append(ms_peak) -168 -169 else: -170 -171 self._ms_grouped_class[classe] = [ms_peak] +151 for mf in ms_peak: +152 classes.append(mf.class_label) +153 +154 for atom in mf.atoms: +155 all_used_atoms.add(atom) +156 +157 for classe in classes: +158 if classe in self._ms_grouped_class.keys(): +159 self._ms_grouped_class.get(classe).append(ms_peak) +160 +161 else: +162 self._ms_grouped_class[classe] = [ms_peak] +163 +164 self.all_identified_atoms = sorted(all_used_atoms, key=sort_atoms_method) +165 +166 if not check_assign: +167 raise Exception("No molecular formula associated with any mspeak objects") +168 +169 def __len__(self): +170 """Return the number of classes""" +171 return len(self._ms_grouped_class) 172 -173 self.all_identified_atoms = sorted(all_used_atoms, key=sort_atoms_method) -174 -175 if not check_assign: +173 def __getitem__(self, classe): +174 """Return the molecular formula list for specified class""" +175 return 
self._ms_grouped_class.get(classe) 176 -177 raise Exception("No molecular formula associated with any mspeak objects") -178 -179 -180 def __len__(self): -181 """ Return the number of classes""" -182 return len(self._ms_grouped_class) -183 -184 def __getitem__(self, classe): -185 """ Return the molecular formula list for specified class""" -186 return self._ms_grouped_class.get(classe) -187 -188 def __iter__(self): -189 """ Return an iterator over the keys of the dictionary.""" -190 return iter(self._ms_grouped_class) +177 def __iter__(self): +178 """Return an iterator over the keys of the dictionary.""" +179 return iter(self._ms_grouped_class) +180 +181 def get_classes(self, threshold_perc=1, isotopologue=True): +182 """Return a list of classes with abundance percentile above threshold""" +183 classes = list() +184 for classe in self.keys(): +185 if classe != Labels.unassigned: +186 if self.abundance_count_percentile(classe) > threshold_perc: +187 if classe != Labels.unassigned: +188 # access first molecular formula inside the first ms peak and check isotopologue +189 if not isotopologue and self.get(classe)[0][0].is_isotopologue: +190 continue 191 -192 def get_classes(self, threshold_perc=1, isotopologue=True): -193 """ Return a list of classes with abundance percentile above threshold""" -194 classes = list() -195 for classe in self.keys(): -196 if classe != Labels.unassigned: -197 if self.abundance_count_percentile(classe) > threshold_perc: -198 -199 if classe != Labels.unassigned: -200 # access first molecular formula inside the first ms peak and check isotopologue -201 if not isotopologue and self.get(classe)[0][0].is_isotopologue: continue -202 -203 classes.append(classe) -204 #TODO sort classes chemically here too -205 return classes -206 -207 def molecular_formula_string(self, classe,): -208 """ Return a list of molecular formula string for specified class""" -209 if self.choose_mf: -210 return [mspeak.best_molecular_formula_candidate for mspeak in 
self[classe]] -211 else: -212 return [mf for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -213 -214 def molecular_formula(self, classe,): -215 """ Return a list of molecular formula for specified class""" +192 classes.append(classe) +193 # TODO sort classes chemically here too +194 return classes +195 +196 def molecular_formula_string( +197 self, +198 classe, +199 ): +200 """Return a list of molecular formula string for specified class""" +201 if self.choose_mf: +202 return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]] +203 else: +204 return [ +205 mf +206 for mspeak in self[classe] +207 for mf in mspeak +208 if mf.class_label == classe +209 ] +210 +211 def molecular_formula( +212 self, +213 classe, +214 ): +215 """Return a list of molecular formula for specified class""" 216 if self.choose_mf: 217 return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]] 218 else: -219 return [mf for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -220 -221 def carbon_number(self, classe): -222 """ Return a list of carbon number for specified class""" -223 if self.choose_mf: -224 return [mspeak.best_molecular_formula_candidate.get("C") for mspeak in self[classe]] -225 else: -226 return [mf.get('C') for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -227 -228 def atom_count(self, atom, classe): -229 """ Return a list of atom count for specified class""" -230 -231 if self.choose_mf: -232 return [mspeak.best_molecular_formula_candidate.get(atom) for mspeak in self[classe]] -233 else: -234 return [mf.get(atom) for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -235 -236 def dbe(self, classe): -237 """ Return a list of DBE for specified class""" -238 if self.choose_mf: -239 return [mspeak.best_molecular_formula_candidate.dbe for mspeak in self[classe]] -240 else: -241 return [mf.dbe for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] 
-242 -243 def atoms_ratio(self, classe, numerator, denominator): -244 """ Return a list of atoms ratio for specified class""" -245 return [mf.get(numerator)/mf.get(denominator) for mf in self.molecular_formula(classe)] -246 -247 def mz_exp(self, classe): -248 """ Return a list of experimental mz for specified class""" -249 if self.choose_mf or classe == Labels.unassigned: -250 -251 return [mspeak.mz_exp for mspeak in self[classe]] -252 -253 else: -254 -255 return [mspeak.mz_exp for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -256 -257 def abundance(self, classe): -258 """ Return a list of abundance for specified class""" -259 if self.choose_mf or classe == Labels.unassigned: -260 -261 return [mspeak.abundance for mspeak in self[classe]] -262 +219 return [ +220 mf +221 for mspeak in self[classe] +222 for mf in mspeak +223 if mf.class_label == classe +224 ] +225 +226 def carbon_number(self, classe): +227 """Return a list of carbon number for specified class""" +228 if self.choose_mf: +229 return [ +230 mspeak.best_molecular_formula_candidate.get("C") +231 for mspeak in self[classe] +232 ] +233 else: +234 return [ +235 mf.get("C") +236 for mspeak in self[classe] +237 for mf in mspeak +238 if mf.class_label == classe +239 ] +240 +241 def atom_count(self, atom, classe): +242 """Return a list of atom count for specified class""" +243 +244 if self.choose_mf: +245 return [ +246 mspeak.best_molecular_formula_candidate.get(atom) +247 for mspeak in self[classe] +248 ] +249 else: +250 return [ +251 mf.get(atom) +252 for mspeak in self[classe] +253 for mf in mspeak +254 if mf.class_label == classe +255 ] +256 +257 def dbe(self, classe): +258 """Return a list of DBE for specified class""" +259 if self.choose_mf: +260 return [ +261 mspeak.best_molecular_formula_candidate.dbe for mspeak in self[classe] +262 ] 263 else: -264 -265 return [mspeak.abundance for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -266 -267 def mz_error(self, 
classe): -268 """ Return a list of mz error for specified class""" -269 if classe != Labels.unassigned: -270 -271 if self.choose_mf: -272 -273 return [mspeak.best_molecular_formula_candidate.mz_error for mspeak in self[classe]] -274 -275 else: -276 -277 return [mf.mz_error for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -278 -279 def mz_calc(self, classe): -280 """ Return a list of calculated mz for specified class""" -281 if self.choose_mf: -282 -283 return [mspeak.best_molecular_formula_candidate.mz_calc for mspeak in self[classe]] -284 -285 else: -286 -287 return [mf.mz_calc for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] -288 -289 def peaks_count_percentile(self, classe): -290 """ Return the peaks count percentile of a specific class""" -291 return (len(self[classe])/self.total_peaks)*100 -292 -293 def abundance_count_percentile(self, classe): -294 """ Return the abundance percentile of a specific class""" -295 return (sum([mspeak.abundance for mspeak in self[classe]]) / self.sum_abundance)*100 -296 -297 def mz_exp_assigned(self): -298 """ Return a list of experimental mz for all assigned classes""" -299 classes = self.keys() -300 -301 return [mspeak.mz_exp for classe in classes for mspeak in self[classe] if classe != Labels.unassigned] -302 -303 def abundance_assigned(self): -304 """ Return a list of abundance for all classes """ -305 classes = self.keys() -306 -307 return [mspeak.abundance for classe in classes for mspeak in self[classe] if classe != Labels.unassigned] -308 -309 def mz_exp_all(self): -310 """ Return a list of mz for all classes""" -311 classes = self.keys() -312 -313 return flatten_list([self.mz_exp(classe) for classe in classes if classe != Labels.unassigned]) -314 -315 def mz_error_all(self): -316 """ Return a list of mz error for all classes""" -317 classes = self.keys() -318 -319 return flatten_list([self.mz_error(classe) for classe in classes if classe != Labels.unassigned]) -320 -321 def 
carbon_number_all(self): -322 """ Return a list of carbon number for all classes""" -323 classes = self.keys() -324 -325 return flatten_list([self.carbon_number(classe) for classe in classes if classe != Labels.unassigned]) -326 -327 def dbe_all(self): -328 """ Return a list of DBE for all classes""" -329 classes = self.keys() -330 -331 return flatten_list([self.dbe(classe) for classe in classes if classe != Labels.unassigned]) -332 -333 def atoms_ratio_all(self, numerator, denominator): -334 """ Return a list of atoms ratio for all classes""" -335 classes = self.keys() -336 -337 return flatten_list([self.atoms_ratio(classe, numerator, denominator) for classe in classes if classe != Labels.unassigned]) -338 -339 def to_dataframe(self, include_isotopologue=False, abundance_perc_threshold=5, include_unassigned=False): -340 """ Return a pandas dataframe with all the data from the class -341 -342 Parameters -343 ---------- -344 include_isotopologue : bool, optional -345 Include isotopologues, by default False -346 abundance_perc_threshold : int, optional -347 Abundance percentile threshold, by default 5 -348 include_unassigned : bool, optional -349 Include unassigned peaks, by default False -350 -351 Returns -352 ------- -353 DataFrame -354 Pandas dataframe with all the data from the class -355 """ -356 from pandas import DataFrame -357 -358 columns_labels = ['mz', 'calibrated_mz', 'calculated_m_z', 'abundance', -359 'resolving_power', 'sn', 'ion_charge', 'mass_error', -360 'DBE', 'class', 'HC', 'OC', 'ion_type','is_isotopologue', -361 'class_abundance', 'class_count'] -362 -363 dict_data_list = [] -364 -365 for classe, list_mspeaks in self.items(): -366 -367 percent_abundance = self.abundance_count_percentile(classe) -368 -369 #ignores low abundant classes -370 if abundance_perc_threshold < abundance_perc_threshold: continue -371 -372 peaks_count_percentile = self.peaks_count_percentile(classe) -373 -374 for ms_peak in list_mspeaks: -375 -376 if ms_peak.is_assigned: 
-377 -378 for m_formula in ms_peak: -379 -380 #ignores isotopologues -381 if not include_isotopologue and m_formula.is_isotopologue: continue -382 -383 formula_dict = m_formula.to_dict() +264 return [ +265 mf.dbe +266 for mspeak in self[classe] +267 for mf in mspeak +268 if mf.class_label == classe +269 ] +270 +271 def atoms_ratio(self, classe, numerator, denominator): +272 """Return a list of atoms ratio for specified class""" +273 return [ +274 mf.get(numerator) / mf.get(denominator) +275 for mf in self.molecular_formula(classe) +276 ] +277 +278 def mz_exp(self, classe): +279 """Return a list of experimental mz for specified class""" +280 if self.choose_mf or classe == Labels.unassigned: +281 return [mspeak.mz_exp for mspeak in self[classe]] +282 +283 else: +284 return [ +285 mspeak.mz_exp +286 for mspeak in self[classe] +287 for mf in mspeak +288 if mf.class_label == classe +289 ] +290 +291 def abundance(self, classe): +292 """Return a list of abundance for specified class""" +293 if self.choose_mf or classe == Labels.unassigned: +294 return [mspeak.abundance for mspeak in self[classe]] +295 +296 else: +297 return [ +298 mspeak.abundance +299 for mspeak in self[classe] +300 for mf in mspeak +301 if mf.class_label == classe +302 ] +303 +304 def mz_error(self, classe): +305 """Return a list of mz error for specified class""" +306 if classe != Labels.unassigned: +307 if self.choose_mf: +308 return [ +309 mspeak.best_molecular_formula_candidate.mz_error +310 for mspeak in self[classe] +311 ] +312 +313 else: +314 return [ +315 mf.mz_error +316 for mspeak in self[classe] +317 for mf in mspeak +318 if mf.class_label == classe +319 ] +320 +321 def mz_calc(self, classe): +322 """Return a list of calculated mz for specified class""" +323 if self.choose_mf: +324 return [ +325 mspeak.best_molecular_formula_candidate.mz_calc +326 for mspeak in self[classe] +327 ] +328 +329 else: +330 return [ +331 mf.mz_calc +332 for mspeak in self[classe] +333 for mf in mspeak +334 if 
mf.class_label == classe +335 ] +336 +337 def peaks_count_percentile(self, classe): +338 """Return the peaks count percentile of a specific class""" +339 return (len(self[classe]) / self.total_peaks) * 100 +340 +341 def abundance_count_percentile(self, classe): +342 """Return the abundance percentile of a specific class""" +343 return ( +344 sum([mspeak.abundance for mspeak in self[classe]]) / self.sum_abundance +345 ) * 100 +346 +347 def mz_exp_assigned(self): +348 """Return a list of experimental mz for all assigned classes""" +349 classes = self.keys() +350 +351 return [ +352 mspeak.mz_exp +353 for classe in classes +354 for mspeak in self[classe] +355 if classe != Labels.unassigned +356 ] +357 +358 def abundance_assigned(self): +359 """Return a list of abundance for all classes""" +360 classes = self.keys() +361 +362 return [ +363 mspeak.abundance +364 for classe in classes +365 for mspeak in self[classe] +366 if classe != Labels.unassigned +367 ] +368 +369 def mz_exp_all(self): +370 """Return a list of mz for all classes""" +371 classes = self.keys() +372 +373 return flatten_list( +374 [self.mz_exp(classe) for classe in classes if classe != Labels.unassigned] +375 ) +376 +377 def mz_error_all(self): +378 """Return a list of mz error for all classes""" +379 classes = self.keys() +380 +381 return flatten_list( +382 [self.mz_error(classe) for classe in classes if classe != Labels.unassigned] +383 ) 384 -385 dict_result = {'mz': ms_peak._mz_exp, -386 'calibrated_mz': ms_peak.mz_exp, -387 'calculated_mz': m_formula.mz_calc, -388 'abundance': ms_peak.abundance, -389 'resolving_power': ms_peak.resolving_power, -390 'sn': ms_peak.signal_to_noise, -391 'ion_charge': ms_peak.ion_charge, -392 'mass_error': m_formula.mz_error, -393 'DBE': m_formula.dbe, -394 'class': classe, -395 'HC': m_formula.H_C, -396 'OC': m_formula.O_C, -397 'ion_type': str(m_formula.ion_type.lower().encode('utf-8')), -398 'is_isotopologue': int(m_formula.is_isotopologue), -399 'class_abundance': 
percent_abundance, -400 'class_count': peaks_count_percentile -401 } -402 -403 for atom in formula_dict.keys(): -404 -405 dict_result[atom] = formula_dict.get(atom) -406 -407 dict_data_list.append(dict_result) +385 def carbon_number_all(self): +386 """Return a list of carbon number for all classes""" +387 classes = self.keys() +388 +389 return flatten_list( +390 [ +391 self.carbon_number(classe) +392 for classe in classes +393 if classe != Labels.unassigned +394 ] +395 ) +396 +397 def dbe_all(self): +398 """Return a list of DBE for all classes""" +399 classes = self.keys() +400 +401 return flatten_list( +402 [self.dbe(classe) for classe in classes if classe != Labels.unassigned] +403 ) +404 +405 def atoms_ratio_all(self, numerator, denominator): +406 """Return a list of atoms ratio for all classes""" +407 classes = self.keys() 408 -409 else: -410 -411 if not include_unassigned: continue -412 -413 dict_result = {'mz': ms_peak._mz_exp, -414 'calibrated_mz': ms_peak.mz_exp, -415 'abundance': ms_peak.abundance, -416 'resolving_power': ms_peak.resolving_power, -417 'sn': ms_peak.signal_to_noise, -418 'ion_charge': ms_peak.ion_charge, -419 'class': classe, -420 'class_abundance': percent_abundance, -421 'class_count': percent_abundance -422 } -423 -424 dict_data_list.append(dict_result) -425 -426 columns = columns_labels + self.all_identified_atoms -427 -428 return DataFrame(dict_data_list, columns=columns) -429 -430 def plot_ms_assigned_unassigned(self, assigned_color= 'b', unassigned_color = 'r'): -431 """ Plot stick mass spectrum of all classes -432 -433 Parameters -434 ---------- -435 assigned_color : str, optional -436 Matplotlib color for the assigned peaks, by default "b" -437 unassigned_color : str, optional -438 Matplotlib color for the unassigned peaks, by default "r" -439 -440 Returns -441 ------- -442 ax : matplotlib.axes -443 Matplotlib axes object -444 """ -445 mz_assigned = self.mz_exp_assigned() -446 abundance_assigned = self.abundance_assigned() -447 
-448 mz_not_assigned = self.mz_exp(Labels.unassigned) -449 abundance_not_assigned = self.abundance(Labels.unassigned) -450 -451 ax = plt.gca() -452 -453 for plot_obj in ax.stem(mz_assigned,abundance_assigned, linefmt='-', markerfmt=" ", label="Assigned"): -454 -455 plt.setp(plot_obj, 'color', assigned_color, 'linewidth', 2) -456 -457 for plot_obj in ax.stem(mz_not_assigned, abundance_not_assigned, linefmt='-', markerfmt=" ", label="Unassigned"): -458 -459 plt.setp(plot_obj, 'color', unassigned_color, 'linewidth', 2) -460 -461 ax.set_xlabel("$\t{m/z}$", fontsize=12) -462 ax.set_ylabel('Abundance', fontsize=12) -463 ax.tick_params(axis='both', which='major', labelsize=12) +409 return flatten_list( +410 [ +411 self.atoms_ratio(classe, numerator, denominator) +412 for classe in classes +413 if classe != Labels.unassigned +414 ] +415 ) +416 +417 def to_dataframe( +418 self, +419 include_isotopologue=False, +420 abundance_perc_threshold=5, +421 include_unassigned=False, +422 ): +423 """Return a pandas dataframe with all the data from the class +424 +425 Parameters +426 ---------- +427 include_isotopologue : bool, optional +428 Include isotopologues, by default False +429 abundance_perc_threshold : int, optional +430 Abundance percentile threshold, by default 5 +431 include_unassigned : bool, optional +432 Include unassigned peaks, by default False +433 +434 Returns +435 ------- +436 DataFrame +437 Pandas dataframe with all the data from the class +438 """ +439 from pandas import DataFrame +440 +441 columns_labels = [ +442 "mz", +443 "calibrated_mz", +444 "calculated_m_z", +445 "abundance", +446 "resolving_power", +447 "sn", +448 "ion_charge", +449 "mass_error", +450 "DBE", +451 "class", +452 "HC", +453 "OC", +454 "ion_type", +455 "is_isotopologue", +456 "class_abundance", +457 "class_count", +458 ] +459 +460 dict_data_list = [] +461 +462 for classe, list_mspeaks in self.items(): +463 percent_abundance = self.abundance_count_percentile(classe) 464 -465 
ax.axes.spines['top'].set_visible(False) -466 ax.axes.spines['right'].set_visible(False) -467 -468 ax.get_yaxis().set_visible(False) -469 ax.spines['left'].set_visible(False) -470 plt.legend() -471 -472 return ax -473 -474 def plot_mz_error(self, color= 'g'): -475 """ Plot mz error scatter plot of all classes -476 -477 Parameters -478 ---------- -479 color : str, optional -480 Matplotlib color, by default "g" -481 -482 Returns -483 ------- -484 ax : matplotlib.axes -485 Matplotlib axes object -486 """ -487 ax = plt.gca() -488 -489 mz_assigned = self.mz_exp_all() -490 mz_error= self.mz_error_all() -491 -492 ax.scatter( mz_assigned, mz_error, c=color) -493 -494 ax.set_xlabel("$\t{m/z}$", fontsize=12) -495 ax.set_ylabel('Error (ppm)', fontsize=12) -496 ax.tick_params(axis='both', which='major', labelsize=12) -497 -498 ax.axes.spines['top'].set_visible(True) -499 ax.axes.spines['right'].set_visible(True) -500 -501 ax.get_yaxis().set_visible(True) -502 ax.spines['left'].set_visible(True) +465 # ignores low abundant classes +466 if abundance_perc_threshold < abundance_perc_threshold: +467 continue +468 +469 peaks_count_percentile = self.peaks_count_percentile(classe) +470 +471 for ms_peak in list_mspeaks: +472 if ms_peak.is_assigned: +473 for m_formula in ms_peak: +474 # ignores isotopologues +475 if not include_isotopologue and m_formula.is_isotopologue: +476 continue +477 +478 formula_dict = m_formula.to_dict() +479 +480 dict_result = { +481 "mz": ms_peak._mz_exp, +482 "calibrated_mz": ms_peak.mz_exp, +483 "calculated_mz": m_formula.mz_calc, +484 "abundance": ms_peak.abundance, +485 "resolving_power": ms_peak.resolving_power, +486 "sn": ms_peak.signal_to_noise, +487 "ion_charge": ms_peak.ion_charge, +488 "mass_error": m_formula.mz_error, +489 "DBE": m_formula.dbe, +490 "class": classe, +491 "HC": m_formula.H_C, +492 "OC": m_formula.O_C, +493 "ion_type": str(m_formula.ion_type.lower().encode("utf-8")), +494 "is_isotopologue": int(m_formula.is_isotopologue), +495 
"class_abundance": percent_abundance, +496 "class_count": peaks_count_percentile, +497 } +498 +499 for atom in formula_dict.keys(): +500 dict_result[atom] = formula_dict.get(atom) +501 +502 dict_data_list.append(dict_result) 503 -504 ax.set_xlim(self.min_max_mz) -505 ax.set_ylim(self.min_ppm_error , self.max_ppm_error) -506 -507 return ax -508 -509 def plot_mz_error_class(self, classe, color= 'g'): -510 """ Plot mz error scatter plot of a specific class -511 -512 Parameters -513 ---------- -514 classe : str -515 Class name -516 color : str, optional -517 Matplotlib color, by default "g" -518 -519 Returns -520 ------- -521 ax : matplotlib.axes -522 Matplotlib axes object -523 -524 """ -525 if classe != Labels.unassigned: -526 ax = plt.gca() -527 -528 abun_perc = self.abundance_count_percentile(classe) -529 mz_assigned = self.mz_exp(classe) -530 mz_error= self.mz_error(classe) -531 -532 ax.scatter( mz_assigned, mz_error, c=color) -533 -534 title = "%s, %.2f %%" % (classe, abun_perc) -535 ax.set_title(title) -536 ax.set_xlabel("$\t{m/z}$", fontsize=12) -537 ax.set_ylabel('Error (ppm)', fontsize=12) -538 ax.tick_params(axis='both', which='major', labelsize=12) -539 -540 ax.axes.spines['top'].set_visible(True) -541 ax.axes.spines['right'].set_visible(True) -542 -543 ax.get_yaxis().set_visible(True) -544 ax.spines['left'].set_visible(True) -545 -546 ax.set_xlim(self.min_max_mz) -547 ax.set_ylim(self.min_ppm_error , self.max_ppm_error) -548 -549 return ax -550 -551 def plot_ms_class(self, classe, color= 'g'): -552 """ Plot stick mass spectrum of a specific class -553 -554 Parameters -555 ---------- -556 classe : str -557 Class name -558 color : str, optional -559 Matplotlib color, by default "g" -560 -561 Returns -562 ------- -563 ax : matplotlib.axes -564 Matplotlib axes object -565 -566 """ -567 if classe != Labels.unassigned: -568 ax = plt.gca() -569 -570 abun_perc = self.abundance_count_percentile(classe) -571 mz_assigned = self.mz_exp(classe) -572 abundance_assigned= 
self.abundance(classe) +504 else: +505 if not include_unassigned: +506 continue +507 +508 dict_result = { +509 "mz": ms_peak._mz_exp, +510 "calibrated_mz": ms_peak.mz_exp, +511 "abundance": ms_peak.abundance, +512 "resolving_power": ms_peak.resolving_power, +513 "sn": ms_peak.signal_to_noise, +514 "ion_charge": ms_peak.ion_charge, +515 "class": classe, +516 "class_abundance": percent_abundance, +517 "class_count": percent_abundance, +518 } +519 +520 dict_data_list.append(dict_result) +521 +522 columns = columns_labels + self.all_identified_atoms +523 +524 return DataFrame(dict_data_list, columns=columns) +525 +526 def plot_ms_assigned_unassigned(self, assigned_color="b", unassigned_color="r"): +527 """Plot stick mass spectrum of all classes +528 +529 Parameters +530 ---------- +531 assigned_color : str, optional +532 Matplotlib color for the assigned peaks, by default "b" +533 unassigned_color : str, optional +534 Matplotlib color for the unassigned peaks, by default "r" +535 +536 Returns +537 ------- +538 ax : matplotlib.axes +539 Matplotlib axes object +540 """ +541 mz_assigned = self.mz_exp_assigned() +542 abundance_assigned = self.abundance_assigned() +543 +544 mz_not_assigned = self.mz_exp(Labels.unassigned) +545 abundance_not_assigned = self.abundance(Labels.unassigned) +546 +547 ax = plt.gca() +548 +549 for plot_obj in ax.stem( +550 mz_assigned, +551 abundance_assigned, +552 linefmt="-", +553 markerfmt=" ", +554 label="Assigned", +555 ): +556 plt.setp(plot_obj, "color", assigned_color, "linewidth", 2) +557 +558 for plot_obj in ax.stem( +559 mz_not_assigned, +560 abundance_not_assigned, +561 linefmt="-", +562 markerfmt=" ", +563 label="Unassigned", +564 ): +565 plt.setp(plot_obj, "color", unassigned_color, "linewidth", 2) +566 +567 ax.set_xlabel("$\t{m/z}$", fontsize=12) +568 ax.set_ylabel("Abundance", fontsize=12) +569 ax.tick_params(axis="both", which="major", labelsize=12) +570 +571 ax.axes.spines["top"].set_visible(False) +572 
ax.axes.spines["right"].set_visible(False) 573 -574 for plot_obj in ax.stem( mz_assigned, abundance_assigned, linefmt='-', markerfmt=" "): -575 -576 plt.setp(plot_obj, 'color', color, 'linewidth', 2) -577 -578 title = "%s, %.2f %%" % (classe, abun_perc) -579 ax.set_title(title) -580 ax.set_xlabel("$\t{m/z}$", fontsize=12) -581 ax.set_ylabel('Abundance', fontsize=12) -582 ax.tick_params(axis='both', which='major', labelsize=12) -583 -584 ax.axes.spines['top'].set_visible(False) -585 ax.axes.spines['right'].set_visible(False) -586 -587 ax.get_yaxis().set_visible(False) -588 ax.spines['left'].set_visible(False) -589 -590 ax.set_xlim(self.min_max_mz) -591 ax.set_ylim(self.min_max_abundance) -592 -593 return ax +574 ax.get_yaxis().set_visible(False) +575 ax.spines["left"].set_visible(False) +576 plt.legend() +577 +578 return ax +579 +580 def plot_mz_error(self, color="g"): +581 """Plot mz error scatter plot of all classes +582 +583 Parameters +584 ---------- +585 color : str, optional +586 Matplotlib color, by default "g" +587 +588 Returns +589 ------- +590 ax : matplotlib.axes +591 Matplotlib axes object +592 """ +593 ax = plt.gca() 594 -595 def plot_van_krevelen(self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis"): -596 """ Plot Van Krevelen Diagram -597 -598 Parameters -599 ---------- -600 classe : str -601 Class name -602 max_hc : float, optional -603 Max H/C ratio, by default 2.5 -604 max_oc : float, optional -605 Max O/C ratio, by default 2 -606 ticks_number : int, optional -607 Number of ticks, by default 5 -608 color : str, optional -609 Matplotlib color, by default "viridis" -610 -611 Returns -612 ------- -613 ax : matplotlib.axes -614 Matplotlib axes object -615 abun_perc : float -616 Class percentile of the relative abundance -617 """ -618 if classe != Labels.unassigned: -619 -620 # get data -621 abun_perc = self.abundance_count_percentile(classe) -622 hc = self.atoms_ratio(classe, "H", "C") -623 oc = self.atoms_ratio(classe, "O", "C") -624 
abundance = self.abundance(classe) -625 -626 #plot data -627 ax = plt.gca() -628 -629 ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color) -630 -631 #ax.scatter(carbon_number, dbe, c=color, alpha=0.5) -632 -633 title = "%s, %.2f %%" % (classe, abun_perc) -634 ax.set_title(title) -635 ax.set_xlabel("O/C", fontsize=16) -636 ax.set_ylabel('H/C', fontsize=16) -637 ax.tick_params(axis='both', which='major', labelsize=18) -638 ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True)) -639 ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True)) -640 -641 # returns matplot axes obj and the class percentile of the relative abundance -642 -643 return ax, abun_perc -644 -645 def plot_dbe_vs_carbon_number(self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis"): -646 """ Plot DBE vs Carbon Number -647 -648 Parameters -649 ---------- -650 classe : str -651 Class name -652 max_c : int, optional -653 Max Carbon Number, by default 50 -654 max_dbe : int, optional -655 Max DBE, by default 40 -656 dbe_incr : int, optional -657 DBE increment, by default 5 -658 c_incr : int, optional -659 Carbon Number increment, by default 10 -660 color : str, optional -661 Matplotlib color, by default "viridis" -662 -663 Returns -664 ------- -665 ax : matplotlib.axes -666 Matplotlib axes object -667 abun_perc : float -668 Class percentile of the relative abundance -669 """ -670 if classe != Labels.unassigned: +595 mz_assigned = self.mz_exp_all() +596 mz_error = self.mz_error_all() +597 +598 ax.scatter(mz_assigned, mz_error, c=color) +599 +600 ax.set_xlabel("$\t{m/z}$", fontsize=12) +601 ax.set_ylabel("Error (ppm)", fontsize=12) +602 ax.tick_params(axis="both", which="major", labelsize=12) +603 +604 ax.axes.spines["top"].set_visible(True) +605 ax.axes.spines["right"].set_visible(True) +606 +607 ax.get_yaxis().set_visible(True) +608 ax.spines["left"].set_visible(True) +609 +610 ax.set_xlim(self.min_max_mz) +611 ax.set_ylim(self.min_ppm_error, self.max_ppm_error) +612 
+613 return ax +614 +615 def plot_mz_error_class(self, classe, color="g"): +616 """Plot mz error scatter plot of a specific class +617 +618 Parameters +619 ---------- +620 classe : str +621 Class name +622 color : str, optional +623 Matplotlib color, by default "g" +624 +625 Returns +626 ------- +627 ax : matplotlib.axes +628 Matplotlib axes object +629 +630 """ +631 if classe != Labels.unassigned: +632 ax = plt.gca() +633 +634 abun_perc = self.abundance_count_percentile(classe) +635 mz_assigned = self.mz_exp(classe) +636 mz_error = self.mz_error(classe) +637 +638 ax.scatter(mz_assigned, mz_error, c=color) +639 +640 title = "%s, %.2f %%" % (classe, abun_perc) +641 ax.set_title(title) +642 ax.set_xlabel("$\t{m/z}$", fontsize=12) +643 ax.set_ylabel("Error (ppm)", fontsize=12) +644 ax.tick_params(axis="both", which="major", labelsize=12) +645 +646 ax.axes.spines["top"].set_visible(True) +647 ax.axes.spines["right"].set_visible(True) +648 +649 ax.get_yaxis().set_visible(True) +650 ax.spines["left"].set_visible(True) +651 +652 ax.set_xlim(self.min_max_mz) +653 ax.set_ylim(self.min_ppm_error, self.max_ppm_error) +654 +655 return ax +656 +657 def plot_ms_class(self, classe, color="g"): +658 """Plot stick mass spectrum of a specific class +659 +660 Parameters +661 ---------- +662 classe : str +663 Class name +664 color : str, optional +665 Matplotlib color, by default "g" +666 +667 Returns +668 ------- +669 ax : matplotlib.axes +670 Matplotlib axes object 671 -672 # get data -673 abun_perc = self.abundance_count_percentile(classe) -674 carbon_number = self.carbon_number(classe) -675 dbe = self.dbe(classe) -676 abundance = self.abundance(classe) -677 -678 #plot data -679 ax = plt.gca() -680 -681 ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color) -682 -683 #ax.scatter(carbon_number, dbe, c=color, alpha=0.5) -684 +672 """ +673 if classe != Labels.unassigned: +674 ax = plt.gca() +675 +676 abun_perc = self.abundance_count_percentile(classe) +677 mz_assigned = 
self.mz_exp(classe) +678 abundance_assigned = self.abundance(classe) +679 +680 for plot_obj in ax.stem( +681 mz_assigned, abundance_assigned, linefmt="-", markerfmt=" " +682 ): +683 plt.setp(plot_obj, "color", color, "linewidth", 2) +684 685 title = "%s, %.2f %%" % (classe, abun_perc) 686 ax.set_title(title) -687 ax.set_xlabel("Carbon number", fontsize=16) -688 ax.set_ylabel('DBE', fontsize=16) -689 ax.tick_params(axis='both', which='major', labelsize=18) -690 ax.set_xticks(range(0, max_c, c_incr)) -691 ax.set_yticks(range(0, max_dbe, dbe_incr)) -692 -693 # returns matplot axes obj and the class percentile of the relative abundance -694 -695 return ax, abun_perc +687 ax.set_xlabel("$\t{m/z}$", fontsize=12) +688 ax.set_ylabel("Abundance", fontsize=12) +689 ax.tick_params(axis="both", which="major", labelsize=12) +690 +691 ax.axes.spines["top"].set_visible(False) +692 ax.axes.spines["right"].set_visible(False) +693 +694 ax.get_yaxis().set_visible(False) +695 ax.spines["left"].set_visible(False) +696 +697 ax.set_xlim(self.min_max_mz) +698 ax.set_ylim(self.min_max_abundance) +699 +700 return ax +701 +702 def plot_van_krevelen( +703 self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis" +704 ): +705 """Plot Van Krevelen Diagram +706 +707 Parameters +708 ---------- +709 classe : str +710 Class name +711 max_hc : float, optional +712 Max H/C ratio, by default 2.5 +713 max_oc : float, optional +714 Max O/C ratio, by default 2 +715 ticks_number : int, optional +716 Number of ticks, by default 5 +717 color : str, optional +718 Matplotlib color, by default "viridis" +719 +720 Returns +721 ------- +722 ax : matplotlib.axes +723 Matplotlib axes object +724 abun_perc : float +725 Class percentile of the relative abundance +726 """ +727 if classe != Labels.unassigned: +728 # get data +729 abun_perc = self.abundance_count_percentile(classe) +730 hc = self.atoms_ratio(classe, "H", "C") +731 oc = self.atoms_ratio(classe, "O", "C") +732 abundance = self.abundance(classe) 
+733 +734 # plot data +735 ax = plt.gca() +736 +737 ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color) +738 +739 # ax.scatter(carbon_number, dbe, c=color, alpha=0.5) +740 +741 title = "%s, %.2f %%" % (classe, abun_perc) +742 ax.set_title(title) +743 ax.set_xlabel("O/C", fontsize=16) +744 ax.set_ylabel("H/C", fontsize=16) +745 ax.tick_params(axis="both", which="major", labelsize=18) +746 ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True)) +747 ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True)) +748 +749 # returns matplot axes obj and the class percentile of the relative abundance +750 +751 return ax, abun_perc +752 +753 def plot_dbe_vs_carbon_number( +754 self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis" +755 ): +756 """Plot DBE vs Carbon Number +757 +758 Parameters +759 ---------- +760 classe : str +761 Class name +762 max_c : int, optional +763 Max Carbon Number, by default 50 +764 max_dbe : int, optional +765 Max DBE, by default 40 +766 dbe_incr : int, optional +767 DBE increment, by default 5 +768 c_incr : int, optional +769 Carbon Number increment, by default 10 +770 color : str, optional +771 Matplotlib color, by default "viridis" +772 +773 Returns +774 ------- +775 ax : matplotlib.axes +776 Matplotlib axes object +777 abun_perc : float +778 Class percentile of the relative abundance +779 """ +780 if classe != Labels.unassigned: +781 # get data +782 abun_perc = self.abundance_count_percentile(classe) +783 carbon_number = self.carbon_number(classe) +784 dbe = self.dbe(classe) +785 abundance = self.abundance(classe) +786 +787 # plot data +788 ax = plt.gca() +789 +790 ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color) +791 +792 # ax.scatter(carbon_number, dbe, c=color, alpha=0.5) +793 +794 title = "%s, %.2f %%" % (classe, abun_perc) +795 ax.set_title(title) +796 ax.set_xlabel("Carbon number", fontsize=16) +797 ax.set_ylabel("DBE", fontsize=16) +798 ax.tick_params(axis="both", which="major", 
labelsize=18) +799 ax.set_xticks(range(0, max_c, c_incr)) +800 ax.set_yticks(range(0, max_dbe, dbe_incr)) +801 +802 # returns matplot axes obj and the class percentile of the relative abundance +803 +804 return ax, abun_perc

    @@ -1623,41 +1841,41 @@
    Methods
    • __len__(). -Returns the number of classes.
    • -
    • __getitem__(classe)
      -Returns the molecular formula list for specified class.
    • +Returns the number of classes. +
    • __getitem__(classe) +Returns the molecular formula list for specified class.
    • __iter__(). -Returns an iterator over the keys of the dictionary.
    • +Returns an iterator over the keys of the dictionary.
    • get_classes(threshold_perc=1, isotopologue=True). -Returns a list of classes with abundance percentile above threshold.
    • +Returns a list of classes with abundance percentile above threshold.
    • molecular_formula_string(classe). -Returns a list of molecular formula string for specified class.
    • +Returns a list of molecular formula string for specified class.
    • molecular_formula(classe). -Returns a list of molecular formula for specified class.
    • +Returns a list of molecular formula for specified class.
    • carbon_number(classe). -Returns a list of carbon number for specified class.
    • +Returns a list of carbon number for specified class.
    • atom_count(atom, classe). -Returns a list of atom count for specified class.
    • +Returns a list of atom count for specified class.
    • dbe(classe). -Returns a list of DBE for specified class.
    • +Returns a list of DBE for specified class.
    • atoms_ratio(classe, numerator, denominator). -Returns a list of atoms ratio for specified class.
    • +Returns a list of atoms ratio for specified class.
    • mz_exp(classe). -Returns a list of experimental mz for specified class.
    • +Returns a list of experimental mz for specified class.
    • abundance(classe). -Returns a list of abundance for specified class.
    • +Returns a list of abundance for specified class.
    • mz_error(classe). -Returns a list of mz error for specified class.
    • +Returns a list of mz error for specified class.
    • mz_calc(classe). -Returns a list of calculated mz for specified class.
    • +Returns a list of calculated mz for specified class.
    • peaks_count_percentile(classe). -Returns the peaks count percentile of a specific class.
    • +Returns the peaks count percentile of a specific class.
    • abundance_count_percentile(classe). -Returns the abundance percentile of a specific class.
    • +Returns the abundance percentile of a specific class.
    • mz_exp_assigned(). -Returns a list of experimental mz for all assigned classes.
    • +Returns a list of experimental mz for all assigned classes.
    • abundance_assigned(). -Returns a list of abundance for all classes.
    • +Returns a list of abundance for all classes.
    • mz_exp_all(). Returns a list of mz for all classes.
    @@ -1675,85 +1893,75 @@
    Methods
     98    def __init__(self, mass_spectrum, choose_molecular_formula=True):
    - 99
    -100        def sort_atoms_method( atom):
    -101            """ Sort atoms by order of appearance in the Atoms class"""
    -102            return [Atoms.atoms_order.index(atom)]
    -103
    -104        self._ms_grouped_class = dict()
    -105        
    -106        self.choose_mf = choose_molecular_formula
    -107        
    -108        #mapping for ms peaks without any molecular formula associated
    -109        self._ms_grouped_class[Labels.unassigned] = list()
    -110
    -111        self.total_peaks = 0
    -112
    -113        self.sum_abundance = 0
    -114
    -115        self.min_max_mz = (mass_spectrum.min_mz_exp, mass_spectrum.max_mz_exp)
    -116
    -117        self.min_max_abundance = (mass_spectrum.min_abundance, mass_spectrum.max_abundance)
    -118
    -119        self.min_ppm_error = mass_spectrum.molecular_search_settings.min_ppm_error 
    + 99        def sort_atoms_method(atom):
    +100            """Sort atoms by order of appearance in the Atoms class"""
    +101            return [Atoms.atoms_order.index(atom)]
    +102
    +103        self._ms_grouped_class = dict()
    +104
    +105        self.choose_mf = choose_molecular_formula
    +106
    +107        # mapping for ms peaks without any molecular formula associated
    +108        self._ms_grouped_class[Labels.unassigned] = list()
    +109
    +110        self.total_peaks = 0
    +111
    +112        self.sum_abundance = 0
    +113
    +114        self.min_max_mz = (mass_spectrum.min_mz_exp, mass_spectrum.max_mz_exp)
    +115
    +116        self.min_max_abundance = (
    +117            mass_spectrum.min_abundance,
    +118            mass_spectrum.max_abundance,
    +119        )
     120
    -121        self.max_ppm_error = mass_spectrum.molecular_search_settings.max_ppm_error
    +121        self.min_ppm_error = mass_spectrum.molecular_search_settings.min_ppm_error
     122
    -123        check_assign = False
    +123        self.max_ppm_error = mass_spectrum.molecular_search_settings.max_ppm_error
     124
    -125        all_used_atoms = set()
    +125        check_assign = False
     126
    -127        for ms_peak in mass_spectrum:
    -128            
    -129            self.total_peaks += 1
    -130
    -131            self.sum_abundance += ms_peak.abundance
    -132
    -133            if not ms_peak.is_assigned:
    -134
    +127        all_used_atoms = set()
    +128
    +129        for ms_peak in mass_spectrum:
    +130            self.total_peaks += 1
    +131
    +132            self.sum_abundance += ms_peak.abundance
    +133
    +134            if not ms_peak.is_assigned:
     135                self._ms_grouped_class.get(Labels.unassigned).append(ms_peak)
    -136                
    -137            else:    
    -138                
    -139                check_assign = True    
    -140
    -141                if choose_molecular_formula:
    -142                    
    -143                    mf = ms_peak.best_molecular_formula_candidate
    -144                    
    -145                    classes =  [mf.class_label]
    -146                    
    -147                    for atom in mf.atoms:
    -148                        
    -149                        all_used_atoms.add(atom)
    +136
    +137            else:
    +138                check_assign = True
    +139
    +140                if choose_molecular_formula:
    +141                    mf = ms_peak.best_molecular_formula_candidate
    +142
    +143                    classes = [mf.class_label]
    +144
    +145                    for atom in mf.atoms:
    +146                        all_used_atoms.add(atom)
    +147
    +148                else:
    +149                    classes = []
     150
    -151                else: 
    -152
    -153                    classes = []
    -154                    
    -155                    for mf in ms_peak:
    -156                        
    -157                        classes.append(mf.class_label)
    -158                        
    -159                        for atom in mf.atoms:
    -160                             
    -161                             all_used_atoms.add(atom)
    -162
    -163                for classe in classes:
    -164                    
    -165                    if classe in self._ms_grouped_class.keys():
    -166
    -167                        self._ms_grouped_class.get(classe).append(ms_peak)
    -168                    
    -169                    else:     
    -170
    -171                        self._ms_grouped_class[classe] = [ms_peak]
    -172
    -173        self.all_identified_atoms = sorted(all_used_atoms, key=sort_atoms_method)
    -174
    -175        if not check_assign:
    -176
    -177            raise Exception("No molecular formula associated with any mspeak objects")
    +151                    for mf in ms_peak:
    +152                        classes.append(mf.class_label)
    +153
    +154                        for atom in mf.atoms:
    +155                            all_used_atoms.add(atom)
    +156
    +157                for classe in classes:
    +158                    if classe in self._ms_grouped_class.keys():
    +159                        self._ms_grouped_class.get(classe).append(ms_peak)
    +160
    +161                    else:
    +162                        self._ms_grouped_class[classe] = [ms_peak]
    +163
    +164        self.all_identified_atoms = sorted(all_used_atoms, key=sort_atoms_method)
    +165
    +166        if not check_assign:
    +167            raise Exception("No molecular formula associated with any mspeak objects")
     
    @@ -1859,20 +2067,20 @@
    Methods
    -
    192    def get_classes(self, threshold_perc=1, isotopologue=True):
    -193        """ Return a list of classes with abundance percentile above threshold"""
    -194        classes = list()
    -195        for classe in self.keys():
    -196            if classe != Labels.unassigned:
    -197                if self.abundance_count_percentile(classe) > threshold_perc:
    -198                    
    -199                    if classe != Labels.unassigned:
    -200                        # access first molecular formula inside the first ms peak and check isotopologue
    -201                        if not isotopologue and self.get(classe)[0][0].is_isotopologue: continue
    -202                    
    -203                    classes.append(classe)
    -204        #TODO sort classes chemically here too
    -205        return classes
    +            
    181    def get_classes(self, threshold_perc=1, isotopologue=True):
    +182        """Return a list of classes with abundance percentile above threshold"""
    +183        classes = list()
    +184        for classe in self.keys():
    +185            if classe != Labels.unassigned:
    +186                if self.abundance_count_percentile(classe) > threshold_perc:
    +187                    if classe != Labels.unassigned:
    +188                        # access first molecular formula inside the first ms peak and check isotopologue
    +189                        if not isotopologue and self.get(classe)[0][0].is_isotopologue:
    +190                            continue
    +191
    +192                    classes.append(classe)
    +193        # TODO sort classes chemically here too
    +194        return classes
     
    @@ -1892,12 +2100,20 @@
    Methods
    -
    207    def molecular_formula_string(self, classe,):
    -208        """ Return a list of molecular formula string for specified class"""
    -209        if self.choose_mf:
    -210            return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]]
    -211        else:
    -212            return [mf for mspeak in self[classe] for mf in mspeak if mf.class_label == classe]
    +            
    196    def molecular_formula_string(
    +197        self,
    +198        classe,
    +199    ):
    +200        """Return a list of molecular formula string for specified class"""
    +201        if self.choose_mf:
    +202            return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]]
    +203        else:
    +204            return [
    +205                mf
    +206                for mspeak in self[classe]
    +207                for mf in mspeak
    +208                if mf.class_label == classe
    +209            ]
     
    @@ -1917,12 +2133,20 @@
    Methods
    -
    214    def molecular_formula(self, classe,):
    -215        """ Return a list of molecular formula for specified class"""
    +            
    211    def molecular_formula(
    +212        self,
    +213        classe,
    +214    ):
    +215        """Return a list of molecular formula for specified class"""
     216        if self.choose_mf:
     217            return [mspeak.best_molecular_formula_candidate for mspeak in self[classe]]
     218        else:
    -219            return [mf for mspeak in self[classe] for mf in mspeak if mf.class_label == classe]
    +219            return [
    +220                mf
    +221                for mspeak in self[classe]
    +222                for mf in mspeak
    +223                if mf.class_label == classe
    +224            ]
     
    @@ -1942,12 +2166,20 @@
    Methods
    -
    221    def carbon_number(self, classe):
    -222        """ Return a list of carbon number for specified class"""
    -223        if self.choose_mf:
    -224            return [mspeak.best_molecular_formula_candidate.get("C") for mspeak in self[classe]]
    -225        else:
    -226            return [mf.get('C') for mspeak in self[classe] for mf in mspeak if mf.class_label == classe]
    +            
    226    def carbon_number(self, classe):
    +227        """Return a list of carbon number for specified class"""
    +228        if self.choose_mf:
    +229            return [
    +230                mspeak.best_molecular_formula_candidate.get("C")
    +231                for mspeak in self[classe]
    +232            ]
    +233        else:
    +234            return [
    +235                mf.get("C")
    +236                for mspeak in self[classe]
    +237                for mf in mspeak
    +238                if mf.class_label == classe
    +239            ]
     
    @@ -1967,13 +2199,21 @@
    Methods
    -
    228    def atom_count(self, atom, classe):
    -229        """ Return a list of atom count for specified class"""
    -230
    -231        if self.choose_mf:
    -232            return [mspeak.best_molecular_formula_candidate.get(atom) for mspeak in self[classe]]
    -233        else:    
    -234            return [mf.get(atom) for mspeak in self[classe] for mf in mspeak if mf.class_label == classe]
    +            
    241    def atom_count(self, atom, classe):
    +242        """Return a list of atom count for specified class"""
    +243
    +244        if self.choose_mf:
    +245            return [
    +246                mspeak.best_molecular_formula_candidate.get(atom)
    +247                for mspeak in self[classe]
    +248            ]
    +249        else:
    +250            return [
    +251                mf.get(atom)
    +252                for mspeak in self[classe]
    +253                for mf in mspeak
    +254                if mf.class_label == classe
    +255            ]
     
    @@ -1993,12 +2233,19 @@
    Methods
    -
    236    def dbe(self, classe):
    -237        """ Return a list of DBE for specified class"""
    -238        if self.choose_mf:
    -239            return [mspeak.best_molecular_formula_candidate.dbe for mspeak in self[classe]]
    -240        else:    
    -241            return [mf.dbe for mspeak in self[classe] for mf in mspeak if mf.class_label == classe]
    +            
    257    def dbe(self, classe):
    +258        """Return a list of DBE for specified class"""
    +259        if self.choose_mf:
    +260            return [
    +261                mspeak.best_molecular_formula_candidate.dbe for mspeak in self[classe]
    +262            ]
    +263        else:
    +264            return [
    +265                mf.dbe
    +266                for mspeak in self[classe]
    +267                for mf in mspeak
    +268                if mf.class_label == classe
    +269            ]
     
    @@ -2018,9 +2265,12 @@
    Methods
    -
    243    def atoms_ratio(self, classe, numerator, denominator):
    -244        """ Return a list of atoms ratio for specified class"""
    -245        return [mf.get(numerator)/mf.get(denominator) for mf in self.molecular_formula(classe)]
    +            
    271    def atoms_ratio(self, classe, numerator, denominator):
    +272        """Return a list of atoms ratio for specified class"""
    +273        return [
    +274            mf.get(numerator) / mf.get(denominator)
    +275            for mf in self.molecular_formula(classe)
    +276        ]
     
    @@ -2040,15 +2290,18 @@
    Methods
    -
    247    def mz_exp(self, classe):
    -248        """ Return a list of experimental mz for specified class"""
    -249        if self.choose_mf or classe == Labels.unassigned:
    -250            
    -251            return [mspeak.mz_exp for mspeak in self[classe]]
    -252        
    -253        else:
    -254            
    -255            return [mspeak.mz_exp for mspeak in self[classe] for mf in mspeak if mf.class_label == classe]
    +            
    278    def mz_exp(self, classe):
    +279        """Return a list of experimental mz for specified class"""
    +280        if self.choose_mf or classe == Labels.unassigned:
    +281            return [mspeak.mz_exp for mspeak in self[classe]]
    +282
    +283        else:
    +284            return [
    +285                mspeak.mz_exp
    +286                for mspeak in self[classe]
    +287                for mf in mspeak
    +288                if mf.class_label == classe
    +289            ]
     
    @@ -2068,15 +2321,18 @@
    Methods
    -
    257    def abundance(self, classe):
    -258        """ Return a list of abundance for specified class"""
    -259        if self.choose_mf or classe == Labels.unassigned:
    -260            
    -261            return [mspeak.abundance for mspeak in self[classe]]
    -262        
    -263        else:
    -264            
    -265            return [mspeak.abundance for mspeak in self[classe] for mf in mspeak if mf.class_label == classe]
    +            
    291    def abundance(self, classe):
    +292        """Return a list of abundance for specified class"""
    +293        if self.choose_mf or classe == Labels.unassigned:
    +294            return [mspeak.abundance for mspeak in self[classe]]
    +295
    +296        else:
    +297            return [
    +298                mspeak.abundance
    +299                for mspeak in self[classe]
    +300                for mf in mspeak
    +301                if mf.class_label == classe
    +302            ]
     
    @@ -2096,17 +2352,22 @@
    Methods
    -
    267    def mz_error(self, classe):
    -268        """ Return a list of mz error for specified class"""
    -269        if classe != Labels.unassigned:
    -270            
    -271            if self.choose_mf:
    -272                
    -273                return [mspeak.best_molecular_formula_candidate.mz_error for mspeak in self[classe]]
    -274            
    -275            else:
    -276                
    -277                return [mf.mz_error for mspeak in self[classe] for mf in mspeak if mf.class_label == classe]
    +            
    304    def mz_error(self, classe):
    +305        """Return a list of mz error for specified class"""
    +306        if classe != Labels.unassigned:
    +307            if self.choose_mf:
    +308                return [
    +309                    mspeak.best_molecular_formula_candidate.mz_error
    +310                    for mspeak in self[classe]
    +311                ]
    +312
    +313            else:
    +314                return [
    +315                    mf.mz_error
    +316                    for mspeak in self[classe]
    +317                    for mf in mspeak
    +318                    if mf.class_label == classe
    +319                ]
     
    @@ -2126,15 +2387,21 @@
    Methods
    -
    279    def mz_calc(self, classe):
    -280        """ Return a list of calculated mz for specified class"""
    -281        if self.choose_mf:
    -282            
    -283            return [mspeak.best_molecular_formula_candidate.mz_calc for mspeak in self[classe]]
    -284        
    -285        else:
    -286            
    -287            return [mf.mz_calc for mspeak in self[classe] for mf in mspeak if mf.class_label == classe] 
    +            
    321    def mz_calc(self, classe):
    +322        """Return a list of calculated mz for specified class"""
    +323        if self.choose_mf:
    +324            return [
    +325                mspeak.best_molecular_formula_candidate.mz_calc
    +326                for mspeak in self[classe]
    +327            ]
    +328
    +329        else:
    +330            return [
    +331                mf.mz_calc
    +332                for mspeak in self[classe]
    +333                for mf in mspeak
    +334                if mf.class_label == classe
    +335            ]
     
    @@ -2154,9 +2421,9 @@
    Methods
    -
    289    def peaks_count_percentile(self, classe):
    -290        """ Return the peaks count percentile of a specific class"""
    -291        return (len(self[classe])/self.total_peaks)*100
    +            
    337    def peaks_count_percentile(self, classe):
    +338        """Return the peaks count percentile of a specific class"""
    +339        return (len(self[classe]) / self.total_peaks) * 100
     
    @@ -2176,9 +2443,11 @@
    Methods
    -
    293    def abundance_count_percentile(self, classe):
    -294        """ Return the abundance percentile of a specific class"""
    -295        return (sum([mspeak.abundance for mspeak in self[classe]]) / self.sum_abundance)*100
    +            
    341    def abundance_count_percentile(self, classe):
    +342        """Return the abundance percentile of a specific class"""
    +343        return (
    +344            sum([mspeak.abundance for mspeak in self[classe]]) / self.sum_abundance
    +345        ) * 100
     
    @@ -2198,11 +2467,16 @@
    Methods
    -
    297    def mz_exp_assigned(self):
    -298        """ Return a list of experimental mz for all assigned classes"""
    -299        classes = self.keys()
    -300
    -301        return [mspeak.mz_exp for classe in classes for mspeak in self[classe] if classe != Labels.unassigned]
    +            
    347    def mz_exp_assigned(self):
    +348        """Return a list of experimental mz for all assigned classes"""
    +349        classes = self.keys()
    +350
    +351        return [
    +352            mspeak.mz_exp
    +353            for classe in classes
    +354            for mspeak in self[classe]
    +355            if classe != Labels.unassigned
    +356        ]
     
    @@ -2222,11 +2496,16 @@
    Methods
    -
    303    def abundance_assigned(self):
    -304        """ Return a list of abundance for all classes """
    -305        classes = self.keys()
    -306            
    -307        return [mspeak.abundance for classe in classes for mspeak in self[classe] if classe != Labels.unassigned]
    +            
    358    def abundance_assigned(self):
    +359        """Return a list of abundance for all classes"""
    +360        classes = self.keys()
    +361
    +362        return [
    +363            mspeak.abundance
    +364            for classe in classes
    +365            for mspeak in self[classe]
    +366            if classe != Labels.unassigned
    +367        ]
     
    @@ -2246,11 +2525,13 @@
    Methods
    -
    309    def mz_exp_all(self):
    -310        """ Return a list of mz for all classes"""
    -311        classes = self.keys()
    -312        
    -313        return flatten_list([self.mz_exp(classe) for classe in classes if classe != Labels.unassigned])
    +            
    369    def mz_exp_all(self):
    +370        """Return a list of mz for all classes"""
    +371        classes = self.keys()
    +372
    +373        return flatten_list(
    +374            [self.mz_exp(classe) for classe in classes if classe != Labels.unassigned]
    +375        )
     
    @@ -2270,11 +2551,13 @@
    Methods
    -
    315    def mz_error_all(self):
    -316        """ Return a list of mz error for all classes"""
    -317        classes = self.keys()
    -318        
    -319        return flatten_list([self.mz_error(classe) for classe in classes if classe != Labels.unassigned])
    +            
    377    def mz_error_all(self):
    +378        """Return a list of mz error for all classes"""
    +379        classes = self.keys()
    +380
    +381        return flatten_list(
    +382            [self.mz_error(classe) for classe in classes if classe != Labels.unassigned]
    +383        )
     
    @@ -2294,11 +2577,17 @@
    Methods
    -
    321    def carbon_number_all(self):
    -322        """ Return a list of carbon number for all classes"""
    -323        classes = self.keys()
    -324            
    -325        return flatten_list([self.carbon_number(classe) for classe in classes if classe != Labels.unassigned])
    +            
    385    def carbon_number_all(self):
    +386        """Return a list of carbon number for all classes"""
    +387        classes = self.keys()
    +388
    +389        return flatten_list(
    +390            [
    +391                self.carbon_number(classe)
    +392                for classe in classes
    +393                if classe != Labels.unassigned
    +394            ]
    +395        )
     
    @@ -2318,11 +2607,13 @@
    Methods
    -
    327    def dbe_all(self):
    -328        """ Return a list of DBE for all classes"""
    -329        classes = self.keys()
    -330            
    -331        return flatten_list([self.dbe(classe) for classe in classes if classe != Labels.unassigned])
    +            
    397    def dbe_all(self):
    +398        """Return a list of DBE for all classes"""
    +399        classes = self.keys()
    +400
    +401        return flatten_list(
    +402            [self.dbe(classe) for classe in classes if classe != Labels.unassigned]
    +403        )
     
    @@ -2342,11 +2633,17 @@
    Methods
    -
    333    def atoms_ratio_all(self, numerator, denominator):
    -334        """ Return a list of atoms ratio for all classes"""
    -335        classes = self.keys()
    -336            
    -337        return flatten_list([self.atoms_ratio(classe, numerator, denominator) for classe in classes if classe != Labels.unassigned])
    +            
    405    def atoms_ratio_all(self, numerator, denominator):
    +406        """Return a list of atoms ratio for all classes"""
    +407        classes = self.keys()
    +408
    +409        return flatten_list(
    +410            [
    +411                self.atoms_ratio(classe, numerator, denominator)
    +412                for classe in classes
    +413                if classe != Labels.unassigned
    +414            ]
    +415        )
     
    @@ -2366,96 +2663,114 @@
    Methods
    -
    339    def to_dataframe(self, include_isotopologue=False, abundance_perc_threshold=5, include_unassigned=False):
    -340        """ Return a pandas dataframe with all the data from the class
    -341        
    -342        Parameters
    -343        ----------
    -344        include_isotopologue : bool, optional
    -345            Include isotopologues, by default False
    -346        abundance_perc_threshold : int, optional
    -347            Abundance percentile threshold, by default 5
    -348        include_unassigned : bool, optional
    -349            Include unassigned peaks, by default False
    -350        
    -351        Returns
    -352        -------
    -353        DataFrame
    -354            Pandas dataframe with all the data from the class
    -355        """
    -356        from pandas import DataFrame
    -357        
    -358        columns_labels = ['mz', 'calibrated_mz', 'calculated_m_z', 'abundance',
    -359                                'resolving_power', 'sn', 'ion_charge', 'mass_error',
    -360                                'DBE', 'class', 'HC', 'OC', 'ion_type','is_isotopologue',
    -361                                'class_abundance', 'class_count']
    -362
    -363        dict_data_list = []
    -364
    -365        for classe, list_mspeaks in self.items():
    -366
    -367            percent_abundance = self.abundance_count_percentile(classe)
    -368            
    -369            #ignores low abundant classes
    -370            if abundance_perc_threshold < abundance_perc_threshold: continue
    -371                
    -372            peaks_count_percentile = self.peaks_count_percentile(classe)
    -373
    -374            for ms_peak in list_mspeaks:
    -375                 
    -376                if ms_peak.is_assigned:
    -377                    
    -378                    for m_formula in ms_peak:
    -379                        
    -380                        #ignores isotopologues
    -381                        if not include_isotopologue and m_formula.is_isotopologue: continue
    -382                        
    -383                        formula_dict = m_formula.to_dict()
    -384
    -385                        dict_result = {'mz':  ms_peak._mz_exp,
    -386                                'calibrated_mz': ms_peak.mz_exp,
    -387                                'calculated_mz': m_formula.mz_calc,
    -388                                'abundance': ms_peak.abundance,
    -389                                'resolving_power': ms_peak.resolving_power,
    -390                                'sn':  ms_peak.signal_to_noise,
    -391                                'ion_charge': ms_peak.ion_charge,
    -392                                'mass_error': m_formula.mz_error,
    -393                                'DBE':  m_formula.dbe,
    -394                                'class': classe,
    -395                                'HC':  m_formula.H_C,
    -396                                'OC':  m_formula.O_C,
    -397                                'ion_type': str(m_formula.ion_type.lower().encode('utf-8')),
    -398                                'is_isotopologue': int(m_formula.is_isotopologue),
    -399                                'class_abundance': percent_abundance,
    -400                                'class_count': peaks_count_percentile
    -401                                }
    -402                        
    -403                        for atom in formula_dict.keys():
    -404                        
    -405                           dict_result[atom] = formula_dict.get(atom)
    -406
    -407                    dict_data_list.append(dict_result)
    -408
    -409                else:
    -410
    -411                    if not include_unassigned: continue
    -412
    -413                    dict_result = {'mz':  ms_peak._mz_exp,
    -414                                'calibrated_mz': ms_peak.mz_exp,
    -415                                'abundance': ms_peak.abundance,
    -416                                'resolving_power': ms_peak.resolving_power,
    -417                                'sn':  ms_peak.signal_to_noise,
    -418                                'ion_charge': ms_peak.ion_charge,
    -419                                'class': classe,
    -420                                'class_abundance': percent_abundance,
    -421                                'class_count': percent_abundance
    -422                                }
    -423                
    -424                    dict_data_list.append(dict_result)                
    -425
    -426        columns = columns_labels + self.all_identified_atoms
    -427
    -428        return DataFrame(dict_data_list, columns=columns)
    +            
    417    def to_dataframe(
    +418        self,
    +419        include_isotopologue=False,
    +420        abundance_perc_threshold=5,
    +421        include_unassigned=False,
    +422    ):
    +423        """Return a pandas dataframe with all the data from the class
    +424
    +425        Parameters
    +426        ----------
    +427        include_isotopologue : bool, optional
    +428            Include isotopologues, by default False
    +429        abundance_perc_threshold : int, optional
    +430            Abundance percentile threshold, by default 5
    +431        include_unassigned : bool, optional
    +432            Include unassigned peaks, by default False
    +433
    +434        Returns
    +435        -------
    +436        DataFrame
    +437            Pandas dataframe with all the data from the class
    +438        """
    +439        from pandas import DataFrame
    +440
    +441        columns_labels = [
    +442            "mz",
    +443            "calibrated_mz",
    +444            "calculated_m_z",
    +445            "abundance",
    +446            "resolving_power",
    +447            "sn",
    +448            "ion_charge",
    +449            "mass_error",
    +450            "DBE",
    +451            "class",
    +452            "HC",
    +453            "OC",
    +454            "ion_type",
    +455            "is_isotopologue",
    +456            "class_abundance",
    +457            "class_count",
    +458        ]
    +459
    +460        dict_data_list = []
    +461
    +462        for classe, list_mspeaks in self.items():
    +463            percent_abundance = self.abundance_count_percentile(classe)
    +464
    +465            # ignores low abundant classes
    +466            if abundance_perc_threshold < abundance_perc_threshold:
    +467                continue
    +468
    +469            peaks_count_percentile = self.peaks_count_percentile(classe)
    +470
    +471            for ms_peak in list_mspeaks:
    +472                if ms_peak.is_assigned:
    +473                    for m_formula in ms_peak:
    +474                        # ignores isotopologues
    +475                        if not include_isotopologue and m_formula.is_isotopologue:
    +476                            continue
    +477
    +478                        formula_dict = m_formula.to_dict()
    +479
    +480                        dict_result = {
    +481                            "mz": ms_peak._mz_exp,
    +482                            "calibrated_mz": ms_peak.mz_exp,
    +483                            "calculated_mz": m_formula.mz_calc,
    +484                            "abundance": ms_peak.abundance,
    +485                            "resolving_power": ms_peak.resolving_power,
    +486                            "sn": ms_peak.signal_to_noise,
    +487                            "ion_charge": ms_peak.ion_charge,
    +488                            "mass_error": m_formula.mz_error,
    +489                            "DBE": m_formula.dbe,
    +490                            "class": classe,
    +491                            "HC": m_formula.H_C,
    +492                            "OC": m_formula.O_C,
    +493                            "ion_type": str(m_formula.ion_type.lower().encode("utf-8")),
    +494                            "is_isotopologue": int(m_formula.is_isotopologue),
    +495                            "class_abundance": percent_abundance,
    +496                            "class_count": peaks_count_percentile,
    +497                        }
    +498
    +499                        for atom in formula_dict.keys():
    +500                            dict_result[atom] = formula_dict.get(atom)
    +501
    +502                    dict_data_list.append(dict_result)
    +503
    +504                else:
    +505                    if not include_unassigned:
    +506                        continue
    +507
    +508                    dict_result = {
    +509                        "mz": ms_peak._mz_exp,
    +510                        "calibrated_mz": ms_peak.mz_exp,
    +511                        "abundance": ms_peak.abundance,
    +512                        "resolving_power": ms_peak.resolving_power,
    +513                        "sn": ms_peak.signal_to_noise,
    +514                        "ion_charge": ms_peak.ion_charge,
    +515                        "class": classe,
    +516                        "class_abundance": percent_abundance,
    +517                        "class_count": percent_abundance,
    +518                    }
    +519
    +520                    dict_data_list.append(dict_result)
    +521
    +522        columns = columns_labels + self.all_identified_atoms
    +523
    +524        return DataFrame(dict_data_list, columns=columns)
     
    @@ -2492,49 +2807,59 @@
    Returns
    -
    430    def plot_ms_assigned_unassigned(self, assigned_color= 'b', unassigned_color = 'r'):
    -431        """ Plot stick mass spectrum of all classes
    -432        
    -433        Parameters
    -434        ----------
    -435        assigned_color : str, optional
    -436            Matplotlib color for the assigned peaks, by default "b"
    -437        unassigned_color : str, optional
    -438            Matplotlib color for the unassigned peaks, by default "r"
    -439        
    -440        Returns
    -441        -------
    -442        ax : matplotlib.axes
    -443            Matplotlib axes object
    -444        """
    -445        mz_assigned = self.mz_exp_assigned()
    -446        abundance_assigned = self.abundance_assigned()
    -447    
    -448        mz_not_assigned = self.mz_exp(Labels.unassigned)
    -449        abundance_not_assigned = self.abundance(Labels.unassigned)
    -450        
    -451        ax = plt.gca()
    -452
    -453        for plot_obj in ax.stem(mz_assigned,abundance_assigned, linefmt='-',  markerfmt=" ", label="Assigned"):
    -454        
    -455            plt.setp(plot_obj, 'color', assigned_color, 'linewidth', 2)
    -456        
    -457        for plot_obj in ax.stem(mz_not_assigned, abundance_not_assigned, linefmt='-', markerfmt=" ", label="Unassigned"):
    -458        
    -459            plt.setp(plot_obj, 'color', unassigned_color, 'linewidth', 2)
    -460        
    -461        ax.set_xlabel("$\t{m/z}$", fontsize=12)
    -462        ax.set_ylabel('Abundance', fontsize=12)
    -463        ax.tick_params(axis='both', which='major', labelsize=12)
    -464
    -465        ax.axes.spines['top'].set_visible(False)
    -466        ax.axes.spines['right'].set_visible(False)
    -467
    -468        ax.get_yaxis().set_visible(False)
    -469        ax.spines['left'].set_visible(False)
    -470        plt.legend()
    -471        
    -472        return ax    
    +            
    526    def plot_ms_assigned_unassigned(self, assigned_color="b", unassigned_color="r"):
    +527        """Plot stick mass spectrum of all classes
    +528
    +529        Parameters
    +530        ----------
    +531        assigned_color : str, optional
    +532            Matplotlib color for the assigned peaks, by default "b"
    +533        unassigned_color : str, optional
    +534            Matplotlib color for the unassigned peaks, by default "r"
    +535
    +536        Returns
    +537        -------
    +538        ax : matplotlib.axes
    +539            Matplotlib axes object
    +540        """
    +541        mz_assigned = self.mz_exp_assigned()
    +542        abundance_assigned = self.abundance_assigned()
    +543
    +544        mz_not_assigned = self.mz_exp(Labels.unassigned)
    +545        abundance_not_assigned = self.abundance(Labels.unassigned)
    +546
    +547        ax = plt.gca()
    +548
    +549        for plot_obj in ax.stem(
    +550            mz_assigned,
    +551            abundance_assigned,
    +552            linefmt="-",
    +553            markerfmt=" ",
    +554            label="Assigned",
    +555        ):
    +556            plt.setp(plot_obj, "color", assigned_color, "linewidth", 2)
    +557
    +558        for plot_obj in ax.stem(
    +559            mz_not_assigned,
    +560            abundance_not_assigned,
    +561            linefmt="-",
    +562            markerfmt=" ",
    +563            label="Unassigned",
    +564        ):
    +565            plt.setp(plot_obj, "color", unassigned_color, "linewidth", 2)
    +566
    +567        ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +568        ax.set_ylabel("Abundance", fontsize=12)
    +569        ax.tick_params(axis="both", which="major", labelsize=12)
    +570
    +571        ax.axes.spines["top"].set_visible(False)
    +572        ax.axes.spines["right"].set_visible(False)
    +573
    +574        ax.get_yaxis().set_visible(False)
    +575        ax.spines["left"].set_visible(False)
    +576        plt.legend()
    +577
    +578        return ax
     
    @@ -2570,40 +2895,40 @@
    Returns
    -
    474    def plot_mz_error(self, color= 'g'):
    -475        """ Plot mz error scatter plot of all classes
    -476        
    -477        Parameters
    -478        ----------
    -479        color : str, optional
    -480            Matplotlib color, by default "g"
    -481        
    -482        Returns
    -483        -------
    -484        ax : matplotlib.axes
    -485            Matplotlib axes object
    -486        """
    -487        ax = plt.gca()
    -488
    -489        mz_assigned = self.mz_exp_all()
    -490        mz_error= self.mz_error_all()
    -491        
    -492        ax.scatter( mz_assigned, mz_error, c=color)
    -493        
    -494        ax.set_xlabel("$\t{m/z}$", fontsize=12)
    -495        ax.set_ylabel('Error (ppm)', fontsize=12)
    -496        ax.tick_params(axis='both', which='major', labelsize=12)
    -497
    -498        ax.axes.spines['top'].set_visible(True)
    -499        ax.axes.spines['right'].set_visible(True)
    -500
    -501        ax.get_yaxis().set_visible(True)
    -502        ax.spines['left'].set_visible(True)
    -503
    -504        ax.set_xlim(self.min_max_mz)
    -505        ax.set_ylim(self.min_ppm_error , self.max_ppm_error)
    -506    
    -507        return ax
    +            
    580    def plot_mz_error(self, color="g"):
    +581        """Plot mz error scatter plot of all classes
    +582
    +583        Parameters
    +584        ----------
    +585        color : str, optional
    +586            Matplotlib color, by default "g"
    +587
    +588        Returns
    +589        -------
    +590        ax : matplotlib.axes
    +591            Matplotlib axes object
    +592        """
    +593        ax = plt.gca()
    +594
    +595        mz_assigned = self.mz_exp_all()
    +596        mz_error = self.mz_error_all()
    +597
    +598        ax.scatter(mz_assigned, mz_error, c=color)
    +599
    +600        ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +601        ax.set_ylabel("Error (ppm)", fontsize=12)
    +602        ax.tick_params(axis="both", which="major", labelsize=12)
    +603
    +604        ax.axes.spines["top"].set_visible(True)
    +605        ax.axes.spines["right"].set_visible(True)
    +606
    +607        ax.get_yaxis().set_visible(True)
    +608        ax.spines["left"].set_visible(True)
    +609
    +610        ax.set_xlim(self.min_max_mz)
    +611        ax.set_ylim(self.min_ppm_error, self.max_ppm_error)
    +612
    +613        return ax
     
    @@ -2637,47 +2962,47 @@
    Returns
    -
    509    def plot_mz_error_class(self, classe, color= 'g'):
    -510        """ Plot mz error scatter plot of a specific class
    -511
    -512        Parameters
    -513        ----------
    -514        classe : str
    -515            Class name
    -516        color : str, optional
    -517            Matplotlib color, by default "g"
    -518        
    -519        Returns
    -520        -------
    -521        ax : matplotlib.axes
    -522            Matplotlib axes object
    -523        
    -524        """
    -525        if classe != Labels.unassigned:
    -526            ax = plt.gca()
    -527            
    -528            abun_perc = self.abundance_count_percentile(classe)
    -529            mz_assigned = self.mz_exp(classe)
    -530            mz_error= self.mz_error(classe)
    -531            
    -532            ax.scatter( mz_assigned, mz_error, c=color)
    -533            
    -534            title = "%s, %.2f %%" % (classe, abun_perc)
    -535            ax.set_title(title)
    -536            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    -537            ax.set_ylabel('Error (ppm)', fontsize=12)
    -538            ax.tick_params(axis='both', which='major', labelsize=12)
    -539
    -540            ax.axes.spines['top'].set_visible(True)
    -541            ax.axes.spines['right'].set_visible(True)
    -542
    -543            ax.get_yaxis().set_visible(True)
    -544            ax.spines['left'].set_visible(True)
    -545
    -546            ax.set_xlim(self.min_max_mz)
    -547            ax.set_ylim(self.min_ppm_error , self.max_ppm_error)
    -548        
    -549            return ax   
    +            
    615    def plot_mz_error_class(self, classe, color="g"):
    +616        """Plot mz error scatter plot of a specific class
    +617
    +618        Parameters
    +619        ----------
    +620        classe : str
    +621            Class name
    +622        color : str, optional
    +623            Matplotlib color, by default "g"
    +624
    +625        Returns
    +626        -------
    +627        ax : matplotlib.axes
    +628            Matplotlib axes object
    +629
    +630        """
    +631        if classe != Labels.unassigned:
    +632            ax = plt.gca()
    +633
    +634            abun_perc = self.abundance_count_percentile(classe)
    +635            mz_assigned = self.mz_exp(classe)
    +636            mz_error = self.mz_error(classe)
    +637
    +638            ax.scatter(mz_assigned, mz_error, c=color)
    +639
    +640            title = "%s, %.2f %%" % (classe, abun_perc)
    +641            ax.set_title(title)
    +642            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +643            ax.set_ylabel("Error (ppm)", fontsize=12)
    +644            ax.tick_params(axis="both", which="major", labelsize=12)
    +645
    +646            ax.axes.spines["top"].set_visible(True)
    +647            ax.axes.spines["right"].set_visible(True)
    +648
    +649            ax.get_yaxis().set_visible(True)
    +650            ax.spines["left"].set_visible(True)
    +651
    +652            ax.set_xlim(self.min_max_mz)
    +653            ax.set_ylim(self.min_ppm_error, self.max_ppm_error)
    +654
    +655            return ax
     
    @@ -2713,49 +3038,50 @@
    Returns
    -
    551    def plot_ms_class(self, classe, color= 'g'):
    -552        """ Plot stick mass spectrum of a specific class
    -553        
    -554        Parameters
    -555        ----------
    -556        classe : str
    -557            Class name
    -558        color : str, optional
    -559            Matplotlib color, by default "g"
    -560        
    -561        Returns
    -562        -------
    -563        ax : matplotlib.axes
    -564            Matplotlib axes object
    -565        
    -566        """
    -567        if classe != Labels.unassigned:
    -568            ax = plt.gca()
    -569            
    -570            abun_perc = self.abundance_count_percentile(classe)
    -571            mz_assigned = self.mz_exp(classe)
    -572            abundance_assigned= self.abundance(classe)
    -573
    -574            for plot_obj in ax.stem( mz_assigned, abundance_assigned, linefmt='-',  markerfmt=" "):
    -575            
    -576                plt.setp(plot_obj, 'color', color, 'linewidth', 2)
    -577            
    -578            title = "%s, %.2f %%" % (classe, abun_perc)
    -579            ax.set_title(title)
    -580            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    -581            ax.set_ylabel('Abundance', fontsize=12)
    -582            ax.tick_params(axis='both', which='major', labelsize=12)
    -583
    -584            ax.axes.spines['top'].set_visible(False)
    -585            ax.axes.spines['right'].set_visible(False)
    -586
    -587            ax.get_yaxis().set_visible(False)
    -588            ax.spines['left'].set_visible(False)
    -589
    -590            ax.set_xlim(self.min_max_mz)
    -591            ax.set_ylim(self.min_max_abundance)
    -592        
    -593            return ax
    +            
    657    def plot_ms_class(self, classe, color="g"):
    +658        """Plot stick mass spectrum of a specific class
    +659
    +660        Parameters
    +661        ----------
    +662        classe : str
    +663            Class name
    +664        color : str, optional
    +665            Matplotlib color, by default "g"
    +666
    +667        Returns
    +668        -------
    +669        ax : matplotlib.axes
    +670            Matplotlib axes object
    +671
    +672        """
    +673        if classe != Labels.unassigned:
    +674            ax = plt.gca()
    +675
    +676            abun_perc = self.abundance_count_percentile(classe)
    +677            mz_assigned = self.mz_exp(classe)
    +678            abundance_assigned = self.abundance(classe)
    +679
    +680            for plot_obj in ax.stem(
    +681                mz_assigned, abundance_assigned, linefmt="-", markerfmt=" "
    +682            ):
    +683                plt.setp(plot_obj, "color", color, "linewidth", 2)
    +684
    +685            title = "%s, %.2f %%" % (classe, abun_perc)
    +686            ax.set_title(title)
    +687            ax.set_xlabel("$\t{m/z}$", fontsize=12)
    +688            ax.set_ylabel("Abundance", fontsize=12)
    +689            ax.tick_params(axis="both", which="major", labelsize=12)
    +690
    +691            ax.axes.spines["top"].set_visible(False)
    +692            ax.axes.spines["right"].set_visible(False)
    +693
    +694            ax.get_yaxis().set_visible(False)
    +695            ax.spines["left"].set_visible(False)
    +696
    +697            ax.set_xlim(self.min_max_mz)
    +698            ax.set_ylim(self.min_max_abundance)
    +699
    +700            return ax
     
    @@ -2791,59 +3117,60 @@
    Returns
    -
    595    def plot_van_krevelen(self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis"):
    -596        """ Plot Van Krevelen Diagram 
    -597        
    -598        Parameters
    -599        ----------
    -600        classe : str
    -601            Class name
    -602        max_hc : float, optional
    -603            Max H/C ratio, by default 2.5
    -604        max_oc : float, optional
    -605            Max O/C ratio, by default 2
    -606        ticks_number : int, optional
    -607            Number of ticks, by default 5
    -608        color : str, optional
    -609            Matplotlib color, by default "viridis"
    -610        
    -611        Returns
    -612        -------
    -613        ax : matplotlib.axes
    -614            Matplotlib axes object
    -615        abun_perc : float
    -616            Class percentile of the relative abundance
    -617        """
    -618        if classe != Labels.unassigned:
    -619
    -620            # get data 
    -621            abun_perc = self.abundance_count_percentile(classe)
    -622            hc = self.atoms_ratio(classe, "H", "C") 
    -623            oc = self.atoms_ratio(classe, "O", "C") 
    -624            abundance = self.abundance(classe)
    -625            
    -626            #plot data
    -627            ax = plt.gca()
    -628
    -629            ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color)
    -630
    -631            #ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
    -632            
    -633            title = "%s, %.2f %%" % (classe, abun_perc)
    -634            ax.set_title(title)
    -635            ax.set_xlabel("O/C", fontsize=16)
    -636            ax.set_ylabel('H/C', fontsize=16)
    -637            ax.tick_params(axis='both', which='major', labelsize=18)
    -638            ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True))
    -639            ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True))
    -640
    -641            # returns matplot axes obj and the class percentile of the relative abundance 
    -642            
    -643            return ax, abun_perc 
    +            
    702    def plot_van_krevelen(
    +703        self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color="viridis"
    +704    ):
    +705        """Plot Van Krevelen Diagram
    +706
    +707        Parameters
    +708        ----------
    +709        classe : str
    +710            Class name
    +711        max_hc : float, optional
    +712            Max H/C ratio, by default 2.5
    +713        max_oc : float, optional
    +714            Max O/C ratio, by default 2
    +715        ticks_number : int, optional
    +716            Number of ticks, by default 5
    +717        color : str, optional
    +718            Matplotlib color, by default "viridis"
    +719
    +720        Returns
    +721        -------
    +722        ax : matplotlib.axes
    +723            Matplotlib axes object
    +724        abun_perc : float
    +725            Class percentile of the relative abundance
    +726        """
    +727        if classe != Labels.unassigned:
    +728            # get data
    +729            abun_perc = self.abundance_count_percentile(classe)
    +730            hc = self.atoms_ratio(classe, "H", "C")
    +731            oc = self.atoms_ratio(classe, "O", "C")
    +732            abundance = self.abundance(classe)
    +733
    +734            # plot data
    +735            ax = plt.gca()
    +736
    +737            ax.scatter(oc, hc, c=abundance, alpha=0.5, cmap=color)
    +738
    +739            # ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
    +740
    +741            title = "%s, %.2f %%" % (classe, abun_perc)
    +742            ax.set_title(title)
    +743            ax.set_xlabel("O/C", fontsize=16)
    +744            ax.set_ylabel("H/C", fontsize=16)
    +745            ax.tick_params(axis="both", which="major", labelsize=18)
    +746            ax.set_xticks(linspace(0, max_oc, ticks_number, endpoint=True))
    +747            ax.set_yticks(linspace(0, max_hc, ticks_number, endpoint=True))
    +748
    +749            # returns matplot axes obj and the class percentile of the relative abundance
    +750
    +751            return ax, abun_perc
     
    -

    Plot Van Krevelen Diagram

    +

    Plot Van Krevelen Diagram

    Parameters
    @@ -2883,57 +3210,58 @@
    Returns
    -
    645    def plot_dbe_vs_carbon_number(self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis"):
    -646        """ Plot DBE vs Carbon Number
    -647        
    -648        Parameters
    -649        ----------
    -650        classe : str
    -651            Class name
    -652        max_c : int, optional
    -653            Max Carbon Number, by default 50
    -654        max_dbe : int, optional
    -655            Max DBE, by default 40
    -656        dbe_incr : int, optional
    -657            DBE increment, by default 5
    -658        c_incr : int, optional
    -659            Carbon Number increment, by default 10
    -660        color : str, optional
    -661            Matplotlib color, by default "viridis"
    -662
    -663        Returns
    -664        -------
    -665        ax : matplotlib.axes
    -666            Matplotlib axes object
    -667        abun_perc : float
    -668            Class percentile of the relative abundance
    -669        """
    -670        if classe != Labels.unassigned:
    -671
    -672            # get data 
    -673            abun_perc = self.abundance_count_percentile(classe)
    -674            carbon_number = self.carbon_number(classe)
    -675            dbe = self.dbe(classe)
    -676            abundance = self.abundance(classe)
    -677            
    -678            #plot data
    -679            ax = plt.gca()
    -680
    -681            ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color)
    -682
    -683            #ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
    -684            
    -685            title = "%s, %.2f %%" % (classe, abun_perc)
    -686            ax.set_title(title)
    -687            ax.set_xlabel("Carbon number", fontsize=16)
    -688            ax.set_ylabel('DBE', fontsize=16)
    -689            ax.tick_params(axis='both', which='major', labelsize=18)
    -690            ax.set_xticks(range(0, max_c, c_incr))
    -691            ax.set_yticks(range(0, max_dbe, dbe_incr))
    -692
    -693            # returns matplot axes obj and the class percentile of the relative abundance 
    -694            
    -695            return ax, abun_perc 
    +            
    753    def plot_dbe_vs_carbon_number(
    +754        self, classe, max_c=50, max_dbe=40, dbe_incr=5, c_incr=10, color="viridis"
    +755    ):
    +756        """Plot DBE vs Carbon Number
    +757
    +758        Parameters
    +759        ----------
    +760        classe : str
    +761            Class name
    +762        max_c : int, optional
    +763            Max Carbon Number, by default 50
    +764        max_dbe : int, optional
    +765            Max DBE, by default 40
    +766        dbe_incr : int, optional
    +767            DBE increment, by default 5
    +768        c_incr : int, optional
    +769            Carbon Number increment, by default 10
    +770        color : str, optional
    +771            Matplotlib color, by default "viridis"
    +772
    +773        Returns
    +774        -------
    +775        ax : matplotlib.axes
    +776            Matplotlib axes object
    +777        abun_perc : float
    +778            Class percentile of the relative abundance
    +779        """
    +780        if classe != Labels.unassigned:
    +781            # get data
    +782            abun_perc = self.abundance_count_percentile(classe)
    +783            carbon_number = self.carbon_number(classe)
    +784            dbe = self.dbe(classe)
    +785            abundance = self.abundance(classe)
    +786
    +787            # plot data
    +788            ax = plt.gca()
    +789
    +790            ax.scatter(carbon_number, dbe, c=abundance, alpha=0.5, cmap=color)
    +791
    +792            # ax.scatter(carbon_number, dbe, c=color, alpha=0.5)
    +793
    +794            title = "%s, %.2f %%" % (classe, abun_perc)
    +795            ax.set_title(title)
    +796            ax.set_xlabel("Carbon number", fontsize=16)
    +797            ax.set_ylabel("DBE", fontsize=16)
    +798            ax.tick_params(axis="both", which="major", labelsize=18)
    +799            ax.set_xticks(range(0, max_c, c_incr))
    +800            ax.set_yticks(range(0, max_dbe, dbe_incr))
    +801
    +802            # returns matplot axes obj and the class percentile of the relative abundance
    +803
    +804            return ax, abun_perc
     
    diff --git a/docs/corems/molecular_id/factory/lipid_molecular_metadata.html b/docs/corems/molecular_id/factory/lipid_molecular_metadata.html index f70a6dbb..32e306f5 100644 --- a/docs/corems/molecular_id/factory/lipid_molecular_metadata.html +++ b/docs/corems/molecular_id/factory/lipid_molecular_metadata.html @@ -98,7 +98,7 @@

    9@dataclass 10class LipidMetadata(MetaboliteMetadata): 11 """Dataclass for the Lipid Metadata -12 +12 13 Parameters 14 ---------- 15 name : str @@ -112,7 +112,7 @@

    23 structure_level : str 24 The structure level of the lipid, following the LIPID MAPS classification 25 lipid_summed_name : str -26 The summed name of the lipid, aka lipid species, +26 The summed name of the lipid, aka lipid species, 27 following the LIPID MAPS classification 28 lipid_subclass : str 29 The subclass of the lipid, following the LIPID MAPS classification @@ -151,7 +151,7 @@

    10@dataclass
     11class LipidMetadata(MetaboliteMetadata):
     12    """Dataclass for the Lipid Metadata
    -13    
    +13
     14    Parameters
     15    ----------
     16    name : str
    @@ -165,7 +165,7 @@ 

    24 structure_level : str 25 The structure level of the lipid, following the LIPID MAPS classification 26 lipid_summed_name : str -27 The summed name of the lipid, aka lipid species, +27 The summed name of the lipid, aka lipid species, 28 following the LIPID MAPS classification 29 lipid_subclass : str 30 The subclass of the lipid, following the LIPID MAPS classification @@ -204,7 +204,7 @@
    Parameters
  • structure_level (str): The structure level of the lipid, following the LIPID MAPS classification
  • lipid_summed_name (str): -The summed name of the lipid, aka lipid species, +The summed name of the lipid, aka lipid species, following the LIPID MAPS classification
  • lipid_subclass (str): The subclass of the lipid, following the LIPID MAPS classification
  • diff --git a/docs/corems/molecular_id/factory/molecularSQL.html b/docs/corems/molecular_id/factory/molecularSQL.html index e5ae4926..65de7299 100644 --- a/docs/corems/molecular_id/factory/molecularSQL.html +++ b/docs/corems/molecular_id/factory/molecularSQL.html @@ -223,775 +223,835 @@

      1import sys
    -  2sys.path.append(".")
    -  3import os
    -  4
    -  5from sqlalchemy import Numeric, create_engine, ForeignKey, Column, Integer, String, Float, func
    -  6from sqlalchemy.orm import backref, column_property, relationship
    -  7from sqlalchemy.ext.associationproxy import association_proxy
    -  8from sqlalchemy.sql.schema import UniqueConstraint
    -  9from sqlalchemy import exc
    - 10
    - 11
    - 12from sqlalchemy.ext.declarative import declarative_base, declared_attr
    - 13from sqlalchemy.ext.hybrid import hybrid_property, hybrid_method
    - 14from sqlalchemy.exc import SQLAlchemyError
    - 15from sqlalchemy.orm.session import sessionmaker
    - 16from sqlalchemy.sql.operators import exists
    - 17from sqlalchemy import event, and_
    - 18from sqlalchemy import func
    - 19
    - 20from corems.encapsulation.constant import Atoms, Labels
    - 21import json
    - 22from corems.encapsulation.factory.processingSetting import MolecularFormulaSearchSettings
    - 23from sqlalchemy.orm.scoping import scoped_session
    - 24from corems import chunks
    - 25import tqdm
    - 26
    - 27Base = declarative_base()
    - 28
    - 29class HeteroAtoms(Base):
    - 30    """ HeteroAtoms class for the heteroAtoms table in the SQLite database.
    - 31    
    - 32    Attributes
    - 33    ----------
    - 34    id : int
    - 35        The primary key for the table.
    - 36    name : str
    - 37        The name of the heteroAtoms class.
    - 38    halogensCount : int
    - 39        The number of halogens in the heteroAtoms class.
    - 40    carbonHydrogen : relationship
    - 41        The relationship to the carbonHydrogen table.
    - 42    
    - 43    Methods
    - 44    -------
    - 45    * __repr__()
    - 46        Returns the string representation of the object.
    - 47    * to_dict()
    - 48        Returns the heteroAtoms class as a dictionary.
    - 49    * halogens_count()
    - 50        Returns the number of halogens as a float.
    - 51    
    - 52
    - 53    """
    - 54    __tablename__ = 'heteroAtoms'
    - 55
    - 56    id = Column(Integer, primary_key=True,
    - 57                         unique = True,
    - 58                         nullable = False)
    - 59
    - 60    name = Column(String, unique=True, nullable=False)
    - 61
    - 62    halogensCount = Column(Integer, unique=False, nullable=False)
    - 63
    - 64    carbonHydrogen = relationship('CarbonHydrogen', secondary='molecularformula',  viewonly=True)
    - 65
    - 66    def __repr__(self):
    - 67        return '<HeteroAtoms Model {} class {}>'.format(self.id, self.name)      
    +  2
    +  3sys.path.append(".")
    +  4import json
    +  5import os
    +  6
    +  7import tqdm
    +  8from sqlalchemy import (
    +  9    Column,
    + 10    Float,
    + 11    ForeignKey,
    + 12    Integer,
    + 13    String,
    + 14    and_,
    + 15    create_engine,
    + 16    event,
    + 17    exc,
    + 18    func,
    + 19)
    + 20from sqlalchemy.exc import SQLAlchemyError
    + 21from sqlalchemy.ext.associationproxy import association_proxy
    + 22from sqlalchemy.ext.declarative import declarative_base
    + 23from sqlalchemy.ext.hybrid import hybrid_method, hybrid_property
    + 24from sqlalchemy.orm import backref, relationship
    + 25from sqlalchemy.orm.scoping import scoped_session
    + 26from sqlalchemy.orm.session import sessionmaker
    + 27from sqlalchemy.sql.operators import exists
    + 28from sqlalchemy.sql.schema import UniqueConstraint
    + 29
    + 30from corems.encapsulation.constant import Atoms, Labels
    + 31
    + 32Base = declarative_base()
    + 33
    + 34
    + 35class HeteroAtoms(Base):
    + 36    """HeteroAtoms class for the heteroAtoms table in the SQLite database.
    + 37
    + 38    Attributes
    + 39    ----------
    + 40    id : int
    + 41        The primary key for the table.
    + 42    name : str
    + 43        The name of the heteroAtoms class.
    + 44    halogensCount : int
    + 45        The number of halogens in the heteroAtoms class.
    + 46    carbonHydrogen : relationship
    + 47        The relationship to the carbonHydrogen table.
    + 48
    + 49    Methods
    + 50    -------
    + 51    * __repr__()
    + 52        Returns the string representation of the object.
    + 53    * to_dict()
    + 54        Returns the heteroAtoms class as a dictionary.
    + 55    * halogens_count()
    + 56        Returns the number of halogens as a float.
    + 57
    + 58
    + 59    """
    + 60
    + 61    __tablename__ = "heteroAtoms"
    + 62
    + 63    id = Column(Integer, primary_key=True, unique=True, nullable=False)
    + 64
    + 65    name = Column(String, unique=True, nullable=False)
    + 66
    + 67    halogensCount = Column(Integer, unique=False, nullable=False)
      68
    - 69    @hybrid_property
    - 70    def halogens_count(cls):
    - 71        """ Returns the number of halogens as a float."""
    - 72        return cls.halogensCount.cast(Float)
    - 73
    - 74    def to_dict(self):
    - 75        """ Returns the heteroAtoms class as a dictionary."""
    - 76        return json.loads(self.name)
    - 77
    - 78
    - 79class CarbonHydrogen(Base):
    - 80    """ CarbonHydrogen class for the carbonHydrogen table in the SQLite database.
    - 81    
    - 82    Attributes
    - 83    ----------
    - 84    id : int
    - 85        The primary key for the table.
    - 86    C : int
    - 87        The number of carbon atoms.
    - 88    H : int
    - 89        The number of hydrogen atoms.
    - 90    heteroAtoms : relationship
    - 91        The relationship to the heteroAtoms table.
    - 92    
    - 93    Methods
    - 94    -------
    - 95    * __repr__()
    - 96        Returns the string representation of the object.
    - 97    * mass()
    - 98        Returns the mass of the carbonHydrogen class as a float.
    - 99    * c()
    -100        Returns the number of carbon atoms as a float.
    -101    * h()
    -102        Returns the number of hydrogen atoms as a float.
    -103    * dbe()
    -104        Returns the double bond equivalent as a float.
    -105    
    -106    """
    -107
    -108    __tablename__ = 'carbonHydrogen'
    -109    __table_args__ = (UniqueConstraint('C', 'H', name='unique_c_h'), )
    -110
    -111    id = Column(Integer, primary_key=True,
    -112                unique=True,
    -113                nullable=False)
    + 69    carbonHydrogen = relationship(
    + 70        "CarbonHydrogen", secondary="molecularformula", viewonly=True
    + 71    )
    + 72
    + 73    def __repr__(self):
    + 74        return "<HeteroAtoms Model {} class {}>".format(self.id, self.name)
    + 75
    + 76    @hybrid_property
    + 77    def halogens_count(cls):
    + 78        """Returns the number of halogens as a float."""
    + 79        return cls.halogensCount.cast(Float)
    + 80
    + 81    def to_dict(self):
    + 82        """Returns the heteroAtoms class as a dictionary."""
    + 83        return json.loads(self.name)
    + 84
    + 85
    + 86class CarbonHydrogen(Base):
    + 87    """CarbonHydrogen class for the carbonHydrogen table in the SQLite database.
    + 88
    + 89    Attributes
    + 90    ----------
    + 91    id : int
    + 92        The primary key for the table.
    + 93    C : int
    + 94        The number of carbon atoms.
    + 95    H : int
    + 96        The number of hydrogen atoms.
    + 97    heteroAtoms : relationship
    + 98        The relationship to the heteroAtoms table.
    + 99
    +100    Methods
    +101    -------
    +102    * __repr__()
    +103        Returns the string representation of the object.
    +104    * mass()
    +105        Returns the mass of the carbonHydrogen class as a float.
    +106    * c()
    +107        Returns the number of carbon atoms as a float.
    +108    * h()
    +109        Returns the number of hydrogen atoms as a float.
    +110    * dbe()
    +111        Returns the double bond equivalent as a float.
    +112
    +113    """
     114
    -115    C = Column(Integer, nullable=False)
    -116
    -117    H = Column(Integer, nullable=False)
    -118
    -119    heteroAtoms = relationship("HeteroAtoms",
    -120                               secondary="molecularformula",
    -121                               viewonly=True
    -122                               )
    +115    __tablename__ = "carbonHydrogen"
    +116    __table_args__ = (UniqueConstraint("C", "H", name="unique_c_h"),)
    +117
    +118    id = Column(Integer, primary_key=True, unique=True, nullable=False)
    +119
    +120    C = Column(Integer, nullable=False)
    +121
    +122    H = Column(Integer, nullable=False)
     123
    -124    def __repr__(self):
    -125        """ Returns the string representation of the object."""
    -126        return '<CarbonHydrogen Model {} C{} H{}>'.format(self.id, self.C, self.H)                     
    +124    heteroAtoms = relationship(
    +125        "HeteroAtoms", secondary="molecularformula", viewonly=True
    +126    )
     127
    -128    @property
    -129    def mass(self):
    -130        """ Returns the mass of the carbonHydrogen class as a float."""
    -131        return (self.C * Atoms.atomic_masses.get('C')) + (self.H * Atoms.atomic_masses.get('H'))
    -132
    -133    @hybrid_property
    -134    def c(cls):
    -135        """ Returns the number of carbon atoms as a float."""
    -136        return cls.C.cast(Float)
    -137
    -138    @hybrid_property
    -139    def h(cls):
    -140        """ Returns the number of hydrogen atoms as a float."""
    -141        return cls.H.cast(Float)
    -142
    -143    @hybrid_property
    -144    def dbe(cls):
    -145        """ Returns the double bond equivalent as a float."""
    -146        # return cls.C.cast(Float) - (cls.H.cast(Float) / 2) + 1
    -147        return float(cls.C) - float(cls.H / 2) + 1
    +128    def __repr__(self):
    +129        """Returns the string representation of the object."""
    +130        return "<CarbonHydrogen Model {} C{} H{}>".format(self.id, self.C, self.H)
    +131
    +132    @property
    +133    def mass(self):
    +134        """Returns the mass of the carbonHydrogen class as a float."""
    +135        return (self.C * Atoms.atomic_masses.get("C")) + (
    +136            self.H * Atoms.atomic_masses.get("H")
    +137        )
    +138
    +139    @hybrid_property
    +140    def c(cls):
    +141        """Returns the number of carbon atoms as a float."""
    +142        return cls.C.cast(Float)
    +143
    +144    @hybrid_property
    +145    def h(cls):
    +146        """Returns the number of hydrogen atoms as a float."""
    +147        return cls.H.cast(Float)
     148
    -149# 264888.88 ms
    -150class MolecularFormulaLink(Base):
    -151    """ MolecularFormulaLink class for the molecularformula table in the SQLite database.
    -152
    -153    Attributes
    -154    ----------
    -155    heteroAtoms_id : int
    -156        The foreign key for the heteroAtoms table.
    -157    carbonHydrogen_id : int
    -158        The foreign key for the carbonHydrogen table.
    -159    mass : float
    -160        The mass of the molecular formula.
    -161    DBE : float
    -162        The double bond equivalent of the molecular formula.
    -163    carbonHydrogen : relationship
    -164        The relationship to the carbonHydrogen table.
    -165    heteroAtoms : relationship
    -166        The relationship to the heteroAtoms table.
    -167    C : association_proxy
    -168        The association proxy for the carbonHydrogen table.
    -169    H : association_proxy
    -170        The association proxy for the carbonHydrogen table.
    -171    classe : association_proxy
    -172        The association proxy for the heteroAtoms table.
    -173    
    -174    Methods
    -175    -------
    -176    * __repr__()
    -177        Returns the string representation of the object.
    -178    * to_dict()
    -179        Returns the molecular formula as a dictionary.
    -180    * formula_string()
    -181        Returns the molecular formula as a string.
    -182    * classe_string()
    -183        Returns the heteroAtoms class as a string.
    -184    * _adduct_mz(ion_charge, adduct_atom)
    -185        Returns the m/z of the adduct ion as a float.
    -186    * _protonated_mz(ion_charge)
    -187        Returns the m/z of the protonated ion as a float.
    -188    * _radical_mz(ion_charge)
    -189        Returns the m/z of the radical ion as a float.
    -190    
    -191
    -192
    -193    """
    -194    __tablename__ = 'molecularformula'
    -195    __table_args__ = (UniqueConstraint('heteroAtoms_id', 'carbonHydrogen_id', name='unique_molform'), )
    -196
    -197    # id = Column(Integer, primary_key=True,
    -198    #                    unique=True,
    -199    #                    nullable=False)
    -200
    -201    heteroAtoms_id = Column(Integer,
    -202                            ForeignKey('heteroAtoms.id'),
    -203                            primary_key=True)
    -204
    -205    carbonHydrogen_id = Column(Integer,
    -206                               ForeignKey('carbonHydrogen.id'), 
    -207                               primary_key=True)
    -208
    -209    mass = Column(Float)
    +149    @hybrid_property
    +150    def dbe(cls):
    +151        """Returns the double bond equivalent as a float."""
    +152        # return cls.C.cast(Float) - (cls.H.cast(Float) / 2) + 1
    +153        return float(cls.C) - float(cls.H / 2) + 1
    +154
    +155
    +156# 264888.88 ms
    +157class MolecularFormulaLink(Base):
    +158    """MolecularFormulaLink class for the molecularformula table in the SQLite database.
    +159
    +160    Attributes
    +161    ----------
    +162    heteroAtoms_id : int
    +163        The foreign key for the heteroAtoms table.
    +164    carbonHydrogen_id : int
    +165        The foreign key for the carbonHydrogen table.
    +166    mass : float
    +167        The mass of the molecular formula.
    +168    DBE : float
    +169        The double bond equivalent of the molecular formula.
    +170    carbonHydrogen : relationship
    +171        The relationship to the carbonHydrogen table.
    +172    heteroAtoms : relationship
    +173        The relationship to the heteroAtoms table.
    +174    C : association_proxy
    +175        The association proxy for the carbonHydrogen table.
    +176    H : association_proxy
    +177        The association proxy for the carbonHydrogen table.
    +178    classe : association_proxy
    +179        The association proxy for the heteroAtoms table.
    +180
    +181    Methods
    +182    -------
    +183    * __repr__()
    +184        Returns the string representation of the object.
    +185    * to_dict()
    +186        Returns the molecular formula as a dictionary.
    +187    * formula_string()
    +188        Returns the molecular formula as a string.
    +189    * classe_string()
    +190        Returns the heteroAtoms class as a string.
    +191    * _adduct_mz(ion_charge, adduct_atom)
    +192        Returns the m/z of the adduct ion as a float.
    +193    * _protonated_mz(ion_charge)
    +194        Returns the m/z of the protonated ion as a float.
    +195    * _radical_mz(ion_charge)
    +196        Returns the m/z of the radical ion as a float.
    +197
    +198
    +199
    +200    """
    +201
    +202    __tablename__ = "molecularformula"
    +203    __table_args__ = (
    +204        UniqueConstraint("heteroAtoms_id", "carbonHydrogen_id", name="unique_molform"),
    +205    )
    +206
    +207    # id = Column(Integer, primary_key=True,
    +208    #                    unique=True,
    +209    #                    nullable=False)
     210
    -211    DBE = Column(Float)
    +211    heteroAtoms_id = Column(Integer, ForeignKey("heteroAtoms.id"), primary_key=True)
     212
    -213    carbonHydrogen = relationship(CarbonHydrogen, backref=backref("heteroAtoms_assoc"))
    -214
    -215    heteroAtoms = relationship(HeteroAtoms, backref=backref("carbonHydrogen_assoc"))
    +213    carbonHydrogen_id = Column(
    +214        Integer, ForeignKey("carbonHydrogen.id"), primary_key=True
    +215    )
     216
    -217    C = association_proxy('carbonHydrogen', 'C')
    +217    mass = Column(Float)
     218
    -219    H = association_proxy('carbonHydrogen', 'H')
    +219    DBE = Column(Float)
     220
    -221    classe = association_proxy('heteroAtoms', 'name')
    +221    carbonHydrogen = relationship(CarbonHydrogen, backref=backref("heteroAtoms_assoc"))
     222
    -223    def to_dict(self):
    -224        """ Returns the molecular formula as a dictionary.
    -225        
    -226        Returns
    -227        -------
    -228        dict
    -229            The molecular formula as a dictionary.  
    -230        """
    -231        carbon = {'C': self.C, 'H': self.H}
    -232        classe = json.loads(self.classe)
    -233        if self.classe == '{"HC": ""}':
    -234            return {**carbon}
    -235        else:
    -236            return {**carbon, **classe}
    -237
    -238    @property
    -239    def formula_string(self):
    -240        """ Returns the molecular formula as a string."""
    -241        class_dict = self.to_dict()
    -242        class_str = ' '.join([atom + str(class_dict[atom]) for atom in class_dict.keys()])
    -243        return class_str.strip()
    -244
    -245    @property
    -246    def classe_string(self):
    -247        """ Returns the heteroAtoms class as a string."""
    -248        class_dict = json.loads(self.classe)
    -249        class_str = ' '.join([atom + str(class_dict[atom]) for atom in class_dict.keys()])
    -250        return class_str.strip()
    -251
    -252    @hybrid_method
    -253    def _adduct_mz(self, ion_charge, adduct_atom):
    -254        """ Returns the m/z of the adduct ion as a float."""
    -255        return (self.mass + (Atoms.atomic_masses.get(adduct_atom)) + (ion_charge * -1 * Atoms.electron_mass))/ abs(ion_charge)
    -256
    -257    @hybrid_method
    -258    def _protonated_mz(self, ion_charge):
    -259        """ Returns the m/z of the protonated ion as a float."""
    -260        return (self.mass + (ion_charge * Atoms.atomic_masses.get("H")) + (ion_charge * -1 * Atoms.electron_mass))/abs(ion_charge)
    -261
    -262    @hybrid_method
    -263    def _radical_mz(self, ion_charge):
    -264        """ Returns the m/z of the radical ion as a float."""
    -265        return (self.mass + (ion_charge * -1 * Atoms.electron_mass))/ abs(ion_charge)
    -266
    -267    def __repr__(self):
    -268        """ Returns the string representation of the object."""
    -269        return '<MolecularFormulaLink Model {}>'.format(self.formula_string)       
    -270
    -271
    -272class MolForm_SQL:
    -273    """ MolForm_SQL class for the SQLite database.
    -274    
    -275    Attributes
    -276    ----------
    -277    engine : sqlalchemy.engine.base.Engine
    -278        The SQLAlchemy engine.
    -279    session : sqlalchemy.orm.session.Session
    -280        The SQLAlchemy session.
    -281    type : str
    -282        The type of database.
    -283    chunks_count : int
    -284        The number of chunks to use when querying the database.
    -285    
    -286    Methods
    -287    -------
    -288    * __init__(url=None, echo=False)
    -289        Initializes the database.
    -290    * __exit__(exc_type, exc_val, exc_tb)
    -291        Closes the database.
    -292    * initiate_database(url, database_name)
    -293        Creates the database.
    -294    * commit()
    -295        Commits the session.
    -296    * init_engine(url)
    -297        Initializes the SQLAlchemy engine.
    -298    * __enter__()
    -299
    -300    * get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None)
    -301        Returns a dictionary of molecular formulas.
    -302    * check_entry(classe, ion_type, molecular_search_settings)
    -303        Checks if a molecular formula is in the database.
    -304    * get_all_classes()
    -305        Returns a list of all classes in the database.
    -306    * get_all()
    -307        Returns a list of all molecular formulas in the database.
    -308    * delete_entry(row)
    -309        Deletes a molecular formula from the database.
    -310    * purge(cls)
    -311        Deletes all molecular formulas from the database.
    -312    * clear_data()
    -313        Clears the database.
    -314    * close(commit=True)
    -315        Closes the database.
    -316    * add_engine_pidguard(engine)
    -317        Adds multiprocessing guards.
    -318    
    -319    """
    -320    def __init__(self, url=None, echo=False):
    -321
    -322        self.engine = self.init_engine(url)
    -323
    -324        self.add_engine_pidguard(self.engine)
    -325
    -326        session_factory = sessionmaker(bind=self.engine)
    -327
    -328        Session = scoped_session(session_factory)
    -329
    -330        self.session = session_factory()
    -331
    -332        Base.metadata.create_all(self.engine)
    -333
    -334        self.session.commit()
    -335
    -336    def __exit__(self, exc_type, exc_val, exc_tb):
    -337        """ Closes the database.
    -338        
    -339        Parameters
    -340        ----------
    -341        exc_type : str
    -342            The exception type.
    -343        exc_val : str
    -344            The exception value.
    -345        exc_tb : str
    -346            The exception traceback.
    -347        """
    -348        # make sure the dbconnection gets closed
    +223    heteroAtoms = relationship(HeteroAtoms, backref=backref("carbonHydrogen_assoc"))
    +224
    +225    C = association_proxy("carbonHydrogen", "C")
    +226
    +227    H = association_proxy("carbonHydrogen", "H")
    +228
    +229    classe = association_proxy("heteroAtoms", "name")
    +230
    +231    def to_dict(self):
    +232        """Returns the molecular formula as a dictionary.
    +233
    +234        Returns
    +235        -------
    +236        dict
    +237            The molecular formula as a dictionary.
    +238        """
    +239        carbon = {"C": self.C, "H": self.H}
    +240        classe = json.loads(self.classe)
    +241        if self.classe == '{"HC": ""}':
    +242            return {**carbon}
    +243        else:
    +244            return {**carbon, **classe}
    +245
    +246    @property
    +247    def formula_string(self):
    +248        """Returns the molecular formula as a string."""
    +249        class_dict = self.to_dict()
    +250        class_str = " ".join(
    +251            [atom + str(class_dict[atom]) for atom in class_dict.keys()]
    +252        )
    +253        return class_str.strip()
    +254
    +255    @property
    +256    def classe_string(self):
    +257        """Returns the heteroAtoms class as a string."""
    +258        class_dict = json.loads(self.classe)
    +259        class_str = " ".join(
    +260            [atom + str(class_dict[atom]) for atom in class_dict.keys()]
    +261        )
    +262        return class_str.strip()
    +263
    +264    @hybrid_method
    +265    def _adduct_mz(self, ion_charge, adduct_atom):
    +266        """Returns the m/z of the adduct ion as a float."""
    +267        return (
    +268            self.mass
    +269            + (Atoms.atomic_masses.get(adduct_atom))
    +270            + (ion_charge * -1 * Atoms.electron_mass)
    +271        ) / abs(ion_charge)
    +272
    +273    @hybrid_method
    +274    def _protonated_mz(self, ion_charge):
    +275        """Returns the m/z of the protonated ion as a float."""
    +276        return (
    +277            self.mass
    +278            + (ion_charge * Atoms.atomic_masses.get("H"))
    +279            + (ion_charge * -1 * Atoms.electron_mass)
    +280        ) / abs(ion_charge)
    +281
    +282    @hybrid_method
    +283    def _radical_mz(self, ion_charge):
    +284        """Returns the m/z of the radical ion as a float."""
    +285        return (self.mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge)
    +286
    +287    def __repr__(self):
    +288        """Returns the string representation of the object."""
    +289        return "<MolecularFormulaLink Model {}>".format(self.formula_string)
    +290
    +291
    +292class MolForm_SQL:
    +293    """MolForm_SQL class for the SQLite database.
    +294
    +295    Attributes
    +296    ----------
    +297    engine : sqlalchemy.engine.base.Engine
    +298        The SQLAlchemy engine.
    +299    session : sqlalchemy.orm.session.Session
    +300        The SQLAlchemy session.
    +301    type : str
    +302        The type of database.
    +303    chunks_count : int
    +304        The number of chunks to use when querying the database.
    +305
    +306    Methods
    +307    -------
    +308    * __init__(url=None, echo=False)
    +309        Initializes the database.
    +310    * __exit__(exc_type, exc_val, exc_tb)
    +311        Closes the database.
    +312    * initiate_database(url, database_name)
    +313        Creates the database.
    +314    * commit()
    +315        Commits the session.
    +316    * init_engine(url)
    +317        Initializes the SQLAlchemy engine.
    +318    * __enter__()
    +319
    +320    * get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None)
    +321        Returns a dictionary of molecular formulas.
    +322    * check_entry(classe, ion_type, molecular_search_settings)
    +323        Checks if a molecular formula is in the database.
    +324    * get_all_classes()
    +325        Returns a list of all classes in the database.
    +326    * get_all()
    +327        Returns a list of all molecular formulas in the database.
    +328    * delete_entry(row)
    +329        Deletes a molecular formula from the database.
    +330    * purge(cls)
    +331        Deletes all molecular formulas from the database.
    +332    * clear_data()
    +333        Clears the database.
    +334    * close(commit=True)
    +335        Closes the database.
    +336    * add_engine_pidguard(engine)
    +337        Adds multiprocessing guards.
    +338
    +339    """
    +340
    +341    def __init__(self, url=None, echo=False):
    +342        self.engine = self.init_engine(url)
    +343
    +344        self.add_engine_pidguard(self.engine)
    +345
    +346        session_factory = sessionmaker(bind=self.engine)
    +347
    +348        Session = scoped_session(session_factory)
     349
    -350        self.commit()
    -351        self.session.close()
    -352        self.engine.dispose()
    +350        self.session = session_factory()
    +351
    +352        Base.metadata.create_all(self.engine)
     353
    -354    def initiate_database(self, url, database_name):  #CREATION
    -355        """ Creates the database.
    -356        
    -357        Parameters
    -358        ----------
    -359        url : str
    -360            The URL for the database.
    -361        database_name : str
    -362            The name of the database.
    -363        """
    -364        engine = create_engine(url)
    -365        conn = engine.connect()
    -366        conn.execute("commit")
    -367        conn.execute("create database " + database_name)
    -368        conn.close()
    +354        self.session.commit()
    +355
    +356    def __exit__(self, exc_type, exc_val, exc_tb):
    +357        """Closes the database.
    +358
    +359        Parameters
    +360        ----------
    +361        exc_type : str
    +362            The exception type.
    +363        exc_val : str
    +364            The exception value.
    +365        exc_tb : str
    +366            The exception traceback.
    +367        """
    +368        # make sure the dbconnection gets closed
     369
    -370    def commit(self):
    -371        """ Commits the session.
    -372        """
    -373        try:
    -374            self.session.commit()  
    -375        except SQLAlchemyError as e:
    -376            self.session.rollback()
    -377            print(str(e))
    -378
    -379    def init_engine(self, url):
    -380        """ Initializes the SQLAlchemy engine.
    -381        
    -382        Parameters
    -383        ----------
    -384        url : str
    -385            The URL for the database.
    -386        
    -387        Returns
    -388        -------
    -389        sqlalchemy.engine.base.Engine
    -390            The SQLAlchemy engine.
    -391        
    -392        """
    -393        if not url or url == 'None' or url == 'False':
    -394            directory = os.getcwd()
    -395
    -396            if not os.path.isdir(directory + '/db'):
    -397                os.mkdir(directory + '/db')
    -398
    -399            url = 'sqlite:///{DB}/db/molformulas.sqlite'.format(DB=directory)
    +370        self.commit()
    +371        self.session.close()
    +372        self.engine.dispose()
    +373
    +374    def initiate_database(self, url, database_name):  # CREATION
    +375        """Creates the database.
    +376
    +377        Parameters
    +378        ----------
    +379        url : str
    +380            The URL for the database.
    +381        database_name : str
    +382            The name of the database.
    +383        """
    +384        engine = create_engine(url)
    +385        conn = engine.connect()
    +386        conn.execute("commit")
    +387        conn.execute("create database " + database_name)
    +388        conn.close()
    +389
    +390    def commit(self):
    +391        """Commits the session."""
    +392        try:
    +393            self.session.commit()
    +394        except SQLAlchemyError as e:
    +395            self.session.rollback()
    +396            print(str(e))
    +397
    +398    def init_engine(self, url):
    +399        """Initializes the SQLAlchemy engine.
     400
    -401        if url[0:6] == 'sqlite':
    -402            self.type = 'sqlite'
    -403        else:
    -404            self.type = 'normal'
    -405            
    -406        if url[0:6] == 'sqlite':
    -407            engine = create_engine(url, echo = False)
    -408            self.chunks_count = 50
    -409        
    -410        elif url[0:10] == 'postgresql' or url[0:8] == 'postgres':
    -411            #postgresql
    -412            self.chunks_count = 50000
    -413            engine = create_engine(url, echo = False, isolation_level="AUTOCOMMIT")
    -414        
    -415        return engine# poolclass=NullPool
    -416
    -417    def __enter__(self):
    -418        """ Returns the object.
    -419        """
    -420        return self
    -421    
    -422    def get_dict_by_classes(self, classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None):
    -423        """ Returns a dictionary of molecular formulas.
    -424        
    -425        Parameters
    -426        ----------
    -427        classes : list
    -428            The list of classes.
    -429        ion_type : str
    -430            The ion type.
    -431        nominal_mzs : list
    -432            The list of nominal m/z values.
    -433        ion_charge : int
    -434            The ion charge.
    -435        molecular_search_settings : MolecularFormulaSearchSettings
    -436            The molecular formula search settings.
    -437        adducts : list, optional
    -438            The list of adducts. Default is None.
    -439        
    -440        Returns
    -441        -------
    -442        dict
    -443            The dictionary of molecular formulas.
    -444        
    -445        Notes
    -446        -----
    -447        Known issue, when using SQLite:
    -448        if the number of classes and nominal_m/zs are higher than 999 the query will fail
    -449        Solution: use postgres or split query
    -450        """                     
    -451         
    -452        def query_normal(class_list, len_adduct):
    -453            """ query for normal database
    -454            
    -455            Parameters
    -456            ----------
    -457            class_list : list
    -458                The list of classes.
    -459            len_adduct : int
    -460                The length of the adduct.
    -461            
    -462            Returns
    -463            -------
    -464            sqlalchemy.orm.query.Query
    -465                The query.
    -466            """
    -467            base_query = self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms)\
    -468                                .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id)\
    -469                                .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id)
    -470            
    -471            return base_query.filter(
    -472                and_(
    -473                    HeteroAtoms.name.in_(class_list), 
    -474                    and_(
    -475                        MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe, 
    -476                        MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe, 
    -477                        and_(
    -478                            ((CarbonHydrogen.h + HeteroAtoms.halogens_count - len_adduct) / CarbonHydrogen.c) >= molecular_search_settings.min_hc_filter,
    -479                            ((CarbonHydrogen.h + HeteroAtoms.halogens_count - len_adduct) / CarbonHydrogen.c) <= molecular_search_settings.max_hc_filter,
    -480                            CarbonHydrogen.C >= molecular_search_settings.usedAtoms.get("C")[0],
    -481                            CarbonHydrogen.c <= molecular_search_settings.usedAtoms.get("C")[1],
    -482                            CarbonHydrogen.h >= molecular_search_settings.usedAtoms.get("H")[0],
    -483                            CarbonHydrogen.h <= molecular_search_settings.usedAtoms.get("H")[1],
    -484                        )
    -485                    )
    -486                )
    -487            )
    -488
    -489        def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None):
    -490            """ add molecular formula to dict
    -491            
    -492            Parameters
    -493            ----------
    -494            formulas : sqlalchemy.orm.query.Query
    -495                The query.
    -496            ion_type : str
    -497                The ion type.
    -498            ion_charge : int
    -499                The ion charge.
    -500            adduct_atom : str, optional
    -501                The adduct atom. Default is None.
    -502            
    -503            Returns
    -504            -------
    -505            dict
    -506                The dictionary of molecular formulas.
    -507            
    -508            """
    -509            "organize data by heteroatom classes"
    -510            dict_res = {}
    -511
    -512            def nominal_mass_by_ion_type(formula_obj):
    -513                
    -514                if ion_type == Labels.protonated_de_ion:
    -515                
    -516                    return int(formula_obj._protonated_mz(ion_charge))
    -517                
    -518                elif ion_type == Labels.radical_ion:
    -519                    
    -520                    return int(formula_obj._radical_mz(ion_charge))
    -521
    -522                elif ion_type == Labels.adduct_ion and adduct_atom:
    -523                    
    -524                    return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
    -525            
    -526            for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database"):
    -527                
    -528                nominal_mz = nominal_mass_by_ion_type(formula_obj)
    -529                
    -530                if self.type != 'normal':
    -531                    if not nominal_mz in nominal_mzs:
    -532                        continue
    -533                classe = classe_obj.name
    -534
    -535                # classe_str = formula.classe_string
    -536                
    -537                # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str)
    -538                
    -539                formula_dict = formula_obj.to_dict()
    -540
    -541                if formula_dict.get("O"):
    -542                    
    -543                    if formula_dict.get("O") / formula_dict.get("C") >= molecular_search_settings.max_oc_filter:
    -544                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter)
    -545                        continue
    -546                    elif formula_dict.get("O") / formula_dict.get("C") <= molecular_search_settings.min_oc_filter:
    -547                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter)
    -548                        continue
    -549                    #if formula_dict.get("P"):
    +401        Parameters
    +402        ----------
    +403        url : str
    +404            The URL for the database.
    +405
    +406        Returns
    +407        -------
    +408        sqlalchemy.engine.base.Engine
    +409            The SQLAlchemy engine.
    +410
    +411        """
    +412        if not url or url == "None" or url == "False":
    +413            directory = os.getcwd()
    +414
    +415            if not os.path.isdir(directory + "/db"):
    +416                os.mkdir(directory + "/db")
    +417
    +418            url = "sqlite:///{DB}/db/molformulas.sqlite".format(DB=directory)
    +419
    +420        if url[0:6] == "sqlite":
    +421            self.type = "sqlite"
    +422        else:
    +423            self.type = "normal"
    +424
    +425        if url[0:6] == "sqlite":
    +426            engine = create_engine(url, echo=False)
    +427            self.chunks_count = 50
    +428
    +429        elif url[0:10] == "postgresql" or url[0:8] == "postgres":
    +430            # postgresql
    +431            self.chunks_count = 50000
    +432            engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT")
    +433
    +434        return engine  # poolclass=NullPool
    +435
    +436    def __enter__(self):
    +437        """Returns the object."""
    +438        return self
    +439
    +440    def get_dict_by_classes(
    +441        self,
    +442        classes,
    +443        ion_type,
    +444        nominal_mzs,
    +445        ion_charge,
    +446        molecular_search_settings,
    +447        adducts=None,
    +448    ):
    +449        """Returns a dictionary of molecular formulas.
    +450
    +451        Parameters
    +452        ----------
    +453        classes : list
    +454            The list of classes.
    +455        ion_type : str
    +456            The ion type.
    +457        nominal_mzs : list
    +458            The list of nominal m/z values.
    +459        ion_charge : int
    +460            The ion charge.
    +461        molecular_search_settings : MolecularFormulaSearchSettings
    +462            The molecular formula search settings.
    +463        adducts : list, optional
    +464            The list of adducts. Default is None.
    +465
    +466        Returns
    +467        -------
    +468        dict
    +469            The dictionary of molecular formulas.
    +470
    +471        Notes
    +472        -----
    +473        Known issue, when using SQLite:
    +474        if the number of classes and nominal_m/zs are higher than 999 the query will fail
    +475        Solution: use postgres or split query
    +476        """
    +477
    +478        def query_normal(class_list, len_adduct):
    +479            """query for normal database
    +480
    +481            Parameters
    +482            ----------
    +483            class_list : list
    +484                The list of classes.
    +485            len_adduct : int
    +486                The length of the adduct.
    +487
    +488            Returns
    +489            -------
    +490            sqlalchemy.orm.query.Query
    +491                The query.
    +492            """
    +493            base_query = (
    +494                self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms)
    +495                .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id)
    +496                .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id)
    +497            )
    +498
    +499            return base_query.filter(
    +500                and_(
    +501                    HeteroAtoms.name.in_(class_list),
    +502                    and_(
    +503                        MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe,
    +504                        MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe,
    +505                        and_(
    +506                            (
    +507                                (
    +508                                    CarbonHydrogen.h
    +509                                    + HeteroAtoms.halogens_count
    +510                                    - len_adduct
    +511                                )
    +512                                / CarbonHydrogen.c
    +513                            )
    +514                            >= molecular_search_settings.min_hc_filter,
    +515                            (
    +516                                (
    +517                                    CarbonHydrogen.h
    +518                                    + HeteroAtoms.halogens_count
    +519                                    - len_adduct
    +520                                )
    +521                                / CarbonHydrogen.c
    +522                            )
    +523                            <= molecular_search_settings.max_hc_filter,
    +524                            CarbonHydrogen.C
    +525                            >= molecular_search_settings.usedAtoms.get("C")[0],
    +526                            CarbonHydrogen.c
    +527                            <= molecular_search_settings.usedAtoms.get("C")[1],
    +528                            CarbonHydrogen.h
    +529                            >= molecular_search_settings.usedAtoms.get("H")[0],
    +530                            CarbonHydrogen.h
    +531                            <= molecular_search_settings.usedAtoms.get("H")[1],
    +532                        ),
    +533                    ),
    +534                )
    +535            )
    +536
    +537        def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None):
    +538            """add molecular formula to dict
    +539
    +540            Parameters
    +541            ----------
    +542            formulas : sqlalchemy.orm.query.Query
    +543                The query.
    +544            ion_type : str
    +545                The ion type.
    +546            ion_charge : int
    +547                The ion charge.
    +548            adduct_atom : str, optional
    +549                The adduct atom. Default is None.
     550
    -551                    #    if  not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter:
    -552                            
    -553                    #        continue
    -554        
    -555                if classe in dict_res.keys():
    -556                    
    -557                    if nominal_mz in dict_res[classe].keys():
    -558                        
    -559                        dict_res.get(classe).get(nominal_mz).append(formula_obj)
    -560                    
    -561                    else:
    -562
    -563                        dict_res.get(classe)[nominal_mz] = [formula_obj ]  
    -564            
    -565                else:
    -566                    
    -567                    dict_res[classe] = {nominal_mz: [formula_obj] }     
    -568            
    -569            return dict_res
    -570        
    -571        
    -572        len_adducts = 0
    -573        if ion_type == Labels.adduct_ion:
    -574            len_adducts = 1
    -575        
    -576        query = query_normal(classes, len_adducts)
    -577        
    -578        if ion_type == Labels.protonated_de_ion:
    -579            if self.type == 'normal':
    -580                
    -581                query = query.filter(
    -582                                func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_(nominal_mzs)
    -583                                )
    -584                                
    -585                                
    -586            return add_dict_formula(query, ion_type, ion_charge)
    -587        
    -588        if ion_type == Labels.radical_ion:
    -589            if self.type == 'normal':
    -590                query = query.filter(func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_(nominal_mzs))
    -591            return add_dict_formula(query, ion_type, ion_charge)
    -592        
    -593        if ion_type == Labels.adduct_ion:
    -594            dict_res = {}
    -595            if adducts: 
    -596                for atom in adducts:
    -597                    if self.type == 'normal':
    -598                        query = query.filter(func.floor(MolecularFormulaLink._adduct_mz(ion_charge, atom)).in_(nominal_mzs))    
    -599                    dict_res[atom] = add_dict_formula(query, ion_type, ion_charge, adduct_atom=atom)
    -600                return dict_res
    -601        # dump all objs to memory
    -602        self.session.expunge_all()
    -603        
    -604    def check_entry(self,classe, ion_type, molecular_search_settings):
    -605        """ Checks if a molecular formula is in the database.
    -606
    -607        Parameters
    -608        ----------
    -609        classe : str
    -610            The class of the molecular formula.
    -611        ion_type : str
    -612            The ion type.
    -613        molecular_search_settings : MolecularFormulaSearchSettings
    -614            The molecular formula search settings.
    -615        
    -616        Returns
    -617        -------
    -618        sqlalchemy.orm.query.Query
    -619            The query.
    -620        """
    -621        #  get all classes, ion_type, ion charge as str add to a dict or list
    -622        #  then check if class in database
    -623        has_class = self.session.query(exists().where(
    -624            (MolecularFormulaLink.classe == classe)))
    -625        
    -626        return has_class
    -627    
    -628    def get_all_classes(self):
    -629        """ Returns a list of all classes in the database."""
    -630        query = self.session.query(MolecularFormulaLink.classe.distinct().label("classe"))
    -631        
    -632        return query.all()  
    -633    
    -634    def get_all(self,):
    -635        """ Returns a list of all molecular formulas in the database."""
    -636        mol_formulas = self.session.query(MolecularFormulaLink).all()
    -637        
    -638        return mol_formulas
    -639
    -640    def delete_entry(self, row):
    -641        """ Deletes a molecular formula from the database."""
    -642        try:
    -643            self.session.delete(row)  
    -644            self.session.commit()  
    -645        
    -646        except SQLAlchemyError as e:
    -647            self.session.rollback()
    -648            print(str(e))
    -649
    -650    def purge(self, cls):
    -651        """ Deletes all molecular formulas from the database.
    -652        
    -653        Notes 
    -654        -------
    -655        Careful, this will delete the entire database table
    -656
    -657        """
    -658        self.session.query(cls).delete()
    -659        self.session.commit()  
    -660
    -661    def clear_data(self):
    -662        """ Clears the database.
    -663        """
    -664        meta = Base.metadata
    -665        for table in reversed(meta.sorted_tables):
    -666            print ('Clear table %s' % table)
    -667            self.session.execute(table.delete())
    -668        self.session.commit()
    -669
    -670    def close(self, commit=True):
    -671        """ Closes the database.
    -672        
    -673        Parameters
    -674        ----------
    -675        commit : bool, optional
    -676            Whether to commit the session. Default is True.
    -677        """
    -678        # make sure the dbconnection gets closed
    -679        
    -680        if commit: self.commit()
    -681        self.session.close()
    -682        self.engine.dispose()    
    -683   
    -684    def add_engine_pidguard(self, engine):
    -685        """ Adds multiprocessing guards.
    -686        
    -687        Forces a connection to be reconnected if it is detected
    -688        as having been shared to a sub-process.
    +551            Returns
    +552            -------
    +553            dict
    +554                The dictionary of molecular formulas.
    +555
    +556            """
    +557            "organize data by heteroatom classes"
    +558            dict_res = {}
    +559
    +560            def nominal_mass_by_ion_type(formula_obj):
    +561                if ion_type == Labels.protonated_de_ion:
    +562                    return int(formula_obj._protonated_mz(ion_charge))
    +563
    +564                elif ion_type == Labels.radical_ion:
    +565                    return int(formula_obj._radical_mz(ion_charge))
    +566
    +567                elif ion_type == Labels.adduct_ion and adduct_atom:
    +568                    return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
    +569
    +570            for formula_obj, ch_obj, classe_obj in tqdm.tqdm(
    +571                formulas, desc="Loading molecular formula database"
    +572            ):
    +573                nominal_mz = nominal_mass_by_ion_type(formula_obj)
    +574
    +575                if self.type != "normal":
    +576                    if not nominal_mz in nominal_mzs:
    +577                        continue
    +578                classe = classe_obj.name
    +579
    +580                # classe_str = formula.classe_string
    +581
    +582                # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str)
    +583
    +584                formula_dict = formula_obj.to_dict()
    +585
    +586                if formula_dict.get("O"):
    +587                    if (
    +588                        formula_dict.get("O") / formula_dict.get("C")
    +589                        >= molecular_search_settings.max_oc_filter
    +590                    ):
    +591                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter)
    +592                        continue
    +593                    elif (
    +594                        formula_dict.get("O") / formula_dict.get("C")
    +595                        <= molecular_search_settings.min_oc_filter
    +596                    ):
    +597                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter)
    +598                        continue
    +599                    # if formula_dict.get("P"):
    +600
    +601                    #    if  not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter:
    +602
    +603                    #        continue
    +604
    +605                if classe in dict_res.keys():
    +606                    if nominal_mz in dict_res[classe].keys():
    +607                        dict_res.get(classe).get(nominal_mz).append(formula_obj)
    +608
    +609                    else:
    +610                        dict_res.get(classe)[nominal_mz] = [formula_obj]
    +611
    +612                else:
    +613                    dict_res[classe] = {nominal_mz: [formula_obj]}
    +614
    +615            return dict_res
    +616
    +617        len_adducts = 0
    +618        if ion_type == Labels.adduct_ion:
    +619            len_adducts = 1
    +620
    +621        query = query_normal(classes, len_adducts)
    +622
    +623        if ion_type == Labels.protonated_de_ion:
    +624            if self.type == "normal":
    +625                query = query.filter(
    +626                    func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_(
    +627                        nominal_mzs
    +628                    )
    +629                )
    +630
    +631            return add_dict_formula(query, ion_type, ion_charge)
    +632
    +633        if ion_type == Labels.radical_ion:
    +634            if self.type == "normal":
    +635                query = query.filter(
    +636                    func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_(
    +637                        nominal_mzs
    +638                    )
    +639                )
    +640            return add_dict_formula(query, ion_type, ion_charge)
    +641
    +642        if ion_type == Labels.adduct_ion:
    +643            dict_res = {}
    +644            if adducts:
    +645                for atom in adducts:
    +646                    if self.type == "normal":
    +647                        query = query.filter(
    +648                            func.floor(
    +649                                MolecularFormulaLink._adduct_mz(ion_charge, atom)
    +650                            ).in_(nominal_mzs)
    +651                        )
    +652                    dict_res[atom] = add_dict_formula(
    +653                        query, ion_type, ion_charge, adduct_atom=atom
    +654                    )
    +655                return dict_res
    +656        # dump all objs to memory
    +657        self.session.expunge_all()
    +658
    +659    def check_entry(self, classe, ion_type, molecular_search_settings):
    +660        """Checks if a molecular formula is in the database.
    +661
    +662        Parameters
    +663        ----------
    +664        classe : str
    +665            The class of the molecular formula.
    +666        ion_type : str
    +667            The ion type.
    +668        molecular_search_settings : MolecularFormulaSearchSettings
    +669            The molecular formula search settings.
    +670
    +671        Returns
    +672        -------
    +673        sqlalchemy.orm.query.Query
    +674            The query.
    +675        """
    +676        #  get all classes, ion_type, ion charge as str add to a dict or list
    +677        #  then check if class in database
    +678        has_class = self.session.query(
    +679            exists().where((MolecularFormulaLink.classe == classe))
    +680        )
    +681
    +682        return has_class
    +683
    +684    def get_all_classes(self):
    +685        """Returns a list of all classes in the database."""
    +686        query = self.session.query(
    +687            MolecularFormulaLink.classe.distinct().label("classe")
    +688        )
     689
    -690        Parameters
    -691        ----------
    -692        engine : sqlalchemy.engine.base.Engine
    -693            The SQLAlchemy engine.
    -694        
    -695        """
    -696        import os, warnings
    -697     
    -698
    -699        @event.listens_for(engine, "connect")
    -700        def connect(dbapi_connection, connection_record):
    -701            """ Forces a connection to be reconnected if it is detected
    -702            
    -703            Parameters
    -704            ----------
    -705            dbapi_connection : sqlalchemy.engine.base.Engine
    -706                The SQLAlchemy engine.
    -707            connection_record : sqlalchemy.engine.base.Engine
    -708                The SQLAlchemy engine.
    -709            """
    -710            connection_record.info['pid'] = os.getpid()
    -711
    -712        @event.listens_for(engine, "checkout")
    -713        def checkout(dbapi_connection, connection_record, connection_proxy):
    -714            """ Forces a connection to be reconnected if it is detected
    -715            
    -716            Parameters
    -717            ----------
    -718            dbapi_connection : sqlalchemy.engine.base.Engine
    -719                The SQLAlchemy engine.
    -720            connection_record : sqlalchemy.engine.base.Engine
    -721                The SQLAlchemy engine.
    -722            connection_proxy : sqlalchemy.engine.base.Engine
    -723                The SQLAlchemy engine.
    -724            
    -725            Raises
    -726            ------
    -727            exc.DisconnectionError
    -728                If the connection record belongs to a different process.
    -729            
    -730            """
    -731            pid = os.getpid()
    -732            if connection_record.info['pid'] != pid:
    -733                # substitute log.debug() or similar here as desired
    -734                warnings.warn(
    -735                    "Parent process %(orig)s forked (%(newproc)s) with an open "
    -736                    "database connection, "
    -737                    "which is being discarded and recreated." %
    -738                    {"newproc": pid, "orig": connection_record.info['pid']})
    -739                connection_record.connection = connection_proxy.connection = None
    -740                raise exc.DisconnectionError(
    -741                    "Connection record belongs to pid %s, "
    -742                    "attempting to check out in pid %s" %
    -743                    (connection_record.info['pid'], pid)
    -744                )    
    -745
    -746if __name__ == "__main__":
    -747    
    -748    sql = MolForm_SQL(url='sqlite:///')
    -749    
    -750    dict_data = {"name": '{"O": 12}'}
    -751    dict_data2 = {"name": '{"O": 13}'}
    -752    hetero_obj = HeteroAtoms(**dict_data)
    -753    hetero_obj2 = HeteroAtoms(**dict_data2)
    -754    sql.session.add(hetero_obj)
    -755    sql.session.add(hetero_obj2)
    -756
    -757    print(sql.session.query(HeteroAtoms).all())
    -758    #molecular_search_settings = MolecularFormulaSearchSettings()
    -759    #sql = MolForm_SQL()
    -760    #query = sql.session.query(MolecularFormulaLink).filter_by(classe = '{"O": 12}').filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250)
    -761    #query = sql.get_by_classe('{"O": 12}', molecular_search_settings).filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250)
    -762    #classes = ['{"O": 12}']*1
    -763    #for i, classe in enumerate(classes):
    -764        #query = sql.get_by_classe(classe, molecular_search_settings)
    -765        #query = sql.session.query(MolecularFormulaLink).filter_by(classe = '{"O": 12}').filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250)
    -766        #for i in query.filter(MolecularFormulaLink.mass < 250):
    -767            
    -768        #    print(i._radical_mz(-1), i._protonated_mz(-1), i._adduct_mz(+2, "Na"), i.mass, i.to_dict(), i.formula_string)
    -769    #
    -770 
    +690        return query.all()
    +691
    +692    def get_all(
    +693        self,
    +694    ):
    +695        """Returns a list of all molecular formulas in the database."""
    +696        mol_formulas = self.session.query(MolecularFormulaLink).all()
    +697
    +698        return mol_formulas
    +699
    +700    def delete_entry(self, row):
    +701        """Deletes a molecular formula from the database."""
    +702        try:
    +703            self.session.delete(row)
    +704            self.session.commit()
    +705
    +706        except SQLAlchemyError as e:
    +707            self.session.rollback()
    +708            print(str(e))
    +709
    +710    def purge(self, cls):
    +711        """Deletes all molecular formulas from the database.
    +712
    +713        Notes
    +714        -------
    +715        Careful, this will delete the entire database table
    +716
    +717        """
    +718        self.session.query(cls).delete()
    +719        self.session.commit()
    +720
    +721    def clear_data(self):
    +722        """Clears the database."""
    +723        meta = Base.metadata
    +724        for table in reversed(meta.sorted_tables):
    +725            print("Clear table %s" % table)
    +726            self.session.execute(table.delete())
    +727        self.session.commit()
    +728
    +729    def close(self, commit=True):
    +730        """Closes the database.
    +731
    +732        Parameters
    +733        ----------
    +734        commit : bool, optional
    +735            Whether to commit the session. Default is True.
    +736        """
    +737        # make sure the dbconnection gets closed
    +738
    +739        if commit:
    +740            self.commit()
    +741        self.session.close()
    +742        self.engine.dispose()
    +743
    +744    def add_engine_pidguard(self, engine):
    +745        """Adds multiprocessing guards.
    +746
    +747        Forces a connection to be reconnected if it is detected
    +748        as having been shared to a sub-process.
    +749
    +750        Parameters
    +751        ----------
    +752        engine : sqlalchemy.engine.base.Engine
    +753            The SQLAlchemy engine.
    +754
    +755        """
    +756        import os
    +757        import warnings
    +758
    +759        @event.listens_for(engine, "connect")
    +760        def connect(dbapi_connection, connection_record):
    +761            """Forces a connection to be reconnected if it is detected
    +762
    +763            Parameters
    +764            ----------
    +765            dbapi_connection : sqlalchemy.engine.base.Engine
    +766                The SQLAlchemy engine.
    +767            connection_record : sqlalchemy.engine.base.Engine
    +768                The SQLAlchemy engine.
    +769            """
    +770            connection_record.info["pid"] = os.getpid()
    +771
    +772        @event.listens_for(engine, "checkout")
    +773        def checkout(dbapi_connection, connection_record, connection_proxy):
    +774            """Forces a connection to be reconnected if it is detected
    +775
    +776            Parameters
    +777            ----------
    +778            dbapi_connection : sqlalchemy.engine.base.Engine
    +779                The SQLAlchemy engine.
    +780            connection_record : sqlalchemy.engine.base.Engine
    +781                The SQLAlchemy engine.
    +782            connection_proxy : sqlalchemy.engine.base.Engine
    +783                The SQLAlchemy engine.
    +784
    +785            Raises
    +786            ------
    +787            exc.DisconnectionError
    +788                If the connection record belongs to a different process.
    +789
    +790            """
    +791            pid = os.getpid()
    +792            if connection_record.info["pid"] != pid:
    +793                # substitute log.debug() or similar here as desired
    +794                warnings.warn(
    +795                    "Parent process %(orig)s forked (%(newproc)s) with an open "
    +796                    "database connection, "
    +797                    "which is being discarded and recreated."
    +798                    % {"newproc": pid, "orig": connection_record.info["pid"]}
    +799                )
    +800                connection_record.connection = connection_proxy.connection = None
    +801                raise exc.DisconnectionError(
    +802                    "Connection record belongs to pid %s, "
    +803                    "attempting to check out in pid %s"
    +804                    % (connection_record.info["pid"], pid)
    +805                )
    +806
    +807
    +808if __name__ == "__main__":
    +809    sql = MolForm_SQL(url="sqlite:///")
    +810
    +811    dict_data = {"name": '{"O": 12}'}
    +812    dict_data2 = {"name": '{"O": 13}'}
    +813    hetero_obj = HeteroAtoms(**dict_data)
    +814    hetero_obj2 = HeteroAtoms(**dict_data2)
    +815    sql.session.add(hetero_obj)
    +816    sql.session.add(hetero_obj2)
    +817
    +818    print(sql.session.query(HeteroAtoms).all())
    +819    # molecular_search_settings = MolecularFormulaSearchSettings()
    +820    # sql = MolForm_SQL()
    +821    # query = sql.session.query(MolecularFormulaLink).filter_by(classe = '{"O": 12}').filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250)
    +822    # query = sql.get_by_classe('{"O": 12}', molecular_search_settings).filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250)
    +823    # classes = ['{"O": 12}']*1
    +824    # for i, classe in enumerate(classes):
    +825    # query = sql.get_by_classe(classe, molecular_search_settings)
    +826    # query = sql.session.query(MolecularFormulaLink).filter_by(classe = '{"O": 12}').filter(MolecularFormulaLink._adduct_mz(+2, "Na") < 250)
    +827    # for i in query.filter(MolecularFormulaLink.mass < 250):
    +828
    +829    #    print(i._radical_mz(-1), i._protonated_mz(-1), i._adduct_mz(+2, "Na"), i.mass, i.to_dict(), i.formula_string)
    +830    #
     
    @@ -1091,54 +1151,55 @@

    -
    30class HeteroAtoms(Base):
    -31    """ HeteroAtoms class for the heteroAtoms table in the SQLite database.
    -32    
    -33    Attributes
    -34    ----------
    -35    id : int
    -36        The primary key for the table.
    -37    name : str
    -38        The name of the heteroAtoms class.
    -39    halogensCount : int
    -40        The number of halogens in the heteroAtoms class.
    -41    carbonHydrogen : relationship
    -42        The relationship to the carbonHydrogen table.
    -43    
    -44    Methods
    -45    -------
    -46    * __repr__()
    -47        Returns the string representation of the object.
    -48    * to_dict()
    -49        Returns the heteroAtoms class as a dictionary.
    -50    * halogens_count()
    -51        Returns the number of halogens as a float.
    -52    
    -53
    -54    """
    -55    __tablename__ = 'heteroAtoms'
    -56
    -57    id = Column(Integer, primary_key=True,
    -58                         unique = True,
    -59                         nullable = False)
    -60
    -61    name = Column(String, unique=True, nullable=False)
    -62
    -63    halogensCount = Column(Integer, unique=False, nullable=False)
    -64
    -65    carbonHydrogen = relationship('CarbonHydrogen', secondary='molecularformula',  viewonly=True)
    -66
    -67    def __repr__(self):
    -68        return '<HeteroAtoms Model {} class {}>'.format(self.id, self.name)      
    +            
    36class HeteroAtoms(Base):
    +37    """HeteroAtoms class for the heteroAtoms table in the SQLite database.
    +38
    +39    Attributes
    +40    ----------
    +41    id : int
    +42        The primary key for the table.
    +43    name : str
    +44        The name of the heteroAtoms class.
    +45    halogensCount : int
    +46        The number of halogens in the heteroAtoms class.
    +47    carbonHydrogen : relationship
    +48        The relationship to the carbonHydrogen table.
    +49
    +50    Methods
    +51    -------
    +52    * __repr__()
    +53        Returns the string representation of the object.
    +54    * to_dict()
    +55        Returns the heteroAtoms class as a dictionary.
    +56    * halogens_count()
    +57        Returns the number of halogens as a float.
    +58
    +59
    +60    """
    +61
    +62    __tablename__ = "heteroAtoms"
    +63
    +64    id = Column(Integer, primary_key=True, unique=True, nullable=False)
    +65
    +66    name = Column(String, unique=True, nullable=False)
    +67
    +68    halogensCount = Column(Integer, unique=False, nullable=False)
     69
    -70    @hybrid_property
    -71    def halogens_count(cls):
    -72        """ Returns the number of halogens as a float."""
    -73        return cls.halogensCount.cast(Float)
    -74
    -75    def to_dict(self):
    -76        """ Returns the heteroAtoms class as a dictionary."""
    -77        return json.loads(self.name)
    +70    carbonHydrogen = relationship(
    +71        "CarbonHydrogen", secondary="molecularformula", viewonly=True
    +72    )
    +73
    +74    def __repr__(self):
    +75        return "<HeteroAtoms Model {} class {}>".format(self.id, self.name)
    +76
    +77    @hybrid_property
    +78    def halogens_count(cls):
    +79        """Returns the number of halogens as a float."""
    +80        return cls.halogensCount.cast(Float)
    +81
    +82    def to_dict(self):
    +83        """Returns the heteroAtoms class as a dictionary."""
    +84        return json.loads(self.name)
     
    @@ -1259,9 +1320,9 @@
    Methods
    -
    75    def to_dict(self):
    -76        """ Returns the heteroAtoms class as a dictionary."""
    -77        return json.loads(self.name)
    +            
    82    def to_dict(self):
    +83        """Returns the heteroAtoms class as a dictionary."""
    +84        return json.loads(self.name)
     
    @@ -1292,75 +1353,74 @@
    Inherited Members
    -
     80class CarbonHydrogen(Base):
    - 81    """ CarbonHydrogen class for the carbonHydrogen table in the SQLite database.
    - 82    
    - 83    Attributes
    - 84    ----------
    - 85    id : int
    - 86        The primary key for the table.
    - 87    C : int
    - 88        The number of carbon atoms.
    - 89    H : int
    - 90        The number of hydrogen atoms.
    - 91    heteroAtoms : relationship
    - 92        The relationship to the heteroAtoms table.
    - 93    
    - 94    Methods
    - 95    -------
    - 96    * __repr__()
    - 97        Returns the string representation of the object.
    - 98    * mass()
    - 99        Returns the mass of the carbonHydrogen class as a float.
    -100    * c()
    -101        Returns the number of carbon atoms as a float.
    -102    * h()
    -103        Returns the number of hydrogen atoms as a float.
    -104    * dbe()
    -105        Returns the double bond equivalent as a float.
    -106    
    -107    """
    -108
    -109    __tablename__ = 'carbonHydrogen'
    -110    __table_args__ = (UniqueConstraint('C', 'H', name='unique_c_h'), )
    -111
    -112    id = Column(Integer, primary_key=True,
    -113                unique=True,
    -114                nullable=False)
    +            
     87class CarbonHydrogen(Base):
    + 88    """CarbonHydrogen class for the carbonHydrogen table in the SQLite database.
    + 89
    + 90    Attributes
    + 91    ----------
    + 92    id : int
    + 93        The primary key for the table.
    + 94    C : int
    + 95        The number of carbon atoms.
    + 96    H : int
    + 97        The number of hydrogen atoms.
    + 98    heteroAtoms : relationship
    + 99        The relationship to the heteroAtoms table.
    +100
    +101    Methods
    +102    -------
    +103    * __repr__()
    +104        Returns the string representation of the object.
    +105    * mass()
    +106        Returns the mass of the carbonHydrogen class as a float.
    +107    * c()
    +108        Returns the number of carbon atoms as a float.
    +109    * h()
    +110        Returns the number of hydrogen atoms as a float.
    +111    * dbe()
    +112        Returns the double bond equivalent as a float.
    +113
    +114    """
     115
    -116    C = Column(Integer, nullable=False)
    -117
    -118    H = Column(Integer, nullable=False)
    -119
    -120    heteroAtoms = relationship("HeteroAtoms",
    -121                               secondary="molecularformula",
    -122                               viewonly=True
    -123                               )
    +116    __tablename__ = "carbonHydrogen"
    +117    __table_args__ = (UniqueConstraint("C", "H", name="unique_c_h"),)
    +118
    +119    id = Column(Integer, primary_key=True, unique=True, nullable=False)
    +120
    +121    C = Column(Integer, nullable=False)
    +122
    +123    H = Column(Integer, nullable=False)
     124
    -125    def __repr__(self):
    -126        """ Returns the string representation of the object."""
    -127        return '<CarbonHydrogen Model {} C{} H{}>'.format(self.id, self.C, self.H)                     
    +125    heteroAtoms = relationship(
    +126        "HeteroAtoms", secondary="molecularformula", viewonly=True
    +127    )
     128
    -129    @property
    -130    def mass(self):
    -131        """ Returns the mass of the carbonHydrogen class as a float."""
    -132        return (self.C * Atoms.atomic_masses.get('C')) + (self.H * Atoms.atomic_masses.get('H'))
    -133
    -134    @hybrid_property
    -135    def c(cls):
    -136        """ Returns the number of carbon atoms as a float."""
    -137        return cls.C.cast(Float)
    -138
    -139    @hybrid_property
    -140    def h(cls):
    -141        """ Returns the number of hydrogen atoms as a float."""
    -142        return cls.H.cast(Float)
    -143
    -144    @hybrid_property
    -145    def dbe(cls):
    -146        """ Returns the double bond equivalent as a float."""
    -147        # return cls.C.cast(Float) - (cls.H.cast(Float) / 2) + 1
    -148        return float(cls.C) - float(cls.H / 2) + 1
    +129    def __repr__(self):
    +130        """Returns the string representation of the object."""
    +131        return "<CarbonHydrogen Model {} C{} H{}>".format(self.id, self.C, self.H)
    +132
    +133    @property
    +134    def mass(self):
    +135        """Returns the mass of the carbonHydrogen class as a float."""
    +136        return (self.C * Atoms.atomic_masses.get("C")) + (
    +137            self.H * Atoms.atomic_masses.get("H")
    +138        )
    +139
    +140    @hybrid_property
    +141    def c(cls):
    +142        """Returns the number of carbon atoms as a float."""
    +143        return cls.C.cast(Float)
    +144
    +145    @hybrid_property
    +146    def h(cls):
    +147        """Returns the number of hydrogen atoms as a float."""
    +148        return cls.H.cast(Float)
    +149
    +150    @hybrid_property
    +151    def dbe(cls):
    +152        """Returns the double bond equivalent as a float."""
    +153        # return cls.C.cast(Float) - (cls.H.cast(Float) / 2) + 1
    +154        return float(cls.C) - float(cls.H / 2) + 1
     
    @@ -1535,126 +1595,139 @@
    Inherited Members
    -
    151class MolecularFormulaLink(Base):
    -152    """ MolecularFormulaLink class for the molecularformula table in the SQLite database.
    -153
    -154    Attributes
    -155    ----------
    -156    heteroAtoms_id : int
    -157        The foreign key for the heteroAtoms table.
    -158    carbonHydrogen_id : int
    -159        The foreign key for the carbonHydrogen table.
    -160    mass : float
    -161        The mass of the molecular formula.
    -162    DBE : float
    -163        The double bond equivalent of the molecular formula.
    -164    carbonHydrogen : relationship
    -165        The relationship to the carbonHydrogen table.
    -166    heteroAtoms : relationship
    -167        The relationship to the heteroAtoms table.
    -168    C : association_proxy
    -169        The association proxy for the carbonHydrogen table.
    -170    H : association_proxy
    -171        The association proxy for the carbonHydrogen table.
    -172    classe : association_proxy
    -173        The association proxy for the heteroAtoms table.
    -174    
    -175    Methods
    -176    -------
    -177    * __repr__()
    -178        Returns the string representation of the object.
    -179    * to_dict()
    -180        Returns the molecular formula as a dictionary.
    -181    * formula_string()
    -182        Returns the molecular formula as a string.
    -183    * classe_string()
    -184        Returns the heteroAtoms class as a string.
    -185    * _adduct_mz(ion_charge, adduct_atom)
    -186        Returns the m/z of the adduct ion as a float.
    -187    * _protonated_mz(ion_charge)
    -188        Returns the m/z of the protonated ion as a float.
    -189    * _radical_mz(ion_charge)
    -190        Returns the m/z of the radical ion as a float.
    -191    
    -192
    -193
    -194    """
    -195    __tablename__ = 'molecularformula'
    -196    __table_args__ = (UniqueConstraint('heteroAtoms_id', 'carbonHydrogen_id', name='unique_molform'), )
    -197
    -198    # id = Column(Integer, primary_key=True,
    -199    #                    unique=True,
    -200    #                    nullable=False)
    -201
    -202    heteroAtoms_id = Column(Integer,
    -203                            ForeignKey('heteroAtoms.id'),
    -204                            primary_key=True)
    -205
    -206    carbonHydrogen_id = Column(Integer,
    -207                               ForeignKey('carbonHydrogen.id'), 
    -208                               primary_key=True)
    -209
    -210    mass = Column(Float)
    +            
    158class MolecularFormulaLink(Base):
    +159    """MolecularFormulaLink class for the molecularformula table in the SQLite database.
    +160
    +161    Attributes
    +162    ----------
    +163    heteroAtoms_id : int
    +164        The foreign key for the heteroAtoms table.
    +165    carbonHydrogen_id : int
    +166        The foreign key for the carbonHydrogen table.
    +167    mass : float
    +168        The mass of the molecular formula.
    +169    DBE : float
    +170        The double bond equivalent of the molecular formula.
    +171    carbonHydrogen : relationship
    +172        The relationship to the carbonHydrogen table.
    +173    heteroAtoms : relationship
    +174        The relationship to the heteroAtoms table.
    +175    C : association_proxy
    +176        The association proxy for the carbonHydrogen table.
    +177    H : association_proxy
    +178        The association proxy for the carbonHydrogen table.
    +179    classe : association_proxy
    +180        The association proxy for the heteroAtoms table.
    +181
    +182    Methods
    +183    -------
    +184    * __repr__()
    +185        Returns the string representation of the object.
    +186    * to_dict()
    +187        Returns the molecular formula as a dictionary.
    +188    * formula_string()
    +189        Returns the molecular formula as a string.
    +190    * classe_string()
    +191        Returns the heteroAtoms class as a string.
    +192    * _adduct_mz(ion_charge, adduct_atom)
    +193        Returns the m/z of the adduct ion as a float.
    +194    * _protonated_mz(ion_charge)
    +195        Returns the m/z of the protonated ion as a float.
    +196    * _radical_mz(ion_charge)
    +197        Returns the m/z of the radical ion as a float.
    +198
    +199
    +200
    +201    """
    +202
    +203    __tablename__ = "molecularformula"
    +204    __table_args__ = (
    +205        UniqueConstraint("heteroAtoms_id", "carbonHydrogen_id", name="unique_molform"),
    +206    )
    +207
    +208    # id = Column(Integer, primary_key=True,
    +209    #                    unique=True,
    +210    #                    nullable=False)
     211
    -212    DBE = Column(Float)
    +212    heteroAtoms_id = Column(Integer, ForeignKey("heteroAtoms.id"), primary_key=True)
     213
    -214    carbonHydrogen = relationship(CarbonHydrogen, backref=backref("heteroAtoms_assoc"))
    -215
    -216    heteroAtoms = relationship(HeteroAtoms, backref=backref("carbonHydrogen_assoc"))
    +214    carbonHydrogen_id = Column(
    +215        Integer, ForeignKey("carbonHydrogen.id"), primary_key=True
    +216    )
     217
    -218    C = association_proxy('carbonHydrogen', 'C')
    +218    mass = Column(Float)
     219
    -220    H = association_proxy('carbonHydrogen', 'H')
    +220    DBE = Column(Float)
     221
    -222    classe = association_proxy('heteroAtoms', 'name')
    +222    carbonHydrogen = relationship(CarbonHydrogen, backref=backref("heteroAtoms_assoc"))
     223
    -224    def to_dict(self):
    -225        """ Returns the molecular formula as a dictionary.
    -226        
    -227        Returns
    -228        -------
    -229        dict
    -230            The molecular formula as a dictionary.  
    -231        """
    -232        carbon = {'C': self.C, 'H': self.H}
    -233        classe = json.loads(self.classe)
    -234        if self.classe == '{"HC": ""}':
    -235            return {**carbon}
    -236        else:
    -237            return {**carbon, **classe}
    -238
    -239    @property
    -240    def formula_string(self):
    -241        """ Returns the molecular formula as a string."""
    -242        class_dict = self.to_dict()
    -243        class_str = ' '.join([atom + str(class_dict[atom]) for atom in class_dict.keys()])
    -244        return class_str.strip()
    -245
    -246    @property
    -247    def classe_string(self):
    -248        """ Returns the heteroAtoms class as a string."""
    -249        class_dict = json.loads(self.classe)
    -250        class_str = ' '.join([atom + str(class_dict[atom]) for atom in class_dict.keys()])
    -251        return class_str.strip()
    -252
    -253    @hybrid_method
    -254    def _adduct_mz(self, ion_charge, adduct_atom):
    -255        """ Returns the m/z of the adduct ion as a float."""
    -256        return (self.mass + (Atoms.atomic_masses.get(adduct_atom)) + (ion_charge * -1 * Atoms.electron_mass))/ abs(ion_charge)
    -257
    -258    @hybrid_method
    -259    def _protonated_mz(self, ion_charge):
    -260        """ Returns the m/z of the protonated ion as a float."""
    -261        return (self.mass + (ion_charge * Atoms.atomic_masses.get("H")) + (ion_charge * -1 * Atoms.electron_mass))/abs(ion_charge)
    -262
    -263    @hybrid_method
    -264    def _radical_mz(self, ion_charge):
    -265        """ Returns the m/z of the radical ion as a float."""
    -266        return (self.mass + (ion_charge * -1 * Atoms.electron_mass))/ abs(ion_charge)
    -267
    -268    def __repr__(self):
    -269        """ Returns the string representation of the object."""
    -270        return '<MolecularFormulaLink Model {}>'.format(self.formula_string)       
    +224    heteroAtoms = relationship(HeteroAtoms, backref=backref("carbonHydrogen_assoc"))
    +225
    +226    C = association_proxy("carbonHydrogen", "C")
    +227
    +228    H = association_proxy("carbonHydrogen", "H")
    +229
    +230    classe = association_proxy("heteroAtoms", "name")
    +231
    +232    def to_dict(self):
    +233        """Returns the molecular formula as a dictionary.
    +234
    +235        Returns
    +236        -------
    +237        dict
    +238            The molecular formula as a dictionary.
    +239        """
    +240        carbon = {"C": self.C, "H": self.H}
    +241        classe = json.loads(self.classe)
    +242        if self.classe == '{"HC": ""}':
    +243            return {**carbon}
    +244        else:
    +245            return {**carbon, **classe}
    +246
    +247    @property
    +248    def formula_string(self):
    +249        """Returns the molecular formula as a string."""
    +250        class_dict = self.to_dict()
    +251        class_str = " ".join(
    +252            [atom + str(class_dict[atom]) for atom in class_dict.keys()]
    +253        )
    +254        return class_str.strip()
    +255
    +256    @property
    +257    def classe_string(self):
    +258        """Returns the heteroAtoms class as a string."""
    +259        class_dict = json.loads(self.classe)
    +260        class_str = " ".join(
    +261            [atom + str(class_dict[atom]) for atom in class_dict.keys()]
    +262        )
    +263        return class_str.strip()
    +264
    +265    @hybrid_method
    +266    def _adduct_mz(self, ion_charge, adduct_atom):
    +267        """Returns the m/z of the adduct ion as a float."""
    +268        return (
    +269            self.mass
    +270            + (Atoms.atomic_masses.get(adduct_atom))
    +271            + (ion_charge * -1 * Atoms.electron_mass)
    +272        ) / abs(ion_charge)
    +273
    +274    @hybrid_method
    +275    def _protonated_mz(self, ion_charge):
    +276        """Returns the m/z of the protonated ion as a float."""
    +277        return (
    +278            self.mass
    +279            + (ion_charge * Atoms.atomic_masses.get("H"))
    +280            + (ion_charge * -1 * Atoms.electron_mass)
    +281        ) / abs(ion_charge)
    +282
    +283    @hybrid_method
    +284    def _radical_mz(self, ion_charge):
    +285        """Returns the m/z of the radical ion as a float."""
    +286        return (self.mass + (ion_charge * -1 * Atoms.electron_mass)) / abs(ion_charge)
    +287
    +288    def __repr__(self):
    +289        """Returns the string representation of the object."""
    +290        return "<MolecularFormulaLink Model {}>".format(self.formula_string)
     
    @@ -1841,20 +1914,20 @@
    Methods
    -
    224    def to_dict(self):
    -225        """ Returns the molecular formula as a dictionary.
    -226        
    -227        Returns
    -228        -------
    -229        dict
    -230            The molecular formula as a dictionary.  
    -231        """
    -232        carbon = {'C': self.C, 'H': self.H}
    -233        classe = json.loads(self.classe)
    -234        if self.classe == '{"HC": ""}':
    -235            return {**carbon}
    -236        else:
    -237            return {**carbon, **classe}
    +            
    232    def to_dict(self):
    +233        """Returns the molecular formula as a dictionary.
    +234
    +235        Returns
    +236        -------
    +237        dict
    +238            The molecular formula as a dictionary.
    +239        """
    +240        carbon = {"C": self.C, "H": self.H}
    +241        classe = json.loads(self.classe)
    +242        if self.classe == '{"HC": ""}':
    +243            return {**carbon}
    +244        else:
    +245            return {**carbon, **classe}
     
    @@ -1917,479 +1990,520 @@
    Inherited Members
    -
    273class MolForm_SQL:
    -274    """ MolForm_SQL class for the SQLite database.
    -275    
    -276    Attributes
    -277    ----------
    -278    engine : sqlalchemy.engine.base.Engine
    -279        The SQLAlchemy engine.
    -280    session : sqlalchemy.orm.session.Session
    -281        The SQLAlchemy session.
    -282    type : str
    -283        The type of database.
    -284    chunks_count : int
    -285        The number of chunks to use when querying the database.
    -286    
    -287    Methods
    -288    -------
    -289    * __init__(url=None, echo=False)
    -290        Initializes the database.
    -291    * __exit__(exc_type, exc_val, exc_tb)
    -292        Closes the database.
    -293    * initiate_database(url, database_name)
    -294        Creates the database.
    -295    * commit()
    -296        Commits the session.
    -297    * init_engine(url)
    -298        Initializes the SQLAlchemy engine.
    -299    * __enter__()
    -300
    -301    * get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None)
    -302        Returns a dictionary of molecular formulas.
    -303    * check_entry(classe, ion_type, molecular_search_settings)
    -304        Checks if a molecular formula is in the database.
    -305    * get_all_classes()
    -306        Returns a list of all classes in the database.
    -307    * get_all()
    -308        Returns a list of all molecular formulas in the database.
    -309    * delete_entry(row)
    -310        Deletes a molecular formula from the database.
    -311    * purge(cls)
    -312        Deletes all molecular formulas from the database.
    -313    * clear_data()
    -314        Clears the database.
    -315    * close(commit=True)
    -316        Closes the database.
    -317    * add_engine_pidguard(engine)
    -318        Adds multiprocessing guards.
    -319    
    -320    """
    -321    def __init__(self, url=None, echo=False):
    -322
    -323        self.engine = self.init_engine(url)
    -324
    -325        self.add_engine_pidguard(self.engine)
    -326
    -327        session_factory = sessionmaker(bind=self.engine)
    -328
    -329        Session = scoped_session(session_factory)
    -330
    -331        self.session = session_factory()
    -332
    -333        Base.metadata.create_all(self.engine)
    -334
    -335        self.session.commit()
    -336
    -337    def __exit__(self, exc_type, exc_val, exc_tb):
    -338        """ Closes the database.
    -339        
    -340        Parameters
    -341        ----------
    -342        exc_type : str
    -343            The exception type.
    -344        exc_val : str
    -345            The exception value.
    -346        exc_tb : str
    -347            The exception traceback.
    -348        """
    -349        # make sure the dbconnection gets closed
    +            
    293class MolForm_SQL:
    +294    """MolForm_SQL class for the SQLite database.
    +295
    +296    Attributes
    +297    ----------
    +298    engine : sqlalchemy.engine.base.Engine
    +299        The SQLAlchemy engine.
    +300    session : sqlalchemy.orm.session.Session
    +301        The SQLAlchemy session.
    +302    type : str
    +303        The type of database.
    +304    chunks_count : int
    +305        The number of chunks to use when querying the database.
    +306
    +307    Methods
    +308    -------
    +309    * __init__(url=None, echo=False)
    +310        Initializes the database.
    +311    * __exit__(exc_type, exc_val, exc_tb)
    +312        Closes the database.
    +313    * initiate_database(url, database_name)
    +314        Creates the database.
    +315    * commit()
    +316        Commits the session.
    +317    * init_engine(url)
    +318        Initializes the SQLAlchemy engine.
    +319    * __enter__()
    +320
    +321    * get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None)
    +322        Returns a dictionary of molecular formulas.
    +323    * check_entry(classe, ion_type, molecular_search_settings)
    +324        Checks if a molecular formula is in the database.
    +325    * get_all_classes()
    +326        Returns a list of all classes in the database.
    +327    * get_all()
    +328        Returns a list of all molecular formulas in the database.
    +329    * delete_entry(row)
    +330        Deletes a molecular formula from the database.
    +331    * purge(cls)
    +332        Deletes all molecular formulas from the database.
    +333    * clear_data()
    +334        Clears the database.
    +335    * close(commit=True)
    +336        Closes the database.
    +337    * add_engine_pidguard(engine)
    +338        Adds multiprocessing guards.
    +339
    +340    """
    +341
    +342    def __init__(self, url=None, echo=False):
    +343        self.engine = self.init_engine(url)
    +344
    +345        self.add_engine_pidguard(self.engine)
    +346
    +347        session_factory = sessionmaker(bind=self.engine)
    +348
    +349        Session = scoped_session(session_factory)
     350
    -351        self.commit()
    -352        self.session.close()
    -353        self.engine.dispose()
    +351        self.session = session_factory()
    +352
    +353        Base.metadata.create_all(self.engine)
     354
    -355    def initiate_database(self, url, database_name):  #CREATION
    -356        """ Creates the database.
    -357        
    -358        Parameters
    -359        ----------
    -360        url : str
    -361            The URL for the database.
    -362        database_name : str
    -363            The name of the database.
    -364        """
    -365        engine = create_engine(url)
    -366        conn = engine.connect()
    -367        conn.execute("commit")
    -368        conn.execute("create database " + database_name)
    -369        conn.close()
    +355        self.session.commit()
    +356
    +357    def __exit__(self, exc_type, exc_val, exc_tb):
    +358        """Closes the database.
    +359
    +360        Parameters
    +361        ----------
    +362        exc_type : str
    +363            The exception type.
    +364        exc_val : str
    +365            The exception value.
    +366        exc_tb : str
    +367            The exception traceback.
    +368        """
    +369        # make sure the dbconnection gets closed
     370
    -371    def commit(self):
    -372        """ Commits the session.
    -373        """
    -374        try:
    -375            self.session.commit()  
    -376        except SQLAlchemyError as e:
    -377            self.session.rollback()
    -378            print(str(e))
    -379
    -380    def init_engine(self, url):
    -381        """ Initializes the SQLAlchemy engine.
    -382        
    -383        Parameters
    -384        ----------
    -385        url : str
    -386            The URL for the database.
    -387        
    -388        Returns
    -389        -------
    -390        sqlalchemy.engine.base.Engine
    -391            The SQLAlchemy engine.
    -392        
    -393        """
    -394        if not url or url == 'None' or url == 'False':
    -395            directory = os.getcwd()
    -396
    -397            if not os.path.isdir(directory + '/db'):
    -398                os.mkdir(directory + '/db')
    -399
    -400            url = 'sqlite:///{DB}/db/molformulas.sqlite'.format(DB=directory)
    +371        self.commit()
    +372        self.session.close()
    +373        self.engine.dispose()
    +374
    +375    def initiate_database(self, url, database_name):  # CREATION
    +376        """Creates the database.
    +377
    +378        Parameters
    +379        ----------
    +380        url : str
    +381            The URL for the database.
    +382        database_name : str
    +383            The name of the database.
    +384        """
    +385        engine = create_engine(url)
    +386        conn = engine.connect()
    +387        conn.execute("commit")
    +388        conn.execute("create database " + database_name)
    +389        conn.close()
    +390
    +391    def commit(self):
    +392        """Commits the session."""
    +393        try:
    +394            self.session.commit()
    +395        except SQLAlchemyError as e:
    +396            self.session.rollback()
    +397            print(str(e))
    +398
    +399    def init_engine(self, url):
    +400        """Initializes the SQLAlchemy engine.
     401
    -402        if url[0:6] == 'sqlite':
    -403            self.type = 'sqlite'
    -404        else:
    -405            self.type = 'normal'
    -406            
    -407        if url[0:6] == 'sqlite':
    -408            engine = create_engine(url, echo = False)
    -409            self.chunks_count = 50
    -410        
    -411        elif url[0:10] == 'postgresql' or url[0:8] == 'postgres':
    -412            #postgresql
    -413            self.chunks_count = 50000
    -414            engine = create_engine(url, echo = False, isolation_level="AUTOCOMMIT")
    -415        
    -416        return engine# poolclass=NullPool
    -417
    -418    def __enter__(self):
    -419        """ Returns the object.
    -420        """
    -421        return self
    -422    
    -423    def get_dict_by_classes(self, classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None):
    -424        """ Returns a dictionary of molecular formulas.
    -425        
    -426        Parameters
    -427        ----------
    -428        classes : list
    -429            The list of classes.
    -430        ion_type : str
    -431            The ion type.
    -432        nominal_mzs : list
    -433            The list of nominal m/z values.
    -434        ion_charge : int
    -435            The ion charge.
    -436        molecular_search_settings : MolecularFormulaSearchSettings
    -437            The molecular formula search settings.
    -438        adducts : list, optional
    -439            The list of adducts. Default is None.
    -440        
    -441        Returns
    -442        -------
    -443        dict
    -444            The dictionary of molecular formulas.
    -445        
    -446        Notes
    -447        -----
    -448        Known issue, when using SQLite:
    -449        if the number of classes and nominal_m/zs are higher than 999 the query will fail
    -450        Solution: use postgres or split query
    -451        """                     
    -452         
    -453        def query_normal(class_list, len_adduct):
    -454            """ query for normal database
    -455            
    -456            Parameters
    -457            ----------
    -458            class_list : list
    -459                The list of classes.
    -460            len_adduct : int
    -461                The length of the adduct.
    -462            
    -463            Returns
    -464            -------
    -465            sqlalchemy.orm.query.Query
    -466                The query.
    -467            """
    -468            base_query = self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms)\
    -469                                .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id)\
    -470                                .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id)
    -471            
    -472            return base_query.filter(
    -473                and_(
    -474                    HeteroAtoms.name.in_(class_list), 
    -475                    and_(
    -476                        MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe, 
    -477                        MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe, 
    -478                        and_(
    -479                            ((CarbonHydrogen.h + HeteroAtoms.halogens_count - len_adduct) / CarbonHydrogen.c) >= molecular_search_settings.min_hc_filter,
    -480                            ((CarbonHydrogen.h + HeteroAtoms.halogens_count - len_adduct) / CarbonHydrogen.c) <= molecular_search_settings.max_hc_filter,
    -481                            CarbonHydrogen.C >= molecular_search_settings.usedAtoms.get("C")[0],
    -482                            CarbonHydrogen.c <= molecular_search_settings.usedAtoms.get("C")[1],
    -483                            CarbonHydrogen.h >= molecular_search_settings.usedAtoms.get("H")[0],
    -484                            CarbonHydrogen.h <= molecular_search_settings.usedAtoms.get("H")[1],
    -485                        )
    -486                    )
    -487                )
    -488            )
    -489
    -490        def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None):
    -491            """ add molecular formula to dict
    -492            
    -493            Parameters
    -494            ----------
    -495            formulas : sqlalchemy.orm.query.Query
    -496                The query.
    -497            ion_type : str
    -498                The ion type.
    -499            ion_charge : int
    -500                The ion charge.
    -501            adduct_atom : str, optional
    -502                The adduct atom. Default is None.
    -503            
    -504            Returns
    -505            -------
    -506            dict
    -507                The dictionary of molecular formulas.
    -508            
    -509            """
    -510            "organize data by heteroatom classes"
    -511            dict_res = {}
    -512
    -513            def nominal_mass_by_ion_type(formula_obj):
    -514                
    -515                if ion_type == Labels.protonated_de_ion:
    -516                
    -517                    return int(formula_obj._protonated_mz(ion_charge))
    -518                
    -519                elif ion_type == Labels.radical_ion:
    -520                    
    -521                    return int(formula_obj._radical_mz(ion_charge))
    -522
    -523                elif ion_type == Labels.adduct_ion and adduct_atom:
    -524                    
    -525                    return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
    -526            
    -527            for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database"):
    -528                
    -529                nominal_mz = nominal_mass_by_ion_type(formula_obj)
    -530                
    -531                if self.type != 'normal':
    -532                    if not nominal_mz in nominal_mzs:
    -533                        continue
    -534                classe = classe_obj.name
    -535
    -536                # classe_str = formula.classe_string
    -537                
    -538                # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str)
    -539                
    -540                formula_dict = formula_obj.to_dict()
    -541
    -542                if formula_dict.get("O"):
    -543                    
    -544                    if formula_dict.get("O") / formula_dict.get("C") >= molecular_search_settings.max_oc_filter:
    -545                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter)
    -546                        continue
    -547                    elif formula_dict.get("O") / formula_dict.get("C") <= molecular_search_settings.min_oc_filter:
    -548                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter)
    -549                        continue
    -550                    #if formula_dict.get("P"):
    +402        Parameters
    +403        ----------
    +404        url : str
    +405            The URL for the database.
    +406
    +407        Returns
    +408        -------
    +409        sqlalchemy.engine.base.Engine
    +410            The SQLAlchemy engine.
    +411
    +412        """
    +413        if not url or url == "None" or url == "False":
    +414            directory = os.getcwd()
    +415
    +416            if not os.path.isdir(directory + "/db"):
    +417                os.mkdir(directory + "/db")
    +418
    +419            url = "sqlite:///{DB}/db/molformulas.sqlite".format(DB=directory)
    +420
    +421        if url[0:6] == "sqlite":
    +422            self.type = "sqlite"
    +423        else:
    +424            self.type = "normal"
    +425
    +426        if url[0:6] == "sqlite":
    +427            engine = create_engine(url, echo=False)
    +428            self.chunks_count = 50
    +429
    +430        elif url[0:10] == "postgresql" or url[0:8] == "postgres":
    +431            # postgresql
    +432            self.chunks_count = 50000
    +433            engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT")
    +434
    +435        return engine  # poolclass=NullPool
    +436
    +437    def __enter__(self):
    +438        """Returns the object."""
    +439        return self
    +440
    +441    def get_dict_by_classes(
    +442        self,
    +443        classes,
    +444        ion_type,
    +445        nominal_mzs,
    +446        ion_charge,
    +447        molecular_search_settings,
    +448        adducts=None,
    +449    ):
    +450        """Returns a dictionary of molecular formulas.
    +451
    +452        Parameters
    +453        ----------
    +454        classes : list
    +455            The list of classes.
    +456        ion_type : str
    +457            The ion type.
    +458        nominal_mzs : list
    +459            The list of nominal m/z values.
    +460        ion_charge : int
    +461            The ion charge.
    +462        molecular_search_settings : MolecularFormulaSearchSettings
    +463            The molecular formula search settings.
    +464        adducts : list, optional
    +465            The list of adducts. Default is None.
    +466
    +467        Returns
    +468        -------
    +469        dict
    +470            The dictionary of molecular formulas.
    +471
    +472        Notes
    +473        -----
    +474        Known issue, when using SQLite:
    +475        if the number of classes and nominal_m/zs are higher than 999 the query will fail
    +476        Solution: use postgres or split query
    +477        """
    +478
    +479        def query_normal(class_list, len_adduct):
    +480            """query for normal database
    +481
    +482            Parameters
    +483            ----------
    +484            class_list : list
    +485                The list of classes.
    +486            len_adduct : int
    +487                The length of the adduct.
    +488
    +489            Returns
    +490            -------
    +491            sqlalchemy.orm.query.Query
    +492                The query.
    +493            """
    +494            base_query = (
    +495                self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms)
    +496                .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id)
    +497                .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id)
    +498            )
    +499
    +500            return base_query.filter(
    +501                and_(
    +502                    HeteroAtoms.name.in_(class_list),
    +503                    and_(
    +504                        MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe,
    +505                        MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe,
    +506                        and_(
    +507                            (
    +508                                (
    +509                                    CarbonHydrogen.h
    +510                                    + HeteroAtoms.halogens_count
    +511                                    - len_adduct
    +512                                )
    +513                                / CarbonHydrogen.c
    +514                            )
    +515                            >= molecular_search_settings.min_hc_filter,
    +516                            (
    +517                                (
    +518                                    CarbonHydrogen.h
    +519                                    + HeteroAtoms.halogens_count
    +520                                    - len_adduct
    +521                                )
    +522                                / CarbonHydrogen.c
    +523                            )
    +524                            <= molecular_search_settings.max_hc_filter,
    +525                            CarbonHydrogen.C
    +526                            >= molecular_search_settings.usedAtoms.get("C")[0],
    +527                            CarbonHydrogen.c
    +528                            <= molecular_search_settings.usedAtoms.get("C")[1],
    +529                            CarbonHydrogen.h
    +530                            >= molecular_search_settings.usedAtoms.get("H")[0],
    +531                            CarbonHydrogen.h
    +532                            <= molecular_search_settings.usedAtoms.get("H")[1],
    +533                        ),
    +534                    ),
    +535                )
    +536            )
    +537
    +538        def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None):
    +539            """add molecular formula to dict
    +540
    +541            Parameters
    +542            ----------
    +543            formulas : sqlalchemy.orm.query.Query
    +544                The query.
    +545            ion_type : str
    +546                The ion type.
    +547            ion_charge : int
    +548                The ion charge.
    +549            adduct_atom : str, optional
    +550                The adduct atom. Default is None.
     551
    -552                    #    if  not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter:
    -553                            
    -554                    #        continue
    -555        
    -556                if classe in dict_res.keys():
    -557                    
    -558                    if nominal_mz in dict_res[classe].keys():
    -559                        
    -560                        dict_res.get(classe).get(nominal_mz).append(formula_obj)
    -561                    
    -562                    else:
    -563
    -564                        dict_res.get(classe)[nominal_mz] = [formula_obj ]  
    -565            
    -566                else:
    -567                    
    -568                    dict_res[classe] = {nominal_mz: [formula_obj] }     
    -569            
    -570            return dict_res
    -571        
    -572        
    -573        len_adducts = 0
    -574        if ion_type == Labels.adduct_ion:
    -575            len_adducts = 1
    -576        
    -577        query = query_normal(classes, len_adducts)
    -578        
    -579        if ion_type == Labels.protonated_de_ion:
    -580            if self.type == 'normal':
    -581                
    -582                query = query.filter(
    -583                                func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_(nominal_mzs)
    -584                                )
    -585                                
    -586                                
    -587            return add_dict_formula(query, ion_type, ion_charge)
    -588        
    -589        if ion_type == Labels.radical_ion:
    -590            if self.type == 'normal':
    -591                query = query.filter(func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_(nominal_mzs))
    -592            return add_dict_formula(query, ion_type, ion_charge)
    -593        
    -594        if ion_type == Labels.adduct_ion:
    -595            dict_res = {}
    -596            if adducts: 
    -597                for atom in adducts:
    -598                    if self.type == 'normal':
    -599                        query = query.filter(func.floor(MolecularFormulaLink._adduct_mz(ion_charge, atom)).in_(nominal_mzs))    
    -600                    dict_res[atom] = add_dict_formula(query, ion_type, ion_charge, adduct_atom=atom)
    -601                return dict_res
    -602        # dump all objs to memory
    -603        self.session.expunge_all()
    -604        
    -605    def check_entry(self,classe, ion_type, molecular_search_settings):
    -606        """ Checks if a molecular formula is in the database.
    -607
    -608        Parameters
    -609        ----------
    -610        classe : str
    -611            The class of the molecular formula.
    -612        ion_type : str
    -613            The ion type.
    -614        molecular_search_settings : MolecularFormulaSearchSettings
    -615            The molecular formula search settings.
    -616        
    -617        Returns
    -618        -------
    -619        sqlalchemy.orm.query.Query
    -620            The query.
    -621        """
    -622        #  get all classes, ion_type, ion charge as str add to a dict or list
    -623        #  then check if class in database
    -624        has_class = self.session.query(exists().where(
    -625            (MolecularFormulaLink.classe == classe)))
    -626        
    -627        return has_class
    -628    
    -629    def get_all_classes(self):
    -630        """ Returns a list of all classes in the database."""
    -631        query = self.session.query(MolecularFormulaLink.classe.distinct().label("classe"))
    -632        
    -633        return query.all()  
    -634    
    -635    def get_all(self,):
    -636        """ Returns a list of all molecular formulas in the database."""
    -637        mol_formulas = self.session.query(MolecularFormulaLink).all()
    -638        
    -639        return mol_formulas
    -640
    -641    def delete_entry(self, row):
    -642        """ Deletes a molecular formula from the database."""
    -643        try:
    -644            self.session.delete(row)  
    -645            self.session.commit()  
    -646        
    -647        except SQLAlchemyError as e:
    -648            self.session.rollback()
    -649            print(str(e))
    -650
    -651    def purge(self, cls):
    -652        """ Deletes all molecular formulas from the database.
    -653        
    -654        Notes 
    -655        -------
    -656        Careful, this will delete the entire database table
    -657
    -658        """
    -659        self.session.query(cls).delete()
    -660        self.session.commit()  
    -661
    -662    def clear_data(self):
    -663        """ Clears the database.
    -664        """
    -665        meta = Base.metadata
    -666        for table in reversed(meta.sorted_tables):
    -667            print ('Clear table %s' % table)
    -668            self.session.execute(table.delete())
    -669        self.session.commit()
    -670
    -671    def close(self, commit=True):
    -672        """ Closes the database.
    -673        
    -674        Parameters
    -675        ----------
    -676        commit : bool, optional
    -677            Whether to commit the session. Default is True.
    -678        """
    -679        # make sure the dbconnection gets closed
    -680        
    -681        if commit: self.commit()
    -682        self.session.close()
    -683        self.engine.dispose()    
    -684   
    -685    def add_engine_pidguard(self, engine):
    -686        """ Adds multiprocessing guards.
    -687        
    -688        Forces a connection to be reconnected if it is detected
    -689        as having been shared to a sub-process.
    +552            Returns
    +553            -------
    +554            dict
    +555                The dictionary of molecular formulas.
    +556
    +557            """
    +558            "organize data by heteroatom classes"
    +559            dict_res = {}
    +560
    +561            def nominal_mass_by_ion_type(formula_obj):
    +562                if ion_type == Labels.protonated_de_ion:
    +563                    return int(formula_obj._protonated_mz(ion_charge))
    +564
    +565                elif ion_type == Labels.radical_ion:
    +566                    return int(formula_obj._radical_mz(ion_charge))
    +567
    +568                elif ion_type == Labels.adduct_ion and adduct_atom:
    +569                    return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
    +570
    +571            for formula_obj, ch_obj, classe_obj in tqdm.tqdm(
    +572                formulas, desc="Loading molecular formula database"
    +573            ):
    +574                nominal_mz = nominal_mass_by_ion_type(formula_obj)
    +575
    +576                if self.type != "normal":
    +577                    if not nominal_mz in nominal_mzs:
    +578                        continue
    +579                classe = classe_obj.name
    +580
    +581                # classe_str = formula.classe_string
    +582
    +583                # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str)
    +584
    +585                formula_dict = formula_obj.to_dict()
    +586
    +587                if formula_dict.get("O"):
    +588                    if (
    +589                        formula_dict.get("O") / formula_dict.get("C")
    +590                        >= molecular_search_settings.max_oc_filter
    +591                    ):
    +592                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter)
    +593                        continue
    +594                    elif (
    +595                        formula_dict.get("O") / formula_dict.get("C")
    +596                        <= molecular_search_settings.min_oc_filter
    +597                    ):
    +598                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter)
    +599                        continue
    +600                    # if formula_dict.get("P"):
    +601
    +602                    #    if  not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter:
    +603
    +604                    #        continue
    +605
    +606                if classe in dict_res.keys():
    +607                    if nominal_mz in dict_res[classe].keys():
    +608                        dict_res.get(classe).get(nominal_mz).append(formula_obj)
    +609
    +610                    else:
    +611                        dict_res.get(classe)[nominal_mz] = [formula_obj]
    +612
    +613                else:
    +614                    dict_res[classe] = {nominal_mz: [formula_obj]}
    +615
    +616            return dict_res
    +617
    +618        len_adducts = 0
    +619        if ion_type == Labels.adduct_ion:
    +620            len_adducts = 1
    +621
    +622        query = query_normal(classes, len_adducts)
    +623
    +624        if ion_type == Labels.protonated_de_ion:
    +625            if self.type == "normal":
    +626                query = query.filter(
    +627                    func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_(
    +628                        nominal_mzs
    +629                    )
    +630                )
    +631
    +632            return add_dict_formula(query, ion_type, ion_charge)
    +633
    +634        if ion_type == Labels.radical_ion:
    +635            if self.type == "normal":
    +636                query = query.filter(
    +637                    func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_(
    +638                        nominal_mzs
    +639                    )
    +640                )
    +641            return add_dict_formula(query, ion_type, ion_charge)
    +642
    +643        if ion_type == Labels.adduct_ion:
    +644            dict_res = {}
    +645            if adducts:
    +646                for atom in adducts:
    +647                    if self.type == "normal":
    +648                        query = query.filter(
    +649                            func.floor(
    +650                                MolecularFormulaLink._adduct_mz(ion_charge, atom)
    +651                            ).in_(nominal_mzs)
    +652                        )
    +653                    dict_res[atom] = add_dict_formula(
    +654                        query, ion_type, ion_charge, adduct_atom=atom
    +655                    )
    +656                return dict_res
    +657        # dump all objs to memory
    +658        self.session.expunge_all()
    +659
    +660    def check_entry(self, classe, ion_type, molecular_search_settings):
    +661        """Checks if a molecular formula is in the database.
    +662
    +663        Parameters
    +664        ----------
    +665        classe : str
    +666            The class of the molecular formula.
    +667        ion_type : str
    +668            The ion type.
    +669        molecular_search_settings : MolecularFormulaSearchSettings
    +670            The molecular formula search settings.
    +671
    +672        Returns
    +673        -------
    +674        sqlalchemy.orm.query.Query
    +675            The query.
    +676        """
    +677        #  get all classes, ion_type, ion charge as str add to a dict or list
    +678        #  then check if class in database
    +679        has_class = self.session.query(
    +680            exists().where((MolecularFormulaLink.classe == classe))
    +681        )
    +682
    +683        return has_class
    +684
    +685    def get_all_classes(self):
    +686        """Returns a list of all classes in the database."""
    +687        query = self.session.query(
    +688            MolecularFormulaLink.classe.distinct().label("classe")
    +689        )
     690
    -691        Parameters
    -692        ----------
    -693        engine : sqlalchemy.engine.base.Engine
    -694            The SQLAlchemy engine.
    -695        
    -696        """
    -697        import os, warnings
    -698     
    -699
    -700        @event.listens_for(engine, "connect")
    -701        def connect(dbapi_connection, connection_record):
    -702            """ Forces a connection to be reconnected if it is detected
    -703            
    -704            Parameters
    -705            ----------
    -706            dbapi_connection : sqlalchemy.engine.base.Engine
    -707                The SQLAlchemy engine.
    -708            connection_record : sqlalchemy.engine.base.Engine
    -709                The SQLAlchemy engine.
    -710            """
    -711            connection_record.info['pid'] = os.getpid()
    -712
    -713        @event.listens_for(engine, "checkout")
    -714        def checkout(dbapi_connection, connection_record, connection_proxy):
    -715            """ Forces a connection to be reconnected if it is detected
    -716            
    -717            Parameters
    -718            ----------
    -719            dbapi_connection : sqlalchemy.engine.base.Engine
    -720                The SQLAlchemy engine.
    -721            connection_record : sqlalchemy.engine.base.Engine
    -722                The SQLAlchemy engine.
    -723            connection_proxy : sqlalchemy.engine.base.Engine
    -724                The SQLAlchemy engine.
    -725            
    -726            Raises
    -727            ------
    -728            exc.DisconnectionError
    -729                If the connection record belongs to a different process.
    -730            
    -731            """
    -732            pid = os.getpid()
    -733            if connection_record.info['pid'] != pid:
    -734                # substitute log.debug() or similar here as desired
    -735                warnings.warn(
    -736                    "Parent process %(orig)s forked (%(newproc)s) with an open "
    -737                    "database connection, "
    -738                    "which is being discarded and recreated." %
    -739                    {"newproc": pid, "orig": connection_record.info['pid']})
    -740                connection_record.connection = connection_proxy.connection = None
    -741                raise exc.DisconnectionError(
    -742                    "Connection record belongs to pid %s, "
    -743                    "attempting to check out in pid %s" %
    -744                    (connection_record.info['pid'], pid)
    -745                )    
    +691        return query.all()
    +692
    +693    def get_all(
    +694        self,
    +695    ):
    +696        """Returns a list of all molecular formulas in the database."""
    +697        mol_formulas = self.session.query(MolecularFormulaLink).all()
    +698
    +699        return mol_formulas
    +700
    +701    def delete_entry(self, row):
    +702        """Deletes a molecular formula from the database."""
    +703        try:
    +704            self.session.delete(row)
    +705            self.session.commit()
    +706
    +707        except SQLAlchemyError as e:
    +708            self.session.rollback()
    +709            print(str(e))
    +710
    +711    def purge(self, cls):
    +712        """Deletes all molecular formulas from the database.
    +713
    +714        Notes
    +715        -------
    +716        Careful, this will delete the entire database table
    +717
    +718        """
    +719        self.session.query(cls).delete()
    +720        self.session.commit()
    +721
    +722    def clear_data(self):
    +723        """Clears the database."""
    +724        meta = Base.metadata
    +725        for table in reversed(meta.sorted_tables):
    +726            print("Clear table %s" % table)
    +727            self.session.execute(table.delete())
    +728        self.session.commit()
    +729
    +730    def close(self, commit=True):
    +731        """Closes the database.
    +732
    +733        Parameters
    +734        ----------
    +735        commit : bool, optional
    +736            Whether to commit the session. Default is True.
    +737        """
    +738        # make sure the dbconnection gets closed
    +739
    +740        if commit:
    +741            self.commit()
    +742        self.session.close()
    +743        self.engine.dispose()
    +744
    +745    def add_engine_pidguard(self, engine):
    +746        """Adds multiprocessing guards.
    +747
    +748        Forces a connection to be reconnected if it is detected
    +749        as having been shared to a sub-process.
    +750
    +751        Parameters
    +752        ----------
    +753        engine : sqlalchemy.engine.base.Engine
    +754            The SQLAlchemy engine.
    +755
    +756        """
    +757        import os
    +758        import warnings
    +759
    +760        @event.listens_for(engine, "connect")
    +761        def connect(dbapi_connection, connection_record):
    +762            """Forces a connection to be reconnected if it is detected
    +763
    +764            Parameters
    +765            ----------
    +766            dbapi_connection : sqlalchemy.engine.base.Engine
    +767                The SQLAlchemy engine.
    +768            connection_record : sqlalchemy.engine.base.Engine
    +769                The SQLAlchemy engine.
    +770            """
    +771            connection_record.info["pid"] = os.getpid()
    +772
    +773        @event.listens_for(engine, "checkout")
    +774        def checkout(dbapi_connection, connection_record, connection_proxy):
    +775            """Forces a connection to be reconnected if it is detected
    +776
    +777            Parameters
    +778            ----------
    +779            dbapi_connection : sqlalchemy.engine.base.Engine
    +780                The SQLAlchemy engine.
    +781            connection_record : sqlalchemy.engine.base.Engine
    +782                The SQLAlchemy engine.
    +783            connection_proxy : sqlalchemy.engine.base.Engine
    +784                The SQLAlchemy engine.
    +785
    +786            Raises
    +787            ------
    +788            exc.DisconnectionError
    +789                If the connection record belongs to a different process.
    +790
    +791            """
    +792            pid = os.getpid()
    +793            if connection_record.info["pid"] != pid:
    +794                # substitute log.debug() or similar here as desired
    +795                warnings.warn(
    +796                    "Parent process %(orig)s forked (%(newproc)s) with an open "
    +797                    "database connection, "
    +798                    "which is being discarded and recreated."
    +799                    % {"newproc": pid, "orig": connection_record.info["pid"]}
    +800                )
    +801                connection_record.connection = connection_proxy.connection = None
    +802                raise exc.DisconnectionError(
    +803                    "Connection record belongs to pid %s, "
    +804                    "attempting to check out in pid %s"
    +805                    % (connection_record.info["pid"], pid)
    +806                )
     
    @@ -2454,21 +2568,20 @@
    Methods
    -
    321    def __init__(self, url=None, echo=False):
    -322
    -323        self.engine = self.init_engine(url)
    -324
    -325        self.add_engine_pidguard(self.engine)
    -326
    -327        session_factory = sessionmaker(bind=self.engine)
    -328
    -329        Session = scoped_session(session_factory)
    -330
    -331        self.session = session_factory()
    -332
    -333        Base.metadata.create_all(self.engine)
    -334
    -335        self.session.commit()
    +            
    342    def __init__(self, url=None, echo=False):
    +343        self.engine = self.init_engine(url)
    +344
    +345        self.add_engine_pidguard(self.engine)
    +346
    +347        session_factory = sessionmaker(bind=self.engine)
    +348
    +349        Session = scoped_session(session_factory)
    +350
    +351        self.session = session_factory()
    +352
    +353        Base.metadata.create_all(self.engine)
    +354
    +355        self.session.commit()
     
    @@ -2508,21 +2621,21 @@
    Methods
    -
    355    def initiate_database(self, url, database_name):  #CREATION
    -356        """ Creates the database.
    -357        
    -358        Parameters
    -359        ----------
    -360        url : str
    -361            The URL for the database.
    -362        database_name : str
    -363            The name of the database.
    -364        """
    -365        engine = create_engine(url)
    -366        conn = engine.connect()
    -367        conn.execute("commit")
    -368        conn.execute("create database " + database_name)
    -369        conn.close()
    +            
    375    def initiate_database(self, url, database_name):  # CREATION
    +376        """Creates the database.
    +377
    +378        Parameters
    +379        ----------
    +380        url : str
    +381            The URL for the database.
    +382        database_name : str
    +383            The name of the database.
    +384        """
    +385        engine = create_engine(url)
    +386        conn = engine.connect()
    +387        conn.execute("commit")
    +388        conn.execute("create database " + database_name)
    +389        conn.close()
     
    @@ -2551,14 +2664,13 @@
    Parameters
    -
    371    def commit(self):
    -372        """ Commits the session.
    -373        """
    -374        try:
    -375            self.session.commit()  
    -376        except SQLAlchemyError as e:
    -377            self.session.rollback()
    -378            print(str(e))
    +            
    391    def commit(self):
    +392        """Commits the session."""
    +393        try:
    +394            self.session.commit()
    +395        except SQLAlchemyError as e:
    +396            self.session.rollback()
    +397            print(str(e))
     
    @@ -2578,43 +2690,43 @@
    Parameters
    -
    380    def init_engine(self, url):
    -381        """ Initializes the SQLAlchemy engine.
    -382        
    -383        Parameters
    -384        ----------
    -385        url : str
    -386            The URL for the database.
    -387        
    -388        Returns
    -389        -------
    -390        sqlalchemy.engine.base.Engine
    -391            The SQLAlchemy engine.
    -392        
    -393        """
    -394        if not url or url == 'None' or url == 'False':
    -395            directory = os.getcwd()
    -396
    -397            if not os.path.isdir(directory + '/db'):
    -398                os.mkdir(directory + '/db')
    -399
    -400            url = 'sqlite:///{DB}/db/molformulas.sqlite'.format(DB=directory)
    +            
    399    def init_engine(self, url):
    +400        """Initializes the SQLAlchemy engine.
     401
    -402        if url[0:6] == 'sqlite':
    -403            self.type = 'sqlite'
    -404        else:
    -405            self.type = 'normal'
    -406            
    -407        if url[0:6] == 'sqlite':
    -408            engine = create_engine(url, echo = False)
    -409            self.chunks_count = 50
    -410        
    -411        elif url[0:10] == 'postgresql' or url[0:8] == 'postgres':
    -412            #postgresql
    -413            self.chunks_count = 50000
    -414            engine = create_engine(url, echo = False, isolation_level="AUTOCOMMIT")
    -415        
    -416        return engine# poolclass=NullPool
    +402        Parameters
    +403        ----------
    +404        url : str
    +405            The URL for the database.
    +406
    +407        Returns
    +408        -------
    +409        sqlalchemy.engine.base.Engine
    +410            The SQLAlchemy engine.
    +411
    +412        """
    +413        if not url or url == "None" or url == "False":
    +414            directory = os.getcwd()
    +415
    +416            if not os.path.isdir(directory + "/db"):
    +417                os.mkdir(directory + "/db")
    +418
    +419            url = "sqlite:///{DB}/db/molformulas.sqlite".format(DB=directory)
    +420
    +421        if url[0:6] == "sqlite":
    +422            self.type = "sqlite"
    +423        else:
    +424            self.type = "normal"
    +425
    +426        if url[0:6] == "sqlite":
    +427            engine = create_engine(url, echo=False)
    +428            self.chunks_count = 50
    +429
    +430        elif url[0:10] == "postgresql" or url[0:8] == "postgres":
    +431            # postgresql
    +432            self.chunks_count = 50000
    +433            engine = create_engine(url, echo=False, isolation_level="AUTOCOMMIT")
    +434
    +435        return engine  # poolclass=NullPool
     
    @@ -2647,187 +2759,224 @@
    Returns
    -
    423    def get_dict_by_classes(self, classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None):
    -424        """ Returns a dictionary of molecular formulas.
    -425        
    -426        Parameters
    -427        ----------
    -428        classes : list
    -429            The list of classes.
    -430        ion_type : str
    -431            The ion type.
    -432        nominal_mzs : list
    -433            The list of nominal m/z values.
    -434        ion_charge : int
    -435            The ion charge.
    -436        molecular_search_settings : MolecularFormulaSearchSettings
    -437            The molecular formula search settings.
    -438        adducts : list, optional
    -439            The list of adducts. Default is None.
    -440        
    -441        Returns
    -442        -------
    -443        dict
    -444            The dictionary of molecular formulas.
    -445        
    -446        Notes
    -447        -----
    -448        Known issue, when using SQLite:
    -449        if the number of classes and nominal_m/zs are higher than 999 the query will fail
    -450        Solution: use postgres or split query
    -451        """                     
    -452         
    -453        def query_normal(class_list, len_adduct):
    -454            """ query for normal database
    -455            
    -456            Parameters
    -457            ----------
    -458            class_list : list
    -459                The list of classes.
    -460            len_adduct : int
    -461                The length of the adduct.
    -462            
    -463            Returns
    -464            -------
    -465            sqlalchemy.orm.query.Query
    -466                The query.
    -467            """
    -468            base_query = self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms)\
    -469                                .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id)\
    -470                                .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id)
    -471            
    -472            return base_query.filter(
    -473                and_(
    -474                    HeteroAtoms.name.in_(class_list), 
    -475                    and_(
    -476                        MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe, 
    -477                        MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe, 
    -478                        and_(
    -479                            ((CarbonHydrogen.h + HeteroAtoms.halogens_count - len_adduct) / CarbonHydrogen.c) >= molecular_search_settings.min_hc_filter,
    -480                            ((CarbonHydrogen.h + HeteroAtoms.halogens_count - len_adduct) / CarbonHydrogen.c) <= molecular_search_settings.max_hc_filter,
    -481                            CarbonHydrogen.C >= molecular_search_settings.usedAtoms.get("C")[0],
    -482                            CarbonHydrogen.c <= molecular_search_settings.usedAtoms.get("C")[1],
    -483                            CarbonHydrogen.h >= molecular_search_settings.usedAtoms.get("H")[0],
    -484                            CarbonHydrogen.h <= molecular_search_settings.usedAtoms.get("H")[1],
    -485                        )
    -486                    )
    -487                )
    -488            )
    -489
    -490        def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None):
    -491            """ add molecular formula to dict
    -492            
    -493            Parameters
    -494            ----------
    -495            formulas : sqlalchemy.orm.query.Query
    -496                The query.
    -497            ion_type : str
    -498                The ion type.
    -499            ion_charge : int
    -500                The ion charge.
    -501            adduct_atom : str, optional
    -502                The adduct atom. Default is None.
    -503            
    -504            Returns
    -505            -------
    -506            dict
    -507                The dictionary of molecular formulas.
    -508            
    -509            """
    -510            "organize data by heteroatom classes"
    -511            dict_res = {}
    -512
    -513            def nominal_mass_by_ion_type(formula_obj):
    -514                
    -515                if ion_type == Labels.protonated_de_ion:
    -516                
    -517                    return int(formula_obj._protonated_mz(ion_charge))
    -518                
    -519                elif ion_type == Labels.radical_ion:
    -520                    
    -521                    return int(formula_obj._radical_mz(ion_charge))
    -522
    -523                elif ion_type == Labels.adduct_ion and adduct_atom:
    -524                    
    -525                    return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
    -526            
    -527            for formula_obj, ch_obj, classe_obj in tqdm.tqdm(formulas, desc="Loading molecular formula database"):
    -528                
    -529                nominal_mz = nominal_mass_by_ion_type(formula_obj)
    -530                
    -531                if self.type != 'normal':
    -532                    if not nominal_mz in nominal_mzs:
    -533                        continue
    -534                classe = classe_obj.name
    -535
    -536                # classe_str = formula.classe_string
    -537                
    -538                # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str)
    -539                
    -540                formula_dict = formula_obj.to_dict()
    -541
    -542                if formula_dict.get("O"):
    -543                    
    -544                    if formula_dict.get("O") / formula_dict.get("C") >= molecular_search_settings.max_oc_filter:
    -545                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter)
    -546                        continue
    -547                    elif formula_dict.get("O") / formula_dict.get("C") <= molecular_search_settings.min_oc_filter:
    -548                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter)
    -549                        continue
    -550                    #if formula_dict.get("P"):
    +            
    441    def get_dict_by_classes(
    +442        self,
    +443        classes,
    +444        ion_type,
    +445        nominal_mzs,
    +446        ion_charge,
    +447        molecular_search_settings,
    +448        adducts=None,
    +449    ):
    +450        """Returns a dictionary of molecular formulas.
    +451
    +452        Parameters
    +453        ----------
    +454        classes : list
    +455            The list of classes.
    +456        ion_type : str
    +457            The ion type.
    +458        nominal_mzs : list
    +459            The list of nominal m/z values.
    +460        ion_charge : int
    +461            The ion charge.
    +462        molecular_search_settings : MolecularFormulaSearchSettings
    +463            The molecular formula search settings.
    +464        adducts : list, optional
    +465            The list of adducts. Default is None.
    +466
    +467        Returns
    +468        -------
    +469        dict
    +470            The dictionary of molecular formulas.
    +471
    +472        Notes
    +473        -----
    +474        Known issue, when using SQLite:
    +475        if the number of classes and nominal_m/zs are higher than 999 the query will fail
    +476        Solution: use postgres or split query
    +477        """
    +478
    +479        def query_normal(class_list, len_adduct):
    +480            """query for normal database
    +481
    +482            Parameters
    +483            ----------
    +484            class_list : list
    +485                The list of classes.
    +486            len_adduct : int
    +487                The length of the adduct.
    +488
    +489            Returns
    +490            -------
    +491            sqlalchemy.orm.query.Query
    +492                The query.
    +493            """
    +494            base_query = (
    +495                self.session.query(MolecularFormulaLink, CarbonHydrogen, HeteroAtoms)
    +496                .filter(MolecularFormulaLink.carbonHydrogen_id == CarbonHydrogen.id)
    +497                .filter(MolecularFormulaLink.heteroAtoms_id == HeteroAtoms.id)
    +498            )
    +499
    +500            return base_query.filter(
    +501                and_(
    +502                    HeteroAtoms.name.in_(class_list),
    +503                    and_(
    +504                        MolecularFormulaLink.DBE >= molecular_search_settings.min_dbe,
    +505                        MolecularFormulaLink.DBE <= molecular_search_settings.max_dbe,
    +506                        and_(
    +507                            (
    +508                                (
    +509                                    CarbonHydrogen.h
    +510                                    + HeteroAtoms.halogens_count
    +511                                    - len_adduct
    +512                                )
    +513                                / CarbonHydrogen.c
    +514                            )
    +515                            >= molecular_search_settings.min_hc_filter,
    +516                            (
    +517                                (
    +518                                    CarbonHydrogen.h
    +519                                    + HeteroAtoms.halogens_count
    +520                                    - len_adduct
    +521                                )
    +522                                / CarbonHydrogen.c
    +523                            )
    +524                            <= molecular_search_settings.max_hc_filter,
    +525                            CarbonHydrogen.C
    +526                            >= molecular_search_settings.usedAtoms.get("C")[0],
    +527                            CarbonHydrogen.c
    +528                            <= molecular_search_settings.usedAtoms.get("C")[1],
    +529                            CarbonHydrogen.h
    +530                            >= molecular_search_settings.usedAtoms.get("H")[0],
    +531                            CarbonHydrogen.h
    +532                            <= molecular_search_settings.usedAtoms.get("H")[1],
    +533                        ),
    +534                    ),
    +535                )
    +536            )
    +537
    +538        def add_dict_formula(formulas, ion_type, ion_charge, adduct_atom=None):
    +539            """add molecular formula to dict
    +540
    +541            Parameters
    +542            ----------
    +543            formulas : sqlalchemy.orm.query.Query
    +544                The query.
    +545            ion_type : str
    +546                The ion type.
    +547            ion_charge : int
    +548                The ion charge.
    +549            adduct_atom : str, optional
    +550                The adduct atom. Default is None.
     551
    -552                    #    if  not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter:
    -553                            
    -554                    #        continue
    -555        
    -556                if classe in dict_res.keys():
    -557                    
    -558                    if nominal_mz in dict_res[classe].keys():
    -559                        
    -560                        dict_res.get(classe).get(nominal_mz).append(formula_obj)
    -561                    
    -562                    else:
    -563
    -564                        dict_res.get(classe)[nominal_mz] = [formula_obj ]  
    -565            
    -566                else:
    -567                    
    -568                    dict_res[classe] = {nominal_mz: [formula_obj] }     
    -569            
    -570            return dict_res
    -571        
    -572        
    -573        len_adducts = 0
    -574        if ion_type == Labels.adduct_ion:
    -575            len_adducts = 1
    -576        
    -577        query = query_normal(classes, len_adducts)
    -578        
    -579        if ion_type == Labels.protonated_de_ion:
    -580            if self.type == 'normal':
    -581                
    -582                query = query.filter(
    -583                                func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_(nominal_mzs)
    -584                                )
    -585                                
    -586                                
    -587            return add_dict_formula(query, ion_type, ion_charge)
    -588        
    -589        if ion_type == Labels.radical_ion:
    -590            if self.type == 'normal':
    -591                query = query.filter(func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_(nominal_mzs))
    -592            return add_dict_formula(query, ion_type, ion_charge)
    -593        
    -594        if ion_type == Labels.adduct_ion:
    -595            dict_res = {}
    -596            if adducts: 
    -597                for atom in adducts:
    -598                    if self.type == 'normal':
    -599                        query = query.filter(func.floor(MolecularFormulaLink._adduct_mz(ion_charge, atom)).in_(nominal_mzs))    
    -600                    dict_res[atom] = add_dict_formula(query, ion_type, ion_charge, adduct_atom=atom)
    -601                return dict_res
    -602        # dump all objs to memory
    -603        self.session.expunge_all()
    +552            Returns
    +553            -------
    +554            dict
    +555                The dictionary of molecular formulas.
    +556
    +557            """
    +558            "organize data by heteroatom classes"
    +559            dict_res = {}
    +560
    +561            def nominal_mass_by_ion_type(formula_obj):
    +562                if ion_type == Labels.protonated_de_ion:
    +563                    return int(formula_obj._protonated_mz(ion_charge))
    +564
    +565                elif ion_type == Labels.radical_ion:
    +566                    return int(formula_obj._radical_mz(ion_charge))
    +567
    +568                elif ion_type == Labels.adduct_ion and adduct_atom:
    +569                    return int(formula_obj._adduct_mz(ion_charge, adduct_atom))
    +570
    +571            for formula_obj, ch_obj, classe_obj in tqdm.tqdm(
    +572                formulas, desc="Loading molecular formula database"
    +573            ):
    +574                nominal_mz = nominal_mass_by_ion_type(formula_obj)
    +575
    +576                if self.type != "normal":
    +577                    if not nominal_mz in nominal_mzs:
    +578                        continue
    +579                classe = classe_obj.name
    +580
    +581                # classe_str = formula.classe_string
    +582
    +583                # pbar.set_description_str(desc="Loading molecular formula database for class %s " % classe_str)
    +584
    +585                formula_dict = formula_obj.to_dict()
    +586
    +587                if formula_dict.get("O"):
    +588                    if (
    +589                        formula_dict.get("O") / formula_dict.get("C")
    +590                        >= molecular_search_settings.max_oc_filter
    +591                    ):
    +592                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.max_oc_filter)
    +593                        continue
    +594                    elif (
    +595                        formula_dict.get("O") / formula_dict.get("C")
    +596                        <= molecular_search_settings.min_oc_filter
    +597                    ):
    +598                        # print(formula_dict.get("O") / formula_dict.get("C"), molecular_search_settings.min_oc_filter)
    +599                        continue
    +600                    # if formula_dict.get("P"):
    +601
    +602                    #    if  not (formula_dict.get("O") -2)/ formula_dict.get("P") >= molecular_search_settings.min_op_filter:
    +603
    +604                    #        continue
    +605
    +606                if classe in dict_res.keys():
    +607                    if nominal_mz in dict_res[classe].keys():
    +608                        dict_res.get(classe).get(nominal_mz).append(formula_obj)
    +609
    +610                    else:
    +611                        dict_res.get(classe)[nominal_mz] = [formula_obj]
    +612
    +613                else:
    +614                    dict_res[classe] = {nominal_mz: [formula_obj]}
    +615
    +616            return dict_res
    +617
    +618        len_adducts = 0
    +619        if ion_type == Labels.adduct_ion:
    +620            len_adducts = 1
    +621
    +622        query = query_normal(classes, len_adducts)
    +623
    +624        if ion_type == Labels.protonated_de_ion:
    +625            if self.type == "normal":
    +626                query = query.filter(
    +627                    func.floor(MolecularFormulaLink._protonated_mz(ion_charge)).in_(
    +628                        nominal_mzs
    +629                    )
    +630                )
    +631
    +632            return add_dict_formula(query, ion_type, ion_charge)
    +633
    +634        if ion_type == Labels.radical_ion:
    +635            if self.type == "normal":
    +636                query = query.filter(
    +637                    func.floor(MolecularFormulaLink._radical_mz(ion_charge)).in_(
    +638                        nominal_mzs
    +639                    )
    +640                )
    +641            return add_dict_formula(query, ion_type, ion_charge)
    +642
    +643        if ion_type == Labels.adduct_ion:
    +644            dict_res = {}
    +645            if adducts:
    +646                for atom in adducts:
    +647                    if self.type == "normal":
    +648                        query = query.filter(
    +649                            func.floor(
    +650                                MolecularFormulaLink._adduct_mz(ion_charge, atom)
    +651                            ).in_(nominal_mzs)
    +652                        )
    +653                    dict_res[atom] = add_dict_formula(
    +654                        query, ion_type, ion_charge, adduct_atom=atom
    +655                    )
    +656                return dict_res
    +657        # dump all objs to memory
    +658        self.session.expunge_all()
     
    @@ -2876,29 +3025,30 @@
    Notes
    -
    605    def check_entry(self,classe, ion_type, molecular_search_settings):
    -606        """ Checks if a molecular formula is in the database.
    -607
    -608        Parameters
    -609        ----------
    -610        classe : str
    -611            The class of the molecular formula.
    -612        ion_type : str
    -613            The ion type.
    -614        molecular_search_settings : MolecularFormulaSearchSettings
    -615            The molecular formula search settings.
    -616        
    -617        Returns
    -618        -------
    -619        sqlalchemy.orm.query.Query
    -620            The query.
    -621        """
    -622        #  get all classes, ion_type, ion charge as str add to a dict or list
    -623        #  then check if class in database
    -624        has_class = self.session.query(exists().where(
    -625            (MolecularFormulaLink.classe == classe)))
    -626        
    -627        return has_class
    +            
    660    def check_entry(self, classe, ion_type, molecular_search_settings):
    +661        """Checks if a molecular formula is in the database.
    +662
    +663        Parameters
    +664        ----------
    +665        classe : str
    +666            The class of the molecular formula.
    +667        ion_type : str
    +668            The ion type.
    +669        molecular_search_settings : MolecularFormulaSearchSettings
    +670            The molecular formula search settings.
    +671
    +672        Returns
    +673        -------
    +674        sqlalchemy.orm.query.Query
    +675            The query.
    +676        """
    +677        #  get all classes, ion_type, ion charge as str add to a dict or list
    +678        #  then check if class in database
    +679        has_class = self.session.query(
    +680            exists().where((MolecularFormulaLink.classe == classe))
    +681        )
    +682
    +683        return has_class
     
    @@ -2935,11 +3085,13 @@
    Returns
    -
    629    def get_all_classes(self):
    -630        """ Returns a list of all classes in the database."""
    -631        query = self.session.query(MolecularFormulaLink.classe.distinct().label("classe"))
    -632        
    -633        return query.all()  
    +            
    685    def get_all_classes(self):
    +686        """Returns a list of all classes in the database."""
    +687        query = self.session.query(
    +688            MolecularFormulaLink.classe.distinct().label("classe")
    +689        )
    +690
    +691        return query.all()
     
    @@ -2959,11 +3111,13 @@
    Returns
    -
    635    def get_all(self,):
    -636        """ Returns a list of all molecular formulas in the database."""
    -637        mol_formulas = self.session.query(MolecularFormulaLink).all()
    -638        
    -639        return mol_formulas
    +            
    693    def get_all(
    +694        self,
    +695    ):
    +696        """Returns a list of all molecular formulas in the database."""
    +697        mol_formulas = self.session.query(MolecularFormulaLink).all()
    +698
    +699        return mol_formulas
     
    @@ -2983,15 +3137,15 @@
    Returns
    -
    641    def delete_entry(self, row):
    -642        """ Deletes a molecular formula from the database."""
    -643        try:
    -644            self.session.delete(row)  
    -645            self.session.commit()  
    -646        
    -647        except SQLAlchemyError as e:
    -648            self.session.rollback()
    -649            print(str(e))
    +            
    701    def delete_entry(self, row):
    +702        """Deletes a molecular formula from the database."""
    +703        try:
    +704            self.session.delete(row)
    +705            self.session.commit()
    +706
    +707        except SQLAlchemyError as e:
    +708            self.session.rollback()
    +709            print(str(e))
     
    @@ -3011,16 +3165,16 @@
    Returns
    -
    651    def purge(self, cls):
    -652        """ Deletes all molecular formulas from the database.
    -653        
    -654        Notes 
    -655        -------
    -656        Careful, this will delete the entire database table
    -657
    -658        """
    -659        self.session.query(cls).delete()
    -660        self.session.commit()  
    +            
    711    def purge(self, cls):
    +712        """Deletes all molecular formulas from the database.
    +713
    +714        Notes
    +715        -------
    +716        Careful, this will delete the entire database table
    +717
    +718        """
    +719        self.session.query(cls).delete()
    +720        self.session.commit()
     
    @@ -3044,14 +3198,13 @@
    Notes
    -
    662    def clear_data(self):
    -663        """ Clears the database.
    -664        """
    -665        meta = Base.metadata
    -666        for table in reversed(meta.sorted_tables):
    -667            print ('Clear table %s' % table)
    -668            self.session.execute(table.delete())
    -669        self.session.commit()
    +            
    722    def clear_data(self):
    +723        """Clears the database."""
    +724        meta = Base.metadata
    +725        for table in reversed(meta.sorted_tables):
    +726            print("Clear table %s" % table)
    +727            self.session.execute(table.delete())
    +728        self.session.commit()
     
    @@ -3071,19 +3224,20 @@
    Notes
    -
    671    def close(self, commit=True):
    -672        """ Closes the database.
    -673        
    -674        Parameters
    -675        ----------
    -676        commit : bool, optional
    -677            Whether to commit the session. Default is True.
    -678        """
    -679        # make sure the dbconnection gets closed
    -680        
    -681        if commit: self.commit()
    -682        self.session.close()
    -683        self.engine.dispose()    
    +            
    730    def close(self, commit=True):
    +731        """Closes the database.
    +732
    +733        Parameters
    +734        ----------
    +735        commit : bool, optional
    +736            Whether to commit the session. Default is True.
    +737        """
    +738        # make sure the dbconnection gets closed
    +739
    +740        if commit:
    +741            self.commit()
    +742        self.session.close()
    +743        self.engine.dispose()
     
    @@ -3110,67 +3264,68 @@
    Parameters
    -
    685    def add_engine_pidguard(self, engine):
    -686        """ Adds multiprocessing guards.
    -687        
    -688        Forces a connection to be reconnected if it is detected
    -689        as having been shared to a sub-process.
    -690
    -691        Parameters
    -692        ----------
    -693        engine : sqlalchemy.engine.base.Engine
    -694            The SQLAlchemy engine.
    -695        
    -696        """
    -697        import os, warnings
    -698     
    -699
    -700        @event.listens_for(engine, "connect")
    -701        def connect(dbapi_connection, connection_record):
    -702            """ Forces a connection to be reconnected if it is detected
    -703            
    -704            Parameters
    -705            ----------
    -706            dbapi_connection : sqlalchemy.engine.base.Engine
    -707                The SQLAlchemy engine.
    -708            connection_record : sqlalchemy.engine.base.Engine
    -709                The SQLAlchemy engine.
    -710            """
    -711            connection_record.info['pid'] = os.getpid()
    -712
    -713        @event.listens_for(engine, "checkout")
    -714        def checkout(dbapi_connection, connection_record, connection_proxy):
    -715            """ Forces a connection to be reconnected if it is detected
    -716            
    -717            Parameters
    -718            ----------
    -719            dbapi_connection : sqlalchemy.engine.base.Engine
    -720                The SQLAlchemy engine.
    -721            connection_record : sqlalchemy.engine.base.Engine
    -722                The SQLAlchemy engine.
    -723            connection_proxy : sqlalchemy.engine.base.Engine
    -724                The SQLAlchemy engine.
    -725            
    -726            Raises
    -727            ------
    -728            exc.DisconnectionError
    -729                If the connection record belongs to a different process.
    -730            
    -731            """
    -732            pid = os.getpid()
    -733            if connection_record.info['pid'] != pid:
    -734                # substitute log.debug() or similar here as desired
    -735                warnings.warn(
    -736                    "Parent process %(orig)s forked (%(newproc)s) with an open "
    -737                    "database connection, "
    -738                    "which is being discarded and recreated." %
    -739                    {"newproc": pid, "orig": connection_record.info['pid']})
    -740                connection_record.connection = connection_proxy.connection = None
    -741                raise exc.DisconnectionError(
    -742                    "Connection record belongs to pid %s, "
    -743                    "attempting to check out in pid %s" %
    -744                    (connection_record.info['pid'], pid)
    -745                )    
    +            
    745    def add_engine_pidguard(self, engine):
    +746        """Adds multiprocessing guards.
    +747
    +748        Forces a connection to be reconnected if it is detected
    +749        as having been shared to a sub-process.
    +750
    +751        Parameters
    +752        ----------
    +753        engine : sqlalchemy.engine.base.Engine
    +754            The SQLAlchemy engine.
    +755
    +756        """
    +757        import os
    +758        import warnings
    +759
    +760        @event.listens_for(engine, "connect")
    +761        def connect(dbapi_connection, connection_record):
    +762            """Forces a connection to be reconnected if it is detected
    +763
    +764            Parameters
    +765            ----------
    +766            dbapi_connection : sqlalchemy.engine.base.Engine
    +767                The SQLAlchemy engine.
    +768            connection_record : sqlalchemy.engine.base.Engine
    +769                The SQLAlchemy engine.
    +770            """
    +771            connection_record.info["pid"] = os.getpid()
    +772
    +773        @event.listens_for(engine, "checkout")
    +774        def checkout(dbapi_connection, connection_record, connection_proxy):
    +775            """Forces a connection to be reconnected if it is detected
    +776
    +777            Parameters
    +778            ----------
    +779            dbapi_connection : sqlalchemy.engine.base.Engine
    +780                The SQLAlchemy engine.
    +781            connection_record : sqlalchemy.engine.base.Engine
    +782                The SQLAlchemy engine.
    +783            connection_proxy : sqlalchemy.engine.base.Engine
    +784                The SQLAlchemy engine.
    +785
    +786            Raises
    +787            ------
    +788            exc.DisconnectionError
    +789                If the connection record belongs to a different process.
    +790
    +791            """
    +792            pid = os.getpid()
    +793            if connection_record.info["pid"] != pid:
    +794                # substitute log.debug() or similar here as desired
    +795                warnings.warn(
    +796                    "Parent process %(orig)s forked (%(newproc)s) with an open "
    +797                    "database connection, "
    +798                    "which is being discarded and recreated."
    +799                    % {"newproc": pid, "orig": connection_record.info["pid"]}
    +800                )
    +801                connection_record.connection = connection_proxy.connection = None
    +802                raise exc.DisconnectionError(
    +803                    "Connection record belongs to pid %s, "
    +804                    "attempting to check out in pid %s"
    +805                    % (connection_record.info["pid"], pid)
    +806                )
     
    diff --git a/docs/corems/molecular_id/input/nistMSI.html b/docs/corems/molecular_id/input/nistMSI.html index 8cabe9c5..4a94ffb6 100644 --- a/docs/corems/molecular_id/input/nistMSI.html +++ b/docs/corems/molecular_id/input/nistMSI.html @@ -83,157 +83,146 @@

    6 7from corems.molecular_id.factory.EI_SQL import EI_LowRes_SQLite 8 - 9class ReadNistMSI(Thread): - 10 """ A class for reading NIST MSI files and storing the data in a SQLite database. - 11 - 12 Parameters - 13 ---------- - 14 file_path : str - 15 The path to the NIST MSI file. - 16 url : str, optional - 17 The URL for the SQLite database. Default is 'sqlite://'. - 18 - 19 Raises - 20 ------ - 21 FileExistsError - 22 If the specified file does not exist. - 23 - 24 Attributes - 25 ---------- - 26 file_path : str - 27 The path to the NIST MSI file. - 28 url : str - 29 The URL for the SQLite database. - 30 sqlLite_obj : EI_LowRes_SQLite - 31 The SQLite object for storing the compound data. - 32 - 33 Methods - 34 ------- - 35 * run(). - 36 Runs the thread and initializes the SQLite object. - 37 * get_sqlLite_obj(). - 38 Returns the SQLite object. - 39 * get_compound_data_dict_list(). - 40 Parses the NIST MSI file and returns a list of compound data dictionaries. - 41 """ - 42 - 43 def __init__(self, file_path, url='sqlite://'): - 44 + 9 + 10class ReadNistMSI(Thread): + 11 """A class for reading NIST MSI files and storing the data in a SQLite database. + 12 + 13 Parameters + 14 ---------- + 15 file_path : str + 16 The path to the NIST MSI file. + 17 url : str, optional + 18 The URL for the SQLite database. Default is 'sqlite://'. + 19 + 20 Raises + 21 ------ + 22 FileExistsError + 23 If the specified file does not exist. + 24 + 25 Attributes + 26 ---------- + 27 file_path : str + 28 The path to the NIST MSI file. + 29 url : str + 30 The URL for the SQLite database. + 31 sqlLite_obj : EI_LowRes_SQLite + 32 The SQLite object for storing the compound data. + 33 + 34 Methods + 35 ------- + 36 * run(). + 37 Runs the thread and initializes the SQLite object. + 38 * get_sqlLite_obj(). + 39 Returns the SQLite object. + 40 * get_compound_data_dict_list(). + 41 Parses the NIST MSI file and returns a list of compound data dictionaries. 
+ 42 """ + 43 + 44 def __init__(self, file_path, url="sqlite://"): 45 Thread.__init__(self) 46 file_path = Path(file_path) 47 48 if not file_path.exists(): - 49 - 50 raise FileExistsError("File does not exist: " + file_path) - 51 - 52 self.file_path = file_path - 53 - 54 self.url = url - 55 - 56 def run(self): - 57 """ Runs the thread and initializes the SQLite object. - 58 """ - 59 self.sqlLite_obj = self.get_sqlLite_obj() - 60 - 61 def get_sqlLite_obj(self): - 62 """ Returns the SQLite object. - 63 - 64 Returns - 65 ------- - 66 EI_LowRes_SQLite - 67 The SQLite object for storing the compound data. - 68 """ - 69 compound_data_dict_list = self.get_compound_data_dict_list() + 49 raise FileExistsError("File does not exist: " + file_path) + 50 + 51 self.file_path = file_path + 52 + 53 self.url = url + 54 + 55 def run(self): + 56 """Runs the thread and initializes the SQLite object.""" + 57 self.sqlLite_obj = self.get_sqlLite_obj() + 58 + 59 def get_sqlLite_obj(self): + 60 """Returns the SQLite object. + 61 + 62 Returns + 63 ------- + 64 EI_LowRes_SQLite + 65 The SQLite object for storing the compound data. 
+ 66 """ + 67 compound_data_dict_list = self.get_compound_data_dict_list() + 68 + 69 sqlLite_obj = EI_LowRes_SQLite(url=self.url) 70 - 71 sqlLite_obj = EI_LowRes_SQLite(url=self.url) - 72 - 73 for data_dict in compound_data_dict_list: - 74 if not data_dict.get('NUM PEAKS'): - 75 data_dict['NUM PEAKS'] = len(data_dict.get('mz')) - 76 if not data_dict.get('CASNO'): - 77 data_dict['CASNO'] = data_dict.get('CAS') - 78 if not data_dict['CASNO']: - 79 data_dict['CASNO'] = 0 - 80 #print(data_dict) - 81 try: - 82 sqlLite_obj.add_compound(data_dict) - 83 except: - 84 print(data_dict.get('NAME')) + 71 for data_dict in compound_data_dict_list: + 72 if not data_dict.get("NUM PEAKS"): + 73 data_dict["NUM PEAKS"] = len(data_dict.get("mz")) + 74 if not data_dict.get("CASNO"): + 75 data_dict["CASNO"] = data_dict.get("CAS") + 76 if not data_dict["CASNO"]: + 77 data_dict["CASNO"] = 0 + 78 # print(data_dict) + 79 try: + 80 sqlLite_obj.add_compound(data_dict) + 81 except: + 82 print(data_dict.get("NAME")) + 83 + 84 return sqlLite_obj 85 - 86 return sqlLite_obj - 87 - 88 def get_compound_data_dict_list(self): - 89 """ Parses the NIST MSI file and returns a list of compound data dictionaries. - 90 - 91 Returns - 92 ------- - 93 list - 94 A list of compound data dictionaries. - 95 """ - 96 list_dict_data = [] - 97 - 98 with open(self.file_path) as msifile: - 99 -100 content = msifile.readlines() -101 -102 i = 0 -103 -104 dict_data = dict() -105 dict_data['mz'] = list() -106 dict_data['abundance'] = list() -107 #for line in content: -108 # print(line, line=="\n" ) -109 -110 while i < len(content): -111 -112 split_line = content[i].split(":") -113 -114 #empty line -115 if len(content[i]) == 1: -116 -117 i += 1 -118 if dict_data.get('NAME'): -119 list_dict_data.append(dict_data) + 86 def get_compound_data_dict_list(self): + 87 """Parses the NIST MSI file and returns a list of compound data dictionaries. + 88 + 89 Returns + 90 ------- + 91 list + 92 A list of compound data dictionaries. 
+ 93 """ + 94 list_dict_data = [] + 95 + 96 with open(self.file_path) as msifile: + 97 content = msifile.readlines() + 98 + 99 i = 0 +100 +101 dict_data = dict() +102 dict_data["mz"] = list() +103 dict_data["abundance"] = list() +104 # for line in content: +105 # print(line, line=="\n" ) +106 +107 while i < len(content): +108 split_line = content[i].split(":") +109 +110 # empty line +111 if len(content[i]) == 1: +112 i += 1 +113 if dict_data.get("NAME"): +114 list_dict_data.append(dict_data) +115 +116 # print(dict_data) +117 dict_data = dict() +118 dict_data["mz"] = list() +119 dict_data["abundance"] = list() 120 -121 #print(dict_data) -122 dict_data = dict() -123 dict_data['mz'] = list() -124 dict_data['abundance'] = list() -125 -126 #metadata, name, ri, rt etc -127 elif len(split_line) >= 2: -128 -129 label = split_line[0] -130 value = ':'.join(split_line[1:]).strip('\n').strip('') -131 dict_data[label] = value -132 i += 1 -133 -134 #mz and abundance pairs -135 elif len(split_line) == 1: -136 -137 for s in content[i].strip('\n').strip('').split("(")[1:]: -138 -139 values = s.split(" ") -140 -141 if values[0] == '': -142 mz = values[1] -143 else: -144 mz = values[0] -145 -146 abun = values[-2].strip(')') +121 # metadata, name, ri, rt etc +122 elif len(split_line) >= 2: +123 label = split_line[0] +124 value = ":".join(split_line[1:]).strip("\n").strip("") +125 dict_data[label] = value +126 i += 1 +127 +128 # mz and abundance pairs +129 elif len(split_line) == 1: +130 for s in content[i].strip("\n").strip("").split("(")[1:]: +131 values = s.split(" ") +132 +133 if values[0] == "": +134 mz = values[1] +135 else: +136 mz = values[0] +137 +138 abun = values[-2].strip(")") +139 +140 dict_data["mz"].append(mz) +141 dict_data["abundance"].append(abun) +142 +143 i += 1 +144 # something else +145 else: +146 i += 1 147 -148 dict_data['mz'].append(mz) -149 dict_data['abundance'].append(abun) -150 -151 i += 1 -152 #something else -153 else: -154 -155 i += 1 -156 -157 return 
list_dict_data -158 -159 +148 return list_dict_data

    @@ -249,155 +238,145 @@

    -
     13class ReadNistMSI(Thread):
    - 14    """ A class for reading NIST MSI files and storing the data in a SQLite database.
    - 15
    - 16    Parameters
    - 17    ----------
    - 18    file_path : str
    - 19        The path to the NIST MSI file.
    - 20    url : str, optional
    - 21        The URL for the SQLite database. Default is 'sqlite://'.
    - 22
    - 23    Raises
    - 24    ------
    - 25    FileExistsError
    - 26        If the specified file does not exist.
    - 27
    - 28    Attributes
    - 29    ----------
    - 30    file_path : str
    - 31        The path to the NIST MSI file.
    - 32    url : str
    - 33        The URL for the SQLite database.
    - 34    sqlLite_obj : EI_LowRes_SQLite
    - 35        The SQLite object for storing the compound data.
    - 36
    - 37    Methods
    - 38    -------
    - 39    * run().
    - 40        Runs the thread and initializes the SQLite object.
    - 41    * get_sqlLite_obj().
    - 42        Returns the SQLite object.
    - 43    * get_compound_data_dict_list().
    - 44        Parses the NIST MSI file and returns a list of compound data dictionaries.
    - 45    """
    - 46
    - 47    def __init__(self, file_path, url='sqlite://'):
    +            
     11class ReadNistMSI(Thread):
    + 12    """A class for reading NIST MSI files and storing the data in a SQLite database.
    + 13
    + 14    Parameters
    + 15    ----------
    + 16    file_path : str
    + 17        The path to the NIST MSI file.
    + 18    url : str, optional
    + 19        The URL for the SQLite database. Default is 'sqlite://'.
    + 20
    + 21    Raises
    + 22    ------
    + 23    FileExistsError
    + 24        If the specified file does not exist.
    + 25
    + 26    Attributes
    + 27    ----------
    + 28    file_path : str
    + 29        The path to the NIST MSI file.
    + 30    url : str
    + 31        The URL for the SQLite database.
    + 32    sqlLite_obj : EI_LowRes_SQLite
    + 33        The SQLite object for storing the compound data.
    + 34
    + 35    Methods
    + 36    -------
    + 37    * run().
    + 38        Runs the thread and initializes the SQLite object.
    + 39    * get_sqlLite_obj().
    + 40        Returns the SQLite object.
    + 41    * get_compound_data_dict_list().
    + 42        Parses the NIST MSI file and returns a list of compound data dictionaries.
    + 43    """
    + 44
    + 45    def __init__(self, file_path, url="sqlite://"):
    + 46        Thread.__init__(self)
    + 47        file_path = Path(file_path)
      48
    - 49        Thread.__init__(self)
    - 50        file_path = Path(file_path)
    + 49        if not file_path.exists():
    + 50            raise FileExistsError("File does not exist: " + file_path)
      51
    - 52        if not file_path.exists():
    + 52        self.file_path = file_path
      53
    - 54            raise FileExistsError("File does not exist: " + file_path)
    + 54        self.url = url
      55
    - 56        self.file_path = file_path
    - 57
    - 58        self.url = url
    + 56    def run(self):
    + 57        """Runs the thread and initializes the SQLite object."""
    + 58        self.sqlLite_obj = self.get_sqlLite_obj()
      59
    - 60    def run(self):
    - 61        """ Runs the thread and initializes the SQLite object.
    - 62        """
    - 63        self.sqlLite_obj = self.get_sqlLite_obj()
    - 64
    - 65    def get_sqlLite_obj(self):
    - 66        """ Returns the SQLite object.
    - 67
    - 68        Returns
    - 69        -------
    - 70        EI_LowRes_SQLite
    - 71            The SQLite object for storing the compound data.
    - 72        """
    - 73        compound_data_dict_list = self.get_compound_data_dict_list()
    - 74
    - 75        sqlLite_obj = EI_LowRes_SQLite(url=self.url)
    - 76
    - 77        for data_dict in compound_data_dict_list:
    - 78            if not data_dict.get('NUM PEAKS'):
    - 79                data_dict['NUM PEAKS'] = len(data_dict.get('mz'))
    - 80            if not data_dict.get('CASNO'):
    - 81                data_dict['CASNO'] = data_dict.get('CAS')
    - 82                if not data_dict['CASNO']:
    - 83                    data_dict['CASNO'] = 0
    - 84            #print(data_dict)
    - 85            try:
    - 86                sqlLite_obj.add_compound(data_dict)
    - 87            except:
    - 88                print(data_dict.get('NAME'))
    + 60    def get_sqlLite_obj(self):
    + 61        """Returns the SQLite object.
    + 62
    + 63        Returns
    + 64        -------
    + 65        EI_LowRes_SQLite
    + 66            The SQLite object for storing the compound data.
    + 67        """
    + 68        compound_data_dict_list = self.get_compound_data_dict_list()
    + 69
    + 70        sqlLite_obj = EI_LowRes_SQLite(url=self.url)
    + 71
    + 72        for data_dict in compound_data_dict_list:
    + 73            if not data_dict.get("NUM PEAKS"):
    + 74                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
    + 75            if not data_dict.get("CASNO"):
    + 76                data_dict["CASNO"] = data_dict.get("CAS")
    + 77                if not data_dict["CASNO"]:
    + 78                    data_dict["CASNO"] = 0
    + 79            # print(data_dict)
    + 80            try:
    + 81                sqlLite_obj.add_compound(data_dict)
    + 82            except:
    + 83                print(data_dict.get("NAME"))
    + 84
    + 85        return sqlLite_obj
    + 86
    + 87    def get_compound_data_dict_list(self):
    + 88        """Parses the NIST MSI file and returns a list of compound data dictionaries.
      89
    - 90        return sqlLite_obj
    - 91
    - 92    def get_compound_data_dict_list(self):
    - 93        """ Parses the NIST MSI file and returns a list of compound data dictionaries.
    - 94
    - 95        Returns
    - 96        -------
    - 97        list
    - 98            A list of compound data dictionaries.
    - 99        """
    -100        list_dict_data = []
    + 90        Returns
    + 91        -------
    + 92        list
    + 93            A list of compound data dictionaries.
    + 94        """
    + 95        list_dict_data = []
    + 96
    + 97        with open(self.file_path) as msifile:
    + 98            content = msifile.readlines()
    + 99
    +100            i = 0
     101
    -102        with open(self.file_path) as msifile:
    -103
    -104            content = msifile.readlines()
    -105
    -106            i = 0
    +102            dict_data = dict()
    +103            dict_data["mz"] = list()
    +104            dict_data["abundance"] = list()
    +105            # for line in content:
    +106            #   print(line, line=="\n" )
     107
    -108            dict_data = dict()
    -109            dict_data['mz'] = list()
    -110            dict_data['abundance'] = list()
    -111            #for line in content:
    -112            #   print(line, line=="\n" )
    -113            
    -114            while i < len(content):
    -115
    -116                split_line = content[i].split(":")
    -117
    -118                #empty line 
    -119                if len(content[i]) == 1:
    -120
    -121                    i += 1
    -122                    if dict_data.get('NAME'):
    -123                        list_dict_data.append(dict_data)
    -124
    -125                    #print(dict_data)
    -126                    dict_data = dict()
    -127                    dict_data['mz'] = list()
    -128                    dict_data['abundance'] = list()
    -129
    -130                #metadata, name, ri, rt etc 
    -131                elif len(split_line) >= 2:
    -132
    -133                    label = split_line[0]
    -134                    value = ':'.join(split_line[1:]).strip('\n').strip('')
    -135                    dict_data[label] = value
    -136                    i += 1
    -137
    -138                #mz and abundance pairs
    -139                elif len(split_line) == 1:
    +108            while i < len(content):
    +109                split_line = content[i].split(":")
    +110
    +111                # empty line
    +112                if len(content[i]) == 1:
    +113                    i += 1
    +114                    if dict_data.get("NAME"):
    +115                        list_dict_data.append(dict_data)
    +116
    +117                    # print(dict_data)
    +118                    dict_data = dict()
    +119                    dict_data["mz"] = list()
    +120                    dict_data["abundance"] = list()
    +121
    +122                # metadata, name, ri, rt etc
    +123                elif len(split_line) >= 2:
    +124                    label = split_line[0]
    +125                    value = ":".join(split_line[1:]).strip("\n").strip("")
    +126                    dict_data[label] = value
    +127                    i += 1
    +128
    +129                # mz and abundance pairs
    +130                elif len(split_line) == 1:
    +131                    for s in content[i].strip("\n").strip("").split("(")[1:]:
    +132                        values = s.split(" ")
    +133
    +134                        if values[0] == "":
    +135                            mz = values[1]
    +136                        else:
    +137                            mz = values[0]
    +138
    +139                        abun = values[-2].strip(")")
     140
    -141                    for s in content[i].strip('\n').strip('').split("(")[1:]:
    -142
    -143                        values = s.split(" ")
    -144
    -145                        if values[0] == '':
    -146                            mz = values[1]
    -147                        else:
    -148                            mz = values[0]
    -149
    -150                        abun = values[-2].strip(')')
    -151
    -152                        dict_data['mz'].append(mz)
    -153                        dict_data['abundance'].append(abun)
    -154
    -155                    i += 1
    -156                #something else    
    -157                else:
    -158
    -159                    i += 1
    -160
    -161        return list_dict_data
    +141                        dict_data["mz"].append(mz)
    +142                        dict_data["abundance"].append(abun)
    +143
    +144                    i += 1
    +145                # something else
    +146                else:
    +147                    i += 1
    +148
    +149        return list_dict_data
     
    @@ -452,18 +431,16 @@
    Methods
    -
    47    def __init__(self, file_path, url='sqlite://'):
    +            
    45    def __init__(self, file_path, url="sqlite://"):
    +46        Thread.__init__(self)
    +47        file_path = Path(file_path)
     48
    -49        Thread.__init__(self)
    -50        file_path = Path(file_path)
    +49        if not file_path.exists():
    +50            raise FileExistsError("File does not exist: " + file_path)
     51
    -52        if not file_path.exists():
    +52        self.file_path = file_path
     53
    -54            raise FileExistsError("File does not exist: " + file_path)
    -55
    -56        self.file_path = file_path
    -57
    -58        self.url = url
    +54        self.url = url
     
    @@ -523,10 +500,9 @@
    Methods
    -
    60    def run(self):
    -61        """ Runs the thread and initializes the SQLite object.
    -62        """
    -63        self.sqlLite_obj = self.get_sqlLite_obj()
    +            
    56    def run(self):
    +57        """Runs the thread and initializes the SQLite object."""
    +58        self.sqlLite_obj = self.get_sqlLite_obj()
     
    @@ -546,32 +522,32 @@
    Methods
    -
    65    def get_sqlLite_obj(self):
    -66        """ Returns the SQLite object.
    -67
    -68        Returns
    -69        -------
    -70        EI_LowRes_SQLite
    -71            The SQLite object for storing the compound data.
    -72        """
    -73        compound_data_dict_list = self.get_compound_data_dict_list()
    -74
    -75        sqlLite_obj = EI_LowRes_SQLite(url=self.url)
    -76
    -77        for data_dict in compound_data_dict_list:
    -78            if not data_dict.get('NUM PEAKS'):
    -79                data_dict['NUM PEAKS'] = len(data_dict.get('mz'))
    -80            if not data_dict.get('CASNO'):
    -81                data_dict['CASNO'] = data_dict.get('CAS')
    -82                if not data_dict['CASNO']:
    -83                    data_dict['CASNO'] = 0
    -84            #print(data_dict)
    -85            try:
    -86                sqlLite_obj.add_compound(data_dict)
    -87            except:
    -88                print(data_dict.get('NAME'))
    -89
    -90        return sqlLite_obj
    +            
    60    def get_sqlLite_obj(self):
    +61        """Returns the SQLite object.
    +62
    +63        Returns
    +64        -------
    +65        EI_LowRes_SQLite
    +66            The SQLite object for storing the compound data.
    +67        """
    +68        compound_data_dict_list = self.get_compound_data_dict_list()
    +69
    +70        sqlLite_obj = EI_LowRes_SQLite(url=self.url)
    +71
    +72        for data_dict in compound_data_dict_list:
    +73            if not data_dict.get("NUM PEAKS"):
    +74                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
    +75            if not data_dict.get("CASNO"):
    +76                data_dict["CASNO"] = data_dict.get("CAS")
    +77                if not data_dict["CASNO"]:
    +78                    data_dict["CASNO"] = 0
    +79            # print(data_dict)
    +80            try:
    +81                sqlLite_obj.add_compound(data_dict)
    +82            except:
    +83                print(data_dict.get("NAME"))
    +84
    +85        return sqlLite_obj
     
    @@ -597,76 +573,69 @@
    Returns
    -
     92    def get_compound_data_dict_list(self):
    - 93        """ Parses the NIST MSI file and returns a list of compound data dictionaries.
    - 94
    - 95        Returns
    - 96        -------
    - 97        list
    - 98            A list of compound data dictionaries.
    - 99        """
    -100        list_dict_data = []
    +            
     87    def get_compound_data_dict_list(self):
    + 88        """Parses the NIST MSI file and returns a list of compound data dictionaries.
    + 89
    + 90        Returns
    + 91        -------
    + 92        list
    + 93            A list of compound data dictionaries.
    + 94        """
    + 95        list_dict_data = []
    + 96
    + 97        with open(self.file_path) as msifile:
    + 98            content = msifile.readlines()
    + 99
    +100            i = 0
     101
    -102        with open(self.file_path) as msifile:
    -103
    -104            content = msifile.readlines()
    -105
    -106            i = 0
    +102            dict_data = dict()
    +103            dict_data["mz"] = list()
    +104            dict_data["abundance"] = list()
    +105            # for line in content:
    +106            #   print(line, line=="\n" )
     107
    -108            dict_data = dict()
    -109            dict_data['mz'] = list()
    -110            dict_data['abundance'] = list()
    -111            #for line in content:
    -112            #   print(line, line=="\n" )
    -113            
    -114            while i < len(content):
    -115
    -116                split_line = content[i].split(":")
    -117
    -118                #empty line 
    -119                if len(content[i]) == 1:
    -120
    -121                    i += 1
    -122                    if dict_data.get('NAME'):
    -123                        list_dict_data.append(dict_data)
    -124
    -125                    #print(dict_data)
    -126                    dict_data = dict()
    -127                    dict_data['mz'] = list()
    -128                    dict_data['abundance'] = list()
    -129
    -130                #metadata, name, ri, rt etc 
    -131                elif len(split_line) >= 2:
    -132
    -133                    label = split_line[0]
    -134                    value = ':'.join(split_line[1:]).strip('\n').strip('')
    -135                    dict_data[label] = value
    -136                    i += 1
    -137
    -138                #mz and abundance pairs
    -139                elif len(split_line) == 1:
    +108            while i < len(content):
    +109                split_line = content[i].split(":")
    +110
    +111                # empty line
    +112                if len(content[i]) == 1:
    +113                    i += 1
    +114                    if dict_data.get("NAME"):
    +115                        list_dict_data.append(dict_data)
    +116
    +117                    # print(dict_data)
    +118                    dict_data = dict()
    +119                    dict_data["mz"] = list()
    +120                    dict_data["abundance"] = list()
    +121
    +122                # metadata, name, ri, rt etc
    +123                elif len(split_line) >= 2:
    +124                    label = split_line[0]
    +125                    value = ":".join(split_line[1:]).strip("\n").strip("")
    +126                    dict_data[label] = value
    +127                    i += 1
    +128
    +129                # mz and abundance pairs
    +130                elif len(split_line) == 1:
    +131                    for s in content[i].strip("\n").strip("").split("(")[1:]:
    +132                        values = s.split(" ")
    +133
    +134                        if values[0] == "":
    +135                            mz = values[1]
    +136                        else:
    +137                            mz = values[0]
    +138
    +139                        abun = values[-2].strip(")")
     140
    -141                    for s in content[i].strip('\n').strip('').split("(")[1:]:
    -142
    -143                        values = s.split(" ")
    -144
    -145                        if values[0] == '':
    -146                            mz = values[1]
    -147                        else:
    -148                            mz = values[0]
    -149
    -150                        abun = values[-2].strip(')')
    -151
    -152                        dict_data['mz'].append(mz)
    -153                        dict_data['abundance'].append(abun)
    -154
    -155                    i += 1
    -156                #something else    
    -157                else:
    -158
    -159                    i += 1
    -160
    -161        return list_dict_data
    +141                        dict_data["mz"].append(mz)
    +142                        dict_data["abundance"].append(abun)
    +143
    +144                    i += 1
    +145                # something else
    +146                else:
    +147                    i += 1
    +148
    +149        return list_dict_data
     
    diff --git a/docs/corems/molecular_id/search/compoundSearch.html b/docs/corems/molecular_id/search/compoundSearch.html index 793b587e..52919722 100644 --- a/docs/corems/molecular_id/search/compoundSearch.html +++ b/docs/corems/molecular_id/search/compoundSearch.html @@ -72,17 +72,17 @@

    -
      1from threading import Thread
    -  2from pathlib import Path
    +                        
      1from math import exp
    +  2from threading import Thread
       3
    -  4from pandas import DataFrame
    -  5from math import exp
    -  6from numpy import power
    +  4from numpy import power
    +  5
    +  6from corems.molecular_id.calc.SpectralSimilarity import SpectralSimilarity
       7from corems.molecular_id.factory.EI_SQL import EI_LowRes_SQLite
    -  8from corems.molecular_id.calc.SpectralSimilarity import SpectralSimilarity
    +  8
       9
      10class LowResMassSpectralMatch(Thread):
    - 11    """ A class representing a low-resolution mass spectral match.
    + 11    """A class representing a low-resolution mass spectral match.
      12
      13    Parameters
      14    -----------
    @@ -112,144 +112,181 @@ 

    38 """ 39 40 def __init__(self, gcms_obj, sql_obj=None, calibration=False): - 41 - 42 Thread.__init__(self) - 43 - 44 self.gcms_obj = gcms_obj - 45 - 46 # initiated at create_molecular_database() - 47 #self.dict_molecular_lookup_table = None - 48 self.calibration = calibration - 49 # reading local file for now, - 50 if not sql_obj: - 51 self.sql_obj = EI_LowRes_SQLite(url=self.gcms_obj.molecular_search_settings.url_database) - 52 else: - 53 self.sql_obj = sql_obj - 54 - 55 def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi): - 56 """ - 57 Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object. - 58 - 59 Parameters - 60 ----------- - 61 gc_peak : object - 62 The GC peak object. - 63 ref_obj : object - 64 The reference object. - 65 spectral_simi : object - 66 The spectral similarity object. - 67 - 68 Returns - 69 -------- - 70 tuple - 71 A tuple containing the spectral similarity scores, RI score, and similarity score. 
- 72 - 73 """ - 74 spectral_similarity_scores = {} - 75 spectral_similarity_scores["cosine_correlation"] = spectral_simi.cosine_correlation() - 76 - 77 if self.gcms_obj.molecular_search_settings.exploratory_mode: - 78 - 79 spectral_similarity_scores["weighted_cosine_correlation"] = spectral_simi.weighted_cosine_correlation() - 80 ss, ss_nist = spectral_simi.stein_scott() - 81 spectral_similarity_scores["stein_scott_similarity"] = ss - 82 spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist - 83 - 84 spectral_similarity_scores["pearson_correlation"] = spectral_simi.pearson_correlation() - 85 spectral_similarity_scores["spearman_correlation"] = spectral_simi.spearman_correlation() - 86 spectral_similarity_scores["kendall_tau_correlation"] = spectral_simi.kendall_tau() - 87 spectral_similarity_scores["euclidean_distance"] = spectral_simi.euclidean_distance() - 88 spectral_similarity_scores["manhattan_distance"] = spectral_simi.manhattan_distance() - 89 spectral_similarity_scores["jaccard_distance"] = spectral_simi.jaccard_distance() - 90 spectral_similarity_scores["dft_correlation"] = spectral_simi.dft_correlation() - 91 spectral_similarity_scores["dwt_correlation"] = spectral_simi.dwt_correlation() - 92 spectral_similarity_scores.update(spectral_simi.extra_distances()) - 93 # print(spectral_similarity_scores) - 94 # print(ref_obj.get('ri'), gc_peak.ri, self.gcms_obj.molecular_search_settings.ri_window) - 95 - 96 ri_score = exp(-1 * (power((gc_peak.ri - ref_obj.get('ri')), 2) / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2)))) - 97 - 98 similarity_score = ((spectral_similarity_scores.get("cosine_correlation")**2) * (ri_score))**(1 / 3) - 99 -100 return spectral_similarity_scores, ri_score, similarity_score -101 -102# @timeit -103 def run(self): -104 """ Runs the low-resolution mass spectral match. 
-105 -106 """ -107 # TODO select the best gcms peak -108 import tqdm -109 -110 original_use_deconvolution = self.gcms_obj.chromatogram_settings.use_deconvolution -111 -112 if not self.gcms_obj: -113 -114 # Do not use deconvolution for the retention index calibration + 41 Thread.__init__(self) + 42 + 43 self.gcms_obj = gcms_obj + 44 + 45 # initiated at create_molecular_database() + 46 # self.dict_molecular_lookup_table = None + 47 self.calibration = calibration + 48 # reading local file for now, + 49 if not sql_obj: + 50 self.sql_obj = EI_LowRes_SQLite( + 51 url=self.gcms_obj.molecular_search_settings.url_database + 52 ) + 53 else: + 54 self.sql_obj = sql_obj + 55 + 56 def metabolite_detector_score(self, gc_peak, ref_obj, spectral_simi): + 57 """ + 58 Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object. + 59 + 60 Parameters + 61 ----------- + 62 gc_peak : object + 63 The GC peak object. + 64 ref_obj : object + 65 The reference object. + 66 spectral_simi : object + 67 The spectral similarity object. + 68 + 69 Returns + 70 -------- + 71 tuple + 72 A tuple containing the spectral similarity scores, RI score, and similarity score. 
+ 73 + 74 """ + 75 spectral_similarity_scores = {} + 76 spectral_similarity_scores["cosine_correlation"] = ( + 77 spectral_simi.cosine_correlation() + 78 ) + 79 + 80 if self.gcms_obj.molecular_search_settings.exploratory_mode: + 81 spectral_similarity_scores["weighted_cosine_correlation"] = ( + 82 spectral_simi.weighted_cosine_correlation() + 83 ) + 84 ss, ss_nist = spectral_simi.stein_scott() + 85 spectral_similarity_scores["stein_scott_similarity"] = ss + 86 spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist + 87 + 88 spectral_similarity_scores["pearson_correlation"] = ( + 89 spectral_simi.pearson_correlation() + 90 ) + 91 spectral_similarity_scores["spearman_correlation"] = ( + 92 spectral_simi.spearman_correlation() + 93 ) + 94 spectral_similarity_scores["kendall_tau_correlation"] = ( + 95 spectral_simi.kendall_tau() + 96 ) + 97 spectral_similarity_scores["euclidean_distance"] = ( + 98 spectral_simi.euclidean_distance() + 99 ) +100 spectral_similarity_scores["manhattan_distance"] = ( +101 spectral_simi.manhattan_distance() +102 ) +103 spectral_similarity_scores["jaccard_distance"] = ( +104 spectral_simi.jaccard_distance() +105 ) +106 spectral_similarity_scores["dft_correlation"] = ( +107 spectral_simi.dft_correlation() +108 ) +109 spectral_similarity_scores["dwt_correlation"] = ( +110 spectral_simi.dwt_correlation() +111 ) +112 spectral_similarity_scores.update(spectral_simi.extra_distances()) +113 # print(spectral_similarity_scores) +114 # print(ref_obj.get('ri'), gc_peak.ri, self.gcms_obj.molecular_search_settings.ri_window) 115 -116 if self.calibration: -117 -118 self.gcms_obj.chromatogram_settings.use_deconvolution = False -119 -120 self.gcms_obj.process_chromatogram() -121 -122 self.gcms_obj.chromatogram_settings.use_deconvolution = original_use_deconvolution +116 ri_score = exp( +117 -1 +118 * ( +119 power((gc_peak.ri - ref_obj.get("ri")), 2) +120 / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2)) +121 ) +122 ) 123 -124 for 
gc_peak in tqdm.tqdm(self.gcms_obj): -125 -126 if not self.calibration: -127 -128 window = self.gcms_obj.molecular_search_settings.ri_search_range +124 similarity_score = ( +125 (spectral_similarity_scores.get("cosine_correlation") ** 2) * (ri_score) +126 ) ** (1 / 3) +127 +128 return spectral_similarity_scores, ri_score, similarity_score 129 -130 ri = gc_peak.ri -131 -132 min_mat_ri = (ri-window, ri+window) -133 -134 ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri) +130 # @timeit +131 def run(self): +132 """Runs the low-resolution mass spectral match.""" +133 # TODO select the best gcms peak +134 import tqdm 135 -136 else: -137 -138 compound_names = self.gcms_obj.molecular_search_settings.ri_calibration_compound_names +136 original_use_deconvolution = ( +137 self.gcms_obj.chromatogram_settings.use_deconvolution +138 ) 139 -140 window = self.gcms_obj.molecular_search_settings.rt_search_range -141 -142 rt = gc_peak.retention_time -143 -144 min_mat_rt = (rt-window, rt+window) +140 if not self.gcms_obj: +141 # Do not use deconvolution for the retention index calibration +142 +143 if self.calibration: +144 self.gcms_obj.chromatogram_settings.use_deconvolution = False 145 -146 ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names) +146 self.gcms_obj.process_chromatogram() 147 -148 for ref_obj in ref_objs: -149 # uses spectral similarly and uses a threshold to only select peaks with high data correlation -150 -151 spectral_simi = SpectralSimilarity(gc_peak.mass_spectrum.mz_abun_dict, ref_obj) -152 -153 if self.calibration: -154 -155 spectral_similarity_scores = {} -156 spectral_similarity_scores["cosine_correlation"] = spectral_simi.cosine_correlation() +148 self.gcms_obj.chromatogram_settings.use_deconvolution = ( +149 original_use_deconvolution +150 ) +151 +152 for gc_peak in tqdm.tqdm(self.gcms_obj): +153 if not self.calibration: +154 window = self.gcms_obj.molecular_search_settings.ri_search_range +155 +156 ri = gc_peak.ri 157 -158 
#print(w_correlation_value,correlation_value ) -159 if spectral_similarity_scores["cosine_correlation"] >= self.gcms_obj.molecular_search_settings.correlation_threshold: -160 -161 gc_peak.add_compound(ref_obj, spectral_similarity_scores) -162 -163 # use score, usually a combination of Retention index and Spectral Similarity -164 # Threshold is implemented by not necessarily used -165 else: +158 min_mat_ri = (ri - window, ri + window) +159 +160 ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri) +161 +162 else: +163 compound_names = self.gcms_obj.molecular_search_settings.ri_calibration_compound_names +164 +165 window = self.gcms_obj.molecular_search_settings.rt_search_range 166 -167 # m/q developed methods will be implemented here -168 spectral_similarity_scores, ri_score, similarity_score = self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi) -169 -170 #TODO need to add similarity score option in the parameters encapsulation class -171 -172 if similarity_score >= self.gcms_obj.molecular_search_settings.score_threshold: -173 -174 gc_peak.add_compound(ref_obj, spectral_similarity_scores, ri_score, similarity_score) +167 rt = gc_peak.retention_time +168 +169 min_mat_rt = (rt - window, rt + window) +170 +171 ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names) +172 +173 for ref_obj in ref_objs: +174 # uses spectral similarly and uses a threshold to only select peaks with high data correlation 175 -176 -177 self.sql_obj.session.close() -178 self.sql_obj.engine.dispose() +176 spectral_simi = SpectralSimilarity( +177 gc_peak.mass_spectrum.mz_abun_dict, ref_obj +178 ) +179 +180 if self.calibration: +181 spectral_similarity_scores = {} +182 spectral_similarity_scores["cosine_correlation"] = ( +183 spectral_simi.cosine_correlation() +184 ) +185 +186 # print(w_correlation_value,correlation_value ) +187 if ( +188 spectral_similarity_scores["cosine_correlation"] +189 >= self.gcms_obj.molecular_search_settings.correlation_threshold +190 ): +191 
gc_peak.add_compound(ref_obj, spectral_similarity_scores) +192 +193 # use score, usually a combination of Retention index and Spectral Similarity +194 # Threshold is implemented by not necessarily used +195 else: +196 # m/q developed methods will be implemented here +197 spectral_similarity_scores, ri_score, similarity_score = ( +198 self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi) +199 ) +200 +201 # TODO need to add similarity score option in the parameters encapsulation class +202 +203 if ( +204 similarity_score +205 >= self.gcms_obj.molecular_search_settings.score_threshold +206 ): +207 gc_peak.add_compound( +208 ref_obj, +209 spectral_similarity_scores, +210 ri_score, +211 similarity_score, +212 ) +213 +214 self.sql_obj.session.close() +215 self.sql_obj.engine.dispose()

    @@ -265,48 +302,49 @@

    -
     12class LowResMassSpectralMatch(Thread):
    - 13    """ A class representing a low-resolution mass spectral match.
    - 14
    - 15    Parameters
    - 16    -----------
    - 17    gcms_obj : object
    - 18        The GC-MS object.
    - 19    sql_obj : object, optional
    - 20        The SQL object for database operations. Default is None.
    - 21    calibration : bool, optional
    - 22        Flag indicating if the match is for calibration. Default is False.
    - 23
    - 24    Attributes
    - 25    -----------
    - 26    gcms_obj : object
    - 27        The GC-MS object.
    - 28    sql_obj : object
    - 29        The SQL object for database operations.
    - 30    calibration : bool
    - 31        Flag indicating if the match is for calibration.
    - 32
    - 33    Methods
    - 34    --------
    - 35    * metabolite_detector_score(gc_peak, ref_obj, spectral_simi).
    - 36        Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
    - 37    * run().
    - 38        Runs the low-resolution mass spectral match.
    - 39
    - 40    """
    - 41
    - 42    def __init__(self, gcms_obj, sql_obj=None, calibration=False):
    - 43        
    - 44        Thread.__init__(self)
    +            
     11class LowResMassSpectralMatch(Thread):
    + 12    """A class representing a low-resolution mass spectral match.
    + 13
    + 14    Parameters
    + 15    -----------
    + 16    gcms_obj : object
    + 17        The GC-MS object.
    + 18    sql_obj : object, optional
    + 19        The SQL object for database operations. Default is None.
    + 20    calibration : bool, optional
    + 21        Flag indicating if the match is for calibration. Default is False.
    + 22
    + 23    Attributes
    + 24    -----------
    + 25    gcms_obj : object
    + 26        The GC-MS object.
    + 27    sql_obj : object
    + 28        The SQL object for database operations.
    + 29    calibration : bool
    + 30        Flag indicating if the match is for calibration.
    + 31
    + 32    Methods
    + 33    --------
    + 34    * metabolite_detector_score(gc_peak, ref_obj, spectral_simi).
    + 35        Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
    + 36    * run().
    + 37        Runs the low-resolution mass spectral match.
    + 38
    + 39    """
    + 40
    + 41    def __init__(self, gcms_obj, sql_obj=None, calibration=False):
    + 42        Thread.__init__(self)
    + 43
    + 44        self.gcms_obj = gcms_obj
      45
    - 46        self.gcms_obj = gcms_obj
    - 47
    - 48        #  initiated at create_molecular_database()
    - 49        #self.dict_molecular_lookup_table = None
    - 50        self.calibration = calibration
    - 51        # reading local file for now,
    - 52        if not sql_obj:
    - 53            self.sql_obj = EI_LowRes_SQLite(url=self.gcms_obj.molecular_search_settings.url_database)
    + 46        #  initiated at create_molecular_database()
    + 47        # self.dict_molecular_lookup_table = None
    + 48        self.calibration = calibration
    + 49        # reading local file for now,
    + 50        if not sql_obj:
    + 51            self.sql_obj = EI_LowRes_SQLite(
    + 52                url=self.gcms_obj.molecular_search_settings.url_database
    + 53            )
      54        else:
      55            self.sql_obj = sql_obj
      56
    @@ -330,110 +368,146 @@ 

    74 75 """ 76 spectral_similarity_scores = {} - 77 spectral_similarity_scores["cosine_correlation"] = spectral_simi.cosine_correlation() - 78 - 79 if self.gcms_obj.molecular_search_settings.exploratory_mode: + 77 spectral_similarity_scores["cosine_correlation"] = ( + 78 spectral_simi.cosine_correlation() + 79 ) 80 - 81 spectral_similarity_scores["weighted_cosine_correlation"] = spectral_simi.weighted_cosine_correlation() - 82 ss, ss_nist = spectral_simi.stein_scott() - 83 spectral_similarity_scores["stein_scott_similarity"] = ss - 84 spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist - 85 - 86 spectral_similarity_scores["pearson_correlation"] = spectral_simi.pearson_correlation() - 87 spectral_similarity_scores["spearman_correlation"] = spectral_simi.spearman_correlation() - 88 spectral_similarity_scores["kendall_tau_correlation"] = spectral_simi.kendall_tau() - 89 spectral_similarity_scores["euclidean_distance"] = spectral_simi.euclidean_distance() - 90 spectral_similarity_scores["manhattan_distance"] = spectral_simi.manhattan_distance() - 91 spectral_similarity_scores["jaccard_distance"] = spectral_simi.jaccard_distance() - 92 spectral_similarity_scores["dft_correlation"] = spectral_simi.dft_correlation() - 93 spectral_similarity_scores["dwt_correlation"] = spectral_simi.dwt_correlation() - 94 spectral_similarity_scores.update(spectral_simi.extra_distances()) - 95 # print(spectral_similarity_scores) - 96 # print(ref_obj.get('ri'), gc_peak.ri, self.gcms_obj.molecular_search_settings.ri_window) - 97 - 98 ri_score = exp(-1 * (power((gc_peak.ri - ref_obj.get('ri')), 2) / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2)))) - 99 -100 similarity_score = ((spectral_similarity_scores.get("cosine_correlation")**2) * (ri_score))**(1 / 3) -101 -102 return spectral_similarity_scores, ri_score, similarity_score -103 -104# @timeit -105 def run(self): -106 """ Runs the low-resolution mass spectral match. 
-107 -108 """ -109 # TODO select the best gcms peak -110 import tqdm -111 -112 original_use_deconvolution = self.gcms_obj.chromatogram_settings.use_deconvolution -113 -114 if not self.gcms_obj: -115 -116 # Do not use deconvolution for the retention index calibration -117 -118 if self.calibration: -119 -120 self.gcms_obj.chromatogram_settings.use_deconvolution = False -121 -122 self.gcms_obj.process_chromatogram() -123 -124 self.gcms_obj.chromatogram_settings.use_deconvolution = original_use_deconvolution -125 -126 for gc_peak in tqdm.tqdm(self.gcms_obj): -127 -128 if not self.calibration: -129 -130 window = self.gcms_obj.molecular_search_settings.ri_search_range -131 -132 ri = gc_peak.ri -133 -134 min_mat_ri = (ri-window, ri+window) -135 -136 ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri) -137 -138 else: -139 -140 compound_names = self.gcms_obj.molecular_search_settings.ri_calibration_compound_names -141 -142 window = self.gcms_obj.molecular_search_settings.rt_search_range + 81 if self.gcms_obj.molecular_search_settings.exploratory_mode: + 82 spectral_similarity_scores["weighted_cosine_correlation"] = ( + 83 spectral_simi.weighted_cosine_correlation() + 84 ) + 85 ss, ss_nist = spectral_simi.stein_scott() + 86 spectral_similarity_scores["stein_scott_similarity"] = ss + 87 spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist + 88 + 89 spectral_similarity_scores["pearson_correlation"] = ( + 90 spectral_simi.pearson_correlation() + 91 ) + 92 spectral_similarity_scores["spearman_correlation"] = ( + 93 spectral_simi.spearman_correlation() + 94 ) + 95 spectral_similarity_scores["kendall_tau_correlation"] = ( + 96 spectral_simi.kendall_tau() + 97 ) + 98 spectral_similarity_scores["euclidean_distance"] = ( + 99 spectral_simi.euclidean_distance() +100 ) +101 spectral_similarity_scores["manhattan_distance"] = ( +102 spectral_simi.manhattan_distance() +103 ) +104 spectral_similarity_scores["jaccard_distance"] = ( +105 spectral_simi.jaccard_distance() +106 
) +107 spectral_similarity_scores["dft_correlation"] = ( +108 spectral_simi.dft_correlation() +109 ) +110 spectral_similarity_scores["dwt_correlation"] = ( +111 spectral_simi.dwt_correlation() +112 ) +113 spectral_similarity_scores.update(spectral_simi.extra_distances()) +114 # print(spectral_similarity_scores) +115 # print(ref_obj.get('ri'), gc_peak.ri, self.gcms_obj.molecular_search_settings.ri_window) +116 +117 ri_score = exp( +118 -1 +119 * ( +120 power((gc_peak.ri - ref_obj.get("ri")), 2) +121 / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2)) +122 ) +123 ) +124 +125 similarity_score = ( +126 (spectral_similarity_scores.get("cosine_correlation") ** 2) * (ri_score) +127 ) ** (1 / 3) +128 +129 return spectral_similarity_scores, ri_score, similarity_score +130 +131 # @timeit +132 def run(self): +133 """Runs the low-resolution mass spectral match.""" +134 # TODO select the best gcms peak +135 import tqdm +136 +137 original_use_deconvolution = ( +138 self.gcms_obj.chromatogram_settings.use_deconvolution +139 ) +140 +141 if not self.gcms_obj: +142 # Do not use deconvolution for the retention index calibration 143 -144 rt = gc_peak.retention_time -145 -146 min_mat_rt = (rt-window, rt+window) -147 -148 ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names) -149 -150 for ref_obj in ref_objs: -151 # uses spectral similarly and uses a threshold to only select peaks with high data correlation -152 -153 spectral_simi = SpectralSimilarity(gc_peak.mass_spectrum.mz_abun_dict, ref_obj) -154 -155 if self.calibration: +144 if self.calibration: +145 self.gcms_obj.chromatogram_settings.use_deconvolution = False +146 +147 self.gcms_obj.process_chromatogram() +148 +149 self.gcms_obj.chromatogram_settings.use_deconvolution = ( +150 original_use_deconvolution +151 ) +152 +153 for gc_peak in tqdm.tqdm(self.gcms_obj): +154 if not self.calibration: +155 window = self.gcms_obj.molecular_search_settings.ri_search_range 156 -157 spectral_similarity_scores = {} 
-158 spectral_similarity_scores["cosine_correlation"] = spectral_simi.cosine_correlation() -159 -160 #print(w_correlation_value,correlation_value ) -161 if spectral_similarity_scores["cosine_correlation"] >= self.gcms_obj.molecular_search_settings.correlation_threshold: +157 ri = gc_peak.ri +158 +159 min_mat_ri = (ri - window, ri + window) +160 +161 ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri) 162 -163 gc_peak.add_compound(ref_obj, spectral_similarity_scores) -164 -165 # use score, usually a combination of Retention index and Spectral Similarity -166 # Threshold is implemented by not necessarily used -167 else: -168 -169 # m/q developed methods will be implemented here -170 spectral_similarity_scores, ri_score, similarity_score = self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi) +163 else: +164 compound_names = self.gcms_obj.molecular_search_settings.ri_calibration_compound_names +165 +166 window = self.gcms_obj.molecular_search_settings.rt_search_range +167 +168 rt = gc_peak.retention_time +169 +170 min_mat_rt = (rt - window, rt + window) 171 -172 #TODO need to add similarity score option in the parameters encapsulation class +172 ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names) 173 -174 if similarity_score >= self.gcms_obj.molecular_search_settings.score_threshold: -175 -176 gc_peak.add_compound(ref_obj, spectral_similarity_scores, ri_score, similarity_score) -177 -178 -179 self.sql_obj.session.close() -180 self.sql_obj.engine.dispose() +174 for ref_obj in ref_objs: +175 # uses spectral similarly and uses a threshold to only select peaks with high data correlation +176 +177 spectral_simi = SpectralSimilarity( +178 gc_peak.mass_spectrum.mz_abun_dict, ref_obj +179 ) +180 +181 if self.calibration: +182 spectral_similarity_scores = {} +183 spectral_similarity_scores["cosine_correlation"] = ( +184 spectral_simi.cosine_correlation() +185 ) +186 +187 # print(w_correlation_value,correlation_value ) +188 if ( +189 
spectral_similarity_scores["cosine_correlation"] +190 >= self.gcms_obj.molecular_search_settings.correlation_threshold +191 ): +192 gc_peak.add_compound(ref_obj, spectral_similarity_scores) +193 +194 # use score, usually a combination of Retention index and Spectral Similarity +195 # Threshold is implemented by not necessarily used +196 else: +197 # m/q developed methods will be implemented here +198 spectral_similarity_scores, ri_score, similarity_score = ( +199 self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi) +200 ) +201 +202 # TODO need to add similarity score option in the parameters encapsulation class +203 +204 if ( +205 similarity_score +206 >= self.gcms_obj.molecular_search_settings.score_threshold +207 ): +208 gc_peak.add_compound( +209 ref_obj, +210 spectral_similarity_scores, +211 ri_score, +212 similarity_score, +213 ) +214 +215 self.sql_obj.session.close() +216 self.sql_obj.engine.dispose()

    @@ -482,18 +556,19 @@
    Methods
    -
    42    def __init__(self, gcms_obj, sql_obj=None, calibration=False):
    -43        
    -44        Thread.__init__(self)
    +            
    41    def __init__(self, gcms_obj, sql_obj=None, calibration=False):
    +42        Thread.__init__(self)
    +43
    +44        self.gcms_obj = gcms_obj
     45
    -46        self.gcms_obj = gcms_obj
    -47
    -48        #  initiated at create_molecular_database()
    -49        #self.dict_molecular_lookup_table = None
    -50        self.calibration = calibration
    -51        # reading local file for now,
    -52        if not sql_obj:
    -53            self.sql_obj = EI_LowRes_SQLite(url=self.gcms_obj.molecular_search_settings.url_database)
    +46        #  initiated at create_molecular_database()
    +47        # self.dict_molecular_lookup_table = None
    +48        self.calibration = calibration
    +49        # reading local file for now,
    +50        if not sql_obj:
    +51            self.sql_obj = EI_LowRes_SQLite(
    +52                url=self.gcms_obj.molecular_search_settings.url_database
    +53            )
     54        else:
     55            self.sql_obj = sql_obj
     
    @@ -575,32 +650,59 @@
    Methods
    74 75 """ 76 spectral_similarity_scores = {} - 77 spectral_similarity_scores["cosine_correlation"] = spectral_simi.cosine_correlation() - 78 - 79 if self.gcms_obj.molecular_search_settings.exploratory_mode: + 77 spectral_similarity_scores["cosine_correlation"] = ( + 78 spectral_simi.cosine_correlation() + 79 ) 80 - 81 spectral_similarity_scores["weighted_cosine_correlation"] = spectral_simi.weighted_cosine_correlation() - 82 ss, ss_nist = spectral_simi.stein_scott() - 83 spectral_similarity_scores["stein_scott_similarity"] = ss - 84 spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist - 85 - 86 spectral_similarity_scores["pearson_correlation"] = spectral_simi.pearson_correlation() - 87 spectral_similarity_scores["spearman_correlation"] = spectral_simi.spearman_correlation() - 88 spectral_similarity_scores["kendall_tau_correlation"] = spectral_simi.kendall_tau() - 89 spectral_similarity_scores["euclidean_distance"] = spectral_simi.euclidean_distance() - 90 spectral_similarity_scores["manhattan_distance"] = spectral_simi.manhattan_distance() - 91 spectral_similarity_scores["jaccard_distance"] = spectral_simi.jaccard_distance() - 92 spectral_similarity_scores["dft_correlation"] = spectral_simi.dft_correlation() - 93 spectral_similarity_scores["dwt_correlation"] = spectral_simi.dwt_correlation() - 94 spectral_similarity_scores.update(spectral_simi.extra_distances()) - 95 # print(spectral_similarity_scores) - 96 # print(ref_obj.get('ri'), gc_peak.ri, self.gcms_obj.molecular_search_settings.ri_window) - 97 - 98 ri_score = exp(-1 * (power((gc_peak.ri - ref_obj.get('ri')), 2) / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2)))) - 99 -100 similarity_score = ((spectral_similarity_scores.get("cosine_correlation")**2) * (ri_score))**(1 / 3) -101 -102 return spectral_similarity_scores, ri_score, similarity_score + 81 if self.gcms_obj.molecular_search_settings.exploratory_mode: + 82 spectral_similarity_scores["weighted_cosine_correlation"] = ( + 
83 spectral_simi.weighted_cosine_correlation() + 84 ) + 85 ss, ss_nist = spectral_simi.stein_scott() + 86 spectral_similarity_scores["stein_scott_similarity"] = ss + 87 spectral_similarity_scores["stein_scott_similarity_nist"] = ss_nist + 88 + 89 spectral_similarity_scores["pearson_correlation"] = ( + 90 spectral_simi.pearson_correlation() + 91 ) + 92 spectral_similarity_scores["spearman_correlation"] = ( + 93 spectral_simi.spearman_correlation() + 94 ) + 95 spectral_similarity_scores["kendall_tau_correlation"] = ( + 96 spectral_simi.kendall_tau() + 97 ) + 98 spectral_similarity_scores["euclidean_distance"] = ( + 99 spectral_simi.euclidean_distance() +100 ) +101 spectral_similarity_scores["manhattan_distance"] = ( +102 spectral_simi.manhattan_distance() +103 ) +104 spectral_similarity_scores["jaccard_distance"] = ( +105 spectral_simi.jaccard_distance() +106 ) +107 spectral_similarity_scores["dft_correlation"] = ( +108 spectral_simi.dft_correlation() +109 ) +110 spectral_similarity_scores["dwt_correlation"] = ( +111 spectral_simi.dwt_correlation() +112 ) +113 spectral_similarity_scores.update(spectral_simi.extra_distances()) +114 # print(spectral_similarity_scores) +115 # print(ref_obj.get('ri'), gc_peak.ri, self.gcms_obj.molecular_search_settings.ri_window) +116 +117 ri_score = exp( +118 -1 +119 * ( +120 power((gc_peak.ri - ref_obj.get("ri")), 2) +121 / (2 * power(self.gcms_obj.molecular_search_settings.ri_std, 2)) +122 ) +123 ) +124 +125 similarity_score = ( +126 (spectral_similarity_scores.get("cosine_correlation") ** 2) * (ri_score) +127 ) ** (1 / 3) +128 +129 return spectral_similarity_scores, ri_score, similarity_score
    @@ -637,82 +739,91 @@

    Returns
    -
    105    def run(self):
    -106        """ Runs the low-resolution mass spectral match.
    -107        
    -108        """
    -109        # TODO select the best gcms peak
    -110        import tqdm
    -111
    -112        original_use_deconvolution = self.gcms_obj.chromatogram_settings.use_deconvolution
    -113
    -114        if not self.gcms_obj:
    -115
    -116            # Do not use deconvolution for the retention index calibration
    -117
    -118            if self.calibration:
    -119
    -120                self.gcms_obj.chromatogram_settings.use_deconvolution = False
    -121
    -122            self.gcms_obj.process_chromatogram()
    -123
    -124        self.gcms_obj.chromatogram_settings.use_deconvolution = original_use_deconvolution
    -125
    -126        for gc_peak in tqdm.tqdm(self.gcms_obj):
    -127
    -128            if not self.calibration:
    -129                
    -130                window = self.gcms_obj.molecular_search_settings.ri_search_range
    -131
    -132                ri = gc_peak.ri
    -133
    -134                min_mat_ri = (ri-window, ri+window)
    -135
    -136                ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri)
    -137
    -138            else:
    -139                
    -140                compound_names = self.gcms_obj.molecular_search_settings.ri_calibration_compound_names
    -141
    -142                window = self.gcms_obj.molecular_search_settings.rt_search_range
    +            
    132    def run(self):
    +133        """Runs the low-resolution mass spectral match."""
    +134        # TODO select the best gcms peak
    +135        import tqdm
    +136
    +137        original_use_deconvolution = (
    +138            self.gcms_obj.chromatogram_settings.use_deconvolution
    +139        )
    +140
    +141        if not self.gcms_obj:
    +142            # Do not use deconvolution for the retention index calibration
     143
    -144                rt = gc_peak.retention_time
    -145
    -146                min_mat_rt = (rt-window, rt+window)
    -147
    -148                ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names)
    -149
    -150            for ref_obj in ref_objs:
    -151            # uses spectral similarly and uses a threshold to only select peaks with high data correlation
    -152                
    -153                spectral_simi = SpectralSimilarity(gc_peak.mass_spectrum.mz_abun_dict, ref_obj)
    -154
    -155                if self.calibration:
    +144            if self.calibration:
    +145                self.gcms_obj.chromatogram_settings.use_deconvolution = False
    +146
    +147            self.gcms_obj.process_chromatogram()
    +148
    +149        self.gcms_obj.chromatogram_settings.use_deconvolution = (
    +150            original_use_deconvolution
    +151        )
    +152
    +153        for gc_peak in tqdm.tqdm(self.gcms_obj):
    +154            if not self.calibration:
    +155                window = self.gcms_obj.molecular_search_settings.ri_search_range
     156
    -157                    spectral_similarity_scores = {}
    -158                    spectral_similarity_scores["cosine_correlation"] = spectral_simi.cosine_correlation()
    -159
    -160                #print(w_correlation_value,correlation_value )
    -161                    if spectral_similarity_scores["cosine_correlation"] >= self.gcms_obj.molecular_search_settings.correlation_threshold:
    +157                ri = gc_peak.ri
    +158
    +159                min_mat_ri = (ri - window, ri + window)
    +160
    +161                ref_objs = self.sql_obj.query_min_max_ri(min_mat_ri)
     162
    -163                        gc_peak.add_compound(ref_obj, spectral_similarity_scores)
    -164
    -165                # use score, usually a combination of Retention index and Spectral Similarity
    -166                # Threshold is implemented by not necessarily used
    -167                else:
    -168
    -169                    # m/q developed methods will be implemented here
    -170                    spectral_similarity_scores, ri_score, similarity_score = self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi)
    +163            else:
    +164                compound_names = self.gcms_obj.molecular_search_settings.ri_calibration_compound_names
    +165
    +166                window = self.gcms_obj.molecular_search_settings.rt_search_range
    +167
    +168                rt = gc_peak.retention_time
    +169
    +170                min_mat_rt = (rt - window, rt + window)
     171
    -172                    #TODO need to add similarity score option in the parameters encapsulation class
    +172                ref_objs = self.sql_obj.query_names_and_rt(min_mat_rt, compound_names)
     173
    -174                    if similarity_score >= self.gcms_obj.molecular_search_settings.score_threshold:
    -175
    -176                        gc_peak.add_compound(ref_obj, spectral_similarity_scores, ri_score, similarity_score)
    -177
    -178
    -179        self.sql_obj.session.close()
    -180        self.sql_obj.engine.dispose()
    +174            for ref_obj in ref_objs:
    +175                # uses spectral similarly and uses a threshold to only select peaks with high data correlation
    +176
    +177                spectral_simi = SpectralSimilarity(
    +178                    gc_peak.mass_spectrum.mz_abun_dict, ref_obj
    +179                )
    +180
    +181                if self.calibration:
    +182                    spectral_similarity_scores = {}
    +183                    spectral_similarity_scores["cosine_correlation"] = (
    +184                        spectral_simi.cosine_correlation()
    +185                    )
    +186
    +187                    # print(w_correlation_value,correlation_value )
    +188                    if (
    +189                        spectral_similarity_scores["cosine_correlation"]
    +190                        >= self.gcms_obj.molecular_search_settings.correlation_threshold
    +191                    ):
    +192                        gc_peak.add_compound(ref_obj, spectral_similarity_scores)
    +193
    +194                # use score, usually a combination of Retention index and Spectral Similarity
    +195                # Threshold is implemented by not necessarily used
    +196                else:
    +197                    # m/q developed methods will be implemented here
    +198                    spectral_similarity_scores, ri_score, similarity_score = (
    +199                        self.metabolite_detector_score(gc_peak, ref_obj, spectral_simi)
    +200                    )
    +201
    +202                    # TODO need to add similarity score option in the parameters encapsulation class
    +203
    +204                    if (
    +205                        similarity_score
    +206                        >= self.gcms_obj.molecular_search_settings.score_threshold
    +207                    ):
    +208                        gc_peak.add_compound(
    +209                            ref_obj,
    +210                            spectral_similarity_scores,
    +211                            ri_score,
    +212                            similarity_score,
    +213                        )
    +214
    +215        self.sql_obj.session.close()
    +216        self.sql_obj.engine.dispose()
     
    diff --git a/docs/corems/molecular_id/search/database_interfaces.html b/docs/corems/molecular_id/search/database_interfaces.html index 90a34b84..2a7f24d3 100644 --- a/docs/corems/molecular_id/search/database_interfaces.html +++ b/docs/corems/molecular_id/search/database_interfaces.html @@ -151,647 +151,647 @@

    11from corems.molecular_id.factory.lipid_molecular_metadata import LipidMetadata 12from corems.mass_spectra.calc.lc_calc import find_closest 13 - 14class SpectralDatabaseInterface(ABC): - 15 """ - 16 Base class that facilitates connection to spectral reference databases, - 17 such as EMSL's Metabolomics Reference Database (MetabRef). - 18 - 19 """ - 20 - 21 def __init__(self, key=None): - 22 """ - 23 Initialize instance. - 24 - 25 Parameters - 26 ---------- - 27 key : str - 28 Token key. - 29 - 30 """ - 31 - 32 self.key = key - 33 - 34 if self.key is None: - 35 raise ValueError( - 36 "Must specify environment variable key for token associatedwith this database interface." - 37 ) - 38 - 39 def set_token(self, path): - 40 """ - 41 Set environment variable for MetabRef database token. - 42 - 43 Parameters - 44 ---------- - 45 path : str - 46 Path to token. - 47 - 48 """ - 49 - 50 # Read token from file - 51 with open(path, "r", encoding="utf-8") as f: - 52 token = f.readline().strip() - 53 - 54 # Set environment variable - 55 os.environ[self.key] = token - 56 - 57 def get_token(self): - 58 """ - 59 Get environment variable for database token. - 60 - 61 Returns - 62 ------- - 63 str - 64 Token string. - 65 - 66 """ - 67 - 68 # Check for token - 69 if self.key not in os.environ: - 70 raise ValueError("Must set {} environment variable.".format(self.key)) - 71 - 72 # Get token from environment variables - 73 return os.environ.get(self.key) - 74 - 75 def get_header(self): - 76 """ - 77 Access stored database token and prepare as header. - 78 - 79 Returns - 80 ------- - 81 str - 82 Header string. - 83 - 84 """ - 85 - 86 # Get token - 87 token = self.get_token() - 88 - 89 # Pad header information - 90 header = {"Authorization": f"Bearer {token}", "Content-Type": "text/plain"} - 91 - 92 return header - 93 - 94 def get_query(self, url): - 95 """ - 96 Request payload from URL according to `get` protocol. 
- 97 - 98 Parameters - 99 ---------- -100 url : str -101 URL for request. -102 -103 Returns -104 ------- -105 dict -106 Response as JSON. -107 -108 """ -109 -110 # Query URL via `get` -111 response = requests.get(url, headers=self.get_header()) -112 -113 # Check response -114 response.raise_for_status() -115 -116 # Return as JSON -117 return response.json() -118 -119 def post_query(self, url, variable, values, tolerance): -120 """ -121 Request payload from URL according to `post` protocol. -122 -123 Parameters -124 ---------- -125 url : str -126 URL for request. -127 variable : str -128 Variable to query. -129 values : str -130 Specific values of `variable` to query. -131 tolerance : str -132 Query tolerance relative to `values`. -133 -134 Returns -135 ------- -136 dict -137 Response as JSON. -138 -139 """ -140 -141 # Coerce to string -142 if not isinstance(variable, str): -143 variable = str(variable).replace(" ", "") -144 -145 if not isinstance(values, str): -146 values = str(values).replace(" ", "") -147 -148 if not isinstance(tolerance, str): -149 tolerance = str(tolerance).replace(" ", "") -150 -151 # Query URL via `post` -152 response = requests.post( -153 os.path.join(url, variable, tolerance), -154 data=values, -155 headers=self.get_header(), -156 ) -157 -158 # Check response -159 response.raise_for_status() -160 -161 # Return as JSON -162 return response.json() -163 + 14 + 15class SpectralDatabaseInterface(ABC): + 16 """ + 17 Base class that facilitates connection to spectral reference databases, + 18 such as EMSL's Metabolomics Reference Database (MetabRef). + 19 + 20 """ + 21 + 22 def __init__(self, key=None): + 23 """ + 24 Initialize instance. + 25 + 26 Parameters + 27 ---------- + 28 key : str + 29 Token key. + 30 + 31 """ + 32 + 33 self.key = key + 34 + 35 if self.key is None: + 36 raise ValueError( + 37 "Must specify environment variable key for token associatedwith this database interface." 
+ 38 ) + 39 + 40 def set_token(self, path): + 41 """ + 42 Set environment variable for MetabRef database token. + 43 + 44 Parameters + 45 ---------- + 46 path : str + 47 Path to token. + 48 + 49 """ + 50 + 51 # Read token from file + 52 with open(path, "r", encoding="utf-8") as f: + 53 token = f.readline().strip() + 54 + 55 # Set environment variable + 56 os.environ[self.key] = token + 57 + 58 def get_token(self): + 59 """ + 60 Get environment variable for database token. + 61 + 62 Returns + 63 ------- + 64 str + 65 Token string. + 66 + 67 """ + 68 + 69 # Check for token + 70 if self.key not in os.environ: + 71 raise ValueError("Must set {} environment variable.".format(self.key)) + 72 + 73 # Get token from environment variables + 74 return os.environ.get(self.key) + 75 + 76 def get_header(self): + 77 """ + 78 Access stored database token and prepare as header. + 79 + 80 Returns + 81 ------- + 82 str + 83 Header string. + 84 + 85 """ + 86 + 87 # Get token + 88 token = self.get_token() + 89 + 90 # Pad header information + 91 header = {"Authorization": f"Bearer {token}", "Content-Type": "text/plain"} + 92 + 93 return header + 94 + 95 def get_query(self, url): + 96 """ + 97 Request payload from URL according to `get` protocol. + 98 + 99 Parameters +100 ---------- +101 url : str +102 URL for request. +103 +104 Returns +105 ------- +106 dict +107 Response as JSON. +108 +109 """ +110 +111 # Query URL via `get` +112 response = requests.get(url, headers=self.get_header()) +113 +114 # Check response +115 response.raise_for_status() +116 +117 # Return as JSON +118 return response.json() +119 +120 def post_query(self, url, variable, values, tolerance): +121 """ +122 Request payload from URL according to `post` protocol. +123 +124 Parameters +125 ---------- +126 url : str +127 URL for request. +128 variable : str +129 Variable to query. +130 values : str +131 Specific values of `variable` to query. +132 tolerance : str +133 Query tolerance relative to `values`. 
+134 +135 Returns +136 ------- +137 dict +138 Response as JSON. +139 +140 """ +141 +142 # Coerce to string +143 if not isinstance(variable, str): +144 variable = str(variable).replace(" ", "") +145 +146 if not isinstance(values, str): +147 values = str(values).replace(" ", "") +148 +149 if not isinstance(tolerance, str): +150 tolerance = str(tolerance).replace(" ", "") +151 +152 # Query URL via `post` +153 response = requests.post( +154 os.path.join(url, variable, tolerance), +155 data=values, +156 headers=self.get_header(), +157 ) +158 +159 # Check response +160 response.raise_for_status() +161 +162 # Return as JSON +163 return response.json() 164 -165class MetabRefInterface(SpectralDatabaseInterface): -166 """ -167 Interface to the Metabolomics Reference Database. -168 """ -169 -170 def __init__(self): -171 """ -172 Initialize instance. -173 -174 """ -175 -176 super().__init__(key="METABREF_TOKEN") -177 -178 def _get_format_func(self, format): -179 """ -180 Obtain format function by key. -181 -182 Returns -183 ------- -184 func -185 Formatting function. -186 """ -187 -188 if format.lower() in self.format_map.keys(): -189 return self.format_map[format.lower()] -190 -191 raise ValueError(("{} not a supported format.").format(format)) -192 -193 def spectrum_to_array(self, spectrum, normalize=True): -194 """ -195 Convert MetabRef-formatted spectrum to array. -196 -197 Parameters -198 ---------- -199 spectrum : str -200 MetabRef spectrum, i.e. list of (m/z,abundance) pairs. -201 normalize : bool -202 Normalize the spectrum by its magnitude. -203 -204 Returns -205 ------- -206 :obj:`~numpy.array` -207 Array of shape (N, 2), with m/z in the first column and abundance in -208 the second. 
-209 -210 """ -211 -212 # Convert parenthesis-delimited string to array -213 arr = np.array( -214 re.findall(r"\(([^,]+),([^)]+)\)", spectrum), dtype=float -215 ).reshape(-1, 2) -216 -217 # Normalize the array -218 if normalize: -219 arr[:, -1] = arr[:, -1] / arr[:, -1].sum() -220 -221 return arr -222 -223 def _to_flashentropy(self, metabref_lib, normalize=True, fe_kwargs={}): -224 """ -225 Convert metabref-formatted library to FlashEntropy library. -226 -227 Parameters -228 ---------- -229 metabref_lib : dict -230 MetabRef MS2 library in JSON format or FlashEntropy search instance (for reformatting at different MS2 separation). -231 normalize : bool -232 Normalize each spectrum by its magnitude. -233 fe_kwargs : dict, optional -234 Keyword arguments for instantiation of FlashEntropy search and building index for FlashEntropy search; -235 any keys not recognized will be ignored. By default, all parameters set to defaults. -236 -237 Returns -238 ------- -239 :obj:`~ms_entropy.FlashEntropySearch` -240 MS2 library as FlashEntropy search instance. -241 -242 Raises -243 ------ -244 ValueError -245 If "min_ms2_difference_in_da" or "max_ms2_tolerance_in_da" are present in `fe_kwargs` and they are not equal. -246 -247 """ -248 # If "min_ms2_difference_in_da" in fe_kwargs, check that "max_ms2_tolerance_in_da" is also present and that min_ms2_difference_in_da = 2xmax_ms2_tolerance_in_da -249 if ( -250 "min_ms2_difference_in_da" in fe_kwargs -251 or "max_ms2_tolerance_in_da" in fe_kwargs -252 ): -253 if ( -254 "min_ms2_difference_in_da" not in fe_kwargs -255 or "max_ms2_tolerance_in_da" not in fe_kwargs -256 ): -257 raise ValueError( -258 "Both 'min_ms2_difference_in_da' and 'max_ms2_tolerance_in_da' must be specified." -259 ) -260 if ( -261 fe_kwargs["min_ms2_difference_in_da"] != 2*fe_kwargs["max_ms2_tolerance_in_da"] -262 ): -263 raise ValueError( -264 "The values of 'min_ms2_difference_in_da' must be exactly 2x 'max_ms2_tolerance_in_da'." 
-265 ) -266 -267 # Initialize empty library -268 fe_lib = [] -269 -270 # Enumerate spectra -271 for i, source in enumerate(metabref_lib): -272 # Reorganize source dict, if necessary -273 if "spectrum_data" in source.keys(): -274 spectrum = source["spectrum_data"] -275 else: -276 spectrum = source -277 -278 # Rename precursor_mz key for FlashEntropy -279 if "precursor_mz" not in spectrum.keys(): -280 spectrum["precursor_mz"] = spectrum.pop("precursor_ion") -281 -282 # Convert CoreMS spectrum to array and clean, store as `peaks` -283 spectrum["peaks"] = self.spectrum_to_array( -284 spectrum["mz"], normalize=normalize -285 ) -286 -287 # Add spectrum to library -288 fe_lib.append(spectrum) -289 -290 # Initialize FlashEntropy -291 fe_init_kws = [ -292 "max_ms2_tolerance_in_da", -293 "mz_index_step", -294 "low_memory", -295 "path_data", -296 ] -297 fe_init_kws = {k: v for k, v in fe_kwargs.items() if k in fe_init_kws} -298 fes = FlashEntropySearch(**fe_init_kws) -299 -300 # Build FlashEntropy index -301 fe_index_kws = [ -302 "max_indexed_mz", -303 "precursor_ions_removal_da", -304 "noise_threshold", -305 "min_ms2_difference_in_da", -306 "max_peak_num", -307 ] -308 fe_index_kws = {k: v for k, v in fe_kwargs.items() if k in fe_index_kws} -309 fes.build_index(fe_lib, **fe_index_kws, clean_spectra=True) -310 -311 return fes +165 +166class MetabRefInterface(SpectralDatabaseInterface): +167 """ +168 Interface to the Metabolomics Reference Database. +169 """ +170 +171 def __init__(self): +172 """ +173 Initialize instance. +174 +175 """ +176 +177 super().__init__(key="METABREF_TOKEN") +178 +179 def _get_format_func(self, format): +180 """ +181 Obtain format function by key. +182 +183 Returns +184 ------- +185 func +186 Formatting function. 
+187 """ +188 +189 if format.lower() in self.format_map.keys(): +190 return self.format_map[format.lower()] +191 +192 raise ValueError(("{} not a supported format.").format(format)) +193 +194 def spectrum_to_array(self, spectrum, normalize=True): +195 """ +196 Convert MetabRef-formatted spectrum to array. +197 +198 Parameters +199 ---------- +200 spectrum : str +201 MetabRef spectrum, i.e. list of (m/z,abundance) pairs. +202 normalize : bool +203 Normalize the spectrum by its magnitude. +204 +205 Returns +206 ------- +207 :obj:`~numpy.array` +208 Array of shape (N, 2), with m/z in the first column and abundance in +209 the second. +210 +211 """ +212 +213 # Convert parenthesis-delimited string to array +214 arr = np.array( +215 re.findall(r"\(([^,]+),([^)]+)\)", spectrum), dtype=float +216 ).reshape(-1, 2) +217 +218 # Normalize the array +219 if normalize: +220 arr[:, -1] = arr[:, -1] / arr[:, -1].sum() +221 +222 return arr +223 +224 def _to_flashentropy(self, metabref_lib, normalize=True, fe_kwargs={}): +225 """ +226 Convert metabref-formatted library to FlashEntropy library. +227 +228 Parameters +229 ---------- +230 metabref_lib : dict +231 MetabRef MS2 library in JSON format or FlashEntropy search instance (for reformatting at different MS2 separation). +232 normalize : bool +233 Normalize each spectrum by its magnitude. +234 fe_kwargs : dict, optional +235 Keyword arguments for instantiation of FlashEntropy search and building index for FlashEntropy search; +236 any keys not recognized will be ignored. By default, all parameters set to defaults. +237 +238 Returns +239 ------- +240 :obj:`~ms_entropy.FlashEntropySearch` +241 MS2 library as FlashEntropy search instance. +242 +243 Raises +244 ------ +245 ValueError +246 If "min_ms2_difference_in_da" or "max_ms2_tolerance_in_da" are present in `fe_kwargs` and they are not equal. 
+247 +248 """ +249 # If "min_ms2_difference_in_da" in fe_kwargs, check that "max_ms2_tolerance_in_da" is also present and that min_ms2_difference_in_da = 2xmax_ms2_tolerance_in_da +250 if ( +251 "min_ms2_difference_in_da" in fe_kwargs +252 or "max_ms2_tolerance_in_da" in fe_kwargs +253 ): +254 if ( +255 "min_ms2_difference_in_da" not in fe_kwargs +256 or "max_ms2_tolerance_in_da" not in fe_kwargs +257 ): +258 raise ValueError( +259 "Both 'min_ms2_difference_in_da' and 'max_ms2_tolerance_in_da' must be specified." +260 ) +261 if ( +262 fe_kwargs["min_ms2_difference_in_da"] +263 != 2 * fe_kwargs["max_ms2_tolerance_in_da"] +264 ): +265 raise ValueError( +266 "The values of 'min_ms2_difference_in_da' must be exactly 2x 'max_ms2_tolerance_in_da'." +267 ) +268 +269 # Initialize empty library +270 fe_lib = [] +271 +272 # Enumerate spectra +273 for i, source in enumerate(metabref_lib): +274 # Reorganize source dict, if necessary +275 if "spectrum_data" in source.keys(): +276 spectrum = source["spectrum_data"] +277 else: +278 spectrum = source +279 +280 # Rename precursor_mz key for FlashEntropy +281 if "precursor_mz" not in spectrum.keys(): +282 spectrum["precursor_mz"] = spectrum.pop("precursor_ion") +283 +284 # Convert CoreMS spectrum to array and clean, store as `peaks` +285 spectrum["peaks"] = self.spectrum_to_array( +286 spectrum["mz"], normalize=normalize +287 ) +288 +289 # Add spectrum to library +290 fe_lib.append(spectrum) +291 +292 # Initialize FlashEntropy +293 fe_init_kws = [ +294 "max_ms2_tolerance_in_da", +295 "mz_index_step", +296 "low_memory", +297 "path_data", +298 ] +299 fe_init_kws = {k: v for k, v in fe_kwargs.items() if k in fe_init_kws} +300 fes = FlashEntropySearch(**fe_init_kws) +301 +302 # Build FlashEntropy index +303 fe_index_kws = [ +304 "max_indexed_mz", +305 "precursor_ions_removal_da", +306 "noise_threshold", +307 "min_ms2_difference_in_da", +308 "max_peak_num", +309 ] +310 fe_index_kws = {k: v for k, v in fe_kwargs.items() if k in 
fe_index_kws} +311 fes.build_index(fe_lib, **fe_index_kws, clean_spectra=True) 312 -313 def _dict_to_dataclass(self, metabref_lib, data_class): -314 """ -315 Convert dictionary to dataclass. -316 -317 Notes -318 ----- -319 This function will pull the attributes a dataclass and its parent class -320 and convert the dictionary to a dataclass instance with the appropriate -321 attributes. -322 -323 Parameters -324 ---------- -325 data_class : :obj:`~dataclasses.dataclass` -326 Dataclass to convert to. -327 metabref_lib : dict -328 Metabref dictionary object to convert to dataclass. -329 -330 Returns -331 ------- -332 :obj:`~dataclasses.dataclass` -333 Dataclass instance. -334 -335 """ +313 return fes +314 +315 def _dict_to_dataclass(self, metabref_lib, data_class): +316 """ +317 Convert dictionary to dataclass. +318 +319 Notes +320 ----- +321 This function will pull the attributes a dataclass and its parent class +322 and convert the dictionary to a dataclass instance with the appropriate +323 attributes. +324 +325 Parameters +326 ---------- +327 data_class : :obj:`~dataclasses.dataclass` +328 Dataclass to convert to. +329 metabref_lib : dict +330 Metabref dictionary object to convert to dataclass. +331 +332 Returns +333 ------- +334 :obj:`~dataclasses.dataclass` +335 Dataclass instance. 
336 -337 # Get list of expected attributes of data_class -338 data_class_keys = list(data_class.__annotations__.keys()) -339 -340 # Does the data_class inherit from another class, if so, get the attributes of the parent class as well -341 if len(data_class.__mro__) > 2: -342 parent_class_keys = list(data_class.__bases__[0].__annotations__.keys()) -343 data_class_keys = list(set(data_class_keys + parent_class_keys)) -344 -345 # Remove keys that are not in the data_class from the input dictionary -346 input_dict = {k: v for k, v in metabref_lib.items() if k in data_class_keys} -347 -348 # Add keys that are in the data class but not in the input dictionary as None -349 for key in data_class_keys: -350 if key not in input_dict.keys(): -351 input_dict[key] = None -352 return data_class(**input_dict) -353 -354 -355class MetabRefGCInterface(MetabRefInterface): -356 """ -357 Interface to the Metabolomics Reference Database. -358 """ -359 -360 def __init__(self): -361 """ -362 Initialize instance. -363 -364 """ +337 """ +338 +339 # Get list of expected attributes of data_class +340 data_class_keys = list(data_class.__annotations__.keys()) +341 +342 # Does the data_class inherit from another class, if so, get the attributes of the parent class as well +343 if len(data_class.__mro__) > 2: +344 parent_class_keys = list(data_class.__bases__[0].__annotations__.keys()) +345 data_class_keys = list(set(data_class_keys + parent_class_keys)) +346 +347 # Remove keys that are not in the data_class from the input dictionary +348 input_dict = {k: v for k, v in metabref_lib.items() if k in data_class_keys} +349 +350 # Add keys that are in the data class but not in the input dictionary as None +351 for key in data_class_keys: +352 if key not in input_dict.keys(): +353 input_dict[key] = None +354 return data_class(**input_dict) +355 +356 +357class MetabRefGCInterface(MetabRefInterface): +358 """ +359 Interface to the Metabolomics Reference Database. 
+360 """ +361 +362 def __init__(self): +363 """ +364 Initialize instance. 365 -366 super().__init__() -367 self.GCMS_LIBRARY_URL = "https://metabref.emsl.pnnl.gov/api/mslevel/1" -368 self.FAMES_URL = "https://metabref.emsl.pnnl.gov/api/fames" -369 -370 self.__init_format_map__() +366 """ +367 +368 super().__init__() +369 self.GCMS_LIBRARY_URL = "https://metabref.emsl.pnnl.gov/api/mslevel/1" +370 self.FAMES_URL = "https://metabref.emsl.pnnl.gov/api/fames" 371 -372 def __init_format_map__(self): -373 """ -374 Initialize database format mapper, enabling multiple format requests. -375 -376 """ +372 self.__init_format_map__() +373 +374 def __init_format_map__(self): +375 """ +376 Initialize database format mapper, enabling multiple format requests. 377 -378 # Define format workflows -379 self.format_map = { -380 "json": lambda x, normalize, fe_kwargs: x, -381 "dict": lambda x, -382 normalize, -383 fe_kwargs: self._to_LowResolutionEICompound_dict(x, normalize), -384 "sql": lambda x, -385 normalize, -386 fe_kwargs: self._LowResolutionEICompound_dict_to_sqlite( -387 self._to_LowResolutionEICompound_dict(x, normalize) -388 ), -389 } -390 -391 # Add aliases -392 self.format_map["metabref"] = self.format_map["json"] -393 self.format_map["datadict"] = self.format_map["dict"] -394 self.format_map["data-dict"] = self.format_map["dict"] -395 self.format_map["lowreseicompound"] = self.format_map["dict"] -396 self.format_map["lowres"] = self.format_map["dict"] -397 self.format_map["lowresgc"] = self.format_map["dict"] -398 self.format_map["sqlite"] = self.format_map["sql"] -399 -400 def available_formats(self): -401 """ -402 View list of available formats. -403 -404 Returns -405 ------- -406 list -407 Format map keys. 
-408 """ -409 -410 return list(self.format_map.keys()) +378 """ +379 +380 # Define format workflows +381 self.format_map = { +382 "json": lambda x, normalize, fe_kwargs: x, +383 "dict": lambda x, +384 normalize, +385 fe_kwargs: self._to_LowResolutionEICompound_dict(x, normalize), +386 "sql": lambda x, +387 normalize, +388 fe_kwargs: self._LowResolutionEICompound_dict_to_sqlite( +389 self._to_LowResolutionEICompound_dict(x, normalize) +390 ), +391 } +392 +393 # Add aliases +394 self.format_map["metabref"] = self.format_map["json"] +395 self.format_map["datadict"] = self.format_map["dict"] +396 self.format_map["data-dict"] = self.format_map["dict"] +397 self.format_map["lowreseicompound"] = self.format_map["dict"] +398 self.format_map["lowres"] = self.format_map["dict"] +399 self.format_map["lowresgc"] = self.format_map["dict"] +400 self.format_map["sqlite"] = self.format_map["sql"] +401 +402 def available_formats(self): +403 """ +404 View list of available formats. +405 +406 Returns +407 ------- +408 list +409 Format map keys. +410 """ 411 -412 def get_library(self, format="json", normalize=False): -413 """ -414 Request MetabRef GC/MS library. -415 -416 Parameters -417 ---------- -418 format : str -419 Format of requested library, i.e. "json", "sql", "flashentropy". -420 See `available_formats` method for aliases. -421 normalize : bool -422 Normalize the spectrum by its magnitude. -423 -424 Returns -425 ------- -426 Library in requested format. -427 -428 """ +412 return list(self.format_map.keys()) +413 +414 def get_library(self, format="json", normalize=False): +415 """ +416 Request MetabRef GC/MS library. +417 +418 Parameters +419 ---------- +420 format : str +421 Format of requested library, i.e. "json", "sql", "flashentropy". +422 See `available_formats` method for aliases. +423 normalize : bool +424 Normalize the spectrum by its magnitude. +425 +426 Returns +427 ------- +428 Library in requested format. 
429 -430 # Init format function -431 format_func = self._get_format_func(format) -432 -433 return format_func( -434 self.get_query(self.GCMS_LIBRARY_URL)["GC-MS"], normalize, {} -435 ) -436 -437 def get_fames(self, format="json", normalize=False): -438 """ -439 Request MetabRef GC/MS FAMEs library. -440 -441 Parameters -442 ---------- -443 format : str -444 Format of requested library, i.e. "json", "sql", "flashentropy". -445 See `available_formats` method for aliases. -446 normalize : bool -447 Normalize the spectrum by its magnitude. -448 -449 Returns -450 ------- -451 Library in requested format. -452 -453 """ +430 """ +431 +432 # Init format function +433 format_func = self._get_format_func(format) +434 +435 return format_func( +436 self.get_query(self.GCMS_LIBRARY_URL)["GC-MS"], normalize, {} +437 ) +438 +439 def get_fames(self, format="json", normalize=False): +440 """ +441 Request MetabRef GC/MS FAMEs library. +442 +443 Parameters +444 ---------- +445 format : str +446 Format of requested library, i.e. "json", "sql", "flashentropy". +447 See `available_formats` method for aliases. +448 normalize : bool +449 Normalize the spectrum by its magnitude. +450 +451 Returns +452 ------- +453 Library in requested format. 454 -455 # Init format function -456 format_func = self._get_format_func(format) -457 -458 return format_func(self.get_query(self.FAMES_URL)["GC-MS"], normalize, {}) +455 """ +456 +457 # Init format function +458 format_func = self._get_format_func(format) 459 -460 def _to_LowResolutionEICompound_dict(self, metabref_lib, normalize=False): -461 """ -462 Convert MetabRef-formatted library to CoreMS LowResolutionEICompound-formatted -463 dictionary for local ingestion. -464 -465 Parameters -466 ---------- -467 metabref_lib : dict -468 MetabRef GC-MS library in JSON format. -469 normalize : bool -470 Normalize each spectrum by its magnitude. -471 -472 Returns -473 ------- -474 list of dict -475 List of each spectrum contained in dictionary. 
-476 -477 """ +460 return format_func(self.get_query(self.FAMES_URL)["GC-MS"], normalize, {}) +461 +462 def _to_LowResolutionEICompound_dict(self, metabref_lib, normalize=False): +463 """ +464 Convert MetabRef-formatted library to CoreMS LowResolutionEICompound-formatted +465 dictionary for local ingestion. +466 +467 Parameters +468 ---------- +469 metabref_lib : dict +470 MetabRef GC-MS library in JSON format. +471 normalize : bool +472 Normalize each spectrum by its magnitude. +473 +474 Returns +475 ------- +476 list of dict +477 List of each spectrum contained in dictionary. 478 -479 # All below key:value lookups are based on CoreMS class definitions -480 # NOT MetabRef content. For example, MetabRef has keys for PubChem, -481 # USI, etc. that are not considered below. -482 -483 # Dictionary to map metabref keys to corems keys -484 metadatar_cols = { -485 "casno": "cas", -486 "inchikey": "inchikey", -487 "inchi": "inchi", -488 "chebi": "chebi", -489 "smiles": "smiles", -490 "kegg": "kegg", -491 "iupac_name": "iupac_name", -492 "traditional_name": "traditional_name", # Not present in metabref -493 "common_name": "common_name", # Not present in metabref -494 } -495 -496 # Dictionary to map metabref keys to corems keys -497 lowres_ei_compound_cols = { -498 "id": "metabref_id", -499 "molecule_name": "name", # Is this correct? 
-500 "classify": "classify", # Not present in metabref -501 "formula": "formula", -502 "ri": "ri", -503 "rt": "retention_time", -504 "source": "source", # Not present in metabref -505 "casno": "casno", -506 "comments": "comment", -507 "source_temp_c": "source_temp_c", # Not present in metabref -508 "ev": "ev", # Not present in metabref -509 "peak_count": "peaks_count", -510 "mz": "mz", -511 "abundance": "abundance", -512 } -513 -514 # Local result container -515 corems_lib = [] -516 -517 # Enumerate spectra -518 for i, source_ in enumerate(metabref_lib): -519 # Copy source to prevent modification -520 source = source_.copy() -521 -522 # Flatten source dict -523 source = source.pop("spectrum_data") | source -524 -525 # Parse target data -526 target = { -527 lowres_ei_compound_cols[k]: v -528 for k, v in source.items() -529 if k in lowres_ei_compound_cols -530 } -531 -532 # Explicitly add this to connect with LowResCompoundRef later -533 target["rt"] = source["rt"] -534 -535 # Parse (mz, abundance) -536 arr = self.spectrum_to_array(target["mz"], normalize=normalize) -537 target["mz"] = arr[:, 0] -538 target["abundance"] = arr[:, 1] -539 -540 # Parse meta data -541 target["metadata"] = { -542 metadatar_cols[k]: v for k, v in source.items() if k in metadatar_cols -543 } -544 -545 # Add anything else -546 for k in source: -547 if k not in lowres_ei_compound_cols: -548 target[k] = source[k] -549 -550 # Add to CoreMS list -551 corems_lib.append(target) -552 -553 return corems_lib +479 """ +480 +481 # All below key:value lookups are based on CoreMS class definitions +482 # NOT MetabRef content. For example, MetabRef has keys for PubChem, +483 # USI, etc. that are not considered below. 
+484 +485 # Dictionary to map metabref keys to corems keys +486 metadatar_cols = { +487 "casno": "cas", +488 "inchikey": "inchikey", +489 "inchi": "inchi", +490 "chebi": "chebi", +491 "smiles": "smiles", +492 "kegg": "kegg", +493 "iupac_name": "iupac_name", +494 "traditional_name": "traditional_name", # Not present in metabref +495 "common_name": "common_name", # Not present in metabref +496 } +497 +498 # Dictionary to map metabref keys to corems keys +499 lowres_ei_compound_cols = { +500 "id": "metabref_id", +501 "molecule_name": "name", # Is this correct? +502 "classify": "classify", # Not present in metabref +503 "formula": "formula", +504 "ri": "ri", +505 "rt": "retention_time", +506 "source": "source", # Not present in metabref +507 "casno": "casno", +508 "comments": "comment", +509 "source_temp_c": "source_temp_c", # Not present in metabref +510 "ev": "ev", # Not present in metabref +511 "peak_count": "peaks_count", +512 "mz": "mz", +513 "abundance": "abundance", +514 } +515 +516 # Local result container +517 corems_lib = [] +518 +519 # Enumerate spectra +520 for i, source_ in enumerate(metabref_lib): +521 # Copy source to prevent modification +522 source = source_.copy() +523 +524 # Flatten source dict +525 source = source.pop("spectrum_data") | source +526 +527 # Parse target data +528 target = { +529 lowres_ei_compound_cols[k]: v +530 for k, v in source.items() +531 if k in lowres_ei_compound_cols +532 } +533 +534 # Explicitly add this to connect with LowResCompoundRef later +535 target["rt"] = source["rt"] +536 +537 # Parse (mz, abundance) +538 arr = self.spectrum_to_array(target["mz"], normalize=normalize) +539 target["mz"] = arr[:, 0] +540 target["abundance"] = arr[:, 1] +541 +542 # Parse meta data +543 target["metadata"] = { +544 metadatar_cols[k]: v for k, v in source.items() if k in metadatar_cols +545 } +546 +547 # Add anything else +548 for k in source: +549 if k not in lowres_ei_compound_cols: +550 target[k] = source[k] +551 +552 # Add to CoreMS 
list +553 corems_lib.append(target) 554 -555 def _LowResolutionEICompound_dict_to_sqlite( -556 self, lowres_ei_compound_dict, url="sqlite://" -557 ): -558 """ -559 Convert CoreMS LowResolutionEICompound-formatted dictionary to SQLite -560 database for local ingestion. -561 -562 Parameters -563 ---------- -564 lowres_ei_compound_dict : dict -565 CoreMS GC-MS library formatted for LowResolutionEICompound. -566 url : str -567 URL to SQLite prefix. -568 -569 Returns -570 ------- -571 sqlite database -572 Spectra contained in SQLite database. -573 -574 """ +555 return corems_lib +556 +557 def _LowResolutionEICompound_dict_to_sqlite( +558 self, lowres_ei_compound_dict, url="sqlite://" +559 ): +560 """ +561 Convert CoreMS LowResolutionEICompound-formatted dictionary to SQLite +562 database for local ingestion. +563 +564 Parameters +565 ---------- +566 lowres_ei_compound_dict : dict +567 CoreMS GC-MS library formatted for LowResolutionEICompound. +568 url : str +569 URL to SQLite prefix. +570 +571 Returns +572 ------- +573 sqlite database +574 Spectra contained in SQLite database. 
575 -576 # Dictionary to map corems keys to all-caps keys -577 capped_cols = { -578 "name": "NAME", -579 "formula": "FORM", -580 "ri": "RI", -581 "retention_time": "RT", -582 "source": "SOURCE", -583 "casno": "CASNO", -584 "comment": "COMMENT", -585 "peaks_count": "NUM PEAKS", -586 } -587 -588 # Initialize SQLite object -589 sqlite_obj = EI_LowRes_SQLite(url=url) -590 -591 # Iterate spectra -592 for _data_dict in lowres_ei_compound_dict: -593 # Copy source to prevent modification -594 data_dict = _data_dict.copy() -595 -596 # Add missing capped values -597 for k, v in capped_cols.items(): -598 # Key exists -599 if k in data_dict: -600 # # This will replace the key -601 # data_dict[v] = data_dict.pop(k) -602 -603 # This will keep both keys -604 data_dict[v] = data_dict[k] -605 -606 # Parse number of peaks -607 if not data_dict.get("NUM PEAKS"): -608 data_dict["NUM PEAKS"] = len(data_dict.get("mz")) -609 -610 # Parse CAS number -611 if not data_dict.get("CASNO"): -612 data_dict["CASNO"] = data_dict.get("CAS") -613 -614 if not data_dict["CASNO"]: -615 data_dict["CASNO"] = 0 -616 -617 # Build linked metadata table -618 if "metadata" in data_dict: -619 if len(data_dict["metadata"]) > 0: -620 data_dict["metadatar"] = Metadatar(**data_dict.pop("metadata")) -621 else: -622 data_dict.pop("metadata") -623 -624 # Attempt addition to sqlite -625 try: -626 sqlite_obj.add_compound(data_dict) -627 except: -628 print(data_dict["NAME"]) -629 -630 return sqlite_obj +576 """ +577 +578 # Dictionary to map corems keys to all-caps keys +579 capped_cols = { +580 "name": "NAME", +581 "formula": "FORM", +582 "ri": "RI", +583 "retention_time": "RT", +584 "source": "SOURCE", +585 "casno": "CASNO", +586 "comment": "COMMENT", +587 "peaks_count": "NUM PEAKS", +588 } +589 +590 # Initialize SQLite object +591 sqlite_obj = EI_LowRes_SQLite(url=url) +592 +593 # Iterate spectra +594 for _data_dict in lowres_ei_compound_dict: +595 # Copy source to prevent modification +596 data_dict = 
_data_dict.copy() +597 +598 # Add missing capped values +599 for k, v in capped_cols.items(): +600 # Key exists +601 if k in data_dict: +602 # # This will replace the key +603 # data_dict[v] = data_dict.pop(k) +604 +605 # This will keep both keys +606 data_dict[v] = data_dict[k] +607 +608 # Parse number of peaks +609 if not data_dict.get("NUM PEAKS"): +610 data_dict["NUM PEAKS"] = len(data_dict.get("mz")) +611 +612 # Parse CAS number +613 if not data_dict.get("CASNO"): +614 data_dict["CASNO"] = data_dict.get("CAS") +615 +616 if not data_dict["CASNO"]: +617 data_dict["CASNO"] = 0 +618 +619 # Build linked metadata table +620 if "metadata" in data_dict: +621 if len(data_dict["metadata"]) > 0: +622 data_dict["metadatar"] = Metadatar(**data_dict.pop("metadata")) +623 else: +624 data_dict.pop("metadata") +625 +626 # Attempt addition to sqlite +627 try: +628 sqlite_obj.add_compound(data_dict) +629 except: +630 print(data_dict["NAME"]) 631 -632 -633class MetabRefLCInterface(MetabRefInterface): -634 """ -635 Interface to the Metabolomics Reference Database for LC-MS data. -636 """ -637 -638 def __init__(self): -639 """ -640 Initialize instance. -641 -642 """ +632 return sqlite_obj +633 +634 +635class MetabRefLCInterface(MetabRefInterface): +636 """ +637 Interface to the Metabolomics Reference Database for LC-MS data. +638 """ +639 +640 def __init__(self): +641 """ +642 Initialize instance. 
643 -644 super().__init__() +644 """ 645 -646 # API endpoint for precursor m/z search -647 self.PRECURSOR_MZ_URL = ( -648 "https://metabref.emsl.pnnl.gov/api/precursors/m/{}/t/{}/{}" -649 ) -650 -651 # API endpoint for returning full list of precursor m/z values in database -652 self.PRECURSOR_MZ_ALL_URL = ( -653 "https://metabref.emsl.pnnl.gov/api/precursors/{}" -654 ) +646 super().__init__() +647 +648 # API endpoint for precursor m/z search +649 self.PRECURSOR_MZ_URL = ( +650 "https://metabref.emsl.pnnl.gov/api/precursors/m/{}/t/{}/{}" +651 ) +652 +653 # API endpoint for returning full list of precursor m/z values in database +654 self.PRECURSOR_MZ_ALL_URL = "https://metabref.emsl.pnnl.gov/api/precursors/{}" 655 656 self.__init_format_map__() 657 @@ -867,16 +867,16 @@

    727 ) 728 729 return lib -730 +730 731 def request_all_precursors(self, polarity): 732 """ 733 Request all precursor m/z values from MetabRef. -734 +734 735 Parameters 736 ---------- 737 polarity : str 738 Ionization polarity, either "positive" or "negative". -739 +739 740 Returns 741 ------- 742 list @@ -885,10 +885,10 @@

    745 # If polarity is anything other than positive or negative, raise error 746 if polarity not in ["positive", "negative"]: 747 raise ValueError("Polarity must be 'positive' or 'negative'") -748 +748 749 # Query MetabRef for all precursor m/z values 750 return self.get_query(self.PRECURSOR_MZ_ALL_URL.format(polarity)) -751 +751 752 def get_lipid_library( 753 self, 754 mz_list, @@ -931,48 +931,50 @@

    791 mz_list.sort() 792 793 # Get all precursors in the library matching the polarity -794 precusors_in_lib = self.request_all_precursors( -795 polarity=polarity -796 ) -797 precusors_in_lib.sort() -798 precusors_in_lib = np.array(precusors_in_lib) -799 -800 # Compare the mz_list with the precursors in the library, keep any mzs that are within mz_tol of any precursor in the library -801 mz_list = np.array(mz_list) -802 mz_df = pd.DataFrame(mz_list, columns=['mass_feature_mz']) -803 mz_df["closest_lib_pre_mz"] = precusors_in_lib[ -804 find_closest(precusors_in_lib, mz_df.mass_feature_mz.values) -805 ] -806 mz_df["mz_diff_ppm"] = np.abs((mz_df["mass_feature_mz"] - mz_df["closest_lib_pre_mz"])/mz_df["mass_feature_mz"]*1e6) -807 mz_df_sub = mz_df[mz_df["mz_diff_ppm"] <= mz_tol_ppm] -808 -809 # Query the library for the precursors in the mz_list that are in the library to retrieve the spectra and metadata -810 lib = self.query_by_precursor( -811 mz_list=mz_df_sub.mass_feature_mz.values, -812 polarity=polarity, -813 mz_tol_ppm=mz_tol_ppm, -814 mz_tol_da_api=mz_tol_da_api, -815 ) -816 -817 # Pull out lipid metadata from the metabref library and convert to LipidMetadata dataclass -818 mol_data_dict = {x["id"]: x["Molecular Data"] for x in lib} -819 lipid_lib = {x["id"]: x["Lipid Tree"] for x in lib if "Lipid Tree" in x.keys()} -820 mol_data_dict = {k: {**v, **lipid_lib[k]} for k, v in mol_data_dict.items()} -821 mol_data_dict = { -822 k: self._dict_to_dataclass(v, LipidMetadata) -823 for k, v in mol_data_dict.items() -824 } -825 -826 # Remove lipid metadata from the metabref library -827 lib = [ -828 {k: v for k, v in x.items() if k not in ["Molecular Data", "Lipid Tree"]} -829 for x in lib -830 ] -831 -832 # Format the spectral library -833 format_func = self._get_format_func(format) -834 lib = format_func(lib, normalize=normalize, fe_kwargs=fe_kwargs) -835 return (lib, mol_data_dict) +794 precusors_in_lib = self.request_all_precursors(polarity=polarity) +795 
precusors_in_lib.sort() +796 precusors_in_lib = np.array(precusors_in_lib) +797 +798 # Compare the mz_list with the precursors in the library, keep any mzs that are within mz_tol of any precursor in the library +799 mz_list = np.array(mz_list) +800 mz_df = pd.DataFrame(mz_list, columns=["mass_feature_mz"]) +801 mz_df["closest_lib_pre_mz"] = precusors_in_lib[ +802 find_closest(precusors_in_lib, mz_df.mass_feature_mz.values) +803 ] +804 mz_df["mz_diff_ppm"] = np.abs( +805 (mz_df["mass_feature_mz"] - mz_df["closest_lib_pre_mz"]) +806 / mz_df["mass_feature_mz"] +807 * 1e6 +808 ) +809 mz_df_sub = mz_df[mz_df["mz_diff_ppm"] <= mz_tol_ppm] +810 +811 # Query the library for the precursors in the mz_list that are in the library to retrieve the spectra and metadata +812 lib = self.query_by_precursor( +813 mz_list=mz_df_sub.mass_feature_mz.values, +814 polarity=polarity, +815 mz_tol_ppm=mz_tol_ppm, +816 mz_tol_da_api=mz_tol_da_api, +817 ) +818 +819 # Pull out lipid metadata from the metabref library and convert to LipidMetadata dataclass +820 mol_data_dict = {x["id"]: x["Molecular Data"] for x in lib} +821 lipid_lib = {x["id"]: x["Lipid Tree"] for x in lib if "Lipid Tree" in x.keys()} +822 mol_data_dict = {k: {**v, **lipid_lib[k]} for k, v in mol_data_dict.items()} +823 mol_data_dict = { +824 k: self._dict_to_dataclass(v, LipidMetadata) +825 for k, v in mol_data_dict.items() +826 } +827 +828 # Remove lipid metadata from the metabref library +829 lib = [ +830 {k: v for k, v in x.items() if k not in ["Molecular Data", "Lipid Tree"]} +831 for x in lib +832 ] +833 +834 # Format the spectral library +835 format_func = self._get_format_func(format) +836 lib = format_func(lib, normalize=normalize, fe_kwargs=fe_kwargs) +837 return (lib, mol_data_dict)

    @@ -988,155 +990,155 @@

    -
     15class SpectralDatabaseInterface(ABC):
    - 16    """
    - 17    Base class that facilitates connection to spectral reference databases,
    - 18    such as EMSL's Metabolomics Reference Database (MetabRef).
    - 19
    - 20    """
    - 21
    - 22    def __init__(self, key=None):
    - 23        """
    - 24        Initialize instance.
    - 25
    - 26        Parameters
    - 27        ----------
    - 28        key : str
    - 29            Token key.
    - 30
    - 31        """
    - 32
    - 33        self.key = key
    - 34
    - 35        if self.key is None:
    - 36            raise ValueError(
    - 37                "Must specify environment variable key for token associatedwith this database interface."
    - 38            )
    - 39
    - 40    def set_token(self, path):
    - 41        """
    - 42        Set environment variable for MetabRef database token.
    - 43
    - 44        Parameters
    - 45        ----------
    - 46        path : str
    - 47            Path to token.
    - 48
    - 49        """
    - 50
    - 51        # Read token from file
    - 52        with open(path, "r", encoding="utf-8") as f:
    - 53            token = f.readline().strip()
    - 54
    - 55        # Set environment variable
    - 56        os.environ[self.key] = token
    - 57
    - 58    def get_token(self):
    - 59        """
    - 60        Get environment variable for database token.
    - 61
    - 62        Returns
    - 63        -------
    - 64        str
    - 65            Token string.
    - 66
    - 67        """
    - 68
    - 69        # Check for token
    - 70        if self.key not in os.environ:
    - 71            raise ValueError("Must set {} environment variable.".format(self.key))
    - 72
    - 73        # Get token from environment variables
    - 74        return os.environ.get(self.key)
    - 75
    - 76    def get_header(self):
    - 77        """
    - 78        Access stored database token and prepare as header.
    - 79
    - 80        Returns
    - 81        -------
    - 82        str
    - 83            Header string.
    - 84
    - 85        """
    - 86
    - 87        # Get token
    - 88        token = self.get_token()
    - 89
    - 90        # Pad header information
    - 91        header = {"Authorization": f"Bearer {token}", "Content-Type": "text/plain"}
    - 92
    - 93        return header
    - 94
    - 95    def get_query(self, url):
    - 96        """
    - 97        Request payload from URL according to `get` protocol.
    - 98
    - 99        Parameters
    -100        ----------
    -101        url : str
    -102            URL for request.
    -103
    -104        Returns
    -105        -------
    -106        dict
    -107            Response as JSON.
    -108
    -109        """
    -110
    -111        # Query URL via `get`
    -112        response = requests.get(url, headers=self.get_header())
    -113
    -114        # Check response
    -115        response.raise_for_status()
    -116
    -117        # Return as JSON
    -118        return response.json()
    -119
    -120    def post_query(self, url, variable, values, tolerance):
    -121        """
    -122        Request payload from URL according to `post` protocol.
    -123
    -124        Parameters
    -125        ----------
    -126        url : str
    -127            URL for request.
    -128        variable : str
    -129            Variable to query.
    -130        values : str
    -131            Specific values of `variable` to query.
    -132        tolerance : str
    -133            Query tolerance relative to `values`.
    -134
    -135        Returns
    -136        -------
    -137        dict
    -138            Response as JSON.
    -139
    -140        """
    -141
    -142        # Coerce to string
    -143        if not isinstance(variable, str):
    -144            variable = str(variable).replace(" ", "")
    -145
    -146        if not isinstance(values, str):
    -147            values = str(values).replace(" ", "")
    -148
    -149        if not isinstance(tolerance, str):
    -150            tolerance = str(tolerance).replace(" ", "")
    -151
    -152        # Query URL via `post`
    -153        response = requests.post(
    -154            os.path.join(url, variable, tolerance),
    -155            data=values,
    -156            headers=self.get_header(),
    -157        )
    -158
    -159        # Check response
    -160        response.raise_for_status()
    -161
    -162        # Return as JSON
    -163        return response.json()
    +            
     16class SpectralDatabaseInterface(ABC):
    + 17    """
    + 18    Base class that facilitates connection to spectral reference databases,
    + 19    such as EMSL's Metabolomics Reference Database (MetabRef).
    + 20
    + 21    """
    + 22
    + 23    def __init__(self, key=None):
    + 24        """
    + 25        Initialize instance.
    + 26
    + 27        Parameters
    + 28        ----------
    + 29        key : str
    + 30            Token key.
    + 31
    + 32        """
    + 33
    + 34        self.key = key
    + 35
    + 36        if self.key is None:
    + 37            raise ValueError(
    + 38                "Must specify environment variable key for token associatedwith this database interface."
    + 39            )
    + 40
    + 41    def set_token(self, path):
    + 42        """
    + 43        Set environment variable for MetabRef database token.
    + 44
    + 45        Parameters
    + 46        ----------
    + 47        path : str
    + 48            Path to token.
    + 49
    + 50        """
    + 51
    + 52        # Read token from file
    + 53        with open(path, "r", encoding="utf-8") as f:
    + 54            token = f.readline().strip()
    + 55
    + 56        # Set environment variable
    + 57        os.environ[self.key] = token
    + 58
    + 59    def get_token(self):
    + 60        """
    + 61        Get environment variable for database token.
    + 62
    + 63        Returns
    + 64        -------
    + 65        str
    + 66            Token string.
    + 67
    + 68        """
    + 69
    + 70        # Check for token
    + 71        if self.key not in os.environ:
    + 72            raise ValueError("Must set {} environment variable.".format(self.key))
    + 73
    + 74        # Get token from environment variables
    + 75        return os.environ.get(self.key)
    + 76
    + 77    def get_header(self):
    + 78        """
    + 79        Access stored database token and prepare as header.
    + 80
    + 81        Returns
    + 82        -------
    + 83        str
    + 84            Header string.
    + 85
    + 86        """
    + 87
    + 88        # Get token
    + 89        token = self.get_token()
    + 90
    + 91        # Pad header information
    + 92        header = {"Authorization": f"Bearer {token}", "Content-Type": "text/plain"}
    + 93
    + 94        return header
    + 95
    + 96    def get_query(self, url):
    + 97        """
    + 98        Request payload from URL according to `get` protocol.
    + 99
    +100        Parameters
    +101        ----------
    +102        url : str
    +103            URL for request.
    +104
    +105        Returns
    +106        -------
    +107        dict
    +108            Response as JSON.
    +109
    +110        """
    +111
    +112        # Query URL via `get`
    +113        response = requests.get(url, headers=self.get_header())
    +114
    +115        # Check response
    +116        response.raise_for_status()
    +117
    +118        # Return as JSON
    +119        return response.json()
    +120
    +121    def post_query(self, url, variable, values, tolerance):
    +122        """
    +123        Request payload from URL according to `post` protocol.
    +124
    +125        Parameters
    +126        ----------
    +127        url : str
    +128            URL for request.
    +129        variable : str
    +130            Variable to query.
    +131        values : str
    +132            Specific values of `variable` to query.
    +133        tolerance : str
    +134            Query tolerance relative to `values`.
    +135
    +136        Returns
    +137        -------
    +138        dict
    +139            Response as JSON.
    +140
    +141        """
    +142
    +143        # Coerce to string
    +144        if not isinstance(variable, str):
    +145            variable = str(variable).replace(" ", "")
    +146
    +147        if not isinstance(values, str):
    +148            values = str(values).replace(" ", "")
    +149
    +150        if not isinstance(tolerance, str):
    +151            tolerance = str(tolerance).replace(" ", "")
    +152
    +153        # Query URL via `post`
    +154        response = requests.post(
    +155            os.path.join(url, variable, tolerance),
    +156            data=values,
    +157            headers=self.get_header(),
    +158        )
    +159
    +160        # Check response
    +161        response.raise_for_status()
    +162
    +163        # Return as JSON
    +164        return response.json()
     
    @@ -1155,23 +1157,23 @@

    -
    22    def __init__(self, key=None):
    -23        """
    -24        Initialize instance.
    -25
    -26        Parameters
    -27        ----------
    -28        key : str
    -29            Token key.
    -30
    -31        """
    -32
    -33        self.key = key
    -34
    -35        if self.key is None:
    -36            raise ValueError(
    -37                "Must specify environment variable key for token associatedwith this database interface."
    -38            )
    +            
    23    def __init__(self, key=None):
    +24        """
    +25        Initialize instance.
    +26
    +27        Parameters
    +28        ----------
    +29        key : str
    +30            Token key.
    +31
    +32        """
    +33
    +34        self.key = key
    +35
    +36        if self.key is None:
    +37            raise ValueError(
    +38                "Must specify environment variable key for token associatedwith this database interface."
    +39            )
     
    @@ -1209,23 +1211,23 @@
    Parameters
    -
    40    def set_token(self, path):
    -41        """
    -42        Set environment variable for MetabRef database token.
    -43
    -44        Parameters
    -45        ----------
    -46        path : str
    -47            Path to token.
    -48
    -49        """
    -50
    -51        # Read token from file
    -52        with open(path, "r", encoding="utf-8") as f:
    -53            token = f.readline().strip()
    -54
    -55        # Set environment variable
    -56        os.environ[self.key] = token
    +            
    41    def set_token(self, path):
    +42        """
    +43        Set environment variable for MetabRef database token.
    +44
    +45        Parameters
    +46        ----------
    +47        path : str
    +48            Path to token.
    +49
    +50        """
    +51
    +52        # Read token from file
    +53        with open(path, "r", encoding="utf-8") as f:
    +54            token = f.readline().strip()
    +55
    +56        # Set environment variable
    +57        os.environ[self.key] = token
     
    @@ -1252,23 +1254,23 @@
    Parameters
    -
    58    def get_token(self):
    -59        """
    -60        Get environment variable for database token.
    -61
    -62        Returns
    -63        -------
    -64        str
    -65            Token string.
    -66
    -67        """
    -68
    -69        # Check for token
    -70        if self.key not in os.environ:
    -71            raise ValueError("Must set {} environment variable.".format(self.key))
    -72
    -73        # Get token from environment variables
    -74        return os.environ.get(self.key)
    +            
    59    def get_token(self):
    +60        """
    +61        Get environment variable for database token.
    +62
    +63        Returns
    +64        -------
    +65        str
    +66            Token string.
    +67
    +68        """
    +69
    +70        # Check for token
    +71        if self.key not in os.environ:
    +72            raise ValueError("Must set {} environment variable.".format(self.key))
    +73
    +74        # Get token from environment variables
    +75        return os.environ.get(self.key)
     
    @@ -1294,24 +1296,24 @@
    Returns
    -
    76    def get_header(self):
    -77        """
    -78        Access stored database token and prepare as header.
    -79
    -80        Returns
    -81        -------
    -82        str
    -83            Header string.
    -84
    -85        """
    -86
    -87        # Get token
    -88        token = self.get_token()
    -89
    -90        # Pad header information
    -91        header = {"Authorization": f"Bearer {token}", "Content-Type": "text/plain"}
    -92
    -93        return header
    +            
    77    def get_header(self):
    +78        """
    +79        Access stored database token and prepare as header.
    +80
    +81        Returns
    +82        -------
    +83        str
    +84            Header string.
    +85
    +86        """
    +87
    +88        # Get token
    +89        token = self.get_token()
    +90
    +91        # Pad header information
    +92        header = {"Authorization": f"Bearer {token}", "Content-Type": "text/plain"}
    +93
    +94        return header
     
    @@ -1337,30 +1339,30 @@
    Returns
    -
     95    def get_query(self, url):
    - 96        """
    - 97        Request payload from URL according to `get` protocol.
    - 98
    - 99        Parameters
    -100        ----------
    -101        url : str
    -102            URL for request.
    -103
    -104        Returns
    -105        -------
    -106        dict
    -107            Response as JSON.
    -108
    -109        """
    -110
    -111        # Query URL via `get`
    -112        response = requests.get(url, headers=self.get_header())
    -113
    -114        # Check response
    -115        response.raise_for_status()
    -116
    -117        # Return as JSON
    -118        return response.json()
    +            
     96    def get_query(self, url):
    + 97        """
    + 98        Request payload from URL according to `get` protocol.
    + 99
    +100        Parameters
    +101        ----------
    +102        url : str
    +103            URL for request.
    +104
    +105        Returns
    +106        -------
    +107        dict
    +108            Response as JSON.
    +109
    +110        """
    +111
    +112        # Query URL via `get`
    +113        response = requests.get(url, headers=self.get_header())
    +114
    +115        # Check response
    +116        response.raise_for_status()
    +117
    +118        # Return as JSON
    +119        return response.json()
     
    @@ -1393,50 +1395,50 @@
    Returns
    -
    120    def post_query(self, url, variable, values, tolerance):
    -121        """
    -122        Request payload from URL according to `post` protocol.
    -123
    -124        Parameters
    -125        ----------
    -126        url : str
    -127            URL for request.
    -128        variable : str
    -129            Variable to query.
    -130        values : str
    -131            Specific values of `variable` to query.
    -132        tolerance : str
    -133            Query tolerance relative to `values`.
    -134
    -135        Returns
    -136        -------
    -137        dict
    -138            Response as JSON.
    -139
    -140        """
    -141
    -142        # Coerce to string
    -143        if not isinstance(variable, str):
    -144            variable = str(variable).replace(" ", "")
    -145
    -146        if not isinstance(values, str):
    -147            values = str(values).replace(" ", "")
    -148
    -149        if not isinstance(tolerance, str):
    -150            tolerance = str(tolerance).replace(" ", "")
    -151
    -152        # Query URL via `post`
    -153        response = requests.post(
    -154            os.path.join(url, variable, tolerance),
    -155            data=values,
    -156            headers=self.get_header(),
    -157        )
    -158
    -159        # Check response
    -160        response.raise_for_status()
    -161
    -162        # Return as JSON
    -163        return response.json()
    +            
    121    def post_query(self, url, variable, values, tolerance):
    +122        """
    +123        Request payload from URL according to `post` protocol.
    +124
    +125        Parameters
    +126        ----------
    +127        url : str
    +128            URL for request.
    +129        variable : str
    +130            Variable to query.
    +131        values : str
    +132            Specific values of `variable` to query.
    +133        tolerance : str
    +134            Query tolerance relative to `values`.
    +135
    +136        Returns
    +137        -------
    +138        dict
    +139            Response as JSON.
    +140
    +141        """
    +142
    +143        # Coerce to string
    +144        if not isinstance(variable, str):
    +145            variable = str(variable).replace(" ", "")
    +146
    +147        if not isinstance(values, str):
    +148            values = str(values).replace(" ", "")
    +149
    +150        if not isinstance(tolerance, str):
    +151            tolerance = str(tolerance).replace(" ", "")
    +152
    +153        # Query URL via `post`
    +154        response = requests.post(
    +155            os.path.join(url, variable, tolerance),
    +156            data=values,
    +157            headers=self.get_header(),
    +158        )
    +159
    +160        # Check response
    +161        response.raise_for_status()
    +162
    +163        # Return as JSON
    +164        return response.json()
     
    @@ -1476,194 +1478,195 @@
    Returns
    -
    166class MetabRefInterface(SpectralDatabaseInterface):
    -167    """
    -168    Interface to the Metabolomics Reference Database.
    -169    """
    -170
    -171    def __init__(self):
    -172        """
    -173        Initialize instance.
    -174
    -175        """
    -176
    -177        super().__init__(key="METABREF_TOKEN")
    -178
    -179    def _get_format_func(self, format):
    -180        """
    -181        Obtain format function by key.
    -182
    -183        Returns
    -184        -------
    -185        func
    -186            Formatting function.
    -187        """
    -188
    -189        if format.lower() in self.format_map.keys():
    -190            return self.format_map[format.lower()]
    -191
    -192        raise ValueError(("{} not a supported format.").format(format))
    -193
    -194    def spectrum_to_array(self, spectrum, normalize=True):
    -195        """
    -196        Convert MetabRef-formatted spectrum to array.
    -197
    -198        Parameters
    -199        ----------
    -200        spectrum : str
    -201            MetabRef spectrum, i.e. list of (m/z,abundance) pairs.
    -202        normalize : bool
    -203            Normalize the spectrum by its magnitude.
    -204
    -205        Returns
    -206        -------
    -207        :obj:`~numpy.array`
    -208            Array of shape (N, 2), with m/z in the first column and abundance in
    -209            the second.
    -210
    -211        """
    -212
    -213        # Convert parenthesis-delimited string to array
    -214        arr = np.array(
    -215            re.findall(r"\(([^,]+),([^)]+)\)", spectrum), dtype=float
    -216        ).reshape(-1, 2)
    -217
    -218        # Normalize the array
    -219        if normalize:
    -220            arr[:, -1] = arr[:, -1] / arr[:, -1].sum()
    -221
    -222        return arr
    -223
    -224    def _to_flashentropy(self, metabref_lib, normalize=True, fe_kwargs={}):
    -225        """
    -226        Convert metabref-formatted library to FlashEntropy library.
    -227
    -228        Parameters
    -229        ----------
    -230        metabref_lib : dict
    -231            MetabRef MS2 library in JSON format or FlashEntropy search instance (for reformatting at different MS2 separation).
    -232        normalize : bool
    -233            Normalize each spectrum by its magnitude.
    -234        fe_kwargs : dict, optional
    -235            Keyword arguments for instantiation of FlashEntropy search and building index for FlashEntropy search;
    -236            any keys not recognized will be ignored. By default, all parameters set to defaults.
    -237
    -238        Returns
    -239        -------
    -240        :obj:`~ms_entropy.FlashEntropySearch`
    -241            MS2 library as FlashEntropy search instance.
    -242
    -243        Raises
    -244        ------
    -245        ValueError
    -246            If "min_ms2_difference_in_da" or "max_ms2_tolerance_in_da" are present in `fe_kwargs` and they are not equal.
    -247
    -248        """
    -249        # If "min_ms2_difference_in_da" in fe_kwargs, check that "max_ms2_tolerance_in_da" is also present and that min_ms2_difference_in_da = 2xmax_ms2_tolerance_in_da
    -250        if (
    -251            "min_ms2_difference_in_da" in fe_kwargs
    -252            or "max_ms2_tolerance_in_da" in fe_kwargs
    -253        ):
    -254            if (
    -255                "min_ms2_difference_in_da" not in fe_kwargs
    -256                or "max_ms2_tolerance_in_da" not in fe_kwargs
    -257            ):
    -258                raise ValueError(
    -259                    "Both 'min_ms2_difference_in_da' and 'max_ms2_tolerance_in_da' must be specified."
    -260                )
    -261            if (
    -262                fe_kwargs["min_ms2_difference_in_da"] != 2*fe_kwargs["max_ms2_tolerance_in_da"]
    -263            ):
    -264                raise ValueError(
    -265                    "The values of 'min_ms2_difference_in_da' must be exactly 2x 'max_ms2_tolerance_in_da'."
    -266                )
    -267
    -268        # Initialize empty library
    -269        fe_lib = []
    -270
    -271        # Enumerate spectra
    -272        for i, source in enumerate(metabref_lib):
    -273            # Reorganize source dict, if necessary
    -274            if "spectrum_data" in source.keys():
    -275                spectrum = source["spectrum_data"]
    -276            else:
    -277                spectrum = source
    -278
    -279            # Rename precursor_mz key for FlashEntropy
    -280            if "precursor_mz" not in spectrum.keys():
    -281                spectrum["precursor_mz"] = spectrum.pop("precursor_ion")
    -282
    -283            # Convert CoreMS spectrum to array and clean, store as `peaks`
    -284            spectrum["peaks"] = self.spectrum_to_array(
    -285                spectrum["mz"], normalize=normalize
    -286            )
    -287
    -288            # Add spectrum to library
    -289            fe_lib.append(spectrum)
    -290
    -291        # Initialize FlashEntropy
    -292        fe_init_kws = [
    -293            "max_ms2_tolerance_in_da",
    -294            "mz_index_step",
    -295            "low_memory",
    -296            "path_data",
    -297        ]
    -298        fe_init_kws = {k: v for k, v in fe_kwargs.items() if k in fe_init_kws}
    -299        fes = FlashEntropySearch(**fe_init_kws)
    -300
    -301        # Build FlashEntropy index
    -302        fe_index_kws = [
    -303            "max_indexed_mz",
    -304            "precursor_ions_removal_da",
    -305            "noise_threshold",
    -306            "min_ms2_difference_in_da",
    -307            "max_peak_num",
    -308        ]
    -309        fe_index_kws = {k: v for k, v in fe_kwargs.items() if k in fe_index_kws}
    -310        fes.build_index(fe_lib, **fe_index_kws, clean_spectra=True)
    -311
    -312        return fes
    +            
    167class MetabRefInterface(SpectralDatabaseInterface):
    +168    """
    +169    Interface to the Metabolomics Reference Database.
    +170    """
    +171
    +172    def __init__(self):
    +173        """
    +174        Initialize instance.
    +175
    +176        """
    +177
    +178        super().__init__(key="METABREF_TOKEN")
    +179
    +180    def _get_format_func(self, format):
    +181        """
    +182        Obtain format function by key.
    +183
    +184        Returns
    +185        -------
    +186        func
    +187            Formatting function.
    +188        """
    +189
    +190        if format.lower() in self.format_map.keys():
    +191            return self.format_map[format.lower()]
    +192
    +193        raise ValueError(("{} not a supported format.").format(format))
    +194
    +195    def spectrum_to_array(self, spectrum, normalize=True):
    +196        """
    +197        Convert MetabRef-formatted spectrum to array.
    +198
    +199        Parameters
    +200        ----------
    +201        spectrum : str
    +202            MetabRef spectrum, i.e. list of (m/z,abundance) pairs.
    +203        normalize : bool
    +204            Normalize the spectrum by its magnitude.
    +205
    +206        Returns
    +207        -------
    +208        :obj:`~numpy.array`
    +209            Array of shape (N, 2), with m/z in the first column and abundance in
    +210            the second.
    +211
    +212        """
    +213
    +214        # Convert parenthesis-delimited string to array
    +215        arr = np.array(
    +216            re.findall(r"\(([^,]+),([^)]+)\)", spectrum), dtype=float
    +217        ).reshape(-1, 2)
    +218
    +219        # Normalize the array
    +220        if normalize:
    +221            arr[:, -1] = arr[:, -1] / arr[:, -1].sum()
    +222
    +223        return arr
    +224
    +225    def _to_flashentropy(self, metabref_lib, normalize=True, fe_kwargs={}):
    +226        """
    +227        Convert metabref-formatted library to FlashEntropy library.
    +228
    +229        Parameters
    +230        ----------
    +231        metabref_lib : dict
    +232            MetabRef MS2 library in JSON format or FlashEntropy search instance (for reformatting at different MS2 separation).
    +233        normalize : bool
    +234            Normalize each spectrum by its magnitude.
    +235        fe_kwargs : dict, optional
    +236            Keyword arguments for instantiation of FlashEntropy search and building index for FlashEntropy search;
    +237            any keys not recognized will be ignored. By default, all parameters set to defaults.
    +238
    +239        Returns
    +240        -------
    +241        :obj:`~ms_entropy.FlashEntropySearch`
    +242            MS2 library as FlashEntropy search instance.
    +243
    +244        Raises
    +245        ------
    +246        ValueError
    +247            If "min_ms2_difference_in_da" or "max_ms2_tolerance_in_da" are present in `fe_kwargs` and they are not equal.
    +248
    +249        """
    +250        # If "min_ms2_difference_in_da" in fe_kwargs, check that "max_ms2_tolerance_in_da" is also present and that min_ms2_difference_in_da = 2xmax_ms2_tolerance_in_da
    +251        if (
    +252            "min_ms2_difference_in_da" in fe_kwargs
    +253            or "max_ms2_tolerance_in_da" in fe_kwargs
    +254        ):
    +255            if (
    +256                "min_ms2_difference_in_da" not in fe_kwargs
    +257                or "max_ms2_tolerance_in_da" not in fe_kwargs
    +258            ):
    +259                raise ValueError(
    +260                    "Both 'min_ms2_difference_in_da' and 'max_ms2_tolerance_in_da' must be specified."
    +261                )
    +262            if (
    +263                fe_kwargs["min_ms2_difference_in_da"]
    +264                != 2 * fe_kwargs["max_ms2_tolerance_in_da"]
    +265            ):
    +266                raise ValueError(
    +267                    "The values of 'min_ms2_difference_in_da' must be exactly 2x 'max_ms2_tolerance_in_da'."
    +268                )
    +269
    +270        # Initialize empty library
    +271        fe_lib = []
    +272
    +273        # Enumerate spectra
    +274        for i, source in enumerate(metabref_lib):
    +275            # Reorganize source dict, if necessary
    +276            if "spectrum_data" in source.keys():
    +277                spectrum = source["spectrum_data"]
    +278            else:
    +279                spectrum = source
    +280
    +281            # Rename precursor_mz key for FlashEntropy
    +282            if "precursor_mz" not in spectrum.keys():
    +283                spectrum["precursor_mz"] = spectrum.pop("precursor_ion")
    +284
    +285            # Convert CoreMS spectrum to array and clean, store as `peaks`
    +286            spectrum["peaks"] = self.spectrum_to_array(
    +287                spectrum["mz"], normalize=normalize
    +288            )
    +289
    +290            # Add spectrum to library
    +291            fe_lib.append(spectrum)
    +292
    +293        # Initialize FlashEntropy
    +294        fe_init_kws = [
    +295            "max_ms2_tolerance_in_da",
    +296            "mz_index_step",
    +297            "low_memory",
    +298            "path_data",
    +299        ]
    +300        fe_init_kws = {k: v for k, v in fe_kwargs.items() if k in fe_init_kws}
    +301        fes = FlashEntropySearch(**fe_init_kws)
    +302
    +303        # Build FlashEntropy index
    +304        fe_index_kws = [
    +305            "max_indexed_mz",
    +306            "precursor_ions_removal_da",
    +307            "noise_threshold",
    +308            "min_ms2_difference_in_da",
    +309            "max_peak_num",
    +310        ]
    +311        fe_index_kws = {k: v for k, v in fe_kwargs.items() if k in fe_index_kws}
    +312        fes.build_index(fe_lib, **fe_index_kws, clean_spectra=True)
     313
    -314    def _dict_to_dataclass(self, metabref_lib, data_class):
    -315        """
    -316        Convert dictionary to dataclass.
    -317
    -318        Notes
    -319        -----
    -320        This function will pull the attributes a dataclass and its parent class
    -321        and convert the dictionary to a dataclass instance with the appropriate
    -322        attributes.
    -323
    -324        Parameters
    -325        ----------
    -326        data_class : :obj:`~dataclasses.dataclass`
    -327            Dataclass to convert to.
    -328        metabref_lib : dict
    -329            Metabref dictionary object to convert to dataclass.
    -330
    -331        Returns
    -332        -------
    -333        :obj:`~dataclasses.dataclass`
    -334            Dataclass instance.
    -335
    -336        """
    +314        return fes
    +315
    +316    def _dict_to_dataclass(self, metabref_lib, data_class):
    +317        """
    +318        Convert dictionary to dataclass.
    +319
    +320        Notes
    +321        -----
    +322        This function will pull the attributes a dataclass and its parent class
    +323        and convert the dictionary to a dataclass instance with the appropriate
    +324        attributes.
    +325
    +326        Parameters
    +327        ----------
    +328        data_class : :obj:`~dataclasses.dataclass`
    +329            Dataclass to convert to.
    +330        metabref_lib : dict
    +331            Metabref dictionary object to convert to dataclass.
    +332
    +333        Returns
    +334        -------
    +335        :obj:`~dataclasses.dataclass`
    +336            Dataclass instance.
     337
    -338        # Get list of expected attributes of data_class
    -339        data_class_keys = list(data_class.__annotations__.keys())
    -340
    -341        # Does the data_class inherit from another class, if so, get the attributes of the parent class as well
    -342        if len(data_class.__mro__) > 2:
    -343            parent_class_keys = list(data_class.__bases__[0].__annotations__.keys())
    -344            data_class_keys = list(set(data_class_keys + parent_class_keys))
    -345
    -346        # Remove keys that are not in the data_class from the input dictionary
    -347        input_dict = {k: v for k, v in metabref_lib.items() if k in data_class_keys}
    -348
    -349        # Add keys that are in the data class but not in the input dictionary as None
    -350        for key in data_class_keys:
    -351            if key not in input_dict.keys():
    -352                input_dict[key] = None
    -353        return data_class(**input_dict)
    +338        """
    +339
    +340        # Get list of expected attributes of data_class
    +341        data_class_keys = list(data_class.__annotations__.keys())
    +342
    +343        # Does the data_class inherit from another class, if so, get the attributes of the parent class as well
    +344        if len(data_class.__mro__) > 2:
    +345            parent_class_keys = list(data_class.__bases__[0].__annotations__.keys())
    +346            data_class_keys = list(set(data_class_keys + parent_class_keys))
    +347
    +348        # Remove keys that are not in the data_class from the input dictionary
    +349        input_dict = {k: v for k, v in metabref_lib.items() if k in data_class_keys}
    +350
    +351        # Add keys that are in the data class but not in the input dictionary as None
    +352        for key in data_class_keys:
    +353            if key not in input_dict.keys():
    +354                input_dict[key] = None
    +355        return data_class(**input_dict)
     
    @@ -1681,13 +1684,13 @@
    Returns
    -
    171    def __init__(self):
    -172        """
    -173        Initialize instance.
    -174
    -175        """
    -176
    -177        super().__init__(key="METABREF_TOKEN")
    +            
    172    def __init__(self):
    +173        """
    +174        Initialize instance.
    +175
    +176        """
    +177
    +178        super().__init__(key="METABREF_TOKEN")
     
    @@ -1707,35 +1710,35 @@
    Returns
    -
    194    def spectrum_to_array(self, spectrum, normalize=True):
    -195        """
    -196        Convert MetabRef-formatted spectrum to array.
    -197
    -198        Parameters
    -199        ----------
    -200        spectrum : str
    -201            MetabRef spectrum, i.e. list of (m/z,abundance) pairs.
    -202        normalize : bool
    -203            Normalize the spectrum by its magnitude.
    -204
    -205        Returns
    -206        -------
    -207        :obj:`~numpy.array`
    -208            Array of shape (N, 2), with m/z in the first column and abundance in
    -209            the second.
    -210
    -211        """
    -212
    -213        # Convert parenthesis-delimited string to array
    -214        arr = np.array(
    -215            re.findall(r"\(([^,]+),([^)]+)\)", spectrum), dtype=float
    -216        ).reshape(-1, 2)
    -217
    -218        # Normalize the array
    -219        if normalize:
    -220            arr[:, -1] = arr[:, -1] / arr[:, -1].sum()
    -221
    -222        return arr
    +            
    195    def spectrum_to_array(self, spectrum, normalize=True):
    +196        """
    +197        Convert MetabRef-formatted spectrum to array.
    +198
    +199        Parameters
    +200        ----------
    +201        spectrum : str
    +202            MetabRef spectrum, i.e. list of (m/z,abundance) pairs.
    +203        normalize : bool
    +204            Normalize the spectrum by its magnitude.
    +205
    +206        Returns
    +207        -------
    +208        :obj:`~numpy.array`
    +209            Array of shape (N, 2), with m/z in the first column and abundance in
    +210            the second.
    +211
    +212        """
    +213
    +214        # Convert parenthesis-delimited string to array
    +215        arr = np.array(
    +216            re.findall(r"\(([^,]+),([^)]+)\)", spectrum), dtype=float
    +217        ).reshape(-1, 2)
    +218
    +219        # Normalize the array
    +220        if normalize:
    +221            arr[:, -1] = arr[:, -1] / arr[:, -1].sum()
    +222
    +223        return arr
     
    @@ -1786,282 +1789,282 @@
    Inherited Members
    -
    356class MetabRefGCInterface(MetabRefInterface):
    -357    """
    -358    Interface to the Metabolomics Reference Database.
    -359    """
    -360
    -361    def __init__(self):
    -362        """
    -363        Initialize instance.
    -364
    -365        """
    +            
    358class MetabRefGCInterface(MetabRefInterface):
    +359    """
    +360    Interface to the Metabolomics Reference Database.
    +361    """
    +362
    +363    def __init__(self):
    +364        """
    +365        Initialize instance.
     366
    -367        super().__init__()
    -368        self.GCMS_LIBRARY_URL = "https://metabref.emsl.pnnl.gov/api/mslevel/1"
    -369        self.FAMES_URL = "https://metabref.emsl.pnnl.gov/api/fames"
    -370
    -371        self.__init_format_map__()
    +367        """
    +368
    +369        super().__init__()
    +370        self.GCMS_LIBRARY_URL = "https://metabref.emsl.pnnl.gov/api/mslevel/1"
    +371        self.FAMES_URL = "https://metabref.emsl.pnnl.gov/api/fames"
     372
    -373    def __init_format_map__(self):
    -374        """
    -375        Initialize database format mapper, enabling multiple format requests.
    -376
    -377        """
    +373        self.__init_format_map__()
    +374
    +375    def __init_format_map__(self):
    +376        """
    +377        Initialize database format mapper, enabling multiple format requests.
     378
    -379        # Define format workflows
    -380        self.format_map = {
    -381            "json": lambda x, normalize, fe_kwargs: x,
    -382            "dict": lambda x,
    -383            normalize,
    -384            fe_kwargs: self._to_LowResolutionEICompound_dict(x, normalize),
    -385            "sql": lambda x,
    -386            normalize,
    -387            fe_kwargs: self._LowResolutionEICompound_dict_to_sqlite(
    -388                self._to_LowResolutionEICompound_dict(x, normalize)
    -389            ),
    -390        }
    -391
    -392        # Add aliases
    -393        self.format_map["metabref"] = self.format_map["json"]
    -394        self.format_map["datadict"] = self.format_map["dict"]
    -395        self.format_map["data-dict"] = self.format_map["dict"]
    -396        self.format_map["lowreseicompound"] = self.format_map["dict"]
    -397        self.format_map["lowres"] = self.format_map["dict"]
    -398        self.format_map["lowresgc"] = self.format_map["dict"]
    -399        self.format_map["sqlite"] = self.format_map["sql"]
    -400
    -401    def available_formats(self):
    -402        """
    -403        View list of available formats.
    -404
    -405        Returns
    -406        -------
    -407        list
    -408            Format map keys.
    -409        """
    -410
    -411        return list(self.format_map.keys())
    +379        """
    +380
    +381        # Define format workflows
    +382        self.format_map = {
    +383            "json": lambda x, normalize, fe_kwargs: x,
    +384            "dict": lambda x,
    +385            normalize,
    +386            fe_kwargs: self._to_LowResolutionEICompound_dict(x, normalize),
    +387            "sql": lambda x,
    +388            normalize,
    +389            fe_kwargs: self._LowResolutionEICompound_dict_to_sqlite(
    +390                self._to_LowResolutionEICompound_dict(x, normalize)
    +391            ),
    +392        }
    +393
    +394        # Add aliases
    +395        self.format_map["metabref"] = self.format_map["json"]
    +396        self.format_map["datadict"] = self.format_map["dict"]
    +397        self.format_map["data-dict"] = self.format_map["dict"]
    +398        self.format_map["lowreseicompound"] = self.format_map["dict"]
    +399        self.format_map["lowres"] = self.format_map["dict"]
    +400        self.format_map["lowresgc"] = self.format_map["dict"]
    +401        self.format_map["sqlite"] = self.format_map["sql"]
    +402
    +403    def available_formats(self):
    +404        """
    +405        View list of available formats.
    +406
    +407        Returns
    +408        -------
    +409        list
    +410            Format map keys.
    +411        """
     412
    -413    def get_library(self, format="json", normalize=False):
    -414        """
    -415        Request MetabRef GC/MS library.
    -416
    -417        Parameters
    -418        ----------
    -419        format : str
    -420            Format of requested library, i.e. "json", "sql", "flashentropy".
    -421            See `available_formats` method for aliases.
    -422        normalize : bool
    -423            Normalize the spectrum by its magnitude.
    -424
    -425        Returns
    -426        -------
    -427        Library in requested format.
    -428
    -429        """
    +413        return list(self.format_map.keys())
    +414
    +415    def get_library(self, format="json", normalize=False):
    +416        """
    +417        Request MetabRef GC/MS library.
    +418
    +419        Parameters
    +420        ----------
    +421        format : str
    +422            Format of requested library, i.e. "json", "sql", "flashentropy".
    +423            See `available_formats` method for aliases.
    +424        normalize : bool
    +425            Normalize the spectrum by its magnitude.
    +426
    +427        Returns
    +428        -------
    +429        Library in requested format.
     430
    -431        # Init format function
    -432        format_func = self._get_format_func(format)
    -433
    -434        return format_func(
    -435            self.get_query(self.GCMS_LIBRARY_URL)["GC-MS"], normalize, {}
    -436        )
    -437
    -438    def get_fames(self, format="json", normalize=False):
    -439        """
    -440        Request MetabRef GC/MS FAMEs library.
    -441
    -442        Parameters
    -443        ----------
    -444        format : str
    -445            Format of requested library, i.e. "json", "sql", "flashentropy".
    -446            See `available_formats` method for aliases.
    -447        normalize : bool
    -448            Normalize the spectrum by its magnitude.
    -449
    -450        Returns
    -451        -------
    -452        Library in requested format.
    -453
    -454        """
    +431        """
    +432
    +433        # Init format function
    +434        format_func = self._get_format_func(format)
    +435
    +436        return format_func(
    +437            self.get_query(self.GCMS_LIBRARY_URL)["GC-MS"], normalize, {}
    +438        )
    +439
    +440    def get_fames(self, format="json", normalize=False):
    +441        """
    +442        Request MetabRef GC/MS FAMEs library.
    +443
    +444        Parameters
    +445        ----------
    +446        format : str
    +447            Format of requested library, i.e. "json", "sql", "flashentropy".
    +448            See `available_formats` method for aliases.
    +449        normalize : bool
    +450            Normalize the spectrum by its magnitude.
    +451
    +452        Returns
    +453        -------
    +454        Library in requested format.
     455
    -456        # Init format function
    -457        format_func = self._get_format_func(format)
    -458
    -459        return format_func(self.get_query(self.FAMES_URL)["GC-MS"], normalize, {})
    +456        """
    +457
    +458        # Init format function
    +459        format_func = self._get_format_func(format)
     460
    -461    def _to_LowResolutionEICompound_dict(self, metabref_lib, normalize=False):
    -462        """
    -463        Convert MetabRef-formatted library to CoreMS LowResolutionEICompound-formatted
    -464        dictionary for local ingestion.
    -465
    -466        Parameters
    -467        ----------
    -468        metabref_lib : dict
    -469            MetabRef GC-MS library in JSON format.
    -470        normalize : bool
    -471            Normalize each spectrum by its magnitude.
    -472
    -473        Returns
    -474        -------
    -475        list of dict
    -476            List of each spectrum contained in dictionary.
    -477
    -478        """
    +461        return format_func(self.get_query(self.FAMES_URL)["GC-MS"], normalize, {})
    +462
    +463    def _to_LowResolutionEICompound_dict(self, metabref_lib, normalize=False):
    +464        """
    +465        Convert MetabRef-formatted library to CoreMS LowResolutionEICompound-formatted
    +466        dictionary for local ingestion.
    +467
    +468        Parameters
    +469        ----------
    +470        metabref_lib : dict
    +471            MetabRef GC-MS library in JSON format.
    +472        normalize : bool
    +473            Normalize each spectrum by its magnitude.
    +474
    +475        Returns
    +476        -------
    +477        list of dict
    +478            List of each spectrum contained in dictionary.
     479
    -480        # All below key:value lookups are based on CoreMS class definitions
    -481        # NOT MetabRef content. For example, MetabRef has keys for PubChem,
    -482        # USI, etc. that are not considered below.
    -483
    -484        # Dictionary to map metabref keys to corems keys
    -485        metadatar_cols = {
    -486            "casno": "cas",
    -487            "inchikey": "inchikey",
    -488            "inchi": "inchi",
    -489            "chebi": "chebi",
    -490            "smiles": "smiles",
    -491            "kegg": "kegg",
    -492            "iupac_name": "iupac_name",
    -493            "traditional_name": "traditional_name",  # Not present in metabref
    -494            "common_name": "common_name",  # Not present in metabref
    -495        }
    -496
    -497        # Dictionary to map metabref keys to corems keys
    -498        lowres_ei_compound_cols = {
    -499            "id": "metabref_id",
    -500            "molecule_name": "name",  # Is this correct?
    -501            "classify": "classify",  # Not present in metabref
    -502            "formula": "formula",
    -503            "ri": "ri",
    -504            "rt": "retention_time",
    -505            "source": "source",  # Not present in metabref
    -506            "casno": "casno",
    -507            "comments": "comment",
    -508            "source_temp_c": "source_temp_c",  # Not present in metabref
    -509            "ev": "ev",  # Not present in metabref
    -510            "peak_count": "peaks_count",
    -511            "mz": "mz",
    -512            "abundance": "abundance",
    -513        }
    -514
    -515        # Local result container
    -516        corems_lib = []
    -517
    -518        # Enumerate spectra
    -519        for i, source_ in enumerate(metabref_lib):
    -520            # Copy source to prevent modification
    -521            source = source_.copy()
    -522
    -523            # Flatten source dict
    -524            source = source.pop("spectrum_data") | source
    -525
    -526            # Parse target data
    -527            target = {
    -528                lowres_ei_compound_cols[k]: v
    -529                for k, v in source.items()
    -530                if k in lowres_ei_compound_cols
    -531            }
    -532
    -533            # Explicitly add this to connect with LowResCompoundRef later
    -534            target["rt"] = source["rt"]
    -535
    -536            # Parse (mz, abundance)
    -537            arr = self.spectrum_to_array(target["mz"], normalize=normalize)
    -538            target["mz"] = arr[:, 0]
    -539            target["abundance"] = arr[:, 1]
    -540
    -541            # Parse meta data
    -542            target["metadata"] = {
    -543                metadatar_cols[k]: v for k, v in source.items() if k in metadatar_cols
    -544            }
    -545
    -546            # Add anything else
    -547            for k in source:
    -548                if k not in lowres_ei_compound_cols:
    -549                    target[k] = source[k]
    -550
    -551            # Add to CoreMS list
    -552            corems_lib.append(target)
    -553
    -554        return corems_lib
    +480        """
    +481
    +482        # All below key:value lookups are based on CoreMS class definitions
    +483        # NOT MetabRef content. For example, MetabRef has keys for PubChem,
    +484        # USI, etc. that are not considered below.
    +485
    +486        # Dictionary to map metabref keys to corems keys
    +487        metadatar_cols = {
    +488            "casno": "cas",
    +489            "inchikey": "inchikey",
    +490            "inchi": "inchi",
    +491            "chebi": "chebi",
    +492            "smiles": "smiles",
    +493            "kegg": "kegg",
    +494            "iupac_name": "iupac_name",
    +495            "traditional_name": "traditional_name",  # Not present in metabref
    +496            "common_name": "common_name",  # Not present in metabref
    +497        }
    +498
    +499        # Dictionary to map metabref keys to corems keys
    +500        lowres_ei_compound_cols = {
    +501            "id": "metabref_id",
    +502            "molecule_name": "name",  # Is this correct?
    +503            "classify": "classify",  # Not present in metabref
    +504            "formula": "formula",
    +505            "ri": "ri",
    +506            "rt": "retention_time",
    +507            "source": "source",  # Not present in metabref
    +508            "casno": "casno",
    +509            "comments": "comment",
    +510            "source_temp_c": "source_temp_c",  # Not present in metabref
    +511            "ev": "ev",  # Not present in metabref
    +512            "peak_count": "peaks_count",
    +513            "mz": "mz",
    +514            "abundance": "abundance",
    +515        }
    +516
    +517        # Local result container
    +518        corems_lib = []
    +519
    +520        # Enumerate spectra
    +521        for i, source_ in enumerate(metabref_lib):
    +522            # Copy source to prevent modification
    +523            source = source_.copy()
    +524
    +525            # Flatten source dict
    +526            source = source.pop("spectrum_data") | source
    +527
    +528            # Parse target data
    +529            target = {
    +530                lowres_ei_compound_cols[k]: v
    +531                for k, v in source.items()
    +532                if k in lowres_ei_compound_cols
    +533            }
    +534
    +535            # Explicitly add this to connect with LowResCompoundRef later
    +536            target["rt"] = source["rt"]
    +537
    +538            # Parse (mz, abundance)
    +539            arr = self.spectrum_to_array(target["mz"], normalize=normalize)
    +540            target["mz"] = arr[:, 0]
    +541            target["abundance"] = arr[:, 1]
    +542
    +543            # Parse meta data
    +544            target["metadata"] = {
    +545                metadatar_cols[k]: v for k, v in source.items() if k in metadatar_cols
    +546            }
    +547
    +548            # Add anything else
    +549            for k in source:
    +550                if k not in lowres_ei_compound_cols:
    +551                    target[k] = source[k]
    +552
    +553            # Add to CoreMS list
    +554            corems_lib.append(target)
     555
    -556    def _LowResolutionEICompound_dict_to_sqlite(
    -557        self, lowres_ei_compound_dict, url="sqlite://"
    -558    ):
    -559        """
    -560        Convert CoreMS LowResolutionEICompound-formatted dictionary to SQLite
    -561        database for local ingestion.
    -562
    -563        Parameters
    -564        ----------
    -565        lowres_ei_compound_dict : dict
    -566            CoreMS GC-MS library formatted for LowResolutionEICompound.
    -567        url : str
    -568            URL to SQLite prefix.
    -569
    -570        Returns
    -571        -------
    -572        sqlite database
    -573            Spectra contained in SQLite database.
    -574
    -575        """
    +556        return corems_lib
    +557
    +558    def _LowResolutionEICompound_dict_to_sqlite(
    +559        self, lowres_ei_compound_dict, url="sqlite://"
    +560    ):
    +561        """
    +562        Convert CoreMS LowResolutionEICompound-formatted dictionary to SQLite
    +563        database for local ingestion.
    +564
    +565        Parameters
    +566        ----------
    +567        lowres_ei_compound_dict : dict
    +568            CoreMS GC-MS library formatted for LowResolutionEICompound.
    +569        url : str
    +570            URL to SQLite prefix.
    +571
    +572        Returns
    +573        -------
    +574        sqlite database
    +575            Spectra contained in SQLite database.
     576
    -577        # Dictionary to map corems keys to all-caps keys
    -578        capped_cols = {
    -579            "name": "NAME",
    -580            "formula": "FORM",
    -581            "ri": "RI",
    -582            "retention_time": "RT",
    -583            "source": "SOURCE",
    -584            "casno": "CASNO",
    -585            "comment": "COMMENT",
    -586            "peaks_count": "NUM PEAKS",
    -587        }
    -588
    -589        # Initialize SQLite object
    -590        sqlite_obj = EI_LowRes_SQLite(url=url)
    -591
    -592        # Iterate spectra
    -593        for _data_dict in lowres_ei_compound_dict:
    -594            # Copy source to prevent modification
    -595            data_dict = _data_dict.copy()
    -596
    -597            # Add missing capped values
    -598            for k, v in capped_cols.items():
    -599                # Key exists
    -600                if k in data_dict:
    -601                    # # This will replace the key
    -602                    # data_dict[v] = data_dict.pop(k)
    -603
    -604                    # This will keep both keys
    -605                    data_dict[v] = data_dict[k]
    -606
    -607            # Parse number of peaks
    -608            if not data_dict.get("NUM PEAKS"):
    -609                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
    -610
    -611            # Parse CAS number
    -612            if not data_dict.get("CASNO"):
    -613                data_dict["CASNO"] = data_dict.get("CAS")
    -614
    -615            if not data_dict["CASNO"]:
    -616                data_dict["CASNO"] = 0
    -617
    -618            # Build linked metadata table
    -619            if "metadata" in data_dict:
    -620                if len(data_dict["metadata"]) > 0:
    -621                    data_dict["metadatar"] = Metadatar(**data_dict.pop("metadata"))
    -622                else:
    -623                    data_dict.pop("metadata")
    -624
    -625            # Attempt addition to sqlite
    -626            try:
    -627                sqlite_obj.add_compound(data_dict)
    -628            except:
    -629                print(data_dict["NAME"])
    -630
    -631        return sqlite_obj
    +577        """
    +578
    +579        # Dictionary to map corems keys to all-caps keys
    +580        capped_cols = {
    +581            "name": "NAME",
    +582            "formula": "FORM",
    +583            "ri": "RI",
    +584            "retention_time": "RT",
    +585            "source": "SOURCE",
    +586            "casno": "CASNO",
    +587            "comment": "COMMENT",
    +588            "peaks_count": "NUM PEAKS",
    +589        }
    +590
    +591        # Initialize SQLite object
    +592        sqlite_obj = EI_LowRes_SQLite(url=url)
    +593
    +594        # Iterate spectra
    +595        for _data_dict in lowres_ei_compound_dict:
    +596            # Copy source to prevent modification
    +597            data_dict = _data_dict.copy()
    +598
    +599            # Add missing capped values
    +600            for k, v in capped_cols.items():
    +601                # Key exists
    +602                if k in data_dict:
    +603                    # # This will replace the key
    +604                    # data_dict[v] = data_dict.pop(k)
    +605
    +606                    # This will keep both keys
    +607                    data_dict[v] = data_dict[k]
    +608
    +609            # Parse number of peaks
    +610            if not data_dict.get("NUM PEAKS"):
    +611                data_dict["NUM PEAKS"] = len(data_dict.get("mz"))
    +612
    +613            # Parse CAS number
    +614            if not data_dict.get("CASNO"):
    +615                data_dict["CASNO"] = data_dict.get("CAS")
    +616
    +617            if not data_dict["CASNO"]:
    +618                data_dict["CASNO"] = 0
    +619
    +620            # Build linked metadata table
    +621            if "metadata" in data_dict:
    +622                if len(data_dict["metadata"]) > 0:
    +623                    data_dict["metadatar"] = Metadatar(**data_dict.pop("metadata"))
    +624                else:
    +625                    data_dict.pop("metadata")
    +626
    +627            # Attempt addition to sqlite
    +628            try:
    +629                sqlite_obj.add_compound(data_dict)
    +630            except:
    +631                print(data_dict["NAME"])
    +632
    +633        return sqlite_obj
     
    @@ -2079,17 +2082,17 @@
    Inherited Members
    -
    361    def __init__(self):
    -362        """
    -363        Initialize instance.
    -364
    -365        """
    +            
    363    def __init__(self):
    +364        """
    +365        Initialize instance.
     366
    -367        super().__init__()
    -368        self.GCMS_LIBRARY_URL = "https://metabref.emsl.pnnl.gov/api/mslevel/1"
    -369        self.FAMES_URL = "https://metabref.emsl.pnnl.gov/api/fames"
    -370
    -371        self.__init_format_map__()
    +367        """
    +368
    +369        super().__init__()
    +370        self.GCMS_LIBRARY_URL = "https://metabref.emsl.pnnl.gov/api/mslevel/1"
    +371        self.FAMES_URL = "https://metabref.emsl.pnnl.gov/api/fames"
    +372
    +373        self.__init_format_map__()
     
    @@ -2131,17 +2134,17 @@
    Inherited Members
    -
    401    def available_formats(self):
    -402        """
    -403        View list of available formats.
    -404
    -405        Returns
    -406        -------
    -407        list
    -408            Format map keys.
    -409        """
    -410
    -411        return list(self.format_map.keys())
    +            
    403    def available_formats(self):
    +404        """
    +405        View list of available formats.
    +406
    +407        Returns
    +408        -------
    +409        list
    +410            Format map keys.
    +411        """
    +412
    +413        return list(self.format_map.keys())
     
    @@ -2167,30 +2170,30 @@
    Returns
    -
    413    def get_library(self, format="json", normalize=False):
    -414        """
    -415        Request MetabRef GC/MS library.
    -416
    -417        Parameters
    -418        ----------
    -419        format : str
    -420            Format of requested library, i.e. "json", "sql", "flashentropy".
    -421            See `available_formats` method for aliases.
    -422        normalize : bool
    -423            Normalize the spectrum by its magnitude.
    -424
    -425        Returns
    -426        -------
    -427        Library in requested format.
    -428
    -429        """
    +            
    415    def get_library(self, format="json", normalize=False):
    +416        """
    +417        Request MetabRef GC/MS library.
    +418
    +419        Parameters
    +420        ----------
    +421        format : str
    +422            Format of requested library, i.e. "json", "sql", "flashentropy".
    +423            See `available_formats` method for aliases.
    +424        normalize : bool
    +425            Normalize the spectrum by its magnitude.
    +426
    +427        Returns
    +428        -------
    +429        Library in requested format.
     430
    -431        # Init format function
    -432        format_func = self._get_format_func(format)
    -433
    -434        return format_func(
    -435            self.get_query(self.GCMS_LIBRARY_URL)["GC-MS"], normalize, {}
    -436        )
    +431        """
    +432
    +433        # Init format function
    +434        format_func = self._get_format_func(format)
    +435
    +436        return format_func(
    +437            self.get_query(self.GCMS_LIBRARY_URL)["GC-MS"], normalize, {}
    +438        )
     
    @@ -2226,28 +2229,28 @@
    Returns
    -
    438    def get_fames(self, format="json", normalize=False):
    -439        """
    -440        Request MetabRef GC/MS FAMEs library.
    -441
    -442        Parameters
    -443        ----------
    -444        format : str
    -445            Format of requested library, i.e. "json", "sql", "flashentropy".
    -446            See `available_formats` method for aliases.
    -447        normalize : bool
    -448            Normalize the spectrum by its magnitude.
    -449
    -450        Returns
    -451        -------
    -452        Library in requested format.
    -453
    -454        """
    +            
    440    def get_fames(self, format="json", normalize=False):
    +441        """
    +442        Request MetabRef GC/MS FAMEs library.
    +443
    +444        Parameters
    +445        ----------
    +446        format : str
    +447            Format of requested library, i.e. "json", "sql", "flashentropy".
    +448            See `available_formats` method for aliases.
    +449        normalize : bool
    +450            Normalize the spectrum by its magnitude.
    +451
    +452        Returns
    +453        -------
    +454        Library in requested format.
     455
    -456        # Init format function
    -457        format_func = self._get_format_func(format)
    -458
    -459        return format_func(self.get_query(self.FAMES_URL)["GC-MS"], normalize, {})
    +456        """
    +457
    +458        # Init format function
    +459        format_func = self._get_format_func(format)
    +460
    +461        return format_func(self.get_query(self.FAMES_URL)["GC-MS"], normalize, {})
     
    @@ -2302,28 +2305,26 @@
    Inherited Members
    -
    634class MetabRefLCInterface(MetabRefInterface):
    -635    """
    -636    Interface to the Metabolomics Reference Database for LC-MS data.
    -637    """
    -638
    -639    def __init__(self):
    -640        """
    -641        Initialize instance.
    -642
    -643        """
    +            
    636class MetabRefLCInterface(MetabRefInterface):
    +637    """
    +638    Interface to the Metabolomics Reference Database for LC-MS data.
    +639    """
    +640
    +641    def __init__(self):
    +642        """
    +643        Initialize instance.
     644
    -645        super().__init__()
    +645        """
     646
    -647        # API endpoint for precursor m/z search
    -648        self.PRECURSOR_MZ_URL = (
    -649            "https://metabref.emsl.pnnl.gov/api/precursors/m/{}/t/{}/{}"
    -650        )
    -651
    -652        # API endpoint for returning full list of precursor m/z values in database
    -653        self.PRECURSOR_MZ_ALL_URL = (
    -654            "https://metabref.emsl.pnnl.gov/api/precursors/{}"
    -655        )
    +647        super().__init__()
    +648
    +649        # API endpoint for precursor m/z search
    +650        self.PRECURSOR_MZ_URL = (
    +651            "https://metabref.emsl.pnnl.gov/api/precursors/m/{}/t/{}/{}"
    +652        )
    +653
    +654        # API endpoint for returning full list of precursor m/z values in database
    +655        self.PRECURSOR_MZ_ALL_URL = "https://metabref.emsl.pnnl.gov/api/precursors/{}"
     656
     657        self.__init_format_map__()
     658
    @@ -2399,16 +2400,16 @@ 
    Inherited Members
    728 ) 729 730 return lib -731 +731 732 def request_all_precursors(self, polarity): 733 """ 734 Request all precursor m/z values from MetabRef. -735 +735 736 Parameters 737 ---------- 738 polarity : str 739 Ionization polarity, either "positive" or "negative". -740 +740 741 Returns 742 ------- 743 list @@ -2417,10 +2418,10 @@
    Inherited Members
    746 # If polarity is anything other than positive or negative, raise error 747 if polarity not in ["positive", "negative"]: 748 raise ValueError("Polarity must be 'positive' or 'negative'") -749 +749 750 # Query MetabRef for all precursor m/z values 751 return self.get_query(self.PRECURSOR_MZ_ALL_URL.format(polarity)) -752 +752 753 def get_lipid_library( 754 self, 755 mz_list, @@ -2463,48 +2464,50 @@
    Inherited Members
    792 mz_list.sort() 793 794 # Get all precursors in the library matching the polarity -795 precusors_in_lib = self.request_all_precursors( -796 polarity=polarity -797 ) -798 precusors_in_lib.sort() -799 precusors_in_lib = np.array(precusors_in_lib) -800 -801 # Compare the mz_list with the precursors in the library, keep any mzs that are within mz_tol of any precursor in the library -802 mz_list = np.array(mz_list) -803 mz_df = pd.DataFrame(mz_list, columns=['mass_feature_mz']) -804 mz_df["closest_lib_pre_mz"] = precusors_in_lib[ -805 find_closest(precusors_in_lib, mz_df.mass_feature_mz.values) -806 ] -807 mz_df["mz_diff_ppm"] = np.abs((mz_df["mass_feature_mz"] - mz_df["closest_lib_pre_mz"])/mz_df["mass_feature_mz"]*1e6) -808 mz_df_sub = mz_df[mz_df["mz_diff_ppm"] <= mz_tol_ppm] -809 -810 # Query the library for the precursors in the mz_list that are in the library to retrieve the spectra and metadata -811 lib = self.query_by_precursor( -812 mz_list=mz_df_sub.mass_feature_mz.values, -813 polarity=polarity, -814 mz_tol_ppm=mz_tol_ppm, -815 mz_tol_da_api=mz_tol_da_api, -816 ) -817 -818 # Pull out lipid metadata from the metabref library and convert to LipidMetadata dataclass -819 mol_data_dict = {x["id"]: x["Molecular Data"] for x in lib} -820 lipid_lib = {x["id"]: x["Lipid Tree"] for x in lib if "Lipid Tree" in x.keys()} -821 mol_data_dict = {k: {**v, **lipid_lib[k]} for k, v in mol_data_dict.items()} -822 mol_data_dict = { -823 k: self._dict_to_dataclass(v, LipidMetadata) -824 for k, v in mol_data_dict.items() -825 } -826 -827 # Remove lipid metadata from the metabref library -828 lib = [ -829 {k: v for k, v in x.items() if k not in ["Molecular Data", "Lipid Tree"]} -830 for x in lib -831 ] -832 -833 # Format the spectral library -834 format_func = self._get_format_func(format) -835 lib = format_func(lib, normalize=normalize, fe_kwargs=fe_kwargs) -836 return (lib, mol_data_dict) +795 precusors_in_lib = self.request_all_precursors(polarity=polarity) +796 
precusors_in_lib.sort() +797 precusors_in_lib = np.array(precusors_in_lib) +798 +799 # Compare the mz_list with the precursors in the library, keep any mzs that are within mz_tol of any precursor in the library +800 mz_list = np.array(mz_list) +801 mz_df = pd.DataFrame(mz_list, columns=["mass_feature_mz"]) +802 mz_df["closest_lib_pre_mz"] = precusors_in_lib[ +803 find_closest(precusors_in_lib, mz_df.mass_feature_mz.values) +804 ] +805 mz_df["mz_diff_ppm"] = np.abs( +806 (mz_df["mass_feature_mz"] - mz_df["closest_lib_pre_mz"]) +807 / mz_df["mass_feature_mz"] +808 * 1e6 +809 ) +810 mz_df_sub = mz_df[mz_df["mz_diff_ppm"] <= mz_tol_ppm] +811 +812 # Query the library for the precursors in the mz_list that are in the library to retrieve the spectra and metadata +813 lib = self.query_by_precursor( +814 mz_list=mz_df_sub.mass_feature_mz.values, +815 polarity=polarity, +816 mz_tol_ppm=mz_tol_ppm, +817 mz_tol_da_api=mz_tol_da_api, +818 ) +819 +820 # Pull out lipid metadata from the metabref library and convert to LipidMetadata dataclass +821 mol_data_dict = {x["id"]: x["Molecular Data"] for x in lib} +822 lipid_lib = {x["id"]: x["Lipid Tree"] for x in lib if "Lipid Tree" in x.keys()} +823 mol_data_dict = {k: {**v, **lipid_lib[k]} for k, v in mol_data_dict.items()} +824 mol_data_dict = { +825 k: self._dict_to_dataclass(v, LipidMetadata) +826 for k, v in mol_data_dict.items() +827 } +828 +829 # Remove lipid metadata from the metabref library +830 lib = [ +831 {k: v for k, v in x.items() if k not in ["Molecular Data", "Lipid Tree"]} +832 for x in lib +833 ] +834 +835 # Format the spectral library +836 format_func = self._get_format_func(format) +837 lib = format_func(lib, normalize=normalize, fe_kwargs=fe_kwargs) +838 return (lib, mol_data_dict)
    @@ -2522,23 +2525,21 @@
    Inherited Members
    -
    639    def __init__(self):
    -640        """
    -641        Initialize instance.
    -642
    -643        """
    +            
    641    def __init__(self):
    +642        """
    +643        Initialize instance.
     644
    -645        super().__init__()
    +645        """
     646
    -647        # API endpoint for precursor m/z search
    -648        self.PRECURSOR_MZ_URL = (
    -649            "https://metabref.emsl.pnnl.gov/api/precursors/m/{}/t/{}/{}"
    -650        )
    -651
    -652        # API endpoint for returning full list of precursor m/z values in database
    -653        self.PRECURSOR_MZ_ALL_URL = (
    -654            "https://metabref.emsl.pnnl.gov/api/precursors/{}"
    -655        )
    +647        super().__init__()
    +648
    +649        # API endpoint for precursor m/z search
    +650        self.PRECURSOR_MZ_URL = (
    +651            "https://metabref.emsl.pnnl.gov/api/precursors/m/{}/t/{}/{}"
    +652        )
    +653
    +654        # API endpoint for returning full list of precursor m/z values in database
    +655        self.PRECURSOR_MZ_ALL_URL = "https://metabref.emsl.pnnl.gov/api/precursors/{}"
     656
     657        self.__init_format_map__()
     
    @@ -2678,12 +2679,12 @@
    Returns
    732    def request_all_precursors(self, polarity):
     733        """
     734        Request all precursor m/z values from MetabRef.
    -735        
    +735
     736        Parameters
     737        ----------
     738        polarity : str
     739            Ionization polarity, either "positive" or "negative".
    -740        
    +740
     741        Returns
     742        -------
     743        list
    @@ -2692,7 +2693,7 @@ 
    Returns
    746 # If polarity is anything other than positive or negative, raise error 747 if polarity not in ["positive", "negative"]: 748 raise ValueError("Polarity must be 'positive' or 'negative'") -749 +749 750 # Query MetabRef for all precursor m/z values 751 return self.get_query(self.PRECURSOR_MZ_ALL_URL.format(polarity))
    @@ -2769,48 +2770,50 @@
    Returns
    792 mz_list.sort() 793 794 # Get all precursors in the library matching the polarity -795 precusors_in_lib = self.request_all_precursors( -796 polarity=polarity -797 ) -798 precusors_in_lib.sort() -799 precusors_in_lib = np.array(precusors_in_lib) -800 -801 # Compare the mz_list with the precursors in the library, keep any mzs that are within mz_tol of any precursor in the library -802 mz_list = np.array(mz_list) -803 mz_df = pd.DataFrame(mz_list, columns=['mass_feature_mz']) -804 mz_df["closest_lib_pre_mz"] = precusors_in_lib[ -805 find_closest(precusors_in_lib, mz_df.mass_feature_mz.values) -806 ] -807 mz_df["mz_diff_ppm"] = np.abs((mz_df["mass_feature_mz"] - mz_df["closest_lib_pre_mz"])/mz_df["mass_feature_mz"]*1e6) -808 mz_df_sub = mz_df[mz_df["mz_diff_ppm"] <= mz_tol_ppm] -809 -810 # Query the library for the precursors in the mz_list that are in the library to retrieve the spectra and metadata -811 lib = self.query_by_precursor( -812 mz_list=mz_df_sub.mass_feature_mz.values, -813 polarity=polarity, -814 mz_tol_ppm=mz_tol_ppm, -815 mz_tol_da_api=mz_tol_da_api, -816 ) -817 -818 # Pull out lipid metadata from the metabref library and convert to LipidMetadata dataclass -819 mol_data_dict = {x["id"]: x["Molecular Data"] for x in lib} -820 lipid_lib = {x["id"]: x["Lipid Tree"] for x in lib if "Lipid Tree" in x.keys()} -821 mol_data_dict = {k: {**v, **lipid_lib[k]} for k, v in mol_data_dict.items()} -822 mol_data_dict = { -823 k: self._dict_to_dataclass(v, LipidMetadata) -824 for k, v in mol_data_dict.items() -825 } -826 -827 # Remove lipid metadata from the metabref library -828 lib = [ -829 {k: v for k, v in x.items() if k not in ["Molecular Data", "Lipid Tree"]} -830 for x in lib -831 ] -832 -833 # Format the spectral library -834 format_func = self._get_format_func(format) -835 lib = format_func(lib, normalize=normalize, fe_kwargs=fe_kwargs) -836 return (lib, mol_data_dict) +795 precusors_in_lib = self.request_all_precursors(polarity=polarity) +796 
precusors_in_lib.sort() +797 precusors_in_lib = np.array(precusors_in_lib) +798 +799 # Compare the mz_list with the precursors in the library, keep any mzs that are within mz_tol of any precursor in the library +800 mz_list = np.array(mz_list) +801 mz_df = pd.DataFrame(mz_list, columns=["mass_feature_mz"]) +802 mz_df["closest_lib_pre_mz"] = precusors_in_lib[ +803 find_closest(precusors_in_lib, mz_df.mass_feature_mz.values) +804 ] +805 mz_df["mz_diff_ppm"] = np.abs( +806 (mz_df["mass_feature_mz"] - mz_df["closest_lib_pre_mz"]) +807 / mz_df["mass_feature_mz"] +808 * 1e6 +809 ) +810 mz_df_sub = mz_df[mz_df["mz_diff_ppm"] <= mz_tol_ppm] +811 +812 # Query the library for the precursors in the mz_list that are in the library to retrieve the spectra and metadata +813 lib = self.query_by_precursor( +814 mz_list=mz_df_sub.mass_feature_mz.values, +815 polarity=polarity, +816 mz_tol_ppm=mz_tol_ppm, +817 mz_tol_da_api=mz_tol_da_api, +818 ) +819 +820 # Pull out lipid metadata from the metabref library and convert to LipidMetadata dataclass +821 mol_data_dict = {x["id"]: x["Molecular Data"] for x in lib} +822 lipid_lib = {x["id"]: x["Lipid Tree"] for x in lib if "Lipid Tree" in x.keys()} +823 mol_data_dict = {k: {**v, **lipid_lib[k]} for k, v in mol_data_dict.items()} +824 mol_data_dict = { +825 k: self._dict_to_dataclass(v, LipidMetadata) +826 for k, v in mol_data_dict.items() +827 } +828 +829 # Remove lipid metadata from the metabref library +830 lib = [ +831 {k: v for k, v in x.items() if k not in ["Molecular Data", "Lipid Tree"]} +832 for x in lib +833 ] +834 +835 # Format the spectral library +836 format_func = self._get_format_func(format) +837 lib = format_func(lib, normalize=normalize, fe_kwargs=fe_kwargs) +838 return (lib, mol_data_dict)
    diff --git a/docs/corems/molecular_id/search/findOxygenPeaks.html b/docs/corems/molecular_id/search/findOxygenPeaks.html index 6b094120..30e8521c 100644 --- a/docs/corems/molecular_id/search/findOxygenPeaks.html +++ b/docs/corems/molecular_id/search/findOxygenPeaks.html @@ -92,290 +92,331 @@

    3 4from copy import deepcopy 5from threading import Thread - 6from numpy import average, std - 7from corems.molecular_id.calc.ClusterFilter import ClusteringFilter - 8from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas - 9from corems.molecular_id.factory.molecularSQL import MolForm_SQL - 10from tqdm import tqdm - 11 - 12class FindOxygenPeaks(Thread): - 13 """ Class to find Oxygen peaks in a mass spectrum for formula assignment search - 14 - 15 Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter - 16 Returns a list of MSPeak class containing the possible Molecular Formula class objects. - 17 - 18 Parameters - 19 ---------- - 20 mass_spectrum_obj : MassSpec class - 21 This is where we store MassSpec class obj, - 22 - 23 lookupTableSettings: MolecularLookupTableSettings class - 24 This is where we store MolecularLookupTableSettings class obj - 25 - 26 min_O , max_O : int - 27 minium and maximum of Oxygen to allow the software to look for - 28 it will override the settings at lookupTableSettings.usedAtoms - 29 default min = 1, max = 22 - 30 - 31 Attributes - 32 ---------- - 33 mass_spectrum_obj : MassSpec class - 34 This is where we store MassSpec class obj, - 35 lookupTableSettings: MolecularLookupTableSettings class - 36 This is where we store MolecularLookupTableSettings class obj - 37 - 38 Methods - 39 ---------- - 40 * run(). - 41 will be called when the instantiated class method start is called - 42 * get_list_found_peaks(). - 43 returns a list of MSpeaks classes cotaining all the MolecularFormula candidates inside the MSPeak - 44 for more details of the structure see MSPeak class and MolecularFormula class - 45 * set_mass_spec_indexes_by_found_peaks(). 
- 46 set the mass spectrum to interate over only the selected indexes - 47 """ - 48 def __init__(self, mass_spectrum_obj, sql_db : bool = False, min_O :int = 1, max_O : int= 22) : - 49 - 50 Thread.__init__(self) - 51 - 52 self.mass_spectrum_obj = mass_spectrum_obj - 53 self.min_0 = min_O - 54 self.max_O = max_O - 55 - 56 if not sql_db: - 57 - 58 self.sql_db = MolForm_SQL(mass_spectrum_obj.molecular_search_settings.url_database) - 59 else: - 60 - 61 self.sql_db = sql_db - 62 - 63 def run(self): - 64 """ Run the thread - 65 """ - 66 #save initial settings min peaks per class filter - 67 initial_min_peak_bool = deepcopy(self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter) - 68 - 69 #deactivate the usage of min peaks per class filter - 70 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False - 71 - 72 #save initial settings for Ox - 73 initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O']) - 74 - 75 #resets the used atoms to look only for oxygen organic compounds - 76 self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'] = (self.min_0, self.max_O) - 77 - 78 self.list_found_mspeaks = [] - 79 - 80 kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base - 81 - 82 self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base) - 83 - 84 # needs to be wrapped inside the mass_spec class - 85 ClusteringFilter().filter_kendrick(self.mass_spectrum_obj) - 86 - 87 if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: - 88 print("Start most abundant mass spectral peak search") - 89 molecular_formula_obj_reference = self.find_most_abundant_formula(self.mass_spectrum_obj) - 90 - 91 if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: - 92 print("Select most abundant peak with molecular formula = %s with a m/z error of %s ppm" % (molecular_formula_obj_reference.string, molecular_formula_obj_reference.mz_error)) - 93 print("Started mass spectral peak series 
search") - 94 - 95 self.list_found_mspeaks = self.find_series_mspeaks(self.mass_spectrum_obj, - 96 molecular_formula_obj_reference, - 97 deltamz=14) - 98 - 99 # reset indexes after done with operation that includes a filter (i.e. ClusteringFilter().filter_kendrick()) -100 -101 self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'] = initial_ox -102 -103 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = initial_min_peak_bool -104 -105 self.mass_spectrum_obj.reset_indexes() -106 -107 self.mass_spectrum_obj.filter_by_noise_threshold() -108 if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: -109 print("Done with mass spectral peak series search") -110 -111 self.sql_db.close() + 6 + 7from numpy import average, std + 8 + 9from corems.molecular_id.calc.ClusterFilter import ClusteringFilter + 10from corems.molecular_id.factory.molecularSQL import MolForm_SQL + 11from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas + 12 + 13 + 14class FindOxygenPeaks(Thread): + 15 """Class to find Oxygen peaks in a mass spectrum for formula assignment search + 16 + 17 Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter + 18 Returns a list of MSPeak class containing the possible Molecular Formula class objects. 
+ 19 + 20 Parameters + 21 ---------- + 22 mass_spectrum_obj : MassSpec class + 23 This is where we store MassSpec class obj, + 24 + 25 lookupTableSettings: MolecularLookupTableSettings class + 26 This is where we store MolecularLookupTableSettings class obj + 27 + 28 min_O , max_O : int + 29 minium and maximum of Oxygen to allow the software to look for + 30 it will override the settings at lookupTableSettings.usedAtoms + 31 default min = 1, max = 22 + 32 + 33 Attributes + 34 ---------- + 35 mass_spectrum_obj : MassSpec class + 36 This is where we store MassSpec class obj, + 37 lookupTableSettings: MolecularLookupTableSettings class + 38 This is where we store MolecularLookupTableSettings class obj + 39 + 40 Methods + 41 ---------- + 42 * run(). + 43 will be called when the instantiated class method start is called + 44 * get_list_found_peaks(). + 45 returns a list of MSpeaks classes cotaining all the MolecularFormula candidates inside the MSPeak + 46 for more details of the structure see MSPeak class and MolecularFormula class + 47 * set_mass_spec_indexes_by_found_peaks(). 
+ 48 set the mass spectrum to interate over only the selected indexes + 49 """ + 50 + 51 def __init__( + 52 self, mass_spectrum_obj, sql_db: bool = False, min_O: int = 1, max_O: int = 22 + 53 ): + 54 Thread.__init__(self) + 55 + 56 self.mass_spectrum_obj = mass_spectrum_obj + 57 self.min_0 = min_O + 58 self.max_O = max_O + 59 + 60 if not sql_db: + 61 self.sql_db = MolForm_SQL( + 62 mass_spectrum_obj.molecular_search_settings.url_database + 63 ) + 64 else: + 65 self.sql_db = sql_db + 66 + 67 def run(self): + 68 """Run the thread""" + 69 # save initial settings min peaks per class filter + 70 initial_min_peak_bool = deepcopy( + 71 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter + 72 ) + 73 + 74 # deactivate the usage of min peaks per class filter + 75 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False + 76 + 77 # save initial settings for Ox + 78 initial_ox = deepcopy( + 79 self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] + 80 ) + 81 + 82 # resets the used atoms to look only for oxygen organic compounds + 83 self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = ( + 84 self.min_0, + 85 self.max_O, + 86 ) + 87 + 88 self.list_found_mspeaks = [] + 89 + 90 kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base + 91 + 92 self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base) + 93 + 94 # needs to be wrapped inside the mass_spec class + 95 ClusteringFilter().filter_kendrick(self.mass_spectrum_obj) + 96 + 97 if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: + 98 print("Start most abundant mass spectral peak search") + 99 molecular_formula_obj_reference = self.find_most_abundant_formula( +100 self.mass_spectrum_obj +101 ) +102 +103 if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: +104 print( +105 "Select most abundant peak with molecular formula = %s with a m/z error of %s ppm" +106 % ( +107 molecular_formula_obj_reference.string, +108 
molecular_formula_obj_reference.mz_error, +109 ) +110 ) +111 print("Started mass spectral peak series search") 112 -113 def find_most_abundant_formula(self, mass_spectrum_obj): -114 """ Find the most abundant formula in the mass spectrum -115 -116 Parameters -117 ---------- -118 mass_spectrum_obj : MassSpec class -119 Mass spectrum object +113 self.list_found_mspeaks = self.find_series_mspeaks( +114 self.mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14 +115 ) +116 +117 # reset indexes after done with operation that includes a filter (i.e. ClusteringFilter().filter_kendrick()) +118 +119 self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = initial_ox 120 -121 Returns -122 ---------- -123 MolecularFormula class obj -124 most abundant MolecularFormula with the lowest mass error -125 """ -126 #need to find a better way to cut off outliners -127 #import matplotlib.pyplot as plt -128 #plt.hist(mass_spectrum_obj.abundance, bins=100) -129 #plt.show() -130 -131 abundances = mass_spectrum_obj.abundance -132 abun_mean = average(abundances, axis=0) -133 abun_std = std(abundances, axis=0) -134 -135 upper_limit = abun_mean + 7* abun_std -136 if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: -137 print("Maximum abundance limit = %s and max abundance kendrick cluster = %s" % (upper_limit, max(mass_spectrum_obj, key=lambda m: m.abundance).abundance)) -138 -139 mspeak_most_abundant = max(mass_spectrum_obj, key=lambda m: m.abundance if m.abundance <= upper_limit else 0) +121 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = ( +122 initial_min_peak_bool +123 ) +124 +125 self.mass_spectrum_obj.reset_indexes() +126 +127 self.mass_spectrum_obj.filter_by_noise_threshold() +128 if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: +129 print("Done with mass spectral peak series search") +130 +131 self.sql_db.close() +132 +133 def find_most_abundant_formula(self, mass_spectrum_obj): +134 """Find the most 
abundant formula in the mass spectrum +135 +136 Parameters +137 ---------- +138 mass_spectrum_obj : MassSpec class +139 Mass spectrum object 140 -141 print("Searching molecular formulas") -142 -143 SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks([mspeak_most_abundant]) -144 -145 print("Finished searching molecular formulas") -146 -147 if mspeak_most_abundant: -148 -149 return mspeak_most_abundant.best_molecular_formula_candidate -150 -151 else: -152 -153 raise Exception("Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"%mspeak_most_abundant.mz_exp ) -154 -155 #return the first option -156 #return mspeak_most_abundant[0] -157 -158 def find_most_abundant_formula_test(self, mass_spectrum_obj, settings): -159 """ [Test function] Find the most abundant formula in the mass spectrum -160 -161 Parameters -162 ---------- -163 mass_spectrum_obj : MassSpec class -164 Mass spectrum object -165 settings : MolecularSearchSettings class -166 Molecular search settings object -167 -168 Returns -169 ---------- -170 MolecularFormula class obj -171 most abundant MolecularFormula with the lowest mass error -172 -173 """ -174 #this function is intended for test only. 
-175 # Have to sort by Kendrick to be able to select the most abundant series -176 #then select the most abundant peak inside the series -177 #or have the user select the reference mspeak on the gui -178 -179 mspeak_most_abundant = mass_spectrum_obj.most_abundant_mspeak +141 Returns +142 ---------- +143 MolecularFormula class obj +144 most abundant MolecularFormula with the lowest mass error +145 """ +146 # need to find a better way to cut off outliners +147 # import matplotlib.pyplot as plt +148 # plt.hist(mass_spectrum_obj.abundance, bins=100) +149 # plt.show() +150 +151 abundances = mass_spectrum_obj.abundance +152 abun_mean = average(abundances, axis=0) +153 abun_std = std(abundances, axis=0) +154 +155 upper_limit = abun_mean + 7 * abun_std +156 if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: +157 print( +158 "Maximum abundance limit = %s and max abundance kendrick cluster = %s" +159 % ( +160 upper_limit, +161 max(mass_spectrum_obj, key=lambda m: m.abundance).abundance, +162 ) +163 ) +164 +165 mspeak_most_abundant = max( +166 mass_spectrum_obj, +167 key=lambda m: m.abundance if m.abundance <= upper_limit else 0, +168 ) +169 +170 print("Searching molecular formulas") +171 +172 SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks( +173 [mspeak_most_abundant] +174 ) +175 +176 print("Finished searching molecular formulas") +177 +178 if mspeak_most_abundant: +179 return mspeak_most_abundant.best_molecular_formula_candidate 180 -181 SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks([mspeak_most_abundant]) -182 -183 if mspeak_most_abundant: -184 -185 return mspeak_most_abundant.best_molecular_formula_candidate -186 -187 else: -188 raise Exception("Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"%mspeak_most_abundant.mz_exp ) -189 #return the first option -190 #return mspeak_most_abundant[0] -191 -192 def find_series_mspeaks(self, mass_spectrum_obj, 
molecular_formula_obj_reference, deltamz=14): -193 """ Find a series of abundant peaks in the mass spectrum for a given molecular formula -194 -195 Parameters -196 ---------- -197 mass_spectrum_obj : MassSpec class -198 Mass spectrum object -199 molecular_formula_obj_reference : MolecularFormula class -200 Molecular formula object -201 deltamz : float -202 delta m/z to look for peaks -203 -204 Returns -205 ---------- -206 list -207 list of MSpeak class objects -208 """ -209 abundances = mass_spectrum_obj.abundance -210 abun_mean = average(abundances, axis=0) -211 abun_std = std(abundances, axis=0) -212 upper_limit = abun_mean + 7* abun_std -213 -214 list_most_abundant_peaks = list() -215 -216 min_mz = mass_spectrum_obj.min_mz_exp -217 -218 max_mz = mass_spectrum_obj.max_mz_exp -219 -220 initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc -221 -222 mass = initial_nominal_mass -223 -224 nominal_masses = [] -225 while mass <= max_mz: -226 #print "shit 1", mass, min_mz -227 mass += (deltamz) -228 nominal_masses.append(mass) -229 -230 mass = initial_nominal_mass -231 while mass >= min_mz: -232 #print "shit 1", mass, min_mz -233 mass -= (deltamz) -234 nominal_masses.append(mass) -235 -236 nominal_masses = sorted(nominal_masses) -237 -238 for nominal_mass in nominal_masses: -239 -240 first_index, last_index = mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass) -241 -242 ms_peaks = mass_spectrum_obj[first_index:last_index] -243 -244 if ms_peaks: -245 # -246 #print (nominal_mass, first_index, -247 # last_index, -248 # mass_spectrum_obj[first_index].mz_exp, -249 # mass_spectrum_obj[last_index].mz_exp -250 # ) -251 # -252 -253 mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance if m.abundance <= upper_limit else 0) -254 -255 #mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance) -256 -257 list_most_abundant_peaks.append(mspeak_most_abundant) -258 if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: -259 
print('Start molecular formula search') -260 SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(list_most_abundant_peaks) -261 if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: -262 print('Done molecular formula search') -263 return [mspeak for mspeak in list_most_abundant_peaks if mspeak] -264 -265 -266 def get_list_found_peaks(self): -267 """ Get the list of found peaks -268 -269 Returns -270 ---------- -271 list -272 list of MSpeak class objects -273 """ -274 return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp) +181 else: +182 raise Exception( +183 "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f" +184 % mspeak_most_abundant.mz_exp +185 ) +186 +187 # return the first option +188 # return mspeak_most_abundant[0] +189 +190 def find_most_abundant_formula_test(self, mass_spectrum_obj, settings): +191 """[Test function] Find the most abundant formula in the mass spectrum +192 +193 Parameters +194 ---------- +195 mass_spectrum_obj : MassSpec class +196 Mass spectrum object +197 settings : MolecularSearchSettings class +198 Molecular search settings object +199 +200 Returns +201 ---------- +202 MolecularFormula class obj +203 most abundant MolecularFormula with the lowest mass error +204 +205 """ +206 # this function is intended for test only. 
+207 # Have to sort by Kendrick to be able to select the most abundant series +208 # then select the most abundant peak inside the series +209 # or have the user select the reference mspeak on the gui +210 +211 mspeak_most_abundant = mass_spectrum_obj.most_abundant_mspeak +212 +213 SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks( +214 [mspeak_most_abundant] +215 ) +216 +217 if mspeak_most_abundant: +218 return mspeak_most_abundant.best_molecular_formula_candidate +219 +220 else: +221 raise Exception( +222 "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f" +223 % mspeak_most_abundant.mz_exp +224 ) +225 # return the first option +226 # return mspeak_most_abundant[0] +227 +228 def find_series_mspeaks( +229 self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14 +230 ): +231 """Find a series of abundant peaks in the mass spectrum for a given molecular formula +232 +233 Parameters +234 ---------- +235 mass_spectrum_obj : MassSpec class +236 Mass spectrum object +237 molecular_formula_obj_reference : MolecularFormula class +238 Molecular formula object +239 deltamz : float +240 delta m/z to look for peaks +241 +242 Returns +243 ---------- +244 list +245 list of MSpeak class objects +246 """ +247 abundances = mass_spectrum_obj.abundance +248 abun_mean = average(abundances, axis=0) +249 abun_std = std(abundances, axis=0) +250 upper_limit = abun_mean + 7 * abun_std +251 +252 list_most_abundant_peaks = list() +253 +254 min_mz = mass_spectrum_obj.min_mz_exp +255 +256 max_mz = mass_spectrum_obj.max_mz_exp +257 +258 initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc +259 +260 mass = initial_nominal_mass +261 +262 nominal_masses = [] +263 while mass <= max_mz: +264 # print "shit 1", mass, min_mz +265 mass += deltamz +266 nominal_masses.append(mass) +267 +268 mass = initial_nominal_mass +269 while mass >= min_mz: +270 # print "shit 1", mass, min_mz +271 mass -= deltamz +272 
nominal_masses.append(mass) +273 +274 nominal_masses = sorted(nominal_masses) 275 -276 def set_mass_spec_indexes_by_found_peaks(self): -277 """ Set the mass spectrum to interate over only the selected indexes. -278 -279 Notes -280 ---------- -281 Warning!!!! -282 set the mass spectrum to interate over only the selected indexes -283 don not forget to call mass_spectrum_obj.reset_indexes after the job is done -284 """ -285 -286 indexes = [msp.index for msp in self.list_found_mspeaks] -287 self.mass_spectrum_obj.set_indexes(indexes) -288 -289 +276 for nominal_mass in nominal_masses: +277 first_index, last_index = ( +278 mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass) +279 ) +280 +281 ms_peaks = mass_spectrum_obj[first_index:last_index] +282 +283 if ms_peaks: +284 # +285 # print (nominal_mass, first_index, +286 # last_index, +287 # mass_spectrum_obj[first_index].mz_exp, +288 # mass_spectrum_obj[last_index].mz_exp +289 # ) +290 # +291 +292 mspeak_most_abundant = max( +293 ms_peaks, +294 key=lambda m: m.abundance if m.abundance <= upper_limit else 0, +295 ) +296 +297 # mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance) +298 +299 list_most_abundant_peaks.append(mspeak_most_abundant) +300 if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: +301 print("Start molecular formula search") +302 SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks( +303 list_most_abundant_peaks +304 ) +305 if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing: +306 print("Done molecular formula search") +307 return [mspeak for mspeak in list_most_abundant_peaks if mspeak] +308 +309 def get_list_found_peaks(self): +310 """Get the list of found peaks +311 +312 Returns +313 ---------- +314 list +315 list of MSpeak class objects +316 """ +317 return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp) +318 +319 def set_mass_spec_indexes_by_found_peaks(self): +320 """Set the mass spectrum to interate over only the 
selected indexes. +321 +322 Notes +323 ---------- +324 Warning!!!! +325 set the mass spectrum to interate over only the selected indexes +326 don not forget to call mass_spectrum_obj.reset_indexes after the job is done +327 """ +328 +329 indexes = [msp.index for msp in self.list_found_mspeaks] +330 self.mass_spectrum_obj.set_indexes(indexes)

    @@ -391,289 +432,330 @@

    -
     13class FindOxygenPeaks(Thread):
    - 14    """ Class to find Oxygen peaks in a mass spectrum for formula assignment search
    - 15     
    - 16        Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter
    - 17        Returns a list of MSPeak class containing the possible Molecular Formula class objects.  
    - 18        
    - 19        Parameters
    - 20        ----------
    - 21        mass_spectrum_obj : MassSpec class
    - 22            This is where we store MassSpec class obj,   
    - 23        
    - 24        lookupTableSettings:  MolecularLookupTableSettings class
    - 25            This is where we store MolecularLookupTableSettings class obj
    - 26        
    - 27        min_O , max_O : int
    - 28            minium and maximum of Oxygen to allow the software to look for
    - 29            it will override the settings at lookupTableSettings.usedAtoms
    - 30            default min = 1, max = 22
    - 31
    - 32        Attributes
    - 33        ----------
    - 34        mass_spectrum_obj : MassSpec class
    - 35            This is where we store MassSpec class obj,   
    - 36        lookupTableSettings:  MolecularLookupTableSettings class
    - 37            This is where we store MolecularLookupTableSettings class obj
    - 38        
    - 39        Methods
    - 40        ----------
    - 41        * run().
    - 42                will be called when the instantiated class method start is called  
    - 43        * get_list_found_peaks().
    - 44                returns a list of MSpeaks classes cotaining all the MolecularFormula candidates inside the MSPeak
    - 45                for more details of the structure see MSPeak class and MolecularFormula class     
    - 46        * set_mass_spec_indexes_by_found_peaks().
    - 47                set the mass spectrum to interate over only the selected indexes  
    - 48    """
    - 49    def __init__(self, mass_spectrum_obj, sql_db : bool = False, min_O :int = 1, max_O : int= 22) :
    - 50        
    - 51        Thread.__init__(self)
    - 52        
    - 53        self.mass_spectrum_obj = mass_spectrum_obj
    - 54        self.min_0 = min_O
    - 55        self.max_O = max_O
    - 56        
    - 57        if not sql_db:
    - 58            
    - 59            self.sql_db = MolForm_SQL(mass_spectrum_obj.molecular_search_settings.url_database)
    - 60        else:
    - 61
    - 62            self.sql_db = sql_db    
    - 63    
    - 64    def run(self):
    - 65        """ Run the thread
    - 66        """
    - 67        #save initial settings min peaks per class filter 
    - 68        initial_min_peak_bool = deepcopy(self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter)
    - 69
    - 70        #deactivate the usage of min peaks per class filter
    - 71        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
    - 72
    - 73        #save initial settings for Ox 
    - 74        initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'])
    - 75
    - 76        #resets the used atoms to look only for oxygen organic compounds
    - 77        self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'] =  (self.min_0, self.max_O)
    - 78        
    - 79        self.list_found_mspeaks = []
    - 80
    - 81        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    - 82        
    - 83        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    - 84        
    - 85        # needs to be wrapped inside the mass_spec class
    - 86        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    - 87        
    - 88        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    - 89            print("Start most abundant mass spectral peak search") 
    - 90        molecular_formula_obj_reference = self.find_most_abundant_formula(self.mass_spectrum_obj)
    - 91        
    - 92        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    - 93            print("Select most abundant peak with molecular formula =  %s with a m/z error of %s ppm" % (molecular_formula_obj_reference.string, molecular_formula_obj_reference.mz_error))
    - 94            print("Started mass spectral peak series search")
    - 95
    - 96        self.list_found_mspeaks = self.find_series_mspeaks(self.mass_spectrum_obj,
    - 97                                                           molecular_formula_obj_reference, 
    - 98                                                           deltamz=14)
    - 99        
    -100        # reset indexes after done with operation that includes a filter (i.e. ClusteringFilter().filter_kendrick())
    -101        
    -102        self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'] =  initial_ox
    -103        
    -104        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = initial_min_peak_bool
    -105        
    -106        self.mass_spectrum_obj.reset_indexes()
    -107
    -108        self.mass_spectrum_obj.filter_by_noise_threshold()
    -109        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -110            print("Done with mass spectral peak series search")
    -111
    -112        self.sql_db.close()
    +            
     15class FindOxygenPeaks(Thread):
    + 16    """Class to find Oxygen peaks in a mass spectrum for formula assignment search
    + 17
    + 18    Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter
    + 19    Returns a list of MSPeak class containing the possible Molecular Formula class objects.
    + 20
    + 21    Parameters
    + 22    ----------
    + 23    mass_spectrum_obj : MassSpec class
    + 24        This is where we store MassSpec class obj,
    + 25
    + 26    lookupTableSettings:  MolecularLookupTableSettings class
    + 27        This is where we store MolecularLookupTableSettings class obj
    + 28
    + 29    min_O , max_O : int
    + 30        minium and maximum of Oxygen to allow the software to look for
    + 31        it will override the settings at lookupTableSettings.usedAtoms
    + 32        default min = 1, max = 22
    + 33
    + 34    Attributes
    + 35    ----------
    + 36    mass_spectrum_obj : MassSpec class
    + 37        This is where we store MassSpec class obj,
    + 38    lookupTableSettings:  MolecularLookupTableSettings class
    + 39        This is where we store MolecularLookupTableSettings class obj
    + 40
    + 41    Methods
    + 42    ----------
    + 43    * run().
    + 44            will be called when the instantiated class method start is called
    + 45    * get_list_found_peaks().
    + 46            returns a list of MSpeaks classes cotaining all the MolecularFormula candidates inside the MSPeak
    + 47            for more details of the structure see MSPeak class and MolecularFormula class
    + 48    * set_mass_spec_indexes_by_found_peaks().
    + 49            set the mass spectrum to interate over only the selected indexes
    + 50    """
    + 51
    + 52    def __init__(
    + 53        self, mass_spectrum_obj, sql_db: bool = False, min_O: int = 1, max_O: int = 22
    + 54    ):
    + 55        Thread.__init__(self)
    + 56
    + 57        self.mass_spectrum_obj = mass_spectrum_obj
    + 58        self.min_0 = min_O
    + 59        self.max_O = max_O
    + 60
    + 61        if not sql_db:
    + 62            self.sql_db = MolForm_SQL(
    + 63                mass_spectrum_obj.molecular_search_settings.url_database
    + 64            )
    + 65        else:
    + 66            self.sql_db = sql_db
    + 67
    + 68    def run(self):
    + 69        """Run the thread"""
    + 70        # save initial settings min peaks per class filter
    + 71        initial_min_peak_bool = deepcopy(
    + 72            self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter
    + 73        )
    + 74
    + 75        # deactivate the usage of min peaks per class filter
    + 76        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
    + 77
    + 78        # save initial settings for Ox
    + 79        initial_ox = deepcopy(
    + 80            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"]
    + 81        )
    + 82
    + 83        # resets the used atoms to look only for oxygen organic compounds
    + 84        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
    + 85            self.min_0,
    + 86            self.max_O,
    + 87        )
    + 88
    + 89        self.list_found_mspeaks = []
    + 90
    + 91        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    + 92
    + 93        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    + 94
    + 95        # needs to be wrapped inside the mass_spec class
    + 96        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    + 97
    + 98        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    + 99            print("Start most abundant mass spectral peak search")
    +100        molecular_formula_obj_reference = self.find_most_abundant_formula(
    +101            self.mass_spectrum_obj
    +102        )
    +103
    +104        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +105            print(
    +106                "Select most abundant peak with molecular formula =  %s with a m/z error of %s ppm"
    +107                % (
    +108                    molecular_formula_obj_reference.string,
    +109                    molecular_formula_obj_reference.mz_error,
    +110                )
    +111            )
    +112            print("Started mass spectral peak series search")
     113
    -114    def find_most_abundant_formula(self, mass_spectrum_obj):
    -115        """ Find the most abundant formula in the mass spectrum
    -116
    -117        Parameters
    -118        ----------
    -119        mass_spectrum_obj : MassSpec class
    -120            Mass spectrum object
    +114        self.list_found_mspeaks = self.find_series_mspeaks(
    +115            self.mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
    +116        )
    +117
    +118        # reset indexes after done with operation that includes a filter (i.e. ClusteringFilter().filter_kendrick())
    +119
    +120        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = initial_ox
     121
    -122        Returns
    -123        ----------
    -124        MolecularFormula class obj
    -125            most abundant MolecularFormula with the lowest mass error
    -126        """
    -127        #need to find a better way to cut off outliners
    -128        #import matplotlib.pyplot as plt
    -129        #plt.hist(mass_spectrum_obj.abundance, bins=100)
    -130        #plt.show()
    -131        
    -132        abundances =  mass_spectrum_obj.abundance
    -133        abun_mean = average(abundances, axis=0)
    -134        abun_std = std(abundances, axis=0)
    -135        
    -136        upper_limit = abun_mean + 7* abun_std
    -137        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -138            print("Maximum abundance limit  = %s and max abundance kendrick cluster = %s"  % (upper_limit, max(mass_spectrum_obj, key=lambda m: m.abundance).abundance))
    -139        
    -140        mspeak_most_abundant = max(mass_spectrum_obj, key=lambda m: m.abundance if m.abundance <= upper_limit else 0)
    +122        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = (
    +123            initial_min_peak_bool
    +124        )
    +125
    +126        self.mass_spectrum_obj.reset_indexes()
    +127
    +128        self.mass_spectrum_obj.filter_by_noise_threshold()
    +129        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +130            print("Done with mass spectral peak series search")
    +131
    +132        self.sql_db.close()
    +133
    +134    def find_most_abundant_formula(self, mass_spectrum_obj):
    +135        """Find the most abundant formula in the mass spectrum
    +136
    +137        Parameters
    +138        ----------
    +139        mass_spectrum_obj : MassSpec class
    +140            Mass spectrum object
     141
    -142        print("Searching molecular formulas")
    -143
    -144        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks([mspeak_most_abundant])
    -145        
    -146        print("Finished searching molecular formulas")
    -147
    -148        if mspeak_most_abundant:
    -149
    -150            return mspeak_most_abundant.best_molecular_formula_candidate
    -151        
    -152        else:
    -153        
    -154            raise Exception("Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"%mspeak_most_abundant.mz_exp )
    -155        
    -156        #return the first option
    -157        #return mspeak_most_abundant[0]
    -158
    -159    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
    -160        """ [Test function] Find the most abundant formula in the mass spectrum
    -161        
    -162        Parameters
    -163        ----------
    -164        mass_spectrum_obj : MassSpec class
    -165            Mass spectrum object
    -166        settings : MolecularSearchSettings class
    -167            Molecular search settings object
    -168        
    -169        Returns
    -170        ----------
    -171        MolecularFormula class obj
    -172            most abundant MolecularFormula with the lowest mass error
    -173        
    -174        """
    -175        #this function is intended for test only. 
    -176        # Have to sort by Kendrick to be able to select the most abundant series 
    -177        #then select the most abundant peak inside the series
    -178        #or have the user select the reference mspeak on the gui
    -179
    -180        mspeak_most_abundant = mass_spectrum_obj.most_abundant_mspeak
    +142        Returns
    +143        ----------
    +144        MolecularFormula class obj
    +145            most abundant MolecularFormula with the lowest mass error
    +146        """
    +147        # need to find a better way to cut off outliners
    +148        # import matplotlib.pyplot as plt
    +149        # plt.hist(mass_spectrum_obj.abundance, bins=100)
    +150        # plt.show()
    +151
    +152        abundances = mass_spectrum_obj.abundance
    +153        abun_mean = average(abundances, axis=0)
    +154        abun_std = std(abundances, axis=0)
    +155
    +156        upper_limit = abun_mean + 7 * abun_std
    +157        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +158            print(
    +159                "Maximum abundance limit  = %s and max abundance kendrick cluster = %s"
    +160                % (
    +161                    upper_limit,
    +162                    max(mass_spectrum_obj, key=lambda m: m.abundance).abundance,
    +163                )
    +164            )
    +165
    +166        mspeak_most_abundant = max(
    +167            mass_spectrum_obj,
    +168            key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
    +169        )
    +170
    +171        print("Searching molecular formulas")
    +172
    +173        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
    +174            [mspeak_most_abundant]
    +175        )
    +176
    +177        print("Finished searching molecular formulas")
    +178
    +179        if mspeak_most_abundant:
    +180            return mspeak_most_abundant.best_molecular_formula_candidate
     181
    -182        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks([mspeak_most_abundant])
    -183        
    -184        if mspeak_most_abundant:
    -185
    -186            return mspeak_most_abundant.best_molecular_formula_candidate 
    -187            
    -188        else:
    -189            raise Exception("Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"%mspeak_most_abundant.mz_exp )
    -190        #return the first option
    -191        #return mspeak_most_abundant[0]
    -192    
    -193    def find_series_mspeaks(self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14):
    -194        """ Find a series of abundant peaks in the mass spectrum for a given molecular formula
    -195        
    -196        Parameters
    -197        ----------
    -198        mass_spectrum_obj : MassSpec class
    -199            Mass spectrum object
    -200        molecular_formula_obj_reference : MolecularFormula class
    -201            Molecular formula object
    -202        deltamz : float
    -203            delta m/z to look for peaks
    -204            
    -205        Returns
    -206        ----------
    -207        list
    -208            list of MSpeak class objects
    -209        """
    -210        abundances =  mass_spectrum_obj.abundance
    -211        abun_mean = average(abundances, axis=0)
    -212        abun_std = std(abundances, axis=0)
    -213        upper_limit = abun_mean + 7* abun_std
    -214       
    -215        list_most_abundant_peaks = list()
    -216
    -217        min_mz = mass_spectrum_obj.min_mz_exp
    -218        
    -219        max_mz = mass_spectrum_obj.max_mz_exp
    -220        
    -221        initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc
    -222        
    -223        mass = initial_nominal_mass
    -224        
    -225        nominal_masses = []
    -226        while mass <= max_mz:
    -227            #print "shit 1", mass, min_mz
    -228            mass += (deltamz) 
    -229            nominal_masses.append(mass)
    -230        
    -231        mass = initial_nominal_mass    
    -232        while mass >= min_mz:
    -233            #print "shit 1", mass, min_mz
    -234            mass -= (deltamz) 
    -235            nominal_masses.append(mass)
    -236        
    -237        nominal_masses = sorted(nominal_masses)
    -238        
    -239        for nominal_mass in nominal_masses:
    -240            
    -241            first_index, last_index = mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
    -242            
    -243            ms_peaks = mass_spectrum_obj[first_index:last_index]
    -244            
    -245            if ms_peaks:   
    -246                #   
    -247                #print (nominal_mass, first_index, 
    -248                #    last_index, 
    -249                #    mass_spectrum_obj[first_index].mz_exp,
    -250                #    mass_spectrum_obj[last_index].mz_exp
    -251                #    )
    -252                #
    -253                
    -254                mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance if m.abundance <= upper_limit else 0)
    -255
    -256                #mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance)
    -257                
    -258                list_most_abundant_peaks.append(mspeak_most_abundant)
    -259        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -260            print('Start molecular formula search')
    -261        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(list_most_abundant_peaks)
    -262        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -263            print('Done molecular formula search')
    -264        return [mspeak for mspeak in list_most_abundant_peaks if mspeak]            
    -265                
    -266    
    -267    def get_list_found_peaks(self):
    -268        """ Get the list of found peaks
    -269        
    -270        Returns
    -271        ----------
    -272        list
    -273            list of MSpeak class objects
    -274        """
    -275        return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp)
    +182        else:
    +183            raise Exception(
    +184                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
    +185                % mspeak_most_abundant.mz_exp
    +186            )
    +187
    +188        # return the first option
    +189        # return mspeak_most_abundant[0]
    +190
    +191    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
    +192        """[Test function] Find the most abundant formula in the mass spectrum
    +193
    +194        Parameters
    +195        ----------
    +196        mass_spectrum_obj : MassSpec class
    +197            Mass spectrum object
    +198        settings : MolecularSearchSettings class
    +199            Molecular search settings object
    +200
    +201        Returns
    +202        ----------
    +203        MolecularFormula class obj
    +204            most abundant MolecularFormula with the lowest mass error
    +205
    +206        """
    +207        # this function is intended for test only.
    +208        # Have to sort by Kendrick to be able to select the most abundant series
    +209        # then select the most abundant peak inside the series
    +210        # or have the user select the reference mspeak on the gui
    +211
    +212        mspeak_most_abundant = mass_spectrum_obj.most_abundant_mspeak
    +213
    +214        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
    +215            [mspeak_most_abundant]
    +216        )
    +217
    +218        if mspeak_most_abundant:
    +219            return mspeak_most_abundant.best_molecular_formula_candidate
    +220
    +221        else:
    +222            raise Exception(
    +223                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
    +224                % mspeak_most_abundant.mz_exp
    +225            )
    +226        # return the first option
    +227        # return mspeak_most_abundant[0]
    +228
    +229    def find_series_mspeaks(
    +230        self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
    +231    ):
    +232        """Find a series of abundant peaks in the mass spectrum for a given molecular formula
    +233
    +234        Parameters
    +235        ----------
    +236        mass_spectrum_obj : MassSpec class
    +237            Mass spectrum object
    +238        molecular_formula_obj_reference : MolecularFormula class
    +239            Molecular formula object
    +240        deltamz : float
    +241            delta m/z to look for peaks
    +242
    +243        Returns
    +244        ----------
    +245        list
    +246            list of MSpeak class objects
    +247        """
    +248        abundances = mass_spectrum_obj.abundance
    +249        abun_mean = average(abundances, axis=0)
    +250        abun_std = std(abundances, axis=0)
    +251        upper_limit = abun_mean + 7 * abun_std
    +252
    +253        list_most_abundant_peaks = list()
    +254
    +255        min_mz = mass_spectrum_obj.min_mz_exp
    +256
    +257        max_mz = mass_spectrum_obj.max_mz_exp
    +258
    +259        initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc
    +260
    +261        mass = initial_nominal_mass
    +262
    +263        nominal_masses = []
    +264        while mass <= max_mz:
    +265            # print "shit 1", mass, min_mz
    +266            mass += deltamz
    +267            nominal_masses.append(mass)
    +268
    +269        mass = initial_nominal_mass
    +270        while mass >= min_mz:
    +271            # print "shit 1", mass, min_mz
    +272            mass -= deltamz
    +273            nominal_masses.append(mass)
    +274
    +275        nominal_masses = sorted(nominal_masses)
     276
    -277    def set_mass_spec_indexes_by_found_peaks(self):
    -278        """ Set the mass spectrum to interate over only the selected indexes.
    -279
    -280        Notes
    -281        ----------
    -282        Warning!!!!
    -283        set the mass spectrum to interate over only the selected indexes
    -284        don not forget to call mass_spectrum_obj.reset_indexes after the job is done
    -285        """
    -286        
    -287        indexes = [msp.index for msp in self.list_found_mspeaks]
    -288        self.mass_spectrum_obj.set_indexes(indexes)
    +277        for nominal_mass in nominal_masses:
    +278            first_index, last_index = (
    +279                mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
    +280            )
    +281
    +282            ms_peaks = mass_spectrum_obj[first_index:last_index]
    +283
    +284            if ms_peaks:
    +285                #
    +286                # print (nominal_mass, first_index,
    +287                #    last_index,
    +288                #    mass_spectrum_obj[first_index].mz_exp,
    +289                #    mass_spectrum_obj[last_index].mz_exp
    +290                #    )
    +291                #
    +292
    +293                mspeak_most_abundant = max(
    +294                    ms_peaks,
    +295                    key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
    +296                )
    +297
    +298                # mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance)
    +299
    +300                list_most_abundant_peaks.append(mspeak_most_abundant)
    +301        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +302            print("Start molecular formula search")
    +303        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
    +304            list_most_abundant_peaks
    +305        )
    +306        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +307            print("Done molecular formula search")
    +308        return [mspeak for mspeak in list_most_abundant_peaks if mspeak]
    +309
    +310    def get_list_found_peaks(self):
    +311        """Get the list of found peaks
    +312
    +313        Returns
    +314        ----------
    +315        list
    +316            list of MSpeak class objects
    +317        """
    +318        return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp)
    +319
    +320    def set_mass_spec_indexes_by_found_peaks(self):
    +321        """Set the mass spectrum to interate over only the selected indexes.
    +322
    +323        Notes
    +324        ----------
    +325        Warning!!!!
    +326        set the mass spectrum to interate over only the selected indexes
    +327        don not forget to call mass_spectrum_obj.reset_indexes after the job is done
    +328        """
    +329
    +330        indexes = [msp.index for msp in self.list_found_mspeaks]
    +331        self.mass_spectrum_obj.set_indexes(indexes)
     

    Class to find Oxygen peaks in a mass spectrum for formula assignment search

    Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter -Returns a list of MSPeak class containing the possible Molecular Formula class objects.

    +Returns a list of MSPeak class containing the possible Molecular Formula class objects.

    Parameters
    @@ -701,10 +783,10 @@
    Methods
    • run(). -will be called when the instantiated class method start is called
    • +will be called when the instantiated class method start is called
    • get_list_found_peaks(). returns a list of MSpeaks classes cotaining all the MolecularFormula candidates inside the MSPeak -for more details of the structure see MSPeak class and MolecularFormula class
    • +for more details of the structure see MSPeak class and MolecularFormula class
    • set_mass_spec_indexes_by_found_peaks(). set the mass spectrum to interate over only the selected indexes
    @@ -721,20 +803,21 @@
    Methods
    -
    49    def __init__(self, mass_spectrum_obj, sql_db : bool = False, min_O :int = 1, max_O : int= 22) :
    -50        
    -51        Thread.__init__(self)
    -52        
    -53        self.mass_spectrum_obj = mass_spectrum_obj
    -54        self.min_0 = min_O
    -55        self.max_O = max_O
    -56        
    -57        if not sql_db:
    -58            
    -59            self.sql_db = MolForm_SQL(mass_spectrum_obj.molecular_search_settings.url_database)
    -60        else:
    -61
    -62            self.sql_db = sql_db    
    +            
    52    def __init__(
    +53        self, mass_spectrum_obj, sql_db: bool = False, min_O: int = 1, max_O: int = 22
    +54    ):
    +55        Thread.__init__(self)
    +56
    +57        self.mass_spectrum_obj = mass_spectrum_obj
    +58        self.min_0 = min_O
    +59        self.max_O = max_O
    +60
    +61        if not sql_db:
    +62            self.sql_db = MolForm_SQL(
    +63                mass_spectrum_obj.molecular_search_settings.url_database
    +64            )
    +65        else:
    +66            self.sql_db = sql_db
     
    @@ -805,55 +888,71 @@
    Methods
    -
     64    def run(self):
    - 65        """ Run the thread
    - 66        """
    - 67        #save initial settings min peaks per class filter 
    - 68        initial_min_peak_bool = deepcopy(self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter)
    - 69
    - 70        #deactivate the usage of min peaks per class filter
    - 71        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
    - 72
    - 73        #save initial settings for Ox 
    - 74        initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'])
    - 75
    - 76        #resets the used atoms to look only for oxygen organic compounds
    - 77        self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'] =  (self.min_0, self.max_O)
    - 78        
    - 79        self.list_found_mspeaks = []
    - 80
    - 81        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    - 82        
    - 83        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    - 84        
    - 85        # needs to be wrapped inside the mass_spec class
    - 86        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    - 87        
    - 88        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    - 89            print("Start most abundant mass spectral peak search") 
    - 90        molecular_formula_obj_reference = self.find_most_abundant_formula(self.mass_spectrum_obj)
    - 91        
    - 92        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    - 93            print("Select most abundant peak with molecular formula =  %s with a m/z error of %s ppm" % (molecular_formula_obj_reference.string, molecular_formula_obj_reference.mz_error))
    - 94            print("Started mass spectral peak series search")
    - 95
    - 96        self.list_found_mspeaks = self.find_series_mspeaks(self.mass_spectrum_obj,
    - 97                                                           molecular_formula_obj_reference, 
    - 98                                                           deltamz=14)
    - 99        
    -100        # reset indexes after done with operation that includes a filter (i.e. ClusteringFilter().filter_kendrick())
    -101        
    -102        self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'] =  initial_ox
    -103        
    -104        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = initial_min_peak_bool
    -105        
    -106        self.mass_spectrum_obj.reset_indexes()
    -107
    -108        self.mass_spectrum_obj.filter_by_noise_threshold()
    -109        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -110            print("Done with mass spectral peak series search")
    -111
    -112        self.sql_db.close()
    +            
     68    def run(self):
    + 69        """Run the thread"""
    + 70        # save initial settings min peaks per class filter
    + 71        initial_min_peak_bool = deepcopy(
    + 72            self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter
    + 73        )
    + 74
    + 75        # deactivate the usage of min peaks per class filter
    + 76        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
    + 77
    + 78        # save initial settings for Ox
    + 79        initial_ox = deepcopy(
    + 80            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"]
    + 81        )
    + 82
    + 83        # resets the used atoms to look only for oxygen organic compounds
    + 84        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
    + 85            self.min_0,
    + 86            self.max_O,
    + 87        )
    + 88
    + 89        self.list_found_mspeaks = []
    + 90
    + 91        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    + 92
    + 93        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    + 94
    + 95        # needs to be wrapped inside the mass_spec class
    + 96        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    + 97
    + 98        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    + 99            print("Start most abundant mass spectral peak search")
    +100        molecular_formula_obj_reference = self.find_most_abundant_formula(
    +101            self.mass_spectrum_obj
    +102        )
    +103
    +104        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +105            print(
    +106                "Select most abundant peak with molecular formula =  %s with a m/z error of %s ppm"
    +107                % (
    +108                    molecular_formula_obj_reference.string,
    +109                    molecular_formula_obj_reference.mz_error,
    +110                )
    +111            )
    +112            print("Started mass spectral peak series search")
    +113
    +114        self.list_found_mspeaks = self.find_series_mspeaks(
    +115            self.mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
    +116        )
    +117
    +118        # reset indexes after done with operation that includes a filter (i.e. ClusteringFilter().filter_kendrick())
    +119
    +120        self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = initial_ox
    +121
    +122        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = (
    +123            initial_min_peak_bool
    +124        )
    +125
    +126        self.mass_spectrum_obj.reset_indexes()
    +127
    +128        self.mass_spectrum_obj.filter_by_noise_threshold()
    +129        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +130            print("Done with mass spectral peak series search")
    +131
    +132        self.sql_db.close()
     
    @@ -873,50 +972,62 @@
    Methods
    -
    114    def find_most_abundant_formula(self, mass_spectrum_obj):
    -115        """ Find the most abundant formula in the mass spectrum
    -116
    -117        Parameters
    -118        ----------
    -119        mass_spectrum_obj : MassSpec class
    -120            Mass spectrum object
    -121
    -122        Returns
    -123        ----------
    -124        MolecularFormula class obj
    -125            most abundant MolecularFormula with the lowest mass error
    -126        """
    -127        #need to find a better way to cut off outliners
    -128        #import matplotlib.pyplot as plt
    -129        #plt.hist(mass_spectrum_obj.abundance, bins=100)
    -130        #plt.show()
    -131        
    -132        abundances =  mass_spectrum_obj.abundance
    -133        abun_mean = average(abundances, axis=0)
    -134        abun_std = std(abundances, axis=0)
    -135        
    -136        upper_limit = abun_mean + 7* abun_std
    -137        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -138            print("Maximum abundance limit  = %s and max abundance kendrick cluster = %s"  % (upper_limit, max(mass_spectrum_obj, key=lambda m: m.abundance).abundance))
    -139        
    -140        mspeak_most_abundant = max(mass_spectrum_obj, key=lambda m: m.abundance if m.abundance <= upper_limit else 0)
    +            
    134    def find_most_abundant_formula(self, mass_spectrum_obj):
    +135        """Find the most abundant formula in the mass spectrum
    +136
    +137        Parameters
    +138        ----------
    +139        mass_spectrum_obj : MassSpec class
    +140            Mass spectrum object
     141
    -142        print("Searching molecular formulas")
    -143
    -144        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks([mspeak_most_abundant])
    -145        
    -146        print("Finished searching molecular formulas")
    -147
    -148        if mspeak_most_abundant:
    -149
    -150            return mspeak_most_abundant.best_molecular_formula_candidate
    -151        
    -152        else:
    -153        
    -154            raise Exception("Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"%mspeak_most_abundant.mz_exp )
    -155        
    -156        #return the first option
    -157        #return mspeak_most_abundant[0]
    +142        Returns
    +143        ----------
    +144        MolecularFormula class obj
    +145            most abundant MolecularFormula with the lowest mass error
    +146        """
    +147        # need to find a better way to cut off outliners
    +148        # import matplotlib.pyplot as plt
    +149        # plt.hist(mass_spectrum_obj.abundance, bins=100)
    +150        # plt.show()
    +151
    +152        abundances = mass_spectrum_obj.abundance
    +153        abun_mean = average(abundances, axis=0)
    +154        abun_std = std(abundances, axis=0)
    +155
    +156        upper_limit = abun_mean + 7 * abun_std
    +157        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +158            print(
    +159                "Maximum abundance limit  = %s and max abundance kendrick cluster = %s"
    +160                % (
    +161                    upper_limit,
    +162                    max(mass_spectrum_obj, key=lambda m: m.abundance).abundance,
    +163                )
    +164            )
    +165
    +166        mspeak_most_abundant = max(
    +167            mass_spectrum_obj,
    +168            key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
    +169        )
    +170
    +171        print("Searching molecular formulas")
    +172
    +173        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
    +174            [mspeak_most_abundant]
    +175        )
    +176
    +177        print("Finished searching molecular formulas")
    +178
    +179        if mspeak_most_abundant:
    +180            return mspeak_most_abundant.best_molecular_formula_candidate
    +181
    +182        else:
    +183            raise Exception(
    +184                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
    +185                % mspeak_most_abundant.mz_exp
    +186            )
    +187
    +188        # return the first option
    +189        # return mspeak_most_abundant[0]
     
    @@ -949,39 +1060,43 @@
    Returns
    -
    159    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
    -160        """ [Test function] Find the most abundant formula in the mass spectrum
    -161        
    -162        Parameters
    -163        ----------
    -164        mass_spectrum_obj : MassSpec class
    -165            Mass spectrum object
    -166        settings : MolecularSearchSettings class
    -167            Molecular search settings object
    -168        
    -169        Returns
    -170        ----------
    -171        MolecularFormula class obj
    -172            most abundant MolecularFormula with the lowest mass error
    -173        
    -174        """
    -175        #this function is intended for test only. 
    -176        # Have to sort by Kendrick to be able to select the most abundant series 
    -177        #then select the most abundant peak inside the series
    -178        #or have the user select the reference mspeak on the gui
    -179
    -180        mspeak_most_abundant = mass_spectrum_obj.most_abundant_mspeak
    -181
    -182        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks([mspeak_most_abundant])
    -183        
    -184        if mspeak_most_abundant:
    -185
    -186            return mspeak_most_abundant.best_molecular_formula_candidate 
    -187            
    -188        else:
    -189            raise Exception("Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"%mspeak_most_abundant.mz_exp )
    -190        #return the first option
    -191        #return mspeak_most_abundant[0]
    +            
    191    def find_most_abundant_formula_test(self, mass_spectrum_obj, settings):
    +192        """[Test function] Find the most abundant formula in the mass spectrum
    +193
    +194        Parameters
    +195        ----------
    +196        mass_spectrum_obj : MassSpec class
    +197            Mass spectrum object
    +198        settings : MolecularSearchSettings class
    +199            Molecular search settings object
    +200
    +201        Returns
    +202        ----------
    +203        MolecularFormula class obj
    +204            most abundant MolecularFormula with the lowest mass error
    +205
    +206        """
    +207        # this function is intended for test only.
    +208        # Have to sort by Kendrick to be able to select the most abundant series
    +209        # then select the most abundant peak inside the series
    +210        # or have the user select the reference mspeak on the gui
    +211
    +212        mspeak_most_abundant = mass_spectrum_obj.most_abundant_mspeak
    +213
    +214        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
    +215            [mspeak_most_abundant]
    +216        )
    +217
    +218        if mspeak_most_abundant:
    +219            return mspeak_most_abundant.best_molecular_formula_candidate
    +220
    +221        else:
    +222            raise Exception(
    +223                "Could not find a possible molecular formula match for the most abundant peak of m/z %.5f"
    +224                % mspeak_most_abundant.mz_exp
    +225            )
    +226        # return the first option
    +227        # return mspeak_most_abundant[0]
     
    @@ -1016,78 +1131,86 @@
    Returns
    -
    193    def find_series_mspeaks(self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14):
    -194        """ Find a series of abundant peaks in the mass spectrum for a given molecular formula
    -195        
    -196        Parameters
    -197        ----------
    -198        mass_spectrum_obj : MassSpec class
    -199            Mass spectrum object
    -200        molecular_formula_obj_reference : MolecularFormula class
    -201            Molecular formula object
    -202        deltamz : float
    -203            delta m/z to look for peaks
    -204            
    -205        Returns
    -206        ----------
    -207        list
    -208            list of MSpeak class objects
    -209        """
    -210        abundances =  mass_spectrum_obj.abundance
    -211        abun_mean = average(abundances, axis=0)
    -212        abun_std = std(abundances, axis=0)
    -213        upper_limit = abun_mean + 7* abun_std
    -214       
    -215        list_most_abundant_peaks = list()
    -216
    -217        min_mz = mass_spectrum_obj.min_mz_exp
    -218        
    -219        max_mz = mass_spectrum_obj.max_mz_exp
    -220        
    -221        initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc
    -222        
    -223        mass = initial_nominal_mass
    -224        
    -225        nominal_masses = []
    -226        while mass <= max_mz:
    -227            #print "shit 1", mass, min_mz
    -228            mass += (deltamz) 
    -229            nominal_masses.append(mass)
    -230        
    -231        mass = initial_nominal_mass    
    -232        while mass >= min_mz:
    -233            #print "shit 1", mass, min_mz
    -234            mass -= (deltamz) 
    -235            nominal_masses.append(mass)
    -236        
    -237        nominal_masses = sorted(nominal_masses)
    -238        
    -239        for nominal_mass in nominal_masses:
    -240            
    -241            first_index, last_index = mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
    -242            
    -243            ms_peaks = mass_spectrum_obj[first_index:last_index]
    -244            
    -245            if ms_peaks:   
    -246                #   
    -247                #print (nominal_mass, first_index, 
    -248                #    last_index, 
    -249                #    mass_spectrum_obj[first_index].mz_exp,
    -250                #    mass_spectrum_obj[last_index].mz_exp
    -251                #    )
    -252                #
    -253                
    -254                mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance if m.abundance <= upper_limit else 0)
    -255
    -256                #mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance)
    -257                
    -258                list_most_abundant_peaks.append(mspeak_most_abundant)
    -259        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -260            print('Start molecular formula search')
    -261        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(list_most_abundant_peaks)
    -262        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -263            print('Done molecular formula search')
    -264        return [mspeak for mspeak in list_most_abundant_peaks if mspeak]            
    +            
    229    def find_series_mspeaks(
    +230        self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14
    +231    ):
    +232        """Find a series of abundant peaks in the mass spectrum for a given molecular formula
    +233
    +234        Parameters
    +235        ----------
    +236        mass_spectrum_obj : MassSpec class
    +237            Mass spectrum object
    +238        molecular_formula_obj_reference : MolecularFormula class
    +239            Molecular formula object
    +240        deltamz : float
    +241            delta m/z to look for peaks
    +242
    +243        Returns
    +244        ----------
    +245        list
    +246            list of MSpeak class objects
    +247        """
    +248        abundances = mass_spectrum_obj.abundance
    +249        abun_mean = average(abundances, axis=0)
    +250        abun_std = std(abundances, axis=0)
    +251        upper_limit = abun_mean + 7 * abun_std
    +252
    +253        list_most_abundant_peaks = list()
    +254
    +255        min_mz = mass_spectrum_obj.min_mz_exp
    +256
    +257        max_mz = mass_spectrum_obj.max_mz_exp
    +258
    +259        initial_nominal_mass = molecular_formula_obj_reference.mz_nominal_calc
    +260
    +261        mass = initial_nominal_mass
    +262
    +263        nominal_masses = []
    +264        while mass <= max_mz:
    +265            # print "shit 1", mass, min_mz
    +266            mass += deltamz
    +267            nominal_masses.append(mass)
    +268
    +269        mass = initial_nominal_mass
    +270        while mass >= min_mz:
    +271            # print "shit 1", mass, min_mz
    +272            mass -= deltamz
    +273            nominal_masses.append(mass)
    +274
    +275        nominal_masses = sorted(nominal_masses)
    +276
    +277        for nominal_mass in nominal_masses:
    +278            first_index, last_index = (
    +279                mass_spectrum_obj.get_nominal_mz_first_last_indexes(nominal_mass)
    +280            )
    +281
    +282            ms_peaks = mass_spectrum_obj[first_index:last_index]
    +283
    +284            if ms_peaks:
    +285                #
    +286                # print (nominal_mass, first_index,
    +287                #    last_index,
    +288                #    mass_spectrum_obj[first_index].mz_exp,
    +289                #    mass_spectrum_obj[last_index].mz_exp
    +290                #    )
    +291                #
    +292
    +293                mspeak_most_abundant = max(
    +294                    ms_peaks,
    +295                    key=lambda m: m.abundance if m.abundance <= upper_limit else 0,
    +296                )
    +297
    +298                # mspeak_most_abundant = max(ms_peaks, key=lambda m: m.abundance)
    +299
    +300                list_most_abundant_peaks.append(mspeak_most_abundant)
    +301        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +302            print("Start molecular formula search")
    +303        SearchMolecularFormulas(mass_spectrum_obj, self.sql_db).run_worker_ms_peaks(
    +304            list_most_abundant_peaks
    +305        )
    +306        if mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +307            print("Done molecular formula search")
    +308        return [mspeak for mspeak in list_most_abundant_peaks if mspeak]
     
    @@ -1124,15 +1247,15 @@
    Returns
    -
    267    def get_list_found_peaks(self):
    -268        """ Get the list of found peaks
    -269        
    -270        Returns
    -271        ----------
    -272        list
    -273            list of MSpeak class objects
    -274        """
    -275        return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp)
    +            
    310    def get_list_found_peaks(self):
    +311        """Get the list of found peaks
    +312
    +313        Returns
    +314        ----------
    +315        list
    +316            list of MSpeak class objects
    +317        """
    +318        return sorted(self.list_found_mspeaks, key=lambda mp: mp.mz_exp)
     
    @@ -1158,18 +1281,18 @@
    Returns
    -
    277    def set_mass_spec_indexes_by_found_peaks(self):
    -278        """ Set the mass spectrum to interate over only the selected indexes.
    -279
    -280        Notes
    -281        ----------
    -282        Warning!!!!
    -283        set the mass spectrum to interate over only the selected indexes
    -284        don not forget to call mass_spectrum_obj.reset_indexes after the job is done
    -285        """
    -286        
    -287        indexes = [msp.index for msp in self.list_found_mspeaks]
    -288        self.mass_spectrum_obj.set_indexes(indexes)
    +            
    320    def set_mass_spec_indexes_by_found_peaks(self):
    +321        """Set the mass spectrum to interate over only the selected indexes.
    +322
    +323        Notes
    +324        ----------
    +325        Warning!!!!
    +326        set the mass spectrum to interate over only the selected indexes
    +327        don not forget to call mass_spectrum_obj.reset_indexes after the job is done
    +328        """
    +329
    +330        indexes = [msp.index for msp in self.list_found_mspeaks]
    +331        self.mass_spectrum_obj.set_indexes(indexes)
     
    diff --git a/docs/corems/molecular_id/search/lcms_spectral_search.html b/docs/corems/molecular_id/search/lcms_spectral_search.html index 3298beb4..639fa16f 100644 --- a/docs/corems/molecular_id/search/lcms_spectral_search.html +++ b/docs/corems/molecular_id/search/lcms_spectral_search.html @@ -257,114 +257,122 @@

    192 (self._ms[scan_oi].mz_exp, self._ms[scan_oi].abundance) 193 ).T, 194 precursor_ions_removal_da=None, -195 noise_threshold=self._ms[scan_oi].parameters.mass_spectrum.noise_threshold_min_relative_abundance / 100, -196 min_ms2_difference_in_da=peak_sep_da, -197 ) -198 search_results = fe_lib.search( -199 precursor_mz=precursor_mz, -200 peaks=query_spectrum, -201 ms1_tolerance_in_da=self.parameters.mass_spectrum['ms1'].molecular_search.max_ppm_error -202 * 10**-6 -203 * precursor_mz, -204 ms2_tolerance_in_da=peak_sep_da*0.5, -205 method={"identity"}, -206 precursor_ions_removal_da=None, -207 noise_threshold=self._ms[scan_oi].parameters.mass_spectrum.noise_threshold_min_relative_abundance / 100, -208 target="cpu", -209 )["identity_search"] -210 match_inds = np.where(search_results > min_match_score)[0] -211 -212 # If any decent matches are found, add them to the results dictionary -213 if len(match_inds) > 0: -214 match_scores = search_results[match_inds] -215 ref_ms_ids = [fe_lib[x]["id"] for x in match_inds] -216 ref_mol_ids = [ -217 fe_lib[x]["molecular_data_id"] for x in match_inds -218 ] -219 ref_precursor_mzs = [ -220 fe_lib[x]["precursor_mz"] for x in match_inds -221 ] -222 ion_types = [fe_lib[x]["ion_type"] for x in match_inds] -223 overall_results_dict[scan_oi][precursor_mz] = { -224 "ref_mol_id": ref_mol_ids, -225 "ref_ms_id": ref_ms_ids, -226 "ref_precursor_mz": ref_precursor_mzs, -227 "precursor_mz_error_ppm": [ -228 (precursor_mz - x) / precursor_mz * 10**6 -229 for x in ref_precursor_mzs -230 ], -231 "entropy_similarity": match_scores, -232 "ref_ion_type": ion_types, -233 } -234 if get_additional_metrics: -235 more_match_quals = [ -236 self.get_more_match_quals( -237 self._ms[scan_oi].mz_exp, -238 fe_lib[x], -239 mz_tol_da=peak_sep_da, -240 include_fragment_types=include_fragment_types, -241 ) -242 for x in match_inds -243 ] -244 overall_results_dict[scan_oi][precursor_mz].update( -245 { -246 "query_mz_in_ref_n": [ -247 x[0] for x in 
more_match_quals -248 ], -249 "query_mz_in_ref_fract": [ -250 x[1] for x in more_match_quals -251 ], -252 "ref_mz_in_query_n": [ -253 x[2] for x in more_match_quals -254 ], -255 "ref_mz_in_query_fract": [ -256 x[3] for x in more_match_quals -257 ], -258 } -259 ) -260 if include_fragment_types: -261 overall_results_dict[scan_oi][precursor_mz].update( -262 { -263 "query_frag_types": [ -264 x[4] for x in more_match_quals -265 ], -266 "ref_frag_types": [ -267 x[5] for x in more_match_quals -268 ], -269 } -270 ) -271 -272 # Drop scans with no results from dictionary -273 overall_results_dict = {k: v for k, v in overall_results_dict.items() if v} -274 -275 # Cast each entry as a MS2SearchResults object -276 for scan_id in overall_results_dict.keys(): -277 for precursor_mz in overall_results_dict[scan_id].keys(): -278 ms2_spectrum = self._ms[scan_id] -279 ms2_search_results = overall_results_dict[scan_id][precursor_mz] -280 overall_results_dict[scan_id][precursor_mz] = SpectrumSearchResults( -281 ms2_spectrum, precursor_mz, ms2_search_results -282 ) -283 -284 # Add MS2SearchResults to the existing spectral search results dictionary -285 self.spectral_search_results.update(overall_results_dict) -286 -287 # If there are mass features, associate the results with each mass feature -288 if len(self.mass_features) > 0: -289 for mass_feature_id, mass_feature in self.mass_features.items(): -290 scan_ids = mass_feature.ms2_scan_numbers -291 for ms2_scan_id in scan_ids: -292 precursor_mz = mass_feature.mz -293 try: -294 self.spectral_search_results[ms2_scan_id][precursor_mz] -295 except KeyError: -296 pass -297 else: -298 self.mass_features[ -299 mass_feature_id -300 ].ms2_similarity_results.append( -301 self.spectral_search_results[ms2_scan_id][precursor_mz] -302 ) +195 noise_threshold=self._ms[ +196 scan_oi +197 ].parameters.mass_spectrum.noise_threshold_min_relative_abundance +198 / 100, +199 min_ms2_difference_in_da=peak_sep_da, +200 ) +201 search_results = fe_lib.search( +202 
precursor_mz=precursor_mz, +203 peaks=query_spectrum, +204 ms1_tolerance_in_da=self.parameters.mass_spectrum[ +205 "ms1" +206 ].molecular_search.max_ppm_error +207 * 10**-6 +208 * precursor_mz, +209 ms2_tolerance_in_da=peak_sep_da * 0.5, +210 method={"identity"}, +211 precursor_ions_removal_da=None, +212 noise_threshold=self._ms[ +213 scan_oi +214 ].parameters.mass_spectrum.noise_threshold_min_relative_abundance +215 / 100, +216 target="cpu", +217 )["identity_search"] +218 match_inds = np.where(search_results > min_match_score)[0] +219 +220 # If any decent matches are found, add them to the results dictionary +221 if len(match_inds) > 0: +222 match_scores = search_results[match_inds] +223 ref_ms_ids = [fe_lib[x]["id"] for x in match_inds] +224 ref_mol_ids = [ +225 fe_lib[x]["molecular_data_id"] for x in match_inds +226 ] +227 ref_precursor_mzs = [ +228 fe_lib[x]["precursor_mz"] for x in match_inds +229 ] +230 ion_types = [fe_lib[x]["ion_type"] for x in match_inds] +231 overall_results_dict[scan_oi][precursor_mz] = { +232 "ref_mol_id": ref_mol_ids, +233 "ref_ms_id": ref_ms_ids, +234 "ref_precursor_mz": ref_precursor_mzs, +235 "precursor_mz_error_ppm": [ +236 (precursor_mz - x) / precursor_mz * 10**6 +237 for x in ref_precursor_mzs +238 ], +239 "entropy_similarity": match_scores, +240 "ref_ion_type": ion_types, +241 } +242 if get_additional_metrics: +243 more_match_quals = [ +244 self.get_more_match_quals( +245 self._ms[scan_oi].mz_exp, +246 fe_lib[x], +247 mz_tol_da=peak_sep_da, +248 include_fragment_types=include_fragment_types, +249 ) +250 for x in match_inds +251 ] +252 overall_results_dict[scan_oi][precursor_mz].update( +253 { +254 "query_mz_in_ref_n": [ +255 x[0] for x in more_match_quals +256 ], +257 "query_mz_in_ref_fract": [ +258 x[1] for x in more_match_quals +259 ], +260 "ref_mz_in_query_n": [ +261 x[2] for x in more_match_quals +262 ], +263 "ref_mz_in_query_fract": [ +264 x[3] for x in more_match_quals +265 ], +266 } +267 ) +268 if include_fragment_types: 
+269 overall_results_dict[scan_oi][precursor_mz].update( +270 { +271 "query_frag_types": [ +272 x[4] for x in more_match_quals +273 ], +274 "ref_frag_types": [ +275 x[5] for x in more_match_quals +276 ], +277 } +278 ) +279 +280 # Drop scans with no results from dictionary +281 overall_results_dict = {k: v for k, v in overall_results_dict.items() if v} +282 +283 # Cast each entry as a MS2SearchResults object +284 for scan_id in overall_results_dict.keys(): +285 for precursor_mz in overall_results_dict[scan_id].keys(): +286 ms2_spectrum = self._ms[scan_id] +287 ms2_search_results = overall_results_dict[scan_id][precursor_mz] +288 overall_results_dict[scan_id][precursor_mz] = SpectrumSearchResults( +289 ms2_spectrum, precursor_mz, ms2_search_results +290 ) +291 +292 # Add MS2SearchResults to the existing spectral search results dictionary +293 self.spectral_search_results.update(overall_results_dict) +294 +295 # If there are mass features, associate the results with each mass feature +296 if len(self.mass_features) > 0: +297 for mass_feature_id, mass_feature in self.mass_features.items(): +298 scan_ids = mass_feature.ms2_scan_numbers +299 for ms2_scan_id in scan_ids: +300 precursor_mz = mass_feature.mz +301 try: +302 self.spectral_search_results[ms2_scan_id][precursor_mz] +303 except KeyError: +304 pass +305 else: +306 self.mass_features[ +307 mass_feature_id +308 ].ms2_similarity_results.append( +309 self.spectral_search_results[ms2_scan_id][precursor_mz] +310 )

    @@ -567,114 +575,122 @@

    193 (self._ms[scan_oi].mz_exp, self._ms[scan_oi].abundance) 194 ).T, 195 precursor_ions_removal_da=None, -196 noise_threshold=self._ms[scan_oi].parameters.mass_spectrum.noise_threshold_min_relative_abundance / 100, -197 min_ms2_difference_in_da=peak_sep_da, -198 ) -199 search_results = fe_lib.search( -200 precursor_mz=precursor_mz, -201 peaks=query_spectrum, -202 ms1_tolerance_in_da=self.parameters.mass_spectrum['ms1'].molecular_search.max_ppm_error -203 * 10**-6 -204 * precursor_mz, -205 ms2_tolerance_in_da=peak_sep_da*0.5, -206 method={"identity"}, -207 precursor_ions_removal_da=None, -208 noise_threshold=self._ms[scan_oi].parameters.mass_spectrum.noise_threshold_min_relative_abundance / 100, -209 target="cpu", -210 )["identity_search"] -211 match_inds = np.where(search_results > min_match_score)[0] -212 -213 # If any decent matches are found, add them to the results dictionary -214 if len(match_inds) > 0: -215 match_scores = search_results[match_inds] -216 ref_ms_ids = [fe_lib[x]["id"] for x in match_inds] -217 ref_mol_ids = [ -218 fe_lib[x]["molecular_data_id"] for x in match_inds -219 ] -220 ref_precursor_mzs = [ -221 fe_lib[x]["precursor_mz"] for x in match_inds -222 ] -223 ion_types = [fe_lib[x]["ion_type"] for x in match_inds] -224 overall_results_dict[scan_oi][precursor_mz] = { -225 "ref_mol_id": ref_mol_ids, -226 "ref_ms_id": ref_ms_ids, -227 "ref_precursor_mz": ref_precursor_mzs, -228 "precursor_mz_error_ppm": [ -229 (precursor_mz - x) / precursor_mz * 10**6 -230 for x in ref_precursor_mzs -231 ], -232 "entropy_similarity": match_scores, -233 "ref_ion_type": ion_types, -234 } -235 if get_additional_metrics: -236 more_match_quals = [ -237 self.get_more_match_quals( -238 self._ms[scan_oi].mz_exp, -239 fe_lib[x], -240 mz_tol_da=peak_sep_da, -241 include_fragment_types=include_fragment_types, -242 ) -243 for x in match_inds -244 ] -245 overall_results_dict[scan_oi][precursor_mz].update( -246 { -247 "query_mz_in_ref_n": [ -248 x[0] for x in 
more_match_quals -249 ], -250 "query_mz_in_ref_fract": [ -251 x[1] for x in more_match_quals -252 ], -253 "ref_mz_in_query_n": [ -254 x[2] for x in more_match_quals -255 ], -256 "ref_mz_in_query_fract": [ -257 x[3] for x in more_match_quals -258 ], -259 } -260 ) -261 if include_fragment_types: -262 overall_results_dict[scan_oi][precursor_mz].update( -263 { -264 "query_frag_types": [ -265 x[4] for x in more_match_quals -266 ], -267 "ref_frag_types": [ -268 x[5] for x in more_match_quals -269 ], -270 } -271 ) -272 -273 # Drop scans with no results from dictionary -274 overall_results_dict = {k: v for k, v in overall_results_dict.items() if v} -275 -276 # Cast each entry as a MS2SearchResults object -277 for scan_id in overall_results_dict.keys(): -278 for precursor_mz in overall_results_dict[scan_id].keys(): -279 ms2_spectrum = self._ms[scan_id] -280 ms2_search_results = overall_results_dict[scan_id][precursor_mz] -281 overall_results_dict[scan_id][precursor_mz] = SpectrumSearchResults( -282 ms2_spectrum, precursor_mz, ms2_search_results -283 ) -284 -285 # Add MS2SearchResults to the existing spectral search results dictionary -286 self.spectral_search_results.update(overall_results_dict) -287 -288 # If there are mass features, associate the results with each mass feature -289 if len(self.mass_features) > 0: -290 for mass_feature_id, mass_feature in self.mass_features.items(): -291 scan_ids = mass_feature.ms2_scan_numbers -292 for ms2_scan_id in scan_ids: -293 precursor_mz = mass_feature.mz -294 try: -295 self.spectral_search_results[ms2_scan_id][precursor_mz] -296 except KeyError: -297 pass -298 else: -299 self.mass_features[ -300 mass_feature_id -301 ].ms2_similarity_results.append( -302 self.spectral_search_results[ms2_scan_id][precursor_mz] -303 ) +196 noise_threshold=self._ms[ +197 scan_oi +198 ].parameters.mass_spectrum.noise_threshold_min_relative_abundance +199 / 100, +200 min_ms2_difference_in_da=peak_sep_da, +201 ) +202 search_results = fe_lib.search( +203 
precursor_mz=precursor_mz, +204 peaks=query_spectrum, +205 ms1_tolerance_in_da=self.parameters.mass_spectrum[ +206 "ms1" +207 ].molecular_search.max_ppm_error +208 * 10**-6 +209 * precursor_mz, +210 ms2_tolerance_in_da=peak_sep_da * 0.5, +211 method={"identity"}, +212 precursor_ions_removal_da=None, +213 noise_threshold=self._ms[ +214 scan_oi +215 ].parameters.mass_spectrum.noise_threshold_min_relative_abundance +216 / 100, +217 target="cpu", +218 )["identity_search"] +219 match_inds = np.where(search_results > min_match_score)[0] +220 +221 # If any decent matches are found, add them to the results dictionary +222 if len(match_inds) > 0: +223 match_scores = search_results[match_inds] +224 ref_ms_ids = [fe_lib[x]["id"] for x in match_inds] +225 ref_mol_ids = [ +226 fe_lib[x]["molecular_data_id"] for x in match_inds +227 ] +228 ref_precursor_mzs = [ +229 fe_lib[x]["precursor_mz"] for x in match_inds +230 ] +231 ion_types = [fe_lib[x]["ion_type"] for x in match_inds] +232 overall_results_dict[scan_oi][precursor_mz] = { +233 "ref_mol_id": ref_mol_ids, +234 "ref_ms_id": ref_ms_ids, +235 "ref_precursor_mz": ref_precursor_mzs, +236 "precursor_mz_error_ppm": [ +237 (precursor_mz - x) / precursor_mz * 10**6 +238 for x in ref_precursor_mzs +239 ], +240 "entropy_similarity": match_scores, +241 "ref_ion_type": ion_types, +242 } +243 if get_additional_metrics: +244 more_match_quals = [ +245 self.get_more_match_quals( +246 self._ms[scan_oi].mz_exp, +247 fe_lib[x], +248 mz_tol_da=peak_sep_da, +249 include_fragment_types=include_fragment_types, +250 ) +251 for x in match_inds +252 ] +253 overall_results_dict[scan_oi][precursor_mz].update( +254 { +255 "query_mz_in_ref_n": [ +256 x[0] for x in more_match_quals +257 ], +258 "query_mz_in_ref_fract": [ +259 x[1] for x in more_match_quals +260 ], +261 "ref_mz_in_query_n": [ +262 x[2] for x in more_match_quals +263 ], +264 "ref_mz_in_query_fract": [ +265 x[3] for x in more_match_quals +266 ], +267 } +268 ) +269 if include_fragment_types: 
+270 overall_results_dict[scan_oi][precursor_mz].update( +271 { +272 "query_frag_types": [ +273 x[4] for x in more_match_quals +274 ], +275 "ref_frag_types": [ +276 x[5] for x in more_match_quals +277 ], +278 } +279 ) +280 +281 # Drop scans with no results from dictionary +282 overall_results_dict = {k: v for k, v in overall_results_dict.items() if v} +283 +284 # Cast each entry as a MS2SearchResults object +285 for scan_id in overall_results_dict.keys(): +286 for precursor_mz in overall_results_dict[scan_id].keys(): +287 ms2_spectrum = self._ms[scan_id] +288 ms2_search_results = overall_results_dict[scan_id][precursor_mz] +289 overall_results_dict[scan_id][precursor_mz] = SpectrumSearchResults( +290 ms2_spectrum, precursor_mz, ms2_search_results +291 ) +292 +293 # Add MS2SearchResults to the existing spectral search results dictionary +294 self.spectral_search_results.update(overall_results_dict) +295 +296 # If there are mass features, associate the results with each mass feature +297 if len(self.mass_features) > 0: +298 for mass_feature_id, mass_feature in self.mass_features.items(): +299 scan_ids = mass_feature.ms2_scan_numbers +300 for ms2_scan_id in scan_ids: +301 precursor_mz = mass_feature.mz +302 try: +303 self.spectral_search_results[ms2_scan_id][precursor_mz] +304 except KeyError: +305 pass +306 else: +307 self.mass_features[ +308 mass_feature_id +309 ].ms2_similarity_results.append( +310 self.spectral_search_results[ms2_scan_id][precursor_mz] +311 )

    @@ -936,114 +952,122 @@
    Raises
    193 (self._ms[scan_oi].mz_exp, self._ms[scan_oi].abundance) 194 ).T, 195 precursor_ions_removal_da=None, -196 noise_threshold=self._ms[scan_oi].parameters.mass_spectrum.noise_threshold_min_relative_abundance / 100, -197 min_ms2_difference_in_da=peak_sep_da, -198 ) -199 search_results = fe_lib.search( -200 precursor_mz=precursor_mz, -201 peaks=query_spectrum, -202 ms1_tolerance_in_da=self.parameters.mass_spectrum['ms1'].molecular_search.max_ppm_error -203 * 10**-6 -204 * precursor_mz, -205 ms2_tolerance_in_da=peak_sep_da*0.5, -206 method={"identity"}, -207 precursor_ions_removal_da=None, -208 noise_threshold=self._ms[scan_oi].parameters.mass_spectrum.noise_threshold_min_relative_abundance / 100, -209 target="cpu", -210 )["identity_search"] -211 match_inds = np.where(search_results > min_match_score)[0] -212 -213 # If any decent matches are found, add them to the results dictionary -214 if len(match_inds) > 0: -215 match_scores = search_results[match_inds] -216 ref_ms_ids = [fe_lib[x]["id"] for x in match_inds] -217 ref_mol_ids = [ -218 fe_lib[x]["molecular_data_id"] for x in match_inds -219 ] -220 ref_precursor_mzs = [ -221 fe_lib[x]["precursor_mz"] for x in match_inds -222 ] -223 ion_types = [fe_lib[x]["ion_type"] for x in match_inds] -224 overall_results_dict[scan_oi][precursor_mz] = { -225 "ref_mol_id": ref_mol_ids, -226 "ref_ms_id": ref_ms_ids, -227 "ref_precursor_mz": ref_precursor_mzs, -228 "precursor_mz_error_ppm": [ -229 (precursor_mz - x) / precursor_mz * 10**6 -230 for x in ref_precursor_mzs -231 ], -232 "entropy_similarity": match_scores, -233 "ref_ion_type": ion_types, -234 } -235 if get_additional_metrics: -236 more_match_quals = [ -237 self.get_more_match_quals( -238 self._ms[scan_oi].mz_exp, -239 fe_lib[x], -240 mz_tol_da=peak_sep_da, -241 include_fragment_types=include_fragment_types, -242 ) -243 for x in match_inds -244 ] -245 overall_results_dict[scan_oi][precursor_mz].update( -246 { -247 "query_mz_in_ref_n": [ -248 x[0] for x in 
more_match_quals -249 ], -250 "query_mz_in_ref_fract": [ -251 x[1] for x in more_match_quals -252 ], -253 "ref_mz_in_query_n": [ -254 x[2] for x in more_match_quals -255 ], -256 "ref_mz_in_query_fract": [ -257 x[3] for x in more_match_quals -258 ], -259 } -260 ) -261 if include_fragment_types: -262 overall_results_dict[scan_oi][precursor_mz].update( -263 { -264 "query_frag_types": [ -265 x[4] for x in more_match_quals -266 ], -267 "ref_frag_types": [ -268 x[5] for x in more_match_quals -269 ], -270 } -271 ) -272 -273 # Drop scans with no results from dictionary -274 overall_results_dict = {k: v for k, v in overall_results_dict.items() if v} -275 -276 # Cast each entry as a MS2SearchResults object -277 for scan_id in overall_results_dict.keys(): -278 for precursor_mz in overall_results_dict[scan_id].keys(): -279 ms2_spectrum = self._ms[scan_id] -280 ms2_search_results = overall_results_dict[scan_id][precursor_mz] -281 overall_results_dict[scan_id][precursor_mz] = SpectrumSearchResults( -282 ms2_spectrum, precursor_mz, ms2_search_results -283 ) -284 -285 # Add MS2SearchResults to the existing spectral search results dictionary -286 self.spectral_search_results.update(overall_results_dict) -287 -288 # If there are mass features, associate the results with each mass feature -289 if len(self.mass_features) > 0: -290 for mass_feature_id, mass_feature in self.mass_features.items(): -291 scan_ids = mass_feature.ms2_scan_numbers -292 for ms2_scan_id in scan_ids: -293 precursor_mz = mass_feature.mz -294 try: -295 self.spectral_search_results[ms2_scan_id][precursor_mz] -296 except KeyError: -297 pass -298 else: -299 self.mass_features[ -300 mass_feature_id -301 ].ms2_similarity_results.append( -302 self.spectral_search_results[ms2_scan_id][precursor_mz] -303 ) +196 noise_threshold=self._ms[ +197 scan_oi +198 ].parameters.mass_spectrum.noise_threshold_min_relative_abundance +199 / 100, +200 min_ms2_difference_in_da=peak_sep_da, +201 ) +202 search_results = fe_lib.search( +203 
precursor_mz=precursor_mz, +204 peaks=query_spectrum, +205 ms1_tolerance_in_da=self.parameters.mass_spectrum[ +206 "ms1" +207 ].molecular_search.max_ppm_error +208 * 10**-6 +209 * precursor_mz, +210 ms2_tolerance_in_da=peak_sep_da * 0.5, +211 method={"identity"}, +212 precursor_ions_removal_da=None, +213 noise_threshold=self._ms[ +214 scan_oi +215 ].parameters.mass_spectrum.noise_threshold_min_relative_abundance +216 / 100, +217 target="cpu", +218 )["identity_search"] +219 match_inds = np.where(search_results > min_match_score)[0] +220 +221 # If any decent matches are found, add them to the results dictionary +222 if len(match_inds) > 0: +223 match_scores = search_results[match_inds] +224 ref_ms_ids = [fe_lib[x]["id"] for x in match_inds] +225 ref_mol_ids = [ +226 fe_lib[x]["molecular_data_id"] for x in match_inds +227 ] +228 ref_precursor_mzs = [ +229 fe_lib[x]["precursor_mz"] for x in match_inds +230 ] +231 ion_types = [fe_lib[x]["ion_type"] for x in match_inds] +232 overall_results_dict[scan_oi][precursor_mz] = { +233 "ref_mol_id": ref_mol_ids, +234 "ref_ms_id": ref_ms_ids, +235 "ref_precursor_mz": ref_precursor_mzs, +236 "precursor_mz_error_ppm": [ +237 (precursor_mz - x) / precursor_mz * 10**6 +238 for x in ref_precursor_mzs +239 ], +240 "entropy_similarity": match_scores, +241 "ref_ion_type": ion_types, +242 } +243 if get_additional_metrics: +244 more_match_quals = [ +245 self.get_more_match_quals( +246 self._ms[scan_oi].mz_exp, +247 fe_lib[x], +248 mz_tol_da=peak_sep_da, +249 include_fragment_types=include_fragment_types, +250 ) +251 for x in match_inds +252 ] +253 overall_results_dict[scan_oi][precursor_mz].update( +254 { +255 "query_mz_in_ref_n": [ +256 x[0] for x in more_match_quals +257 ], +258 "query_mz_in_ref_fract": [ +259 x[1] for x in more_match_quals +260 ], +261 "ref_mz_in_query_n": [ +262 x[2] for x in more_match_quals +263 ], +264 "ref_mz_in_query_fract": [ +265 x[3] for x in more_match_quals +266 ], +267 } +268 ) +269 if include_fragment_types: 
+270 overall_results_dict[scan_oi][precursor_mz].update( +271 { +272 "query_frag_types": [ +273 x[4] for x in more_match_quals +274 ], +275 "ref_frag_types": [ +276 x[5] for x in more_match_quals +277 ], +278 } +279 ) +280 +281 # Drop scans with no results from dictionary +282 overall_results_dict = {k: v for k, v in overall_results_dict.items() if v} +283 +284 # Cast each entry as a MS2SearchResults object +285 for scan_id in overall_results_dict.keys(): +286 for precursor_mz in overall_results_dict[scan_id].keys(): +287 ms2_spectrum = self._ms[scan_id] +288 ms2_search_results = overall_results_dict[scan_id][precursor_mz] +289 overall_results_dict[scan_id][precursor_mz] = SpectrumSearchResults( +290 ms2_spectrum, precursor_mz, ms2_search_results +291 ) +292 +293 # Add MS2SearchResults to the existing spectral search results dictionary +294 self.spectral_search_results.update(overall_results_dict) +295 +296 # If there are mass features, associate the results with each mass feature +297 if len(self.mass_features) > 0: +298 for mass_feature_id, mass_feature in self.mass_features.items(): +299 scan_ids = mass_feature.ms2_scan_numbers +300 for ms2_scan_id in scan_ids: +301 precursor_mz = mass_feature.mz +302 try: +303 self.spectral_search_results[ms2_scan_id][precursor_mz] +304 except KeyError: +305 pass +306 else: +307 self.mass_features[ +308 mass_feature_id +309 ].ms2_similarity_results.append( +310 self.spectral_search_results[ms2_scan_id][precursor_mz] +311 )
    diff --git a/docs/corems/molecular_id/search/molecularFormulaSearch.html b/docs/corems/molecular_id/search/molecularFormulaSearch.html index 73b5cb8f..6594f90d 100644 --- a/docs/corems/molecular_id/search/molecularFormulaSearch.html +++ b/docs/corems/molecular_id/search/molecularFormulaSearch.html @@ -154,812 +154,963 @@

    2__date__ = "Jul 29, 2019" 3 4 - 5import multiprocessing - 6from typing import List - 7 - 8import tqdm - 9 - 10 - 11 - 12from corems import chunks, timeit - 13from corems.encapsulation.constant import Labels - 14from corems.molecular_formula.factory.MolecularFormulaFactory import LCMSLibRefMolecularFormula, MolecularFormula - 15from corems.ms_peak.factory.MSPeakClasses import _MSPeak + 5from typing import List + 6 + 7import tqdm + 8 + 9from corems import chunks, timeit + 10from corems.encapsulation.constant import Labels + 11from corems.molecular_formula.factory.MolecularFormulaFactory import ( + 12 LCMSLibRefMolecularFormula, + 13 MolecularFormula, + 14) + 15from corems.molecular_id.factory.MolecularLookupTable import MolecularCombinations 16from corems.molecular_id.factory.molecularSQL import MolForm_SQL - 17from corems.molecular_id.factory.MolecularLookupTable import MolecularCombinations + 17from corems.ms_peak.factory.MSPeakClasses import _MSPeak 18 - 19 - 20 - 21last_error = 0 - 22last_dif = 0 - 23closest_error = 0 - 24error_average = 0 - 25nbValues = 0 - 26 - 27 - 28class SearchMolecularFormulas: - 29 """ Class for searching molecular formulas in a mass spectrum. - 30 - 31 Parameters - 32 ---------- - 33 mass_spectrum_obj : MassSpectrum - 34 The mass spectrum object. - 35 sql_db : MolForm_SQL, optional - 36 The SQL database object, by default None. - 37 first_hit : bool, optional - 38 Flag to indicate whether to skip peaks that already have a molecular formula assigned, by default False. - 39 find_isotopologues : bool, optional - 40 Flag to indicate whether to find isotopologues, by default True. - 41 - 42 Attributes - 43 ---------- - 44 mass_spectrum_obj : MassSpectrum - 45 The mass spectrum object. - 46 sql_db : MolForm_SQL - 47 The SQL database object. - 48 first_hit : bool - 49 Flag to indicate whether to skip peaks that already have a molecular formula assigned. - 50 find_isotopologues : bool - 51 Flag to indicate whether to find isotopologues. 
- 52 - 53 - 54 Methods - 55 ------- - 56 * run_search(). - 57 Run the molecular formula search. - 58 * run_worker_mass_spectrum(). - 59 Run the molecular formula search on the mass spectrum object. - 60 * run_worker_ms_peaks(). - 61 Run the molecular formula search on the given list of mass spectrum peaks. - 62 * database_to_dict(). - 63 Convert the database results to a dictionary. - 64 * run_molecular_formula(). - 65 Run the molecular formula search on the given list of mass spectrum peaks. - 66 * search_mol_formulas(). - 67 Search for molecular formulas in the mass spectrum. - 68 - 69 """ - 70 - 71 def __init__(self, mass_spectrum_obj, sql_db=None, first_hit : bool=False, find_isotopologues : bool=True): - 72 - 73 self.first_hit = first_hit - 74 - 75 self.find_isotopologues = find_isotopologues - 76 - 77 self.mass_spectrum_obj = mass_spectrum_obj - 78 - 79 if not sql_db: - 80 - 81 self.sql_db = MolForm_SQL(url=mass_spectrum_obj.molecular_search_settings.url_database) - 82 - 83 else: - 84 - 85 self.sql_db = sql_db + 19last_error = 0 + 20last_dif = 0 + 21closest_error = 0 + 22error_average = 0 + 23nbValues = 0 + 24 + 25 + 26class SearchMolecularFormulas: + 27 """Class for searching molecular formulas in a mass spectrum. + 28 + 29 Parameters + 30 ---------- + 31 mass_spectrum_obj : MassSpectrum + 32 The mass spectrum object. + 33 sql_db : MolForm_SQL, optional + 34 The SQL database object, by default None. + 35 first_hit : bool, optional + 36 Flag to indicate whether to skip peaks that already have a molecular formula assigned, by default False. + 37 find_isotopologues : bool, optional + 38 Flag to indicate whether to find isotopologues, by default True. + 39 + 40 Attributes + 41 ---------- + 42 mass_spectrum_obj : MassSpectrum + 43 The mass spectrum object. + 44 sql_db : MolForm_SQL + 45 The SQL database object. + 46 first_hit : bool + 47 Flag to indicate whether to skip peaks that already have a molecular formula assigned. 
+ 48 find_isotopologues : bool + 49 Flag to indicate whether to find isotopologues. + 50 + 51 + 52 Methods + 53 ------- + 54 * run_search(). + 55 Run the molecular formula search. + 56 * run_worker_mass_spectrum(). + 57 Run the molecular formula search on the mass spectrum object. + 58 * run_worker_ms_peaks(). + 59 Run the molecular formula search on the given list of mass spectrum peaks. + 60 * database_to_dict(). + 61 Convert the database results to a dictionary. + 62 * run_molecular_formula(). + 63 Run the molecular formula search on the given list of mass spectrum peaks. + 64 * search_mol_formulas(). + 65 Search for molecular formulas in the mass spectrum. + 66 + 67 """ + 68 + 69 def __init__( + 70 self, + 71 mass_spectrum_obj, + 72 sql_db=None, + 73 first_hit: bool = False, + 74 find_isotopologues: bool = True, + 75 ): + 76 self.first_hit = first_hit + 77 + 78 self.find_isotopologues = find_isotopologues + 79 + 80 self.mass_spectrum_obj = mass_spectrum_obj + 81 + 82 if not sql_db: + 83 self.sql_db = MolForm_SQL( + 84 url=mass_spectrum_obj.molecular_search_settings.url_database + 85 ) 86 - 87 def __enter__(self): - 88 """ Open the SQL database connection.""" - 89 return self - 90 - 91 def __exit__(self, exc_type, exc_val, exc_tb): - 92 """ Close the SQL database connection.""" - 93 self.sql_db.close() - 94 - 95 return False - 96 - 97 def run_search(self, mspeaks : list, query : dict, min_abundance : float, ion_type : str, ion_charge : int, adduct_atom=None): - 98 """ Run the molecular formula search. + 87 else: + 88 self.sql_db = sql_db + 89 + 90 def __enter__(self): + 91 """Open the SQL database connection.""" + 92 return self + 93 + 94 def __exit__(self, exc_type, exc_val, exc_tb): + 95 """Close the SQL database connection.""" + 96 self.sql_db.close() + 97 + 98 return False 99 -100 Parameters -101 ---------- -102 mspeaks : list of MSPeak -103 The list of mass spectrum peaks. 
-104 query : dict -105 The query dictionary containing the possible molecular formulas. -106 min_abundance : float -107 The minimum abundance threshold. -108 ion_type : str -109 The ion type. -110 ion_charge : int -111 The ion charge. -112 adduct_atom : str, optional -113 The adduct atom, by default None. -114 """ -115 -116 def get_formulas(nominal_overlay : float=0.1): -117 """ -118 Get the list of formulas based on the nominal overlay. -119 -120 Parameters -121 ---------- -122 nominal_overlay : float, optional -123 The nominal overlay, by default 0.1. -124 -125 Returns -126 ------- -127 list -128 The list of formulas. -129 """ -130 nominal_mz = ms_peak.nominal_mz_exp -131 -132 defect_mass = ms_peak.mz_exp - nominal_mz -133 nominal_masses = [nominal_mz] -134 -135 if (defect_mass) >= 1 - nominal_overlay: -136 nominal_masses.append(nominal_mz + 1) -137 elif (defect_mass) <= nominal_overlay: -138 nominal_masses.append(nominal_mz - 1) -139 -140 list_formulas_candidates = [] -141 -142 for nominal_mass in nominal_masses: -143 if nominal_mass in query.keys(): -144 list_formulas_candidates.extend(query.get(nominal_mass)) +100 def run_search( +101 self, +102 mspeaks: list, +103 query: dict, +104 min_abundance: float, +105 ion_type: str, +106 ion_charge: int, +107 adduct_atom=None, +108 ): +109 """Run the molecular formula search. +110 +111 Parameters +112 ---------- +113 mspeaks : list of MSPeak +114 The list of mass spectrum peaks. +115 query : dict +116 The query dictionary containing the possible molecular formulas. +117 min_abundance : float +118 The minimum abundance threshold. +119 ion_type : str +120 The ion type. +121 ion_charge : int +122 The ion charge. +123 adduct_atom : str, optional +124 The adduct atom, by default None. +125 """ +126 +127 def get_formulas(nominal_overlay: float = 0.1): +128 """ +129 Get the list of formulas based on the nominal overlay. 
+130 +131 Parameters +132 ---------- +133 nominal_overlay : float, optional +134 The nominal overlay, by default 0.1. +135 +136 Returns +137 ------- +138 list +139 The list of formulas. +140 """ +141 nominal_mz = ms_peak.nominal_mz_exp +142 +143 defect_mass = ms_peak.mz_exp - nominal_mz +144 nominal_masses = [nominal_mz] 145 -146 return list_formulas_candidates -147 -148 all_assigned_indexes = list() -149 -150 # molecular_search_settings = self.mass_spectrum_obj.molecular_search_settings -151 -152 search_molfrom = SearchMolecularFormulaWorker(find_isotopologues=self.find_isotopologues) -153 -154 for ms_peak in mspeaks: -155 -156 # already assigned a molecular formula -157 if self.first_hit: +146 if (defect_mass) >= 1 - nominal_overlay: +147 nominal_masses.append(nominal_mz + 1) +148 elif (defect_mass) <= nominal_overlay: +149 nominal_masses.append(nominal_mz - 1) +150 +151 list_formulas_candidates = [] +152 +153 for nominal_mass in nominal_masses: +154 if nominal_mass in query.keys(): +155 list_formulas_candidates.extend(query.get(nominal_mass)) +156 +157 return list_formulas_candidates 158 -159 if ms_peak.is_assigned: -160 continue -161 -162 ms_peak_indexes = search_molfrom.find_formulas(get_formulas(), min_abundance, self.mass_spectrum_obj, ms_peak, ion_type, ion_charge, adduct_atom) -163 -164 all_assigned_indexes.extend(ms_peak_indexes) -165 -166 # all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(all_assigned_indexes, self.mass_spectrum_obj) -167 -168 # all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(all_assigned_indexes, self.mass_spectrum_obj) -169 -170 # MolecularFormulaSearchFilters().check_min_peaks(all_assigned_indexes, self.mass_spectrum_obj) -171 # filter per min peaks per mono isotopic class +159 all_assigned_indexes = list() +160 +161 # molecular_search_settings = self.mass_spectrum_obj.molecular_search_settings +162 +163 search_molfrom = SearchMolecularFormulaWorker( +164 
find_isotopologues=self.find_isotopologues +165 ) +166 +167 for ms_peak in mspeaks: +168 # already assigned a molecular formula +169 if self.first_hit: +170 if ms_peak.is_assigned: +171 continue 172 -173 def run_worker_mass_spectrum(self): -174 """ Run the molecular formula search on the mass spectrum object. -175 """ -176 self.run_molecular_formula(self.mass_spectrum_obj.sort_by_abundance()) -177 -178 def run_worker_ms_peaks(self, ms_peaks): -179 """ Run the molecular formula search on the given list of mass spectrum peaks. -180 -181 Parameters -182 ---------- -183 ms_peaks : list of MSPeak -184 The list of mass spectrum peaks. -185 """ -186 self.run_molecular_formula(ms_peaks) -187 -188 @staticmethod -189 def database_to_dict(classe_str_list, nominal_mzs, mf_search_settings, ion_charge): -190 """ Convert the database results to a dictionary. +173 ms_peak_indexes = search_molfrom.find_formulas( +174 get_formulas(), +175 min_abundance, +176 self.mass_spectrum_obj, +177 ms_peak, +178 ion_type, +179 ion_charge, +180 adduct_atom, +181 ) +182 +183 all_assigned_indexes.extend(ms_peak_indexes) +184 +185 # all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(all_assigned_indexes, self.mass_spectrum_obj) +186 +187 # all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(all_assigned_indexes, self.mass_spectrum_obj) +188 +189 # MolecularFormulaSearchFilters().check_min_peaks(all_assigned_indexes, self.mass_spectrum_obj) +190 # filter per min peaks per mono isotopic class 191 -192 Parameters -193 ---------- -194 classe_str_list : list -195 The list of class strings. -196 nominal_mzs : list -197 The list of nominal m/z values. -198 mf_search_settings : MolecularFormulaSearchSettings -199 The molecular formula search settings. -200 ion_charge : int -201 The ion charge. -202 -203 Returns -204 ------- -205 dict -206 The dictionary containing the database results. 
-207 """ -208 sql_db = MolForm_SQL(url=mf_search_settings.url_database) +192 def run_worker_mass_spectrum(self): +193 """Run the molecular formula search on the mass spectrum object.""" +194 self.run_molecular_formula(self.mass_spectrum_obj.sort_by_abundance()) +195 +196 def run_worker_ms_peaks(self, ms_peaks): +197 """Run the molecular formula search on the given list of mass spectrum peaks. +198 +199 Parameters +200 ---------- +201 ms_peaks : list of MSPeak +202 The list of mass spectrum peaks. +203 """ +204 self.run_molecular_formula(ms_peaks) +205 +206 @staticmethod +207 def database_to_dict(classe_str_list, nominal_mzs, mf_search_settings, ion_charge): +208 """Convert the database results to a dictionary. 209 -210 dict_res = {} -211 -212 if mf_search_settings.isProtonated: -213 dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.protonated_de_ion, nominal_mzs, ion_charge, mf_search_settings) -214 -215 if mf_search_settings.isRadical: -216 dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.radical_ion, nominal_mzs, ion_charge, mf_search_settings) -217 -218 if mf_search_settings.isAdduct: -219 -220 adduct_list = mf_search_settings.adduct_atoms_neg if ion_charge < 0 else mf_search_settings.adduct_atoms_pos -221 dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.adduct_ion, nominal_mzs, ion_charge, mf_search_settings, adducts=adduct_list) -222 -223 return dict_res -224 -225 @timeit -226 def run_molecular_formula(self, ms_peaks): -227 """ Run the molecular formula search on the given list of mass spectrum peaks. -228 -229 Parameters -230 ---------- -231 ms_peaks : list of MSPeak -232 The list of mass spectrum peaks. 
-233 """ -234 # number_of_process = multiprocessing.cpu_count() -235 -236 #loading this on a shared memory would be better than having to serialize it for every process -237 # waiting for python 3.8 release +210 Parameters +211 ---------- +212 classe_str_list : list +213 The list of class strings. +214 nominal_mzs : list +215 The list of nominal m/z values. +216 mf_search_settings : MolecularFormulaSearchSettings +217 The molecular formula search settings. +218 ion_charge : int +219 The ion charge. +220 +221 Returns +222 ------- +223 dict +224 The dictionary containing the database results. +225 """ +226 sql_db = MolForm_SQL(url=mf_search_settings.url_database) +227 +228 dict_res = {} +229 +230 if mf_search_settings.isProtonated: +231 dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes( +232 classe_str_list, +233 Labels.protonated_de_ion, +234 nominal_mzs, +235 ion_charge, +236 mf_search_settings, +237 ) 238 -239 # ion charge for all the ion in the mass spectrum -240 # under the current structure is possible to search for individual m/z but it takes longer than allow all the m/z to be search against -241 ion_charge = self.mass_spectrum_obj.polarity -242 -243 # use to limit the calculation of possible isotopologues -244 min_abundance = self.mass_spectrum_obj.min_abundance -245 -246 # only query the database for formulas with the nominal m/z matching the mass spectrum data -247 # default m/z overlay is m/z 0.3 unit -248 # needs to improve to bin by mass defect instead, faster db creation and faster search execution time -249 nominal_mzs = self.mass_spectrum_obj.nominal_mz -250 -251 # reset average error, only relevant is average mass error method is being used -252 SearchMolecularFormulaWorker(find_isotopologues=self.find_isotopologues).reset_error(self.mass_spectrum_obj) -253 -254 # check database for all possible molecular formula combinations based on the setting passed to self.mass_spectrum_obj.molecular_search_settings -255 classes = 
MolecularCombinations(self.sql_db).runworker(self.mass_spectrum_obj.molecular_search_settings) -256 -257 # split the database load to not blowout the memory -258 # TODO add to the settings -259 -260 def run(): -261 -262 for classe_chunk in chunks(classes, self.mass_spectrum_obj.molecular_search_settings.db_chunk_size): -263 -264 classes_str_list = [class_tuple[0] for class_tuple in classe_chunk] -265 -266 # load the molecular formula objs binned by ion type and heteroatoms classes, {ion type:{classe:[list_formula]}} -267 # for adduct ion type a third key is added {atoms:{ion type:{classe:[list_formula]}}} -268 dict_res = self.database_to_dict(classes_str_list, nominal_mzs, self.mass_spectrum_obj.molecular_search_settings, ion_charge) -269 -270 pbar = tqdm.tqdm(classe_chunk) -271 -272 for classe_tuple in pbar: -273 -274 # class string is a json serialized dict -275 classe_str = classe_tuple[0] -276 classe_dict = classe_tuple[1] -277 -278 if self.mass_spectrum_obj.molecular_search_settings.isProtonated: -279 -280 ion_type = Labels.protonated_de_ion -281 -282 pbar.set_description_str(desc="Started molecular formula search for class %s, (de)protonated " % classe_str, refresh=True) -283 -284 candidate_formulas = dict_res.get(ion_type).get(classe_str) +239 if mf_search_settings.isRadical: +240 dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes( +241 classe_str_list, +242 Labels.radical_ion, +243 nominal_mzs, +244 ion_charge, +245 mf_search_settings, +246 ) +247 +248 if mf_search_settings.isAdduct: +249 adduct_list = ( +250 mf_search_settings.adduct_atoms_neg +251 if ion_charge < 0 +252 else mf_search_settings.adduct_atoms_pos +253 ) +254 dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes( +255 classe_str_list, +256 Labels.adduct_ion, +257 nominal_mzs, +258 ion_charge, +259 mf_search_settings, +260 adducts=adduct_list, +261 ) +262 +263 return dict_res +264 +265 @timeit +266 def run_molecular_formula(self, ms_peaks): +267 """Run the molecular formula search on 
the given list of mass spectrum peaks. +268 +269 Parameters +270 ---------- +271 ms_peaks : list of MSPeak +272 The list of mass spectrum peaks. +273 """ +274 # number_of_process = multiprocessing.cpu_count() +275 +276 # loading this on a shared memory would be better than having to serialize it for every process +277 # waiting for python 3.8 release +278 +279 # ion charge for all the ion in the mass spectrum +280 # under the current structure is possible to search for individual m/z but it takes longer than allow all the m/z to be search against +281 ion_charge = self.mass_spectrum_obj.polarity +282 +283 # use to limit the calculation of possible isotopologues +284 min_abundance = self.mass_spectrum_obj.min_abundance 285 -286 if candidate_formulas: -287 -288 self.run_search(ms_peaks, candidate_formulas, -289 min_abundance, ion_type, ion_charge) +286 # only query the database for formulas with the nominal m/z matching the mass spectrum data +287 # default m/z overlay is m/z 0.3 unit +288 # needs to improve to bin by mass defect instead, faster db creation and faster search execution time +289 nominal_mzs = self.mass_spectrum_obj.nominal_mz 290 -291 if self.mass_spectrum_obj.molecular_search_settings.isRadical: -292 -293 pbar.set_description_str(desc="Started molecular formula search for class %s, radical " % classe_str, refresh=True) -294 -295 ion_type = Labels.radical_ion -296 -297 candidate_formulas = dict_res.get(ion_type).get(classe_str) -298 -299 if candidate_formulas: +291 # reset average error, only relevant is average mass error method is being used +292 SearchMolecularFormulaWorker( +293 find_isotopologues=self.find_isotopologues +294 ).reset_error(self.mass_spectrum_obj) +295 +296 # check database for all possible molecular formula combinations based on the setting passed to self.mass_spectrum_obj.molecular_search_settings +297 classes = MolecularCombinations(self.sql_db).runworker( +298 self.mass_spectrum_obj.molecular_search_settings +299 ) 300 -301 
self.run_search(ms_peaks, candidate_formulas, -302 min_abundance, ion_type, ion_charge) -303 # looks for adduct, used_atom_valences should be 0 -304 # this code does not support H exchance by halogen atoms -305 if self.mass_spectrum_obj.molecular_search_settings.isAdduct: -306 -307 pbar.set_description_str(desc="Started molecular formula search for class %s, adduct " % classe_str, refresh=True) -308 -309 ion_type = Labels.adduct_ion -310 dict_atoms_formulas = dict_res.get(ion_type) -311 -312 for adduct_atom, dict_by_class in dict_atoms_formulas.items(): -313 -314 candidate_formulas = dict_by_class.get(classe_str) -315 -316 if candidate_formulas: -317 self.run_search(ms_peaks, candidate_formulas, -318 min_abundance, ion_type, ion_charge, adduct_atom=adduct_atom) -319 -320 run() -321 self.sql_db.close() -322 -323 def search_mol_formulas(self, possible_formulas_list: List[MolecularFormula], ion_type:str, -324 neutral_molform=True, find_isotopologues=True, adduct_atom=None) -> List[_MSPeak]: -325 """ Search for molecular formulas in the mass spectrum. -326 -327 Parameters -328 ---------- -329 possible_formulas_list : list of MolecularFormula -330 The list of possible molecular formulas. -331 ion_type : str -332 The ion type. -333 neutral_molform : bool, optional -334 Flag to indicate whether the molecular formulas are neutral, by default True. -335 find_isotopologues : bool, optional -336 Flag to indicate whether to find isotopologues, by default True. -337 adduct_atom : str, optional -338 The adduct atom, by default None. -339 -340 Returns -341 ------- -342 list of MSPeak -343 The list of mass spectrum peaks with assigned molecular formulas. 
-344 """ -345 #neutral_molform: some reference files already present the formula on ion mode, for instance, bruker reference files -346 # if that is the case than turn neutral_molform off -347 -348 SearchMolecularFormulaWorker(find_isotopologues=find_isotopologues).reset_error(self.mass_spectrum_obj) -349 -350 initial_min_peak_bool = self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter -351 initial_runtime_kendrick_filter = self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter +301 # split the database load to not blowout the memory +302 # TODO add to the settings +303 +304 def run(): +305 for classe_chunk in chunks( +306 classes, self.mass_spectrum_obj.molecular_search_settings.db_chunk_size +307 ): +308 classes_str_list = [class_tuple[0] for class_tuple in classe_chunk] +309 +310 # load the molecular formula objs binned by ion type and heteroatoms classes, {ion type:{classe:[list_formula]}} +311 # for adduct ion type a third key is added {atoms:{ion type:{classe:[list_formula]}}} +312 dict_res = self.database_to_dict( +313 classes_str_list, +314 nominal_mzs, +315 self.mass_spectrum_obj.molecular_search_settings, +316 ion_charge, +317 ) +318 +319 pbar = tqdm.tqdm(classe_chunk) +320 +321 for classe_tuple in pbar: +322 # class string is a json serialized dict +323 classe_str = classe_tuple[0] +324 classe_dict = classe_tuple[1] +325 +326 if self.mass_spectrum_obj.molecular_search_settings.isProtonated: +327 ion_type = Labels.protonated_de_ion +328 +329 pbar.set_description_str( +330 desc="Started molecular formula search for class %s, (de)protonated " +331 % classe_str, +332 refresh=True, +333 ) +334 +335 candidate_formulas = dict_res.get(ion_type).get(classe_str) +336 +337 if candidate_formulas: +338 self.run_search( +339 ms_peaks, +340 candidate_formulas, +341 min_abundance, +342 ion_type, +343 ion_charge, +344 ) +345 +346 if self.mass_spectrum_obj.molecular_search_settings.isRadical: +347 pbar.set_description_str( +348 
desc="Started molecular formula search for class %s, radical " +349 % classe_str, +350 refresh=True, +351 ) 352 -353 # Are the following 3 lines redundant? -354 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False -355 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False #TODO check this line -356 self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = False -357 -358 possible_formulas_dict_nm = {} -359 -360 for mf in possible_formulas_list: -361 -362 if neutral_molform: -363 nm = int(mf.protonated_mz) -364 else: -365 nm = int(mf.mz_nominal_calc) -366 -367 if nm in possible_formulas_dict_nm.keys(): -368 -369 possible_formulas_dict_nm[nm].append(mf) -370 -371 else: -372 -373 possible_formulas_dict_nm[nm] = [mf] -374 -375 min_abundance = self.mass_spectrum_obj.min_abundance -376 -377 ion_type = ion_type -378 -379 self.run_search(self.mass_spectrum_obj, possible_formulas_dict_nm, min_abundance, ion_type, self.mass_spectrum_obj.polarity, adduct_atom=adduct_atom) -380 -381 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = initial_min_peak_bool -382 self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = initial_runtime_kendrick_filter -383 -384 mspeaks = [mspeak for mspeak in self.mass_spectrum_obj if mspeak.is_assigned] -385 -386 self.sql_db.close() -387 -388 return mspeaks +353 ion_type = Labels.radical_ion +354 +355 candidate_formulas = dict_res.get(ion_type).get(classe_str) +356 +357 if candidate_formulas: +358 self.run_search( +359 ms_peaks, +360 candidate_formulas, +361 min_abundance, +362 ion_type, +363 ion_charge, +364 ) +365 # looks for adduct, used_atom_valences should be 0 +366 # this code does not support H exchance by halogen atoms +367 if self.mass_spectrum_obj.molecular_search_settings.isAdduct: +368 pbar.set_description_str( +369 desc="Started molecular formula search for class %s, adduct " +370 % classe_str, +371 refresh=True, +372 ) 
+373 +374 ion_type = Labels.adduct_ion +375 dict_atoms_formulas = dict_res.get(ion_type) +376 +377 for adduct_atom, dict_by_class in dict_atoms_formulas.items(): +378 candidate_formulas = dict_by_class.get(classe_str) +379 +380 if candidate_formulas: +381 self.run_search( +382 ms_peaks, +383 candidate_formulas, +384 min_abundance, +385 ion_type, +386 ion_charge, +387 adduct_atom=adduct_atom, +388 ) 389 -390 -391class SearchMolecularFormulaWorker: -392 """ Class for searching molecular formulas in a mass spectrum. -393 -394 Parameters -395 ---------- -396 find_isotopologues : bool, optional -397 Flag to indicate whether to find isotopologues, by default True. -398 -399 Attributes -400 ---------- -401 find_isotopologues : bool -402 Flag to indicate whether to find isotopologues. -403 -404 Methods -405 ------- -406 * reset_error(). -407 Reset the error variables. -408 * set_last_error(). -409 Set the last error. -410 * find_formulas(). -411 Find the formulas. -412 * calc_error(). -413 Calculate the error. -414 """ -415 # TODO add reset error function -416 # needs this wraper to pass the class to multiprocessing -417 -418 def __init__(self, find_isotopologues=True): -419 self.find_isotopologues = find_isotopologues -420 -421 def __call__(self, args): -422 """ Call the find formulas function. -423 -424 Parameters -425 ---------- -426 args : tuple -427 The arguments. -428 -429 Returns -430 ------- -431 list -432 The list of mass spectrum peaks with assigned molecular formulas. -433 """ -434 return self.find_formulas(*args) # ,args[1] -435 -436 def reset_error(self, mass_spectrum_obj): -437 """ Reset the error variables. -438 -439 Parameters -440 ---------- -441 mass_spectrum_obj : MassSpectrum -442 The mass spectrum object. 
-443 """ -444 global last_error, last_dif, closest_error, error_average, nbValues -445 last_error, last_dif, closest_error, nbValues = 0.0, 0.0, 0.0, 0.0 -446 -447 error_average = 0 -448 -449 def set_last_error(self, error, mass_spectrum_obj): -450 """ Set the last error. -451 -452 Parameters -453 ---------- -454 error : float -455 The error. -456 mass_spectrum_obj : MassSpectrum -457 The mass spectrum object. -458 """ -459 # set the changes to the global variables, not internal ones -460 global last_error, last_dif, closest_error, error_average, nbValues +390 run() +391 self.sql_db.close() +392 +393 def search_mol_formulas( +394 self, +395 possible_formulas_list: List[MolecularFormula], +396 ion_type: str, +397 neutral_molform=True, +398 find_isotopologues=True, +399 adduct_atom=None, +400 ) -> List[_MSPeak]: +401 """Search for molecular formulas in the mass spectrum. +402 +403 Parameters +404 ---------- +405 possible_formulas_list : list of MolecularFormula +406 The list of possible molecular formulas. +407 ion_type : str +408 The ion type. +409 neutral_molform : bool, optional +410 Flag to indicate whether the molecular formulas are neutral, by default True. +411 find_isotopologues : bool, optional +412 Flag to indicate whether to find isotopologues, by default True. +413 adduct_atom : str, optional +414 The adduct atom, by default None. +415 +416 Returns +417 ------- +418 list of MSPeak +419 The list of mass spectrum peaks with assigned molecular formulas. 
+420 """ +421 # neutral_molform: some reference files already present the formula on ion mode, for instance, bruker reference files +422 # if that is the case than turn neutral_molform off +423 +424 SearchMolecularFormulaWorker(find_isotopologues=find_isotopologues).reset_error( +425 self.mass_spectrum_obj +426 ) +427 +428 initial_min_peak_bool = ( +429 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter +430 ) +431 initial_runtime_kendrick_filter = ( +432 self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter +433 ) +434 +435 # Are the following 3 lines redundant? +436 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False +437 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = ( +438 False # TODO check this line +439 ) +440 self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = ( +441 False +442 ) +443 +444 possible_formulas_dict_nm = {} +445 +446 for mf in possible_formulas_list: +447 if neutral_molform: +448 nm = int(mf.protonated_mz) +449 else: +450 nm = int(mf.mz_nominal_calc) +451 +452 if nm in possible_formulas_dict_nm.keys(): +453 possible_formulas_dict_nm[nm].append(mf) +454 +455 else: +456 possible_formulas_dict_nm[nm] = [mf] +457 +458 min_abundance = self.mass_spectrum_obj.min_abundance +459 +460 ion_type = ion_type 461 -462 if mass_spectrum_obj.molecular_search_settings.error_method == 'distance': -463 -464 dif = error - last_error -465 if dif < last_dif: -466 last_dif = dif -467 closest_error = error -468 mass_spectrum_obj.molecular_search_settings.min_ppm_error = closest_error - mass_spectrum_obj.molecular_search_settings.mz_error_range -469 mass_spectrum_obj.molecular_search_settings.max_ppm_error = closest_error + mass_spectrum_obj.molecular_search_settings.mz_error_range +462 self.run_search( +463 self.mass_spectrum_obj, +464 possible_formulas_dict_nm, +465 min_abundance, +466 ion_type, +467 self.mass_spectrum_obj.polarity, +468 
adduct_atom=adduct_atom, +469 ) 470 -471 elif mass_spectrum_obj.molecular_search_settings.error_method == 'lowest': -472 -473 if error < last_error: -474 mass_spectrum_obj.molecular_search_settings.min_ppm_error = error - mass_spectrum_obj.molecular_search_settings.mz_error_range -475 mass_spectrum_obj.molecular_search_settings.max_ppm_error = error + mass_spectrum_obj.molecular_search_settings.mz_error_range -476 last_error = error +471 self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = ( +472 initial_min_peak_bool +473 ) +474 self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = ( +475 initial_runtime_kendrick_filter +476 ) 477 -478 -479 elif mass_spectrum_obj.molecular_search_settings.error_method == 'symmetrical': -480 -481 mass_spectrum_obj.molecular_search_settings.min_ppm_error = mass_spectrum_obj.molecular_search_settings.mz_error_average - mass_spectrum_obj.molecular_search_settings.mz_error_range -482 mass_spectrum_obj.molecular_search_settings.max_ppm_error = mass_spectrum_obj.molecular_search_settings.mz_error_average + mass_spectrum_obj.molecular_search_settings.mz_error_range +478 mspeaks = [mspeak for mspeak in self.mass_spectrum_obj if mspeak.is_assigned] +479 +480 self.sql_db.close() +481 +482 return mspeaks 483 -484 elif mass_spectrum_obj.molecular_search_settings.error_method == 'average': -485 -486 nbValues += 1 -487 error_average = error_average + ((error - error_average) / nbValues) -488 mass_spectrum_obj.molecular_search_settings.min_ppm_error = error_average - mass_spectrum_obj.molecular_search_settings.mz_error_range -489 mass_spectrum_obj.molecular_search_settings.max_ppm_error = error_average + mass_spectrum_obj.molecular_search_settings.mz_error_range -490 -491 else: -492 # using set mass_spectrum_obj.molecular_search_settings.min_ppm_error and max_ppm_error range -493 pass -494 -495 #returns the error based on the selected method at mass_spectrum_obj.molecular_search_settings.method -496 
-497 @staticmethod -498 def calc_error(mz_exp, mz_calc, method='ppm'): -499 """ Calculate the error. -500 -501 Parameters -502 ---------- -503 mz_exp : float -504 The experimental m/z value. -505 mz_calc : float -506 The calculated m/z value. -507 method : str, optional -508 The method, by default 'ppm'. -509 -510 Raises -511 ------- -512 Exception -513 If the method is not ppm or ppb. -514 -515 Returns -516 ------- -517 float -518 The error. -519 """ -520 -521 if method == 'ppm': -522 multi_factor = 1_000_000 +484 +485class SearchMolecularFormulaWorker: +486 """Class for searching molecular formulas in a mass spectrum. +487 +488 Parameters +489 ---------- +490 find_isotopologues : bool, optional +491 Flag to indicate whether to find isotopologues, by default True. +492 +493 Attributes +494 ---------- +495 find_isotopologues : bool +496 Flag to indicate whether to find isotopologues. +497 +498 Methods +499 ------- +500 * reset_error(). +501 Reset the error variables. +502 * set_last_error(). +503 Set the last error. +504 * find_formulas(). +505 Find the formulas. +506 * calc_error(). +507 Calculate the error. +508 """ +509 +510 # TODO add reset error function +511 # needs this wraper to pass the class to multiprocessing +512 +513 def __init__(self, find_isotopologues=True): +514 self.find_isotopologues = find_isotopologues +515 +516 def __call__(self, args): +517 """Call the find formulas function. +518 +519 Parameters +520 ---------- +521 args : tuple +522 The arguments. 
523 -524 elif method == 'ppb': -525 multi_factor = 1_000_000_000 -526 -527 elif method == 'perc': -528 multi_factor = 100 -529 -530 else: -531 raise Exception("method needs to be ppm or ppb, you have entered %s" % method) -532 -533 if mz_exp: -534 -535 return ((mz_exp - mz_calc) / mz_calc) * multi_factor -536 -537 else: -538 -539 raise Exception("Please set mz_calc first") -540 -541 def find_formulas(self, formulas, min_abundance, -542 mass_spectrum_obj, ms_peak, ion_type, ion_charge, adduct_atom=None): -543 -544 """ Find the formulas. -545 -546 Parameters -547 ---------- -548 formulas : list of MolecularFormula -549 The list of molecular formulas. -550 min_abundance : float -551 The minimum abundance threshold. -552 mass_spectrum_obj : MassSpectrum -553 The mass spectrum object. -554 ms_peak : MSPeak -555 The mass spectrum peak. -556 ion_type : str -557 The ion type. -558 ion_charge : int -559 The ion charge. -560 adduct_atom : str, optional -561 The adduct atom, by default None. -562 -563 Returns -564 ------- -565 list of MSPeak -566 The list of mass spectrum peaks with assigned molecular formulas. -567 -568 Notes -569 ----- -570 Uses the closest error the next search (this is not ideal, it needs to use confidence -571 metric to choose the right candidate then propagate the error using the error from the best candidate). -572 It needs to add s/n to the equation. -573 It need optimization to define the mz_error_range within a m/z unit since it is directly proportional -574 with the mass, and inversely proportional to the rp. It's not linear, i.e., sigma mass. -575 The idea it to correlate sigma to resolving power, signal to noise and sample complexity per mz unit. 
-576 Method='distance' -577 """ -578 mspeak_assigned_index = list() -579 -580 min_ppm_error = mass_spectrum_obj.molecular_search_settings.min_ppm_error -581 max_ppm_error = mass_spectrum_obj.molecular_search_settings.max_ppm_error -582 -583 min_abun_error = mass_spectrum_obj.molecular_search_settings.min_abun_error -584 max_abun_error = mass_spectrum_obj.molecular_search_settings.max_abun_error -585 -586 # f = open("abundance_error.txt", "a+") -587 ms_peak_mz_exp, ms_peak_abundance = ms_peak.mz_exp, ms_peak.abundance -588 # min_error = min([pmf.mz_error for pmf in possible_formulas]) -589 -590 def mass_by_ion_type(possible_formula_obj): -591 -592 if ion_type == Labels.protonated_de_ion: -593 -594 return possible_formula_obj._protonated_mz(ion_charge) -595 -596 elif ion_type == Labels.radical_ion: -597 -598 return possible_formula_obj._radical_mz(ion_charge) -599 -600 elif ion_type == Labels.adduct_ion and adduct_atom: -601 -602 return possible_formula._adduct_mz(ion_charge, adduct_atom) -603 -604 else: -605 # will return externally calculated mz if is set, #use on Bruker Reference list import -606 # if the ion type is known the ion mass based on molecular formula ion type -607 # if ion type is unknow will return neutral mass -608 return possible_formula.mz_calc -609 -610 if formulas: -611 if isinstance(formulas[0], LCMSLibRefMolecularFormula): +524 Returns +525 ------- +526 list +527 The list of mass spectrum peaks with assigned molecular formulas. +528 """ +529 return self.find_formulas(*args) # ,args[1] +530 +531 def reset_error(self, mass_spectrum_obj): +532 """Reset the error variables. +533 +534 Parameters +535 ---------- +536 mass_spectrum_obj : MassSpectrum +537 The mass spectrum object. +538 """ +539 global last_error, last_dif, closest_error, error_average, nbValues +540 last_error, last_dif, closest_error, nbValues = 0.0, 0.0, 0.0, 0.0 +541 +542 error_average = 0 +543 +544 def set_last_error(self, error, mass_spectrum_obj): +545 """Set the last error. 
+546 +547 Parameters +548 ---------- +549 error : float +550 The error. +551 mass_spectrum_obj : MassSpectrum +552 The mass spectrum object. +553 """ +554 # set the changes to the global variables, not internal ones +555 global last_error, last_dif, closest_error, error_average, nbValues +556 +557 if mass_spectrum_obj.molecular_search_settings.error_method == "distance": +558 dif = error - last_error +559 if dif < last_dif: +560 last_dif = dif +561 closest_error = error +562 mass_spectrum_obj.molecular_search_settings.min_ppm_error = ( +563 closest_error +564 - mass_spectrum_obj.molecular_search_settings.mz_error_range +565 ) +566 mass_spectrum_obj.molecular_search_settings.max_ppm_error = ( +567 closest_error +568 + mass_spectrum_obj.molecular_search_settings.mz_error_range +569 ) +570 +571 elif mass_spectrum_obj.molecular_search_settings.error_method == "lowest": +572 if error < last_error: +573 mass_spectrum_obj.molecular_search_settings.min_ppm_error = ( +574 error - mass_spectrum_obj.molecular_search_settings.mz_error_range +575 ) +576 mass_spectrum_obj.molecular_search_settings.max_ppm_error = ( +577 error + mass_spectrum_obj.molecular_search_settings.mz_error_range +578 ) +579 last_error = error +580 +581 elif mass_spectrum_obj.molecular_search_settings.error_method == "symmetrical": +582 mass_spectrum_obj.molecular_search_settings.min_ppm_error = ( +583 mass_spectrum_obj.molecular_search_settings.mz_error_average +584 - mass_spectrum_obj.molecular_search_settings.mz_error_range +585 ) +586 mass_spectrum_obj.molecular_search_settings.max_ppm_error = ( +587 mass_spectrum_obj.molecular_search_settings.mz_error_average +588 + mass_spectrum_obj.molecular_search_settings.mz_error_range +589 ) +590 +591 elif mass_spectrum_obj.molecular_search_settings.error_method == "average": +592 nbValues += 1 +593 error_average = error_average + ((error - error_average) / nbValues) +594 mass_spectrum_obj.molecular_search_settings.min_ppm_error = ( +595 error_average +596 - 
mass_spectrum_obj.molecular_search_settings.mz_error_range +597 ) +598 mass_spectrum_obj.molecular_search_settings.max_ppm_error = ( +599 error_average +600 + mass_spectrum_obj.molecular_search_settings.mz_error_range +601 ) +602 +603 else: +604 # using set mass_spectrum_obj.molecular_search_settings.min_ppm_error and max_ppm_error range +605 pass +606 +607 # returns the error based on the selected method at mass_spectrum_obj.molecular_search_settings.method +608 +609 @staticmethod +610 def calc_error(mz_exp, mz_calc, method="ppm"): +611 """Calculate the error. 612 -613 possible_mf_class = True -614 -615 else: -616 -617 possible_mf_class = False -618 -619 for possible_formula in formulas: -620 -621 if possible_formula: -622 -623 error = self.calc_error(ms_peak_mz_exp, mass_by_ion_type(possible_formula)) -624 -625 # error = possible_formula.mz_error +613 Parameters +614 ---------- +615 mz_exp : float +616 The experimental m/z value. +617 mz_calc : float +618 The calculated m/z value. +619 method : str, optional +620 The method, by default 'ppm'. +621 +622 Raises +623 ------- +624 Exception +625 If the method is not ppm or ppb. 626 -627 if min_ppm_error <= error <= max_ppm_error: -628 -629 # update the error -630 -631 self.set_last_error(error, mass_spectrum_obj) +627 Returns +628 ------- +629 float +630 The error. 
+631 """ 632 -633 # add molecular formula match to ms_peak -634 -635 # get molecular formula dict from sql obj -636 # formula_dict = pickle.loads(possible_formula.mol_formula) -637 #if possible_mf_class: -638 -639 # molecular_formula = deepcopy(possible_formula) -640 -641 #else: -642 -643 formula_dict = possible_formula.to_dict() -644 # create the molecular formula obj to be stored -645 if possible_mf_class: +633 if method == "ppm": +634 multi_factor = 1_000_000 +635 +636 elif method == "ppb": +637 multi_factor = 1_000_000_000 +638 +639 elif method == "perc": +640 multi_factor = 100 +641 +642 else: +643 raise Exception( +644 "method needs to be ppm or ppb, you have entered %s" % method +645 ) 646 -647 molecular_formula = LCMSLibRefMolecularFormula(formula_dict, ion_charge, ion_type=ion_type, adduct_atom=adduct_atom) -648 -649 molecular_formula.name = possible_formula.name -650 molecular_formula.kegg_id = possible_formula.kegg_id -651 molecular_formula.cas = possible_formula.cas +647 if mz_exp: +648 return ((mz_exp - mz_calc) / mz_calc) * multi_factor +649 +650 else: +651 raise Exception("Please set mz_calc first") 652 -653 else: -654 -655 molecular_formula = MolecularFormula(formula_dict, ion_charge, ion_type=ion_type, adduct_atom=adduct_atom) -656 # add the molecular formula obj to the mspeak obj -657 # add the mspeak obj and it's index for tracking next assignment step -658 -659 if self.find_isotopologues: -660 -661 # calculates isotopologues -662 isotopologues = molecular_formula.isotopologues(min_abundance, ms_peak_abundance, mass_spectrum_obj.dynamic_range) -663 -664 # search for isotopologues -665 for isotopologue_formula in isotopologues: -666 -667 molecular_formula.expected_isotopologues.append(isotopologue_formula) -668 # move this outside to improve preformace -669 # we need to increase the search space to -+1 m_z -670 first_index, last_index = mass_spectrum_obj.get_nominal_mz_first_last_indexes(isotopologue_formula.mz_nominal_calc) -671 -672 for 
ms_peak_iso in mass_spectrum_obj[first_index:last_index]: -673 -674 error = self.calc_error(ms_peak_iso.mz_exp, isotopologue_formula.mz_calc) -675 -676 if min_ppm_error <= error <= max_ppm_error: -677 -678 # need to define error distribution for abundance measurements -679 -680 # if mass_spectrum_obj.is_centroid: +653 def find_formulas( +654 self, +655 formulas, +656 min_abundance, +657 mass_spectrum_obj, +658 ms_peak, +659 ion_type, +660 ion_charge, +661 adduct_atom=None, +662 ): +663 """Find the formulas. +664 +665 Parameters +666 ---------- +667 formulas : list of MolecularFormula +668 The list of molecular formulas. +669 min_abundance : float +670 The minimum abundance threshold. +671 mass_spectrum_obj : MassSpectrum +672 The mass spectrum object. +673 ms_peak : MSPeak +674 The mass spectrum peak. +675 ion_type : str +676 The ion type. +677 ion_charge : int +678 The ion charge. +679 adduct_atom : str, optional +680 The adduct atom, by default None. 681 -682 abundance_error = self.calc_error(isotopologue_formula.abundance_calc, ms_peak_iso.abundance,method='perc') -683 -684 # area_error = self.calc_error(ms_peak.area, ms_peak_iso.area, method='perc') -685 -686 # margin of error was set empirically/ needs statistical calculation -687 # of margin of error for the measurement of the abundances -688 if min_abun_error <= abundance_error <= max_abun_error: -689 -690 # update the error -691 -692 self.set_last_error(error, mass_spectrum_obj) -693 -694 # isotopologue_formula.mz_error = error -695 -696 # isotopologue_formula.area_error = area_error -697 -698 # isotopologue_formula.abundance_error = abundance_error -699 -700 isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index +682 Returns +683 ------- +684 list of MSPeak +685 The list of mass spectrum peaks with assigned molecular formulas. 
+686 +687 Notes +688 ----- +689 Uses the closest error the next search (this is not ideal, it needs to use confidence +690 metric to choose the right candidate then propagate the error using the error from the best candidate). +691 It needs to add s/n to the equation. +692 It need optimization to define the mz_error_range within a m/z unit since it is directly proportional +693 with the mass, and inversely proportional to the rp. It's not linear, i.e., sigma mass. +694 The idea it to correlate sigma to resolving power, signal to noise and sample complexity per mz unit. +695 Method='distance' +696 """ +697 mspeak_assigned_index = list() +698 +699 min_ppm_error = mass_spectrum_obj.molecular_search_settings.min_ppm_error +700 max_ppm_error = mass_spectrum_obj.molecular_search_settings.max_ppm_error 701 -702 mono_isotopic_formula_index = len(ms_peak) -703 -704 isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index -705 -706 isotopologue_formula.mono_isotopic_formula_index = mono_isotopic_formula_index -707 -708 # add mspeaks isotopologue index to the mono isotopic MolecularFormula obj and the respective formula position -709 -710 # add molecular formula match to ms_peak -711 x = ms_peak_iso.add_molecular_formula(isotopologue_formula) +702 min_abun_error = mass_spectrum_obj.molecular_search_settings.min_abun_error +703 max_abun_error = mass_spectrum_obj.molecular_search_settings.max_abun_error +704 +705 # f = open("abundance_error.txt", "a+") +706 ms_peak_mz_exp, ms_peak_abundance = ms_peak.mz_exp, ms_peak.abundance +707 # min_error = min([pmf.mz_error for pmf in possible_formulas]) +708 +709 def mass_by_ion_type(possible_formula_obj): +710 if ion_type == Labels.protonated_de_ion: +711 return possible_formula_obj._protonated_mz(ion_charge) 712 -713 molecular_formula.mspeak_mf_isotopologues_indexes.append((ms_peak_iso.index, x)) -714 # add mspeaks mono isotopic index to the isotopologue MolecularFormula obj +713 elif ion_type == Labels.radical_ion: +714 return 
possible_formula_obj._radical_mz(ion_charge) 715 -716 y = ms_peak.add_molecular_formula(molecular_formula) -717 -718 mspeak_assigned_index.append((ms_peak.index, y)) -719 -720 return mspeak_assigned_index -721 -722 -723class SearchMolecularFormulasLC(SearchMolecularFormulas): -724 """ Class for searching molecular formulas in a LC object. -725 -726 Parameters -727 ---------- -728 lcms_obj : LC -729 The LC object. -730 sql_db : MolForm_SQL, optional -731 The SQL database object, by default None. -732 first_hit : bool, optional -733 Flag to indicate whether to skip peaks that already have a molecular formula assigned, by default False. -734 find_isotopologues : bool, optional -735 Flag to indicate whether to find isotopologues, by default True. -736 -737 Methods -738 ------- -739 * run_untargeted_worker_ms1(). -740 Run untargeted molecular formula search on the ms1 mass spectrum. -741 * run_target_worker_ms1(). -742 Run targeted molecular formula search on the ms1 mass spectrum. -743 -744 """ -745 def __init__(self, lcms_obj, sql_db=None, first_hit=False, find_isotopologues=True): +716 elif ion_type == Labels.adduct_ion and adduct_atom: +717 return possible_formula._adduct_mz(ion_charge, adduct_atom) +718 +719 else: +720 # will return externally calculated mz if is set, #use on Bruker Reference list import +721 # if the ion type is known the ion mass based on molecular formula ion type +722 # if ion type is unknow will return neutral mass +723 return possible_formula.mz_calc +724 +725 if formulas: +726 if isinstance(formulas[0], LCMSLibRefMolecularFormula): +727 possible_mf_class = True +728 +729 else: +730 possible_mf_class = False +731 +732 for possible_formula in formulas: +733 if possible_formula: +734 error = self.calc_error( +735 ms_peak_mz_exp, mass_by_ion_type(possible_formula) +736 ) +737 +738 # error = possible_formula.mz_error +739 +740 if min_ppm_error <= error <= max_ppm_error: +741 # update the error +742 +743 self.set_last_error(error, 
mass_spectrum_obj) +744 +745 # add molecular formula match to ms_peak 746 -747 self.first_hit = first_hit -748 -749 self.find_isotopologues = find_isotopologues +747 # get molecular formula dict from sql obj +748 # formula_dict = pickle.loads(possible_formula.mol_formula) +749 # if possible_mf_class: 750 -751 self.lcms_obj = lcms_obj +751 # molecular_formula = deepcopy(possible_formula) 752 -753 if not sql_db: +753 # else: 754 -755 self.sql_db = MolForm_SQL(url=lcms_obj.ms1_molecular_search_settings.url_database) -756 -757 else: -758 -759 self.sql_db = sql_db -760 -761 def run_untargeted_worker_ms1(self): -762 """ Run untargeted molecular formula search on the ms1 mass spectrum.""" -763 # do molecular formula based on the parameters set for ms1 search -764 for peak in self.lcms_obj: -765 self.mass_spectrum_obj = peak.mass_spectrum -766 self.run_molecular_formula(peak.mass_spectrum.sort_by_abundance()) -767 +755 formula_dict = possible_formula.to_dict() +756 # create the molecular formula obj to be stored +757 if possible_mf_class: +758 molecular_formula = LCMSLibRefMolecularFormula( +759 formula_dict, +760 ion_charge, +761 ion_type=ion_type, +762 adduct_atom=adduct_atom, +763 ) +764 +765 molecular_formula.name = possible_formula.name +766 molecular_formula.kegg_id = possible_formula.kegg_id +767 molecular_formula.cas = possible_formula.cas 768 -769 def run_target_worker_ms1(self): -770 """ Run targeted molecular formula search on the ms1 mass spectrum.""" -771 # do molecular formula based on the external molecular reference list -772 pbar = tqdm.tqdm(self.lcms_obj) -773 -774 for peak in self.lcms_obj: -775 -776 pbar.set_description_str(desc=f"Started molecular formulae search for mass spectrum at RT {peak.retention_time} s" , refresh=True) -777 -778 self.mass_spectrum_obj = peak.mass_spectrum -779 -780 ion_charge = self.mass_spectrum_obj.polarity -781 -782 candidate_formulas = peak.targeted_molecular_formulas -783 -784 for i in candidate_formulas: -785 if 
self.lcms_obj.parameters.lc_ms.verbose_processing: -786 print(i) -787 if self.mass_spectrum_obj.molecular_search_settings.isProtonated: -788 -789 ion_type = Labels.protonated_de_ion -790 -791 #ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True) -792 -793 self.search_mol_formulas( candidate_formulas, ion_type, find_isotopologues=True) -794 -795 if self.mass_spectrum_obj.molecular_search_settings.isRadical: -796 -797 ion_type = Labels.radical_ion -798 -799 #ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True) -800 self.search_mol_formulas( candidate_formulas, ion_type, find_isotopologues=True) -801 -802 if self.mass_spectrum_obj.molecular_search_settings.isAdduct: -803 -804 ion_type = Labels.adduct_ion -805 -806 adduct_list = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg if ion_charge < 0 else self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos -807 -808 for adduct_atom in adduct_list: +769 else: +770 molecular_formula = MolecularFormula( +771 formula_dict, +772 ion_charge, +773 ion_type=ion_type, +774 adduct_atom=adduct_atom, +775 ) +776 # add the molecular formula obj to the mspeak obj +777 # add the mspeak obj and it's index for tracking next assignment step +778 +779 if self.find_isotopologues: +780 # calculates isotopologues +781 isotopologues = molecular_formula.isotopologues( +782 min_abundance, +783 ms_peak_abundance, +784 mass_spectrum_obj.dynamic_range, +785 ) +786 +787 # search for isotopologues +788 for isotopologue_formula in isotopologues: +789 molecular_formula.expected_isotopologues.append( +790 isotopologue_formula +791 ) +792 # move this outside to improve preformace +793 # we need to increase the search space to -+1 m_z +794 first_index, last_index = ( +795 mass_spectrum_obj.get_nominal_mz_first_last_indexes( +796 isotopologue_formula.mz_nominal_calc +797 ) +798 ) +799 +800 for ms_peak_iso in 
mass_spectrum_obj[ +801 first_index:last_index +802 ]: +803 error = self.calc_error( +804 ms_peak_iso.mz_exp, isotopologue_formula.mz_calc +805 ) +806 +807 if min_ppm_error <= error <= max_ppm_error: +808 # need to define error distribution for abundance measurements 809 -810 self.search_mol_formulas( candidate_formulas, ion_type, find_isotopologues=True, adduct_atom=adduct_atom) +810 # if mass_spectrum_obj.is_centroid: +811 +812 abundance_error = self.calc_error( +813 isotopologue_formula.abundance_calc, +814 ms_peak_iso.abundance, +815 method="perc", +816 ) +817 +818 # area_error = self.calc_error(ms_peak.area, ms_peak_iso.area, method='perc') +819 +820 # margin of error was set empirically/ needs statistical calculation +821 # of margin of error for the measurement of the abundances +822 if ( +823 min_abun_error +824 <= abundance_error +825 <= max_abun_error +826 ): +827 # update the error +828 +829 self.set_last_error(error, mass_spectrum_obj) +830 +831 # isotopologue_formula.mz_error = error +832 +833 # isotopologue_formula.area_error = area_error +834 +835 # isotopologue_formula.abundance_error = abundance_error +836 +837 isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index +838 +839 mono_isotopic_formula_index = len(ms_peak) +840 +841 isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index +842 +843 isotopologue_formula.mono_isotopic_formula_index = mono_isotopic_formula_index +844 +845 # add mspeaks isotopologue index to the mono isotopic MolecularFormula obj and the respective formula position +846 +847 # add molecular formula match to ms_peak +848 x = ms_peak_iso.add_molecular_formula( +849 isotopologue_formula +850 ) +851 +852 molecular_formula.mspeak_mf_isotopologues_indexes.append( +853 (ms_peak_iso.index, x) +854 ) +855 # add mspeaks mono isotopic index to the isotopologue MolecularFormula obj +856 +857 y = ms_peak.add_molecular_formula(molecular_formula) +858 +859 mspeak_assigned_index.append((ms_peak.index, y)) +860 +861 return 
mspeak_assigned_index +862 +863 +864class SearchMolecularFormulasLC(SearchMolecularFormulas): +865 """Class for searching molecular formulas in a LC object. +866 +867 Parameters +868 ---------- +869 lcms_obj : LC +870 The LC object. +871 sql_db : MolForm_SQL, optional +872 The SQL database object, by default None. +873 first_hit : bool, optional +874 Flag to indicate whether to skip peaks that already have a molecular formula assigned, by default False. +875 find_isotopologues : bool, optional +876 Flag to indicate whether to find isotopologues, by default True. +877 +878 Methods +879 ------- +880 * run_untargeted_worker_ms1(). +881 Run untargeted molecular formula search on the ms1 mass spectrum. +882 * run_target_worker_ms1(). +883 Run targeted molecular formula search on the ms1 mass spectrum. +884 +885 """ +886 +887 def __init__(self, lcms_obj, sql_db=None, first_hit=False, find_isotopologues=True): +888 self.first_hit = first_hit +889 +890 self.find_isotopologues = find_isotopologues +891 +892 self.lcms_obj = lcms_obj +893 +894 if not sql_db: +895 self.sql_db = MolForm_SQL( +896 url=lcms_obj.ms1_molecular_search_settings.url_database +897 ) +898 +899 else: +900 self.sql_db = sql_db +901 +902 def run_untargeted_worker_ms1(self): +903 """Run untargeted molecular formula search on the ms1 mass spectrum.""" +904 # do molecular formula based on the parameters set for ms1 search +905 for peak in self.lcms_obj: +906 self.mass_spectrum_obj = peak.mass_spectrum +907 self.run_molecular_formula(peak.mass_spectrum.sort_by_abundance()) +908 +909 def run_target_worker_ms1(self): +910 """Run targeted molecular formula search on the ms1 mass spectrum.""" +911 # do molecular formula based on the external molecular reference list +912 pbar = tqdm.tqdm(self.lcms_obj) +913 +914 for peak in self.lcms_obj: +915 pbar.set_description_str( +916 desc=f"Started molecular formulae search for mass spectrum at RT {peak.retention_time} s", +917 refresh=True, +918 ) +919 +920 
self.mass_spectrum_obj = peak.mass_spectrum +921 +922 ion_charge = self.mass_spectrum_obj.polarity +923 +924 candidate_formulas = peak.targeted_molecular_formulas +925 +926 for i in candidate_formulas: +927 if self.lcms_obj.parameters.lc_ms.verbose_processing: +928 print(i) +929 if self.mass_spectrum_obj.molecular_search_settings.isProtonated: +930 ion_type = Labels.protonated_de_ion +931 +932 # ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True) +933 +934 self.search_mol_formulas( +935 candidate_formulas, ion_type, find_isotopologues=True +936 ) +937 +938 if self.mass_spectrum_obj.molecular_search_settings.isRadical: +939 ion_type = Labels.radical_ion +940 +941 # ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True) +942 self.search_mol_formulas( +943 candidate_formulas, ion_type, find_isotopologues=True +944 ) +945 +946 if self.mass_spectrum_obj.molecular_search_settings.isAdduct: +947 ion_type = Labels.adduct_ion +948 +949 adduct_list = ( +950 self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg +951 if ion_charge < 0 +952 else self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos +953 ) +954 +955 for adduct_atom in adduct_list: +956 self.search_mol_formulas( +957 candidate_formulas, +958 ion_type, +959 find_isotopologues=True, +960 adduct_atom=adduct_atom, +961 ) @@ -1035,367 +1186,463 @@

    -
     29class SearchMolecularFormulas:
    - 30    """ Class for searching molecular formulas in a mass spectrum.
    - 31
    - 32    Parameters
    - 33    ----------
    - 34    mass_spectrum_obj : MassSpectrum
    - 35        The mass spectrum object.
    - 36    sql_db : MolForm_SQL, optional
    - 37        The SQL database object, by default None.
    - 38    first_hit : bool, optional
    - 39        Flag to indicate whether to skip peaks that already have a molecular formula assigned, by default False.
    - 40    find_isotopologues : bool, optional
    - 41        Flag to indicate whether to find isotopologues, by default True.
    - 42
    - 43    Attributes
    - 44    ----------
    - 45    mass_spectrum_obj : MassSpectrum
    - 46        The mass spectrum object.
    - 47    sql_db : MolForm_SQL
    - 48        The SQL database object.
    - 49    first_hit : bool
    - 50        Flag to indicate whether to skip peaks that already have a molecular formula assigned.
    - 51    find_isotopologues : bool
    - 52        Flag to indicate whether to find isotopologues.
    - 53    
    - 54    
    - 55    Methods
    - 56    -------
    - 57    * run_search().
    - 58        Run the molecular formula search.
    - 59    * run_worker_mass_spectrum().
    - 60        Run the molecular formula search on the mass spectrum object.
    - 61    * run_worker_ms_peaks().
    - 62        Run the molecular formula search on the given list of mass spectrum peaks.
    - 63    * database_to_dict().
    - 64        Convert the database results to a dictionary.
    - 65    * run_molecular_formula().
    - 66        Run the molecular formula search on the given list of mass spectrum peaks.
    - 67    * search_mol_formulas().
    - 68        Search for molecular formulas in the mass spectrum.
    - 69    
    - 70    """
    - 71    
    - 72    def __init__(self, mass_spectrum_obj, sql_db=None, first_hit : bool=False, find_isotopologues : bool=True):
    - 73
    - 74        self.first_hit = first_hit
    - 75
    - 76        self.find_isotopologues = find_isotopologues
    - 77
    - 78        self.mass_spectrum_obj = mass_spectrum_obj
    - 79
    - 80        if not sql_db:
    - 81
    - 82            self.sql_db = MolForm_SQL(url=mass_spectrum_obj.molecular_search_settings.url_database)
    - 83
    - 84        else:
    - 85
    - 86            self.sql_db = sql_db
    +            
     27class SearchMolecularFormulas:
    + 28    """Class for searching molecular formulas in a mass spectrum.
    + 29
    + 30    Parameters
    + 31    ----------
    + 32    mass_spectrum_obj : MassSpectrum
    + 33        The mass spectrum object.
    + 34    sql_db : MolForm_SQL, optional
    + 35        The SQL database object, by default None.
    + 36    first_hit : bool, optional
    + 37        Flag to indicate whether to skip peaks that already have a molecular formula assigned, by default False.
    + 38    find_isotopologues : bool, optional
    + 39        Flag to indicate whether to find isotopologues, by default True.
    + 40
    + 41    Attributes
    + 42    ----------
    + 43    mass_spectrum_obj : MassSpectrum
    + 44        The mass spectrum object.
    + 45    sql_db : MolForm_SQL
    + 46        The SQL database object.
    + 47    first_hit : bool
    + 48        Flag to indicate whether to skip peaks that already have a molecular formula assigned.
    + 49    find_isotopologues : bool
    + 50        Flag to indicate whether to find isotopologues.
    + 51
    + 52
    + 53    Methods
    + 54    -------
    + 55    * run_search().
    + 56        Run the molecular formula search.
    + 57    * run_worker_mass_spectrum().
    + 58        Run the molecular formula search on the mass spectrum object.
    + 59    * run_worker_ms_peaks().
    + 60        Run the molecular formula search on the given list of mass spectrum peaks.
    + 61    * database_to_dict().
    + 62        Convert the database results to a dictionary.
    + 63    * run_molecular_formula().
    + 64        Run the molecular formula search on the given list of mass spectrum peaks.
    + 65    * search_mol_formulas().
    + 66        Search for molecular formulas in the mass spectrum.
    + 67
    + 68    """
    + 69
    + 70    def __init__(
    + 71        self,
    + 72        mass_spectrum_obj,
    + 73        sql_db=None,
    + 74        first_hit: bool = False,
    + 75        find_isotopologues: bool = True,
    + 76    ):
    + 77        self.first_hit = first_hit
    + 78
    + 79        self.find_isotopologues = find_isotopologues
    + 80
    + 81        self.mass_spectrum_obj = mass_spectrum_obj
    + 82
    + 83        if not sql_db:
    + 84            self.sql_db = MolForm_SQL(
    + 85                url=mass_spectrum_obj.molecular_search_settings.url_database
    + 86            )
      87
    - 88    def __enter__(self):
    - 89        """ Open the SQL database connection."""
    - 90        return self
    - 91
    - 92    def __exit__(self, exc_type, exc_val, exc_tb):
    - 93        """ Close the SQL database connection."""
    - 94        self.sql_db.close()
    - 95
    - 96        return False
    - 97
    - 98    def run_search(self, mspeaks : list, query : dict, min_abundance : float, ion_type : str, ion_charge : int, adduct_atom=None):
    - 99        """ Run the molecular formula search.
    + 88        else:
    + 89            self.sql_db = sql_db
    + 90
    + 91    def __enter__(self):
    + 92        """Open the SQL database connection."""
    + 93        return self
    + 94
    + 95    def __exit__(self, exc_type, exc_val, exc_tb):
    + 96        """Close the SQL database connection."""
    + 97        self.sql_db.close()
    + 98
    + 99        return False
     100
    -101        Parameters
    -102        ----------
    -103        mspeaks : list of MSPeak
    -104            The list of mass spectrum peaks.
    -105        query : dict
    -106            The query dictionary containing the possible molecular formulas.
    -107        min_abundance : float
    -108            The minimum abundance threshold.
    -109        ion_type : str
    -110            The ion type.
    -111        ion_charge : int
    -112            The ion charge.
    -113        adduct_atom : str, optional
    -114            The adduct atom, by default None.
    -115        """
    -116
    -117        def get_formulas(nominal_overlay : float=0.1):
    -118            """
    -119            Get the list of formulas based on the nominal overlay.
    -120
    -121            Parameters
    -122            ----------
    -123            nominal_overlay : float, optional
    -124                The nominal overlay, by default 0.1.
    -125
    -126            Returns
    -127            -------
    -128            list
    -129                The list of formulas.
    -130            """
    -131            nominal_mz = ms_peak.nominal_mz_exp
    -132
    -133            defect_mass = ms_peak.mz_exp - nominal_mz
    -134            nominal_masses = [nominal_mz]
    -135
    -136            if (defect_mass) >= 1 - nominal_overlay:
    -137                nominal_masses.append(nominal_mz + 1)
    -138            elif (defect_mass) <= nominal_overlay:
    -139                nominal_masses.append(nominal_mz - 1)
    -140
    -141            list_formulas_candidates = []
    -142
    -143            for nominal_mass in nominal_masses:
    -144                if nominal_mass in query.keys():
    -145                    list_formulas_candidates.extend(query.get(nominal_mass))
    +101    def run_search(
    +102        self,
    +103        mspeaks: list,
    +104        query: dict,
    +105        min_abundance: float,
    +106        ion_type: str,
    +107        ion_charge: int,
    +108        adduct_atom=None,
    +109    ):
    +110        """Run the molecular formula search.
    +111
    +112        Parameters
    +113        ----------
    +114        mspeaks : list of MSPeak
    +115            The list of mass spectrum peaks.
    +116        query : dict
    +117            The query dictionary containing the possible molecular formulas.
    +118        min_abundance : float
    +119            The minimum abundance threshold.
    +120        ion_type : str
    +121            The ion type.
    +122        ion_charge : int
    +123            The ion charge.
    +124        adduct_atom : str, optional
    +125            The adduct atom, by default None.
    +126        """
    +127
    +128        def get_formulas(nominal_overlay: float = 0.1):
    +129            """
    +130            Get the list of formulas based on the nominal overlay.
    +131
    +132            Parameters
    +133            ----------
    +134            nominal_overlay : float, optional
    +135                The nominal overlay, by default 0.1.
    +136
    +137            Returns
    +138            -------
    +139            list
    +140                The list of formulas.
    +141            """
    +142            nominal_mz = ms_peak.nominal_mz_exp
    +143
    +144            defect_mass = ms_peak.mz_exp - nominal_mz
    +145            nominal_masses = [nominal_mz]
     146
    -147            return list_formulas_candidates
    -148
    -149        all_assigned_indexes = list()
    -150
    -151        # molecular_search_settings = self.mass_spectrum_obj.molecular_search_settings
    -152
    -153        search_molfrom = SearchMolecularFormulaWorker(find_isotopologues=self.find_isotopologues)
    -154
    -155        for ms_peak in mspeaks:
    -156
    -157            # already assigned a molecular formula
    -158            if self.first_hit:
    +147            if (defect_mass) >= 1 - nominal_overlay:
    +148                nominal_masses.append(nominal_mz + 1)
    +149            elif (defect_mass) <= nominal_overlay:
    +150                nominal_masses.append(nominal_mz - 1)
    +151
    +152            list_formulas_candidates = []
    +153
    +154            for nominal_mass in nominal_masses:
    +155                if nominal_mass in query.keys():
    +156                    list_formulas_candidates.extend(query.get(nominal_mass))
    +157
    +158            return list_formulas_candidates
     159
    -160                if ms_peak.is_assigned:
    -161                    continue
    -162
    -163            ms_peak_indexes = search_molfrom.find_formulas(get_formulas(), min_abundance, self.mass_spectrum_obj, ms_peak, ion_type, ion_charge, adduct_atom)    
    -164
    -165            all_assigned_indexes.extend(ms_peak_indexes)
    -166
    -167        # all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(all_assigned_indexes, self.mass_spectrum_obj)
    -168
    -169        # all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(all_assigned_indexes, self.mass_spectrum_obj)
    -170
    -171        # MolecularFormulaSearchFilters().check_min_peaks(all_assigned_indexes, self.mass_spectrum_obj)
    -172        # filter per min peaks per mono isotopic class
    +160        all_assigned_indexes = list()
    +161
    +162        # molecular_search_settings = self.mass_spectrum_obj.molecular_search_settings
    +163
    +164        search_molfrom = SearchMolecularFormulaWorker(
    +165            find_isotopologues=self.find_isotopologues
    +166        )
    +167
    +168        for ms_peak in mspeaks:
    +169            # already assigned a molecular formula
    +170            if self.first_hit:
    +171                if ms_peak.is_assigned:
    +172                    continue
     173
    -174    def run_worker_mass_spectrum(self):
    -175        """ Run the molecular formula search on the mass spectrum object.
    -176        """
    -177        self.run_molecular_formula(self.mass_spectrum_obj.sort_by_abundance())    
    -178
    -179    def run_worker_ms_peaks(self, ms_peaks):
    -180        """ Run the molecular formula search on the given list of mass spectrum peaks.
    -181
    -182        Parameters
    -183        ----------
    -184        ms_peaks : list of MSPeak
    -185            The list of mass spectrum peaks.
    -186        """
    -187        self.run_molecular_formula(ms_peaks)           
    -188
    -189    @staticmethod
    -190    def database_to_dict(classe_str_list, nominal_mzs, mf_search_settings, ion_charge):
    -191        """ Convert the database results to a dictionary.
    +174            ms_peak_indexes = search_molfrom.find_formulas(
    +175                get_formulas(),
    +176                min_abundance,
    +177                self.mass_spectrum_obj,
    +178                ms_peak,
    +179                ion_type,
    +180                ion_charge,
    +181                adduct_atom,
    +182            )
    +183
    +184            all_assigned_indexes.extend(ms_peak_indexes)
    +185
    +186        # all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(all_assigned_indexes, self.mass_spectrum_obj)
    +187
    +188        # all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(all_assigned_indexes, self.mass_spectrum_obj)
    +189
    +190        # MolecularFormulaSearchFilters().check_min_peaks(all_assigned_indexes, self.mass_spectrum_obj)
    +191        # filter per min peaks per mono isotopic class
     192
    -193        Parameters
    -194        ----------
    -195        classe_str_list : list
    -196            The list of class strings.
    -197        nominal_mzs : list
    -198            The list of nominal m/z values.
    -199        mf_search_settings : MolecularFormulaSearchSettings
    -200            The molecular formula search settings.
    -201        ion_charge : int
    -202            The ion charge.
    -203
    -204        Returns
    -205        -------
    -206        dict
    -207            The dictionary containing the database results.
    -208        """
    -209        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    +193    def run_worker_mass_spectrum(self):
    +194        """Run the molecular formula search on the mass spectrum object."""
    +195        self.run_molecular_formula(self.mass_spectrum_obj.sort_by_abundance())
    +196
    +197    def run_worker_ms_peaks(self, ms_peaks):
    +198        """Run the molecular formula search on the given list of mass spectrum peaks.
    +199
    +200        Parameters
    +201        ----------
    +202        ms_peaks : list of MSPeak
    +203            The list of mass spectrum peaks.
    +204        """
    +205        self.run_molecular_formula(ms_peaks)
    +206
    +207    @staticmethod
    +208    def database_to_dict(classe_str_list, nominal_mzs, mf_search_settings, ion_charge):
    +209        """Convert the database results to a dictionary.
     210
    -211        dict_res = {}
    -212
    -213        if mf_search_settings.isProtonated:
    -214            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.protonated_de_ion, nominal_mzs, ion_charge, mf_search_settings)    
    -215
    -216        if mf_search_settings.isRadical:
    -217            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.radical_ion, nominal_mzs, ion_charge,  mf_search_settings)    
    -218
    -219        if mf_search_settings.isAdduct:
    -220
    -221            adduct_list = mf_search_settings.adduct_atoms_neg if ion_charge < 0 else mf_search_settings.adduct_atoms_pos
    -222            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.adduct_ion, nominal_mzs, ion_charge, mf_search_settings, adducts=adduct_list)    
    -223
    -224        return dict_res
    -225
    -226    @timeit       
    -227    def run_molecular_formula(self, ms_peaks):
    -228        """ Run the molecular formula search on the given list of mass spectrum peaks.
    -229
    -230        Parameters
    -231        ----------
    -232        ms_peaks : list of MSPeak
    -233            The list of mass spectrum peaks.
    -234        """
    -235        # number_of_process = multiprocessing.cpu_count()
    -236
    -237        #loading this on a shared memory would be better than having to serialize it for every process
    -238        #    waiting for python 3.8 release
    +211        Parameters
    +212        ----------
    +213        classe_str_list : list
    +214            The list of class strings.
    +215        nominal_mzs : list
    +216            The list of nominal m/z values.
    +217        mf_search_settings : MolecularFormulaSearchSettings
    +218            The molecular formula search settings.
    +219        ion_charge : int
    +220            The ion charge.
    +221
    +222        Returns
    +223        -------
    +224        dict
    +225            The dictionary containing the database results.
    +226        """
    +227        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    +228
    +229        dict_res = {}
    +230
    +231        if mf_search_settings.isProtonated:
    +232            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(
    +233                classe_str_list,
    +234                Labels.protonated_de_ion,
    +235                nominal_mzs,
    +236                ion_charge,
    +237                mf_search_settings,
    +238            )
     239
    -240        # ion charge for all the ion in the mass spectrum
    -241        # under the current structure is possible to search for individual m/z but it takes longer than allow all the m/z to be search against
    -242        ion_charge = self.mass_spectrum_obj.polarity
    -243
    -244        # use to limit the calculation of possible isotopologues
    -245        min_abundance = self.mass_spectrum_obj.min_abundance
    -246
    -247        # only query the database for formulas with the nominal m/z matching the mass spectrum data
    -248        # default m/z overlay is m/z 0.3 unit
    -249        # needs to improve to bin by mass defect instead, faster db creation and faster search execution time 
    -250        nominal_mzs = self.mass_spectrum_obj.nominal_mz
    -251
    -252        # reset average error, only relevant is average mass error method is being used
    -253        SearchMolecularFormulaWorker(find_isotopologues=self.find_isotopologues).reset_error(self.mass_spectrum_obj)
    -254
    -255        # check database for all possible molecular formula combinations based on the setting passed to self.mass_spectrum_obj.molecular_search_settings
    -256        classes = MolecularCombinations(self.sql_db).runworker(self.mass_spectrum_obj.molecular_search_settings)
    -257
    -258        # split the database load to not blowout the memory
    -259        # TODO add to the settings
    -260
    -261        def run():
    -262
    -263            for classe_chunk in chunks(classes, self.mass_spectrum_obj.molecular_search_settings.db_chunk_size): 
    -264
    -265                classes_str_list = [class_tuple[0] for class_tuple in classe_chunk]
    -266
    -267                # load the molecular formula objs binned by ion type and heteroatoms classes, {ion type:{classe:[list_formula]}}
    -268                # for adduct ion type a third key is added {atoms:{ion type:{classe:[list_formula]}}} 
    -269                dict_res = self.database_to_dict(classes_str_list, nominal_mzs, self.mass_spectrum_obj.molecular_search_settings, ion_charge)
    -270
    -271                pbar = tqdm.tqdm(classe_chunk)
    -272
    -273                for classe_tuple in pbar:
    -274
    -275                    # class string is a json serialized dict
    -276                    classe_str = classe_tuple[0]
    -277                    classe_dict = classe_tuple[1]
    -278
    -279                    if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    -280
    -281                        ion_type = Labels.protonated_de_ion
    -282
    -283                        pbar.set_description_str(desc="Started molecular formula search for class %s, (de)protonated " % classe_str, refresh=True)
    -284
    -285                        candidate_formulas = dict_res.get(ion_type).get(classe_str)
    +240        if mf_search_settings.isRadical:
    +241            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(
    +242                classe_str_list,
    +243                Labels.radical_ion,
    +244                nominal_mzs,
    +245                ion_charge,
    +246                mf_search_settings,
    +247            )
    +248
    +249        if mf_search_settings.isAdduct:
    +250            adduct_list = (
    +251                mf_search_settings.adduct_atoms_neg
    +252                if ion_charge < 0
    +253                else mf_search_settings.adduct_atoms_pos
    +254            )
    +255            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(
    +256                classe_str_list,
    +257                Labels.adduct_ion,
    +258                nominal_mzs,
    +259                ion_charge,
    +260                mf_search_settings,
    +261                adducts=adduct_list,
    +262            )
    +263
    +264        return dict_res
    +265
    +266    @timeit
    +267    def run_molecular_formula(self, ms_peaks):
    +268        """Run the molecular formula search on the given list of mass spectrum peaks.
    +269
    +270        Parameters
    +271        ----------
    +272        ms_peaks : list of MSPeak
    +273            The list of mass spectrum peaks.
    +274        """
    +275        # number_of_process = multiprocessing.cpu_count()
    +276
    +277        # loading this on a shared memory would be better than having to serialize it for every process
    +278        #    waiting for python 3.8 release
    +279
    +280        # ion charge for all the ion in the mass spectrum
    +281        # under the current structure is possible to search for individual m/z but it takes longer than allow all the m/z to be search against
    +282        ion_charge = self.mass_spectrum_obj.polarity
    +283
    +284        # use to limit the calculation of possible isotopologues
    +285        min_abundance = self.mass_spectrum_obj.min_abundance
     286
    -287                        if candidate_formulas:
    -288
    -289                            self.run_search(ms_peaks, candidate_formulas,
    -290                                            min_abundance, ion_type, ion_charge)
    +287        # only query the database for formulas with the nominal m/z matching the mass spectrum data
    +288        # default m/z overlay is m/z 0.3 unit
    +289        # needs to improve to bin by mass defect instead, faster db creation and faster search execution time
    +290        nominal_mzs = self.mass_spectrum_obj.nominal_mz
     291
    -292                    if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    -293
    -294                        pbar.set_description_str(desc="Started molecular formula search for class %s, radical " % classe_str, refresh=True)
    -295
    -296                        ion_type = Labels.radical_ion
    -297
    -298                        candidate_formulas = dict_res.get(ion_type).get(classe_str)
    -299
    -300                        if candidate_formulas:
    +292        # reset average error, only relevant is average mass error method is being used
    +293        SearchMolecularFormulaWorker(
    +294            find_isotopologues=self.find_isotopologues
    +295        ).reset_error(self.mass_spectrum_obj)
    +296
    +297        # check database for all possible molecular formula combinations based on the setting passed to self.mass_spectrum_obj.molecular_search_settings
    +298        classes = MolecularCombinations(self.sql_db).runworker(
    +299            self.mass_spectrum_obj.molecular_search_settings
    +300        )
     301
    -302                            self.run_search(ms_peaks, candidate_formulas,
    -303                                            min_abundance, ion_type, ion_charge)
    -304                    # looks for adduct, used_atom_valences should be 0 
    -305                    # this code does not support H exchance by halogen atoms
    -306                    if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    -307
    -308                        pbar.set_description_str(desc="Started molecular formula search for class %s, adduct " % classe_str, refresh=True)
    -309
    -310                        ion_type = Labels.adduct_ion
    -311                        dict_atoms_formulas =  dict_res.get(ion_type)
    -312
    -313                        for adduct_atom, dict_by_class in dict_atoms_formulas.items():
    -314
    -315                            candidate_formulas = dict_by_class.get(classe_str)
    -316
    -317                            if candidate_formulas:
    -318                                self.run_search(ms_peaks, candidate_formulas,
    -319                                                min_abundance, ion_type, ion_charge, adduct_atom=adduct_atom)
    -320
    -321        run()
    -322        self.sql_db.close()
    -323
    -324    def search_mol_formulas(self, possible_formulas_list: List[MolecularFormula], ion_type:str, 
    -325                            neutral_molform=True, find_isotopologues=True, adduct_atom=None) -> List[_MSPeak]:
    -326        """ Search for molecular formulas in the mass spectrum.
    -327
    -328        Parameters
    -329        ----------
    -330        possible_formulas_list : list of MolecularFormula
    -331            The list of possible molecular formulas.
    -332        ion_type : str
    -333            The ion type.
    -334        neutral_molform : bool, optional
    -335            Flag to indicate whether the molecular formulas are neutral, by default True.
    -336        find_isotopologues : bool, optional
    -337            Flag to indicate whether to find isotopologues, by default True.
    -338        adduct_atom : str, optional
    -339            The adduct atom, by default None.
    -340
    -341        Returns
    -342        -------
    -343        list of MSPeak
    -344            The list of mass spectrum peaks with assigned molecular formulas.
    -345        """
    -346        #neutral_molform: some reference files already present the formula on ion mode, for instance, bruker reference files
    -347        #    if that is the case than turn neutral_molform off
    -348        
    -349        SearchMolecularFormulaWorker(find_isotopologues=find_isotopologues).reset_error(self.mass_spectrum_obj)
    -350
    -351        initial_min_peak_bool = self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter
    -352        initial_runtime_kendrick_filter = self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter
    +302        # split the database load to not blowout the memory
    +303        # TODO add to the settings
    +304
    +305        def run():
    +306            for classe_chunk in chunks(
    +307                classes, self.mass_spectrum_obj.molecular_search_settings.db_chunk_size
    +308            ):
    +309                classes_str_list = [class_tuple[0] for class_tuple in classe_chunk]
    +310
    +311                # load the molecular formula objs binned by ion type and heteroatoms classes, {ion type:{classe:[list_formula]}}
    +312                # for adduct ion type a third key is added {atoms:{ion type:{classe:[list_formula]}}}
    +313                dict_res = self.database_to_dict(
    +314                    classes_str_list,
    +315                    nominal_mzs,
    +316                    self.mass_spectrum_obj.molecular_search_settings,
    +317                    ion_charge,
    +318                )
    +319
    +320                pbar = tqdm.tqdm(classe_chunk)
    +321
    +322                for classe_tuple in pbar:
    +323                    # class string is a json serialized dict
    +324                    classe_str = classe_tuple[0]
    +325                    classe_dict = classe_tuple[1]
    +326
    +327                    if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    +328                        ion_type = Labels.protonated_de_ion
    +329
    +330                        pbar.set_description_str(
    +331                            desc="Started molecular formula search for class %s, (de)protonated "
    +332                            % classe_str,
    +333                            refresh=True,
    +334                        )
    +335
    +336                        candidate_formulas = dict_res.get(ion_type).get(classe_str)
    +337
    +338                        if candidate_formulas:
    +339                            self.run_search(
    +340                                ms_peaks,
    +341                                candidate_formulas,
    +342                                min_abundance,
    +343                                ion_type,
    +344                                ion_charge,
    +345                            )
    +346
    +347                    if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    +348                        pbar.set_description_str(
    +349                            desc="Started molecular formula search for class %s, radical "
    +350                            % classe_str,
    +351                            refresh=True,
    +352                        )
     353
    -354        # Are the following 3 lines redundant? 
    -355        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
    -356        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False #TODO check this line
    -357        self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = False
    -358
    -359        possible_formulas_dict_nm = {}
    -360
    -361        for mf in possible_formulas_list:
    -362
    -363            if neutral_molform:
    -364                nm = int(mf.protonated_mz)
    -365            else:
    -366                nm = int(mf.mz_nominal_calc)
    -367        
    -368            if nm in possible_formulas_dict_nm.keys():
    -369
    -370                possible_formulas_dict_nm[nm].append(mf)
    -371
    -372            else:
    -373                
    -374                possible_formulas_dict_nm[nm] = [mf]
    -375
    -376        min_abundance = self.mass_spectrum_obj.min_abundance
    -377        
    -378        ion_type = ion_type
    -379        
    -380        self.run_search(self.mass_spectrum_obj, possible_formulas_dict_nm, min_abundance, ion_type, self.mass_spectrum_obj.polarity, adduct_atom=adduct_atom)          
    -381
    -382        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = initial_min_peak_bool
    -383        self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = initial_runtime_kendrick_filter
    -384
    -385        mspeaks = [mspeak for mspeak in self.mass_spectrum_obj if mspeak.is_assigned]
    -386
    -387        self.sql_db.close()
    -388
    -389        return mspeaks
    +354                        ion_type = Labels.radical_ion
    +355
    +356                        candidate_formulas = dict_res.get(ion_type).get(classe_str)
    +357
    +358                        if candidate_formulas:
    +359                            self.run_search(
    +360                                ms_peaks,
    +361                                candidate_formulas,
    +362                                min_abundance,
    +363                                ion_type,
    +364                                ion_charge,
    +365                            )
    +366                    # looks for adduct, used_atom_valences should be 0
    +367                    # this code does not support H exchance by halogen atoms
    +368                    if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    +369                        pbar.set_description_str(
    +370                            desc="Started molecular formula search for class %s, adduct "
    +371                            % classe_str,
    +372                            refresh=True,
    +373                        )
    +374
    +375                        ion_type = Labels.adduct_ion
    +376                        dict_atoms_formulas = dict_res.get(ion_type)
    +377
    +378                        for adduct_atom, dict_by_class in dict_atoms_formulas.items():
    +379                            candidate_formulas = dict_by_class.get(classe_str)
    +380
    +381                            if candidate_formulas:
    +382                                self.run_search(
    +383                                    ms_peaks,
    +384                                    candidate_formulas,
    +385                                    min_abundance,
    +386                                    ion_type,
    +387                                    ion_charge,
    +388                                    adduct_atom=adduct_atom,
    +389                                )
    +390
    +391        run()
    +392        self.sql_db.close()
    +393
    +394    def search_mol_formulas(
    +395        self,
    +396        possible_formulas_list: List[MolecularFormula],
    +397        ion_type: str,
    +398        neutral_molform=True,
    +399        find_isotopologues=True,
    +400        adduct_atom=None,
    +401    ) -> List[_MSPeak]:
    +402        """Search for molecular formulas in the mass spectrum.
    +403
    +404        Parameters
    +405        ----------
    +406        possible_formulas_list : list of MolecularFormula
    +407            The list of possible molecular formulas.
    +408        ion_type : str
    +409            The ion type.
    +410        neutral_molform : bool, optional
    +411            Flag to indicate whether the molecular formulas are neutral, by default True.
    +412        find_isotopologues : bool, optional
    +413            Flag to indicate whether to find isotopologues, by default True.
    +414        adduct_atom : str, optional
    +415            The adduct atom, by default None.
    +416
    +417        Returns
    +418        -------
    +419        list of MSPeak
    +420            The list of mass spectrum peaks with assigned molecular formulas.
    +421        """
    +422        # neutral_molform: some reference files already present the formula on ion mode, for instance, bruker reference files
    +423        #    if that is the case than turn neutral_molform off
    +424
    +425        SearchMolecularFormulaWorker(find_isotopologues=find_isotopologues).reset_error(
    +426            self.mass_spectrum_obj
    +427        )
    +428
    +429        initial_min_peak_bool = (
    +430            self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter
    +431        )
    +432        initial_runtime_kendrick_filter = (
    +433            self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter
    +434        )
    +435
    +436        # Are the following 3 lines redundant?
    +437        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
    +438        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = (
    +439            False  # TODO check this line
    +440        )
    +441        self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = (
    +442            False
    +443        )
    +444
    +445        possible_formulas_dict_nm = {}
    +446
    +447        for mf in possible_formulas_list:
    +448            if neutral_molform:
    +449                nm = int(mf.protonated_mz)
    +450            else:
    +451                nm = int(mf.mz_nominal_calc)
    +452
    +453            if nm in possible_formulas_dict_nm.keys():
    +454                possible_formulas_dict_nm[nm].append(mf)
    +455
    +456            else:
    +457                possible_formulas_dict_nm[nm] = [mf]
    +458
    +459        min_abundance = self.mass_spectrum_obj.min_abundance
    +460
    +461        ion_type = ion_type
    +462
    +463        self.run_search(
    +464            self.mass_spectrum_obj,
    +465            possible_formulas_dict_nm,
    +466            min_abundance,
    +467            ion_type,
    +468            self.mass_spectrum_obj.polarity,
    +469            adduct_atom=adduct_atom,
    +470        )
    +471
    +472        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = (
    +473            initial_min_peak_bool
    +474        )
    +475        self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = (
    +476            initial_runtime_kendrick_filter
    +477        )
    +478
    +479        mspeaks = [mspeak for mspeak in self.mass_spectrum_obj if mspeak.is_assigned]
    +480
    +481        self.sql_db.close()
    +482
    +483        return mspeaks
     
    @@ -1456,21 +1703,26 @@
    Methods
    -
    72    def __init__(self, mass_spectrum_obj, sql_db=None, first_hit : bool=False, find_isotopologues : bool=True):
    -73
    -74        self.first_hit = first_hit
    -75
    -76        self.find_isotopologues = find_isotopologues
    -77
    -78        self.mass_spectrum_obj = mass_spectrum_obj
    -79
    -80        if not sql_db:
    -81
    -82            self.sql_db = MolForm_SQL(url=mass_spectrum_obj.molecular_search_settings.url_database)
    -83
    -84        else:
    -85
    -86            self.sql_db = sql_db
    +            
    70    def __init__(
    +71        self,
    +72        mass_spectrum_obj,
    +73        sql_db=None,
    +74        first_hit: bool = False,
    +75        find_isotopologues: bool = True,
    +76    ):
    +77        self.first_hit = first_hit
    +78
    +79        self.find_isotopologues = find_isotopologues
    +80
    +81        self.mass_spectrum_obj = mass_spectrum_obj
    +82
    +83        if not sql_db:
    +84            self.sql_db = MolForm_SQL(
    +85                url=mass_spectrum_obj.molecular_search_settings.url_database
    +86            )
    +87
    +88        else:
    +89            self.sql_db = sql_db
     
    @@ -1521,81 +1773,97 @@
    Methods
    -
     98    def run_search(self, mspeaks : list, query : dict, min_abundance : float, ion_type : str, ion_charge : int, adduct_atom=None):
    - 99        """ Run the molecular formula search.
    -100
    -101        Parameters
    -102        ----------
    -103        mspeaks : list of MSPeak
    -104            The list of mass spectrum peaks.
    -105        query : dict
    -106            The query dictionary containing the possible molecular formulas.
    -107        min_abundance : float
    -108            The minimum abundance threshold.
    -109        ion_type : str
    -110            The ion type.
    -111        ion_charge : int
    -112            The ion charge.
    -113        adduct_atom : str, optional
    -114            The adduct atom, by default None.
    -115        """
    -116
    -117        def get_formulas(nominal_overlay : float=0.1):
    -118            """
    -119            Get the list of formulas based on the nominal overlay.
    -120
    -121            Parameters
    -122            ----------
    -123            nominal_overlay : float, optional
    -124                The nominal overlay, by default 0.1.
    -125
    -126            Returns
    -127            -------
    -128            list
    -129                The list of formulas.
    -130            """
    -131            nominal_mz = ms_peak.nominal_mz_exp
    -132
    -133            defect_mass = ms_peak.mz_exp - nominal_mz
    -134            nominal_masses = [nominal_mz]
    -135
    -136            if (defect_mass) >= 1 - nominal_overlay:
    -137                nominal_masses.append(nominal_mz + 1)
    -138            elif (defect_mass) <= nominal_overlay:
    -139                nominal_masses.append(nominal_mz - 1)
    -140
    -141            list_formulas_candidates = []
    -142
    -143            for nominal_mass in nominal_masses:
    -144                if nominal_mass in query.keys():
    -145                    list_formulas_candidates.extend(query.get(nominal_mass))
    +            
    101    def run_search(
    +102        self,
    +103        mspeaks: list,
    +104        query: dict,
    +105        min_abundance: float,
    +106        ion_type: str,
    +107        ion_charge: int,
    +108        adduct_atom=None,
    +109    ):
    +110        """Run the molecular formula search.
    +111
    +112        Parameters
    +113        ----------
    +114        mspeaks : list of MSPeak
    +115            The list of mass spectrum peaks.
    +116        query : dict
    +117            The query dictionary containing the possible molecular formulas.
    +118        min_abundance : float
    +119            The minimum abundance threshold.
    +120        ion_type : str
    +121            The ion type.
    +122        ion_charge : int
    +123            The ion charge.
    +124        adduct_atom : str, optional
    +125            The adduct atom, by default None.
    +126        """
    +127
    +128        def get_formulas(nominal_overlay: float = 0.1):
    +129            """
    +130            Get the list of formulas based on the nominal overlay.
    +131
    +132            Parameters
    +133            ----------
    +134            nominal_overlay : float, optional
    +135                The nominal overlay, by default 0.1.
    +136
    +137            Returns
    +138            -------
    +139            list
    +140                The list of formulas.
    +141            """
    +142            nominal_mz = ms_peak.nominal_mz_exp
    +143
    +144            defect_mass = ms_peak.mz_exp - nominal_mz
    +145            nominal_masses = [nominal_mz]
     146
    -147            return list_formulas_candidates
    -148
    -149        all_assigned_indexes = list()
    -150
    -151        # molecular_search_settings = self.mass_spectrum_obj.molecular_search_settings
    -152
    -153        search_molfrom = SearchMolecularFormulaWorker(find_isotopologues=self.find_isotopologues)
    -154
    -155        for ms_peak in mspeaks:
    -156
    -157            # already assigned a molecular formula
    -158            if self.first_hit:
    +147            if (defect_mass) >= 1 - nominal_overlay:
    +148                nominal_masses.append(nominal_mz + 1)
    +149            elif (defect_mass) <= nominal_overlay:
    +150                nominal_masses.append(nominal_mz - 1)
    +151
    +152            list_formulas_candidates = []
    +153
    +154            for nominal_mass in nominal_masses:
    +155                if nominal_mass in query.keys():
    +156                    list_formulas_candidates.extend(query.get(nominal_mass))
    +157
    +158            return list_formulas_candidates
     159
    -160                if ms_peak.is_assigned:
    -161                    continue
    -162
    -163            ms_peak_indexes = search_molfrom.find_formulas(get_formulas(), min_abundance, self.mass_spectrum_obj, ms_peak, ion_type, ion_charge, adduct_atom)    
    -164
    -165            all_assigned_indexes.extend(ms_peak_indexes)
    -166
    -167        # all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(all_assigned_indexes, self.mass_spectrum_obj)
    -168
    -169        # all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(all_assigned_indexes, self.mass_spectrum_obj)
    -170
    -171        # MolecularFormulaSearchFilters().check_min_peaks(all_assigned_indexes, self.mass_spectrum_obj)
    -172        # filter per min peaks per mono isotopic class
    +160        all_assigned_indexes = list()
    +161
    +162        # molecular_search_settings = self.mass_spectrum_obj.molecular_search_settings
    +163
    +164        search_molfrom = SearchMolecularFormulaWorker(
    +165            find_isotopologues=self.find_isotopologues
    +166        )
    +167
    +168        for ms_peak in mspeaks:
    +169            # already assigned a molecular formula
    +170            if self.first_hit:
    +171                if ms_peak.is_assigned:
    +172                    continue
    +173
    +174            ms_peak_indexes = search_molfrom.find_formulas(
    +175                get_formulas(),
    +176                min_abundance,
    +177                self.mass_spectrum_obj,
    +178                ms_peak,
    +179                ion_type,
    +180                ion_charge,
    +181                adduct_atom,
    +182            )
    +183
    +184            all_assigned_indexes.extend(ms_peak_indexes)
    +185
    +186        # all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(all_assigned_indexes, self.mass_spectrum_obj)
    +187
    +188        # all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(all_assigned_indexes, self.mass_spectrum_obj)
    +189
    +190        # MolecularFormulaSearchFilters().check_min_peaks(all_assigned_indexes, self.mass_spectrum_obj)
    +191        # filter per min peaks per mono isotopic class
     
    @@ -1632,10 +1900,9 @@
    Parameters
    -
    174    def run_worker_mass_spectrum(self):
    -175        """ Run the molecular formula search on the mass spectrum object.
    -176        """
    -177        self.run_molecular_formula(self.mass_spectrum_obj.sort_by_abundance())    
    +            
    193    def run_worker_mass_spectrum(self):
    +194        """Run the molecular formula search on the mass spectrum object."""
    +195        self.run_molecular_formula(self.mass_spectrum_obj.sort_by_abundance())
     
    @@ -1655,15 +1922,15 @@
    Parameters
    -
    179    def run_worker_ms_peaks(self, ms_peaks):
    -180        """ Run the molecular formula search on the given list of mass spectrum peaks.
    -181
    -182        Parameters
    -183        ----------
    -184        ms_peaks : list of MSPeak
    -185            The list of mass spectrum peaks.
    -186        """
    -187        self.run_molecular_formula(ms_peaks)           
    +            
    197    def run_worker_ms_peaks(self, ms_peaks):
    +198        """Run the molecular formula search on the given list of mass spectrum peaks.
    +199
    +200        Parameters
    +201        ----------
    +202        ms_peaks : list of MSPeak
    +203            The list of mass spectrum peaks.
    +204        """
    +205        self.run_molecular_formula(ms_peaks)
     
    @@ -1691,42 +1958,64 @@
    Parameters
    -
    189    @staticmethod
    -190    def database_to_dict(classe_str_list, nominal_mzs, mf_search_settings, ion_charge):
    -191        """ Convert the database results to a dictionary.
    -192
    -193        Parameters
    -194        ----------
    -195        classe_str_list : list
    -196            The list of class strings.
    -197        nominal_mzs : list
    -198            The list of nominal m/z values.
    -199        mf_search_settings : MolecularFormulaSearchSettings
    -200            The molecular formula search settings.
    -201        ion_charge : int
    -202            The ion charge.
    -203
    -204        Returns
    -205        -------
    -206        dict
    -207            The dictionary containing the database results.
    -208        """
    -209        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    +            
    207    @staticmethod
    +208    def database_to_dict(classe_str_list, nominal_mzs, mf_search_settings, ion_charge):
    +209        """Convert the database results to a dictionary.
     210
    -211        dict_res = {}
    -212
    -213        if mf_search_settings.isProtonated:
    -214            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.protonated_de_ion, nominal_mzs, ion_charge, mf_search_settings)    
    -215
    -216        if mf_search_settings.isRadical:
    -217            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.radical_ion, nominal_mzs, ion_charge,  mf_search_settings)    
    -218
    -219        if mf_search_settings.isAdduct:
    -220
    -221            adduct_list = mf_search_settings.adduct_atoms_neg if ion_charge < 0 else mf_search_settings.adduct_atoms_pos
    -222            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.adduct_ion, nominal_mzs, ion_charge, mf_search_settings, adducts=adduct_list)    
    -223
    -224        return dict_res
    +211        Parameters
    +212        ----------
    +213        classe_str_list : list
    +214            The list of class strings.
    +215        nominal_mzs : list
    +216            The list of nominal m/z values.
    +217        mf_search_settings : MolecularFormulaSearchSettings
    +218            The molecular formula search settings.
    +219        ion_charge : int
    +220            The ion charge.
    +221
    +222        Returns
    +223        -------
    +224        dict
    +225            The dictionary containing the database results.
    +226        """
    +227        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    +228
    +229        dict_res = {}
    +230
    +231        if mf_search_settings.isProtonated:
    +232            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(
    +233                classe_str_list,
    +234                Labels.protonated_de_ion,
    +235                nominal_mzs,
    +236                ion_charge,
    +237                mf_search_settings,
    +238            )
    +239
    +240        if mf_search_settings.isRadical:
    +241            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(
    +242                classe_str_list,
    +243                Labels.radical_ion,
    +244                nominal_mzs,
    +245                ion_charge,
    +246                mf_search_settings,
    +247            )
    +248
    +249        if mf_search_settings.isAdduct:
    +250            adduct_list = (
    +251                mf_search_settings.adduct_atoms_neg
    +252                if ion_charge < 0
    +253                else mf_search_settings.adduct_atoms_pos
    +254            )
    +255            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(
    +256                classe_str_list,
    +257                Labels.adduct_ion,
    +258                nominal_mzs,
    +259                ion_charge,
    +260                mf_search_settings,
    +261                adducts=adduct_list,
    +262            )
    +263
    +264        return dict_res
     
    @@ -1765,16 +2054,16 @@
    Returns
    -
    17    def timed(*args, **kw):
    -18        ts = time.time()
    -19        result = method(*args, **kw)
    -20        te = time.time()
    -21        if 'log_time' in kw:
    -22            name = kw.get('log_name', method.__name__.upper())
    -23            kw['log_time'][name] = int((te - ts) * 1000)
    -24        else:
    -25            print("%r  %2.2f ms" % (method.__name__, (te - ts) * 1000))
    -26        return result
    +            
    18    def timed(*args, **kw):
    +19        ts = time.time()
    +20        result = method(*args, **kw)
    +21        te = time.time()
    +22        if "log_time" in kw:
    +23            name = kw.get("log_name", method.__name__.upper())
    +24            kw["log_time"][name] = int((te - ts) * 1000)
    +25        else:
    +26            print("%r  %2.2f ms" % (method.__name__, (te - ts) * 1000))
    +27        return result
     
    @@ -1801,72 +2090,96 @@
    Parameters
    -
    324    def search_mol_formulas(self, possible_formulas_list: List[MolecularFormula], ion_type:str, 
    -325                            neutral_molform=True, find_isotopologues=True, adduct_atom=None) -> List[_MSPeak]:
    -326        """ Search for molecular formulas in the mass spectrum.
    -327
    -328        Parameters
    -329        ----------
    -330        possible_formulas_list : list of MolecularFormula
    -331            The list of possible molecular formulas.
    -332        ion_type : str
    -333            The ion type.
    -334        neutral_molform : bool, optional
    -335            Flag to indicate whether the molecular formulas are neutral, by default True.
    -336        find_isotopologues : bool, optional
    -337            Flag to indicate whether to find isotopologues, by default True.
    -338        adduct_atom : str, optional
    -339            The adduct atom, by default None.
    -340
    -341        Returns
    -342        -------
    -343        list of MSPeak
    -344            The list of mass spectrum peaks with assigned molecular formulas.
    -345        """
    -346        #neutral_molform: some reference files already present the formula on ion mode, for instance, bruker reference files
    -347        #    if that is the case than turn neutral_molform off
    -348        
    -349        SearchMolecularFormulaWorker(find_isotopologues=find_isotopologues).reset_error(self.mass_spectrum_obj)
    -350
    -351        initial_min_peak_bool = self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter
    -352        initial_runtime_kendrick_filter = self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter
    -353
    -354        # Are the following 3 lines redundant? 
    -355        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
    -356        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False #TODO check this line
    -357        self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = False
    -358
    -359        possible_formulas_dict_nm = {}
    -360
    -361        for mf in possible_formulas_list:
    -362
    -363            if neutral_molform:
    -364                nm = int(mf.protonated_mz)
    -365            else:
    -366                nm = int(mf.mz_nominal_calc)
    -367        
    -368            if nm in possible_formulas_dict_nm.keys():
    -369
    -370                possible_formulas_dict_nm[nm].append(mf)
    -371
    -372            else:
    -373                
    -374                possible_formulas_dict_nm[nm] = [mf]
    -375
    -376        min_abundance = self.mass_spectrum_obj.min_abundance
    -377        
    -378        ion_type = ion_type
    -379        
    -380        self.run_search(self.mass_spectrum_obj, possible_formulas_dict_nm, min_abundance, ion_type, self.mass_spectrum_obj.polarity, adduct_atom=adduct_atom)          
    -381
    -382        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = initial_min_peak_bool
    -383        self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = initial_runtime_kendrick_filter
    -384
    -385        mspeaks = [mspeak for mspeak in self.mass_spectrum_obj if mspeak.is_assigned]
    -386
    -387        self.sql_db.close()
    -388
    -389        return mspeaks
    +            
    394    def search_mol_formulas(
    +395        self,
    +396        possible_formulas_list: List[MolecularFormula],
    +397        ion_type: str,
    +398        neutral_molform=True,
    +399        find_isotopologues=True,
    +400        adduct_atom=None,
    +401    ) -> List[_MSPeak]:
    +402        """Search for molecular formulas in the mass spectrum.
    +403
    +404        Parameters
    +405        ----------
    +406        possible_formulas_list : list of MolecularFormula
    +407            The list of possible molecular formulas.
    +408        ion_type : str
    +409            The ion type.
    +410        neutral_molform : bool, optional
    +411            Flag to indicate whether the molecular formulas are neutral, by default True.
    +412        find_isotopologues : bool, optional
    +413            Flag to indicate whether to find isotopologues, by default True.
    +414        adduct_atom : str, optional
    +415            The adduct atom, by default None.
    +416
    +417        Returns
    +418        -------
    +419        list of MSPeak
    +420            The list of mass spectrum peaks with assigned molecular formulas.
    +421        """
    +422        # neutral_molform: some reference files already present the formula on ion mode, for instance, bruker reference files
    +423        #    if that is the case than turn neutral_molform off
    +424
    +425        SearchMolecularFormulaWorker(find_isotopologues=find_isotopologues).reset_error(
    +426            self.mass_spectrum_obj
    +427        )
    +428
    +429        initial_min_peak_bool = (
    +430            self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter
    +431        )
    +432        initial_runtime_kendrick_filter = (
    +433            self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter
    +434        )
    +435
    +436        # Are the following 3 lines redundant?
    +437        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False
    +438        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = (
    +439            False  # TODO check this line
    +440        )
    +441        self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = (
    +442            False
    +443        )
    +444
    +445        possible_formulas_dict_nm = {}
    +446
    +447        for mf in possible_formulas_list:
    +448            if neutral_molform:
    +449                nm = int(mf.protonated_mz)
    +450            else:
    +451                nm = int(mf.mz_nominal_calc)
    +452
    +453            if nm in possible_formulas_dict_nm.keys():
    +454                possible_formulas_dict_nm[nm].append(mf)
    +455
    +456            else:
    +457                possible_formulas_dict_nm[nm] = [mf]
    +458
    +459        min_abundance = self.mass_spectrum_obj.min_abundance
    +460
    +461        ion_type = ion_type
    +462
    +463        self.run_search(
    +464            self.mass_spectrum_obj,
    +465            possible_formulas_dict_nm,
    +466            min_abundance,
    +467            ion_type,
    +468            self.mass_spectrum_obj.polarity,
    +469            adduct_atom=adduct_atom,
    +470        )
    +471
    +472        self.mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = (
    +473            initial_min_peak_bool
    +474        )
    +475        self.mass_spectrum_obj.molecular_search_settings.use_runtime_kendrick_filter = (
    +476            initial_runtime_kendrick_filter
    +477        )
    +478
    +479        mspeaks = [mspeak for mspeak in self.mass_spectrum_obj if mspeak.is_assigned]
    +480
    +481        self.sql_db.close()
    +482
    +483        return mspeaks
     
    @@ -1908,336 +2221,383 @@
    Returns
    -
    392class SearchMolecularFormulaWorker:
    -393    """ Class for searching molecular formulas in a mass spectrum.
    -394    
    -395    Parameters
    -396    ----------
    -397    find_isotopologues : bool, optional
    -398        Flag to indicate whether to find isotopologues, by default True.
    -399    
    -400    Attributes
    -401    ----------
    -402    find_isotopologues : bool
    -403        Flag to indicate whether to find isotopologues.
    -404    
    -405    Methods
    -406    -------
    -407    * reset_error().
    -408        Reset the error variables.
    -409    * set_last_error().
    -410        Set the last error.
    -411    * find_formulas().
    -412        Find the formulas.
    -413    * calc_error().
    -414        Calculate the error.  
    -415    """
    -416    # TODO add reset error function
    -417    # needs this wraper to pass the class to multiprocessing
    -418
    -419    def __init__(self, find_isotopologues=True):
    -420        self.find_isotopologues = find_isotopologues
    -421
    -422    def __call__(self, args):
    -423        """ Call the find formulas function. 
    -424        
    -425        Parameters
    -426        ----------
    -427        args : tuple
    -428            The arguments.
    -429        
    -430        Returns
    -431        -------
    -432        list
    -433            The list of mass spectrum peaks with assigned molecular formulas.
    -434        """
    -435        return self.find_formulas(*args)  # ,args[1]
    -436
    -437    def reset_error(self, mass_spectrum_obj):
    -438        """ Reset the error variables.
    -439        
    -440        Parameters
    -441        ----------
    -442        mass_spectrum_obj : MassSpectrum
    -443            The mass spectrum object.
    -444        """
    -445        global last_error, last_dif, closest_error, error_average, nbValues  
    -446        last_error, last_dif, closest_error, nbValues  = 0.0, 0.0, 0.0, 0.0
    -447
    -448        error_average = 0
    -449
    -450    def set_last_error(self, error, mass_spectrum_obj):
    -451        """ Set the last error.
    -452        
    -453        Parameters
    -454        ----------
    -455        error : float
    -456            The error.
    -457        mass_spectrum_obj : MassSpectrum
    -458            The mass spectrum object.
    -459        """
    -460        # set the changes to the global variables, not internal ones
    -461        global last_error, last_dif, closest_error, error_average, nbValues
    -462
    -463        if mass_spectrum_obj.molecular_search_settings.error_method == 'distance':
    -464
    -465            dif = error - last_error
    -466            if dif < last_dif:
    -467                last_dif = dif
    -468                closest_error = error
    -469                mass_spectrum_obj.molecular_search_settings.min_ppm_error = closest_error - mass_spectrum_obj.molecular_search_settings.mz_error_range
    -470                mass_spectrum_obj.molecular_search_settings.max_ppm_error = closest_error + mass_spectrum_obj.molecular_search_settings.mz_error_range
    -471
    -472        elif mass_spectrum_obj.molecular_search_settings.error_method == 'lowest':
    -473
    -474            if error < last_error:
    -475                mass_spectrum_obj.molecular_search_settings.min_ppm_error = error - mass_spectrum_obj.molecular_search_settings.mz_error_range
    -476                mass_spectrum_obj.molecular_search_settings.max_ppm_error = error + mass_spectrum_obj.molecular_search_settings.mz_error_range
    -477                last_error = error
    -478
    -479
    -480        elif mass_spectrum_obj.molecular_search_settings.error_method == 'symmetrical':
    -481
    -482            mass_spectrum_obj.molecular_search_settings.min_ppm_error = mass_spectrum_obj.molecular_search_settings.mz_error_average - mass_spectrum_obj.molecular_search_settings.mz_error_range
    -483            mass_spectrum_obj.molecular_search_settings.max_ppm_error = mass_spectrum_obj.molecular_search_settings.mz_error_average + mass_spectrum_obj.molecular_search_settings.mz_error_range
    -484
    -485        elif mass_spectrum_obj.molecular_search_settings.error_method == 'average':
    -486
    -487            nbValues += 1
    -488            error_average = error_average + ((error - error_average) / nbValues)
    -489            mass_spectrum_obj.molecular_search_settings.min_ppm_error = error_average - mass_spectrum_obj.molecular_search_settings.mz_error_range
    -490            mass_spectrum_obj.molecular_search_settings.max_ppm_error = error_average + mass_spectrum_obj.molecular_search_settings.mz_error_range    
    -491
    -492        else:
    -493            # using set mass_spectrum_obj.molecular_search_settings.min_ppm_error  and max_ppm_error range
    -494            pass
    -495
    -496        #returns the error based on the selected method at mass_spectrum_obj.molecular_search_settings.method
    -497        
    -498    @staticmethod
    -499    def calc_error(mz_exp, mz_calc, method='ppm'):
    -500        """ Calculate the error.
    -501        
    -502        Parameters
    -503        ----------
    -504        mz_exp : float
    -505            The experimental m/z value.
    -506        mz_calc : float
    -507            The calculated m/z value.
    -508        method : str, optional
    -509            The method, by default 'ppm'.
    -510        
    -511        Raises
    -512        -------
    -513        Exception
    -514            If the method is not ppm or ppb.
    -515        
    -516        Returns
    -517        -------
    -518        float
    -519            The error.
    -520        """
    -521
    -522        if method == 'ppm':
    -523            multi_factor = 1_000_000
    +            
    486class SearchMolecularFormulaWorker:
    +487    """Class for searching molecular formulas in a mass spectrum.
    +488
    +489    Parameters
    +490    ----------
    +491    find_isotopologues : bool, optional
    +492        Flag to indicate whether to find isotopologues, by default True.
    +493
    +494    Attributes
    +495    ----------
    +496    find_isotopologues : bool
    +497        Flag to indicate whether to find isotopologues.
    +498
    +499    Methods
    +500    -------
    +501    * reset_error().
    +502        Reset the error variables.
    +503    * set_last_error().
    +504        Set the last error.
    +505    * find_formulas().
    +506        Find the formulas.
    +507    * calc_error().
    +508        Calculate the error.
    +509    """
    +510
    +511    # TODO add reset error function
    +512    # needs this wraper to pass the class to multiprocessing
    +513
    +514    def __init__(self, find_isotopologues=True):
    +515        self.find_isotopologues = find_isotopologues
    +516
    +517    def __call__(self, args):
    +518        """Call the find formulas function.
    +519
    +520        Parameters
    +521        ----------
    +522        args : tuple
    +523            The arguments.
     524
    -525        elif method == 'ppb':
    -526            multi_factor = 1_000_000_000
    -527
    -528        elif method == 'perc':
    -529            multi_factor = 100
    -530
    -531        else:
    -532            raise Exception("method needs to be ppm or ppb, you have entered %s" % method)
    -533
    -534        if mz_exp:
    -535
    -536            return ((mz_exp - mz_calc) / mz_calc) * multi_factor
    -537
    -538        else:
    -539
    -540            raise Exception("Please set mz_calc first")
    -541
    -542    def find_formulas(self, formulas, min_abundance,
    -543                      mass_spectrum_obj, ms_peak, ion_type, ion_charge, adduct_atom=None):
    -544        
    -545        """ Find the formulas.
    -546        
    -547        Parameters
    -548        ----------
    -549        formulas : list of MolecularFormula
    -550            The list of molecular formulas.
    -551        min_abundance : float
    -552            The minimum abundance threshold.
    -553        mass_spectrum_obj : MassSpectrum
    -554            The mass spectrum object.
    -555        ms_peak : MSPeak
    -556            The mass spectrum peak.
    -557        ion_type : str
    -558            The ion type.
    -559        ion_charge : int
    -560            The ion charge.
    -561        adduct_atom : str, optional
    -562            The adduct atom, by default None.
    -563        
    -564        Returns
    -565        -------
    -566        list of MSPeak
    -567            The list of mass spectrum peaks with assigned molecular formulas.
    -568        
    -569        Notes
    -570        -----
    -571        Uses the closest error the next search (this is not ideal, it needs to use confidence 
    -572        metric to choose the right candidate then propagate the error using the error from the best candidate).
    -573        It needs to add s/n to the equation.
    -574        It need optimization to define the mz_error_range within a m/z unit since it is directly proportional 
    -575        with the mass, and inversely proportional to the rp. It's not linear, i.e., sigma mass.
    -576        The idea it to correlate sigma to resolving power, signal to noise and sample complexity per mz unit.
    -577        Method='distance'
    -578        """
    -579        mspeak_assigned_index = list()
    -580
    -581        min_ppm_error = mass_spectrum_obj.molecular_search_settings.min_ppm_error 
    -582        max_ppm_error = mass_spectrum_obj.molecular_search_settings.max_ppm_error
    -583
    -584        min_abun_error = mass_spectrum_obj.molecular_search_settings.min_abun_error
    -585        max_abun_error = mass_spectrum_obj.molecular_search_settings.max_abun_error
    -586
    -587        # f = open("abundance_error.txt", "a+")    
    -588        ms_peak_mz_exp, ms_peak_abundance = ms_peak.mz_exp, ms_peak.abundance
    -589        # min_error = min([pmf.mz_error for pmf in possible_formulas])
    -590
    -591        def mass_by_ion_type(possible_formula_obj):
    -592
    -593            if ion_type == Labels.protonated_de_ion:
    -594
    -595                return possible_formula_obj._protonated_mz(ion_charge)
    -596
    -597            elif ion_type == Labels.radical_ion:
    -598
    -599                return possible_formula_obj._radical_mz(ion_charge)
    -600
    -601            elif ion_type == Labels.adduct_ion and adduct_atom:
    -602
    -603                return possible_formula._adduct_mz(ion_charge, adduct_atom)
    -604
    -605            else:
    -606                # will return externally calculated mz if is set, #use on Bruker Reference list import
    -607                # if the ion type is known the ion mass based on molecular formula ion type
    -608                # if ion type is unknow will return neutral mass 
    -609                return possible_formula.mz_calc
    -610
    -611        if formulas:
    -612            if isinstance(formulas[0], LCMSLibRefMolecularFormula):
    +525        Returns
    +526        -------
    +527        list
    +528            The list of mass spectrum peaks with assigned molecular formulas.
    +529        """
    +530        return self.find_formulas(*args)  # ,args[1]
    +531
    +532    def reset_error(self, mass_spectrum_obj):
    +533        """Reset the error variables.
    +534
    +535        Parameters
    +536        ----------
    +537        mass_spectrum_obj : MassSpectrum
    +538            The mass spectrum object.
    +539        """
    +540        global last_error, last_dif, closest_error, error_average, nbValues
    +541        last_error, last_dif, closest_error, nbValues = 0.0, 0.0, 0.0, 0.0
    +542
    +543        error_average = 0
    +544
    +545    def set_last_error(self, error, mass_spectrum_obj):
    +546        """Set the last error.
    +547
    +548        Parameters
    +549        ----------
    +550        error : float
    +551            The error.
    +552        mass_spectrum_obj : MassSpectrum
    +553            The mass spectrum object.
    +554        """
    +555        # set the changes to the global variables, not internal ones
    +556        global last_error, last_dif, closest_error, error_average, nbValues
    +557
    +558        if mass_spectrum_obj.molecular_search_settings.error_method == "distance":
    +559            dif = error - last_error
    +560            if dif < last_dif:
    +561                last_dif = dif
    +562                closest_error = error
    +563                mass_spectrum_obj.molecular_search_settings.min_ppm_error = (
    +564                    closest_error
    +565                    - mass_spectrum_obj.molecular_search_settings.mz_error_range
    +566                )
    +567                mass_spectrum_obj.molecular_search_settings.max_ppm_error = (
    +568                    closest_error
    +569                    + mass_spectrum_obj.molecular_search_settings.mz_error_range
    +570                )
    +571
    +572        elif mass_spectrum_obj.molecular_search_settings.error_method == "lowest":
    +573            if error < last_error:
    +574                mass_spectrum_obj.molecular_search_settings.min_ppm_error = (
    +575                    error - mass_spectrum_obj.molecular_search_settings.mz_error_range
    +576                )
    +577                mass_spectrum_obj.molecular_search_settings.max_ppm_error = (
    +578                    error + mass_spectrum_obj.molecular_search_settings.mz_error_range
    +579                )
    +580                last_error = error
    +581
    +582        elif mass_spectrum_obj.molecular_search_settings.error_method == "symmetrical":
    +583            mass_spectrum_obj.molecular_search_settings.min_ppm_error = (
    +584                mass_spectrum_obj.molecular_search_settings.mz_error_average
    +585                - mass_spectrum_obj.molecular_search_settings.mz_error_range
    +586            )
    +587            mass_spectrum_obj.molecular_search_settings.max_ppm_error = (
    +588                mass_spectrum_obj.molecular_search_settings.mz_error_average
    +589                + mass_spectrum_obj.molecular_search_settings.mz_error_range
    +590            )
    +591
    +592        elif mass_spectrum_obj.molecular_search_settings.error_method == "average":
    +593            nbValues += 1
    +594            error_average = error_average + ((error - error_average) / nbValues)
    +595            mass_spectrum_obj.molecular_search_settings.min_ppm_error = (
    +596                error_average
    +597                - mass_spectrum_obj.molecular_search_settings.mz_error_range
    +598            )
    +599            mass_spectrum_obj.molecular_search_settings.max_ppm_error = (
    +600                error_average
    +601                + mass_spectrum_obj.molecular_search_settings.mz_error_range
    +602            )
    +603
    +604        else:
    +605            # using set mass_spectrum_obj.molecular_search_settings.min_ppm_error  and max_ppm_error range
    +606            pass
    +607
    +608        # returns the error based on the selected method at mass_spectrum_obj.molecular_search_settings.method
    +609
    +610    @staticmethod
    +611    def calc_error(mz_exp, mz_calc, method="ppm"):
    +612        """Calculate the error.
     613
    -614                possible_mf_class = True
    -615            
    -616            else:
    -617
    -618                possible_mf_class = False    
    -619
    -620        for possible_formula in formulas:
    -621
    -622            if possible_formula:
    -623
    -624                error = self.calc_error(ms_peak_mz_exp, mass_by_ion_type(possible_formula))
    -625
    -626                # error = possible_formula.mz_error
    +614        Parameters
    +615        ----------
    +616        mz_exp : float
    +617            The experimental m/z value.
    +618        mz_calc : float
    +619            The calculated m/z value.
    +620        method : str, optional
    +621            The method, by default 'ppm'.
    +622
    +623        Raises
    +624        -------
    +625        Exception
    +626            If the method is not ppm or ppb.
     627
    -628                if min_ppm_error <= error <= max_ppm_error:
    -629
    -630                    # update the error
    -631
    -632                    self.set_last_error(error, mass_spectrum_obj)
    +628        Returns
    +629        -------
    +630        float
    +631            The error.
    +632        """
     633
    -634                    # add molecular formula match to ms_peak
    -635
    -636                    # get molecular formula dict from sql obj
    -637                    # formula_dict = pickle.loads(possible_formula.mol_formula)
    -638                    #if possible_mf_class:
    -639                        
    -640                    #    molecular_formula = deepcopy(possible_formula)
    -641                    
    -642                    #else:
    -643                        
    -644                    formula_dict = possible_formula.to_dict()
    -645                    # create the molecular formula obj to be stored
    -646                    if possible_mf_class:
    +634        if method == "ppm":
    +635            multi_factor = 1_000_000
    +636
    +637        elif method == "ppb":
    +638            multi_factor = 1_000_000_000
    +639
    +640        elif method == "perc":
    +641            multi_factor = 100
    +642
    +643        else:
    +644            raise Exception(
    +645                "method needs to be ppm or ppb, you have entered %s" % method
    +646            )
     647
    -648                        molecular_formula = LCMSLibRefMolecularFormula(formula_dict, ion_charge, ion_type=ion_type, adduct_atom=adduct_atom)
    -649                        
    -650                        molecular_formula.name = possible_formula.name
    -651                        molecular_formula.kegg_id = possible_formula.kegg_id
    -652                        molecular_formula.cas = possible_formula.cas
    +648        if mz_exp:
    +649            return ((mz_exp - mz_calc) / mz_calc) * multi_factor
    +650
    +651        else:
    +652            raise Exception("Please set mz_calc first")
     653
    -654                    else:
    -655
    -656                        molecular_formula = MolecularFormula(formula_dict, ion_charge, ion_type=ion_type, adduct_atom=adduct_atom)
    -657                    # add the molecular formula obj to the mspeak obj
    -658                    # add the mspeak obj and it's index for tracking next assignment step
    -659
    -660                    if self.find_isotopologues:
    -661
    -662                        # calculates isotopologues
    -663                        isotopologues = molecular_formula.isotopologues(min_abundance, ms_peak_abundance, mass_spectrum_obj.dynamic_range)
    -664
    -665                        # search for isotopologues
    -666                        for isotopologue_formula in isotopologues:
    -667
    -668                            molecular_formula.expected_isotopologues.append(isotopologue_formula)
    -669                            # move this outside to improve preformace
    -670                            # we need to increase the search space to -+1 m_z 
    -671                            first_index, last_index = mass_spectrum_obj.get_nominal_mz_first_last_indexes(isotopologue_formula.mz_nominal_calc)
    -672
    -673                            for ms_peak_iso in mass_spectrum_obj[first_index:last_index]:
    -674
    -675                                error = self.calc_error(ms_peak_iso.mz_exp, isotopologue_formula.mz_calc)
    -676
    -677                                if min_ppm_error <= error <= max_ppm_error:
    -678
    -679                                    # need to define error distribution for abundance measurements
    -680
    -681                                    # if mass_spectrum_obj.is_centroid:
    +654    def find_formulas(
    +655        self,
    +656        formulas,
    +657        min_abundance,
    +658        mass_spectrum_obj,
    +659        ms_peak,
    +660        ion_type,
    +661        ion_charge,
    +662        adduct_atom=None,
    +663    ):
    +664        """Find the formulas.
    +665
    +666        Parameters
    +667        ----------
    +668        formulas : list of MolecularFormula
    +669            The list of molecular formulas.
    +670        min_abundance : float
    +671            The minimum abundance threshold.
    +672        mass_spectrum_obj : MassSpectrum
    +673            The mass spectrum object.
    +674        ms_peak : MSPeak
    +675            The mass spectrum peak.
    +676        ion_type : str
    +677            The ion type.
    +678        ion_charge : int
    +679            The ion charge.
    +680        adduct_atom : str, optional
    +681            The adduct atom, by default None.
     682
    -683                                    abundance_error = self.calc_error(isotopologue_formula.abundance_calc, ms_peak_iso.abundance,method='perc')            
    -684
    -685                                    # area_error = self.calc_error(ms_peak.area, ms_peak_iso.area, method='perc')
    -686
    -687                                    # margin of error was set empirically/ needs statistical calculation
    -688                                    #  of margin of error for the measurement of the abundances
    -689                                    if min_abun_error <= abundance_error <= max_abun_error:
    -690
    -691                                        # update the error
    -692
    -693                                        self.set_last_error(error, mass_spectrum_obj)
    -694
    -695                                        # isotopologue_formula.mz_error = error
    -696
    -697                                        # isotopologue_formula.area_error = area_error
    -698
    -699                                        # isotopologue_formula.abundance_error = abundance_error
    -700
    -701                                        isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index
    +683        Returns
    +684        -------
    +685        list of MSPeak
    +686            The list of mass spectrum peaks with assigned molecular formulas.
    +687
    +688        Notes
    +689        -----
    +690        Uses the closest error the next search (this is not ideal, it needs to use confidence
    +691        metric to choose the right candidate then propagate the error using the error from the best candidate).
    +692        It needs to add s/n to the equation.
    +693        It need optimization to define the mz_error_range within a m/z unit since it is directly proportional
    +694        with the mass, and inversely proportional to the rp. It's not linear, i.e., sigma mass.
    +695        The idea it to correlate sigma to resolving power, signal to noise and sample complexity per mz unit.
    +696        Method='distance'
    +697        """
    +698        mspeak_assigned_index = list()
    +699
    +700        min_ppm_error = mass_spectrum_obj.molecular_search_settings.min_ppm_error
    +701        max_ppm_error = mass_spectrum_obj.molecular_search_settings.max_ppm_error
     702
    -703                                        mono_isotopic_formula_index = len(ms_peak)
    -704
    -705                                        isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index
    -706
    -707                                        isotopologue_formula.mono_isotopic_formula_index = mono_isotopic_formula_index
    -708
    -709                                        # add mspeaks isotopologue index to the mono isotopic MolecularFormula obj and the respective formula position  
    -710
    -711                                        # add molecular formula match to ms_peak
    -712                                        x = ms_peak_iso.add_molecular_formula(isotopologue_formula)
    +703        min_abun_error = mass_spectrum_obj.molecular_search_settings.min_abun_error
    +704        max_abun_error = mass_spectrum_obj.molecular_search_settings.max_abun_error
    +705
    +706        # f = open("abundance_error.txt", "a+")
    +707        ms_peak_mz_exp, ms_peak_abundance = ms_peak.mz_exp, ms_peak.abundance
    +708        # min_error = min([pmf.mz_error for pmf in possible_formulas])
    +709
    +710        def mass_by_ion_type(possible_formula_obj):
    +711            if ion_type == Labels.protonated_de_ion:
    +712                return possible_formula_obj._protonated_mz(ion_charge)
     713
    -714                                        molecular_formula.mspeak_mf_isotopologues_indexes.append((ms_peak_iso.index, x))
    -715                                        # add mspeaks mono isotopic index to the isotopologue MolecularFormula obj
    +714            elif ion_type == Labels.radical_ion:
    +715                return possible_formula_obj._radical_mz(ion_charge)
     716
    -717                    y = ms_peak.add_molecular_formula(molecular_formula)            
    -718
    -719                    mspeak_assigned_index.append((ms_peak.index, y))
    -720
    -721        return mspeak_assigned_index
    +717            elif ion_type == Labels.adduct_ion and adduct_atom:
    +718                return possible_formula._adduct_mz(ion_charge, adduct_atom)
    +719
    +720            else:
    +721                # will return externally calculated mz if is set, #use on Bruker Reference list import
    +722                # if the ion type is known the ion mass based on molecular formula ion type
    +723                # if ion type is unknow will return neutral mass
    +724                return possible_formula.mz_calc
    +725
    +726        if formulas:
    +727            if isinstance(formulas[0], LCMSLibRefMolecularFormula):
    +728                possible_mf_class = True
    +729
    +730            else:
    +731                possible_mf_class = False
    +732
    +733        for possible_formula in formulas:
    +734            if possible_formula:
    +735                error = self.calc_error(
    +736                    ms_peak_mz_exp, mass_by_ion_type(possible_formula)
    +737                )
    +738
    +739                # error = possible_formula.mz_error
    +740
    +741                if min_ppm_error <= error <= max_ppm_error:
    +742                    # update the error
    +743
    +744                    self.set_last_error(error, mass_spectrum_obj)
    +745
    +746                    # add molecular formula match to ms_peak
    +747
    +748                    # get molecular formula dict from sql obj
    +749                    # formula_dict = pickle.loads(possible_formula.mol_formula)
    +750                    # if possible_mf_class:
    +751
    +752                    #    molecular_formula = deepcopy(possible_formula)
    +753
    +754                    # else:
    +755
    +756                    formula_dict = possible_formula.to_dict()
    +757                    # create the molecular formula obj to be stored
    +758                    if possible_mf_class:
    +759                        molecular_formula = LCMSLibRefMolecularFormula(
    +760                            formula_dict,
    +761                            ion_charge,
    +762                            ion_type=ion_type,
    +763                            adduct_atom=adduct_atom,
    +764                        )
    +765
    +766                        molecular_formula.name = possible_formula.name
    +767                        molecular_formula.kegg_id = possible_formula.kegg_id
    +768                        molecular_formula.cas = possible_formula.cas
    +769
    +770                    else:
    +771                        molecular_formula = MolecularFormula(
    +772                            formula_dict,
    +773                            ion_charge,
    +774                            ion_type=ion_type,
    +775                            adduct_atom=adduct_atom,
    +776                        )
    +777                    # add the molecular formula obj to the mspeak obj
    +778                    # add the mspeak obj and it's index for tracking next assignment step
    +779
    +780                    if self.find_isotopologues:
    +781                        # calculates isotopologues
    +782                        isotopologues = molecular_formula.isotopologues(
    +783                            min_abundance,
    +784                            ms_peak_abundance,
    +785                            mass_spectrum_obj.dynamic_range,
    +786                        )
    +787
    +788                        # search for isotopologues
    +789                        for isotopologue_formula in isotopologues:
    +790                            molecular_formula.expected_isotopologues.append(
    +791                                isotopologue_formula
    +792                            )
    +793                            # move this outside to improve preformace
    +794                            # we need to increase the search space to -+1 m_z
    +795                            first_index, last_index = (
    +796                                mass_spectrum_obj.get_nominal_mz_first_last_indexes(
    +797                                    isotopologue_formula.mz_nominal_calc
    +798                                )
    +799                            )
    +800
    +801                            for ms_peak_iso in mass_spectrum_obj[
    +802                                first_index:last_index
    +803                            ]:
    +804                                error = self.calc_error(
    +805                                    ms_peak_iso.mz_exp, isotopologue_formula.mz_calc
    +806                                )
    +807
    +808                                if min_ppm_error <= error <= max_ppm_error:
    +809                                    # need to define error distribution for abundance measurements
    +810
    +811                                    # if mass_spectrum_obj.is_centroid:
    +812
    +813                                    abundance_error = self.calc_error(
    +814                                        isotopologue_formula.abundance_calc,
    +815                                        ms_peak_iso.abundance,
    +816                                        method="perc",
    +817                                    )
    +818
    +819                                    # area_error = self.calc_error(ms_peak.area, ms_peak_iso.area, method='perc')
    +820
    +821                                    # margin of error was set empirically/ needs statistical calculation
    +822                                    #  of margin of error for the measurement of the abundances
    +823                                    if (
    +824                                        min_abun_error
    +825                                        <= abundance_error
    +826                                        <= max_abun_error
    +827                                    ):
    +828                                        # update the error
    +829
    +830                                        self.set_last_error(error, mass_spectrum_obj)
    +831
    +832                                        # isotopologue_formula.mz_error = error
    +833
    +834                                        # isotopologue_formula.area_error = area_error
    +835
    +836                                        # isotopologue_formula.abundance_error = abundance_error
    +837
    +838                                        isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index
    +839
    +840                                        mono_isotopic_formula_index = len(ms_peak)
    +841
    +842                                        isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index
    +843
    +844                                        isotopologue_formula.mono_isotopic_formula_index = mono_isotopic_formula_index
    +845
    +846                                        # add mspeaks isotopologue index to the mono isotopic MolecularFormula obj and the respective formula position
    +847
    +848                                        # add molecular formula match to ms_peak
    +849                                        x = ms_peak_iso.add_molecular_formula(
    +850                                            isotopologue_formula
    +851                                        )
    +852
    +853                                        molecular_formula.mspeak_mf_isotopologues_indexes.append(
    +854                                            (ms_peak_iso.index, x)
    +855                                        )
    +856                                        # add mspeaks mono isotopic index to the isotopologue MolecularFormula obj
    +857
    +858                    y = ms_peak.add_molecular_formula(molecular_formula)
    +859
    +860                    mspeak_assigned_index.append((ms_peak.index, y))
    +861
    +862        return mspeak_assigned_index
     
    @@ -2282,8 +2642,8 @@
    Methods
    -
    419    def __init__(self, find_isotopologues=True):
    -420        self.find_isotopologues = find_isotopologues
    +            
    514    def __init__(self, find_isotopologues=True):
    +515        self.find_isotopologues = find_isotopologues
     
    @@ -2312,18 +2672,18 @@
    Methods
    -
    437    def reset_error(self, mass_spectrum_obj):
    -438        """ Reset the error variables.
    -439        
    -440        Parameters
    -441        ----------
    -442        mass_spectrum_obj : MassSpectrum
    -443            The mass spectrum object.
    -444        """
    -445        global last_error, last_dif, closest_error, error_average, nbValues  
    -446        last_error, last_dif, closest_error, nbValues  = 0.0, 0.0, 0.0, 0.0
    -447
    -448        error_average = 0
    +            
    532    def reset_error(self, mass_spectrum_obj):
    +533        """Reset the error variables.
    +534
    +535        Parameters
    +536        ----------
    +537        mass_spectrum_obj : MassSpectrum
    +538            The mass spectrum object.
    +539        """
    +540        global last_error, last_dif, closest_error, error_average, nbValues
    +541        last_error, last_dif, closest_error, nbValues = 0.0, 0.0, 0.0, 0.0
    +542
    +543        error_average = 0
     
    @@ -2350,53 +2710,70 @@
    Parameters
    -
    450    def set_last_error(self, error, mass_spectrum_obj):
    -451        """ Set the last error.
    -452        
    -453        Parameters
    -454        ----------
    -455        error : float
    -456            The error.
    -457        mass_spectrum_obj : MassSpectrum
    -458            The mass spectrum object.
    -459        """
    -460        # set the changes to the global variables, not internal ones
    -461        global last_error, last_dif, closest_error, error_average, nbValues
    -462
    -463        if mass_spectrum_obj.molecular_search_settings.error_method == 'distance':
    -464
    -465            dif = error - last_error
    -466            if dif < last_dif:
    -467                last_dif = dif
    -468                closest_error = error
    -469                mass_spectrum_obj.molecular_search_settings.min_ppm_error = closest_error - mass_spectrum_obj.molecular_search_settings.mz_error_range
    -470                mass_spectrum_obj.molecular_search_settings.max_ppm_error = closest_error + mass_spectrum_obj.molecular_search_settings.mz_error_range
    -471
    -472        elif mass_spectrum_obj.molecular_search_settings.error_method == 'lowest':
    -473
    -474            if error < last_error:
    -475                mass_spectrum_obj.molecular_search_settings.min_ppm_error = error - mass_spectrum_obj.molecular_search_settings.mz_error_range
    -476                mass_spectrum_obj.molecular_search_settings.max_ppm_error = error + mass_spectrum_obj.molecular_search_settings.mz_error_range
    -477                last_error = error
    -478
    -479
    -480        elif mass_spectrum_obj.molecular_search_settings.error_method == 'symmetrical':
    -481
    -482            mass_spectrum_obj.molecular_search_settings.min_ppm_error = mass_spectrum_obj.molecular_search_settings.mz_error_average - mass_spectrum_obj.molecular_search_settings.mz_error_range
    -483            mass_spectrum_obj.molecular_search_settings.max_ppm_error = mass_spectrum_obj.molecular_search_settings.mz_error_average + mass_spectrum_obj.molecular_search_settings.mz_error_range
    -484
    -485        elif mass_spectrum_obj.molecular_search_settings.error_method == 'average':
    -486
    -487            nbValues += 1
    -488            error_average = error_average + ((error - error_average) / nbValues)
    -489            mass_spectrum_obj.molecular_search_settings.min_ppm_error = error_average - mass_spectrum_obj.molecular_search_settings.mz_error_range
    -490            mass_spectrum_obj.molecular_search_settings.max_ppm_error = error_average + mass_spectrum_obj.molecular_search_settings.mz_error_range    
    -491
    -492        else:
    -493            # using set mass_spectrum_obj.molecular_search_settings.min_ppm_error  and max_ppm_error range
    -494            pass
    -495
    -496        #returns the error based on the selected method at mass_spectrum_obj.molecular_search_settings.method
    +            
    545    def set_last_error(self, error, mass_spectrum_obj):
    +546        """Set the last error.
    +547
    +548        Parameters
    +549        ----------
    +550        error : float
    +551            The error.
    +552        mass_spectrum_obj : MassSpectrum
    +553            The mass spectrum object.
    +554        """
    +555        # set the changes to the global variables, not internal ones
    +556        global last_error, last_dif, closest_error, error_average, nbValues
    +557
    +558        if mass_spectrum_obj.molecular_search_settings.error_method == "distance":
    +559            dif = error - last_error
    +560            if dif < last_dif:
    +561                last_dif = dif
    +562                closest_error = error
    +563                mass_spectrum_obj.molecular_search_settings.min_ppm_error = (
    +564                    closest_error
    +565                    - mass_spectrum_obj.molecular_search_settings.mz_error_range
    +566                )
    +567                mass_spectrum_obj.molecular_search_settings.max_ppm_error = (
    +568                    closest_error
    +569                    + mass_spectrum_obj.molecular_search_settings.mz_error_range
    +570                )
    +571
    +572        elif mass_spectrum_obj.molecular_search_settings.error_method == "lowest":
    +573            if error < last_error:
    +574                mass_spectrum_obj.molecular_search_settings.min_ppm_error = (
    +575                    error - mass_spectrum_obj.molecular_search_settings.mz_error_range
    +576                )
    +577                mass_spectrum_obj.molecular_search_settings.max_ppm_error = (
    +578                    error + mass_spectrum_obj.molecular_search_settings.mz_error_range
    +579                )
    +580                last_error = error
    +581
    +582        elif mass_spectrum_obj.molecular_search_settings.error_method == "symmetrical":
    +583            mass_spectrum_obj.molecular_search_settings.min_ppm_error = (
    +584                mass_spectrum_obj.molecular_search_settings.mz_error_average
    +585                - mass_spectrum_obj.molecular_search_settings.mz_error_range
    +586            )
    +587            mass_spectrum_obj.molecular_search_settings.max_ppm_error = (
    +588                mass_spectrum_obj.molecular_search_settings.mz_error_average
    +589                + mass_spectrum_obj.molecular_search_settings.mz_error_range
    +590            )
    +591
    +592        elif mass_spectrum_obj.molecular_search_settings.error_method == "average":
    +593            nbValues += 1
    +594            error_average = error_average + ((error - error_average) / nbValues)
    +595            mass_spectrum_obj.molecular_search_settings.min_ppm_error = (
    +596                error_average
    +597                - mass_spectrum_obj.molecular_search_settings.mz_error_range
    +598            )
    +599            mass_spectrum_obj.molecular_search_settings.max_ppm_error = (
    +600                error_average
    +601                + mass_spectrum_obj.molecular_search_settings.mz_error_range
    +602            )
    +603
    +604        else:
    +605            # using set mass_spectrum_obj.molecular_search_settings.min_ppm_error  and max_ppm_error range
    +606            pass
    +607
    +608        # returns the error based on the selected method at mass_spectrum_obj.molecular_search_settings.method
     
    @@ -2426,49 +2803,49 @@
    Parameters
    -
    498    @staticmethod
    -499    def calc_error(mz_exp, mz_calc, method='ppm'):
    -500        """ Calculate the error.
    -501        
    -502        Parameters
    -503        ----------
    -504        mz_exp : float
    -505            The experimental m/z value.
    -506        mz_calc : float
    -507            The calculated m/z value.
    -508        method : str, optional
    -509            The method, by default 'ppm'.
    -510        
    -511        Raises
    -512        -------
    -513        Exception
    -514            If the method is not ppm or ppb.
    -515        
    -516        Returns
    -517        -------
    -518        float
    -519            The error.
    -520        """
    -521
    -522        if method == 'ppm':
    -523            multi_factor = 1_000_000
    -524
    -525        elif method == 'ppb':
    -526            multi_factor = 1_000_000_000
    -527
    -528        elif method == 'perc':
    -529            multi_factor = 100
    -530
    -531        else:
    -532            raise Exception("method needs to be ppm or ppb, you have entered %s" % method)
    -533
    -534        if mz_exp:
    -535
    -536            return ((mz_exp - mz_calc) / mz_calc) * multi_factor
    -537
    -538        else:
    -539
    -540            raise Exception("Please set mz_calc first")
    +            
    610    @staticmethod
    +611    def calc_error(mz_exp, mz_calc, method="ppm"):
    +612        """Calculate the error.
    +613
    +614        Parameters
    +615        ----------
    +616        mz_exp : float
    +617            The experimental m/z value.
    +618        mz_calc : float
    +619            The calculated m/z value.
    +620        method : str, optional
    +621            The method, by default 'ppm'.
    +622
    +623        Raises
    +624        -------
    +625        Exception
    +626            If the method is not ppm or ppb.
    +627
    +628        Returns
    +629        -------
    +630        float
    +631            The error.
    +632        """
    +633
    +634        if method == "ppm":
    +635            multi_factor = 1_000_000
    +636
    +637        elif method == "ppb":
    +638            multi_factor = 1_000_000_000
    +639
    +640        elif method == "perc":
    +641            multi_factor = 100
    +642
    +643        else:
    +644            raise Exception(
    +645                "method needs to be ppm or ppb, you have entered %s" % method
    +646            )
    +647
    +648        if mz_exp:
    +649            return ((mz_exp - mz_calc) / mz_calc) * multi_factor
    +650
    +651        else:
    +652            raise Exception("Please set mz_calc first")
     
    @@ -2511,186 +2888,215 @@
    Returns
    -
    542    def find_formulas(self, formulas, min_abundance,
    -543                      mass_spectrum_obj, ms_peak, ion_type, ion_charge, adduct_atom=None):
    -544        
    -545        """ Find the formulas.
    -546        
    -547        Parameters
    -548        ----------
    -549        formulas : list of MolecularFormula
    -550            The list of molecular formulas.
    -551        min_abundance : float
    -552            The minimum abundance threshold.
    -553        mass_spectrum_obj : MassSpectrum
    -554            The mass spectrum object.
    -555        ms_peak : MSPeak
    -556            The mass spectrum peak.
    -557        ion_type : str
    -558            The ion type.
    -559        ion_charge : int
    -560            The ion charge.
    -561        adduct_atom : str, optional
    -562            The adduct atom, by default None.
    -563        
    -564        Returns
    -565        -------
    -566        list of MSPeak
    -567            The list of mass spectrum peaks with assigned molecular formulas.
    -568        
    -569        Notes
    -570        -----
    -571        Uses the closest error the next search (this is not ideal, it needs to use confidence 
    -572        metric to choose the right candidate then propagate the error using the error from the best candidate).
    -573        It needs to add s/n to the equation.
    -574        It need optimization to define the mz_error_range within a m/z unit since it is directly proportional 
    -575        with the mass, and inversely proportional to the rp. It's not linear, i.e., sigma mass.
    -576        The idea it to correlate sigma to resolving power, signal to noise and sample complexity per mz unit.
    -577        Method='distance'
    -578        """
    -579        mspeak_assigned_index = list()
    -580
    -581        min_ppm_error = mass_spectrum_obj.molecular_search_settings.min_ppm_error 
    -582        max_ppm_error = mass_spectrum_obj.molecular_search_settings.max_ppm_error
    -583
    -584        min_abun_error = mass_spectrum_obj.molecular_search_settings.min_abun_error
    -585        max_abun_error = mass_spectrum_obj.molecular_search_settings.max_abun_error
    -586
    -587        # f = open("abundance_error.txt", "a+")    
    -588        ms_peak_mz_exp, ms_peak_abundance = ms_peak.mz_exp, ms_peak.abundance
    -589        # min_error = min([pmf.mz_error for pmf in possible_formulas])
    -590
    -591        def mass_by_ion_type(possible_formula_obj):
    -592
    -593            if ion_type == Labels.protonated_de_ion:
    -594
    -595                return possible_formula_obj._protonated_mz(ion_charge)
    -596
    -597            elif ion_type == Labels.radical_ion:
    -598
    -599                return possible_formula_obj._radical_mz(ion_charge)
    -600
    -601            elif ion_type == Labels.adduct_ion and adduct_atom:
    -602
    -603                return possible_formula._adduct_mz(ion_charge, adduct_atom)
    -604
    -605            else:
    -606                # will return externally calculated mz if is set, #use on Bruker Reference list import
    -607                # if the ion type is known the ion mass based on molecular formula ion type
    -608                # if ion type is unknow will return neutral mass 
    -609                return possible_formula.mz_calc
    -610
    -611        if formulas:
    -612            if isinstance(formulas[0], LCMSLibRefMolecularFormula):
    -613
    -614                possible_mf_class = True
    -615            
    -616            else:
    -617
    -618                possible_mf_class = False    
    -619
    -620        for possible_formula in formulas:
    -621
    -622            if possible_formula:
    -623
    -624                error = self.calc_error(ms_peak_mz_exp, mass_by_ion_type(possible_formula))
    -625
    -626                # error = possible_formula.mz_error
    -627
    -628                if min_ppm_error <= error <= max_ppm_error:
    -629
    -630                    # update the error
    -631
    -632                    self.set_last_error(error, mass_spectrum_obj)
    -633
    -634                    # add molecular formula match to ms_peak
    -635
    -636                    # get molecular formula dict from sql obj
    -637                    # formula_dict = pickle.loads(possible_formula.mol_formula)
    -638                    #if possible_mf_class:
    -639                        
    -640                    #    molecular_formula = deepcopy(possible_formula)
    -641                    
    -642                    #else:
    -643                        
    -644                    formula_dict = possible_formula.to_dict()
    -645                    # create the molecular formula obj to be stored
    -646                    if possible_mf_class:
    -647
    -648                        molecular_formula = LCMSLibRefMolecularFormula(formula_dict, ion_charge, ion_type=ion_type, adduct_atom=adduct_atom)
    -649                        
    -650                        molecular_formula.name = possible_formula.name
    -651                        molecular_formula.kegg_id = possible_formula.kegg_id
    -652                        molecular_formula.cas = possible_formula.cas
    -653
    -654                    else:
    -655
    -656                        molecular_formula = MolecularFormula(formula_dict, ion_charge, ion_type=ion_type, adduct_atom=adduct_atom)
    -657                    # add the molecular formula obj to the mspeak obj
    -658                    # add the mspeak obj and it's index for tracking next assignment step
    -659
    -660                    if self.find_isotopologues:
    -661
    -662                        # calculates isotopologues
    -663                        isotopologues = molecular_formula.isotopologues(min_abundance, ms_peak_abundance, mass_spectrum_obj.dynamic_range)
    -664
    -665                        # search for isotopologues
    -666                        for isotopologue_formula in isotopologues:
    -667
    -668                            molecular_formula.expected_isotopologues.append(isotopologue_formula)
    -669                            # move this outside to improve preformace
    -670                            # we need to increase the search space to -+1 m_z 
    -671                            first_index, last_index = mass_spectrum_obj.get_nominal_mz_first_last_indexes(isotopologue_formula.mz_nominal_calc)
    -672
    -673                            for ms_peak_iso in mass_spectrum_obj[first_index:last_index]:
    -674
    -675                                error = self.calc_error(ms_peak_iso.mz_exp, isotopologue_formula.mz_calc)
    -676
    -677                                if min_ppm_error <= error <= max_ppm_error:
    -678
    -679                                    # need to define error distribution for abundance measurements
    -680
    -681                                    # if mass_spectrum_obj.is_centroid:
    +            
    654    def find_formulas(
    +655        self,
    +656        formulas,
    +657        min_abundance,
    +658        mass_spectrum_obj,
    +659        ms_peak,
    +660        ion_type,
    +661        ion_charge,
    +662        adduct_atom=None,
    +663    ):
    +664        """Find the formulas.
    +665
    +666        Parameters
    +667        ----------
    +668        formulas : list of MolecularFormula
    +669            The list of molecular formulas.
    +670        min_abundance : float
    +671            The minimum abundance threshold.
    +672        mass_spectrum_obj : MassSpectrum
    +673            The mass spectrum object.
    +674        ms_peak : MSPeak
    +675            The mass spectrum peak.
    +676        ion_type : str
    +677            The ion type.
    +678        ion_charge : int
    +679            The ion charge.
    +680        adduct_atom : str, optional
    +681            The adduct atom, by default None.
     682
    -683                                    abundance_error = self.calc_error(isotopologue_formula.abundance_calc, ms_peak_iso.abundance,method='perc')            
    -684
    -685                                    # area_error = self.calc_error(ms_peak.area, ms_peak_iso.area, method='perc')
    -686
    -687                                    # margin of error was set empirically/ needs statistical calculation
    -688                                    #  of margin of error for the measurement of the abundances
    -689                                    if min_abun_error <= abundance_error <= max_abun_error:
    -690
    -691                                        # update the error
    -692
    -693                                        self.set_last_error(error, mass_spectrum_obj)
    -694
    -695                                        # isotopologue_formula.mz_error = error
    -696
    -697                                        # isotopologue_formula.area_error = area_error
    -698
    -699                                        # isotopologue_formula.abundance_error = abundance_error
    -700
    -701                                        isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index
    +683        Returns
    +684        -------
    +685        list of MSPeak
    +686            The list of mass spectrum peaks with assigned molecular formulas.
    +687
    +688        Notes
    +689        -----
    +690        Uses the closest error the next search (this is not ideal, it needs to use confidence
    +691        metric to choose the right candidate then propagate the error using the error from the best candidate).
    +692        It needs to add s/n to the equation.
    +693        It need optimization to define the mz_error_range within a m/z unit since it is directly proportional
    +694        with the mass, and inversely proportional to the rp. It's not linear, i.e., sigma mass.
    +695        The idea it to correlate sigma to resolving power, signal to noise and sample complexity per mz unit.
    +696        Method='distance'
    +697        """
    +698        mspeak_assigned_index = list()
    +699
    +700        min_ppm_error = mass_spectrum_obj.molecular_search_settings.min_ppm_error
    +701        max_ppm_error = mass_spectrum_obj.molecular_search_settings.max_ppm_error
     702
    -703                                        mono_isotopic_formula_index = len(ms_peak)
    -704
    -705                                        isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index
    -706
    -707                                        isotopologue_formula.mono_isotopic_formula_index = mono_isotopic_formula_index
    -708
    -709                                        # add mspeaks isotopologue index to the mono isotopic MolecularFormula obj and the respective formula position  
    -710
    -711                                        # add molecular formula match to ms_peak
    -712                                        x = ms_peak_iso.add_molecular_formula(isotopologue_formula)
    +703        min_abun_error = mass_spectrum_obj.molecular_search_settings.min_abun_error
    +704        max_abun_error = mass_spectrum_obj.molecular_search_settings.max_abun_error
    +705
    +706        # f = open("abundance_error.txt", "a+")
    +707        ms_peak_mz_exp, ms_peak_abundance = ms_peak.mz_exp, ms_peak.abundance
    +708        # min_error = min([pmf.mz_error for pmf in possible_formulas])
    +709
    +710        def mass_by_ion_type(possible_formula_obj):
    +711            if ion_type == Labels.protonated_de_ion:
    +712                return possible_formula_obj._protonated_mz(ion_charge)
     713
    -714                                        molecular_formula.mspeak_mf_isotopologues_indexes.append((ms_peak_iso.index, x))
    -715                                        # add mspeaks mono isotopic index to the isotopologue MolecularFormula obj
    +714            elif ion_type == Labels.radical_ion:
    +715                return possible_formula_obj._radical_mz(ion_charge)
     716
    -717                    y = ms_peak.add_molecular_formula(molecular_formula)            
    -718
    -719                    mspeak_assigned_index.append((ms_peak.index, y))
    -720
    -721        return mspeak_assigned_index
    +717            elif ion_type == Labels.adduct_ion and adduct_atom:
    +718                return possible_formula._adduct_mz(ion_charge, adduct_atom)
    +719
    +720            else:
    +721                # will return externally calculated mz if is set, #use on Bruker Reference list import
    +722                # if the ion type is known the ion mass based on molecular formula ion type
    +723                # if ion type is unknow will return neutral mass
    +724                return possible_formula.mz_calc
    +725
    +726        if formulas:
    +727            if isinstance(formulas[0], LCMSLibRefMolecularFormula):
    +728                possible_mf_class = True
    +729
    +730            else:
    +731                possible_mf_class = False
    +732
    +733        for possible_formula in formulas:
    +734            if possible_formula:
    +735                error = self.calc_error(
    +736                    ms_peak_mz_exp, mass_by_ion_type(possible_formula)
    +737                )
    +738
    +739                # error = possible_formula.mz_error
    +740
    +741                if min_ppm_error <= error <= max_ppm_error:
    +742                    # update the error
    +743
    +744                    self.set_last_error(error, mass_spectrum_obj)
    +745
    +746                    # add molecular formula match to ms_peak
    +747
    +748                    # get molecular formula dict from sql obj
    +749                    # formula_dict = pickle.loads(possible_formula.mol_formula)
    +750                    # if possible_mf_class:
    +751
    +752                    #    molecular_formula = deepcopy(possible_formula)
    +753
    +754                    # else:
    +755
    +756                    formula_dict = possible_formula.to_dict()
    +757                    # create the molecular formula obj to be stored
    +758                    if possible_mf_class:
    +759                        molecular_formula = LCMSLibRefMolecularFormula(
    +760                            formula_dict,
    +761                            ion_charge,
    +762                            ion_type=ion_type,
    +763                            adduct_atom=adduct_atom,
    +764                        )
    +765
    +766                        molecular_formula.name = possible_formula.name
    +767                        molecular_formula.kegg_id = possible_formula.kegg_id
    +768                        molecular_formula.cas = possible_formula.cas
    +769
    +770                    else:
    +771                        molecular_formula = MolecularFormula(
    +772                            formula_dict,
    +773                            ion_charge,
    +774                            ion_type=ion_type,
    +775                            adduct_atom=adduct_atom,
    +776                        )
    +777                    # add the molecular formula obj to the mspeak obj
    +778                    # add the mspeak obj and it's index for tracking next assignment step
    +779
    +780                    if self.find_isotopologues:
    +781                        # calculates isotopologues
    +782                        isotopologues = molecular_formula.isotopologues(
    +783                            min_abundance,
    +784                            ms_peak_abundance,
    +785                            mass_spectrum_obj.dynamic_range,
    +786                        )
    +787
    +788                        # search for isotopologues
    +789                        for isotopologue_formula in isotopologues:
    +790                            molecular_formula.expected_isotopologues.append(
    +791                                isotopologue_formula
    +792                            )
    +793                            # move this outside to improve preformace
    +794                            # we need to increase the search space to -+1 m_z
    +795                            first_index, last_index = (
    +796                                mass_spectrum_obj.get_nominal_mz_first_last_indexes(
    +797                                    isotopologue_formula.mz_nominal_calc
    +798                                )
    +799                            )
    +800
    +801                            for ms_peak_iso in mass_spectrum_obj[
    +802                                first_index:last_index
    +803                            ]:
    +804                                error = self.calc_error(
    +805                                    ms_peak_iso.mz_exp, isotopologue_formula.mz_calc
    +806                                )
    +807
    +808                                if min_ppm_error <= error <= max_ppm_error:
    +809                                    # need to define error distribution for abundance measurements
    +810
    +811                                    # if mass_spectrum_obj.is_centroid:
    +812
    +813                                    abundance_error = self.calc_error(
    +814                                        isotopologue_formula.abundance_calc,
    +815                                        ms_peak_iso.abundance,
    +816                                        method="perc",
    +817                                    )
    +818
    +819                                    # area_error = self.calc_error(ms_peak.area, ms_peak_iso.area, method='perc')
    +820
    +821                                    # margin of error was set empirically/ needs statistical calculation
    +822                                    #  of margin of error for the measurement of the abundances
    +823                                    if (
    +824                                        min_abun_error
    +825                                        <= abundance_error
    +826                                        <= max_abun_error
    +827                                    ):
    +828                                        # update the error
    +829
    +830                                        self.set_last_error(error, mass_spectrum_obj)
    +831
    +832                                        # isotopologue_formula.mz_error = error
    +833
    +834                                        # isotopologue_formula.area_error = area_error
    +835
    +836                                        # isotopologue_formula.abundance_error = abundance_error
    +837
    +838                                        isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index
    +839
    +840                                        mono_isotopic_formula_index = len(ms_peak)
    +841
    +842                                        isotopologue_formula.mspeak_index_mono_isotopic = ms_peak.index
    +843
    +844                                        isotopologue_formula.mono_isotopic_formula_index = mono_isotopic_formula_index
    +845
    +846                                        # add mspeaks isotopologue index to the mono isotopic MolecularFormula obj and the respective formula position
    +847
    +848                                        # add molecular formula match to ms_peak
    +849                                        x = ms_peak_iso.add_molecular_formula(
    +850                                            isotopologue_formula
    +851                                        )
    +852
    +853                                        molecular_formula.mspeak_mf_isotopologues_indexes.append(
    +854                                            (ms_peak_iso.index, x)
    +855                                        )
    +856                                        # add mspeaks mono isotopic index to the isotopologue MolecularFormula obj
    +857
    +858                    y = ms_peak.add_molecular_formula(molecular_formula)
    +859
    +860                    mspeak_assigned_index.append((ms_peak.index, y))
    +861
    +862        return mspeak_assigned_index
     
    @@ -2723,10 +3129,10 @@
    Returns
    Notes
    -

    Uses the closest error the next search (this is not ideal, it needs to use confidence +

    Uses the closest error the next search (this is not ideal, it needs to use confidence metric to choose the right candidate then propagate the error using the error from the best candidate). It needs to add s/n to the equation. -It need optimization to define the mz_error_range within a m/z unit since it is directly proportional +It need optimization to define the mz_error_range within a m/z unit since it is directly proportional with the mass, and inversely proportional to the rp. It's not linear, i.e., sigma mass. The idea it to correlate sigma to resolving power, signal to noise and sample complexity per mz unit. Method='distance'

    @@ -2746,94 +3152,104 @@
    Notes
    -
    724class SearchMolecularFormulasLC(SearchMolecularFormulas):
    -725    """ Class for searching molecular formulas in a LC object.
    -726    
    -727    Parameters
    -728    ----------
    -729    lcms_obj : LC
    -730        The LC object.
    -731    sql_db : MolForm_SQL, optional
    -732        The SQL database object, by default None.
    -733    first_hit : bool, optional
    -734        Flag to indicate whether to skip peaks that already have a molecular formula assigned, by default False.    
    -735    find_isotopologues : bool, optional
    -736        Flag to indicate whether to find isotopologues, by default True.
    -737    
    -738    Methods
    -739    -------
    -740    * run_untargeted_worker_ms1().
    -741        Run untargeted molecular formula search on the ms1 mass spectrum.
    -742    * run_target_worker_ms1().
    -743        Run targeted molecular formula search on the ms1 mass spectrum.
    -744    
    -745    """
    -746    def __init__(self, lcms_obj, sql_db=None, first_hit=False, find_isotopologues=True):
    -747
    -748        self.first_hit = first_hit
    -749
    -750        self.find_isotopologues = find_isotopologues
    -751
    -752        self.lcms_obj = lcms_obj
    -753
    -754        if not sql_db:
    -755
    -756            self.sql_db = MolForm_SQL(url=lcms_obj.ms1_molecular_search_settings.url_database)
    -757
    -758        else:
    -759
    -760            self.sql_db = sql_db
    -761
    -762    def run_untargeted_worker_ms1(self):
    -763        """ Run untargeted molecular formula search on the ms1 mass spectrum."""
    -764        # do molecular formula based on the parameters set for ms1 search 
    -765        for peak in self.lcms_obj:
    -766           self.mass_spectrum_obj = peak.mass_spectrum
    -767           self.run_molecular_formula(peak.mass_spectrum.sort_by_abundance())        
    -768
    -769
    -770    def run_target_worker_ms1(self):
    -771        """ Run targeted molecular formula search on the ms1 mass spectrum."""
    -772        # do molecular formula based on the external molecular reference list
    -773        pbar = tqdm.tqdm(self.lcms_obj)
    -774
    -775        for peak in self.lcms_obj:
    -776            
    -777            pbar.set_description_str(desc=f"Started molecular formulae search for mass spectrum at RT {peak.retention_time} s" , refresh=True)
    -778
    -779            self.mass_spectrum_obj = peak.mass_spectrum
    -780
    -781            ion_charge = self.mass_spectrum_obj.polarity
    -782            
    -783            candidate_formulas = peak.targeted_molecular_formulas
    -784
    -785            for i in candidate_formulas:
    -786                if self.lcms_obj.parameters.lc_ms.verbose_processing:
    -787                    print(i)
    -788            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    -789
    -790                ion_type = Labels.protonated_de_ion
    -791                
    -792                #ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True)
    -793
    -794                self.search_mol_formulas( candidate_formulas, ion_type, find_isotopologues=True)
    -795
    -796            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    -797                
    -798                ion_type = Labels.radical_ion
    -799                
    -800                #ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True)
    -801                self.search_mol_formulas( candidate_formulas, ion_type, find_isotopologues=True)
    -802
    -803            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    -804                
    -805                ion_type = Labels.adduct_ion
    -806                
    -807                adduct_list = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg if ion_charge < 0 else self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
    -808
    -809                for adduct_atom in adduct_list:
    -810
    -811                    self.search_mol_formulas( candidate_formulas, ion_type, find_isotopologues=True, adduct_atom=adduct_atom)
    +            
    865class SearchMolecularFormulasLC(SearchMolecularFormulas):
    +866    """Class for searching molecular formulas in a LC object.
    +867
    +868    Parameters
    +869    ----------
    +870    lcms_obj : LC
    +871        The LC object.
    +872    sql_db : MolForm_SQL, optional
    +873        The SQL database object, by default None.
    +874    first_hit : bool, optional
    +875        Flag to indicate whether to skip peaks that already have a molecular formula assigned, by default False.
    +876    find_isotopologues : bool, optional
    +877        Flag to indicate whether to find isotopologues, by default True.
    +878
    +879    Methods
    +880    -------
    +881    * run_untargeted_worker_ms1().
    +882        Run untargeted molecular formula search on the ms1 mass spectrum.
    +883    * run_target_worker_ms1().
    +884        Run targeted molecular formula search on the ms1 mass spectrum.
    +885
    +886    """
    +887
    +888    def __init__(self, lcms_obj, sql_db=None, first_hit=False, find_isotopologues=True):
    +889        self.first_hit = first_hit
    +890
    +891        self.find_isotopologues = find_isotopologues
    +892
    +893        self.lcms_obj = lcms_obj
    +894
    +895        if not sql_db:
    +896            self.sql_db = MolForm_SQL(
    +897                url=lcms_obj.ms1_molecular_search_settings.url_database
    +898            )
    +899
    +900        else:
    +901            self.sql_db = sql_db
    +902
    +903    def run_untargeted_worker_ms1(self):
    +904        """Run untargeted molecular formula search on the ms1 mass spectrum."""
    +905        # do molecular formula based on the parameters set for ms1 search
    +906        for peak in self.lcms_obj:
    +907            self.mass_spectrum_obj = peak.mass_spectrum
    +908            self.run_molecular_formula(peak.mass_spectrum.sort_by_abundance())
    +909
    +910    def run_target_worker_ms1(self):
    +911        """Run targeted molecular formula search on the ms1 mass spectrum."""
    +912        # do molecular formula based on the external molecular reference list
    +913        pbar = tqdm.tqdm(self.lcms_obj)
    +914
    +915        for peak in self.lcms_obj:
    +916            pbar.set_description_str(
    +917                desc=f"Started molecular formulae search for mass spectrum at RT {peak.retention_time} s",
    +918                refresh=True,
    +919            )
    +920
    +921            self.mass_spectrum_obj = peak.mass_spectrum
    +922
    +923            ion_charge = self.mass_spectrum_obj.polarity
    +924
    +925            candidate_formulas = peak.targeted_molecular_formulas
    +926
    +927            for i in candidate_formulas:
    +928                if self.lcms_obj.parameters.lc_ms.verbose_processing:
    +929                    print(i)
    +930            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    +931                ion_type = Labels.protonated_de_ion
    +932
    +933                # ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True)
    +934
    +935                self.search_mol_formulas(
    +936                    candidate_formulas, ion_type, find_isotopologues=True
    +937                )
    +938
    +939            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    +940                ion_type = Labels.radical_ion
    +941
    +942                # ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True)
    +943                self.search_mol_formulas(
    +944                    candidate_formulas, ion_type, find_isotopologues=True
    +945                )
    +946
    +947            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    +948                ion_type = Labels.adduct_ion
    +949
    +950                adduct_list = (
    +951                    self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
    +952                    if ion_charge < 0
    +953                    else self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
    +954                )
    +955
    +956                for adduct_atom in adduct_list:
    +957                    self.search_mol_formulas(
    +958                        candidate_formulas,
    +959                        ion_type,
    +960                        find_isotopologues=True,
    +961                        adduct_atom=adduct_atom,
    +962                    )
     
    @@ -2873,21 +3289,20 @@
    Methods
    -
    746    def __init__(self, lcms_obj, sql_db=None, first_hit=False, find_isotopologues=True):
    -747
    -748        self.first_hit = first_hit
    -749
    -750        self.find_isotopologues = find_isotopologues
    -751
    -752        self.lcms_obj = lcms_obj
    -753
    -754        if not sql_db:
    -755
    -756            self.sql_db = MolForm_SQL(url=lcms_obj.ms1_molecular_search_settings.url_database)
    -757
    -758        else:
    -759
    -760            self.sql_db = sql_db
    +            
    888    def __init__(self, lcms_obj, sql_db=None, first_hit=False, find_isotopologues=True):
    +889        self.first_hit = first_hit
    +890
    +891        self.find_isotopologues = find_isotopologues
    +892
    +893        self.lcms_obj = lcms_obj
    +894
    +895        if not sql_db:
    +896            self.sql_db = MolForm_SQL(
    +897                url=lcms_obj.ms1_molecular_search_settings.url_database
    +898            )
    +899
    +900        else:
    +901            self.sql_db = sql_db
     
    @@ -2938,12 +3353,12 @@
    Methods
    -
    762    def run_untargeted_worker_ms1(self):
    -763        """ Run untargeted molecular formula search on the ms1 mass spectrum."""
    -764        # do molecular formula based on the parameters set for ms1 search 
    -765        for peak in self.lcms_obj:
    -766           self.mass_spectrum_obj = peak.mass_spectrum
    -767           self.run_molecular_formula(peak.mass_spectrum.sort_by_abundance())        
    +            
    903    def run_untargeted_worker_ms1(self):
    +904        """Run untargeted molecular formula search on the ms1 mass spectrum."""
    +905        # do molecular formula based on the parameters set for ms1 search
    +906        for peak in self.lcms_obj:
    +907            self.mass_spectrum_obj = peak.mass_spectrum
    +908            self.run_molecular_formula(peak.mass_spectrum.sort_by_abundance())
     
    @@ -2963,48 +3378,59 @@
    Methods
    -
    770    def run_target_worker_ms1(self):
    -771        """ Run targeted molecular formula search on the ms1 mass spectrum."""
    -772        # do molecular formula based on the external molecular reference list
    -773        pbar = tqdm.tqdm(self.lcms_obj)
    -774
    -775        for peak in self.lcms_obj:
    -776            
    -777            pbar.set_description_str(desc=f"Started molecular formulae search for mass spectrum at RT {peak.retention_time} s" , refresh=True)
    -778
    -779            self.mass_spectrum_obj = peak.mass_spectrum
    -780
    -781            ion_charge = self.mass_spectrum_obj.polarity
    -782            
    -783            candidate_formulas = peak.targeted_molecular_formulas
    -784
    -785            for i in candidate_formulas:
    -786                if self.lcms_obj.parameters.lc_ms.verbose_processing:
    -787                    print(i)
    -788            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    -789
    -790                ion_type = Labels.protonated_de_ion
    -791                
    -792                #ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True)
    -793
    -794                self.search_mol_formulas( candidate_formulas, ion_type, find_isotopologues=True)
    -795
    -796            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    -797                
    -798                ion_type = Labels.radical_ion
    -799                
    -800                #ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True)
    -801                self.search_mol_formulas( candidate_formulas, ion_type, find_isotopologues=True)
    -802
    -803            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    -804                
    -805                ion_type = Labels.adduct_ion
    -806                
    -807                adduct_list = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg if ion_charge < 0 else self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
    -808
    -809                for adduct_atom in adduct_list:
    -810
    -811                    self.search_mol_formulas( candidate_formulas, ion_type, find_isotopologues=True, adduct_atom=adduct_atom)
    +            
    910    def run_target_worker_ms1(self):
    +911        """Run targeted molecular formula search on the ms1 mass spectrum."""
    +912        # do molecular formula based on the external molecular reference list
    +913        pbar = tqdm.tqdm(self.lcms_obj)
    +914
    +915        for peak in self.lcms_obj:
    +916            pbar.set_description_str(
    +917                desc=f"Started molecular formulae search for mass spectrum at RT {peak.retention_time} s",
    +918                refresh=True,
    +919            )
    +920
    +921            self.mass_spectrum_obj = peak.mass_spectrum
    +922
    +923            ion_charge = self.mass_spectrum_obj.polarity
    +924
    +925            candidate_formulas = peak.targeted_molecular_formulas
    +926
    +927            for i in candidate_formulas:
    +928                if self.lcms_obj.parameters.lc_ms.verbose_processing:
    +929                    print(i)
    +930            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    +931                ion_type = Labels.protonated_de_ion
    +932
    +933                # ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True)
    +934
    +935                self.search_mol_formulas(
    +936                    candidate_formulas, ion_type, find_isotopologues=True
    +937                )
    +938
    +939            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    +940                ion_type = Labels.radical_ion
    +941
    +942                # ms_peaks_assigned = self.search_mol_formulas(peak.targeted_molecular_formulas, ion_type, find_isotopologues=True)
    +943                self.search_mol_formulas(
    +944                    candidate_formulas, ion_type, find_isotopologues=True
    +945                )
    +946
    +947            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    +948                ion_type = Labels.adduct_ion
    +949
    +950                adduct_list = (
    +951                    self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
    +952                    if ion_charge < 0
    +953                    else self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
    +954                )
    +955
    +956                for adduct_atom in adduct_list:
    +957                    self.search_mol_formulas(
    +958                        candidate_formulas,
    +959                        ion_type,
    +960                        find_isotopologues=True,
    +961                        adduct_atom=adduct_atom,
    +962                    )
     
    diff --git a/docs/corems/molecular_id/search/priorityAssignment.html b/docs/corems/molecular_id/search/priorityAssignment.html index 91a0a959..0e9af5ab 100644 --- a/docs/corems/molecular_id/search/priorityAssignment.html +++ b/docs/corems/molecular_id/search/priorityAssignment.html @@ -90,661 +90,728 @@

    -
      1import os,  sys
    -  2sys.path.append('.')
    -  3from copy import deepcopy
    -  4from threading import Thread
    -  5from itertools import product
    -  6
    -  7import tqdm
    +                        
      1import os
    +  2import sys
    +  3
    +  4sys.path.append(".")
    +  5from copy import deepcopy
    +  6from threading import Thread
    +  7from itertools import product
       8
    -  9from corems.encapsulation.constant import Labels, Atoms
    - 10from corems.molecular_id.calc.MolecularFilter import MolecularFormulaSearchFilters
    - 11from corems.molecular_id.factory.MolecularLookupTable import MolecularCombinations
    - 12from corems.molecular_id.search.findOxygenPeaks import FindOxygenPeaks
    - 13from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulaWorker
    - 14from corems.molecular_id.factory.molecularSQL import MolForm_SQL
    - 15from corems.molecular_id.calc.ClusterFilter import ClusteringFilter
    - 16import json
    - 17
    - 18
    - 19class OxygenPriorityAssignment(Thread):
    - 20    """A class for assigning priority to oxygen classes in a molecular search.
    - 21
    - 22    Parameters
    - 23    ----------
    - 24    mass_spectrum_obj : MassSpectrum
    - 25        The mass spectrum object.
    - 26    sql_db : bool, optional
    - 27        Whether to use an SQL database. The default is False.
    - 28    
    - 29    Attributes
    - 30    ----------
    - 31    mass_spectrum_obj : MassSpectrum
    - 32        The mass spectrum object.
    - 33    sql_db : MolForm_SQL
    - 34        The SQL database object.
    - 35
    - 36    Methods
    - 37    -------
    - 38    * run().
    - 39        Run the priority assignment process.  
    - 40    * create_data_base().
    - 41        Create the molecular database for the specified heteroatomic classes.  
    - 42    * run_worker_mass_spectrum(assign_classes_order_tuples).
    - 43        Run the molecular formula search for each class in the specified order.  
    - 44    * get_dict_molecular_database(classe_str_list).
    - 45        Get the molecular database as a dictionary.  
    - 46    * ox_classes_and_peaks_in_order_().
    - 47        Get the oxygen classes and associated peaks in order.  
    - 48    * get_classes_in_order(dict_ox_class_and_ms_peak)  
    - 49        Get the classes in order.  
    - 50    """
    - 51
    - 52    def __init__(self, mass_spectrum_obj, sql_db=False):
    - 53        #TODO:- add support for other atoms and adducts: Done
    - 54        #        - add dbe range on search runtime : Done
    - 55        #        - add docs
    - 56        #        - improve performace : Done 
    - 57        
    - 58        Thread.__init__(self)
    - 59        self.mass_spectrum_obj = mass_spectrum_obj
    - 60        #  initiated at create_molecular_database()
    - 61        #self.dict_molecular_lookup_table = None
    - 62        
    - 63        if not sql_db:
    +  9import tqdm
    + 10
    + 11from corems.encapsulation.constant import Labels, Atoms
    + 12from corems.molecular_id.calc.MolecularFilter import MolecularFormulaSearchFilters
    + 13from corems.molecular_id.search.findOxygenPeaks import FindOxygenPeaks
    + 14from corems.molecular_id.search.molecularFormulaSearch import (
    + 15    SearchMolecularFormulaWorker,
    + 16)
    + 17from corems.molecular_id.factory.molecularSQL import MolForm_SQL
    + 18from corems.molecular_id.calc.ClusterFilter import ClusteringFilter
    + 19
    + 20
    + 21class OxygenPriorityAssignment(Thread):
    + 22    """A class for assigning priority to oxygen classes in a molecular search.
    + 23
    + 24    Parameters
    + 25    ----------
    + 26    mass_spectrum_obj : MassSpectrum
    + 27        The mass spectrum object.
    + 28    sql_db : bool, optional
    + 29        Whether to use an SQL database. The default is False.
    + 30
    + 31    Attributes
    + 32    ----------
    + 33    mass_spectrum_obj : MassSpectrum
    + 34        The mass spectrum object.
    + 35    sql_db : MolForm_SQL
    + 36        The SQL database object.
    + 37
    + 38    Methods
    + 39    -------
    + 40    * run().
    + 41        Run the priority assignment process.
    + 42    * create_data_base().
    + 43        Create the molecular database for the specified heteroatomic classes.
    + 44    * run_worker_mass_spectrum(assign_classes_order_tuples).
    + 45        Run the molecular formula search for each class in the specified order.
    + 46    * get_dict_molecular_database(classe_str_list).
    + 47        Get the molecular database as a dictionary.
    + 48    * ox_classes_and_peaks_in_order_().
    + 49        Get the oxygen classes and associated peaks in order.
    + 50    * get_classes_in_order(dict_ox_class_and_ms_peak)
    + 51        Get the classes in order.
    + 52    """
    + 53
    + 54    def __init__(self, mass_spectrum_obj, sql_db=False):
    + 55        # TODO:- add support for other atoms and adducts: Done
    + 56        #        - add dbe range on search runtime : Done
    + 57        #        - add docs
    + 58        #        - improve performace : Done
    + 59
    + 60        Thread.__init__(self)
    + 61        self.mass_spectrum_obj = mass_spectrum_obj
    + 62        #  initiated at create_molecular_database()
    + 63        # self.dict_molecular_lookup_table = None
      64
    - 65            self.sql_db = MolForm_SQL(url=mass_spectrum_obj.molecular_search_settings.url_database)
    - 66
    - 67        else:
    - 68
    - 69            self.sql_db = sql_db
    - 70
    - 71    def run(self):
    - 72        """Run the priority assignment process.
    - 73        """
    - 74        # get Oxygen classes dict and the associate mspeak class 
    - 75        # list_of_classes_min_max_dbe = self.class_and_dbes_in_order()
    - 76        # create database separated to give the user the chance to use mass spec filters
    - 77             
    - 78        assign_classes_order_str_dict_tuple_list = self.create_data_base()
    - 79        
    - 80        if assign_classes_order_str_dict_tuple_list:
    - 81
    + 65        if not sql_db:
    + 66            self.sql_db = MolForm_SQL(
    + 67                url=mass_spectrum_obj.molecular_search_settings.url_database
    + 68            )
    + 69
    + 70        else:
    + 71            self.sql_db = sql_db
    + 72
    + 73    def run(self):
    + 74        """Run the priority assignment process."""
    + 75        # get Oxygen classes dict and the associate mspeak class
    + 76        # list_of_classes_min_max_dbe = self.class_and_dbes_in_order()
    + 77        # create database separated to give the user the chance to use mass spec filters
    + 78
    + 79        assign_classes_order_str_dict_tuple_list = self.create_data_base()
    + 80
    + 81        if assign_classes_order_str_dict_tuple_list:
      82            self.run_worker_mass_spectrum(assign_classes_order_str_dict_tuple_list)
      83
      84        else:
    - 85
    - 86            raise RuntimeError('call create_data_base() first')
    - 87
    - 88        self.sql_db.close()   
    - 89
    - 90    def create_data_base(self):
    - 91        """Create the molecular database for the specified heteroatomic classes.
    - 92
    - 93        Returns
    - 94        -------
    - 95        assign_classes_order_str_dict_tuple_ : list
    - 96            A list of tuples containing the class names and dictionaries of class attributes.
    - 97        """
    + 85            raise RuntimeError("call create_data_base() first")
    + 86
    + 87        self.sql_db.close()
    + 88
    + 89    def create_data_base(self):
    + 90        """Create the molecular database for the specified heteroatomic classes.
    + 91
    + 92        Returns
    + 93        -------
    + 94        assign_classes_order_str_dict_tuple_ : list
    + 95            A list of tuples containing the class names and dictionaries of class attributes.
    + 96        """
    + 97
      98        def create_molecular_database():
    - 99            """ Checks and creates the database entries for the specified heteroatomic classes.
    -100            """
    -101            min_o = min(self.mass_spectrum_obj, key=lambda msp: msp[0]['O'])[0]['O'] - 2
    -102            
    -103            if min_o <= 0:
    -104                min_o = 1
    -105
    -106            max_o = max(self.mass_spectrum_obj, key=lambda msp: msp[0]['O'])[0]['O'] + 2
    -107
    -108            #min_dbe = min(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
    -109
    -110            #max_dbe = max(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
    -111
    -112            #self.lookupTableSettings.use_pah_line_rule = False
    -113            
    -114            #self.lookupTableSettings.min_dbe = min_dbe/2#min_dbe - 7 if  (min_dbe - 7) > 0 else 0
    -115            
    -116            #self.lookupTableSettings.max_dbe = max_dbe * 2 #max_dbe + 7
    -117
    -118            self.mass_spectrum_obj.reset_indexes()
    -119
    -120            self.mass_spectrum_obj.filter_by_noise_threshold()
    -121
    -122            #initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    -123
    -124            self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'] = (min_o, max_o)
    -125
    -126            self.nominal_mzs = self.mass_spectrum_obj.nominal_mz
    + 99            """Checks and creates the database entries for the specified heteroatomic classes."""
    +100            min_o = min(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] - 2
    +101
    +102            if min_o <= 0:
    +103                min_o = 1
    +104
    +105            max_o = max(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] + 2
    +106
    +107            # min_dbe = min(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
    +108
    +109            # max_dbe = max(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
    +110
    +111            # self.lookupTableSettings.use_pah_line_rule = False
    +112
    +113            # self.lookupTableSettings.min_dbe = min_dbe/2#min_dbe - 7 if  (min_dbe - 7) > 0 else 0
    +114
    +115            # self.lookupTableSettings.max_dbe = max_dbe * 2 #max_dbe + 7
    +116
    +117            self.mass_spectrum_obj.reset_indexes()
    +118
    +119            self.mass_spectrum_obj.filter_by_noise_threshold()
    +120
    +121            # initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    +122
    +123            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
    +124                min_o,
    +125                max_o,
    +126            )
     127
    -128
    -129        # get the most abundant peak and them every 14Da, only allow Ox and its derivatives
    -130        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -131            print("Getting Oxygen Series") 
    -132        find_formula_thread = FindOxygenPeaks(self.mass_spectrum_obj, self.sql_db)
    -133        find_formula_thread.run()
    -134        
    -135        #mass spec obj indexes are set to interate over only the peaks with a molecular formula candidate
    -136        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -137            print("Getting Oxygen Series")
    -138        find_formula_thread.set_mass_spec_indexes_by_found_peaks()
    -139        
    -140        #get the Ox class and the DBE for the lowest error molecular formula candidate
    -141        dict_ox_class_and_ms_peak = self.ox_classes_and_peaks_in_order_()
    -142                      
    -143        # sort the classes by abundance
    -144        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -145            print("Getting Oxygen Series Order")
    -146        assign_classes_order_str_dict_tuple_list = self.get_classes_in_order(dict_ox_class_and_ms_peak)
    -147        
    -148        create_molecular_database()
    -149                
    -150        return assign_classes_order_str_dict_tuple_list
    -151        
    -152    def run_worker_mass_spectrum(self, assign_classes_order_tuples):
    -153        """ Run the molecular formula search for each class in the specified order.
    +128            self.nominal_mzs = self.mass_spectrum_obj.nominal_mz
    +129
    +130        # get the most abundant peak and them every 14Da, only allow Ox and its derivatives
    +131        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +132            print("Getting Oxygen Series")
    +133        find_formula_thread = FindOxygenPeaks(self.mass_spectrum_obj, self.sql_db)
    +134        find_formula_thread.run()
    +135
    +136        # mass spec obj indexes are set to interate over only the peaks with a molecular formula candidate
    +137        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +138            print("Getting Oxygen Series")
    +139        find_formula_thread.set_mass_spec_indexes_by_found_peaks()
    +140
    +141        # get the Ox class and the DBE for the lowest error molecular formula candidate
    +142        dict_ox_class_and_ms_peak = self.ox_classes_and_peaks_in_order_()
    +143
    +144        # sort the classes by abundance
    +145        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    +146            print("Getting Oxygen Series Order")
    +147        assign_classes_order_str_dict_tuple_list = self.get_classes_in_order(
    +148            dict_ox_class_and_ms_peak
    +149        )
    +150
    +151        create_molecular_database()
    +152
    +153        return assign_classes_order_str_dict_tuple_list
     154
    -155        Parameters
    -156        ----------
    -157        assign_classes_order_tuples : list 
    -158            A list of tuples containing the class names and dictionaries of class attributes.
    -159        """
    -160
    -161        def check_adduct_class(classe_dict):
    -162            """ Check if the class contains any adduct atoms.
    +155    def run_worker_mass_spectrum(self, assign_classes_order_tuples):
    +156        """Run the molecular formula search for each class in the specified order.
    +157
    +158        Parameters
    +159        ----------
    +160        assign_classes_order_tuples : list
    +161            A list of tuples containing the class names and dictionaries of class attributes.
    +162        """
     163
    -164            Parameters
    -165            ----------
    -166            classe_dict : dict
    -167                The dictionary of class attributes.
    -168
    -169            Returns
    -170            -------
    -171            bool
    -172                True if the class contains adduct atoms, False otherwise.
    -173            """
    -174            return any([key in classe_dict.keys() for key in self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg])
    -175        
    -176        def set_min_max_dbe_by_oxygen(classe_dict):
    -177            """ Calculate the minimum and maximum DBE based on the number of oxygen atoms.
    -178
    -179            Parameters
    -180            ----------
    -181            classe_dict : dict 
    -182                The dictionary of class attributes.
    -183            """
    -184            # calculates min and max DBE based on the Oxygen number
    -185            # ref :https://pubs.acs.org/doi/full/10.1021/ac200464q
    -186            # if class does not has O it use the pha rule
    -187            # ref : Vlad Lobodin manuscript to be include here
    -188            
    -189            #atoms_exchanges = ['N']
    -190            #if 'O' in classe_dict.keys():
    -191            #    
    -192            #    Oxygen_number = classe_dict.get("O")
    -193            #    for atom in atoms_exchanges:
    -194            #        if atom in classe_dict.keys():
    -195            #            Oxygen_number += classe_dict.get(atom)
    -196            #
    -197            #    self.mass_spectrum_obj.molecular_search_settings.min_dbe = (Oxygen_number/3) - 0.5 
    -198            #    self.mass_spectrum_obj.molecular_search_settings.max_dbe = Oxygen_number*3 + 0.5 + 2
    +164        def check_adduct_class(classe_dict):
    +165            """Check if the class contains any adduct atoms.
    +166
    +167            Parameters
    +168            ----------
    +169            classe_dict : dict
    +170                The dictionary of class attributes.
    +171
    +172            Returns
    +173            -------
    +174            bool
    +175                True if the class contains adduct atoms, False otherwise.
    +176            """
    +177            return any(
    +178                [
    +179                    key in classe_dict.keys()
    +180                    for key in self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
    +181                ]
    +182            )
    +183
    +184        def set_min_max_dbe_by_oxygen(classe_dict):
    +185            """Calculate the minimum and maximum DBE based on the number of oxygen atoms.
    +186
    +187            Parameters
    +188            ----------
    +189            classe_dict : dict
    +190                The dictionary of class attributes.
    +191            """
    +192            # calculates min and max DBE based on the Oxygen number
    +193            # ref :https://pubs.acs.org/doi/full/10.1021/ac200464q
    +194            # if class does not has O it use the pha rule
    +195            # ref : Vlad Lobodin manuscript to be include here
    +196
    +197            # atoms_exchanges = ['N']
    +198            # if 'O' in classe_dict.keys():
     199            #
    -200            #else:
    -201                
    -202            self.mass_spectrum_obj.molecular_search_settings.use_pah_line_rule = True
    -203
    -204        def run_search(possible_formulas_dict, mass_spectrum_obj, min_abundance):
    -205            """ Run the molecular formula search for each mass spectrum peak.
    -206
    -207            Parameters
    -208            ----------
    -209            possible_formulas_dict : dict
    -210                A dictionary of possible molecular formulas.
    -211            mass_spectrum_obj : MassSpectrum
    -212                The mass spectrum object.
    -213            min_abundance : float
    -214                The minimum abundance threshold.
    -215
    -216            Returns
    -217            -------
    -218            list 
    -219                A list of assigned peak indexes.
    -220            """
    -221            all_assigned_indexes = list()
    -222            
    -223            for ms_peak in mass_spectrum_obj.sort_by_abundance():
    -224
    -225                if ms_peak: continue
    -226                #already assigned a molecular formula
    -227               
    -228                nominal_mz  = ms_peak.nominal_mz_exp
    -229
    -230                #get mono isotopic peaks that was added a molecular formula obj
    -231                #TODO update error variables
    -232
    -233                possible_formulas_nominal = possible_formulas_dict.get(nominal_mz)
    -234                
    -235                if possible_formulas_nominal:
    -236
    -237                    ms_peak_indexes = SearchMolecularFormulaWorker().find_formulas(possible_formulas_nominal, min_abundance, mass_spectrum_obj, ms_peak)    
    -238
    -239                    all_assigned_indexes.extend(ms_peak_indexes)
    -240            
    -241            
    -242            #filter peaks by percentile threshold of found isotopologues 
    -243            all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(all_assigned_indexes, mass_spectrum_obj)
    -244
    -245            #filter noise by kendrick density
    -246            all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(all_assigned_indexes, mass_spectrum_obj)
    -247
    -248            #filter per min peaks per mono isotopic class
    -249            # this function should always be the last filter, 
    -250            # thefore no need to return remaining indexes
    -251            MolecularFormulaSearchFilters().check_min_peaks(all_assigned_indexes, mass_spectrum_obj)
    +200            #    Oxygen_number = classe_dict.get("O")
    +201            #    for atom in atoms_exchanges:
    +202            #        if atom in classe_dict.keys():
    +203            #            Oxygen_number += classe_dict.get(atom)
    +204            #
    +205            #    self.mass_spectrum_obj.molecular_search_settings.min_dbe = (Oxygen_number/3) - 0.5
    +206            #    self.mass_spectrum_obj.molecular_search_settings.max_dbe = Oxygen_number*3 + 0.5 + 2
    +207            #
    +208            # else:
    +209
    +210            self.mass_spectrum_obj.molecular_search_settings.use_pah_line_rule = True
    +211
    +212        def run_search(possible_formulas_dict, mass_spectrum_obj, min_abundance):
    +213            """Run the molecular formula search for each mass spectrum peak.
    +214
    +215            Parameters
    +216            ----------
    +217            possible_formulas_dict : dict
    +218                A dictionary of possible molecular formulas.
    +219            mass_spectrum_obj : MassSpectrum
    +220                The mass spectrum object.
    +221            min_abundance : float
    +222                The minimum abundance threshold.
    +223
    +224            Returns
    +225            -------
    +226            list
    +227                A list of assigned peak indexes.
    +228            """
    +229            all_assigned_indexes = list()
    +230
    +231            for ms_peak in mass_spectrum_obj.sort_by_abundance():
    +232                if ms_peak:
    +233                    continue
    +234                # already assigned a molecular formula
    +235
    +236                nominal_mz = ms_peak.nominal_mz_exp
    +237
    +238                # get mono isotopic peaks that was added a molecular formula obj
    +239                # TODO update error variables
    +240
    +241                possible_formulas_nominal = possible_formulas_dict.get(nominal_mz)
    +242
    +243                if possible_formulas_nominal:
    +244                    ms_peak_indexes = SearchMolecularFormulaWorker().find_formulas(
    +245                        possible_formulas_nominal,
    +246                        min_abundance,
    +247                        mass_spectrum_obj,
    +248                        ms_peak,
    +249                    )
    +250
    +251                    all_assigned_indexes.extend(ms_peak_indexes)
     252
    -253        #error_average = self.mass_spectrum_obj.molecular_search_settings.mz_error_average
    -254        
    -255        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    -256        
    -257        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    -258
    -259        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    -260
    -261        min_abundance = self.mass_spectrum_obj.min_abundance
    +253            # filter peaks by percentile threshold of found isotopologues
    +254            all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(
    +255                all_assigned_indexes, mass_spectrum_obj
    +256            )
    +257
    +258            # filter noise by kendrick density
    +259            all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(
    +260                all_assigned_indexes, mass_spectrum_obj
    +261            )
     262
    -263        list_classes_str = [i[0] for i in assign_classes_order_tuples]
    -264
    -265        pbar = tqdm.tqdm(assign_classes_order_tuples)
    -266        
    -267        dict_molecular_lookup_table = self.get_dict_molecular_database(list_classes_str)
    -268
    -269        for classe_tuple in pbar:
    -270
    -271            classe_str  = classe_tuple[0]
    -272            classe_dict = classe_tuple[1]
    -273            
    -274            set_min_max_dbe_by_oxygen(classe_dict)
    -275            
    -276            #if len(classe_dict.keys()) == 2:
    -277            #    if classe_dict.get('S') == 1:
    -278            #       continue
    -279            # limits the dbe by the Ox class most abundant,
    -280            # need to add other atoms contribution to be more accurate
    -281            # but +-7 should be sufficient to cover the range 
    -282            
    -283            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    -284
    -285                    #tqdm.set_description_str(desc=None, refresh=True)
    -286                    pbar.set_description_str(desc="Started molecular formula search for class %s, (de)protonated " % classe_str, refresh=True)
    -287
    -288                    ion_type = Labels.protonated_de_ion
    +263            # filter per min peaks per mono isotopic class
    +264            # this function should always be the last filter,
    +265            # thefore no need to return remaining indexes
    +266            MolecularFormulaSearchFilters().check_min_peaks(
    +267                all_assigned_indexes, mass_spectrum_obj
    +268            )
    +269
    +270        # error_average = self.mass_spectrum_obj.molecular_search_settings.mz_error_average
    +271
    +272        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    +273
    +274        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    +275
    +276        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    +277
    +278        min_abundance = self.mass_spectrum_obj.min_abundance
    +279
    +280        list_classes_str = [i[0] for i in assign_classes_order_tuples]
    +281
    +282        pbar = tqdm.tqdm(assign_classes_order_tuples)
    +283
    +284        dict_molecular_lookup_table = self.get_dict_molecular_database(list_classes_str)
    +285
    +286        for classe_tuple in pbar:
    +287            classe_str = classe_tuple[0]
    +288            classe_dict = classe_tuple[1]
     289
    -290                    possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(classe_str)
    -291                    
    -292                    if possible_formulas_dict:
    -293
    -294                        run_search(possible_formulas_dict, self.mass_spectrum_obj, min_abundance)
    -295
    -296            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    -297
    -298                    #print("Started molecular formula search for class %s,  radical" % classe_str)
    -299                    pbar.set_description_str(desc="Started molecular formula search for class %s, radical" % classe_str, refresh=True)
    -300
    -301                    ion_type = Labels.radical_ion
    -302                    
    -303                    possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(classe_str)
    -304                    
    -305                    if possible_formulas_dict:
    +290            set_min_max_dbe_by_oxygen(classe_dict)
    +291
    +292            # if len(classe_dict.keys()) == 2:
    +293            #    if classe_dict.get('S') == 1:
    +294            #       continue
    +295            # limits the dbe by the Ox class most abundant,
    +296            # need to add other atoms contribution to be more accurate
    +297            # but +-7 should be sufficient to cover the range
    +298
    +299            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    +300                # tqdm.set_description_str(desc=None, refresh=True)
    +301                pbar.set_description_str(
    +302                    desc="Started molecular formula search for class %s, (de)protonated "
    +303                    % classe_str,
    +304                    refresh=True,
    +305                )
     306
    -307                        run_search(possible_formulas_dict, self.mass_spectrum_obj, min_abundance)
    +307                ion_type = Labels.protonated_de_ion
     308
    -309            # looks for adduct, used_atom_valences should be 0 
    -310            # this code does not support H exchance by halogen atoms
    -311            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    -312                
    -313                pbar.set_description_str(desc="Started molecular formula search for class %s, adduct" % classe_str, refresh=True)
    -314                #print("Started molecular formula search for class %s, adduct" % classe_str)
    -315                
    -316                ion_type = Labels.radical_ion
    -317                
    -318                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(classe_str)
    -319
    -320                ''' commenting  unfinished code for release 2.0, see end of file for details'''
    -321                # possible_formulas_adduct =self.add_adducts(possible_formulas_dict)
    -322
    -323                # if possible_formulas_adduct:
    -324
    -325                run_search(possible_formulas_dict, self.mass_spectrum_obj, min_abundance)
    -326        
    -327        
    -328    def get_dict_molecular_database(self, classe_str_list):
    -329        """ Get the molecular database as a dictionary.
    -330
    -331        Parameters
    -332        ----------
    -333        classe_str_list : list  
    -334            A list of class names.
    -335
    -336        Returns
    -337        -------
    -338        dict
    -339            A dictionary containing the molecular database.
    -340        """
    -341        nominal_mzs = self.nominal_mzs
    -342        mf_search_settings = self.mass_spectrum_obj.molecular_search_settings
    -343        ion_charge = self.mass_spectrum_obj.polarity
    -344
    -345        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    -346        
    -347        dict_res = {}
    +309                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
    +310                    classe_str
    +311                )
    +312
    +313                if possible_formulas_dict:
    +314                    run_search(
    +315                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
    +316                    )
    +317
    +318            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    +319                # print("Started molecular formula search for class %s,  radical" % classe_str)
    +320                pbar.set_description_str(
    +321                    desc="Started molecular formula search for class %s, radical"
    +322                    % classe_str,
    +323                    refresh=True,
    +324                )
    +325
    +326                ion_type = Labels.radical_ion
    +327
    +328                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
    +329                    classe_str
    +330                )
    +331
    +332                if possible_formulas_dict:
    +333                    run_search(
    +334                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
    +335                    )
    +336
    +337            # looks for adduct, used_atom_valences should be 0
    +338            # this code does not support H exchance by halogen atoms
    +339            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    +340                pbar.set_description_str(
    +341                    desc="Started molecular formula search for class %s, adduct"
    +342                    % classe_str,
    +343                    refresh=True,
    +344                )
    +345                # print("Started molecular formula search for class %s, adduct" % classe_str)
    +346
    +347                ion_type = Labels.radical_ion
     348
    -349        if mf_search_settings.isProtonated:
    -350            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.protonated_de_ion, nominal_mzs, ion_charge, mf_search_settings)    
    -351            
    -352        if mf_search_settings.isRadical:
    -353            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.radical_ion, nominal_mzs, ion_charge, mf_search_settings)    
    -354
    -355        if mf_search_settings.isAdduct:
    -356            
    -357            adduct_list = mf_search_settings.adduct_atoms_neg if ion_charge < 0 else mf_search_settings.adduct_atoms_pos
    -358            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.adduct_ion, nominal_mzs, ion_charge, mf_search_settings, adducts=adduct_list)    
    -359
    -360        return dict_res
    +349                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
    +350                    classe_str
    +351                )
    +352
    +353                """ commenting  unfinished code for release 2.0, see end of file for details"""
    +354                # possible_formulas_adduct =self.add_adducts(possible_formulas_dict)
    +355
    +356                # if possible_formulas_adduct:
    +357
    +358                run_search(
    +359                    possible_formulas_dict, self.mass_spectrum_obj, min_abundance
    +360                )
     361
    -362    def ox_classes_and_peaks_in_order_(self) -> dict:
    -363        """ Get the oxygen classes and associated peaks in order.
    +362    def get_dict_molecular_database(self, classe_str_list):
    +363        """Get the molecular database as a dictionary.
     364
    -365        Returns
    -366        -------
    -367        dict 
    -368            A dictionary containing the oxygen classes and associated peaks.
    -369        """
    -370        # order is only valid in python 3.4 and above
    -371        # change to OrderedDict if your version is lower
    -372        dict_ox_class_and_ms_peak = dict()
    -373        
    -374        for mspeak in self.mass_spectrum_obj.sort_by_abundance(reverse=True):
    -375            
    -376            #change this filter to cia filter, give more option here, confidence, number of isotopologue found etc
    -377
    -378            ox_classe = mspeak.best_molecular_formula_candidate.class_label
    -379            
    -380            if ox_classe in dict_ox_class_and_ms_peak.keys():
    -381                
    -382                #get the most abundant of the same ox class
    -383                if mspeak.abundance > dict_ox_class_and_ms_peak[ox_classe].abundance:
    -384
    -385                    dict_ox_class_and_ms_peak[ox_classe] = (mspeak)
    -386            else:
    -387                    
    -388                dict_ox_class_and_ms_peak[ox_classe] = (mspeak)
    -389        
    -390        return dict_ox_class_and_ms_peak
    +365        Parameters
    +366        ----------
    +367        classe_str_list : list
    +368            A list of class names.
    +369
    +370        Returns
    +371        -------
    +372        dict
    +373            A dictionary containing the molecular database.
    +374        """
    +375        nominal_mzs = self.nominal_mzs
    +376        mf_search_settings = self.mass_spectrum_obj.molecular_search_settings
    +377        ion_charge = self.mass_spectrum_obj.polarity
    +378
    +379        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    +380
    +381        dict_res = {}
    +382
    +383        if mf_search_settings.isProtonated:
    +384            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(
    +385                classe_str_list,
    +386                Labels.protonated_de_ion,
    +387                nominal_mzs,
    +388                ion_charge,
    +389                mf_search_settings,
    +390            )
     391
    -392    def get_classes_in_order(self, dict_ox_class_and_ms_peak)-> [(str, dict)]: 
    -393        """ Get the classes in order.
    -394        
    -395        Parameters
    -396        ----------
    -397        dict_ox_class_and_ms_peak : dict
    -398            A dictionary containing the oxygen classes and associated peaks.
    -399        
    -400        Returns
    -401        -------
    -402        list 
    -403            A list of tuples containing the class names and dictionaries of class attributes.
    -404
    -405        Notes
    -406        -----
    -407        structure is 
    -408            ('HC', {'HC': 1})
    -409        """
    -410        
    -411        
    -412        usedAtoms = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    -413        
    -414        usedAtoms.pop("C")
    -415        usedAtoms.pop("H")
    -416        usedAtoms.pop("O")
    +392        if mf_search_settings.isRadical:
    +393            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(
    +394                classe_str_list,
    +395                Labels.radical_ion,
    +396                nominal_mzs,
    +397                ion_charge,
    +398                mf_search_settings,
    +399            )
    +400
    +401        if mf_search_settings.isAdduct:
    +402            adduct_list = (
    +403                mf_search_settings.adduct_atoms_neg
    +404                if ion_charge < 0
    +405                else mf_search_settings.adduct_atoms_pos
    +406            )
    +407            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(
    +408                classe_str_list,
    +409                Labels.adduct_ion,
    +410                nominal_mzs,
    +411                ion_charge,
    +412                mf_search_settings,
    +413                adducts=adduct_list,
    +414            )
    +415
    +416        return dict_res
     417
    -418        min_n, max_n = usedAtoms.get('N') if usedAtoms.get('N') else (0,0)
    -419        min_s, max_s = usedAtoms.get('S') if usedAtoms.get('S') else (0,0)
    -420        min_p, max_p = usedAtoms.get('P') if usedAtoms.get('P') else (0,0)
    -421
    -422        possible_n = [n for n in range(min_n, max_n + 1)]
    -423        possible_s = [s for s in range(min_s, max_s + 1)]
    -424        possible_p = [p for p in range(min_p, max_p + 1)]
    -425        
    -426        #used to enforce order for commum atoms 
    -427        # and track the atom index in on the tuple in all_atoms_tuples
    -428        atoms_in_order = ['N', 'S', 'P']
    -429        
    -430        #do number atoms prodcut and remove then from the usedAtoms dict
    -431        all_atoms_tuples = product(possible_n, possible_s, possible_p)
    -432        for atom in atoms_in_order:
    -433            
    -434            usedAtoms.pop(atom, None)
    -435        
    -436        #iterate over other atoms besides C,H, N, O, S and P
    -437        
    -438        for selected_atom_label, min_max_tuple in usedAtoms.items():
    -439            
    -440            min_x = min_max_tuple[0]
    -441            max_x = min_max_tuple[1]
    -442
    -443            possible_x = [x for x in range(min_x, max_x + 1)]
    -444            all_atoms_tuples = product(all_atoms_tuples, possible_x)
    -445            
    -446            #merge tuples
    -447            all_atoms_tuples = [all_atoms_combined[0] + (all_atoms_combined[1],) for all_atoms_combined in
    -448                                all_atoms_tuples]
    -449            
    -450            #add atom label to the atoms_in_order list
    -451            
    -452            #important to index where the atom position is in on the tuple in all_atoms_tuples
    -453            atoms_in_order.append(selected_atom_label)
    -454
    -455        classes_strings_dict_tuples, hc_class = self.get_class_strings_dict(all_atoms_tuples, atoms_in_order)
    +418    def ox_classes_and_peaks_in_order_(self) -> dict:
    +419        """Get the oxygen classes and associated peaks in order.
    +420
    +421        Returns
    +422        -------
    +423        dict
    +424            A dictionary containing the oxygen classes and associated peaks.
    +425        """
    +426        # order is only valid in python 3.4 and above
    +427        # change to OrderedDict if your version is lower
    +428        dict_ox_class_and_ms_peak = dict()
    +429
    +430        for mspeak in self.mass_spectrum_obj.sort_by_abundance(reverse=True):
    +431            # change this filter to cia filter, give more option here, confidence, number of isotopologue found etc
    +432
    +433            ox_classe = mspeak.best_molecular_formula_candidate.class_label
    +434
    +435            if ox_classe in dict_ox_class_and_ms_peak.keys():
    +436                # get the most abundant of the same ox class
    +437                if mspeak.abundance > dict_ox_class_and_ms_peak[ox_classe].abundance:
    +438                    dict_ox_class_and_ms_peak[ox_classe] = mspeak
    +439            else:
    +440                dict_ox_class_and_ms_peak[ox_classe] = mspeak
    +441
    +442        return dict_ox_class_and_ms_peak
    +443
    +444    def get_classes_in_order(self, dict_ox_class_and_ms_peak) -> [(str, dict)]:
    +445        """Get the classes in order.
    +446
    +447        Parameters
    +448        ----------
    +449        dict_ox_class_and_ms_peak : dict
    +450            A dictionary containing the oxygen classes and associated peaks.
    +451
    +452        Returns
    +453        -------
    +454        list
    +455            A list of tuples containing the class names and dictionaries of class attributes.
     456
    -457        combined_classes = self.combine_ox_class_with_other(atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak)
    -458        
    -459        combination_classes_ordered = self.sort_classes(atoms_in_order, combined_classes)
    -460        
    -461        oxygen_class_str_dict_tuple = [(ox_class, mspeak[0].class_dict) for ox_class, mspeak in dict_ox_class_and_ms_peak.items()] 
    +457        Notes
    +458        -----
    +459        structure is
    +460            ('HC', {'HC': 1})
    +461        """
     462
    -463        ## add classes together and ignores classes selected from the main series
    -464        for class_tuple in  combination_classes_ordered:
    -465            if class_tuple not in oxygen_class_str_dict_tuple:
    -466                oxygen_class_str_dict_tuple.append(class_tuple)
    -467        
    -468        return oxygen_class_str_dict_tuple
    -469
    -470    @staticmethod
    -471    def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(str, dict)]: 
    -472        """ Get the class strings and dictionaries.
    -473        
    -474        Parameters
    -475        ----------
    -476        all_atoms_tuples : tuple
    -477            A tuple containing the atoms.
    -478        atoms_in_order : list
    -479            A list of atoms in order.
    -480        
    -481        Returns
    -482        --------
    -483        list 
    -484            A list of tuples containing the class strings and dictionaries.
    -485        
    -486        """
    -487        classe_list= []
    -488        hc_class = []
    -489        
    -490        for all_atoms_tuple in all_atoms_tuples:
    -491            
    -492            classe_str = ''
    -493            classe_dict = dict()
    -494            
    -495            for each_atoms_index, atoms_number in enumerate(all_atoms_tuple):
    -496                
    -497                if atoms_number != 0:
    -498                    
    -499                    classe_str = (classe_str + atoms_in_order[each_atoms_index] + str(atoms_number) + ' ')
    -500                    
    -501                    classe_dict[atoms_in_order[each_atoms_index]] = atoms_number
    +463        usedAtoms = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    +464
    +465        usedAtoms.pop("C")
    +466        usedAtoms.pop("H")
    +467        usedAtoms.pop("O")
    +468
    +469        min_n, max_n = usedAtoms.get("N") if usedAtoms.get("N") else (0, 0)
    +470        min_s, max_s = usedAtoms.get("S") if usedAtoms.get("S") else (0, 0)
    +471        min_p, max_p = usedAtoms.get("P") if usedAtoms.get("P") else (0, 0)
    +472
    +473        possible_n = [n for n in range(min_n, max_n + 1)]
    +474        possible_s = [s for s in range(min_s, max_s + 1)]
    +475        possible_p = [p for p in range(min_p, max_p + 1)]
    +476
    +477        # used to enforce order for commum atoms
    +478        # and track the atom index in on the tuple in all_atoms_tuples
    +479        atoms_in_order = ["N", "S", "P"]
    +480
    +481        # do number atoms prodcut and remove then from the usedAtoms dict
    +482        all_atoms_tuples = product(possible_n, possible_s, possible_p)
    +483        for atom in atoms_in_order:
    +484            usedAtoms.pop(atom, None)
    +485
    +486        # iterate over other atoms besides C,H, N, O, S and P
    +487
    +488        for selected_atom_label, min_max_tuple in usedAtoms.items():
    +489            min_x = min_max_tuple[0]
    +490            max_x = min_max_tuple[1]
    +491
    +492            possible_x = [x for x in range(min_x, max_x + 1)]
    +493            all_atoms_tuples = product(all_atoms_tuples, possible_x)
    +494
    +495            # merge tuples
    +496            all_atoms_tuples = [
    +497                all_atoms_combined[0] + (all_atoms_combined[1],)
    +498                for all_atoms_combined in all_atoms_tuples
    +499            ]
    +500
    +501            # add atom label to the atoms_in_order list
     502
    -503            classe_str = classe_str.strip()
    -504            
    -505            if len(classe_str) > 0:
    -506            
    -507                classe_list.append((classe_str,classe_dict))
    -508
    -509            elif len(classe_str) == 0:
    -510
    -511                hc_class.append(('HC', {'HC':1}))
    -512        
    -513        return classe_list, hc_class
    -514    
    -515    @staticmethod
    -516    def combine_ox_class_with_other( atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak) -> [dict]:
    -517        """ Combine the oxygen classes with other classes.
    -518        
    -519        Parameters
    -520        ----------
    -521        atoms_in_order : list
    -522            A list of atoms in order.
    -523        classes_strings_dict_tuples : list
    -524            
    -525        dict_ox_class_and_ms_peak : dict
    -526            A dictionary containing the oxygen classes and associated peaks.
    -527        
    -528        Returns
    -529        -------
    -530        list 
    -531            A list of dictionaries.
    -532        """
    -533        #sort methods that uses the key of classes dictionary and the atoms_in_order as reference
    -534        # c_tuple[1] = class_dict, because is one key:value map we loop through keys and get the first item only 
    -535        # sort by len first then sort based on the atoms_in_order list
    -536        atoms_in_order = Atoms.atoms_order
    -537
    -538        Oxygen_mfs = dict_ox_class_and_ms_peak.values()
    -539        
    -540        
    -541        #sort_method = lambda word: (len(word[0]), [atoms_in_order.index(atom) for atom in list( word[1].keys())])
    -542        
    -543        #print(classes_strings_dict_tuples)
    -544        #classe_in_order = sorted(classes_strings_dict_tuples, key = sort_method)
    -545        #print(classe_in_order)
    -546        
    -547        combination = []
    -548        
    -549        # _ ignoring the class_str
    -550        for _ , other_classe_dict in classes_strings_dict_tuples:
    -551          
    -552           #combination.extend([[other_classe_str + ' ' + Oxygen_mf[0].class_label , {**other_classe_dict, **Oxygen_mf[0].class_dict}] for Oxygen_mf in Oxygen_mfs])
    -553           combination.extend([{**other_classe_dict, **Oxygen_mf[0].class_dict} for Oxygen_mf in Oxygen_mfs])
    -554 
    -555        return combination
    -556    
    -557    @staticmethod
    -558    def sort_classes( atoms_in_order, combination_tuples) -> [(str, dict)]: 
    -559        """ Sort the classes.
    -560        
    -561        Parameters
    -562        ----------
    -563        atoms_in_order : list
    -564            A list of atoms in order.
    -565        combination_tuples : list
    -566            
    -567        Returns
    -568        -------
    -569        list 
    -570            A list of tuples containing the class strings and dictionaries.
    -571        """
    -572        join_list_of_list_classes = list()
    -573        atoms_in_order =  ['N','S','P','O'] + atoms_in_order[3:]
    -574        
    -575        sort_method = lambda atoms_keys: [atoms_in_order.index(atoms_keys)] #(len(word[0]), print(word[1]))#[atoms_in_order.index(atom) for atom in list( word[1].keys())])
    -576        for class_dict in combination_tuples:
    -577            
    -578            sorted_dict_keys = sorted(class_dict, key = sort_method)
    -579            class_str = ' '.join([atom + str(class_dict[atom]) for atom in sorted_dict_keys])
    -580            new_class_dict = { atom: class_dict[atom] for atom in sorted_dict_keys}
    -581            join_list_of_list_classes.append((class_str, new_class_dict))
    -582        
    -583        return join_list_of_list_classes
    -584 
    -585    
    -586    '''
    -587    The code bellow is unfinished, might be added to next release, 2.1
    -588    def add_adducts(self, possible_formulas):
    -589        """ Add adducts to the molecular formula candidates.
    -590
    -591        Parameters
    -592        ----------
    -593        possible_formulas : dict
    -594            A dictionary of possible molecular formulas.
    -595        
    -596        Returns
    -597        -------
    -598        dict 
    -599            A dictionary of possible molecular formulas with adducts.
    -600        
    -601        """
    -602        ion_type = Labels.adduct_ion
    +503            # important to index where the atom position is in on the tuple in all_atoms_tuples
    +504            atoms_in_order.append(selected_atom_label)
    +505
    +506        classes_strings_dict_tuples, hc_class = self.get_class_strings_dict(
    +507            all_atoms_tuples, atoms_in_order
    +508        )
    +509
    +510        combined_classes = self.combine_ox_class_with_other(
    +511            atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
    +512        )
    +513
    +514        combination_classes_ordered = self.sort_classes(
    +515            atoms_in_order, combined_classes
    +516        )
    +517
    +518        oxygen_class_str_dict_tuple = [
    +519            (ox_class, mspeak[0].class_dict)
    +520            for ox_class, mspeak in dict_ox_class_and_ms_peak.items()
    +521        ]
    +522
    +523        ## add classes together and ignores classes selected from the main series
    +524        for class_tuple in combination_classes_ordered:
    +525            if class_tuple not in oxygen_class_str_dict_tuple:
    +526                oxygen_class_str_dict_tuple.append(class_tuple)
    +527
    +528        return oxygen_class_str_dict_tuple
    +529
    +530    @staticmethod
    +531    def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(str, dict)]:
    +532        """Get the class strings and dictionaries.
    +533
    +534        Parameters
    +535        ----------
    +536        all_atoms_tuples : tuple
    +537            A tuple containing the atoms.
    +538        atoms_in_order : list
    +539            A list of atoms in order.
    +540
    +541        Returns
    +542        --------
    +543        list
    +544            A list of tuples containing the class strings and dictionaries.
    +545
    +546        """
    +547        classe_list = []
    +548        hc_class = []
    +549
    +550        for all_atoms_tuple in all_atoms_tuples:
    +551            classe_str = ""
    +552            classe_dict = dict()
    +553
    +554            for each_atoms_index, atoms_number in enumerate(all_atoms_tuple):
    +555                if atoms_number != 0:
    +556                    classe_str = (
    +557                        classe_str
    +558                        + atoms_in_order[each_atoms_index]
    +559                        + str(atoms_number)
    +560                        + " "
    +561                    )
    +562
    +563                    classe_dict[atoms_in_order[each_atoms_index]] = atoms_number
    +564
    +565            classe_str = classe_str.strip()
    +566
    +567            if len(classe_str) > 0:
    +568                classe_list.append((classe_str, classe_dict))
    +569
    +570            elif len(classe_str) == 0:
    +571                hc_class.append(("HC", {"HC": 1}))
    +572
    +573        return classe_list, hc_class
    +574
    +575    @staticmethod
    +576    def combine_ox_class_with_other(
    +577        atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
    +578    ) -> [dict]:
    +579        """Combine the oxygen classes with other classes.
    +580
    +581        Parameters
    +582        ----------
    +583        atoms_in_order : list
    +584            A list of atoms in order.
    +585        classes_strings_dict_tuples : list
    +586
    +587        dict_ox_class_and_ms_peak : dict
    +588            A dictionary containing the oxygen classes and associated peaks.
    +589
    +590        Returns
    +591        -------
    +592        list
    +593            A list of dictionaries.
    +594        """
    +595        # sort methods that uses the key of classes dictionary and the atoms_in_order as reference
    +596        # c_tuple[1] = class_dict, because is one key:value map we loop through keys and get the first item only
    +597        # sort by len first then sort based on the atoms_in_order list
    +598        atoms_in_order = Atoms.atoms_order
    +599
    +600        Oxygen_mfs = dict_ox_class_and_ms_peak.values()
    +601
    +602        # sort_method = lambda word: (len(word[0]), [atoms_in_order.index(atom) for atom in list( word[1].keys())])
     603
    -604        if self.mass_spectrum_obj.polarity < 0:
    -605            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
    -606            molform_model = MolecularFormulaDict
    -607        else:
    -608            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
    -609            molform_model = MolecularFormulaTablePos
    -610
    -611        new_dict = {}
    -612        
    -613        for nominal_mz, list_formulas in possible_formulas.items():
    -614            
    -615            for adduct_atom in adduct_atoms:
    -616                
    -617                adduct_atom_mass= Atoms.atomic_masses.get(adduct_atom) 
    -618
    -619                for molecularFormulaTable in  list_formulas:
    -620                    
    -621                    formula_dict = json.loads(molecularFormulaTable.mol_formula)
    -622                    
    -623                    if adduct_atom in formula_dict.keys():
    -624                        formula_dict[adduct_atom] += 1  
    -625                    else:
    -626                        formula_dict[adduct_atom] = 1      
    -627                    
    -628                    mz = adduct_atom_mass + molecularFormulaTable.mz
    -629                    nm = int(mz)
    -630                    
    -631                    new_formul_obj = molform_model( **{"mol_formula" : json.dumps(formula_dict),
    -632                                            "mz" : mz,
    -633                                            "ion_type" : ion_type,
    -634                                            "nominal_mz" : nm,
    -635                                            "ion_charge" : molecularFormulaTable.ion_charge,
    -636                                            "classe" : molecularFormulaTable.classe,
    -637                                            "C" : molecularFormulaTable.C,
    -638                                            "H" : molecularFormulaTable.H,
    -639                                            "N" : molecularFormulaTable.N,
    -640                                            "O" : molecularFormulaTable.O,
    -641                                            "S" : molecularFormulaTable.S,
    -642                                            "P" : molecularFormulaTable.P,
    -643                                            "H_C" : molecularFormulaTable.H_C,
    -644                                            "O_C" : molecularFormulaTable.O_C,
    -645                                            "DBE" : molecularFormulaTable.DBE,
    -646                                            })
    -647                    if nm in new_dict:
    -648                        new_dict[nm].append(new_formul_obj)
    -649                    
    -650                    else:
    -651                        new_dict[nm]= [new_formul_obj]
    -652                    
    -653        return new_dict
    -654
    -655    '''
    +604        # print(classes_strings_dict_tuples)
    +605        # classe_in_order = sorted(classes_strings_dict_tuples, key = sort_method)
    +606        # print(classe_in_order)
    +607
    +608        combination = []
    +609
    +610        # _ ignoring the class_str
    +611        for _, other_classe_dict in classes_strings_dict_tuples:
    +612            # combination.extend([[other_classe_str + ' ' + Oxygen_mf[0].class_label , {**other_classe_dict, **Oxygen_mf[0].class_dict}] for Oxygen_mf in Oxygen_mfs])
    +613            combination.extend(
    +614                [
    +615                    {**other_classe_dict, **Oxygen_mf[0].class_dict}
    +616                    for Oxygen_mf in Oxygen_mfs
    +617                ]
    +618            )
    +619
    +620        return combination
    +621
    +622    @staticmethod
    +623    def sort_classes(atoms_in_order, combination_tuples) -> [(str, dict)]:
    +624        """Sort the classes.
    +625
    +626        Parameters
    +627        ----------
    +628        atoms_in_order : list
    +629            A list of atoms in order.
    +630        combination_tuples : list
    +631
    +632        Returns
    +633        -------
    +634        list
    +635            A list of tuples containing the class strings and dictionaries.
    +636        """
    +637        join_list_of_list_classes = list()
    +638        atoms_in_order = ["N", "S", "P", "O"] + atoms_in_order[3:]
    +639
    +640        sort_method = (
    +641            lambda atoms_keys: [atoms_in_order.index(atoms_keys)]
    +642        )  # (len(word[0]), print(word[1]))#[atoms_in_order.index(atom) for atom in list( word[1].keys())])
    +643        for class_dict in combination_tuples:
    +644            sorted_dict_keys = sorted(class_dict, key=sort_method)
    +645            class_str = " ".join(
    +646                [atom + str(class_dict[atom]) for atom in sorted_dict_keys]
    +647            )
    +648            new_class_dict = {atom: class_dict[atom] for atom in sorted_dict_keys}
    +649            join_list_of_list_classes.append((class_str, new_class_dict))
    +650
    +651        return join_list_of_list_classes
    +652
    +653    '''
    +654    The code bellow is unfinished, might be added to next release, 2.1
    +655    def add_adducts(self, possible_formulas):
    +656        """ Add adducts to the molecular formula candidates.
    +657
    +658        Parameters
    +659        ----------
    +660        possible_formulas : dict
    +661            A dictionary of possible molecular formulas.
    +662        
    +663        Returns
    +664        -------
    +665        dict 
    +666            A dictionary of possible molecular formulas with adducts.
    +667        
    +668        """
    +669        ion_type = Labels.adduct_ion
    +670
    +671        if self.mass_spectrum_obj.polarity < 0:
    +672            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
    +673            molform_model = MolecularFormulaDict
    +674        else:
    +675            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
    +676            molform_model = MolecularFormulaTablePos
    +677
    +678        new_dict = {}
    +679        
    +680        for nominal_mz, list_formulas in possible_formulas.items():
    +681            
    +682            for adduct_atom in adduct_atoms:
    +683                
    +684                adduct_atom_mass= Atoms.atomic_masses.get(adduct_atom) 
    +685
    +686                for molecularFormulaTable in  list_formulas:
    +687                    
    +688                    formula_dict = json.loads(molecularFormulaTable.mol_formula)
    +689                    
    +690                    if adduct_atom in formula_dict.keys():
    +691                        formula_dict[adduct_atom] += 1  
    +692                    else:
    +693                        formula_dict[adduct_atom] = 1      
    +694                    
    +695                    mz = adduct_atom_mass + molecularFormulaTable.mz
    +696                    nm = int(mz)
    +697                    
    +698                    new_formul_obj = molform_model( **{"mol_formula" : json.dumps(formula_dict),
    +699                                            "mz" : mz,
    +700                                            "ion_type" : ion_type,
    +701                                            "nominal_mz" : nm,
    +702                                            "ion_charge" : molecularFormulaTable.ion_charge,
    +703                                            "classe" : molecularFormulaTable.classe,
    +704                                            "C" : molecularFormulaTable.C,
    +705                                            "H" : molecularFormulaTable.H,
    +706                                            "N" : molecularFormulaTable.N,
    +707                                            "O" : molecularFormulaTable.O,
    +708                                            "S" : molecularFormulaTable.S,
    +709                                            "P" : molecularFormulaTable.P,
    +710                                            "H_C" : molecularFormulaTable.H_C,
    +711                                            "O_C" : molecularFormulaTable.O_C,
    +712                                            "DBE" : molecularFormulaTable.DBE,
    +713                                            })
    +714                    if nm in new_dict:
    +715                        new_dict[nm].append(new_formul_obj)
    +716                    
    +717                    else:
    +718                        new_dict[nm]= [new_formul_obj]
    +719                    
    +720        return new_dict
    +721
    +722    '''
     
    @@ -760,643 +827,708 @@

    -
     21class OxygenPriorityAssignment(Thread):
    - 22    """A class for assigning priority to oxygen classes in a molecular search.
    - 23
    - 24    Parameters
    - 25    ----------
    - 26    mass_spectrum_obj : MassSpectrum
    - 27        The mass spectrum object.
    - 28    sql_db : bool, optional
    - 29        Whether to use an SQL database. The default is False.
    - 30    
    - 31    Attributes
    - 32    ----------
    - 33    mass_spectrum_obj : MassSpectrum
    - 34        The mass spectrum object.
    - 35    sql_db : MolForm_SQL
    - 36        The SQL database object.
    - 37
    - 38    Methods
    - 39    -------
    - 40    * run().
    - 41        Run the priority assignment process.  
    - 42    * create_data_base().
    - 43        Create the molecular database for the specified heteroatomic classes.  
    - 44    * run_worker_mass_spectrum(assign_classes_order_tuples).
    - 45        Run the molecular formula search for each class in the specified order.  
    - 46    * get_dict_molecular_database(classe_str_list).
    - 47        Get the molecular database as a dictionary.  
    - 48    * ox_classes_and_peaks_in_order_().
    - 49        Get the oxygen classes and associated peaks in order.  
    - 50    * get_classes_in_order(dict_ox_class_and_ms_peak)  
    - 51        Get the classes in order.  
    - 52    """
    - 53
    - 54    def __init__(self, mass_spectrum_obj, sql_db=False):
    - 55        #TODO:- add support for other atoms and adducts: Done
    - 56        #        - add dbe range on search runtime : Done
    - 57        #        - add docs
    - 58        #        - improve performace : Done 
    - 59        
    - 60        Thread.__init__(self)
    - 61        self.mass_spectrum_obj = mass_spectrum_obj
    - 62        #  initiated at create_molecular_database()
    - 63        #self.dict_molecular_lookup_table = None
    - 64        
    - 65        if not sql_db:
    - 66
    - 67            self.sql_db = MolForm_SQL(url=mass_spectrum_obj.molecular_search_settings.url_database)
    - 68
    - 69        else:
    +            
     22class OxygenPriorityAssignment(Thread):
    + 23    """A class for assigning priority to oxygen classes in a molecular search.
    + 24
    + 25    Parameters
    + 26    ----------
    + 27    mass_spectrum_obj : MassSpectrum
    + 28        The mass spectrum object.
    + 29    sql_db : bool, optional
    + 30        Whether to use an SQL database. The default is False.
    + 31
    + 32    Attributes
    + 33    ----------
    + 34    mass_spectrum_obj : MassSpectrum
    + 35        The mass spectrum object.
    + 36    sql_db : MolForm_SQL
    + 37        The SQL database object.
    + 38
    + 39    Methods
    + 40    -------
    + 41    * run().
    + 42        Run the priority assignment process.
    + 43    * create_data_base().
    + 44        Create the molecular database for the specified heteroatomic classes.
    + 45    * run_worker_mass_spectrum(assign_classes_order_tuples).
    + 46        Run the molecular formula search for each class in the specified order.
    + 47    * get_dict_molecular_database(classe_str_list).
    + 48        Get the molecular database as a dictionary.
    + 49    * ox_classes_and_peaks_in_order_().
    + 50        Get the oxygen classes and associated peaks in order.
    + 51    * get_classes_in_order(dict_ox_class_and_ms_peak)
    + 52        Get the classes in order.
    + 53    """
    + 54
    + 55    def __init__(self, mass_spectrum_obj, sql_db=False):
    + 56        # TODO:- add support for other atoms and adducts: Done
    + 57        #        - add dbe range on search runtime : Done
    + 58        #        - add docs
    + 59        #        - improve performace : Done
    + 60
    + 61        Thread.__init__(self)
    + 62        self.mass_spectrum_obj = mass_spectrum_obj
    + 63        #  initiated at create_molecular_database()
    + 64        # self.dict_molecular_lookup_table = None
    + 65
    + 66        if not sql_db:
    + 67            self.sql_db = MolForm_SQL(
    + 68                url=mass_spectrum_obj.molecular_search_settings.url_database
    + 69            )
      70
    - 71            self.sql_db = sql_db
    - 72
    - 73    def run(self):
    - 74        """Run the priority assignment process.
    - 75        """
    - 76        # get Oxygen classes dict and the associate mspeak class 
    + 71        else:
    + 72            self.sql_db = sql_db
    + 73
    + 74    def run(self):
    + 75        """Run the priority assignment process."""
    + 76        # get Oxygen classes dict and the associate mspeak class
      77        # list_of_classes_min_max_dbe = self.class_and_dbes_in_order()
      78        # create database separated to give the user the chance to use mass spec filters
    - 79             
    + 79
      80        assign_classes_order_str_dict_tuple_list = self.create_data_base()
    - 81        
    + 81
      82        if assign_classes_order_str_dict_tuple_list:
    - 83
    - 84            self.run_worker_mass_spectrum(assign_classes_order_str_dict_tuple_list)
    - 85
    - 86        else:
    + 83            self.run_worker_mass_spectrum(assign_classes_order_str_dict_tuple_list)
    + 84
    + 85        else:
    + 86            raise RuntimeError("call create_data_base() first")
      87
    - 88            raise RuntimeError('call create_data_base() first')
    + 88        self.sql_db.close()
      89
    - 90        self.sql_db.close()   
    - 91
    - 92    def create_data_base(self):
    - 93        """Create the molecular database for the specified heteroatomic classes.
    - 94
    - 95        Returns
    - 96        -------
    - 97        assign_classes_order_str_dict_tuple_ : list
    - 98            A list of tuples containing the class names and dictionaries of class attributes.
    - 99        """
    -100        def create_molecular_database():
    -101            """ Checks and creates the database entries for the specified heteroatomic classes.
    -102            """
    -103            min_o = min(self.mass_spectrum_obj, key=lambda msp: msp[0]['O'])[0]['O'] - 2
    -104            
    -105            if min_o <= 0:
    -106                min_o = 1
    + 90    def create_data_base(self):
    + 91        """Create the molecular database for the specified heteroatomic classes.
    + 92
    + 93        Returns
    + 94        -------
    + 95        assign_classes_order_str_dict_tuple_ : list
    + 96            A list of tuples containing the class names and dictionaries of class attributes.
    + 97        """
    + 98
    + 99        def create_molecular_database():
    +100            """Checks and creates the database entries for the specified heteroatomic classes."""
    +101            min_o = min(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] - 2
    +102
    +103            if min_o <= 0:
    +104                min_o = 1
    +105
    +106            max_o = max(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] + 2
     107
    -108            max_o = max(self.mass_spectrum_obj, key=lambda msp: msp[0]['O'])[0]['O'] + 2
    +108            # min_dbe = min(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
     109
    -110            #min_dbe = min(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
    +110            # max_dbe = max(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
     111
    -112            #max_dbe = max(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
    +112            # self.lookupTableSettings.use_pah_line_rule = False
     113
    -114            #self.lookupTableSettings.use_pah_line_rule = False
    -115            
    -116            #self.lookupTableSettings.min_dbe = min_dbe/2#min_dbe - 7 if  (min_dbe - 7) > 0 else 0
    -117            
    -118            #self.lookupTableSettings.max_dbe = max_dbe * 2 #max_dbe + 7
    +114            # self.lookupTableSettings.min_dbe = min_dbe/2#min_dbe - 7 if  (min_dbe - 7) > 0 else 0
    +115
    +116            # self.lookupTableSettings.max_dbe = max_dbe * 2 #max_dbe + 7
    +117
    +118            self.mass_spectrum_obj.reset_indexes()
     119
    -120            self.mass_spectrum_obj.reset_indexes()
    +120            self.mass_spectrum_obj.filter_by_noise_threshold()
     121
    -122            self.mass_spectrum_obj.filter_by_noise_threshold()
    +122            # initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
     123
    -124            #initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    -125
    -126            self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'] = (min_o, max_o)
    -127
    -128            self.nominal_mzs = self.mass_spectrum_obj.nominal_mz
    -129
    +124            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
    +125                min_o,
    +126                max_o,
    +127            )
    +128
    +129            self.nominal_mzs = self.mass_spectrum_obj.nominal_mz
     130
     131        # get the most abundant peak and them every 14Da, only allow Ox and its derivatives
     132        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -133            print("Getting Oxygen Series") 
    +133            print("Getting Oxygen Series")
     134        find_formula_thread = FindOxygenPeaks(self.mass_spectrum_obj, self.sql_db)
     135        find_formula_thread.run()
    -136        
    -137        #mass spec obj indexes are set to interate over only the peaks with a molecular formula candidate
    +136
    +137        # mass spec obj indexes are set to interate over only the peaks with a molecular formula candidate
     138        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
     139            print("Getting Oxygen Series")
     140        find_formula_thread.set_mass_spec_indexes_by_found_peaks()
    -141        
    -142        #get the Ox class and the DBE for the lowest error molecular formula candidate
    +141
    +142        # get the Ox class and the DBE for the lowest error molecular formula candidate
     143        dict_ox_class_and_ms_peak = self.ox_classes_and_peaks_in_order_()
    -144                      
    +144
     145        # sort the classes by abundance
     146        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
     147            print("Getting Oxygen Series Order")
    -148        assign_classes_order_str_dict_tuple_list = self.get_classes_in_order(dict_ox_class_and_ms_peak)
    -149        
    -150        create_molecular_database()
    -151                
    -152        return assign_classes_order_str_dict_tuple_list
    -153        
    -154    def run_worker_mass_spectrum(self, assign_classes_order_tuples):
    -155        """ Run the molecular formula search for each class in the specified order.
    -156
    -157        Parameters
    -158        ----------
    -159        assign_classes_order_tuples : list 
    -160            A list of tuples containing the class names and dictionaries of class attributes.
    -161        """
    -162
    -163        def check_adduct_class(classe_dict):
    -164            """ Check if the class contains any adduct atoms.
    -165
    -166            Parameters
    -167            ----------
    -168            classe_dict : dict
    -169                The dictionary of class attributes.
    -170
    -171            Returns
    -172            -------
    -173            bool
    -174                True if the class contains adduct atoms, False otherwise.
    -175            """
    -176            return any([key in classe_dict.keys() for key in self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg])
    -177        
    -178        def set_min_max_dbe_by_oxygen(classe_dict):
    -179            """ Calculate the minimum and maximum DBE based on the number of oxygen atoms.
    -180
    -181            Parameters
    -182            ----------
    -183            classe_dict : dict 
    -184                The dictionary of class attributes.
    -185            """
    -186            # calculates min and max DBE based on the Oxygen number
    -187            # ref :https://pubs.acs.org/doi/full/10.1021/ac200464q
    -188            # if class does not has O it use the pha rule
    -189            # ref : Vlad Lobodin manuscript to be include here
    -190            
    -191            #atoms_exchanges = ['N']
    -192            #if 'O' in classe_dict.keys():
    -193            #    
    -194            #    Oxygen_number = classe_dict.get("O")
    -195            #    for atom in atoms_exchanges:
    -196            #        if atom in classe_dict.keys():
    -197            #            Oxygen_number += classe_dict.get(atom)
    -198            #
    -199            #    self.mass_spectrum_obj.molecular_search_settings.min_dbe = (Oxygen_number/3) - 0.5 
    -200            #    self.mass_spectrum_obj.molecular_search_settings.max_dbe = Oxygen_number*3 + 0.5 + 2
    -201            #
    -202            #else:
    -203                
    -204            self.mass_spectrum_obj.molecular_search_settings.use_pah_line_rule = True
    -205
    -206        def run_search(possible_formulas_dict, mass_spectrum_obj, min_abundance):
    -207            """ Run the molecular formula search for each mass spectrum peak.
    -208
    -209            Parameters
    -210            ----------
    -211            possible_formulas_dict : dict
    -212                A dictionary of possible molecular formulas.
    -213            mass_spectrum_obj : MassSpectrum
    -214                The mass spectrum object.
    -215            min_abundance : float
    -216                The minimum abundance threshold.
    -217
    -218            Returns
    -219            -------
    -220            list 
    -221                A list of assigned peak indexes.
    -222            """
    -223            all_assigned_indexes = list()
    -224            
    -225            for ms_peak in mass_spectrum_obj.sort_by_abundance():
    -226
    -227                if ms_peak: continue
    -228                #already assigned a molecular formula
    -229               
    -230                nominal_mz  = ms_peak.nominal_mz_exp
    +148        assign_classes_order_str_dict_tuple_list = self.get_classes_in_order(
    +149            dict_ox_class_and_ms_peak
    +150        )
    +151
    +152        create_molecular_database()
    +153
    +154        return assign_classes_order_str_dict_tuple_list
    +155
    +156    def run_worker_mass_spectrum(self, assign_classes_order_tuples):
    +157        """Run the molecular formula search for each class in the specified order.
    +158
    +159        Parameters
    +160        ----------
    +161        assign_classes_order_tuples : list
    +162            A list of tuples containing the class names and dictionaries of class attributes.
    +163        """
    +164
    +165        def check_adduct_class(classe_dict):
    +166            """Check if the class contains any adduct atoms.
    +167
    +168            Parameters
    +169            ----------
    +170            classe_dict : dict
    +171                The dictionary of class attributes.
    +172
    +173            Returns
    +174            -------
    +175            bool
    +176                True if the class contains adduct atoms, False otherwise.
    +177            """
    +178            return any(
    +179                [
    +180                    key in classe_dict.keys()
    +181                    for key in self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
    +182                ]
    +183            )
    +184
    +185        def set_min_max_dbe_by_oxygen(classe_dict):
    +186            """Calculate the minimum and maximum DBE based on the number of oxygen atoms.
    +187
    +188            Parameters
    +189            ----------
    +190            classe_dict : dict
    +191                The dictionary of class attributes.
    +192            """
    +193            # calculates min and max DBE based on the Oxygen number
    +194            # ref :https://pubs.acs.org/doi/full/10.1021/ac200464q
    +195            # if class does not has O it use the pha rule
    +196            # ref : Vlad Lobodin manuscript to be include here
    +197
    +198            # atoms_exchanges = ['N']
    +199            # if 'O' in classe_dict.keys():
    +200            #
    +201            #    Oxygen_number = classe_dict.get("O")
    +202            #    for atom in atoms_exchanges:
    +203            #        if atom in classe_dict.keys():
    +204            #            Oxygen_number += classe_dict.get(atom)
    +205            #
    +206            #    self.mass_spectrum_obj.molecular_search_settings.min_dbe = (Oxygen_number/3) - 0.5
    +207            #    self.mass_spectrum_obj.molecular_search_settings.max_dbe = Oxygen_number*3 + 0.5 + 2
    +208            #
    +209            # else:
    +210
    +211            self.mass_spectrum_obj.molecular_search_settings.use_pah_line_rule = True
    +212
    +213        def run_search(possible_formulas_dict, mass_spectrum_obj, min_abundance):
    +214            """Run the molecular formula search for each mass spectrum peak.
    +215
    +216            Parameters
    +217            ----------
    +218            possible_formulas_dict : dict
    +219                A dictionary of possible molecular formulas.
    +220            mass_spectrum_obj : MassSpectrum
    +221                The mass spectrum object.
    +222            min_abundance : float
    +223                The minimum abundance threshold.
    +224
    +225            Returns
    +226            -------
    +227            list
    +228                A list of assigned peak indexes.
    +229            """
    +230            all_assigned_indexes = list()
     231
    -232                #get mono isotopic peaks that was added a molecular formula obj
    -233                #TODO update error variables
    -234
    -235                possible_formulas_nominal = possible_formulas_dict.get(nominal_mz)
    -236                
    -237                if possible_formulas_nominal:
    +232            for ms_peak in mass_spectrum_obj.sort_by_abundance():
    +233                if ms_peak:
    +234                    continue
    +235                # already assigned a molecular formula
    +236
    +237                nominal_mz = ms_peak.nominal_mz_exp
     238
    -239                    ms_peak_indexes = SearchMolecularFormulaWorker().find_formulas(possible_formulas_nominal, min_abundance, mass_spectrum_obj, ms_peak)    
    -240
    -241                    all_assigned_indexes.extend(ms_peak_indexes)
    -242            
    -243            
    -244            #filter peaks by percentile threshold of found isotopologues 
    -245            all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(all_assigned_indexes, mass_spectrum_obj)
    -246
    -247            #filter noise by kendrick density
    -248            all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(all_assigned_indexes, mass_spectrum_obj)
    -249
    -250            #filter per min peaks per mono isotopic class
    -251            # this function should always be the last filter, 
    -252            # thefore no need to return remaining indexes
    -253            MolecularFormulaSearchFilters().check_min_peaks(all_assigned_indexes, mass_spectrum_obj)
    -254
    -255        #error_average = self.mass_spectrum_obj.molecular_search_settings.mz_error_average
    -256        
    -257        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    -258        
    -259        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    -260
    -261        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    -262
    -263        min_abundance = self.mass_spectrum_obj.min_abundance
    -264
    -265        list_classes_str = [i[0] for i in assign_classes_order_tuples]
    -266
    -267        pbar = tqdm.tqdm(assign_classes_order_tuples)
    -268        
    -269        dict_molecular_lookup_table = self.get_dict_molecular_database(list_classes_str)
    +239                # get mono isotopic peaks that was added a molecular formula obj
    +240                # TODO update error variables
    +241
    +242                possible_formulas_nominal = possible_formulas_dict.get(nominal_mz)
    +243
    +244                if possible_formulas_nominal:
    +245                    ms_peak_indexes = SearchMolecularFormulaWorker().find_formulas(
    +246                        possible_formulas_nominal,
    +247                        min_abundance,
    +248                        mass_spectrum_obj,
    +249                        ms_peak,
    +250                    )
    +251
    +252                    all_assigned_indexes.extend(ms_peak_indexes)
    +253
    +254            # filter peaks by percentile threshold of found isotopologues
    +255            all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(
    +256                all_assigned_indexes, mass_spectrum_obj
    +257            )
    +258
    +259            # filter noise by kendrick density
    +260            all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(
    +261                all_assigned_indexes, mass_spectrum_obj
    +262            )
    +263
    +264            # filter per min peaks per mono isotopic class
    +265            # this function should always be the last filter,
    +266            # thefore no need to return remaining indexes
    +267            MolecularFormulaSearchFilters().check_min_peaks(
    +268                all_assigned_indexes, mass_spectrum_obj
    +269            )
     270
    -271        for classe_tuple in pbar:
    +271        # error_average = self.mass_spectrum_obj.molecular_search_settings.mz_error_average
     272
    -273            classe_str  = classe_tuple[0]
    -274            classe_dict = classe_tuple[1]
    -275            
    -276            set_min_max_dbe_by_oxygen(classe_dict)
    -277            
    -278            #if len(classe_dict.keys()) == 2:
    -279            #    if classe_dict.get('S') == 1:
    -280            #       continue
    -281            # limits the dbe by the Ox class most abundant,
    -282            # need to add other atoms contribution to be more accurate
    -283            # but +-7 should be sufficient to cover the range 
    -284            
    -285            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    +273        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    +274
    +275        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    +276
    +277        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    +278
    +279        min_abundance = self.mass_spectrum_obj.min_abundance
    +280
    +281        list_classes_str = [i[0] for i in assign_classes_order_tuples]
    +282
    +283        pbar = tqdm.tqdm(assign_classes_order_tuples)
    +284
    +285        dict_molecular_lookup_table = self.get_dict_molecular_database(list_classes_str)
     286
    -287                    #tqdm.set_description_str(desc=None, refresh=True)
    -288                    pbar.set_description_str(desc="Started molecular formula search for class %s, (de)protonated " % classe_str, refresh=True)
    -289
    -290                    ion_type = Labels.protonated_de_ion
    -291
    -292                    possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(classe_str)
    -293                    
    -294                    if possible_formulas_dict:
    -295
    -296                        run_search(possible_formulas_dict, self.mass_spectrum_obj, min_abundance)
    -297
    -298            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    +287        for classe_tuple in pbar:
    +288            classe_str = classe_tuple[0]
    +289            classe_dict = classe_tuple[1]
    +290
    +291            set_min_max_dbe_by_oxygen(classe_dict)
    +292
    +293            # if len(classe_dict.keys()) == 2:
    +294            #    if classe_dict.get('S') == 1:
    +295            #       continue
    +296            # limits the dbe by the Ox class most abundant,
    +297            # need to add other atoms contribution to be more accurate
    +298            # but +-7 should be sufficient to cover the range
     299
    -300                    #print("Started molecular formula search for class %s,  radical" % classe_str)
    -301                    pbar.set_description_str(desc="Started molecular formula search for class %s, radical" % classe_str, refresh=True)
    -302
    -303                    ion_type = Labels.radical_ion
    -304                    
    -305                    possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(classe_str)
    -306                    
    -307                    if possible_formulas_dict:
    -308
    -309                        run_search(possible_formulas_dict, self.mass_spectrum_obj, min_abundance)
    -310
    -311            # looks for adduct, used_atom_valences should be 0 
    -312            # this code does not support H exchance by halogen atoms
    -313            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    -314                
    -315                pbar.set_description_str(desc="Started molecular formula search for class %s, adduct" % classe_str, refresh=True)
    -316                #print("Started molecular formula search for class %s, adduct" % classe_str)
    -317                
    -318                ion_type = Labels.radical_ion
    -319                
    -320                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(classe_str)
    -321
    -322                ''' commenting  unfinished code for release 2.0, see end of file for details'''
    -323                # possible_formulas_adduct =self.add_adducts(possible_formulas_dict)
    -324
    -325                # if possible_formulas_adduct:
    +300            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    +301                # tqdm.set_description_str(desc=None, refresh=True)
    +302                pbar.set_description_str(
    +303                    desc="Started molecular formula search for class %s, (de)protonated "
    +304                    % classe_str,
    +305                    refresh=True,
    +306                )
    +307
    +308                ion_type = Labels.protonated_de_ion
    +309
    +310                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
    +311                    classe_str
    +312                )
    +313
    +314                if possible_formulas_dict:
    +315                    run_search(
    +316                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
    +317                    )
    +318
    +319            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    +320                # print("Started molecular formula search for class %s,  radical" % classe_str)
    +321                pbar.set_description_str(
    +322                    desc="Started molecular formula search for class %s, radical"
    +323                    % classe_str,
    +324                    refresh=True,
    +325                )
     326
    -327                run_search(possible_formulas_dict, self.mass_spectrum_obj, min_abundance)
    -328        
    -329        
    -330    def get_dict_molecular_database(self, classe_str_list):
    -331        """ Get the molecular database as a dictionary.
    +327                ion_type = Labels.radical_ion
    +328
    +329                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
    +330                    classe_str
    +331                )
     332
    -333        Parameters
    -334        ----------
    -335        classe_str_list : list  
    -336            A list of class names.
    +333                if possible_formulas_dict:
    +334                    run_search(
    +335                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
    +336                    )
     337
    -338        Returns
    -339        -------
    -340        dict
    -341            A dictionary containing the molecular database.
    -342        """
    -343        nominal_mzs = self.nominal_mzs
    -344        mf_search_settings = self.mass_spectrum_obj.molecular_search_settings
    -345        ion_charge = self.mass_spectrum_obj.polarity
    -346
    -347        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    -348        
    -349        dict_res = {}
    -350
    -351        if mf_search_settings.isProtonated:
    -352            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.protonated_de_ion, nominal_mzs, ion_charge, mf_search_settings)    
    -353            
    -354        if mf_search_settings.isRadical:
    -355            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.radical_ion, nominal_mzs, ion_charge, mf_search_settings)    
    +338            # looks for adduct, used_atom_valences should be 0
    +339            # this code does not support H exchance by halogen atoms
    +340            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    +341                pbar.set_description_str(
    +342                    desc="Started molecular formula search for class %s, adduct"
    +343                    % classe_str,
    +344                    refresh=True,
    +345                )
    +346                # print("Started molecular formula search for class %s, adduct" % classe_str)
    +347
    +348                ion_type = Labels.radical_ion
    +349
    +350                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
    +351                    classe_str
    +352                )
    +353
    +354                """ commenting  unfinished code for release 2.0, see end of file for details"""
    +355                # possible_formulas_adduct =self.add_adducts(possible_formulas_dict)
     356
    -357        if mf_search_settings.isAdduct:
    -358            
    -359            adduct_list = mf_search_settings.adduct_atoms_neg if ion_charge < 0 else mf_search_settings.adduct_atoms_pos
    -360            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.adduct_ion, nominal_mzs, ion_charge, mf_search_settings, adducts=adduct_list)    
    -361
    -362        return dict_res
    -363
    -364    def ox_classes_and_peaks_in_order_(self) -> dict:
    -365        """ Get the oxygen classes and associated peaks in order.
    -366
    -367        Returns
    -368        -------
    -369        dict 
    -370            A dictionary containing the oxygen classes and associated peaks.
    -371        """
    -372        # order is only valid in python 3.4 and above
    -373        # change to OrderedDict if your version is lower
    -374        dict_ox_class_and_ms_peak = dict()
    -375        
    -376        for mspeak in self.mass_spectrum_obj.sort_by_abundance(reverse=True):
    -377            
    -378            #change this filter to cia filter, give more option here, confidence, number of isotopologue found etc
    +357                # if possible_formulas_adduct:
    +358
    +359                run_search(
    +360                    possible_formulas_dict, self.mass_spectrum_obj, min_abundance
    +361                )
    +362
    +363    def get_dict_molecular_database(self, classe_str_list):
    +364        """Get the molecular database as a dictionary.
    +365
    +366        Parameters
    +367        ----------
    +368        classe_str_list : list
    +369            A list of class names.
    +370
    +371        Returns
    +372        -------
    +373        dict
    +374            A dictionary containing the molecular database.
    +375        """
    +376        nominal_mzs = self.nominal_mzs
    +377        mf_search_settings = self.mass_spectrum_obj.molecular_search_settings
    +378        ion_charge = self.mass_spectrum_obj.polarity
     379
    -380            ox_classe = mspeak.best_molecular_formula_candidate.class_label
    -381            
    -382            if ox_classe in dict_ox_class_and_ms_peak.keys():
    -383                
    -384                #get the most abundant of the same ox class
    -385                if mspeak.abundance > dict_ox_class_and_ms_peak[ox_classe].abundance:
    -386
    -387                    dict_ox_class_and_ms_peak[ox_classe] = (mspeak)
    -388            else:
    -389                    
    -390                dict_ox_class_and_ms_peak[ox_classe] = (mspeak)
    -391        
    -392        return dict_ox_class_and_ms_peak
    -393
    -394    def get_classes_in_order(self, dict_ox_class_and_ms_peak)-> [(str, dict)]: 
    -395        """ Get the classes in order.
    -396        
    -397        Parameters
    -398        ----------
    -399        dict_ox_class_and_ms_peak : dict
    -400            A dictionary containing the oxygen classes and associated peaks.
    -401        
    -402        Returns
    -403        -------
    -404        list 
    -405            A list of tuples containing the class names and dictionaries of class attributes.
    -406
    -407        Notes
    -408        -----
    -409        structure is 
    -410            ('HC', {'HC': 1})
    -411        """
    -412        
    -413        
    -414        usedAtoms = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    -415        
    -416        usedAtoms.pop("C")
    -417        usedAtoms.pop("H")
    -418        usedAtoms.pop("O")
    -419
    -420        min_n, max_n = usedAtoms.get('N') if usedAtoms.get('N') else (0,0)
    -421        min_s, max_s = usedAtoms.get('S') if usedAtoms.get('S') else (0,0)
    -422        min_p, max_p = usedAtoms.get('P') if usedAtoms.get('P') else (0,0)
    -423
    -424        possible_n = [n for n in range(min_n, max_n + 1)]
    -425        possible_s = [s for s in range(min_s, max_s + 1)]
    -426        possible_p = [p for p in range(min_p, max_p + 1)]
    -427        
    -428        #used to enforce order for commum atoms 
    -429        # and track the atom index in on the tuple in all_atoms_tuples
    -430        atoms_in_order = ['N', 'S', 'P']
    -431        
    -432        #do number atoms prodcut and remove then from the usedAtoms dict
    -433        all_atoms_tuples = product(possible_n, possible_s, possible_p)
    -434        for atom in atoms_in_order:
    -435            
    -436            usedAtoms.pop(atom, None)
    -437        
    -438        #iterate over other atoms besides C,H, N, O, S and P
    -439        
    -440        for selected_atom_label, min_max_tuple in usedAtoms.items():
    -441            
    -442            min_x = min_max_tuple[0]
    -443            max_x = min_max_tuple[1]
    +380        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    +381
    +382        dict_res = {}
    +383
    +384        if mf_search_settings.isProtonated:
    +385            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(
    +386                classe_str_list,
    +387                Labels.protonated_de_ion,
    +388                nominal_mzs,
    +389                ion_charge,
    +390                mf_search_settings,
    +391            )
    +392
    +393        if mf_search_settings.isRadical:
    +394            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(
    +395                classe_str_list,
    +396                Labels.radical_ion,
    +397                nominal_mzs,
    +398                ion_charge,
    +399                mf_search_settings,
    +400            )
    +401
    +402        if mf_search_settings.isAdduct:
    +403            adduct_list = (
    +404                mf_search_settings.adduct_atoms_neg
    +405                if ion_charge < 0
    +406                else mf_search_settings.adduct_atoms_pos
    +407            )
    +408            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(
    +409                classe_str_list,
    +410                Labels.adduct_ion,
    +411                nominal_mzs,
    +412                ion_charge,
    +413                mf_search_settings,
    +414                adducts=adduct_list,
    +415            )
    +416
    +417        return dict_res
    +418
    +419    def ox_classes_and_peaks_in_order_(self) -> dict:
    +420        """Get the oxygen classes and associated peaks in order.
    +421
    +422        Returns
    +423        -------
    +424        dict
    +425            A dictionary containing the oxygen classes and associated peaks.
    +426        """
    +427        # order is only valid in python 3.4 and above
    +428        # change to OrderedDict if your version is lower
    +429        dict_ox_class_and_ms_peak = dict()
    +430
    +431        for mspeak in self.mass_spectrum_obj.sort_by_abundance(reverse=True):
    +432            # change this filter to cia filter, give more option here, confidence, number of isotopologue found etc
    +433
    +434            ox_classe = mspeak.best_molecular_formula_candidate.class_label
    +435
    +436            if ox_classe in dict_ox_class_and_ms_peak.keys():
    +437                # get the most abundant of the same ox class
    +438                if mspeak.abundance > dict_ox_class_and_ms_peak[ox_classe].abundance:
    +439                    dict_ox_class_and_ms_peak[ox_classe] = mspeak
    +440            else:
    +441                dict_ox_class_and_ms_peak[ox_classe] = mspeak
    +442
    +443        return dict_ox_class_and_ms_peak
     444
    -445            possible_x = [x for x in range(min_x, max_x + 1)]
    -446            all_atoms_tuples = product(all_atoms_tuples, possible_x)
    -447            
    -448            #merge tuples
    -449            all_atoms_tuples = [all_atoms_combined[0] + (all_atoms_combined[1],) for all_atoms_combined in
    -450                                all_atoms_tuples]
    -451            
    -452            #add atom label to the atoms_in_order list
    -453            
    -454            #important to index where the atom position is in on the tuple in all_atoms_tuples
    -455            atoms_in_order.append(selected_atom_label)
    -456
    -457        classes_strings_dict_tuples, hc_class = self.get_class_strings_dict(all_atoms_tuples, atoms_in_order)
    -458
    -459        combined_classes = self.combine_ox_class_with_other(atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak)
    -460        
    -461        combination_classes_ordered = self.sort_classes(atoms_in_order, combined_classes)
    -462        
    -463        oxygen_class_str_dict_tuple = [(ox_class, mspeak[0].class_dict) for ox_class, mspeak in dict_ox_class_and_ms_peak.items()] 
    -464
    -465        ## add classes together and ignores classes selected from the main series
    -466        for class_tuple in  combination_classes_ordered:
    -467            if class_tuple not in oxygen_class_str_dict_tuple:
    -468                oxygen_class_str_dict_tuple.append(class_tuple)
    -469        
    -470        return oxygen_class_str_dict_tuple
    -471
    -472    @staticmethod
    -473    def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(str, dict)]: 
    -474        """ Get the class strings and dictionaries.
    -475        
    -476        Parameters
    -477        ----------
    -478        all_atoms_tuples : tuple
    -479            A tuple containing the atoms.
    -480        atoms_in_order : list
    -481            A list of atoms in order.
    -482        
    -483        Returns
    -484        --------
    -485        list 
    -486            A list of tuples containing the class strings and dictionaries.
    -487        
    -488        """
    -489        classe_list= []
    -490        hc_class = []
    -491        
    -492        for all_atoms_tuple in all_atoms_tuples:
    -493            
    -494            classe_str = ''
    -495            classe_dict = dict()
    -496            
    -497            for each_atoms_index, atoms_number in enumerate(all_atoms_tuple):
    -498                
    -499                if atoms_number != 0:
    -500                    
    -501                    classe_str = (classe_str + atoms_in_order[each_atoms_index] + str(atoms_number) + ' ')
    -502                    
    -503                    classe_dict[atoms_in_order[each_atoms_index]] = atoms_number
    -504
    -505            classe_str = classe_str.strip()
    -506            
    -507            if len(classe_str) > 0:
    -508            
    -509                classe_list.append((classe_str,classe_dict))
    +445    def get_classes_in_order(self, dict_ox_class_and_ms_peak) -> [(str, dict)]:
    +446        """Get the classes in order.
    +447
    +448        Parameters
    +449        ----------
    +450        dict_ox_class_and_ms_peak : dict
    +451            A dictionary containing the oxygen classes and associated peaks.
    +452
    +453        Returns
    +454        -------
    +455        list
    +456            A list of tuples containing the class names and dictionaries of class attributes.
    +457
    +458        Notes
    +459        -----
    +460        structure is
    +461            ('HC', {'HC': 1})
    +462        """
    +463
    +464        usedAtoms = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    +465
    +466        usedAtoms.pop("C")
    +467        usedAtoms.pop("H")
    +468        usedAtoms.pop("O")
    +469
    +470        min_n, max_n = usedAtoms.get("N") if usedAtoms.get("N") else (0, 0)
    +471        min_s, max_s = usedAtoms.get("S") if usedAtoms.get("S") else (0, 0)
    +472        min_p, max_p = usedAtoms.get("P") if usedAtoms.get("P") else (0, 0)
    +473
    +474        possible_n = [n for n in range(min_n, max_n + 1)]
    +475        possible_s = [s for s in range(min_s, max_s + 1)]
    +476        possible_p = [p for p in range(min_p, max_p + 1)]
    +477
    +478        # used to enforce order for commum atoms
    +479        # and track the atom index in on the tuple in all_atoms_tuples
    +480        atoms_in_order = ["N", "S", "P"]
    +481
    +482        # do number atoms prodcut and remove then from the usedAtoms dict
    +483        all_atoms_tuples = product(possible_n, possible_s, possible_p)
    +484        for atom in atoms_in_order:
    +485            usedAtoms.pop(atom, None)
    +486
    +487        # iterate over other atoms besides C,H, N, O, S and P
    +488
    +489        for selected_atom_label, min_max_tuple in usedAtoms.items():
    +490            min_x = min_max_tuple[0]
    +491            max_x = min_max_tuple[1]
    +492
    +493            possible_x = [x for x in range(min_x, max_x + 1)]
    +494            all_atoms_tuples = product(all_atoms_tuples, possible_x)
    +495
    +496            # merge tuples
    +497            all_atoms_tuples = [
    +498                all_atoms_combined[0] + (all_atoms_combined[1],)
    +499                for all_atoms_combined in all_atoms_tuples
    +500            ]
    +501
    +502            # add atom label to the atoms_in_order list
    +503
    +504            # important to index where the atom position is in on the tuple in all_atoms_tuples
    +505            atoms_in_order.append(selected_atom_label)
    +506
    +507        classes_strings_dict_tuples, hc_class = self.get_class_strings_dict(
    +508            all_atoms_tuples, atoms_in_order
    +509        )
     510
    -511            elif len(classe_str) == 0:
    -512
    -513                hc_class.append(('HC', {'HC':1}))
    -514        
    -515        return classe_list, hc_class
    -516    
    -517    @staticmethod
    -518    def combine_ox_class_with_other( atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak) -> [dict]:
    -519        """ Combine the oxygen classes with other classes.
    -520        
    -521        Parameters
    -522        ----------
    -523        atoms_in_order : list
    -524            A list of atoms in order.
    -525        classes_strings_dict_tuples : list
    -526            
    -527        dict_ox_class_and_ms_peak : dict
    -528            A dictionary containing the oxygen classes and associated peaks.
    -529        
    -530        Returns
    -531        -------
    -532        list 
    -533            A list of dictionaries.
    -534        """
    -535        #sort methods that uses the key of classes dictionary and the atoms_in_order as reference
    -536        # c_tuple[1] = class_dict, because is one key:value map we loop through keys and get the first item only 
    -537        # sort by len first then sort based on the atoms_in_order list
    -538        atoms_in_order = Atoms.atoms_order
    -539
    -540        Oxygen_mfs = dict_ox_class_and_ms_peak.values()
    -541        
    -542        
    -543        #sort_method = lambda word: (len(word[0]), [atoms_in_order.index(atom) for atom in list( word[1].keys())])
    -544        
    -545        #print(classes_strings_dict_tuples)
    -546        #classe_in_order = sorted(classes_strings_dict_tuples, key = sort_method)
    -547        #print(classe_in_order)
    -548        
    -549        combination = []
    -550        
    -551        # _ ignoring the class_str
    -552        for _ , other_classe_dict in classes_strings_dict_tuples:
    -553          
    -554           #combination.extend([[other_classe_str + ' ' + Oxygen_mf[0].class_label , {**other_classe_dict, **Oxygen_mf[0].class_dict}] for Oxygen_mf in Oxygen_mfs])
    -555           combination.extend([{**other_classe_dict, **Oxygen_mf[0].class_dict} for Oxygen_mf in Oxygen_mfs])
    -556 
    -557        return combination
    -558    
    -559    @staticmethod
    -560    def sort_classes( atoms_in_order, combination_tuples) -> [(str, dict)]: 
    -561        """ Sort the classes.
    -562        
    -563        Parameters
    -564        ----------
    -565        atoms_in_order : list
    -566            A list of atoms in order.
    -567        combination_tuples : list
    -568            
    -569        Returns
    -570        -------
    -571        list 
    -572            A list of tuples containing the class strings and dictionaries.
    -573        """
    -574        join_list_of_list_classes = list()
    -575        atoms_in_order =  ['N','S','P','O'] + atoms_in_order[3:]
    -576        
    -577        sort_method = lambda atoms_keys: [atoms_in_order.index(atoms_keys)] #(len(word[0]), print(word[1]))#[atoms_in_order.index(atom) for atom in list( word[1].keys())])
    -578        for class_dict in combination_tuples:
    -579            
    -580            sorted_dict_keys = sorted(class_dict, key = sort_method)
    -581            class_str = ' '.join([atom + str(class_dict[atom]) for atom in sorted_dict_keys])
    -582            new_class_dict = { atom: class_dict[atom] for atom in sorted_dict_keys}
    -583            join_list_of_list_classes.append((class_str, new_class_dict))
    -584        
    -585        return join_list_of_list_classes
    -586 
    -587    
    -588    '''
    -589    The code bellow is unfinished, might be added to next release, 2.1
    -590    def add_adducts(self, possible_formulas):
    -591        """ Add adducts to the molecular formula candidates.
    -592
    -593        Parameters
    -594        ----------
    -595        possible_formulas : dict
    -596            A dictionary of possible molecular formulas.
    -597        
    -598        Returns
    -599        -------
    -600        dict 
    -601            A dictionary of possible molecular formulas with adducts.
    -602        
    -603        """
    -604        ion_type = Labels.adduct_ion
    -605
    -606        if self.mass_spectrum_obj.polarity < 0:
    -607            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
    -608            molform_model = MolecularFormulaDict
    -609        else:
    -610            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
    -611            molform_model = MolecularFormulaTablePos
    -612
    -613        new_dict = {}
    -614        
    -615        for nominal_mz, list_formulas in possible_formulas.items():
    -616            
    -617            for adduct_atom in adduct_atoms:
    -618                
    -619                adduct_atom_mass= Atoms.atomic_masses.get(adduct_atom) 
    +511        combined_classes = self.combine_ox_class_with_other(
    +512            atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
    +513        )
    +514
    +515        combination_classes_ordered = self.sort_classes(
    +516            atoms_in_order, combined_classes
    +517        )
    +518
    +519        oxygen_class_str_dict_tuple = [
    +520            (ox_class, mspeak[0].class_dict)
    +521            for ox_class, mspeak in dict_ox_class_and_ms_peak.items()
    +522        ]
    +523
    +524        ## add classes together and ignores classes selected from the main series
    +525        for class_tuple in combination_classes_ordered:
    +526            if class_tuple not in oxygen_class_str_dict_tuple:
    +527                oxygen_class_str_dict_tuple.append(class_tuple)
    +528
    +529        return oxygen_class_str_dict_tuple
    +530
    +531    @staticmethod
    +532    def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(str, dict)]:
    +533        """Get the class strings and dictionaries.
    +534
    +535        Parameters
    +536        ----------
    +537        all_atoms_tuples : tuple
    +538            A tuple containing the atoms.
    +539        atoms_in_order : list
    +540            A list of atoms in order.
    +541
    +542        Returns
    +543        --------
    +544        list
    +545            A list of tuples containing the class strings and dictionaries.
    +546
    +547        """
    +548        classe_list = []
    +549        hc_class = []
    +550
    +551        for all_atoms_tuple in all_atoms_tuples:
    +552            classe_str = ""
    +553            classe_dict = dict()
    +554
    +555            for each_atoms_index, atoms_number in enumerate(all_atoms_tuple):
    +556                if atoms_number != 0:
    +557                    classe_str = (
    +558                        classe_str
    +559                        + atoms_in_order[each_atoms_index]
    +560                        + str(atoms_number)
    +561                        + " "
    +562                    )
    +563
    +564                    classe_dict[atoms_in_order[each_atoms_index]] = atoms_number
    +565
    +566            classe_str = classe_str.strip()
    +567
    +568            if len(classe_str) > 0:
    +569                classe_list.append((classe_str, classe_dict))
    +570
    +571            elif len(classe_str) == 0:
    +572                hc_class.append(("HC", {"HC": 1}))
    +573
    +574        return classe_list, hc_class
    +575
    +576    @staticmethod
    +577    def combine_ox_class_with_other(
    +578        atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
    +579    ) -> [dict]:
    +580        """Combine the oxygen classes with other classes.
    +581
    +582        Parameters
    +583        ----------
    +584        atoms_in_order : list
    +585            A list of atoms in order.
    +586        classes_strings_dict_tuples : list
    +587
    +588        dict_ox_class_and_ms_peak : dict
    +589            A dictionary containing the oxygen classes and associated peaks.
    +590
    +591        Returns
    +592        -------
    +593        list
    +594            A list of dictionaries.
    +595        """
    +596        # sort methods that uses the key of classes dictionary and the atoms_in_order as reference
    +597        # c_tuple[1] = class_dict, because is one key:value map we loop through keys and get the first item only
    +598        # sort by len first then sort based on the atoms_in_order list
    +599        atoms_in_order = Atoms.atoms_order
    +600
    +601        Oxygen_mfs = dict_ox_class_and_ms_peak.values()
    +602
    +603        # sort_method = lambda word: (len(word[0]), [atoms_in_order.index(atom) for atom in list( word[1].keys())])
    +604
    +605        # print(classes_strings_dict_tuples)
    +606        # classe_in_order = sorted(classes_strings_dict_tuples, key = sort_method)
    +607        # print(classe_in_order)
    +608
    +609        combination = []
    +610
    +611        # _ ignoring the class_str
    +612        for _, other_classe_dict in classes_strings_dict_tuples:
    +613            # combination.extend([[other_classe_str + ' ' + Oxygen_mf[0].class_label , {**other_classe_dict, **Oxygen_mf[0].class_dict}] for Oxygen_mf in Oxygen_mfs])
    +614            combination.extend(
    +615                [
    +616                    {**other_classe_dict, **Oxygen_mf[0].class_dict}
    +617                    for Oxygen_mf in Oxygen_mfs
    +618                ]
    +619            )
     620
    -621                for molecularFormulaTable in  list_formulas:
    -622                    
    -623                    formula_dict = json.loads(molecularFormulaTable.mol_formula)
    -624                    
    -625                    if adduct_atom in formula_dict.keys():
    -626                        formula_dict[adduct_atom] += 1  
    -627                    else:
    -628                        formula_dict[adduct_atom] = 1      
    -629                    
    -630                    mz = adduct_atom_mass + molecularFormulaTable.mz
    -631                    nm = int(mz)
    -632                    
    -633                    new_formul_obj = molform_model( **{"mol_formula" : json.dumps(formula_dict),
    -634                                            "mz" : mz,
    -635                                            "ion_type" : ion_type,
    -636                                            "nominal_mz" : nm,
    -637                                            "ion_charge" : molecularFormulaTable.ion_charge,
    -638                                            "classe" : molecularFormulaTable.classe,
    -639                                            "C" : molecularFormulaTable.C,
    -640                                            "H" : molecularFormulaTable.H,
    -641                                            "N" : molecularFormulaTable.N,
    -642                                            "O" : molecularFormulaTable.O,
    -643                                            "S" : molecularFormulaTable.S,
    -644                                            "P" : molecularFormulaTable.P,
    -645                                            "H_C" : molecularFormulaTable.H_C,
    -646                                            "O_C" : molecularFormulaTable.O_C,
    -647                                            "DBE" : molecularFormulaTable.DBE,
    -648                                            })
    -649                    if nm in new_dict:
    -650                        new_dict[nm].append(new_formul_obj)
    -651                    
    -652                    else:
    -653                        new_dict[nm]= [new_formul_obj]
    -654                    
    -655        return new_dict
    -656
    -657    '''
    +621        return combination
    +622
    +623    @staticmethod
    +624    def sort_classes(atoms_in_order, combination_tuples) -> [(str, dict)]:
    +625        """Sort the classes.
    +626
    +627        Parameters
    +628        ----------
    +629        atoms_in_order : list
    +630            A list of atoms in order.
    +631        combination_tuples : list
    +632
    +633        Returns
    +634        -------
    +635        list
    +636            A list of tuples containing the class strings and dictionaries.
    +637        """
    +638        join_list_of_list_classes = list()
    +639        atoms_in_order = ["N", "S", "P", "O"] + atoms_in_order[3:]
    +640
    +641        sort_method = (
    +642            lambda atoms_keys: [atoms_in_order.index(atoms_keys)]
    +643        )  # (len(word[0]), print(word[1]))#[atoms_in_order.index(atom) for atom in list( word[1].keys())])
    +644        for class_dict in combination_tuples:
    +645            sorted_dict_keys = sorted(class_dict, key=sort_method)
    +646            class_str = " ".join(
    +647                [atom + str(class_dict[atom]) for atom in sorted_dict_keys]
    +648            )
    +649            new_class_dict = {atom: class_dict[atom] for atom in sorted_dict_keys}
    +650            join_list_of_list_classes.append((class_str, new_class_dict))
    +651
    +652        return join_list_of_list_classes
    +653
    +654    '''
    +655    The code bellow is unfinished, might be added to next release, 2.1
    +656    def add_adducts(self, possible_formulas):
    +657        """ Add adducts to the molecular formula candidates.
    +658
    +659        Parameters
    +660        ----------
    +661        possible_formulas : dict
    +662            A dictionary of possible molecular formulas.
    +663        
    +664        Returns
    +665        -------
    +666        dict 
    +667            A dictionary of possible molecular formulas with adducts.
    +668        
    +669        """
    +670        ion_type = Labels.adduct_ion
    +671
    +672        if self.mass_spectrum_obj.polarity < 0:
    +673            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
    +674            molform_model = MolecularFormulaDict
    +675        else:
    +676            adduct_atoms = self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_pos
    +677            molform_model = MolecularFormulaTablePos
    +678
    +679        new_dict = {}
    +680        
    +681        for nominal_mz, list_formulas in possible_formulas.items():
    +682            
    +683            for adduct_atom in adduct_atoms:
    +684                
    +685                adduct_atom_mass= Atoms.atomic_masses.get(adduct_atom) 
    +686
    +687                for molecularFormulaTable in  list_formulas:
    +688                    
    +689                    formula_dict = json.loads(molecularFormulaTable.mol_formula)
    +690                    
    +691                    if adduct_atom in formula_dict.keys():
    +692                        formula_dict[adduct_atom] += 1  
    +693                    else:
    +694                        formula_dict[adduct_atom] = 1      
    +695                    
    +696                    mz = adduct_atom_mass + molecularFormulaTable.mz
    +697                    nm = int(mz)
    +698                    
    +699                    new_formul_obj = molform_model( **{"mol_formula" : json.dumps(formula_dict),
    +700                                            "mz" : mz,
    +701                                            "ion_type" : ion_type,
    +702                                            "nominal_mz" : nm,
    +703                                            "ion_charge" : molecularFormulaTable.ion_charge,
    +704                                            "classe" : molecularFormulaTable.classe,
    +705                                            "C" : molecularFormulaTable.C,
    +706                                            "H" : molecularFormulaTable.H,
    +707                                            "N" : molecularFormulaTable.N,
    +708                                            "O" : molecularFormulaTable.O,
    +709                                            "S" : molecularFormulaTable.S,
    +710                                            "P" : molecularFormulaTable.P,
    +711                                            "H_C" : molecularFormulaTable.H_C,
    +712                                            "O_C" : molecularFormulaTable.O_C,
    +713                                            "DBE" : molecularFormulaTable.DBE,
    +714                                            })
    +715                    if nm in new_dict:
    +716                        new_dict[nm].append(new_formul_obj)
    +717                    
    +718                    else:
    +719                        new_dict[nm]= [new_formul_obj]
    +720                    
    +721        return new_dict
    +722
    +723    '''
     
    @@ -1424,16 +1556,16 @@
    Methods
    • run(). -Run the priority assignment process.
    • +Run the priority assignment process.
    • create_data_base(). -Create the molecular database for the specified heteroatomic classes.
    • +Create the molecular database for the specified heteroatomic classes.
    • run_worker_mass_spectrum(assign_classes_order_tuples). -Run the molecular formula search for each class in the specified order.
    • +Run the molecular formula search for each class in the specified order.
    • get_dict_molecular_database(classe_str_list). -Get the molecular database as a dictionary.
    • +Get the molecular database as a dictionary.
    • ox_classes_and_peaks_in_order_(). -Get the oxygen classes and associated peaks in order.
    • -
    • get_classes_in_order(dict_ox_class_and_ms_peak)
      +Get the oxygen classes and associated peaks in order.
    • +
    • get_classes_in_order(dict_ox_class_and_ms_peak) Get the classes in order.
    @@ -1449,24 +1581,24 @@

    Methods
    -
    54    def __init__(self, mass_spectrum_obj, sql_db=False):
    -55        #TODO:- add support for other atoms and adducts: Done
    -56        #        - add dbe range on search runtime : Done
    -57        #        - add docs
    -58        #        - improve performace : Done 
    -59        
    -60        Thread.__init__(self)
    -61        self.mass_spectrum_obj = mass_spectrum_obj
    -62        #  initiated at create_molecular_database()
    -63        #self.dict_molecular_lookup_table = None
    -64        
    -65        if not sql_db:
    -66
    -67            self.sql_db = MolForm_SQL(url=mass_spectrum_obj.molecular_search_settings.url_database)
    -68
    -69        else:
    +            
    55    def __init__(self, mass_spectrum_obj, sql_db=False):
    +56        # TODO:- add support for other atoms and adducts: Done
    +57        #        - add dbe range on search runtime : Done
    +58        #        - add docs
    +59        #        - improve performace : Done
    +60
    +61        Thread.__init__(self)
    +62        self.mass_spectrum_obj = mass_spectrum_obj
    +63        #  initiated at create_molecular_database()
    +64        # self.dict_molecular_lookup_table = None
    +65
    +66        if not sql_db:
    +67            self.sql_db = MolForm_SQL(
    +68                url=mass_spectrum_obj.molecular_search_settings.url_database
    +69            )
     70
    -71            self.sql_db = sql_db
    +71        else:
    +72            self.sql_db = sql_db
     
    @@ -1515,24 +1647,21 @@
    Methods
    -
    73    def run(self):
    -74        """Run the priority assignment process.
    -75        """
    -76        # get Oxygen classes dict and the associate mspeak class 
    +            
    74    def run(self):
    +75        """Run the priority assignment process."""
    +76        # get Oxygen classes dict and the associate mspeak class
     77        # list_of_classes_min_max_dbe = self.class_and_dbes_in_order()
     78        # create database separated to give the user the chance to use mass spec filters
    -79             
    +79
     80        assign_classes_order_str_dict_tuple_list = self.create_data_base()
    -81        
    +81
     82        if assign_classes_order_str_dict_tuple_list:
    -83
    -84            self.run_worker_mass_spectrum(assign_classes_order_str_dict_tuple_list)
    -85
    -86        else:
    +83            self.run_worker_mass_spectrum(assign_classes_order_str_dict_tuple_list)
    +84
    +85        else:
    +86            raise RuntimeError("call create_data_base() first")
     87
    -88            raise RuntimeError('call create_data_base() first')
    -89
    -90        self.sql_db.close()   
    +88        self.sql_db.close()
     
    @@ -1552,67 +1681,71 @@
    Methods
    -
     92    def create_data_base(self):
    - 93        """Create the molecular database for the specified heteroatomic classes.
    - 94
    - 95        Returns
    - 96        -------
    - 97        assign_classes_order_str_dict_tuple_ : list
    - 98            A list of tuples containing the class names and dictionaries of class attributes.
    - 99        """
    -100        def create_molecular_database():
    -101            """ Checks and creates the database entries for the specified heteroatomic classes.
    -102            """
    -103            min_o = min(self.mass_spectrum_obj, key=lambda msp: msp[0]['O'])[0]['O'] - 2
    -104            
    -105            if min_o <= 0:
    -106                min_o = 1
    +            
     90    def create_data_base(self):
    + 91        """Create the molecular database for the specified heteroatomic classes.
    + 92
    + 93        Returns
    + 94        -------
    + 95        assign_classes_order_str_dict_tuple_ : list
    + 96            A list of tuples containing the class names and dictionaries of class attributes.
    + 97        """
    + 98
    + 99        def create_molecular_database():
    +100            """Checks and creates the database entries for the specified heteroatomic classes."""
    +101            min_o = min(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] - 2
    +102
    +103            if min_o <= 0:
    +104                min_o = 1
    +105
    +106            max_o = max(self.mass_spectrum_obj, key=lambda msp: msp[0]["O"])[0]["O"] + 2
     107
    -108            max_o = max(self.mass_spectrum_obj, key=lambda msp: msp[0]['O'])[0]['O'] + 2
    +108            # min_dbe = min(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
     109
    -110            #min_dbe = min(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
    +110            # max_dbe = max(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
     111
    -112            #max_dbe = max(self.mass_spectrum_obj, key=lambda msp: msp[0].dbe)[0].dbe
    +112            # self.lookupTableSettings.use_pah_line_rule = False
     113
    -114            #self.lookupTableSettings.use_pah_line_rule = False
    -115            
    -116            #self.lookupTableSettings.min_dbe = min_dbe/2#min_dbe - 7 if  (min_dbe - 7) > 0 else 0
    -117            
    -118            #self.lookupTableSettings.max_dbe = max_dbe * 2 #max_dbe + 7
    +114            # self.lookupTableSettings.min_dbe = min_dbe/2#min_dbe - 7 if  (min_dbe - 7) > 0 else 0
    +115
    +116            # self.lookupTableSettings.max_dbe = max_dbe * 2 #max_dbe + 7
    +117
    +118            self.mass_spectrum_obj.reset_indexes()
     119
    -120            self.mass_spectrum_obj.reset_indexes()
    +120            self.mass_spectrum_obj.filter_by_noise_threshold()
     121
    -122            self.mass_spectrum_obj.filter_by_noise_threshold()
    +122            # initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
     123
    -124            #initial_ox = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    -125
    -126            self.mass_spectrum_obj.molecular_search_settings.usedAtoms['O'] = (min_o, max_o)
    -127
    -128            self.nominal_mzs = self.mass_spectrum_obj.nominal_mz
    -129
    +124            self.mass_spectrum_obj.molecular_search_settings.usedAtoms["O"] = (
    +125                min_o,
    +126                max_o,
    +127            )
    +128
    +129            self.nominal_mzs = self.mass_spectrum_obj.nominal_mz
     130
     131        # get the most abundant peak and them every 14Da, only allow Ox and its derivatives
     132        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
    -133            print("Getting Oxygen Series") 
    +133            print("Getting Oxygen Series")
     134        find_formula_thread = FindOxygenPeaks(self.mass_spectrum_obj, self.sql_db)
     135        find_formula_thread.run()
    -136        
    -137        #mass spec obj indexes are set to interate over only the peaks with a molecular formula candidate
    +136
    +137        # mass spec obj indexes are set to interate over only the peaks with a molecular formula candidate
     138        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
     139            print("Getting Oxygen Series")
     140        find_formula_thread.set_mass_spec_indexes_by_found_peaks()
    -141        
    -142        #get the Ox class and the DBE for the lowest error molecular formula candidate
    +141
    +142        # get the Ox class and the DBE for the lowest error molecular formula candidate
     143        dict_ox_class_and_ms_peak = self.ox_classes_and_peaks_in_order_()
    -144                      
    +144
     145        # sort the classes by abundance
     146        if self.mass_spectrum_obj.parameters.mass_spectrum.verbose_processing:
     147            print("Getting Oxygen Series Order")
    -148        assign_classes_order_str_dict_tuple_list = self.get_classes_in_order(dict_ox_class_and_ms_peak)
    -149        
    -150        create_molecular_database()
    -151                
    -152        return assign_classes_order_str_dict_tuple_list
    +148        assign_classes_order_str_dict_tuple_list = self.get_classes_in_order(
    +149            dict_ox_class_and_ms_peak
    +150        )
    +151
    +152        create_molecular_database()
    +153
    +154        return assign_classes_order_str_dict_tuple_list
     
    @@ -1639,180 +1772,212 @@
    Returns
    -
    154    def run_worker_mass_spectrum(self, assign_classes_order_tuples):
    -155        """ Run the molecular formula search for each class in the specified order.
    -156
    -157        Parameters
    -158        ----------
    -159        assign_classes_order_tuples : list 
    -160            A list of tuples containing the class names and dictionaries of class attributes.
    -161        """
    -162
    -163        def check_adduct_class(classe_dict):
    -164            """ Check if the class contains any adduct atoms.
    -165
    -166            Parameters
    -167            ----------
    -168            classe_dict : dict
    -169                The dictionary of class attributes.
    -170
    -171            Returns
    -172            -------
    -173            bool
    -174                True if the class contains adduct atoms, False otherwise.
    -175            """
    -176            return any([key in classe_dict.keys() for key in self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg])
    -177        
    -178        def set_min_max_dbe_by_oxygen(classe_dict):
    -179            """ Calculate the minimum and maximum DBE based on the number of oxygen atoms.
    -180
    -181            Parameters
    -182            ----------
    -183            classe_dict : dict 
    -184                The dictionary of class attributes.
    -185            """
    -186            # calculates min and max DBE based on the Oxygen number
    -187            # ref :https://pubs.acs.org/doi/full/10.1021/ac200464q
    -188            # if class does not has O it use the pha rule
    -189            # ref : Vlad Lobodin manuscript to be include here
    -190            
    -191            #atoms_exchanges = ['N']
    -192            #if 'O' in classe_dict.keys():
    -193            #    
    -194            #    Oxygen_number = classe_dict.get("O")
    -195            #    for atom in atoms_exchanges:
    -196            #        if atom in classe_dict.keys():
    -197            #            Oxygen_number += classe_dict.get(atom)
    -198            #
    -199            #    self.mass_spectrum_obj.molecular_search_settings.min_dbe = (Oxygen_number/3) - 0.5 
    -200            #    self.mass_spectrum_obj.molecular_search_settings.max_dbe = Oxygen_number*3 + 0.5 + 2
    -201            #
    -202            #else:
    -203                
    -204            self.mass_spectrum_obj.molecular_search_settings.use_pah_line_rule = True
    -205
    -206        def run_search(possible_formulas_dict, mass_spectrum_obj, min_abundance):
    -207            """ Run the molecular formula search for each mass spectrum peak.
    -208
    -209            Parameters
    -210            ----------
    -211            possible_formulas_dict : dict
    -212                A dictionary of possible molecular formulas.
    -213            mass_spectrum_obj : MassSpectrum
    -214                The mass spectrum object.
    -215            min_abundance : float
    -216                The minimum abundance threshold.
    -217
    -218            Returns
    -219            -------
    -220            list 
    -221                A list of assigned peak indexes.
    -222            """
    -223            all_assigned_indexes = list()
    -224            
    -225            for ms_peak in mass_spectrum_obj.sort_by_abundance():
    -226
    -227                if ms_peak: continue
    -228                #already assigned a molecular formula
    -229               
    -230                nominal_mz  = ms_peak.nominal_mz_exp
    +            
    156    def run_worker_mass_spectrum(self, assign_classes_order_tuples):
    +157        """Run the molecular formula search for each class in the specified order.
    +158
    +159        Parameters
    +160        ----------
    +161        assign_classes_order_tuples : list
    +162            A list of tuples containing the class names and dictionaries of class attributes.
    +163        """
    +164
    +165        def check_adduct_class(classe_dict):
    +166            """Check if the class contains any adduct atoms.
    +167
    +168            Parameters
    +169            ----------
    +170            classe_dict : dict
    +171                The dictionary of class attributes.
    +172
    +173            Returns
    +174            -------
    +175            bool
    +176                True if the class contains adduct atoms, False otherwise.
    +177            """
    +178            return any(
    +179                [
    +180                    key in classe_dict.keys()
    +181                    for key in self.mass_spectrum_obj.molecular_search_settings.adduct_atoms_neg
    +182                ]
    +183            )
    +184
    +185        def set_min_max_dbe_by_oxygen(classe_dict):
    +186            """Calculate the minimum and maximum DBE based on the number of oxygen atoms.
    +187
    +188            Parameters
    +189            ----------
    +190            classe_dict : dict
    +191                The dictionary of class attributes.
    +192            """
    +193            # calculates min and max DBE based on the Oxygen number
    +194            # ref :https://pubs.acs.org/doi/full/10.1021/ac200464q
    +195            # if class does not has O it use the pha rule
    +196            # ref : Vlad Lobodin manuscript to be include here
    +197
    +198            # atoms_exchanges = ['N']
    +199            # if 'O' in classe_dict.keys():
    +200            #
    +201            #    Oxygen_number = classe_dict.get("O")
    +202            #    for atom in atoms_exchanges:
    +203            #        if atom in classe_dict.keys():
    +204            #            Oxygen_number += classe_dict.get(atom)
    +205            #
    +206            #    self.mass_spectrum_obj.molecular_search_settings.min_dbe = (Oxygen_number/3) - 0.5
    +207            #    self.mass_spectrum_obj.molecular_search_settings.max_dbe = Oxygen_number*3 + 0.5 + 2
    +208            #
    +209            # else:
    +210
    +211            self.mass_spectrum_obj.molecular_search_settings.use_pah_line_rule = True
    +212
    +213        def run_search(possible_formulas_dict, mass_spectrum_obj, min_abundance):
    +214            """Run the molecular formula search for each mass spectrum peak.
    +215
    +216            Parameters
    +217            ----------
    +218            possible_formulas_dict : dict
    +219                A dictionary of possible molecular formulas.
    +220            mass_spectrum_obj : MassSpectrum
    +221                The mass spectrum object.
    +222            min_abundance : float
    +223                The minimum abundance threshold.
    +224
    +225            Returns
    +226            -------
    +227            list
    +228                A list of assigned peak indexes.
    +229            """
    +230            all_assigned_indexes = list()
     231
    -232                #get mono isotopic peaks that was added a molecular formula obj
    -233                #TODO update error variables
    -234
    -235                possible_formulas_nominal = possible_formulas_dict.get(nominal_mz)
    -236                
    -237                if possible_formulas_nominal:
    +232            for ms_peak in mass_spectrum_obj.sort_by_abundance():
    +233                if ms_peak:
    +234                    continue
    +235                # already assigned a molecular formula
    +236
    +237                nominal_mz = ms_peak.nominal_mz_exp
     238
    -239                    ms_peak_indexes = SearchMolecularFormulaWorker().find_formulas(possible_formulas_nominal, min_abundance, mass_spectrum_obj, ms_peak)    
    -240
    -241                    all_assigned_indexes.extend(ms_peak_indexes)
    -242            
    -243            
    -244            #filter peaks by percentile threshold of found isotopologues 
    -245            all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(all_assigned_indexes, mass_spectrum_obj)
    -246
    -247            #filter noise by kendrick density
    -248            all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(all_assigned_indexes, mass_spectrum_obj)
    -249
    -250            #filter per min peaks per mono isotopic class
    -251            # this function should always be the last filter, 
    -252            # thefore no need to return remaining indexes
    -253            MolecularFormulaSearchFilters().check_min_peaks(all_assigned_indexes, mass_spectrum_obj)
    -254
    -255        #error_average = self.mass_spectrum_obj.molecular_search_settings.mz_error_average
    -256        
    -257        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    -258        
    -259        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    -260
    -261        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    -262
    -263        min_abundance = self.mass_spectrum_obj.min_abundance
    -264
    -265        list_classes_str = [i[0] for i in assign_classes_order_tuples]
    -266
    -267        pbar = tqdm.tqdm(assign_classes_order_tuples)
    -268        
    -269        dict_molecular_lookup_table = self.get_dict_molecular_database(list_classes_str)
    +239                # get mono isotopic peaks that was added a molecular formula obj
    +240                # TODO update error variables
    +241
    +242                possible_formulas_nominal = possible_formulas_dict.get(nominal_mz)
    +243
    +244                if possible_formulas_nominal:
    +245                    ms_peak_indexes = SearchMolecularFormulaWorker().find_formulas(
    +246                        possible_formulas_nominal,
    +247                        min_abundance,
    +248                        mass_spectrum_obj,
    +249                        ms_peak,
    +250                    )
    +251
    +252                    all_assigned_indexes.extend(ms_peak_indexes)
    +253
    +254            # filter peaks by percentile threshold of found isotopologues
    +255            all_assigned_indexes = MolecularFormulaSearchFilters().filter_isotopologue(
    +256                all_assigned_indexes, mass_spectrum_obj
    +257            )
    +258
    +259            # filter noise by kendrick density
    +260            all_assigned_indexes = MolecularFormulaSearchFilters().filter_kendrick(
    +261                all_assigned_indexes, mass_spectrum_obj
    +262            )
    +263
    +264            # filter per min peaks per mono isotopic class
    +265            # this function should always be the last filter,
    +266            # thefore no need to return remaining indexes
    +267            MolecularFormulaSearchFilters().check_min_peaks(
    +268                all_assigned_indexes, mass_spectrum_obj
    +269            )
     270
    -271        for classe_tuple in pbar:
    +271        # error_average = self.mass_spectrum_obj.molecular_search_settings.mz_error_average
     272
    -273            classe_str  = classe_tuple[0]
    -274            classe_dict = classe_tuple[1]
    -275            
    -276            set_min_max_dbe_by_oxygen(classe_dict)
    -277            
    -278            #if len(classe_dict.keys()) == 2:
    -279            #    if classe_dict.get('S') == 1:
    -280            #       continue
    -281            # limits the dbe by the Ox class most abundant,
    -282            # need to add other atoms contribution to be more accurate
    -283            # but +-7 should be sufficient to cover the range 
    -284            
    -285            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    +273        kmd_base = self.mass_spectrum_obj.mspeaks_settings.kendrick_base
    +274
    +275        self.mass_spectrum_obj.change_kendrick_base_all_mspeaks(kmd_base)
    +276
    +277        ClusteringFilter().filter_kendrick(self.mass_spectrum_obj)
    +278
    +279        min_abundance = self.mass_spectrum_obj.min_abundance
    +280
    +281        list_classes_str = [i[0] for i in assign_classes_order_tuples]
    +282
    +283        pbar = tqdm.tqdm(assign_classes_order_tuples)
    +284
    +285        dict_molecular_lookup_table = self.get_dict_molecular_database(list_classes_str)
     286
    -287                    #tqdm.set_description_str(desc=None, refresh=True)
    -288                    pbar.set_description_str(desc="Started molecular formula search for class %s, (de)protonated " % classe_str, refresh=True)
    -289
    -290                    ion_type = Labels.protonated_de_ion
    -291
    -292                    possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(classe_str)
    -293                    
    -294                    if possible_formulas_dict:
    -295
    -296                        run_search(possible_formulas_dict, self.mass_spectrum_obj, min_abundance)
    -297
    -298            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    +287        for classe_tuple in pbar:
    +288            classe_str = classe_tuple[0]
    +289            classe_dict = classe_tuple[1]
    +290
    +291            set_min_max_dbe_by_oxygen(classe_dict)
    +292
    +293            # if len(classe_dict.keys()) == 2:
    +294            #    if classe_dict.get('S') == 1:
    +295            #       continue
    +296            # limits the dbe by the Ox class most abundant,
    +297            # need to add other atoms contribution to be more accurate
    +298            # but +-7 should be sufficient to cover the range
     299
    -300                    #print("Started molecular formula search for class %s,  radical" % classe_str)
    -301                    pbar.set_description_str(desc="Started molecular formula search for class %s, radical" % classe_str, refresh=True)
    -302
    -303                    ion_type = Labels.radical_ion
    -304                    
    -305                    possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(classe_str)
    -306                    
    -307                    if possible_formulas_dict:
    -308
    -309                        run_search(possible_formulas_dict, self.mass_spectrum_obj, min_abundance)
    -310
    -311            # looks for adduct, used_atom_valences should be 0 
    -312            # this code does not support H exchance by halogen atoms
    -313            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    -314                
    -315                pbar.set_description_str(desc="Started molecular formula search for class %s, adduct" % classe_str, refresh=True)
    -316                #print("Started molecular formula search for class %s, adduct" % classe_str)
    -317                
    -318                ion_type = Labels.radical_ion
    -319                
    -320                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(classe_str)
    -321
    -322                ''' commenting  unfinished code for release 2.0, see end of file for details'''
    -323                # possible_formulas_adduct =self.add_adducts(possible_formulas_dict)
    -324
    -325                # if possible_formulas_adduct:
    +300            if self.mass_spectrum_obj.molecular_search_settings.isProtonated:
    +301                # tqdm.set_description_str(desc=None, refresh=True)
    +302                pbar.set_description_str(
    +303                    desc="Started molecular formula search for class %s, (de)protonated "
    +304                    % classe_str,
    +305                    refresh=True,
    +306                )
    +307
    +308                ion_type = Labels.protonated_de_ion
    +309
    +310                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
    +311                    classe_str
    +312                )
    +313
    +314                if possible_formulas_dict:
    +315                    run_search(
    +316                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
    +317                    )
    +318
    +319            if self.mass_spectrum_obj.molecular_search_settings.isRadical:
    +320                # print("Started molecular formula search for class %s,  radical" % classe_str)
    +321                pbar.set_description_str(
    +322                    desc="Started molecular formula search for class %s, radical"
    +323                    % classe_str,
    +324                    refresh=True,
    +325                )
     326
    -327                run_search(possible_formulas_dict, self.mass_spectrum_obj, min_abundance)
    +327                ion_type = Labels.radical_ion
    +328
    +329                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
    +330                    classe_str
    +331                )
    +332
    +333                if possible_formulas_dict:
    +334                    run_search(
    +335                        possible_formulas_dict, self.mass_spectrum_obj, min_abundance
    +336                    )
    +337
    +338            # looks for adduct, used_atom_valences should be 0
    +339            # this code does not support H exchance by halogen atoms
    +340            if self.mass_spectrum_obj.molecular_search_settings.isAdduct:
    +341                pbar.set_description_str(
    +342                    desc="Started molecular formula search for class %s, adduct"
    +343                    % classe_str,
    +344                    refresh=True,
    +345                )
    +346                # print("Started molecular formula search for class %s, adduct" % classe_str)
    +347
    +348                ion_type = Labels.radical_ion
    +349
    +350                possible_formulas_dict = dict_molecular_lookup_table.get(ion_type).get(
    +351                    classe_str
    +352                )
    +353
    +354                """ commenting  unfinished code for release 2.0, see end of file for details"""
    +355                # possible_formulas_adduct =self.add_adducts(possible_formulas_dict)
    +356
    +357                # if possible_formulas_adduct:
    +358
    +359                run_search(
    +360                    possible_formulas_dict, self.mass_spectrum_obj, min_abundance
    +361                )
     
    @@ -1839,39 +2004,61 @@
    Parameters
    -
    330    def get_dict_molecular_database(self, classe_str_list):
    -331        """ Get the molecular database as a dictionary.
    -332
    -333        Parameters
    -334        ----------
    -335        classe_str_list : list  
    -336            A list of class names.
    -337
    -338        Returns
    -339        -------
    -340        dict
    -341            A dictionary containing the molecular database.
    -342        """
    -343        nominal_mzs = self.nominal_mzs
    -344        mf_search_settings = self.mass_spectrum_obj.molecular_search_settings
    -345        ion_charge = self.mass_spectrum_obj.polarity
    -346
    -347        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    -348        
    -349        dict_res = {}
    -350
    -351        if mf_search_settings.isProtonated:
    -352            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.protonated_de_ion, nominal_mzs, ion_charge, mf_search_settings)    
    -353            
    -354        if mf_search_settings.isRadical:
    -355            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.radical_ion, nominal_mzs, ion_charge, mf_search_settings)    
    -356
    -357        if mf_search_settings.isAdduct:
    -358            
    -359            adduct_list = mf_search_settings.adduct_atoms_neg if ion_charge < 0 else mf_search_settings.adduct_atoms_pos
    -360            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(classe_str_list, Labels.adduct_ion, nominal_mzs, ion_charge, mf_search_settings, adducts=adduct_list)    
    -361
    -362        return dict_res
    +            
    363    def get_dict_molecular_database(self, classe_str_list):
    +364        """Get the molecular database as a dictionary.
    +365
    +366        Parameters
    +367        ----------
    +368        classe_str_list : list
    +369            A list of class names.
    +370
    +371        Returns
    +372        -------
    +373        dict
    +374            A dictionary containing the molecular database.
    +375        """
    +376        nominal_mzs = self.nominal_mzs
    +377        mf_search_settings = self.mass_spectrum_obj.molecular_search_settings
    +378        ion_charge = self.mass_spectrum_obj.polarity
    +379
    +380        sql_db = MolForm_SQL(url=mf_search_settings.url_database)
    +381
    +382        dict_res = {}
    +383
    +384        if mf_search_settings.isProtonated:
    +385            dict_res[Labels.protonated_de_ion] = sql_db.get_dict_by_classes(
    +386                classe_str_list,
    +387                Labels.protonated_de_ion,
    +388                nominal_mzs,
    +389                ion_charge,
    +390                mf_search_settings,
    +391            )
    +392
    +393        if mf_search_settings.isRadical:
    +394            dict_res[Labels.radical_ion] = sql_db.get_dict_by_classes(
    +395                classe_str_list,
    +396                Labels.radical_ion,
    +397                nominal_mzs,
    +398                ion_charge,
    +399                mf_search_settings,
    +400            )
    +401
    +402        if mf_search_settings.isAdduct:
    +403            adduct_list = (
    +404                mf_search_settings.adduct_atoms_neg
    +405                if ion_charge < 0
    +406                else mf_search_settings.adduct_atoms_pos
    +407            )
    +408            dict_res[Labels.adduct_ion] = sql_db.get_dict_by_classes(
    +409                classe_str_list,
    +410                Labels.adduct_ion,
    +411                nominal_mzs,
    +412                ion_charge,
    +413                mf_search_settings,
    +414                adducts=adduct_list,
    +415            )
    +416
    +417        return dict_res
     
    @@ -1904,35 +2091,31 @@
    Returns
    -
    364    def ox_classes_and_peaks_in_order_(self) -> dict:
    -365        """ Get the oxygen classes and associated peaks in order.
    -366
    -367        Returns
    -368        -------
    -369        dict 
    -370            A dictionary containing the oxygen classes and associated peaks.
    -371        """
    -372        # order is only valid in python 3.4 and above
    -373        # change to OrderedDict if your version is lower
    -374        dict_ox_class_and_ms_peak = dict()
    -375        
    -376        for mspeak in self.mass_spectrum_obj.sort_by_abundance(reverse=True):
    -377            
    -378            #change this filter to cia filter, give more option here, confidence, number of isotopologue found etc
    -379
    -380            ox_classe = mspeak.best_molecular_formula_candidate.class_label
    -381            
    -382            if ox_classe in dict_ox_class_and_ms_peak.keys():
    -383                
    -384                #get the most abundant of the same ox class
    -385                if mspeak.abundance > dict_ox_class_and_ms_peak[ox_classe].abundance:
    -386
    -387                    dict_ox_class_and_ms_peak[ox_classe] = (mspeak)
    -388            else:
    -389                    
    -390                dict_ox_class_and_ms_peak[ox_classe] = (mspeak)
    -391        
    -392        return dict_ox_class_and_ms_peak
    +            
    419    def ox_classes_and_peaks_in_order_(self) -> dict:
    +420        """Get the oxygen classes and associated peaks in order.
    +421
    +422        Returns
    +423        -------
    +424        dict
    +425            A dictionary containing the oxygen classes and associated peaks.
    +426        """
    +427        # order is only valid in python 3.4 and above
    +428        # change to OrderedDict if your version is lower
    +429        dict_ox_class_and_ms_peak = dict()
    +430
    +431        for mspeak in self.mass_spectrum_obj.sort_by_abundance(reverse=True):
    +432            # change this filter to cia filter, give more option here, confidence, number of isotopologue found etc
    +433
    +434            ox_classe = mspeak.best_molecular_formula_candidate.class_label
    +435
    +436            if ox_classe in dict_ox_class_and_ms_peak.keys():
    +437                # get the most abundant of the same ox class
    +438                if mspeak.abundance > dict_ox_class_and_ms_peak[ox_classe].abundance:
    +439                    dict_ox_class_and_ms_peak[ox_classe] = mspeak
    +440            else:
    +441                dict_ox_class_and_ms_peak[ox_classe] = mspeak
    +442
    +443        return dict_ox_class_and_ms_peak
     
    @@ -1958,83 +2141,91 @@
    Returns
    -
    394    def get_classes_in_order(self, dict_ox_class_and_ms_peak)-> [(str, dict)]: 
    -395        """ Get the classes in order.
    -396        
    -397        Parameters
    -398        ----------
    -399        dict_ox_class_and_ms_peak : dict
    -400            A dictionary containing the oxygen classes and associated peaks.
    -401        
    -402        Returns
    -403        -------
    -404        list 
    -405            A list of tuples containing the class names and dictionaries of class attributes.
    -406
    -407        Notes
    -408        -----
    -409        structure is 
    -410            ('HC', {'HC': 1})
    -411        """
    -412        
    -413        
    -414        usedAtoms = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    -415        
    -416        usedAtoms.pop("C")
    -417        usedAtoms.pop("H")
    -418        usedAtoms.pop("O")
    -419
    -420        min_n, max_n = usedAtoms.get('N') if usedAtoms.get('N') else (0,0)
    -421        min_s, max_s = usedAtoms.get('S') if usedAtoms.get('S') else (0,0)
    -422        min_p, max_p = usedAtoms.get('P') if usedAtoms.get('P') else (0,0)
    -423
    -424        possible_n = [n for n in range(min_n, max_n + 1)]
    -425        possible_s = [s for s in range(min_s, max_s + 1)]
    -426        possible_p = [p for p in range(min_p, max_p + 1)]
    -427        
    -428        #used to enforce order for commum atoms 
    -429        # and track the atom index in on the tuple in all_atoms_tuples
    -430        atoms_in_order = ['N', 'S', 'P']
    -431        
    -432        #do number atoms prodcut and remove then from the usedAtoms dict
    -433        all_atoms_tuples = product(possible_n, possible_s, possible_p)
    -434        for atom in atoms_in_order:
    -435            
    -436            usedAtoms.pop(atom, None)
    -437        
    -438        #iterate over other atoms besides C,H, N, O, S and P
    -439        
    -440        for selected_atom_label, min_max_tuple in usedAtoms.items():
    -441            
    -442            min_x = min_max_tuple[0]
    -443            max_x = min_max_tuple[1]
    -444
    -445            possible_x = [x for x in range(min_x, max_x + 1)]
    -446            all_atoms_tuples = product(all_atoms_tuples, possible_x)
    -447            
    -448            #merge tuples
    -449            all_atoms_tuples = [all_atoms_combined[0] + (all_atoms_combined[1],) for all_atoms_combined in
    -450                                all_atoms_tuples]
    -451            
    -452            #add atom label to the atoms_in_order list
    -453            
    -454            #important to index where the atom position is in on the tuple in all_atoms_tuples
    -455            atoms_in_order.append(selected_atom_label)
    -456
    -457        classes_strings_dict_tuples, hc_class = self.get_class_strings_dict(all_atoms_tuples, atoms_in_order)
    -458
    -459        combined_classes = self.combine_ox_class_with_other(atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak)
    -460        
    -461        combination_classes_ordered = self.sort_classes(atoms_in_order, combined_classes)
    -462        
    -463        oxygen_class_str_dict_tuple = [(ox_class, mspeak[0].class_dict) for ox_class, mspeak in dict_ox_class_and_ms_peak.items()] 
    -464
    -465        ## add classes together and ignores classes selected from the main series
    -466        for class_tuple in  combination_classes_ordered:
    -467            if class_tuple not in oxygen_class_str_dict_tuple:
    -468                oxygen_class_str_dict_tuple.append(class_tuple)
    -469        
    -470        return oxygen_class_str_dict_tuple
    +            
    445    def get_classes_in_order(self, dict_ox_class_and_ms_peak) -> [(str, dict)]:
    +446        """Get the classes in order.
    +447
    +448        Parameters
    +449        ----------
    +450        dict_ox_class_and_ms_peak : dict
    +451            A dictionary containing the oxygen classes and associated peaks.
    +452
    +453        Returns
    +454        -------
    +455        list
    +456            A list of tuples containing the class names and dictionaries of class attributes.
    +457
    +458        Notes
    +459        -----
    +460        structure is
    +461            ('HC', {'HC': 1})
    +462        """
    +463
    +464        usedAtoms = deepcopy(self.mass_spectrum_obj.molecular_search_settings.usedAtoms)
    +465
    +466        usedAtoms.pop("C")
    +467        usedAtoms.pop("H")
    +468        usedAtoms.pop("O")
    +469
    +470        min_n, max_n = usedAtoms.get("N") if usedAtoms.get("N") else (0, 0)
    +471        min_s, max_s = usedAtoms.get("S") if usedAtoms.get("S") else (0, 0)
    +472        min_p, max_p = usedAtoms.get("P") if usedAtoms.get("P") else (0, 0)
    +473
    +474        possible_n = [n for n in range(min_n, max_n + 1)]
    +475        possible_s = [s for s in range(min_s, max_s + 1)]
    +476        possible_p = [p for p in range(min_p, max_p + 1)]
    +477
    +478        # used to enforce order for commum atoms
    +479        # and track the atom index in on the tuple in all_atoms_tuples
    +480        atoms_in_order = ["N", "S", "P"]
    +481
    +482        # do number atoms prodcut and remove then from the usedAtoms dict
    +483        all_atoms_tuples = product(possible_n, possible_s, possible_p)
    +484        for atom in atoms_in_order:
    +485            usedAtoms.pop(atom, None)
    +486
    +487        # iterate over other atoms besides C,H, N, O, S and P
    +488
    +489        for selected_atom_label, min_max_tuple in usedAtoms.items():
    +490            min_x = min_max_tuple[0]
    +491            max_x = min_max_tuple[1]
    +492
    +493            possible_x = [x for x in range(min_x, max_x + 1)]
    +494            all_atoms_tuples = product(all_atoms_tuples, possible_x)
    +495
    +496            # merge tuples
    +497            all_atoms_tuples = [
    +498                all_atoms_combined[0] + (all_atoms_combined[1],)
    +499                for all_atoms_combined in all_atoms_tuples
    +500            ]
    +501
    +502            # add atom label to the atoms_in_order list
    +503
    +504            # important to index where the atom position is in on the tuple in all_atoms_tuples
    +505            atoms_in_order.append(selected_atom_label)
    +506
    +507        classes_strings_dict_tuples, hc_class = self.get_class_strings_dict(
    +508            all_atoms_tuples, atoms_in_order
    +509        )
    +510
    +511        combined_classes = self.combine_ox_class_with_other(
    +512            atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
    +513        )
    +514
    +515        combination_classes_ordered = self.sort_classes(
    +516            atoms_in_order, combined_classes
    +517        )
    +518
    +519        oxygen_class_str_dict_tuple = [
    +520            (ox_class, mspeak[0].class_dict)
    +521            for ox_class, mspeak in dict_ox_class_and_ms_peak.items()
    +522        ]
    +523
    +524        ## add classes together and ignores classes selected from the main series
    +525        for class_tuple in combination_classes_ordered:
    +526            if class_tuple not in oxygen_class_str_dict_tuple:
    +527                oxygen_class_str_dict_tuple.append(class_tuple)
    +528
    +529        return oxygen_class_str_dict_tuple
     
    @@ -2055,7 +2246,7 @@
    Returns
    Notes
    -

    structure is +

    structure is ('HC', {'HC': 1})

    @@ -2073,50 +2264,50 @@

    Notes
    -
    472    @staticmethod
    -473    def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(str, dict)]: 
    -474        """ Get the class strings and dictionaries.
    -475        
    -476        Parameters
    -477        ----------
    -478        all_atoms_tuples : tuple
    -479            A tuple containing the atoms.
    -480        atoms_in_order : list
    -481            A list of atoms in order.
    -482        
    -483        Returns
    -484        --------
    -485        list 
    -486            A list of tuples containing the class strings and dictionaries.
    -487        
    -488        """
    -489        classe_list= []
    -490        hc_class = []
    -491        
    -492        for all_atoms_tuple in all_atoms_tuples:
    -493            
    -494            classe_str = ''
    -495            classe_dict = dict()
    -496            
    -497            for each_atoms_index, atoms_number in enumerate(all_atoms_tuple):
    -498                
    -499                if atoms_number != 0:
    -500                    
    -501                    classe_str = (classe_str + atoms_in_order[each_atoms_index] + str(atoms_number) + ' ')
    -502                    
    -503                    classe_dict[atoms_in_order[each_atoms_index]] = atoms_number
    -504
    -505            classe_str = classe_str.strip()
    -506            
    -507            if len(classe_str) > 0:
    -508            
    -509                classe_list.append((classe_str,classe_dict))
    -510
    -511            elif len(classe_str) == 0:
    -512
    -513                hc_class.append(('HC', {'HC':1}))
    -514        
    -515        return classe_list, hc_class
    +            
    531    @staticmethod
    +532    def get_class_strings_dict(all_atoms_tuples, atoms_in_order) -> [(str, dict)]:
    +533        """Get the class strings and dictionaries.
    +534
    +535        Parameters
    +536        ----------
    +537        all_atoms_tuples : tuple
    +538            A tuple containing the atoms.
    +539        atoms_in_order : list
    +540            A list of atoms in order.
    +541
    +542        Returns
    +543        --------
    +544        list
    +545            A list of tuples containing the class strings and dictionaries.
    +546
    +547        """
    +548        classe_list = []
    +549        hc_class = []
    +550
    +551        for all_atoms_tuple in all_atoms_tuples:
    +552            classe_str = ""
    +553            classe_dict = dict()
    +554
    +555            for each_atoms_index, atoms_number in enumerate(all_atoms_tuple):
    +556                if atoms_number != 0:
    +557                    classe_str = (
    +558                        classe_str
    +559                        + atoms_in_order[each_atoms_index]
    +560                        + str(atoms_number)
    +561                        + " "
    +562                    )
    +563
    +564                    classe_dict[atoms_in_order[each_atoms_index]] = atoms_number
    +565
    +566            classe_str = classe_str.strip()
    +567
    +568            if len(classe_str) > 0:
    +569                classe_list.append((classe_str, classe_dict))
    +570
    +571            elif len(classe_str) == 0:
    +572                hc_class.append(("HC", {"HC": 1}))
    +573
    +574        return classe_list, hc_class
     
    @@ -2152,47 +2343,52 @@
    Returns
    -
    517    @staticmethod
    -518    def combine_ox_class_with_other( atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak) -> [dict]:
    -519        """ Combine the oxygen classes with other classes.
    -520        
    -521        Parameters
    -522        ----------
    -523        atoms_in_order : list
    -524            A list of atoms in order.
    -525        classes_strings_dict_tuples : list
    -526            
    -527        dict_ox_class_and_ms_peak : dict
    -528            A dictionary containing the oxygen classes and associated peaks.
    -529        
    -530        Returns
    -531        -------
    -532        list 
    -533            A list of dictionaries.
    -534        """
    -535        #sort methods that uses the key of classes dictionary and the atoms_in_order as reference
    -536        # c_tuple[1] = class_dict, because is one key:value map we loop through keys and get the first item only 
    -537        # sort by len first then sort based on the atoms_in_order list
    -538        atoms_in_order = Atoms.atoms_order
    -539
    -540        Oxygen_mfs = dict_ox_class_and_ms_peak.values()
    -541        
    -542        
    -543        #sort_method = lambda word: (len(word[0]), [atoms_in_order.index(atom) for atom in list( word[1].keys())])
    -544        
    -545        #print(classes_strings_dict_tuples)
    -546        #classe_in_order = sorted(classes_strings_dict_tuples, key = sort_method)
    -547        #print(classe_in_order)
    -548        
    -549        combination = []
    -550        
    -551        # _ ignoring the class_str
    -552        for _ , other_classe_dict in classes_strings_dict_tuples:
    -553          
    -554           #combination.extend([[other_classe_str + ' ' + Oxygen_mf[0].class_label , {**other_classe_dict, **Oxygen_mf[0].class_dict}] for Oxygen_mf in Oxygen_mfs])
    -555           combination.extend([{**other_classe_dict, **Oxygen_mf[0].class_dict} for Oxygen_mf in Oxygen_mfs])
    -556 
    -557        return combination
    +            
    576    @staticmethod
    +577    def combine_ox_class_with_other(
    +578        atoms_in_order, classes_strings_dict_tuples, dict_ox_class_and_ms_peak
    +579    ) -> [dict]:
    +580        """Combine the oxygen classes with other classes.
    +581
    +582        Parameters
    +583        ----------
    +584        atoms_in_order : list
    +585            A list of atoms in order.
    +586        classes_strings_dict_tuples : list
    +587
    +588        dict_ox_class_and_ms_peak : dict
    +589            A dictionary containing the oxygen classes and associated peaks.
    +590
    +591        Returns
    +592        -------
    +593        list
    +594            A list of dictionaries.
    +595        """
    +596        # sort methods that uses the key of classes dictionary and the atoms_in_order as reference
    +597        # c_tuple[1] = class_dict, because is one key:value map we loop through keys and get the first item only
    +598        # sort by len first then sort based on the atoms_in_order list
    +599        atoms_in_order = Atoms.atoms_order
    +600
    +601        Oxygen_mfs = dict_ox_class_and_ms_peak.values()
    +602
    +603        # sort_method = lambda word: (len(word[0]), [atoms_in_order.index(atom) for atom in list( word[1].keys())])
    +604
    +605        # print(classes_strings_dict_tuples)
    +606        # classe_in_order = sorted(classes_strings_dict_tuples, key = sort_method)
    +607        # print(classe_in_order)
    +608
    +609        combination = []
    +610
    +611        # _ ignoring the class_str
    +612        for _, other_classe_dict in classes_strings_dict_tuples:
    +613            # combination.extend([[other_classe_str + ' ' + Oxygen_mf[0].class_label , {**other_classe_dict, **Oxygen_mf[0].class_dict}] for Oxygen_mf in Oxygen_mfs])
    +614            combination.extend(
    +615                [
    +616                    {**other_classe_dict, **Oxygen_mf[0].class_dict}
    +617                    for Oxygen_mf in Oxygen_mfs
    +618                ]
    +619            )
    +620
    +621        return combination
     
    @@ -2229,33 +2425,36 @@
    Returns
    -
    559    @staticmethod
    -560    def sort_classes( atoms_in_order, combination_tuples) -> [(str, dict)]: 
    -561        """ Sort the classes.
    -562        
    -563        Parameters
    -564        ----------
    -565        atoms_in_order : list
    -566            A list of atoms in order.
    -567        combination_tuples : list
    -568            
    -569        Returns
    -570        -------
    -571        list 
    -572            A list of tuples containing the class strings and dictionaries.
    -573        """
    -574        join_list_of_list_classes = list()
    -575        atoms_in_order =  ['N','S','P','O'] + atoms_in_order[3:]
    -576        
    -577        sort_method = lambda atoms_keys: [atoms_in_order.index(atoms_keys)] #(len(word[0]), print(word[1]))#[atoms_in_order.index(atom) for atom in list( word[1].keys())])
    -578        for class_dict in combination_tuples:
    -579            
    -580            sorted_dict_keys = sorted(class_dict, key = sort_method)
    -581            class_str = ' '.join([atom + str(class_dict[atom]) for atom in sorted_dict_keys])
    -582            new_class_dict = { atom: class_dict[atom] for atom in sorted_dict_keys}
    -583            join_list_of_list_classes.append((class_str, new_class_dict))
    -584        
    -585        return join_list_of_list_classes
    +            
    623    @staticmethod
    +624    def sort_classes(atoms_in_order, combination_tuples) -> [(str, dict)]:
    +625        """Sort the classes.
    +626
    +627        Parameters
    +628        ----------
    +629        atoms_in_order : list
    +630            A list of atoms in order.
    +631        combination_tuples : list
    +632
    +633        Returns
    +634        -------
    +635        list
    +636            A list of tuples containing the class strings and dictionaries.
    +637        """
    +638        join_list_of_list_classes = list()
    +639        atoms_in_order = ["N", "S", "P", "O"] + atoms_in_order[3:]
    +640
    +641        sort_method = (
    +642            lambda atoms_keys: [atoms_in_order.index(atoms_keys)]
    +643        )  # (len(word[0]), print(word[1]))#[atoms_in_order.index(atom) for atom in list( word[1].keys())])
    +644        for class_dict in combination_tuples:
    +645            sorted_dict_keys = sorted(class_dict, key=sort_method)
    +646            class_str = " ".join(
    +647                [atom + str(class_dict[atom]) for atom in sorted_dict_keys]
    +648            )
    +649            new_class_dict = {atom: class_dict[atom] for atom in sorted_dict_keys}
    +650            join_list_of_list_classes.append((class_str, new_class_dict))
    +651
    +652        return join_list_of_list_classes
     
    diff --git a/docs/corems/ms_peak/calc/MSPeakCalc.html b/docs/corems/ms_peak/calc/MSPeakCalc.html index 16cbdba4..adebcb81 100644 --- a/docs/corems/ms_peak/calc/MSPeakCalc.html +++ b/docs/corems/ms_peak/calc/MSPeakCalc.html @@ -114,891 +114,1013 @@

    -
      1__author__ = "Yuri E. Corilo"
    -  2__date__ = "Jun 04, 2019"
    -  3
    -  4import warnings
    -  5
    -  6
    -  7from scipy.stats import norm, cauchy
    -  8from numpy import linspace, sqrt, log, trapz, pi, log, poly1d, polyfit,flip, square,exp, nan, ceil, rint, floor
    -  9from corems.encapsulation.constant import Atoms
    - 10from corems.encapsulation.factory.parameters import MSParameters
    - 11from lmfit import models
    - 12import pyswarm
    - 13
    - 14
    - 15class MSPeakCalculation:
    - 16    """Class to perform calculations on MSPeak objects.
    - 17
    - 18    This class provides methods to perform various calculations on MSPeak objects, such as calculating Kendrick Mass Defect (KMD) and Kendrick Mass (KM), calculating peak area, and fitting peak lineshape using different models.
    - 19
    - 20    Parameters
    - 21    ----------
    - 22    None
    - 23
    - 24    Attributes
    - 25    ----------
    - 26    _ms_parent : MSParent
    - 27        The parent MSParent object associated with the MSPeakCalculation object.
    - 28    mz_exp : float
    - 29        The experimental m/z value of the peak.
    - 30    peak_left_index : int
    - 31        The start scan index of the peak.
    - 32    peak_right_index : int
    - 33        The final scan index of the peak.
    - 34    resolving_power : float
    - 35        The resolving power of the peak.
    - 36
    - 37    Methods
    - 38    -------
    - 39    * _calc_kmd(dict_base).
    - 40        Calculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula.
    - 41    * calc_area().
    - 42        Calculate the peak area using numpy's trapezoidal fit.
    - 43    * fit_peak(mz_extend=6, delta_rp=0, model='Gaussian').
    - 44        Perform lineshape analysis on a peak using lmfit module.
    - 45    * voigt_pso(w, r, yoff, width, loc, a).
    - 46        Calculate the Voigt function for particle swarm optimization (PSO) fitting.
    - 47    * objective_pso(x, w, u).
    - 48        Calculate the objective function for PSO fitting.
    - 49    * minimize_pso(lower, upper, w, u).
    - 50        Minimize the objective function using the particle swarm optimization algorithm.
    - 51    * fit_peak_pso(mz_extend=6, upsample_multiplier=5).
    - 52        Perform lineshape analysis on a peak using particle swarm optimization (PSO) fitting.
    - 53    * voigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    - 54        [Legacy] Perform voigt lineshape analysis on a peak.
    - 55    * pseudovoigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1, fraction=0.5).
    - 56        [Legacy] Perform pseudovoigt lineshape analysis on a peak.
    - 57    * lorentz(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    - 58        [Legacy] Perform lorentz lineshape analysis on a peak.
    - 59    * gaussian(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    - 60        [Legacy] Perform gaussian lineshape analysis on a peak.
    - 61    * get_mz_domain(oversample_multiplier, mz_overlay).
    - 62        [Legacy] Resample/interpolate datapoints for lineshape analysis.
    - 63    * number_possible_assignments().
    - 64        Return the number of possible molecular formula assignments for the peak.
    - 65    * molecular_formula_lowest_error().
    - 66        Return the molecular formula with the smallest absolute mz error.
    - 67    * molecular_formula_highest_prob_score().
    - 68        Return the molecular formula with the highest confidence score.
    - 69    * molecular_formula_earth_filter(lowest_error=True).
    - 70        Filter molecular formula using the 'Earth' filter.
    - 71    * molecular_formula_water_filter(lowest_error=True).
    - 72        Filter molecular formula using the 'Water' filter.
    - 73    * molecular_formula_air_filter(lowest_error=True).
    - 74        Filter molecular formula using the 'Air' filter.
    - 75    * cia_score_S_P_error().
    - 76        Compound Identification Algorithm SP Error - Assignment Filter.
    - 77    * cia_score_N_S_P_error().
    - 78        Compound Identification Algorithm NSP Error - Assignment Filter.
    - 79    
    - 80    """
    - 81   
    - 82
    - 83    def _calc_kmd(self, dict_base):
    - 84        """ Calculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula    
    - 85        
    - 86        Parameters
    - 87        ----------
    - 88        dict_base : dict
    - 89            dictionary with the base formula to be used in the calculation
    - 90            Default is CH2, e.g.
    - 91                dict_base = {"C": 1, "H": 2}
    - 92        """
    - 93
    - 94        if self._ms_parent:
    - 95            # msPeak obj does have a ms object parent
    - 96            kendrick_rounding_method = self._ms_parent.mspeaks_settings.kendrick_rounding_method  # rounding method can be one of floor, ceil or round
    - 97            # msPeak obj does not have a ms object parent
    - 98        else:
    - 99            kendrick_rounding_method = MSParameters.ms_peak.kendrick_rounding_method
    -100        
    -101        mass = 0
    -102        for atom in dict_base.keys():
    -103            mass += Atoms.atomic_masses.get(atom) * dict_base.get(atom)
    -104
    -105        kendrick_mass = (int(mass) / mass) * self.mz_exp
    -106
    -107        if kendrick_rounding_method == 'ceil':
    -108
    -109            nominal_km = ceil(kendrick_mass)
    -110
    -111        elif kendrick_rounding_method == 'round': 
    -112
    -113            nominal_km = rint(kendrick_mass)
    -114
    -115        elif kendrick_rounding_method == 'floor':
    -116
    -117            nominal_km = floor(kendrick_mass)
    -118
    -119        else:
    -120            raise  Exception("%s method was not implemented, please refer to corems.ms_peak.calc.MSPeakCalc Class" % kendrick_rounding_method)
    -121
    -122        kmd = (nominal_km - kendrick_mass) 
    -123
    -124        # kmd = (nominal_km - km) * 1
    -125        #kmd = round(kmd,0)
    -126
    -127        return kmd, kendrick_mass, nominal_km
    -128
    -129    def calc_area(self):
    -130        """ Calculate the peak area using numpy's trapezoidal fit
    -131
    -132        uses provided mz_domain to accurately integrate areas independent of digital resolution
    -133
    -134        Returns
    -135        -------
    -136        float
    -137            peak area
    -138        """
    -139        if self.peak_right_index > self.peak_left_index:
    -140
    -141            yy = self._ms_parent.abundance_profile[self.peak_left_index:self.peak_right_index]
    -142            xx = self._ms_parent.mz_exp_profile[self.peak_left_index:self.peak_right_index]
    -143            # check if the axis is high to low m/z or not. if its MSFromFreq its high mz first, if its from Profile, its low mz first
    -144            if xx[0] > xx[-1]:
    -145                xx = flip(xx)    
    -146                yy = flip(yy)   
    -147            return float(trapz(yy, xx))
    -148
    -149        else:
    -150
    -151            warnings.warn("Peak Area Calculation for m/z {} has failed".format(self.mz_exp))
    -152            return nan
    -153
    -154    def fit_peak(self,mz_extend=6, delta_rp = 0, model='Gaussian'):
    -155        """ Lineshape analysis on a peak using lmfit module. 
    -156
    -157        Model and fit peak lineshape by defined function - using lmfit module
    -158        Does not oversample/resample/interpolate data points 
    -159        Better to go back to time domain and perform more zero filling - if possible.
    -160
    -161        Parameters
    -162        ----------
    -163        mz_extend : int
    -164            extra points left and right of peak definition to include in fitting
    -165        delta_rp : float
    -166            delta resolving power to add to resolving power
    -167        model : str
    -168            Type of lineshape model to use.
    -169            Models allowed: Gaussian, Lorentz, Voigt
    -170
    -171        Returns
    -172        -----
    -173        mz_domain : ndarray
    -174            x-axis domain for fit
    -175        fit_peak : lmfit object
    -176            fit results object from lmfit module
    -177        
    -178        Notes
    -179        -----
    -180        Returns the calculated mz domain, initial defined abundance profile, and the fit peak results object from lmfit module
    -181        mz_extend here extends the x-axis domain so that we have sufficient points either side of the apex to fit.
    -182        Takes about 10ms per peak
    -183        """
    -184        start_index = self.peak_left_index - mz_extend  if not self.peak_left_index == 0 else 0
    -185        final_index = self.peak_right_index + mz_extend  if not self.peak_right_index == len(self._ms_parent.mz_exp_profile) else self.peak_right_index
    -186
    -187        # check if MSPeak contains the resolving power info
    -188        if self.resolving_power:
    -189            # full width half maximum distance
    -190            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))
    -191
    -192            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    -193            abundance_domain = self._ms_parent.abundance_profile[start_index:final_index]
    -194
    -195            if model=='Gaussian':
    -196                # stardard deviation
    -197                sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    -198                amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -199                model = models.GaussianModel()
    -200                params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -201
    -202            elif model=='Lorentz':
    -203                # stardard deviation
    -204                sigma = self.fwhm / 2
    -205                amplitude = sigma* pi * self.abundance
    -206                model = models.LorentzianModel()
    -207                params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -208
    -209            elif model=='Voigt':
    -210                # stardard deviation
    -211                sigma = self.fwhm / 3.6013
    -212                amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -213                model = models.VoigtModel()
    -214                params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma, gamma = sigma)
    -215            else:
    -216                raise LookupError('model lineshape not known or defined')
    -217
    -218            #calc_abundance = model.eval(params=params, x=mz_domain) #Same as initial fit, returned in fit_peak object
    -219            fit_peak = model.fit(abundance_domain,params=params, x=mz_domain)
    -220            return mz_domain, fit_peak
    -221
    -222        else:
    -223            raise LookupError(
    -224                'resolving power is not defined, try to use set_max_resolving_power()')
    -225
    -226
    -227    def voigt_pso(self,w, r, yoff, width, loc, a):
    -228        """ Voigt function for particle swarm optimisation (PSO) fitting
    -229
    -230        From https://github.com/pnnl/nmrfit/blob/master/nmrfit/equations.py.
    -231        Calculates a Voigt function over w based on the relevant properties of the distribution.
    -232
    -233        Parameters
    -234        ----------
    -235        w : ndarray
    -236            Array over which the Voigt function will be evaluated.
    -237        r : float
    -238            Ratio between the Guassian and Lorentzian functions.
    -239        yoff : float
    -240            Y-offset of the Voigt function.
    -241        width : float
    -242            The width of the Voigt function.
    -243        loc : float
    -244            Center of the Voigt function.
    -245        a : float
    -246            Area of the Voigt function.
    -247        Returns
    -248        -------
    -249        V : ndarray
    -250            Array defining the Voigt function over w.
    -251
    -252        References
    -253        ----------
    -254        1. https://github.com/pnnl/nmrfit 
    -255
    -256        Notes
    -257        -----
    -258        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    -259
    -260        """
    -261        # Lorentzian component
    -262        L = (2 / (pi * width)) * 1 / (1 + ((w - loc) / (0.5 * width))**2)
    -263
    -264        # Gaussian component
    -265        G = (2 / width) * sqrt(log(2) / pi) * exp(-((w - loc) / (width / (2 * sqrt(log(2)))))**2)
    -266
    -267        # Voigt body
    -268        V = (yoff + a) * (r * L + (1 - r) * G)
    -269
    -270        return V
    -271
    -272
    -273    def objective_pso(self, x, w, u):
    -274        """ Objective function for particle swarm optimisation (PSO) fitting
    -275
    -276        The objective function used to fit supplied data.  Evaluates sum of squared differences between the fit and the data.
    -277
    -278        Parameters
    -279        ----------
    -280        x : list of floats
    -281            Parameter vector.
    -282        w : ndarray
    -283            Array of frequency data.
    -284        u : ndarray
    -285            Array of data to be fit.
    -286
    -287        Returns
    -288        -------
    -289        rmse : float
    -290            Root mean square error between the data and fit.
    -291
    -292        References
    -293        ----------
    -294        1. https://github.com/pnnl/nmrfit 
    -295
    -296        """
    -297        # global parameters
    -298        r, width, loc, a = x
    -299        yoff = 0
    -300
    -301        # calculate fit for V
    -302        V_fit = self.voigt_pso(w, r, yoff, width, loc, a)
    -303
    -304        # real component RMSE
    -305        rmse = sqrt(square((u - V_fit)).mean(axis=None))
    -306
    -307        # return the total RMSE
    -308        return rmse
    -309
    -310    def minimize_pso(self,lower, upper, w, u):
    -311        """ Minimization function for particle swarm optimisation (PSO) fitting
    -312
    -313        Minimizes the objective function using the particle swarm optimization algorithm.
    -314        Minimization function based on defined parameters   
    -315
    -316
    -317        Parameters
    -318        ----------
    -319        lower : list of floats
    -320            Lower bounds for the parameters.
    -321        upper : list of floats
    -322            Upper bounds for the parameters.
    -323        w : ndarray
    -324            Array of frequency data.
    -325        u : ndarray
    -326            Array of data to be fit.
    -327
    -328        Notes
    -329        -----
    -330        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    -331        Current parameters take ~2 seconds per peak.
    -332
    -333
    -334        References
    -335        ----------
    -336        1. https://github.com/pnnl/nmrfit 
    -337
    -338        """
    -339        #TODO - allow support to pass swarmsize, maxiter, omega, phip, phig parameters.
    -340        #TODO - Refactor PSO fitting into its own class?
    -341        
    -342        xopt, fopt = pyswarm.pso(self.objective_pso, lower, upper, args=(w, u),
    -343                                    swarmsize=1000,
    -344                                    maxiter=5000,
    -345                                    omega=-0.2134,
    -346                                    phip=-0.3344,
    -347                                    phig=2.3259)
    -348        return xopt, fopt
    -349
    -350    def fit_peak_pso(self, mz_extend : int=6, upsample_multiplier : int=5):
    -351        """ Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting 
    -352
    -353        Function to fit a Voigt peakshape using particle swarm optimisation (PSO).
    -354        Should return better results than lmfit, but much more computationally expensive
    -355
    -356        Parameters
    -357        ----------
    -358        mz_extend : int, optional
    -359            extra points left and right of peak definition to include in fitting. Defaults to 6.
    -360        upsample_multiplier : int, optional
    -361            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 5.
    -362
    -363        Returns
    -364        -------
    -365        xopt : array
    -366            variables describing the voigt function.
    -367            G/L ratio, width (fwhm), apex (x-axis), area.
    -368            y-axis offset is fixed at 0 
    -369        fopt : float
    -370            objective score (rmse)
    -371        psfit : array
    -372            recalculated y values based on function and optimised fit
    -373        psfit_hdp : tuple of arrays
    -374            0 - linspace x-axis upsampled grid
    -375            1 - recalculated y values based on function and upsampled x-axis grid
    -376            Does not change results, but aids in visualisation of the 'true' voigt lineshape
    -377
    -378        Notes
    -379        -----
    -380        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    -381        """
    -382        # TODO - Add ability to pass pso args (i.e. swarm size, maxiter, omega, phig, etc)
    -383        # TODO: fix xopt. Magnitude mode data through CoreMS/Bruker starts at 0 but is noise centered well above 0.
    -384            # Thermo data is noise reduced by also noise subtracted, so starts at 0
    -385            # Absorption mode/phased data will have positive and negative components and may not be baseline corrected
    -386
    -387        start_index = self.peak_left_index - mz_extend  if not self.peak_left_index == 0 else 0
    -388        final_index = self.peak_right_index + mz_extend  if not self.peak_right_index == len(self._ms_parent.mz_exp_profile) else self.peak_right_index
    -389
    -390        # check if MSPeak contains the resolving power info
    -391        if self.resolving_power:
    -392            # full width half maximum distance
    -393            self.fwhm = (self.mz_exp / (self.resolving_power))
    -394
    -395            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    -396            abundance_domain = self._ms_parent.abundance_profile[start_index:final_index]
    -397            lower = [0, self.fwhm*0.8, (self.mz_exp-0.0005), 0]
    -398            upper = [1, self.fwhm*1.2, (self.mz_exp+0.0005), self.abundance/self.signal_to_noise]
    -399            xopt, fopt = self.minimize_pso(lower,upper,mz_domain,abundance_domain)
    -400            
    -401            psfit = self.voigt_pso(mz_domain,xopt[0],0,xopt[1],xopt[2],xopt[3])
    -402            psfit_hdp_x = linspace(min(mz_domain),max(mz_domain),num=len(mz_domain)*upsample_multiplier)
    -403            psfit_hdp = self.voigt_pso(psfit_hdp_x,xopt[0],0,xopt[1],xopt[2],xopt[3])
    -404            return xopt, fopt, psfit, (psfit_hdp_x, psfit_hdp)
    -405        else:
    -406            raise LookupError(
    -407                'resolving power is not defined, try to use set_max_resolving_power()')
    -408
    -409             
    -410    def voigt(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1):
    -411        """ [Legacy] Voigt lineshape analysis function
    -412        Legacy function for voigt lineshape analysis
    -413
    -414        Parameters
    -415        ----------
    -416        oversample_multiplier : int
    -417            factor to increase x-axis points by for simulation of fitted lineshape function
    -418        delta_rp : float
    -419            delta resolving power to add to resolving power
    -420        mz_overlay : int
    -421            extra points left and right of peak definition to include in fitting
    -422        
    -423        Returns
    -424        -------
    -425        mz_domain : ndarray
    -426            x-axis domain for fit
    -427        calc_abundance : ndarray
    -428            calculated abundance profile based on voigt function
    -429        """
    -430        
    -431        
    -432        if self.resolving_power:
    -433
    -434            # full width half maximum distance
    -435            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -436
    -437            # stardart deviation
    -438            sigma = self.fwhm / 3.6013
    -439
    -440            # half width baseline distance
    -441            
    -442            #mz_domain = linspace(self.mz_exp - hw_base_distance,
    -443            #                     self.mz_exp + hw_base_distance, datapoint)
    -444            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -445            
    -446            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -447            
    -448            #TODO derive amplitude
    -449            amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -450
    -451            model = models.VoigtModel()
    -452
    -453            params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma, gamma = sigma)
    -454
    -455            calc_abundance = model.eval(params=params, x=mz_domain)
    -456
    -457            return mz_domain, calc_abundance
    -458        
    -459        else:
    -460            
    -461            raise LookupError(
    -462                'resolving power is not defined, try to use set_max_resolving_power()')
    -463
    -464    def pseudovoigt(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1, fraction =0.5):
    -465        """ [Legacy] pseudovoigt lineshape function
    -466
    -467        Legacy function for pseudovoigt lineshape analysis. 
    -468        Note - Code may not be functional currently.
    -469
    -470        Parameters
    -471        ----------
    -472        oversample_multiplier : int, optional
    -473            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 1.
    -474        delta_rp : float, optional
    -475            delta resolving power to add to resolving power. Defaults to 0.
    -476        mz_overlay : int, optional
    -477            extra points left and right of peak definition to include in fitting. Defaults to 1.
    -478        fraction : float, optional
    -479            fraction of gaussian component in pseudovoigt function. Defaults to 0.5.
    -480
    -481        """
    -482        if self.resolving_power:
    -483
    -484            # full width half maximum distance
    -485            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -486
    -487            # stardart deviation
    -488            sigma = self.fwhm / 2
    -489
    -490            # half width baseline distance
    -491            
    -492            #mz_domain = linspace(self.mz_exp - hw_base_distance,
    -493            #                     self.mz_exp + hw_base_distance, datapoint)
    -494            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -495            
    -496            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -497            model = models.PseudoVoigtModel()
    -498            
    -499            # TODO derive amplitude
    -500            gamma = sigma
    -501            
    -502            amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -503            amplitude = (sqrt(pi/log(2)) * (pi*sigma*self.abundance)) /( (pi*(1-gamma)) + (sqrt(pi*log(2)) * gamma) )
    -504
    -505            params = model.make_params(center=self.mz_exp, sigma = sigma)
    -506
    -507            calc_abundance = model.eval(params=params, x=mz_domain)
    -508
    -509            return mz_domain, calc_abundance
    -510        
    -511        else:
    -512            
    -513            raise LookupError(
    -514                'resolving power is not defined, try to use set_max_resolving_power()')
    -515
    -516
    -517    def lorentz(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1):
    -518        """ [Legacy] Lorentz lineshape analysis function    
    -519        
    -520        Legacy function for lorentz lineshape analysis
    -521
    -522        Parameters
    -523        ----------
    -524        oversample_multiplier : int
    -525            factor to increase x-axis points by for simulation of fitted lineshape function
    -526        delta_rp : float
    -527            delta resolving power to add to resolving power
    -528        mz_overlay : int
    -529            extra points left and right of peak definition to include in fitting
    -530        
    -531        Returns
    -532        -------
    -533        mz_domain : ndarray
    -534            x-axis domain for fit
    -535        calc_abundance : ndarray
    -536            calculated abundance profile based on lorentz function
    -537        
    -538        """
    -539        if self.resolving_power:
    -540
    -541            # full width half maximum distance
    -542            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -543
    -544            # stardart deviation
    -545            sigma = self.fwhm / 2
    -546
    -547            # half width baseline distance
    -548            hw_base_distance = (8 * sigma)
    -549
    -550            #mz_domain = linspace(self.mz_exp - hw_base_distance,
    -551            #                     self.mz_exp + hw_base_distance, datapoint)
    -552            
    -553            
    -554            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -555            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -556            model = models.LorentzianModel()
    -557            
    -558            amplitude = sigma* pi * self.abundance
    -559
    -560            params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -561
    -562            calc_abundance = model.eval(params=params, x=mz_domain)
    -563
    -564            return mz_domain, calc_abundance
    -565        
    -566        else:
    -567            
    -568            raise LookupError(
    -569                'resolving power is not defined, try to use set_max_resolving_power()')
    -570
    -571    def gaussian(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1):
    -572        """ [Legacy] Gaussian lineshape analysis function
    -573        Legacy gaussian lineshape analysis function
    -574        
    -575        Parameters
    -576        ----------
    -577        oversample_multiplier : int
    -578            factor to increase x-axis points by for simulation of fitted lineshape function
    -579        delta_rp : float
    -580            delta resolving power to add to resolving power
    -581        mz_overlay : int
    -582            extra points left and right of peak definition to include in fitting
    -583
    -584        Returns
    -585        -------
    -586        mz_domain : ndarray 
    -587            x-axis domain for fit
    -588        calc_abundance : ndarray
    -589            calculated abundance profile based on gaussian function
    -590        
    -591
    -592        """
    -593
    -594        # check if MSPeak contains the resolving power info
    -595        if self.resolving_power:
    -596            # full width half maximum distance
    -597            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -598
    -599            # stardart deviation
    -600            sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    -601
    -602            # half width baseline distance
    -603            #hw_base_distance = (3.2 * s)
    -604
    -605            #match_loz_factor = 3
    -606
    -607            #n_d = hw_base_distance * match_loz_factor
    -608
    -609            #mz_domain = linspace(
    -610            #    self.mz_exp - n_d, self.mz_exp + n_d, datapoint)
    -611
    -612            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -613            
    -614            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -615            
    -616            #calc_abundance = norm.pdf(mz_domain, self.mz_exp, s)
    -617
    -618            model = models.GaussianModel()
    -619            
    -620            amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -621
    -622            params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -623
    -624            calc_abundance = model.eval(params=params, x=mz_domain)
    -625            
    -626            return mz_domain, calc_abundance 
    -627
    -628        else:
    -629            raise LookupError(
    -630                'resolving power is not defined, try to use set_max_resolving_power()')
    -631
    -632    def get_mz_domain(self, oversample_multiplier, mz_overlay):
    -633        """  [Legacy] function to resample/interpolate datapoints for lineshape analysis
    -634
    -635        This code is used for the legacy line fitting functions and not recommended.
    -636        Legacy function to support expanding mz domain for legacy lineshape functions
    -637
    -638        Parameters
    -639        ----------
    -640        oversample_multiplier : int
    -641            factor to increase x-axis points by for simulation of fitted lineshape function
    -642        mz_overlay : int
    -643            extra points left and right of peak definition to include in fitting
    -644        
    -645        Returns
    -646        -------
    -647        mz_domain : ndarray
    -648            x-axis domain for fit
    -649        
    -650        """
    -651        start_index = self.peak_left_index - mz_overlay  if not self.peak_left_index == 0 else 0
    -652        final_index = self.peak_right_index + mz_overlay  if not self.peak_right_index == len(self._ms_parent.mz_exp_profile) else self.peak_right_index
    -653
    -654        if oversample_multiplier == 1:
    -655
    -656            mz_domain = self._ms_parent.mz_exp_profile[start_index: final_index]
    -657            
    -658        else:
    -659            # we assume a linear correlation for m/z and datapoits 
    -660            # which is only true if the m/z range in narrow (within 1 m/z unit)
    -661            # this is not true for a wide m/z range
    -662                        
    -663            indexes = range(start_index, final_index+1)
    -664            mz = self._ms_parent.mz_exp_profile[indexes]
    -665            pol = poly1d(polyfit(indexes, mz, 1))
    -666            oversampled_indexes = linspace(start_index, final_index, (final_index-start_index) * oversample_multiplier)    
    -667            mz_domain = pol(oversampled_indexes)
    -668
    -669        return mz_domain
    -670    
    -671    @property
    -672    def number_possible_assignments(self,):
    -673        
    -674        return len(self.molecular_formulas)
    -675
    -676    def molecular_formula_lowest_error(self):
    -677       """ Return the molecular formula with the smallest absolute mz error
    -678       
    -679       """
    -680       
    -681       return min(self.molecular_formulas, key=lambda m: abs(m.mz_error))
    -682
    -683    def molecular_formula_highest_prob_score(self):
    -684        """ Return the molecular formula with the highest confidence score score
    -685         
    -686        """
    -687       
    -688        return max(self.molecular_formulas, key=lambda m: abs(m.confidence_score))
    -689
    -690    def molecular_formula_earth_filter(self, lowest_error=True):
    -691        """ Filter molecular formula using the 'Earth' filter
    -692        
    -693        This function applies the Formularity-esque 'Earth' filter to possible molecular formula assignments.
    -694        Earth Filter:
    -695            O > 0 AND N <= 3 AND P <= 2 AND 3P <= O
    -696
    -697        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter. 
    -698        Otherwise, it will return all Earth-filter compliant formulas. 
    -699
    -700        Parameters
    -701        ----------
    -702        lowest_error : bool, optional.
    -703            Return only the lowest error formula which also fits the Earth filter. 
    -704            If False, return all Earth-filter compliant formulas. Default is True.
    -705
    -706        Returns
    -707        -------
    -708        list
    -709            List of molecular formula objects which fit the Earth filter
    -710
    -711        References
    -712        ----------
    -713        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    -714            Anal. Chem. 2017, 89, 23, 12659–12665
    -715            doi: 10.1021/acs.analchem.7b03318
    -716        """
    -717        
    -718        candidates = list(filter(lambda mf: mf.get("O") > 0 and mf.get("N") <=3 and mf.get("P") <= 2 and (3 * mf.get("P")) <= mf.get("O"), self.molecular_formulas))
    -719        if len(candidates) >0:
    -720            if lowest_error:
    -721                return min(candidates, key=lambda m: abs(m.mz_error))
    -722            else:
    -723                return candidates
    -724        else:
    -725            return candidates
    -726
    -727    def molecular_formula_water_filter(self, lowest_error=True):
    -728        """ Filter molecular formula using the 'Water' filter
    -729
    -730        This function applies the Formularity-esque 'Water' filter to possible molecular formula assignments.
    -731        Water Filter:
    -732            O > 0 AND N <= 3 AND S <= 2 AND P <= 2
    -733        
    -734        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Water filter.
    -735        Otherwise, it will return all Water-filter compliant formulas.
    -736
    -737        Parameters
    -738        ----------
    -739        lowest_error : bool, optional
    -740            Return only the lowest error formula which also fits the Water filter.
    -741            If False, return all Water-filter compliant formulas. Defaults to 2
    -742
    -743        Returns 
    -744        -------
    -745        list
    -746            List of molecular formula objects which fit the Water filter
    -747
    -748        References
    -749        ----------
    -750        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    -751            Anal. Chem. 2017, 89, 23, 12659–12665
    -752            doi: 10.1021/acs.analchem.7b03318
    -753        """
    -754       
    -755        candidates = list(filter(lambda mf: mf.get("O") > 0 and mf.get("N") <=3 and mf.get("S") <=2 and  mf.get("P") <= 2, self.molecular_formulas))
    -756        if len(candidates) >0:
    -757            if lowest_error:
    -758                return min(candidates, key=lambda m: abs(m.mz_error))
    -759            else:
    -760                return candidates
    -761        else:
    -762            return candidates
    -763    
    -764    def molecular_formula_air_filter(self, lowest_error=True):
    -765        """ Filter molecular formula using the 'Air' filter
    -766
    -767        This function applies the Formularity-esque 'Air' filter to possible molecular formula assignments.
    -768        Air Filter:
    -769            O > 0 AND N <= 3 AND S <= 1 AND P = 0 AND 3(S+N) <= O
    -770        
    -771        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Air filter.
    -772        Otherwise, it will return all Air-filter compliant formulas.
    -773
    -774        Parameters
    -775        ----------
    -776        lowest_error : bool, optional
    -777            Return only the lowest error formula which also fits the Air filter.
    -778            If False, return all Air-filter compliant formulas. Defaults to True.
    -779
    -780        Returns
    -781        -------
    -782        list
    -783            List of molecular formula objects which fit the Air filter
    -784            
    -785        References
    -786        ----------
    -787        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    -788            Anal. Chem. 2017, 89, 23, 12659–12665
    -789            doi: 10.1021/acs.analchem.7b03318
    -790        """
    -791
    -792       
    -793        candidates = list(filter(lambda mf: mf.get("O") > 0 and mf.get("N") <=2 and mf.get("S") <=1 and  mf.get("P") == 0 and 3* (mf.get("S") + mf.get("N")) <= mf.get("O"), self.molecular_formulas))
    -794        
    -795        if len(candidates) >0:
    -796            if lowest_error:
    -797                return min(candidates, key=lambda m: abs(m.mz_error))
    -798            else:
    -799                return candidates
    -800        else:
    -801            return candidates
    -802
    -803    def cia_score_S_P_error(self):
    -804        """ Compound Identification Algorithm SP Error - Assignment Filter
    -805         
    -806        This function applies the Compound Identification Algorithm (CIA) SP Error filter to possible molecular formula assignments.
    -807
    -808        It takes the molecular formula with the lowest S+P count, and returns the formula with the lowest absolute error from this subset.
    -809        
    -810        Returns
    -811        -------
    -812        MolecularFormula
    -813            A single molecular formula which fits the rules of the CIA SP Error filter
    -814
    -815
    -816        References
    -817        ----------
    -818        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    -819            Anal. Chem. 2006, 78, 13, 4363–4373
    -820            doi: 10.1021/ac0600306
    -821        """
    -822        #case EFormulaScore.HAcap:
    -823
    -824        lowest_S_P_mf = min(self.molecular_formulas, key=lambda mf: mf.get('S') + mf.get('P'))
    -825        lowest_S_P_count = lowest_S_P_mf.get("S") + lowest_S_P_mf.get("P")
    -826        
    -827        list_same_s_p = list(filter(lambda mf: mf.get('S') + mf.get('P') == lowest_S_P_count, self.molecular_formulas))
    -828
    -829        #check if list is not empty
    -830        if list_same_s_p:
    -831        
    -832            return min(list_same_s_p, key=lambda m: abs(m.mz_error))
    -833        
    -834        else:
    -835        
    -836            return lowest_S_P_mf
    -837    
    -838    def cia_score_N_S_P_error(self):
    -839        """ Compound Identification Algorithm NSP Error - Assignment Filter
    -840        
    -841        This function applies the Compound Identification Algorithm (CIA) NSP Error filter to possible molecular formula assignments.
    -842
    -843        It takes the molecular formula with the lowest N+S+P count, and returns the formula with the lowest absolute error from this subset.
    -844
    -845        Returns
    -846        -------
    -847        MolecularFormula
    -848            A single molecular formula which fits the rules of the CIA NSP Error filter
    -849
    -850        References
    -851        ----------
    -852        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    -853            Anal. Chem. 2006, 78, 13, 4363–4373
    -854            doi: 10.1021/ac0600306
    -855
    -856        Raises
    -857        -------
    -858        Exception
    -859            If no molecular formula are associated with mass spectrum peak.
    -860        """
    -861        #case EFormulaScore.HAcap:
    -862        if self.molecular_formulas:
    -863
    -864            lowest_N_S_P_mf = min(self.molecular_formulas, key=lambda mf: mf.get('N') + mf.get('S') + mf.get('P'))
    -865            lowest_N_S_P_count = lowest_N_S_P_mf.get("N") + lowest_N_S_P_mf.get("S") + lowest_N_S_P_mf.get("P")
    -866
    -867            list_same_N_S_P = list(filter(lambda mf: mf.get('N') + mf.get('S') + mf.get('P') == lowest_N_S_P_count, self.molecular_formulas))
    -868
    -869            if list_same_N_S_P:
    -870
    -871                SP_filtered_list =  list(filter(lambda mf: (mf.get("S") <= 3 ) and  (mf.get("P")  <= 1 ), list_same_N_S_P))
    -872                
    -873                if SP_filtered_list:
    -874                    
    -875                    return min(SP_filtered_list, key=lambda m: abs(m.mz_error)) 
    -876                
    -877                else:    
    -878                    
    -879                    return min(list_same_N_S_P, key=lambda m: abs(m.mz_error))            
    -880            
    -881            else:
    -882                
    -883                return lowest_N_S_P_mf 
    -884        else:
    -885            raise Exception("No molecular formula associated with the mass spectrum peak at m/z: %.6f" % self.mz_exp)
    +                        
       1__author__ = "Yuri E. Corilo"
    +   2__date__ = "Jun 04, 2019"
    +   3
    +   4import warnings
    +   5
    +   6import pyswarm
    +   7from lmfit import models
    +   8from numpy import (
    +   9    ceil,
    +  10    exp,
    +  11    flip,
    +  12    floor,
    +  13    linspace,
    +  14    log,
    +  15    nan,
    +  16    pi,
    +  17    poly1d,
    +  18    polyfit,
    +  19    rint,
    +  20    sqrt,
    +  21    square,
    +  22    trapz,
    +  23)
    +  24
    +  25from corems.encapsulation.constant import Atoms
    +  26from corems.encapsulation.factory.parameters import MSParameters
    +  27
    +  28
    +  29class MSPeakCalculation:
    +  30    """Class to perform calculations on MSPeak objects.
    +  31
    +  32    This class provides methods to perform various calculations on MSPeak objects, such as calculating Kendrick Mass Defect (KMD) and Kendrick Mass (KM), calculating peak area, and fitting peak lineshape using different models.
    +  33
    +  34    Parameters
    +  35    ----------
    +  36    None
    +  37
    +  38    Attributes
    +  39    ----------
    +  40    _ms_parent : MSParent
    +  41        The parent MSParent object associated with the MSPeakCalculation object.
    +  42    mz_exp : float
    +  43        The experimental m/z value of the peak.
    +  44    peak_left_index : int
    +  45        The start scan index of the peak.
    +  46    peak_right_index : int
    +  47        The final scan index of the peak.
    +  48    resolving_power : float
    +  49        The resolving power of the peak.
    +  50
    +  51    Methods
    +  52    -------
    +  53    * _calc_kmd(dict_base).
    +  54        Calculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula.
    +  55    * calc_area().
    +  56        Calculate the peak area using numpy's trapezoidal fit.
    +  57    * fit_peak(mz_extend=6, delta_rp=0, model='Gaussian').
    +  58        Perform lineshape analysis on a peak using lmfit module.
    +  59    * voigt_pso(w, r, yoff, width, loc, a).
    +  60        Calculate the Voigt function for particle swarm optimization (PSO) fitting.
    +  61    * objective_pso(x, w, u).
    +  62        Calculate the objective function for PSO fitting.
    +  63    * minimize_pso(lower, upper, w, u).
    +  64        Minimize the objective function using the particle swarm optimization algorithm.
    +  65    * fit_peak_pso(mz_extend=6, upsample_multiplier=5).
    +  66        Perform lineshape analysis on a peak using particle swarm optimization (PSO) fitting.
    +  67    * voigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    +  68        [Legacy] Perform voigt lineshape analysis on a peak.
    +  69    * pseudovoigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1, fraction=0.5).
    +  70        [Legacy] Perform pseudovoigt lineshape analysis on a peak.
    +  71    * lorentz(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    +  72        [Legacy] Perform lorentz lineshape analysis on a peak.
    +  73    * gaussian(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    +  74        [Legacy] Perform gaussian lineshape analysis on a peak.
    +  75    * get_mz_domain(oversample_multiplier, mz_overlay).
    +  76        [Legacy] Resample/interpolate datapoints for lineshape analysis.
    +  77    * number_possible_assignments().
    +  78        Return the number of possible molecular formula assignments for the peak.
    +  79    * molecular_formula_lowest_error().
    +  80        Return the molecular formula with the smallest absolute mz error.
    +  81    * molecular_formula_highest_prob_score().
    +  82        Return the molecular formula with the highest confidence score.
    +  83    * molecular_formula_earth_filter(lowest_error=True).
    +  84        Filter molecular formula using the 'Earth' filter.
    +  85    * molecular_formula_water_filter(lowest_error=True).
    +  86        Filter molecular formula using the 'Water' filter.
    +  87    * molecular_formula_air_filter(lowest_error=True).
    +  88        Filter molecular formula using the 'Air' filter.
    +  89    * cia_score_S_P_error().
    +  90        Compound Identification Algorithm SP Error - Assignment Filter.
    +  91    * cia_score_N_S_P_error().
    +  92        Compound Identification Algorithm NSP Error - Assignment Filter.
    +  93
    +  94    """
    +  95
    +  96    def _calc_kmd(self, dict_base):
    +  97        """Calculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula
    +  98
    +  99        Parameters
    + 100        ----------
    + 101        dict_base : dict
    + 102            dictionary with the base formula to be used in the calculation
    + 103            Default is CH2, e.g.
    + 104                dict_base = {"C": 1, "H": 2}
    + 105        """
    + 106
    + 107        if self._ms_parent:
    + 108            # msPeak obj does have a ms object parent
    + 109            kendrick_rounding_method = (
    + 110                self._ms_parent.mspeaks_settings.kendrick_rounding_method
    + 111            )  # rounding method can be one of floor, ceil or round
    + 112            # msPeak obj does not have a ms object parent
    + 113        else:
    + 114            kendrick_rounding_method = MSParameters.ms_peak.kendrick_rounding_method
    + 115
    + 116        mass = 0
    + 117        for atom in dict_base.keys():
    + 118            mass += Atoms.atomic_masses.get(atom) * dict_base.get(atom)
    + 119
    + 120        kendrick_mass = (int(mass) / mass) * self.mz_exp
    + 121
    + 122        if kendrick_rounding_method == "ceil":
    + 123            nominal_km = ceil(kendrick_mass)
    + 124
    + 125        elif kendrick_rounding_method == "round":
    + 126            nominal_km = rint(kendrick_mass)
    + 127
    + 128        elif kendrick_rounding_method == "floor":
    + 129            nominal_km = floor(kendrick_mass)
    + 130
    + 131        else:
    + 132            raise Exception(
    + 133                "%s method was not implemented, please refer to corems.ms_peak.calc.MSPeakCalc Class"
    + 134                % kendrick_rounding_method
    + 135            )
    + 136
    + 137        kmd = nominal_km - kendrick_mass
    + 138
    + 139        # kmd = (nominal_km - km) * 1
    + 140        # kmd = round(kmd,0)
    + 141
    + 142        return kmd, kendrick_mass, nominal_km
    + 143
    + 144    def calc_area(self):
    + 145        """Calculate the peak area using numpy's trapezoidal rule
    + 146
    + 147        uses provided mz_domain to accurately integrate areas independent of digital resolution
    + 148
    + 149        Returns
    + 150        -------
    + 151        float
    + 152            peak area
    + 153        """
    + 154        if self.peak_right_index > self.peak_left_index:
    + 155            yy = self._ms_parent.abundance_profile[
    + 156                self.peak_left_index : self.peak_right_index
    + 157            ]
    + 158            xx = self._ms_parent.mz_exp_profile[
    + 159                self.peak_left_index : self.peak_right_index
    + 160            ]
    + 161            # check if the axis is high to low m/z or not. if its MSFromFreq its high mz first, if its from Profile, its low mz first
    + 162            if xx[0] > xx[-1]:
    + 163                xx = flip(xx)
    + 164                yy = flip(yy)
    + 165            return float(trapz(yy, xx))
    + 166
    + 167        else:
    + 168            warnings.warn(
    + 169                "Peak Area Calculation for m/z {} has failed".format(self.mz_exp)
    + 170            )
    + 171            return nan
    + 172
    + 173    def fit_peak(self, mz_extend=6, delta_rp=0, model="Gaussian"):
    + 174        """Lineshape analysis on a peak using lmfit module.
    + 175
    + 176        Model and fit peak lineshape by defined function - using lmfit module
    + 177        Does not oversample/resample/interpolate data points
    + 178        Better to go back to time domain and perform more zero filling - if possible.
    + 179
    + 180        Parameters
    + 181        ----------
    + 182        mz_extend : int
    + 183            extra points left and right of peak definition to include in fitting
    + 184        delta_rp : float
    + 185            delta resolving power to add to resolving power
    + 186        model : str
    + 187            Type of lineshape model to use.
    + 188            Models allowed: Gaussian, Lorentz, Voigt
    + 189
    + 190        Returns
    + 191        -------
    + 192        mz_domain : ndarray
    + 193            x-axis domain for fit
    + 194        fit_peak : lmfit object
    + 195            fit results object from lmfit module
    + 196
    + 197        Notes
    + 198        -----
    + 199        Returns the calculated mz domain and the fit peak results object from lmfit module
    + 200        mz_extend here extends the x-axis domain so that we have sufficient points either side of the apex to fit.
    + 201        Takes about 10ms per peak
    + 202        """
    + 203        start_index = (
    + 204            self.peak_left_index - mz_extend if not self.peak_left_index == 0 else 0
    + 205        )
    + 206        final_index = (
    + 207            self.peak_right_index + mz_extend
    + 208            if not self.peak_right_index == len(self._ms_parent.mz_exp_profile)
    + 209            else self.peak_right_index
    + 210        )
    + 211
    + 212        # check if MSPeak contains the resolving power info
    + 213        if self.resolving_power:
    + 214            # full width half maximum distance
    + 215            self.fwhm = self.mz_exp / (self.resolving_power + delta_rp)
    + 216
    + 217            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    + 218            abundance_domain = self._ms_parent.abundance_profile[
    + 219                start_index:final_index
    + 220            ]
    + 221
    + 222            if model == "Gaussian":
    + 223                # standard deviation
    + 224                sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    + 225                amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 226                model = models.GaussianModel()
    + 227                params = model.make_params(
    + 228                    center=self.mz_exp, amplitude=amplitude, sigma=sigma
    + 229                )
    + 230
    + 231            elif model == "Lorentz":
    + 232                # half width at half maximum (sigma of the Lorentzian)
    + 233                sigma = self.fwhm / 2
    + 234                amplitude = sigma * pi * self.abundance
    + 235                model = models.LorentzianModel()
    + 236                params = model.make_params(
    + 237                    center=self.mz_exp, amplitude=amplitude, sigma=sigma
    + 238                )
    + 239
    + 240            elif model == "Voigt":
    + 241                # standard deviation
    + 242                sigma = self.fwhm / 3.6013
    + 243                amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 244                model = models.VoigtModel()
    + 245                params = model.make_params(
    + 246                    center=self.mz_exp, amplitude=amplitude, sigma=sigma, gamma=sigma
    + 247                )
    + 248            else:
    + 249                raise LookupError("model lineshape not known or defined")
    + 250
    + 251            # calc_abundance = model.eval(params=params, x=mz_domain) #Same as initial fit, returned in fit_peak object
    + 252            fit_peak = model.fit(abundance_domain, params=params, x=mz_domain)
    + 253            return mz_domain, fit_peak
    + 254
    + 255        else:
    + 256            raise LookupError(
    + 257                "resolving power is not defined, try to use set_max_resolving_power()"
    + 258            )
    + 259
    + 260    def voigt_pso(self, w, r, yoff, width, loc, a):
    + 261        """Voigt function for particle swarm optimisation (PSO) fitting
    + 262
    + 263        From https://github.com/pnnl/nmrfit/blob/master/nmrfit/equations.py.
    + 264        Calculates a Voigt function over w based on the relevant properties of the distribution.
    + 265
    + 266        Parameters
    + 267        ----------
    + 268        w : ndarray
    + 269            Array over which the Voigt function will be evaluated.
    + 270        r : float
    + 271            Ratio between the Gaussian and Lorentzian functions.
    + 272        yoff : float
    + 273            Y-offset of the Voigt function.
    + 274        width : float
    + 275            The width of the Voigt function.
    + 276        loc : float
    + 277            Center of the Voigt function.
    + 278        a : float
    + 279            Area of the Voigt function.
    + 280        Returns
    + 281        -------
    + 282        V : ndarray
    + 283            Array defining the Voigt function over w.
    + 284
    + 285        References
    + 286        ----------
    + 287        1. https://github.com/pnnl/nmrfit
    + 288
    + 289        Notes
    + 290        -----
    + 291        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    + 292
    + 293        """
    + 294        # Lorentzian component
    + 295        L = (2 / (pi * width)) * 1 / (1 + ((w - loc) / (0.5 * width)) ** 2)
    + 296
    + 297        # Gaussian component
    + 298        G = (
    + 299            (2 / width)
    + 300            * sqrt(log(2) / pi)
    + 301            * exp(-(((w - loc) / (width / (2 * sqrt(log(2))))) ** 2))
    + 302        )
    + 303
    + 304        # Voigt body
    + 305        V = (yoff + a) * (r * L + (1 - r) * G)
    + 306
    + 307        return V
    + 308
    + 309    def objective_pso(self, x, w, u):
    + 310        """Objective function for particle swarm optimisation (PSO) fitting
    + 311
    + 312        The objective function used to fit supplied data.  Evaluates the root mean square error between the fit and the data.
    + 313
    + 314        Parameters
    + 315        ----------
    + 316        x : list of floats
    + 317            Parameter vector.
    + 318        w : ndarray
    + 319            Array of frequency data.
    + 320        u : ndarray
    + 321            Array of data to be fit.
    + 322
    + 323        Returns
    + 324        -------
    + 325        rmse : float
    + 326            Root mean square error between the data and fit.
    + 327
    + 328        References
    + 329        ----------
    + 330        1. https://github.com/pnnl/nmrfit
    + 331
    + 332        """
    + 333        # global parameters
    + 334        r, width, loc, a = x
    + 335        yoff = 0
    + 336
    + 337        # calculate fit for V
    + 338        V_fit = self.voigt_pso(w, r, yoff, width, loc, a)
    + 339
    + 340        # real component RMSE
    + 341        rmse = sqrt(square((u - V_fit)).mean(axis=None))
    + 342
    + 343        # return the total RMSE
    + 344        return rmse
    + 345
    + 346    def minimize_pso(self, lower, upper, w, u):
    + 347        """Minimization function for particle swarm optimisation (PSO) fitting
    + 348
    + 349        Minimizes the objective function using the particle swarm optimization algorithm.
    + 350        Minimization function based on defined parameters
    + 351
    + 352
    + 353        Parameters
    + 354        ----------
    + 355        lower : list of floats
    + 356            Lower bounds for the parameters.
    + 357        upper : list of floats
    + 358            Upper bounds for the parameters.
    + 359        w : ndarray
    + 360            Array of frequency data.
    + 361        u : ndarray
    + 362            Array of data to be fit.
    + 363
    + 364        Notes
    + 365        -----
    + 366        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    + 367        Current parameters take ~2 seconds per peak.
    + 368
    + 369
    + 370        References
    + 371        ----------
    + 372        1. https://github.com/pnnl/nmrfit
    + 373
    + 374        """
    + 375        # TODO - allow support to pass swarmsize, maxiter, omega, phip, phig parameters.
    + 376        # TODO - Refactor PSO fitting into its own class?
    + 377
    + 378        xopt, fopt = pyswarm.pso(
    + 379            self.objective_pso,
    + 380            lower,
    + 381            upper,
    + 382            args=(w, u),
    + 383            swarmsize=1000,
    + 384            maxiter=5000,
    + 385            omega=-0.2134,
    + 386            phip=-0.3344,
    + 387            phig=2.3259,
    + 388        )
    + 389        return xopt, fopt
    + 390
    + 391    def fit_peak_pso(self, mz_extend: int = 6, upsample_multiplier: int = 5):
    + 392        """Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting
    + 393
    + 394        Function to fit a Voigt peakshape using particle swarm optimisation (PSO).
    + 395        Should return better results than lmfit, but much more computationally expensive
    + 396
    + 397        Parameters
    + 398        ----------
    + 399        mz_extend : int, optional
    + 400            extra points left and right of peak definition to include in fitting. Defaults to 6.
    + 401        upsample_multiplier : int, optional
    + 402            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 5.
    + 403
    + 404        Returns
    + 405        -------
    + 406        xopt : array
    + 407            variables describing the voigt function.
    + 408            G/L ratio, width (fwhm), apex (x-axis), area.
    + 409            y-axis offset is fixed at 0
    + 410        fopt : float
    + 411            objective score (rmse)
    + 412        psfit : array
    + 413            recalculated y values based on function and optimised fit
    + 414        psfit_hdp : tuple of arrays
    + 415            0 - linspace x-axis upsampled grid
    + 416            1 - recalculated y values based on function and upsampled x-axis grid
    + 417            Does not change results, but aids in visualisation of the 'true' voigt lineshape
    + 418
    + 419        Notes
    + 420        -----
    + 421        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    + 422        """
    + 423        # TODO - Add ability to pass pso args (i.e. swarm size, maxiter, omega, phig, etc)
    + 424        # TODO: fix xopt. Magnitude mode data through CoreMS/Bruker starts at 0 but is noise centered well above 0.
    + 425        # Thermo data is noise reduced but also noise subtracted, so starts at 0
    + 426        # Absorption mode/phased data will have positive and negative components and may not be baseline corrected
    + 427
    + 428        start_index = (
    + 429            self.peak_left_index - mz_extend if not self.peak_left_index == 0 else 0
    + 430        )
    + 431        final_index = (
    + 432            self.peak_right_index + mz_extend
    + 433            if not self.peak_right_index == len(self._ms_parent.mz_exp_profile)
    + 434            else self.peak_right_index
    + 435        )
    + 436
    + 437        # check if MSPeak contains the resolving power info
    + 438        if self.resolving_power:
    + 439            # full width half maximum distance
    + 440            self.fwhm = self.mz_exp / (self.resolving_power)
    + 441
    + 442            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    + 443            abundance_domain = self._ms_parent.abundance_profile[
    + 444                start_index:final_index
    + 445            ]
    + 446            lower = [0, self.fwhm * 0.8, (self.mz_exp - 0.0005), 0]
    + 447            upper = [
    + 448                1,
    + 449                self.fwhm * 1.2,
    + 450                (self.mz_exp + 0.0005),
    + 451                self.abundance / self.signal_to_noise,
    + 452            ]
    + 453            xopt, fopt = self.minimize_pso(lower, upper, mz_domain, abundance_domain)
    + 454
    + 455            psfit = self.voigt_pso(mz_domain, xopt[0], 0, xopt[1], xopt[2], xopt[3])
    + 456            psfit_hdp_x = linspace(
    + 457                min(mz_domain), max(mz_domain), num=len(mz_domain) * upsample_multiplier
    + 458            )
    + 459            psfit_hdp = self.voigt_pso(
    + 460                psfit_hdp_x, xopt[0], 0, xopt[1], xopt[2], xopt[3]
    + 461            )
    + 462            return xopt, fopt, psfit, (psfit_hdp_x, psfit_hdp)
    + 463        else:
    + 464            raise LookupError(
    + 465                "resolving power is not defined, try to use set_max_resolving_power()"
    + 466            )
    + 467
    + 468    def voigt(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):
    + 469        """[Legacy] Voigt lineshape analysis function
    + 470        Legacy function for voigt lineshape analysis
    + 471
    + 472        Parameters
    + 473        ----------
    + 474        oversample_multiplier : int
    + 475            factor to increase x-axis points by for simulation of fitted lineshape function
    + 476        delta_rp : float
    + 477            delta resolving power to add to resolving power
    + 478        mz_overlay : int
    + 479            extra points left and right of peak definition to include in fitting
    + 480
    + 481        Returns
    + 482        -------
    + 483        mz_domain : ndarray
    + 484            x-axis domain for fit
    + 485        calc_abundance : ndarray
    + 486            calculated abundance profile based on voigt function
    + 487        """
    + 488
    + 489        if self.resolving_power:
    + 490            # full width half maximum distance
    + 491            self.fwhm = self.mz_exp / (
    + 492                self.resolving_power + delta_rp
    + 493            )  # self.resolving_power)
    + 494
    + 495            # standard deviation
    + 496            sigma = self.fwhm / 3.6013
    + 497
    + 498            # half width baseline distance
    + 499
    + 500            # mz_domain = linspace(self.mz_exp - hw_base_distance,
    + 501            #                     self.mz_exp + hw_base_distance, datapoint)
    + 502            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    + 503
    + 504            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    + 505
    + 506            # TODO derive amplitude
    + 507            amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 508
    + 509            model = models.VoigtModel()
    + 510
    + 511            params = model.make_params(
    + 512                center=self.mz_exp, amplitude=amplitude, sigma=sigma, gamma=sigma
    + 513            )
    + 514
    + 515            calc_abundance = model.eval(params=params, x=mz_domain)
    + 516
    + 517            return mz_domain, calc_abundance
    + 518
    + 519        else:
    + 520            raise LookupError(
    + 521                "resolving power is not defined, try to use set_max_resolving_power()"
    + 522            )
    + 523
    + 524    def pseudovoigt(
    + 525        self, oversample_multiplier=1, delta_rp=0, mz_overlay=1, fraction=0.5
    + 526    ):
    + 527        """[Legacy] pseudovoigt lineshape function
    + 528
    + 529        Legacy function for pseudovoigt lineshape analysis.
    + 530        Note - Code may not be functional currently.
    + 531
    + 532        Parameters
    + 533        ----------
    + 534        oversample_multiplier : int, optional
    + 535            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 1.
    + 536        delta_rp : float, optional
    + 537            delta resolving power to add to resolving power. Defaults to 0.
    + 538        mz_overlay : int, optional
    + 539            extra points left and right of peak definition to include in fitting. Defaults to 1.
    + 540        fraction : float, optional
    + 541            fraction of gaussian component in pseudovoigt function; currently not passed to the model by this method. Defaults to 0.5.
    + 542
    + 543        """
    + 544        if self.resolving_power:
    + 545            # full width half maximum distance
    + 546            self.fwhm = self.mz_exp / (
    + 547                self.resolving_power + delta_rp
    + 548            )  # self.resolving_power)
    + 549
    + 550            # sigma, set to half of the FWHM
    + 551            sigma = self.fwhm / 2
    + 552
    + 553            # half width baseline distance
    + 554
    + 555            # mz_domain = linspace(self.mz_exp - hw_base_distance,
    + 556            #                     self.mz_exp + hw_base_distance, datapoint)
    + 557            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    + 558
    + 559            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    + 560            model = models.PseudoVoigtModel()
    + 561
    + 562            # TODO derive amplitude
    + 563            gamma = sigma
    + 564
    + 565            amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 566            amplitude = (sqrt(pi / log(2)) * (pi * sigma * self.abundance)) / (
    + 567                (pi * (1 - gamma)) + (sqrt(pi * log(2)) * gamma)
    + 568            )
    + 569
    + 570            params = model.make_params(center=self.mz_exp, sigma=sigma)
    + 571
    + 572            calc_abundance = model.eval(params=params, x=mz_domain)
    + 573
    + 574            return mz_domain, calc_abundance
    + 575
    + 576        else:
    + 577            raise LookupError(
    + 578                "resolving power is not defined, try to use set_max_resolving_power()"
    + 579            )
    + 580
    + 581    def lorentz(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):
    + 582        """[Legacy] Lorentz lineshape analysis function
    + 583
    + 584        Legacy function for lorentz lineshape analysis
    + 585
    + 586        Parameters
    + 587        ----------
    + 588        oversample_multiplier : int
    + 589            factor to increase x-axis points by for simulation of fitted lineshape function
    + 590        delta_rp : float
    + 591            delta resolving power to add to resolving power
    + 592        mz_overlay : int
    + 593            extra points left and right of peak definition to include in fitting
    + 594
    + 595        Returns
    + 596        -------
    + 597        mz_domain : ndarray
    + 598            x-axis domain for fit
    + 599        calc_abundance : ndarray
    + 600            calculated abundance profile based on lorentz function
    + 601
    + 602        """
    + 603        if self.resolving_power:
    + 604            # full width half maximum distance
    + 605            self.fwhm = self.mz_exp / (
    + 606                self.resolving_power + delta_rp
    + 607            )  # self.resolving_power)
    + 608
    + 609            # sigma, set to half of the FWHM
    + 610            sigma = self.fwhm / 2
    + 611
    + 612            # half width baseline distance
    + 613            hw_base_distance = 8 * sigma
    + 614
    + 615            # mz_domain = linspace(self.mz_exp - hw_base_distance,
    + 616            #                     self.mz_exp + hw_base_distance, datapoint)
    + 617
    + 618            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    + 619            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    + 620            model = models.LorentzianModel()
    + 621
    + 622            amplitude = sigma * pi * self.abundance
    + 623
    + 624            params = model.make_params(
    + 625                center=self.mz_exp, amplitude=amplitude, sigma=sigma
    + 626            )
    + 627
    + 628            calc_abundance = model.eval(params=params, x=mz_domain)
    + 629
    + 630            return mz_domain, calc_abundance
    + 631
    + 632        else:
    + 633            raise LookupError(
    + 634                "resolving power is not defined, try to use set_max_resolving_power()"
    + 635            )
    + 636
    + 637    def gaussian(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):
    + 638        """[Legacy] Gaussian lineshape analysis function
    + 639        Legacy gaussian lineshape analysis function
    + 640
    + 641        Parameters
    + 642        ----------
    + 643        oversample_multiplier : int
    + 644            factor to increase x-axis points by for simulation of fitted lineshape function
    + 645        delta_rp : float
    + 646            delta resolving power to add to resolving power
    + 647        mz_overlay : int
    + 648            extra points left and right of peak definition to include in fitting
    + 649
    + 650        Returns
    + 651        -------
    + 652        mz_domain : ndarray
    + 653            x-axis domain for fit
    + 654        calc_abundance : ndarray
    + 655            calculated abundance profile based on gaussian function
    + 656
    + 657
    + 658        """
    + 659
    + 660        # check if MSPeak contains the resolving power info
    + 661        if self.resolving_power:
    + 662            # full width half maximum distance
    + 663            self.fwhm = self.mz_exp / (
    + 664                self.resolving_power + delta_rp
    + 665            )  # self.resolving_power)
    + 666
    + 667            # standard deviation
    + 668            sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    + 669
    + 670            # half width baseline distance
    + 671            # hw_base_distance = (3.2 * s)
    + 672
    + 673            # match_loz_factor = 3
    + 674
    + 675            # n_d = hw_base_distance * match_loz_factor
    + 676
    + 677            # mz_domain = linspace(
    + 678            #    self.mz_exp - n_d, self.mz_exp + n_d, datapoint)
    + 679
    + 680            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    + 681
    + 682            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    + 683
    + 684            # calc_abundance = norm.pdf(mz_domain, self.mz_exp, s)
    + 685
    + 686            model = models.GaussianModel()
    + 687
    + 688            amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 689
    + 690            params = model.make_params(
    + 691                center=self.mz_exp, amplitude=amplitude, sigma=sigma
    + 692            )
    + 693
    + 694            calc_abundance = model.eval(params=params, x=mz_domain)
    + 695
    + 696            return mz_domain, calc_abundance
    + 697
    + 698        else:
    + 699            raise LookupError(
    + 700                "resolving power is not defined, try to use set_max_resolving_power()"
    + 701            )
    + 702
    + 703    def get_mz_domain(self, oversample_multiplier, mz_overlay):
    + 704        """[Legacy] function to resample/interpolate datapoints for lineshape analysis
    + 705
    + 706        This code is used for the legacy line fitting functions and not recommended.
    + 707        Legacy function to support expanding mz domain for legacy lineshape functions
    + 708
    + 709        Parameters
    + 710        ----------
    + 711        oversample_multiplier : int
    + 712            factor to increase x-axis points by for simulation of fitted lineshape function
    + 713        mz_overlay : int
    + 714            extra points left and right of peak definition to include in fitting
    + 715
    + 716        Returns
    + 717        -------
    + 718        mz_domain : ndarray
    + 719            x-axis domain for fit
    + 720
    + 721        """
    + 722        start_index = (
    + 723            self.peak_left_index - mz_overlay if not self.peak_left_index == 0 else 0
    + 724        )
    + 725        final_index = (
    + 726            self.peak_right_index + mz_overlay
    + 727            if not self.peak_right_index == len(self._ms_parent.mz_exp_profile)
    + 728            else self.peak_right_index
    + 729        )
    + 730
    + 731        if oversample_multiplier == 1:
    + 732            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    + 733
    + 734        else:
    + 735            # we assume a linear correlation for m/z and datapoints
    + 736            # which is only true if the m/z range is narrow (within 1 m/z unit)
    + 737            # this is not true for a wide m/z range
    + 738
    + 739            indexes = range(start_index, final_index + 1)
    + 740            mz = self._ms_parent.mz_exp_profile[indexes]
    + 741            pol = poly1d(polyfit(indexes, mz, 1))
    + 742            oversampled_indexes = linspace(
    + 743                start_index,
    + 744                final_index,
    + 745                (final_index - start_index) * oversample_multiplier,
    + 746            )
    + 747            mz_domain = pol(oversampled_indexes)
    + 748
    + 749        return mz_domain
    + 750
    + 751    @property
    + 752    def number_possible_assignments(
    + 753        self,
    + 754    ):
    + 755        return len(self.molecular_formulas)
    + 756
    + 757    def molecular_formula_lowest_error(self):
    + 758        """Return the molecular formula with the smallest absolute mz error"""
    + 759
    + 760        return min(self.molecular_formulas, key=lambda m: abs(m.mz_error))
    + 761
    + 762    def molecular_formula_highest_prob_score(self):
    + 763        """Return the molecular formula with the highest confidence score"""
    + 764
    + 765        return max(self.molecular_formulas, key=lambda m: abs(m.confidence_score))
    + 766
    + 767    def molecular_formula_earth_filter(self, lowest_error=True):
    + 768        """Filter molecular formula using the 'Earth' filter
    + 769
    + 770        This function applies the Formularity-esque 'Earth' filter to possible molecular formula assignments.
    + 771        Earth Filter:
    + 772            O > 0 AND N <= 3 AND P <= 2 AND 3P <= O
    + 773
    + 774        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter.
    + 775        Otherwise, it will return all Earth-filter compliant formulas.
    + 776
    + 777        Parameters
    + 778        ----------
    + 779        lowest_error : bool, optional
    + 780            Return only the lowest error formula which also fits the Earth filter.
    + 781            If False, return all Earth-filter compliant formulas. Default is True.
    + 782
    + 783        Returns
    + 784        -------
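    + 785        MolecularFormula or list
    + 786            The single lowest-error formula which fits the Earth filter if lowest_error is True and at least one formula fits; otherwise the list of Earth-filter compliant formulas (empty if none fit)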
    + 787
    + 788        References
    + 789        ----------
    + 790        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    + 791            Anal. Chem. 2017, 89, 23, 12659–12665
    + 792            doi: 10.1021/acs.analchem.7b03318
    + 793        """
    + 794
    + 795        candidates = list(
    + 796            filter(
    + 797                lambda mf: mf.get("O") > 0
    + 798                and mf.get("N") <= 3
    + 799                and mf.get("P") <= 2
    + 800                and (3 * mf.get("P")) <= mf.get("O"),
    + 801                self.molecular_formulas,
    + 802            )
    + 803        )
    + 804        if len(candidates) > 0:
    + 805            if lowest_error:
    + 806                return min(candidates, key=lambda m: abs(m.mz_error))
    + 807            else:
    + 808                return candidates
    + 809        else:
    + 810            return candidates
    + 811
    + 812    def molecular_formula_water_filter(self, lowest_error=True):
    + 813        """Filter molecular formula using the 'Water' filter
    + 814
    + 815        This function applies the Formularity-esque 'Water' filter to possible molecular formula assignments.
    + 816        Water Filter:
    + 817            O > 0 AND N <= 3 AND S <= 2 AND P <= 2
    + 818
    + 819        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Water filter.
    + 820        Otherwise, it will return all Water-filter compliant formulas.
    + 821
    + 822        Parameters
    + 823        ----------
    + 824        lowest_error : bool, optional
    + 825            Return only the lowest error formula which also fits the Water filter.
    + 826            If False, return all Water-filter compliant formulas. Defaults to True.
    + 827
    + 828        Returns
    + 829        -------
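    + 830        MolecularFormula or list
    + 831            The single lowest-error formula which fits the Water filter if lowest_error is True and at least one formula fits; otherwise the list of Water-filter compliant formulas (empty if none fit)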
    + 832
    + 833        References
    + 834        ----------
    + 835        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    + 836            Anal. Chem. 2017, 89, 23, 12659–12665
    + 837            doi: 10.1021/acs.analchem.7b03318
    + 838        """
    + 839
    + 840        candidates = list(
    + 841            filter(
    + 842                lambda mf: mf.get("O") > 0
    + 843                and mf.get("N") <= 3
    + 844                and mf.get("S") <= 2
    + 845                and mf.get("P") <= 2,
    + 846                self.molecular_formulas,
    + 847            )
    + 848        )
    + 849        if len(candidates) > 0:
    + 850            if lowest_error:
    + 851                return min(candidates, key=lambda m: abs(m.mz_error))
    + 852            else:
    + 853                return candidates
    + 854        else:
    + 855            return candidates
    + 856
    + 857    def molecular_formula_air_filter(self, lowest_error=True):
    + 858        """Filter molecular formula using the 'Air' filter
    + 859
    + 860        This function applies the Formularity-esque 'Air' filter to possible molecular formula assignments.
    + 861        Air Filter:
    + 862            O > 0 AND N <= 2 AND S <= 1 AND P = 0 AND 3(S+N) <= O
    + 863
    + 864        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Air filter.
    + 865        Otherwise, it will return all Air-filter compliant formulas.
    + 866
    + 867        Parameters
    + 868        ----------
    + 869        lowest_error : bool, optional
    + 870            Return only the lowest error formula which also fits the Air filter.
    + 871            If False, return all Air-filter compliant formulas. Defaults to True.
    + 872
    + 873        Returns
    + 874        -------
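    + 875        MolecularFormula or list
    + 876            The single lowest-error formula which fits the Air filter if lowest_error is True and at least one formula fits; otherwise the list of Air-filter compliant formulas (empty if none fit)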
    + 877
    + 878        References
    + 879        ----------
    + 880        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    + 881            Anal. Chem. 2017, 89, 23, 12659–12665
    + 882            doi: 10.1021/acs.analchem.7b03318
    + 883        """
    + 884
    + 885        candidates = list(
    + 886            filter(
    + 887                lambda mf: mf.get("O") > 0
    + 888                and mf.get("N") <= 2
    + 889                and mf.get("S") <= 1
    + 890                and mf.get("P") == 0
    + 891                and 3 * (mf.get("S") + mf.get("N")) <= mf.get("O"),
    + 892                self.molecular_formulas,
    + 893            )
    + 894        )
    + 895
    + 896        if len(candidates) > 0:
    + 897            if lowest_error:
    + 898                return min(candidates, key=lambda m: abs(m.mz_error))
    + 899            else:
    + 900                return candidates
    + 901        else:
    + 902            return candidates
    + 903
    + 904    def cia_score_S_P_error(self):
    + 905        """Compound Identification Algorithm SP Error - Assignment Filter
    + 906
    + 907        This function applies the Compound Identification Algorithm (CIA) SP Error filter to possible molecular formula assignments.
    + 908
    + 909        It takes the molecular formula with the lowest S+P count, and returns the formula with the lowest absolute error from this subset.
    + 910
    + 911        Returns
    + 912        -------
    + 913        MolecularFormula
    + 914            A single molecular formula which fits the rules of the CIA SP Error filter
    + 915
    + 916
    + 917        References
    + 918        ----------
    + 919        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    + 920            Anal. Chem. 2006, 78, 13, 4363–4373
    + 921            doi: 10.1021/ac0600306
    + 922        """
    + 923        # case EFormulaScore.HAcap:
    + 924
    + 925        lowest_S_P_mf = min(
    + 926            self.molecular_formulas, key=lambda mf: mf.get("S") + mf.get("P")
    + 927        )
    + 928        lowest_S_P_count = lowest_S_P_mf.get("S") + lowest_S_P_mf.get("P")
    + 929
    + 930        list_same_s_p = list(
    + 931            filter(
    + 932                lambda mf: mf.get("S") + mf.get("P") == lowest_S_P_count,
    + 933                self.molecular_formulas,
    + 934            )
    + 935        )
    + 936
    + 937        # check if list is not empty
    + 938        if list_same_s_p:
    + 939            return min(list_same_s_p, key=lambda m: abs(m.mz_error))
    + 940
    + 941        else:
    + 942            return lowest_S_P_mf
    + 943
    + 944    def cia_score_N_S_P_error(self):
    + 945        """Compound Identification Algorithm NSP Error - Assignment Filter
    + 946
    + 947        This function applies the Compound Identification Algorithm (CIA) NSP Error filter to possible molecular formula assignments.
    + 948
    + 949        It takes the molecular formula with the lowest N+S+P count, and returns the formula with the lowest absolute error from this subset.
    + 950
    + 951        Returns
    + 952        -------
    + 953        MolecularFormula
    + 954            A single molecular formula which fits the rules of the CIA NSP Error filter
    + 955
    + 956        References
    + 957        ----------
    + 958        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    + 959            Anal. Chem. 2006, 78, 13, 4363–4373
    + 960            doi: 10.1021/ac0600306
    + 961
    + 962        Raises
    + 963        -------
    + 964        Exception
    + 965            If no molecular formula are associated with mass spectrum peak.
    + 966        """
    + 967        # case EFormulaScore.HAcap:
    + 968        if self.molecular_formulas:
    + 969            lowest_N_S_P_mf = min(
    + 970                self.molecular_formulas,
    + 971                key=lambda mf: mf.get("N") + mf.get("S") + mf.get("P"),
    + 972            )
    + 973            lowest_N_S_P_count = (
    + 974                lowest_N_S_P_mf.get("N")
    + 975                + lowest_N_S_P_mf.get("S")
    + 976                + lowest_N_S_P_mf.get("P")
    + 977            )
    + 978
    + 979            list_same_N_S_P = list(
    + 980                filter(
    + 981                    lambda mf: mf.get("N") + mf.get("S") + mf.get("P")
    + 982                    == lowest_N_S_P_count,
    + 983                    self.molecular_formulas,
    + 984                )
    + 985            )
    + 986
    + 987            if list_same_N_S_P:
    + 988                SP_filtered_list = list(
    + 989                    filter(
    + 990                        lambda mf: (mf.get("S") <= 3) and (mf.get("P") <= 1),
    + 991                        list_same_N_S_P,
    + 992                    )
    + 993                )
    + 994
    + 995                if SP_filtered_list:
    + 996                    return min(SP_filtered_list, key=lambda m: abs(m.mz_error))
    + 997
    + 998                else:
    + 999                    return min(list_same_N_S_P, key=lambda m: abs(m.mz_error))
    +1000
    +1001            else:
    +1002                return lowest_N_S_P_mf
    +1003        else:
    +1004            raise Exception(
    +1005                "No molecular formula associated with the mass spectrum peak at m/z: %.6f"
    +1006                % self.mz_exp
    +1007            )
     
    @@ -1014,877 +1136,985 @@

    -
     16class MSPeakCalculation:
    - 17    """Class to perform calculations on MSPeak objects.
    - 18
    - 19    This class provides methods to perform various calculations on MSPeak objects, such as calculating Kendrick Mass Defect (KMD) and Kendrick Mass (KM), calculating peak area, and fitting peak lineshape using different models.
    - 20
    - 21    Parameters
    - 22    ----------
    - 23    None
    - 24
    - 25    Attributes
    - 26    ----------
    - 27    _ms_parent : MSParent
    - 28        The parent MSParent object associated with the MSPeakCalculation object.
    - 29    mz_exp : float
    - 30        The experimental m/z value of the peak.
    - 31    peak_left_index : int
    - 32        The start scan index of the peak.
    - 33    peak_right_index : int
    - 34        The final scan index of the peak.
    - 35    resolving_power : float
    - 36        The resolving power of the peak.
    - 37
    - 38    Methods
    - 39    -------
    - 40    * _calc_kmd(dict_base).
    - 41        Calculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula.
    - 42    * calc_area().
    - 43        Calculate the peak area using numpy's trapezoidal fit.
    - 44    * fit_peak(mz_extend=6, delta_rp=0, model='Gaussian').
    - 45        Perform lineshape analysis on a peak using lmfit module.
    - 46    * voigt_pso(w, r, yoff, width, loc, a).
    - 47        Calculate the Voigt function for particle swarm optimization (PSO) fitting.
    - 48    * objective_pso(x, w, u).
    - 49        Calculate the objective function for PSO fitting.
    - 50    * minimize_pso(lower, upper, w, u).
    - 51        Minimize the objective function using the particle swarm optimization algorithm.
    - 52    * fit_peak_pso(mz_extend=6, upsample_multiplier=5).
    - 53        Perform lineshape analysis on a peak using particle swarm optimization (PSO) fitting.
    - 54    * voigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    - 55        [Legacy] Perform voigt lineshape analysis on a peak.
    - 56    * pseudovoigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1, fraction=0.5).
    - 57        [Legacy] Perform pseudovoigt lineshape analysis on a peak.
    - 58    * lorentz(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    - 59        [Legacy] Perform lorentz lineshape analysis on a peak.
    - 60    * gaussian(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    - 61        [Legacy] Perform gaussian lineshape analysis on a peak.
    - 62    * get_mz_domain(oversample_multiplier, mz_overlay).
    - 63        [Legacy] Resample/interpolate datapoints for lineshape analysis.
    - 64    * number_possible_assignments().
    - 65        Return the number of possible molecular formula assignments for the peak.
    - 66    * molecular_formula_lowest_error().
    - 67        Return the molecular formula with the smallest absolute mz error.
    - 68    * molecular_formula_highest_prob_score().
    - 69        Return the molecular formula with the highest confidence score.
    - 70    * molecular_formula_earth_filter(lowest_error=True).
    - 71        Filter molecular formula using the 'Earth' filter.
    - 72    * molecular_formula_water_filter(lowest_error=True).
    - 73        Filter molecular formula using the 'Water' filter.
    - 74    * molecular_formula_air_filter(lowest_error=True).
    - 75        Filter molecular formula using the 'Air' filter.
    - 76    * cia_score_S_P_error().
    - 77        Compound Identification Algorithm SP Error - Assignment Filter.
    - 78    * cia_score_N_S_P_error().
    - 79        Compound Identification Algorithm NSP Error - Assignment Filter.
    - 80    
    - 81    """
    - 82   
    - 83
    - 84    def _calc_kmd(self, dict_base):
    - 85        """ Calculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula    
    - 86        
    - 87        Parameters
    - 88        ----------
    - 89        dict_base : dict
    - 90            dictionary with the base formula to be used in the calculation
    - 91            Default is CH2, e.g.
    - 92                dict_base = {"C": 1, "H": 2}
    - 93        """
    - 94
    - 95        if self._ms_parent:
    - 96            # msPeak obj does have a ms object parent
    - 97            kendrick_rounding_method = self._ms_parent.mspeaks_settings.kendrick_rounding_method  # rounding method can be one of floor, ceil or round
    - 98            # msPeak obj does not have a ms object parent
    - 99        else:
    -100            kendrick_rounding_method = MSParameters.ms_peak.kendrick_rounding_method
    -101        
    -102        mass = 0
    -103        for atom in dict_base.keys():
    -104            mass += Atoms.atomic_masses.get(atom) * dict_base.get(atom)
    -105
    -106        kendrick_mass = (int(mass) / mass) * self.mz_exp
    -107
    -108        if kendrick_rounding_method == 'ceil':
    -109
    -110            nominal_km = ceil(kendrick_mass)
    -111
    -112        elif kendrick_rounding_method == 'round': 
    -113
    -114            nominal_km = rint(kendrick_mass)
    -115
    -116        elif kendrick_rounding_method == 'floor':
    -117
    -118            nominal_km = floor(kendrick_mass)
    -119
    -120        else:
    -121            raise  Exception("%s method was not implemented, please refer to corems.ms_peak.calc.MSPeakCalc Class" % kendrick_rounding_method)
    -122
    -123        kmd = (nominal_km - kendrick_mass) 
    -124
    -125        # kmd = (nominal_km - km) * 1
    -126        #kmd = round(kmd,0)
    -127
    -128        return kmd, kendrick_mass, nominal_km
    -129
    -130    def calc_area(self):
    -131        """ Calculate the peak area using numpy's trapezoidal fit
    -132
    -133        uses provided mz_domain to accurately integrate areas independent of digital resolution
    -134
    -135        Returns
    -136        -------
    -137        float
    -138            peak area
    -139        """
    -140        if self.peak_right_index > self.peak_left_index:
    -141
    -142            yy = self._ms_parent.abundance_profile[self.peak_left_index:self.peak_right_index]
    -143            xx = self._ms_parent.mz_exp_profile[self.peak_left_index:self.peak_right_index]
    -144            # check if the axis is high to low m/z or not. if its MSFromFreq its high mz first, if its from Profile, its low mz first
    -145            if xx[0] > xx[-1]:
    -146                xx = flip(xx)    
    -147                yy = flip(yy)   
    -148            return float(trapz(yy, xx))
    -149
    -150        else:
    -151
    -152            warnings.warn("Peak Area Calculation for m/z {} has failed".format(self.mz_exp))
    -153            return nan
    -154
    -155    def fit_peak(self,mz_extend=6, delta_rp = 0, model='Gaussian'):
    -156        """ Lineshape analysis on a peak using lmfit module. 
    -157
    -158        Model and fit peak lineshape by defined function - using lmfit module
    -159        Does not oversample/resample/interpolate data points 
    -160        Better to go back to time domain and perform more zero filling - if possible.
    -161
    -162        Parameters
    -163        ----------
    -164        mz_extend : int
    -165            extra points left and right of peak definition to include in fitting
    -166        delta_rp : float
    -167            delta resolving power to add to resolving power
    -168        model : str
    -169            Type of lineshape model to use.
    -170            Models allowed: Gaussian, Lorentz, Voigt
    -171
    -172        Returns
    -173        -----
    -174        mz_domain : ndarray
    -175            x-axis domain for fit
    -176        fit_peak : lmfit object
    -177            fit results object from lmfit module
    -178        
    -179        Notes
    -180        -----
    -181        Returns the calculated mz domain, initial defined abundance profile, and the fit peak results object from lmfit module
    -182        mz_extend here extends the x-axis domain so that we have sufficient points either side of the apex to fit.
    -183        Takes about 10ms per peak
    -184        """
    -185        start_index = self.peak_left_index - mz_extend  if not self.peak_left_index == 0 else 0
    -186        final_index = self.peak_right_index + mz_extend  if not self.peak_right_index == len(self._ms_parent.mz_exp_profile) else self.peak_right_index
    -187
    -188        # check if MSPeak contains the resolving power info
    -189        if self.resolving_power:
    -190            # full width half maximum distance
    -191            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))
    -192
    -193            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    -194            abundance_domain = self._ms_parent.abundance_profile[start_index:final_index]
    -195
    -196            if model=='Gaussian':
    -197                # stardard deviation
    -198                sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    -199                amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -200                model = models.GaussianModel()
    -201                params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -202
    -203            elif model=='Lorentz':
    -204                # stardard deviation
    -205                sigma = self.fwhm / 2
    -206                amplitude = sigma* pi * self.abundance
    -207                model = models.LorentzianModel()
    -208                params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -209
    -210            elif model=='Voigt':
    -211                # stardard deviation
    -212                sigma = self.fwhm / 3.6013
    -213                amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -214                model = models.VoigtModel()
    -215                params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma, gamma = sigma)
    -216            else:
    -217                raise LookupError('model lineshape not known or defined')
    -218
    -219            #calc_abundance = model.eval(params=params, x=mz_domain) #Same as initial fit, returned in fit_peak object
    -220            fit_peak = model.fit(abundance_domain,params=params, x=mz_domain)
    -221            return mz_domain, fit_peak
    -222
    -223        else:
    -224            raise LookupError(
    -225                'resolving power is not defined, try to use set_max_resolving_power()')
    -226
    -227
    -228    def voigt_pso(self,w, r, yoff, width, loc, a):
    -229        """ Voigt function for particle swarm optimisation (PSO) fitting
    -230
    -231        From https://github.com/pnnl/nmrfit/blob/master/nmrfit/equations.py.
    -232        Calculates a Voigt function over w based on the relevant properties of the distribution.
    -233
    -234        Parameters
    -235        ----------
    -236        w : ndarray
    -237            Array over which the Voigt function will be evaluated.
    -238        r : float
    -239            Ratio between the Guassian and Lorentzian functions.
    -240        yoff : float
    -241            Y-offset of the Voigt function.
    -242        width : float
    -243            The width of the Voigt function.
    -244        loc : float
    -245            Center of the Voigt function.
    -246        a : float
    -247            Area of the Voigt function.
    -248        Returns
    -249        -------
    -250        V : ndarray
    -251            Array defining the Voigt function over w.
    -252
    -253        References
    -254        ----------
    -255        1. https://github.com/pnnl/nmrfit 
    -256
    -257        Notes
    -258        -----
    -259        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    -260
    -261        """
    -262        # Lorentzian component
    -263        L = (2 / (pi * width)) * 1 / (1 + ((w - loc) / (0.5 * width))**2)
    -264
    -265        # Gaussian component
    -266        G = (2 / width) * sqrt(log(2) / pi) * exp(-((w - loc) / (width / (2 * sqrt(log(2)))))**2)
    -267
    -268        # Voigt body
    -269        V = (yoff + a) * (r * L + (1 - r) * G)
    -270
    -271        return V
    -272
    -273
    -274    def objective_pso(self, x, w, u):
    -275        """ Objective function for particle swarm optimisation (PSO) fitting
    -276
    -277        The objective function used to fit supplied data.  Evaluates sum of squared differences between the fit and the data.
    -278
    -279        Parameters
    -280        ----------
    -281        x : list of floats
    -282            Parameter vector.
    -283        w : ndarray
    -284            Array of frequency data.
    -285        u : ndarray
    -286            Array of data to be fit.
    -287
    -288        Returns
    -289        -------
    -290        rmse : float
    -291            Root mean square error between the data and fit.
    -292
    -293        References
    -294        ----------
    -295        1. https://github.com/pnnl/nmrfit 
    -296
    -297        """
    -298        # global parameters
    -299        r, width, loc, a = x
    -300        yoff = 0
    -301
    -302        # calculate fit for V
    -303        V_fit = self.voigt_pso(w, r, yoff, width, loc, a)
    -304
    -305        # real component RMSE
    -306        rmse = sqrt(square((u - V_fit)).mean(axis=None))
    -307
    -308        # return the total RMSE
    -309        return rmse
    -310
    -311    def minimize_pso(self,lower, upper, w, u):
    -312        """ Minimization function for particle swarm optimisation (PSO) fitting
    -313
    -314        Minimizes the objective function using the particle swarm optimization algorithm.
    -315        Minimization function based on defined parameters   
    -316
    -317
    -318        Parameters
    -319        ----------
    -320        lower : list of floats
    -321            Lower bounds for the parameters.
    -322        upper : list of floats
    -323            Upper bounds for the parameters.
    -324        w : ndarray
    -325            Array of frequency data.
    -326        u : ndarray
    -327            Array of data to be fit.
    -328
    -329        Notes
    -330        -----
    -331        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    -332        Current parameters take ~2 seconds per peak.
    -333
    -334
    -335        References
    -336        ----------
    -337        1. https://github.com/pnnl/nmrfit 
    -338
    -339        """
    -340        #TODO - allow support to pass swarmsize, maxiter, omega, phip, phig parameters.
    -341        #TODO - Refactor PSO fitting into its own class?
    -342        
    -343        xopt, fopt = pyswarm.pso(self.objective_pso, lower, upper, args=(w, u),
    -344                                    swarmsize=1000,
    -345                                    maxiter=5000,
    -346                                    omega=-0.2134,
    -347                                    phip=-0.3344,
    -348                                    phig=2.3259)
    -349        return xopt, fopt
    -350
    -351    def fit_peak_pso(self, mz_extend : int=6, upsample_multiplier : int=5):
    -352        """ Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting 
    -353
    -354        Function to fit a Voigt peakshape using particle swarm optimisation (PSO).
    -355        Should return better results than lmfit, but much more computationally expensive
    -356
    -357        Parameters
    -358        ----------
    -359        mz_extend : int, optional
    -360            extra points left and right of peak definition to include in fitting. Defaults to 6.
    -361        upsample_multiplier : int, optional
    -362            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 5.
    -363
    -364        Returns
    -365        -------
    -366        xopt : array
    -367            variables describing the voigt function.
    -368            G/L ratio, width (fwhm), apex (x-axis), area.
    -369            y-axis offset is fixed at 0 
    -370        fopt : float
    -371            objective score (rmse)
    -372        psfit : array
    -373            recalculated y values based on function and optimised fit
    -374        psfit_hdp : tuple of arrays
    -375            0 - linspace x-axis upsampled grid
    -376            1 - recalculated y values based on function and upsampled x-axis grid
    -377            Does not change results, but aids in visualisation of the 'true' voigt lineshape
    -378
    -379        Notes
    -380        -----
    -381        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    -382        """
    -383        # TODO - Add ability to pass pso args (i.e. swarm size, maxiter, omega, phig, etc)
    -384        # TODO: fix xopt. Magnitude mode data through CoreMS/Bruker starts at 0 but is noise centered well above 0.
    -385            # Thermo data is noise reduced by also noise subtracted, so starts at 0
    -386            # Absorption mode/phased data will have positive and negative components and may not be baseline corrected
    -387
    -388        start_index = self.peak_left_index - mz_extend  if not self.peak_left_index == 0 else 0
    -389        final_index = self.peak_right_index + mz_extend  if not self.peak_right_index == len(self._ms_parent.mz_exp_profile) else self.peak_right_index
    -390
    -391        # check if MSPeak contains the resolving power info
    -392        if self.resolving_power:
    -393            # full width half maximum distance
    -394            self.fwhm = (self.mz_exp / (self.resolving_power))
    -395
    -396            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    -397            abundance_domain = self._ms_parent.abundance_profile[start_index:final_index]
    -398            lower = [0, self.fwhm*0.8, (self.mz_exp-0.0005), 0]
    -399            upper = [1, self.fwhm*1.2, (self.mz_exp+0.0005), self.abundance/self.signal_to_noise]
    -400            xopt, fopt = self.minimize_pso(lower,upper,mz_domain,abundance_domain)
    -401            
    -402            psfit = self.voigt_pso(mz_domain,xopt[0],0,xopt[1],xopt[2],xopt[3])
    -403            psfit_hdp_x = linspace(min(mz_domain),max(mz_domain),num=len(mz_domain)*upsample_multiplier)
    -404            psfit_hdp = self.voigt_pso(psfit_hdp_x,xopt[0],0,xopt[1],xopt[2],xopt[3])
    -405            return xopt, fopt, psfit, (psfit_hdp_x, psfit_hdp)
    -406        else:
    -407            raise LookupError(
    -408                'resolving power is not defined, try to use set_max_resolving_power()')
    -409
    -410             
    -411    def voigt(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1):
    -412        """ [Legacy] Voigt lineshape analysis function
    -413        Legacy function for voigt lineshape analysis
    -414
    -415        Parameters
    -416        ----------
    -417        oversample_multiplier : int
    -418            factor to increase x-axis points by for simulation of fitted lineshape function
    -419        delta_rp : float
    -420            delta resolving power to add to resolving power
    -421        mz_overlay : int
    -422            extra points left and right of peak definition to include in fitting
    -423        
    -424        Returns
    -425        -------
    -426        mz_domain : ndarray
    -427            x-axis domain for fit
    -428        calc_abundance : ndarray
    -429            calculated abundance profile based on voigt function
    -430        """
    -431        
    -432        
    -433        if self.resolving_power:
    -434
    -435            # full width half maximum distance
    -436            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -437
    -438            # stardart deviation
    -439            sigma = self.fwhm / 3.6013
    -440
    -441            # half width baseline distance
    -442            
    -443            #mz_domain = linspace(self.mz_exp - hw_base_distance,
    -444            #                     self.mz_exp + hw_base_distance, datapoint)
    -445            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -446            
    -447            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -448            
    -449            #TODO derive amplitude
    -450            amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -451
    -452            model = models.VoigtModel()
    -453
    -454            params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma, gamma = sigma)
    -455
    -456            calc_abundance = model.eval(params=params, x=mz_domain)
    -457
    -458            return mz_domain, calc_abundance
    -459        
    -460        else:
    -461            
    -462            raise LookupError(
    -463                'resolving power is not defined, try to use set_max_resolving_power()')
    -464
    -465    def pseudovoigt(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1, fraction =0.5):
    -466        """ [Legacy] pseudovoigt lineshape function
    -467
    -468        Legacy function for pseudovoigt lineshape analysis. 
    -469        Note - Code may not be functional currently.
    -470
    -471        Parameters
    -472        ----------
    -473        oversample_multiplier : int, optional
    -474            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 1.
    -475        delta_rp : float, optional
    -476            delta resolving power to add to resolving power. Defaults to 0.
    -477        mz_overlay : int, optional
    -478            extra points left and right of peak definition to include in fitting. Defaults to 1.
    -479        fraction : float, optional
    -480            fraction of gaussian component in pseudovoigt function. Defaults to 0.5.
    -481
    -482        """
    -483        if self.resolving_power:
    -484
    -485            # full width half maximum distance
    -486            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -487
    -488            # stardart deviation
    -489            sigma = self.fwhm / 2
    -490
    -491            # half width baseline distance
    -492            
    -493            #mz_domain = linspace(self.mz_exp - hw_base_distance,
    -494            #                     self.mz_exp + hw_base_distance, datapoint)
    -495            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -496            
    -497            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -498            model = models.PseudoVoigtModel()
    -499            
    -500            # TODO derive amplitude
    -501            gamma = sigma
    -502            
    -503            amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -504            amplitude = (sqrt(pi/log(2)) * (pi*sigma*self.abundance)) /( (pi*(1-gamma)) + (sqrt(pi*log(2)) * gamma) )
    -505
    -506            params = model.make_params(center=self.mz_exp, sigma = sigma)
    -507
    -508            calc_abundance = model.eval(params=params, x=mz_domain)
    -509
    -510            return mz_domain, calc_abundance
    -511        
    -512        else:
    -513            
    -514            raise LookupError(
    -515                'resolving power is not defined, try to use set_max_resolving_power()')
    -516
    -517
    -518    def lorentz(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1):
    -519        """ [Legacy] Lorentz lineshape analysis function    
    -520        
    -521        Legacy function for lorentz lineshape analysis
    -522
    -523        Parameters
    -524        ----------
    -525        oversample_multiplier : int
    -526            factor to increase x-axis points by for simulation of fitted lineshape function
    -527        delta_rp : float
    -528            delta resolving power to add to resolving power
    -529        mz_overlay : int
    -530            extra points left and right of peak definition to include in fitting
    -531        
    -532        Returns
    -533        -------
    -534        mz_domain : ndarray
    -535            x-axis domain for fit
    -536        calc_abundance : ndarray
    -537            calculated abundance profile based on lorentz function
    -538        
    -539        """
    -540        if self.resolving_power:
    -541
    -542            # full width half maximum distance
    -543            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -544
    -545            # stardart deviation
    -546            sigma = self.fwhm / 2
    -547
    -548            # half width baseline distance
    -549            hw_base_distance = (8 * sigma)
    -550
    -551            #mz_domain = linspace(self.mz_exp - hw_base_distance,
    -552            #                     self.mz_exp + hw_base_distance, datapoint)
    -553            
    -554            
    -555            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -556            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -557            model = models.LorentzianModel()
    -558            
    -559            amplitude = sigma* pi * self.abundance
    -560
    -561            params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -562
    -563            calc_abundance = model.eval(params=params, x=mz_domain)
    -564
    -565            return mz_domain, calc_abundance
    -566        
    -567        else:
    -568            
    -569            raise LookupError(
    -570                'resolving power is not defined, try to use set_max_resolving_power()')
    -571
    -572    def gaussian(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1):
    -573        """ [Legacy] Gaussian lineshape analysis function
    -574        Legacy gaussian lineshape analysis function
    -575        
    -576        Parameters
    -577        ----------
    -578        oversample_multiplier : int
    -579            factor to increase x-axis points by for simulation of fitted lineshape function
    -580        delta_rp : float
    -581            delta resolving power to add to resolving power
    -582        mz_overlay : int
    -583            extra points left and right of peak definition to include in fitting
    -584
    -585        Returns
    -586        -------
    -587        mz_domain : ndarray 
    -588            x-axis domain for fit
    -589        calc_abundance : ndarray
    -590            calculated abundance profile based on gaussian function
    -591        
    -592
    -593        """
    -594
    -595        # check if MSPeak contains the resolving power info
    -596        if self.resolving_power:
    -597            # full width half maximum distance
    -598            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -599
    -600            # stardart deviation
    -601            sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    -602
    -603            # half width baseline distance
    -604            #hw_base_distance = (3.2 * s)
    -605
    -606            #match_loz_factor = 3
    -607
    -608            #n_d = hw_base_distance * match_loz_factor
    -609
    -610            #mz_domain = linspace(
    -611            #    self.mz_exp - n_d, self.mz_exp + n_d, datapoint)
    -612
    -613            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -614            
    -615            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -616            
    -617            #calc_abundance = norm.pdf(mz_domain, self.mz_exp, s)
    -618
    -619            model = models.GaussianModel()
    -620            
    -621            amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -622
    -623            params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -624
    -625            calc_abundance = model.eval(params=params, x=mz_domain)
    -626            
    -627            return mz_domain, calc_abundance 
    -628
    -629        else:
    -630            raise LookupError(
    -631                'resolving power is not defined, try to use set_max_resolving_power()')
    -632
    -633    def get_mz_domain(self, oversample_multiplier, mz_overlay):
    -634        """  [Legacy] function to resample/interpolate datapoints for lineshape analysis
    -635
    -636        This code is used for the legacy line fitting functions and not recommended.
    -637        Legacy function to support expanding mz domain for legacy lineshape functions
    -638
    -639        Parameters
    -640        ----------
    -641        oversample_multiplier : int
    -642            factor to increase x-axis points by for simulation of fitted lineshape function
    -643        mz_overlay : int
    -644            extra points left and right of peak definition to include in fitting
    -645        
    -646        Returns
    -647        -------
    -648        mz_domain : ndarray
    -649            x-axis domain for fit
    -650        
    -651        """
    -652        start_index = self.peak_left_index - mz_overlay  if not self.peak_left_index == 0 else 0
    -653        final_index = self.peak_right_index + mz_overlay  if not self.peak_right_index == len(self._ms_parent.mz_exp_profile) else self.peak_right_index
    -654
    -655        if oversample_multiplier == 1:
    -656
    -657            mz_domain = self._ms_parent.mz_exp_profile[start_index: final_index]
    -658            
    -659        else:
    -660            # we assume a linear correlation for m/z and datapoits 
    -661            # which is only true if the m/z range in narrow (within 1 m/z unit)
    -662            # this is not true for a wide m/z range
    -663                        
    -664            indexes = range(start_index, final_index+1)
    -665            mz = self._ms_parent.mz_exp_profile[indexes]
    -666            pol = poly1d(polyfit(indexes, mz, 1))
    -667            oversampled_indexes = linspace(start_index, final_index, (final_index-start_index) * oversample_multiplier)    
    -668            mz_domain = pol(oversampled_indexes)
    -669
    -670        return mz_domain
    -671    
    -672    @property
    -673    def number_possible_assignments(self,):
    -674        
    -675        return len(self.molecular_formulas)
    -676
    -677    def molecular_formula_lowest_error(self):
    -678       """ Return the molecular formula with the smallest absolute mz error
    -679       
    -680       """
    -681       
    -682       return min(self.molecular_formulas, key=lambda m: abs(m.mz_error))
    -683
    -684    def molecular_formula_highest_prob_score(self):
    -685        """ Return the molecular formula with the highest confidence score score
    -686         
    -687        """
    -688       
    -689        return max(self.molecular_formulas, key=lambda m: abs(m.confidence_score))
    -690
    -691    def molecular_formula_earth_filter(self, lowest_error=True):
    -692        """ Filter molecular formula using the 'Earth' filter
    -693        
    -694        This function applies the Formularity-esque 'Earth' filter to possible molecular formula assignments.
    -695        Earth Filter:
    -696            O > 0 AND N <= 3 AND P <= 2 AND 3P <= O
    -697
    -698        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter. 
    -699        Otherwise, it will return all Earth-filter compliant formulas. 
    -700
    -701        Parameters
    -702        ----------
    -703        lowest_error : bool, optional.
    -704            Return only the lowest error formula which also fits the Earth filter. 
    -705            If False, return all Earth-filter compliant formulas. Default is True.
    -706
    -707        Returns
    -708        -------
    -709        list
    -710            List of molecular formula objects which fit the Earth filter
    -711
    -712        References
    -713        ----------
    -714        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    -715            Anal. Chem. 2017, 89, 23, 12659–12665
    -716            doi: 10.1021/acs.analchem.7b03318
    -717        """
    -718        
    -719        candidates = list(filter(lambda mf: mf.get("O") > 0 and mf.get("N") <=3 and mf.get("P") <= 2 and (3 * mf.get("P")) <= mf.get("O"), self.molecular_formulas))
    -720        if len(candidates) >0:
    -721            if lowest_error:
    -722                return min(candidates, key=lambda m: abs(m.mz_error))
    -723            else:
    -724                return candidates
    -725        else:
    -726            return candidates
    -727
    -728    def molecular_formula_water_filter(self, lowest_error=True):
    -729        """ Filter molecular formula using the 'Water' filter
    -730
    -731        This function applies the Formularity-esque 'Water' filter to possible molecular formula assignments.
    -732        Water Filter:
    -733            O > 0 AND N <= 3 AND S <= 2 AND P <= 2
    -734        
    -735        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Water filter.
    -736        Otherwise, it will return all Water-filter compliant formulas.
    -737
    -738        Parameters
    -739        ----------
    -740        lowest_error : bool, optional
    -741            Return only the lowest error formula which also fits the Water filter.
    -742            If False, return all Water-filter compliant formulas. Defaults to 2
    -743
    -744        Returns 
    -745        -------
    -746        list
    -747            List of molecular formula objects which fit the Water filter
    -748
    -749        References
    -750        ----------
    -751        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    -752            Anal. Chem. 2017, 89, 23, 12659–12665
    -753            doi: 10.1021/acs.analchem.7b03318
    -754        """
    -755       
    -756        candidates = list(filter(lambda mf: mf.get("O") > 0 and mf.get("N") <=3 and mf.get("S") <=2 and  mf.get("P") <= 2, self.molecular_formulas))
    -757        if len(candidates) >0:
    -758            if lowest_error:
    -759                return min(candidates, key=lambda m: abs(m.mz_error))
    -760            else:
    -761                return candidates
    -762        else:
    -763            return candidates
    -764    
    -765    def molecular_formula_air_filter(self, lowest_error=True):
    -766        """ Filter molecular formula using the 'Air' filter
    -767
    -768        This function applies the Formularity-esque 'Air' filter to possible molecular formula assignments.
    -769        Air Filter:
    -770            O > 0 AND N <= 3 AND S <= 1 AND P = 0 AND 3(S+N) <= O
    -771        
    -772        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Air filter.
    -773        Otherwise, it will return all Air-filter compliant formulas.
    -774
    -775        Parameters
    -776        ----------
    -777        lowest_error : bool, optional
    -778            Return only the lowest error formula which also fits the Air filter.
    -779            If False, return all Air-filter compliant formulas. Defaults to True.
    -780
    -781        Returns
    -782        -------
    -783        list
    -784            List of molecular formula objects which fit the Air filter
    -785            
    -786        References
    -787        ----------
    -788        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    -789            Anal. Chem. 2017, 89, 23, 12659–12665
    -790            doi: 10.1021/acs.analchem.7b03318
    -791        """
    -792
    -793       
    -794        candidates = list(filter(lambda mf: mf.get("O") > 0 and mf.get("N") <=2 and mf.get("S") <=1 and  mf.get("P") == 0 and 3* (mf.get("S") + mf.get("N")) <= mf.get("O"), self.molecular_formulas))
    -795        
    -796        if len(candidates) >0:
    -797            if lowest_error:
    -798                return min(candidates, key=lambda m: abs(m.mz_error))
    -799            else:
    -800                return candidates
    -801        else:
    -802            return candidates
    -803
    -804    def cia_score_S_P_error(self):
    -805        """ Compound Identification Algorithm SP Error - Assignment Filter
    -806         
    -807        This function applies the Compound Identification Algorithm (CIA) SP Error filter to possible molecular formula assignments.
    -808
    -809        It takes the molecular formula with the lowest S+P count, and returns the formula with the lowest absolute error from this subset.
    -810        
    -811        Returns
    -812        -------
    -813        MolecularFormula
    -814            A single molecular formula which fits the rules of the CIA SP Error filter
    -815
    -816
    -817        References
    -818        ----------
    -819        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    -820            Anal. Chem. 2006, 78, 13, 4363–4373
    -821            doi: 10.1021/ac0600306
    -822        """
    -823        #case EFormulaScore.HAcap:
    -824
    -825        lowest_S_P_mf = min(self.molecular_formulas, key=lambda mf: mf.get('S') + mf.get('P'))
    -826        lowest_S_P_count = lowest_S_P_mf.get("S") + lowest_S_P_mf.get("P")
    -827        
    -828        list_same_s_p = list(filter(lambda mf: mf.get('S') + mf.get('P') == lowest_S_P_count, self.molecular_formulas))
    -829
    -830        #check if list is not empty
    -831        if list_same_s_p:
    -832        
    -833            return min(list_same_s_p, key=lambda m: abs(m.mz_error))
    -834        
    -835        else:
    -836        
    -837            return lowest_S_P_mf
    -838    
    -839    def cia_score_N_S_P_error(self):
    -840        """ Compound Identification Algorithm NSP Error - Assignment Filter
    -841        
    -842        This function applies the Compound Identification Algorithm (CIA) NSP Error filter to possible molecular formula assignments.
    -843
    -844        It takes the molecular formula with the lowest N+S+P count, and returns the formula with the lowest absolute error from this subset.
    -845
    -846        Returns
    -847        -------
    -848        MolecularFormula
    -849            A single molecular formula which fits the rules of the CIA NSP Error filter
    -850
    -851        References
    -852        ----------
    -853        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    -854            Anal. Chem. 2006, 78, 13, 4363–4373
    -855            doi: 10.1021/ac0600306
    -856
    -857        Raises
    -858        -------
    -859        Exception
    -860            If no molecular formula are associated with mass spectrum peak.
    -861        """
    -862        #case EFormulaScore.HAcap:
    -863        if self.molecular_formulas:
    -864
    -865            lowest_N_S_P_mf = min(self.molecular_formulas, key=lambda mf: mf.get('N') + mf.get('S') + mf.get('P'))
    -866            lowest_N_S_P_count = lowest_N_S_P_mf.get("N") + lowest_N_S_P_mf.get("S") + lowest_N_S_P_mf.get("P")
    -867
    -868            list_same_N_S_P = list(filter(lambda mf: mf.get('N') + mf.get('S') + mf.get('P') == lowest_N_S_P_count, self.molecular_formulas))
    -869
    -870            if list_same_N_S_P:
    -871
    -872                SP_filtered_list =  list(filter(lambda mf: (mf.get("S") <= 3 ) and  (mf.get("P")  <= 1 ), list_same_N_S_P))
    -873                
    -874                if SP_filtered_list:
    -875                    
    -876                    return min(SP_filtered_list, key=lambda m: abs(m.mz_error)) 
    -877                
    -878                else:    
    -879                    
    -880                    return min(list_same_N_S_P, key=lambda m: abs(m.mz_error))            
    -881            
    -882            else:
    -883                
    -884                return lowest_N_S_P_mf 
    -885        else:
    -886            raise Exception("No molecular formula associated with the mass spectrum peak at m/z: %.6f" % self.mz_exp)
    +            
      30class MSPeakCalculation:
    +  31    """Class to perform calculations on MSPeak objects.
    +  32
    +  33    This class provides methods to perform various calculations on MSPeak objects, such as calculating Kendrick Mass Defect (KMD) and Kendrick Mass (KM), calculating peak area, and fitting peak lineshape using different models.
    +  34
    +  35    Parameters
    +  36    ----------
    +  37    None
    +  38
    +  39    Attributes
    +  40    ----------
    +  41    _ms_parent : MSParent
    +  42        The parent MSParent object associated with the MSPeakCalculation object.
    +  43    mz_exp : float
    +  44        The experimental m/z value of the peak.
    +  45    peak_left_index : int
    +  46        The start scan index of the peak.
    +  47    peak_right_index : int
    +  48        The final scan index of the peak.
    +  49    resolving_power : float
    +  50        The resolving power of the peak.
    +  51
    +  52    Methods
    +  53    -------
    +  54    * _calc_kmd(dict_base).
    +  55        Calculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula.
    +  56    * calc_area().
    +  57        Calculate the peak area using numpy's trapezoidal integration.
    +  58    * fit_peak(mz_extend=6, delta_rp=0, model='Gaussian').
    +  59        Perform lineshape analysis on a peak using lmfit module.
    +  60    * voigt_pso(w, r, yoff, width, loc, a).
    +  61        Calculate the Voigt function for particle swarm optimization (PSO) fitting.
    +  62    * objective_pso(x, w, u).
    +  63        Calculate the objective function for PSO fitting.
    +  64    * minimize_pso(lower, upper, w, u).
    +  65        Minimize the objective function using the particle swarm optimization algorithm.
    +  66    * fit_peak_pso(mz_extend=6, upsample_multiplier=5).
    +  67        Perform lineshape analysis on a peak using particle swarm optimization (PSO) fitting.
    +  68    * voigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    +  69        [Legacy] Perform voigt lineshape analysis on a peak.
    +  70    * pseudovoigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1, fraction=0.5).
    +  71        [Legacy] Perform pseudovoigt lineshape analysis on a peak.
    +  72    * lorentz(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    +  73        [Legacy] Perform lorentz lineshape analysis on a peak.
    +  74    * gaussian(oversample_multiplier=1, delta_rp=0, mz_overlay=1).
    +  75        [Legacy] Perform gaussian lineshape analysis on a peak.
    +  76    * get_mz_domain(oversample_multiplier, mz_overlay).
    +  77        [Legacy] Resample/interpolate datapoints for lineshape analysis.
    +  78    * number_possible_assignments().
    +  79        Return the number of possible molecular formula assignments for the peak.
    +  80    * molecular_formula_lowest_error().
    +  81        Return the molecular formula with the smallest absolute mz error.
    +  82    * molecular_formula_highest_prob_score().
    +  83        Return the molecular formula with the highest confidence score.
    +  84    * molecular_formula_earth_filter(lowest_error=True).
    +  85        Filter molecular formula using the 'Earth' filter.
    +  86    * molecular_formula_water_filter(lowest_error=True).
    +  87        Filter molecular formula using the 'Water' filter.
    +  88    * molecular_formula_air_filter(lowest_error=True).
    +  89        Filter molecular formula using the 'Air' filter.
    +  90    * cia_score_S_P_error().
    +  91        Compound Identification Algorithm SP Error - Assignment Filter.
    +  92    * cia_score_N_S_P_error().
    +  93        Compound Identification Algorithm NSP Error - Assignment Filter.
    +  94
    +  95    """
    +  96
    +  97    def _calc_kmd(self, dict_base):
    +  98        """Calculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula
    +  99
    + 100        Parameters
    + 101        ----------
    + 102        dict_base : dict
    + 103            dictionary with the base formula to be used in the calculation
    + 104            Default is CH2, e.g.
    + 105                dict_base = {"C": 1, "H": 2}
    + 106        """
    + 107
    + 108        if self._ms_parent:
    + 109            # msPeak obj does have a ms object parent
    + 110            kendrick_rounding_method = (
    + 111                self._ms_parent.mspeaks_settings.kendrick_rounding_method
    + 112            )  # rounding method can be one of floor, ceil or round
    + 113            # msPeak obj does not have a ms object parent
    + 114        else:
    + 115            kendrick_rounding_method = MSParameters.ms_peak.kendrick_rounding_method
    + 116
    + 117        mass = 0
    + 118        for atom in dict_base.keys():
    + 119            mass += Atoms.atomic_masses.get(atom) * dict_base.get(atom)
    + 120
    + 121        kendrick_mass = (int(mass) / mass) * self.mz_exp
    + 122
    + 123        if kendrick_rounding_method == "ceil":
    + 124            nominal_km = ceil(kendrick_mass)
    + 125
    + 126        elif kendrick_rounding_method == "round":
    + 127            nominal_km = rint(kendrick_mass)
    + 128
    + 129        elif kendrick_rounding_method == "floor":
    + 130            nominal_km = floor(kendrick_mass)
    + 131
    + 132        else:
    + 133            raise Exception(
    + 134                "%s method was not implemented, please refer to corems.ms_peak.calc.MSPeakCalc Class"
    + 135                % kendrick_rounding_method
    + 136            )
    + 137
    + 138        kmd = nominal_km - kendrick_mass
    + 139
    + 140        # kmd = (nominal_km - km) * 1
    + 141        # kmd = round(kmd,0)
    + 142
    + 143        return kmd, kendrick_mass, nominal_km
    + 144
    + 145    def calc_area(self):
    + 146        """Calculate the peak area using numpy's trapezoidal integration
    + 147
    + 148        uses provided mz_domain to accurately integrate areas independent of digital resolution
    + 149
    + 150        Returns
    + 151        -------
    + 152        float
    + 153            peak area
    + 154        """
    + 155        if self.peak_right_index > self.peak_left_index:
    + 156            yy = self._ms_parent.abundance_profile[
    + 157                self.peak_left_index : self.peak_right_index
    + 158            ]
    + 159            xx = self._ms_parent.mz_exp_profile[
    + 160                self.peak_left_index : self.peak_right_index
    + 161            ]
    + 162            # check whether the axis runs from high to low m/z. If it's MSFromFreq, high m/z comes first; if it's from Profile, low m/z comes first
    + 163            if xx[0] > xx[-1]:
    + 164                xx = flip(xx)
    + 165                yy = flip(yy)
    + 166            return float(trapz(yy, xx))
    + 167
    + 168        else:
    + 169            warnings.warn(
    + 170                "Peak Area Calculation for m/z {} has failed".format(self.mz_exp)
    + 171            )
    + 172            return nan
    + 173
    + 174    def fit_peak(self, mz_extend=6, delta_rp=0, model="Gaussian"):
    + 175        """Lineshape analysis on a peak using lmfit module.
    + 176
    + 177        Model and fit peak lineshape by defined function - using lmfit module
    + 178        Does not oversample/resample/interpolate data points
    + 179        Better to go back to time domain and perform more zero filling - if possible.
    + 180
    + 181        Parameters
    + 182        ----------
    + 183        mz_extend : int
    + 184            extra points left and right of peak definition to include in fitting
    + 185        delta_rp : float
    + 186            delta resolving power to add to resolving power
    + 187        model : str
    + 188            Type of lineshape model to use.
    + 189            Models allowed: Gaussian, Lorentz, Voigt
    + 190
    + 191        Returns
    + 192        -------
    + 193        mz_domain : ndarray
    + 194            x-axis domain for fit
    + 195        fit_peak : lmfit object
    + 196            fit results object from lmfit module
    + 197
    + 198        Notes
    + 199        -----
    + 200        Returns the calculated mz domain and the fit peak results object from lmfit module; the initial abundance profile is not returned separately.
    + 201        mz_extend here extends the x-axis domain so that we have sufficient points either side of the apex to fit.
    + 202        Takes about 10ms per peak
    + 203        """
    + 204        start_index = (
    + 205            self.peak_left_index - mz_extend if not self.peak_left_index == 0 else 0
    + 206        )
    + 207        final_index = (
    + 208            self.peak_right_index + mz_extend
    + 209            if not self.peak_right_index == len(self._ms_parent.mz_exp_profile)
    + 210            else self.peak_right_index
    + 211        )
    + 212
    + 213        # check if MSPeak contains the resolving power info
    + 214        if self.resolving_power:
    + 215            # full width half maximum distance
    + 216            self.fwhm = self.mz_exp / (self.resolving_power + delta_rp)
    + 217
    + 218            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    + 219            abundance_domain = self._ms_parent.abundance_profile[
    + 220                start_index:final_index
    + 221            ]
    + 222
    + 223            if model == "Gaussian":
    + 224                # standard deviation
    + 225                sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    + 226                amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 227                model = models.GaussianModel()
    + 228                params = model.make_params(
    + 229                    center=self.mz_exp, amplitude=amplitude, sigma=sigma
    + 230                )
    + 231
    + 232            elif model == "Lorentz":
    + 233                # sigma (half width at half maximum for a Lorentzian)
    + 234                sigma = self.fwhm / 2
    + 235                amplitude = sigma * pi * self.abundance
    + 236                model = models.LorentzianModel()
    + 237                params = model.make_params(
    + 238                    center=self.mz_exp, amplitude=amplitude, sigma=sigma
    + 239                )
    + 240
    + 241            elif model == "Voigt":
    + 242                # standard deviation
    + 243                sigma = self.fwhm / 3.6013
    + 244                amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 245                model = models.VoigtModel()
    + 246                params = model.make_params(
    + 247                    center=self.mz_exp, amplitude=amplitude, sigma=sigma, gamma=sigma
    + 248                )
    + 249            else:
    + 250                raise LookupError("model lineshape not known or defined")
    + 251
    + 252            # calc_abundance = model.eval(params=params, x=mz_domain) #Same as initial fit, returned in fit_peak object
    + 253            fit_peak = model.fit(abundance_domain, params=params, x=mz_domain)
    + 254            return mz_domain, fit_peak
    + 255
    + 256        else:
    + 257            raise LookupError(
    + 258                "resolving power is not defined, try to use set_max_resolving_power()"
    + 259            )
    + 260
    + 261    def voigt_pso(self, w, r, yoff, width, loc, a):
    + 262        """Voigt function for particle swarm optimisation (PSO) fitting
    + 263
    + 264        From https://github.com/pnnl/nmrfit/blob/master/nmrfit/equations.py.
    + 265        Calculates a Voigt function over w based on the relevant properties of the distribution.
    + 266
    + 267        Parameters
    + 268        ----------
    + 269        w : ndarray
    + 270            Array over which the Voigt function will be evaluated.
    + 271        r : float
    + 272            Ratio between the Gaussian and Lorentzian functions: weight of the Lorentzian component (the Gaussian component is weighted by 1 - r).
    + 273        yoff : float
    + 274            Y-offset of the Voigt function.
    + 275        width : float
    + 276            The width of the Voigt function.
    + 277        loc : float
    + 278            Center of the Voigt function.
    + 279        a : float
    + 280            Area of the Voigt function.
    + 281        Returns
    + 282        -------
    + 283        V : ndarray
    + 284            Array defining the Voigt function over w.
    + 285
    + 286        References
    + 287        ----------
    + 288        1. https://github.com/pnnl/nmrfit
    + 289
    + 290        Notes
    + 291        -----
    + 292        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    + 293
    + 294        """
    + 295        # Lorentzian component
    + 296        L = (2 / (pi * width)) * 1 / (1 + ((w - loc) / (0.5 * width)) ** 2)
    + 297
    + 298        # Gaussian component
    + 299        G = (
    + 300            (2 / width)
    + 301            * sqrt(log(2) / pi)
    + 302            * exp(-(((w - loc) / (width / (2 * sqrt(log(2))))) ** 2))
    + 303        )
    + 304
    + 305        # Voigt body
    + 306        V = (yoff + a) * (r * L + (1 - r) * G)
    + 307
    + 308        return V
    + 309
    + 310    def objective_pso(self, x, w, u):
    + 311        """Objective function for particle swarm optimisation (PSO) fitting
    + 312
    + 313        The objective function used to fit supplied data.  Evaluates the root mean square error (RMSE) between the fit and the data.
    + 314
    + 315        Parameters
    + 316        ----------
    + 317        x : list of floats
    + 318            Parameter vector.
    + 319        w : ndarray
    + 320            Array of frequency data.
    + 321        u : ndarray
    + 322            Array of data to be fit.
    + 323
    + 324        Returns
    + 325        -------
    + 326        rmse : float
    + 327            Root mean square error between the data and fit.
    + 328
    + 329        References
    + 330        ----------
    + 331        1. https://github.com/pnnl/nmrfit
    + 332
    + 333        """
    + 334        # global parameters
    + 335        r, width, loc, a = x
    + 336        yoff = 0
    + 337
    + 338        # calculate fit for V
    + 339        V_fit = self.voigt_pso(w, r, yoff, width, loc, a)
    + 340
    + 341        # real component RMSE
    + 342        rmse = sqrt(square((u - V_fit)).mean(axis=None))
    + 343
    + 344        # return the total RMSE
    + 345        return rmse
    + 346
    + 347    def minimize_pso(self, lower, upper, w, u):
    + 348        """Minimization function for particle swarm optimisation (PSO) fitting
    + 349
    + 350        Minimizes the objective function using the particle swarm optimization algorithm.
    + 351        Minimization function based on defined parameters
    + 352
    + 353
    + 354        Parameters
    + 355        ----------
    + 356        lower : list of floats
    + 357            Lower bounds for the parameters.
    + 358        upper : list of floats
    + 359            Upper bounds for the parameters.
    + 360        w : ndarray
    + 361            Array of frequency data.
    + 362        u : ndarray
    + 363            Array of data to be fit.
    + 364
    + 365        Notes
    + 366        -----
    + 367        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    + 368        Current parameters take ~2 seconds per peak.
    + 369
    + 370
    + 371        References
    + 372        ----------
    + 373        1. https://github.com/pnnl/nmrfit
    + 374
    + 375        """
    + 376        # TODO - allow support to pass swarmsize, maxiter, omega, phip, phig parameters.
    + 377        # TODO - Refactor PSO fitting into its own class?
    + 378
    + 379        xopt, fopt = pyswarm.pso(
    + 380            self.objective_pso,
    + 381            lower,
    + 382            upper,
    + 383            args=(w, u),
    + 384            swarmsize=1000,
    + 385            maxiter=5000,
    + 386            omega=-0.2134,
    + 387            phip=-0.3344,
    + 388            phig=2.3259,
    + 389        )
    + 390        return xopt, fopt
    + 391
    + 392    def fit_peak_pso(self, mz_extend: int = 6, upsample_multiplier: int = 5):
    + 393        """Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting
    + 394
    + 395        Function to fit a Voigt peakshape using particle swarm optimisation (PSO).
    + 396        Should return better results than lmfit, but much more computationally expensive
    + 397
    + 398        Parameters
    + 399        ----------
    + 400        mz_extend : int, optional
    + 401            extra points left and right of peak definition to include in fitting. Defaults to 6.
    + 402        upsample_multiplier : int, optional
    + 403            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 5.
    + 404
    + 405        Returns
    + 406        -------
    + 407        xopt : array
    + 408            variables describing the voigt function.
    + 409            G/L ratio, width (fwhm), apex (x-axis), area.
    + 410            y-axis offset is fixed at 0
    + 411        fopt : float
    + 412            objective score (rmse)
    + 413        psfit : array
    + 414            recalculated y values based on function and optimised fit
    + 415        psfit_hdp : tuple of arrays
    + 416            0 - linspace x-axis upsampled grid
    + 417            1 - recalculated y values based on function and upsampled x-axis grid
    + 418            Does not change results, but aids in visualisation of the 'true' voigt lineshape
    + 419
    + 420        Notes
    + 421        -----
    + 422        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    + 423        """
    + 424        # TODO - Add ability to pass pso args (i.e. swarm size, maxiter, omega, phig, etc)
    + 425        # TODO: fix xopt. Magnitude mode data through CoreMS/Bruker starts at 0 but is noise centered well above 0.
    + 426        # Thermo data is noise reduced but also noise subtracted, so starts at 0
    + 427        # Absorption mode/phased data will have positive and negative components and may not be baseline corrected
    + 428
    + 429        start_index = (
    + 430            self.peak_left_index - mz_extend if not self.peak_left_index == 0 else 0
    + 431        )
    + 432        final_index = (
    + 433            self.peak_right_index + mz_extend
    + 434            if not self.peak_right_index == len(self._ms_parent.mz_exp_profile)
    + 435            else self.peak_right_index
    + 436        )
    + 437
    + 438        # check if MSPeak contains the resolving power info
    + 439        if self.resolving_power:
    + 440            # full width half maximum distance
    + 441            self.fwhm = self.mz_exp / (self.resolving_power)
    + 442
    + 443            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    + 444            abundance_domain = self._ms_parent.abundance_profile[
    + 445                start_index:final_index
    + 446            ]
    + 447            lower = [0, self.fwhm * 0.8, (self.mz_exp - 0.0005), 0]
    + 448            upper = [
    + 449                1,
    + 450                self.fwhm * 1.2,
    + 451                (self.mz_exp + 0.0005),
    + 452                self.abundance / self.signal_to_noise,
    + 453            ]
    + 454            xopt, fopt = self.minimize_pso(lower, upper, mz_domain, abundance_domain)
    + 455
    + 456            psfit = self.voigt_pso(mz_domain, xopt[0], 0, xopt[1], xopt[2], xopt[3])
    + 457            psfit_hdp_x = linspace(
    + 458                min(mz_domain), max(mz_domain), num=len(mz_domain) * upsample_multiplier
    + 459            )
    + 460            psfit_hdp = self.voigt_pso(
    + 461                psfit_hdp_x, xopt[0], 0, xopt[1], xopt[2], xopt[3]
    + 462            )
    + 463            return xopt, fopt, psfit, (psfit_hdp_x, psfit_hdp)
    + 464        else:
    + 465            raise LookupError(
    + 466                "resolving power is not defined, try to use set_max_resolving_power()"
    + 467            )
    + 468
    + 469    def voigt(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):
    + 470        """[Legacy] Voigt lineshape analysis function
    + 471        Legacy function for voigt lineshape analysis
    + 472
    + 473        Parameters
    + 474        ----------
    + 475        oversample_multiplier : int
    + 476            factor to increase x-axis points by for simulation of fitted lineshape function
    + 477        delta_rp : float
    + 478            delta resolving power to add to resolving power
    + 479        mz_overlay : int
    + 480            extra points left and right of peak definition to include in fitting
    + 481
    + 482        Returns
    + 483        -------
    + 484        mz_domain : ndarray
    + 485            x-axis domain for fit
    + 486        calc_abundance : ndarray
    + 487            calculated abundance profile based on voigt function
    + 488        """
    + 489
    + 490        if self.resolving_power:
    + 491            # full width half maximum distance
    + 492            self.fwhm = self.mz_exp / (
    + 493                self.resolving_power + delta_rp
    + 494            )  # self.resolving_power)
    + 495
    + 496            # standard deviation
    + 497            sigma = self.fwhm / 3.6013
    + 498
    + 499            # half width baseline distance
    + 500
    + 501            # mz_domain = linspace(self.mz_exp - hw_base_distance,
    + 502            #                     self.mz_exp + hw_base_distance, datapoint)
    + 503            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    + 504
    + 505            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    + 506
    + 507            # TODO derive amplitude
    + 508            amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 509
    + 510            model = models.VoigtModel()
    + 511
    + 512            params = model.make_params(
    + 513                center=self.mz_exp, amplitude=amplitude, sigma=sigma, gamma=sigma
    + 514            )
    + 515
    + 516            calc_abundance = model.eval(params=params, x=mz_domain)
    + 517
    + 518            return mz_domain, calc_abundance
    + 519
    + 520        else:
    + 521            raise LookupError(
    + 522                "resolving power is not defined, try to use set_max_resolving_power()"
    + 523            )
    + 524
    + 525    def pseudovoigt(
    + 526        self, oversample_multiplier=1, delta_rp=0, mz_overlay=1, fraction=0.5
    + 527    ):
    + 528        """[Legacy] pseudovoigt lineshape function
    + 529
    + 530        Legacy function for pseudovoigt lineshape analysis.
    + 531        Note - Code may not be functional currently.
    + 532
    + 533        Parameters
    + 534        ----------
    + 535        oversample_multiplier : int, optional
    + 536            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 1.
    + 537        delta_rp : float, optional
    + 538            delta resolving power to add to resolving power. Defaults to 0.
    + 539        mz_overlay : int, optional
    + 540            extra points left and right of peak definition to include in fitting. Defaults to 1.
    + 541        fraction : float, optional
    + 542            fraction of gaussian component in pseudovoigt function. Defaults to 0.5.
    + 543
    + 544        """
    + 545        if self.resolving_power:
    + 546            # full width half maximum distance
    + 547            self.fwhm = self.mz_exp / (
    + 548                self.resolving_power + delta_rp
    + 549            )  # self.resolving_power)
    + 550
    + 551            # sigma (half width at half maximum)
    + 552            sigma = self.fwhm / 2
    + 553
    + 554            # half width baseline distance
    + 555
    + 556            # mz_domain = linspace(self.mz_exp - hw_base_distance,
    + 557            #                     self.mz_exp + hw_base_distance, datapoint)
    + 558            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    + 559
    + 560            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    + 561            model = models.PseudoVoigtModel()
    + 562
    + 563            # TODO derive amplitude
    + 564            gamma = sigma
    + 565
    + 566            amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 567            amplitude = (sqrt(pi / log(2)) * (pi * sigma * self.abundance)) / (
    + 568                (pi * (1 - gamma)) + (sqrt(pi * log(2)) * gamma)
    + 569            )
    + 570
    + 571            params = model.make_params(center=self.mz_exp, sigma=sigma)
    + 572
    + 573            calc_abundance = model.eval(params=params, x=mz_domain)
    + 574
    + 575            return mz_domain, calc_abundance
    + 576
    + 577        else:
    + 578            raise LookupError(
    + 579                "resolving power is not defined, try to use set_max_resolving_power()"
    + 580            )
    + 581
    + 582    def lorentz(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):
    + 583        """[Legacy] Lorentz lineshape analysis function
    + 584
    + 585        Legacy function for lorentz lineshape analysis
    + 586
    + 587        Parameters
    + 588        ----------
    + 589        oversample_multiplier : int
    + 590            factor to increase x-axis points by for simulation of fitted lineshape function
    + 591        delta_rp : float
    + 592            delta resolving power to add to resolving power
    + 593        mz_overlay : int
    + 594            extra points left and right of peak definition to include in fitting
    + 595
    + 596        Returns
    + 597        -------
    + 598        mz_domain : ndarray
    + 599            x-axis domain for fit
    + 600        calc_abundance : ndarray
    + 601            calculated abundance profile based on lorentz function
    + 602
    + 603        """
    + 604        if self.resolving_power:
    + 605            # full width half maximum distance
    + 606            self.fwhm = self.mz_exp / (
    + 607                self.resolving_power + delta_rp
    + 608            )  # self.resolving_power)
    + 609
    + 610            # sigma (half width at half maximum for a Lorentzian)
    + 611            sigma = self.fwhm / 2
    + 612
    + 613            # half width baseline distance
    + 614            hw_base_distance = 8 * sigma
    + 615
    + 616            # mz_domain = linspace(self.mz_exp - hw_base_distance,
    + 617            #                     self.mz_exp + hw_base_distance, datapoint)
    + 618
    + 619            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    + 620            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    + 621            model = models.LorentzianModel()
    + 622
    + 623            amplitude = sigma * pi * self.abundance
    + 624
    + 625            params = model.make_params(
    + 626                center=self.mz_exp, amplitude=amplitude, sigma=sigma
    + 627            )
    + 628
    + 629            calc_abundance = model.eval(params=params, x=mz_domain)
    + 630
    + 631            return mz_domain, calc_abundance
    + 632
    + 633        else:
    + 634            raise LookupError(
    + 635                "resolving power is not defined, try to use set_max_resolving_power()"
    + 636            )
    + 637
    + 638    def gaussian(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):
    + 639        """[Legacy] Gaussian lineshape analysis function
    + 640        Legacy gaussian lineshape analysis function
    + 641
    + 642        Parameters
    + 643        ----------
    + 644        oversample_multiplier : int
    + 645            factor to increase x-axis points by for simulation of fitted lineshape function
    + 646        delta_rp : float
    + 647            delta resolving power to add to resolving power
    + 648        mz_overlay : int
    + 649            extra points left and right of peak definition to include in fitting
    + 650
    + 651        Returns
    + 652        -------
    + 653        mz_domain : ndarray
    + 654            x-axis domain for fit
    + 655        calc_abundance : ndarray
    + 656            calculated abundance profile based on gaussian function
    + 657
    + 658
    + 659        """
    + 660
    + 661        # check if MSPeak contains the resolving power info
    + 662        if self.resolving_power:
    + 663            # full width half maximum distance
    + 664            self.fwhm = self.mz_exp / (
    + 665                self.resolving_power + delta_rp
    + 666            )  # self.resolving_power)
    + 667
    + 668            # standard deviation
    + 669            sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    + 670
    + 671            # half width baseline distance
    + 672            # hw_base_distance = (3.2 * s)
    + 673
    + 674            # match_loz_factor = 3
    + 675
    + 676            # n_d = hw_base_distance * match_loz_factor
    + 677
    + 678            # mz_domain = linspace(
    + 679            #    self.mz_exp - n_d, self.mz_exp + n_d, datapoint)
    + 680
    + 681            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    + 682
    + 683            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    + 684
    + 685            # calc_abundance = norm.pdf(mz_domain, self.mz_exp, s)
    + 686
    + 687            model = models.GaussianModel()
    + 688
    + 689            amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    + 690
    + 691            params = model.make_params(
    + 692                center=self.mz_exp, amplitude=amplitude, sigma=sigma
    + 693            )
    + 694
    + 695            calc_abundance = model.eval(params=params, x=mz_domain)
    + 696
    + 697            return mz_domain, calc_abundance
    + 698
    + 699        else:
    + 700            raise LookupError(
    + 701                "resolving power is not defined, try to use set_max_resolving_power()"
    + 702            )
    + 703
    + 704    def get_mz_domain(self, oversample_multiplier, mz_overlay):
    + 705        """[Legacy] function to resample/interpolate datapoints for lineshape analysis
    + 706
    + 707        This code is used for the legacy line fitting functions and not recommended.
    + 708        Legacy function to support expanding mz domain for legacy lineshape functions
    + 709
    + 710        Parameters
    + 711        ----------
    + 712        oversample_multiplier : int
    + 713            factor to increase x-axis points by for simulation of fitted lineshape function
    + 714        mz_overlay : int
    + 715            extra points left and right of peak definition to include in fitting
    + 716
    + 717        Returns
    + 718        -------
    + 719        mz_domain : ndarray
    + 720            x-axis domain for fit
    + 721
    + 722        """
    + 723        start_index = (
    + 724            self.peak_left_index - mz_overlay if not self.peak_left_index == 0 else 0
    + 725        )
    + 726        final_index = (
    + 727            self.peak_right_index + mz_overlay
    + 728            if not self.peak_right_index == len(self._ms_parent.mz_exp_profile)
    + 729            else self.peak_right_index
    + 730        )
    + 731
    + 732        if oversample_multiplier == 1:
    + 733            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    + 734
    + 735        else:
    + 736            # we assume a linear correlation between m/z and data point index,
    + 737            # which only holds when the m/z range is narrow (within 1 m/z unit);
    + 738            # it does not hold for a wide m/z range
    + 739
    + 740            indexes = range(start_index, final_index + 1)
    + 741            mz = self._ms_parent.mz_exp_profile[indexes]
    + 742            pol = poly1d(polyfit(indexes, mz, 1))
    + 743            oversampled_indexes = linspace(
    + 744                start_index,
    + 745                final_index,
    + 746                (final_index - start_index) * oversample_multiplier,
    + 747            )
    + 748            mz_domain = pol(oversampled_indexes)
    + 749
    + 750        return mz_domain
    + 751
    + 752    @property
    + 753    def number_possible_assignments(
    + 754        self,
    + 755    ):
    + 756        return len(self.molecular_formulas)
    + 757
    + 758    def molecular_formula_lowest_error(self):
    + 759        """Return the molecular formula with the smallest absolute mz error"""
    + 760
    + 761        return min(self.molecular_formulas, key=lambda m: abs(m.mz_error))
    + 762
    + 763    def molecular_formula_highest_prob_score(self):
    + 764        """Return the molecular formula with the highest absolute confidence score"""
    + 765
    + 766        return max(self.molecular_formulas, key=lambda m: abs(m.confidence_score))
    + 767
    + 768    def molecular_formula_earth_filter(self, lowest_error=True):
    + 769        """Filter molecular formula using the 'Earth' filter
    + 770
    + 771        This function applies the Formularity-esque 'Earth' filter to possible molecular formula assignments.
    + 772        Earth Filter:
    + 773            O > 0 AND N <= 3 AND P <= 2 AND 3P <= O
    + 774
    + 775        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter.
    + 776        Otherwise, it will return all Earth-filter compliant formulas.
    + 777
    + 778        Parameters
    + 779        ----------
    + 780        lowest_error : bool, optional.
    + 781            Return only the lowest error formula which also fits the Earth filter.
    + 782            If False, return all Earth-filter compliant formulas. Default is True.
    + 783
    + 784        Returns
    + 785        -------
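    + 786        MolecularFormula or list
    + 787            The lowest-error formula passing the Earth filter if lowest_error is True; otherwise the list of passing formulas (an empty list if none pass)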
    + 788
    + 789        References
    + 790        ----------
    + 791        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    + 792            Anal. Chem. 2017, 89, 23, 12659–12665
    + 793            doi: 10.1021/acs.analchem.7b03318
    + 794        """
    + 795
    + 796        candidates = list(
    + 797            filter(
    + 798                lambda mf: mf.get("O") > 0
    + 799                and mf.get("N") <= 3
    + 800                and mf.get("P") <= 2
    + 801                and (3 * mf.get("P")) <= mf.get("O"),
    + 802                self.molecular_formulas,
    + 803            )
    + 804        )
    + 805        if len(candidates) > 0:
    + 806            if lowest_error:
    + 807                return min(candidates, key=lambda m: abs(m.mz_error))
    + 808            else:
    + 809                return candidates
    + 810        else:
    + 811            return candidates
    + 812
    + 813    def molecular_formula_water_filter(self, lowest_error=True):
    + 814        """Filter molecular formula using the 'Water' filter
    + 815
    + 816        This function applies the Formularity-esque 'Water' filter to possible molecular formula assignments.
    + 817        Water Filter:
    + 818            O > 0 AND N <= 3 AND S <= 2 AND P <= 2
    + 819
    + 820        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Water filter.
    + 821        Otherwise, it will return all Water-filter compliant formulas.
    + 822
    + 823        Parameters
    + 824        ----------
    + 825        lowest_error : bool, optional
    + 826            Return only the lowest error formula which also fits the Water filter.
    + 827            If False, return all Water-filter compliant formulas. Defaults to True.
    + 828
    + 829        Returns
    + 830        -------
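    + 831        MolecularFormula or list
    + 832            The lowest-error formula passing the Water filter if lowest_error is True; otherwise the list of passing formulas (an empty list if none pass)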
    + 833
    + 834        References
    + 835        ----------
    + 836        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    + 837            Anal. Chem. 2017, 89, 23, 12659–12665
    + 838            doi: 10.1021/acs.analchem.7b03318
    + 839        """
    + 840
    + 841        candidates = list(
    + 842            filter(
    + 843                lambda mf: mf.get("O") > 0
    + 844                and mf.get("N") <= 3
    + 845                and mf.get("S") <= 2
    + 846                and mf.get("P") <= 2,
    + 847                self.molecular_formulas,
    + 848            )
    + 849        )
    + 850        if len(candidates) > 0:
    + 851            if lowest_error:
    + 852                return min(candidates, key=lambda m: abs(m.mz_error))
    + 853            else:
    + 854                return candidates
    + 855        else:
    + 856            return candidates
    + 857
    + 858    def molecular_formula_air_filter(self, lowest_error=True):
    + 859        """Filter molecular formula using the 'Air' filter
    + 860
    + 861        This function applies the Formularity-esque 'Air' filter to possible molecular formula assignments.
    + 862        Air Filter:
    + 863            O > 0 AND N <= 2 AND S <= 1 AND P = 0 AND 3(S+N) <= O
    + 864
    + 865        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Air filter.
    + 866        Otherwise, it will return all Air-filter compliant formulas.
    + 867
    + 868        Parameters
    + 869        ----------
    + 870        lowest_error : bool, optional
    + 871            Return only the lowest error formula which also fits the Air filter.
    + 872            If False, return all Air-filter compliant formulas. Defaults to True.
    + 873
    + 874        Returns
    + 875        -------
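    + 876        MolecularFormula or list
    + 877            The lowest-error formula passing the Air filter if lowest_error is True; otherwise the list of passing formulas (an empty list if none pass)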
    + 878
    + 879        References
    + 880        ----------
    + 881        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    + 882            Anal. Chem. 2017, 89, 23, 12659–12665
    + 883            doi: 10.1021/acs.analchem.7b03318
    + 884        """
    + 885
    + 886        candidates = list(
    + 887            filter(
    + 888                lambda mf: mf.get("O") > 0
    + 889                and mf.get("N") <= 2
    + 890                and mf.get("S") <= 1
    + 891                and mf.get("P") == 0
    + 892                and 3 * (mf.get("S") + mf.get("N")) <= mf.get("O"),
    + 893                self.molecular_formulas,
    + 894            )
    + 895        )
    + 896
    + 897        if len(candidates) > 0:
    + 898            if lowest_error:
    + 899                return min(candidates, key=lambda m: abs(m.mz_error))
    + 900            else:
    + 901                return candidates
    + 902        else:
    + 903            return candidates
    + 904
    + 905    def cia_score_S_P_error(self):
    + 906        """Compound Identification Algorithm SP Error - Assignment Filter
    + 907
    + 908        This function applies the Compound Identification Algorithm (CIA) SP Error filter to possible molecular formula assignments.
    + 909
    + 910        It takes the molecular formula with the lowest S+P count, and returns the formula with the lowest absolute error from this subset.
    + 911
    + 912        Returns
    + 913        -------
    + 914        MolecularFormula
    + 915            A single molecular formula which fits the rules of the CIA SP Error filter
    + 916
    + 917
    + 918        References
    + 919        ----------
    + 920        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    + 921            Anal. Chem. 2006, 78, 13, 4363–4373
    + 922            doi: 10.1021/ac0600306
    + 923        """
    + 924        # case EFormulaScore.HAcap:
    + 925
    + 926        lowest_S_P_mf = min(
    + 927            self.molecular_formulas, key=lambda mf: mf.get("S") + mf.get("P")
    + 928        )
    + 929        lowest_S_P_count = lowest_S_P_mf.get("S") + lowest_S_P_mf.get("P")
    + 930
    + 931        list_same_s_p = list(
    + 932            filter(
    + 933                lambda mf: mf.get("S") + mf.get("P") == lowest_S_P_count,
    + 934                self.molecular_formulas,
    + 935            )
    + 936        )
    + 937
    + 938        # check if list is not empty
    + 939        if list_same_s_p:
    + 940            return min(list_same_s_p, key=lambda m: abs(m.mz_error))
    + 941
    + 942        else:
    + 943            return lowest_S_P_mf
    + 944
    + 945    def cia_score_N_S_P_error(self):
    + 946        """Compound Identification Algorithm NSP Error - Assignment Filter
    + 947
    + 948        This function applies the Compound Identification Algorithm (CIA) NSP Error filter to possible molecular formula assignments.
    + 949
    + 950        It takes the molecular formula with the lowest N+S+P count, and returns the formula with the lowest absolute error from this subset.
    + 951
    + 952        Returns
    + 953        -------
    + 954        MolecularFormula
    + 955            A single molecular formula which fits the rules of the CIA NSP Error filter
    + 956
    + 957        References
    + 958        ----------
    + 959        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    + 960            Anal. Chem. 2006, 78, 13, 4363–4373
    + 961            doi: 10.1021/ac0600306
    + 962
    + 963        Raises
    + 964        -------
    + 965        Exception
    + 966            If no molecular formula are associated with mass spectrum peak.
    + 967        """
    + 968        # case EFormulaScore.HAcap:
    + 969        if self.molecular_formulas:
    + 970            lowest_N_S_P_mf = min(
    + 971                self.molecular_formulas,
    + 972                key=lambda mf: mf.get("N") + mf.get("S") + mf.get("P"),
    + 973            )
    + 974            lowest_N_S_P_count = (
    + 975                lowest_N_S_P_mf.get("N")
    + 976                + lowest_N_S_P_mf.get("S")
    + 977                + lowest_N_S_P_mf.get("P")
    + 978            )
    + 979
    + 980            list_same_N_S_P = list(
    + 981                filter(
    + 982                    lambda mf: mf.get("N") + mf.get("S") + mf.get("P")
    + 983                    == lowest_N_S_P_count,
    + 984                    self.molecular_formulas,
    + 985                )
    + 986            )
    + 987
    + 988            if list_same_N_S_P:
    + 989                SP_filtered_list = list(
    + 990                    filter(
    + 991                        lambda mf: (mf.get("S") <= 3) and (mf.get("P") <= 1),
    + 992                        list_same_N_S_P,
    + 993                    )
    + 994                )
    + 995
    + 996                if SP_filtered_list:
    + 997                    return min(SP_filtered_list, key=lambda m: abs(m.mz_error))
    + 998
    + 999                else:
    +1000                    return min(list_same_N_S_P, key=lambda m: abs(m.mz_error))
    +1001
    +1002            else:
    +1003                return lowest_N_S_P_mf
    +1004        else:
    +1005            raise Exception(
    +1006                "No molecular formula associated with the mass spectrum peak at m/z: %.6f"
    +1007                % self.mz_exp
    +1008            )
     
    @@ -1971,30 +2201,34 @@
    Methods
    -
    130    def calc_area(self):
    -131        """ Calculate the peak area using numpy's trapezoidal fit
    -132
    -133        uses provided mz_domain to accurately integrate areas independent of digital resolution
    -134
    -135        Returns
    -136        -------
    -137        float
    -138            peak area
    -139        """
    -140        if self.peak_right_index > self.peak_left_index:
    -141
    -142            yy = self._ms_parent.abundance_profile[self.peak_left_index:self.peak_right_index]
    -143            xx = self._ms_parent.mz_exp_profile[self.peak_left_index:self.peak_right_index]
    -144            # check if the axis is high to low m/z or not. if its MSFromFreq its high mz first, if its from Profile, its low mz first
    -145            if xx[0] > xx[-1]:
    -146                xx = flip(xx)    
    -147                yy = flip(yy)   
    -148            return float(trapz(yy, xx))
    +            
    145    def calc_area(self):
    +146        """Calculate the peak area using numpy's trapezoidal fit
    +147
    +148        uses provided mz_domain to accurately integrate areas independent of digital resolution
     149
    -150        else:
    -151
    -152            warnings.warn("Peak Area Calculation for m/z {} has failed".format(self.mz_exp))
    -153            return nan
    +150        Returns
    +151        -------
    +152        float
    +153            peak area
    +154        """
    +155        if self.peak_right_index > self.peak_left_index:
    +156            yy = self._ms_parent.abundance_profile[
    +157                self.peak_left_index : self.peak_right_index
    +158            ]
    +159            xx = self._ms_parent.mz_exp_profile[
    +160                self.peak_left_index : self.peak_right_index
    +161            ]
    +162            # check if the axis is high to low m/z or not. if its MSFromFreq its high mz first, if its from Profile, its low mz first
    +163            if xx[0] > xx[-1]:
    +164                xx = flip(xx)
    +165                yy = flip(yy)
    +166            return float(trapz(yy, xx))
    +167
    +168        else:
    +169            warnings.warn(
    +170                "Peak Area Calculation for m/z {} has failed".format(self.mz_exp)
    +171            )
    +172            return nan
     
    @@ -2022,84 +2256,99 @@
    Returns
    -
    155    def fit_peak(self,mz_extend=6, delta_rp = 0, model='Gaussian'):
    -156        """ Lineshape analysis on a peak using lmfit module. 
    -157
    -158        Model and fit peak lineshape by defined function - using lmfit module
    -159        Does not oversample/resample/interpolate data points 
    -160        Better to go back to time domain and perform more zero filling - if possible.
    -161
    -162        Parameters
    -163        ----------
    -164        mz_extend : int
    -165            extra points left and right of peak definition to include in fitting
    -166        delta_rp : float
    -167            delta resolving power to add to resolving power
    -168        model : str
    -169            Type of lineshape model to use.
    -170            Models allowed: Gaussian, Lorentz, Voigt
    -171
    -172        Returns
    -173        -----
    -174        mz_domain : ndarray
    -175            x-axis domain for fit
    -176        fit_peak : lmfit object
    -177            fit results object from lmfit module
    -178        
    -179        Notes
    -180        -----
    -181        Returns the calculated mz domain, initial defined abundance profile, and the fit peak results object from lmfit module
    -182        mz_extend here extends the x-axis domain so that we have sufficient points either side of the apex to fit.
    -183        Takes about 10ms per peak
    -184        """
    -185        start_index = self.peak_left_index - mz_extend  if not self.peak_left_index == 0 else 0
    -186        final_index = self.peak_right_index + mz_extend  if not self.peak_right_index == len(self._ms_parent.mz_exp_profile) else self.peak_right_index
    -187
    -188        # check if MSPeak contains the resolving power info
    -189        if self.resolving_power:
    -190            # full width half maximum distance
    -191            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))
    -192
    -193            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    -194            abundance_domain = self._ms_parent.abundance_profile[start_index:final_index]
    -195
    -196            if model=='Gaussian':
    -197                # stardard deviation
    -198                sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    -199                amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -200                model = models.GaussianModel()
    -201                params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -202
    -203            elif model=='Lorentz':
    -204                # stardard deviation
    -205                sigma = self.fwhm / 2
    -206                amplitude = sigma* pi * self.abundance
    -207                model = models.LorentzianModel()
    -208                params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -209
    -210            elif model=='Voigt':
    -211                # stardard deviation
    -212                sigma = self.fwhm / 3.6013
    -213                amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -214                model = models.VoigtModel()
    -215                params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma, gamma = sigma)
    -216            else:
    -217                raise LookupError('model lineshape not known or defined')
    -218
    -219            #calc_abundance = model.eval(params=params, x=mz_domain) #Same as initial fit, returned in fit_peak object
    -220            fit_peak = model.fit(abundance_domain,params=params, x=mz_domain)
    -221            return mz_domain, fit_peak
    +            
    174    def fit_peak(self, mz_extend=6, delta_rp=0, model="Gaussian"):
    +175        """Lineshape analysis on a peak using lmfit module.
    +176
    +177        Model and fit peak lineshape by defined function - using lmfit module
    +178        Does not oversample/resample/interpolate data points
    +179        Better to go back to time domain and perform more zero filling - if possible.
    +180
    +181        Parameters
    +182        ----------
    +183        mz_extend : int
    +184            extra points left and right of peak definition to include in fitting
    +185        delta_rp : float
    +186            delta resolving power to add to resolving power
    +187        model : str
    +188            Type of lineshape model to use.
    +189            Models allowed: Gaussian, Lorentz, Voigt
    +190
    +191        Returns
    +192        -------
    +193        mz_domain : ndarray
    +194            x-axis domain for fit
    +195        fit_peak : lmfit object
    +196            fit results object from lmfit module
    +197
    +198        Notes
    +199        -----
    +200        Returns the calculated mz domain, initial defined abundance profile, and the fit peak results object from lmfit module
    +201        mz_extend here extends the x-axis domain so that we have sufficient points either side of the apex to fit.
    +202        Takes about 10ms per peak
    +203        """
    +204        start_index = (
    +205            self.peak_left_index - mz_extend if not self.peak_left_index == 0 else 0
    +206        )
    +207        final_index = (
    +208            self.peak_right_index + mz_extend
    +209            if not self.peak_right_index == len(self._ms_parent.mz_exp_profile)
    +210            else self.peak_right_index
    +211        )
    +212
    +213        # check if MSPeak contains the resolving power info
    +214        if self.resolving_power:
    +215            # full width half maximum distance
    +216            self.fwhm = self.mz_exp / (self.resolving_power + delta_rp)
    +217
    +218            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    +219            abundance_domain = self._ms_parent.abundance_profile[
    +220                start_index:final_index
    +221            ]
     222
    -223        else:
    -224            raise LookupError(
    -225                'resolving power is not defined, try to use set_max_resolving_power()')
    +223            if model == "Gaussian":
    +224                # standard deviation
    +225                sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    +226                amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    +227                model = models.GaussianModel()
    +228                params = model.make_params(
    +229                    center=self.mz_exp, amplitude=amplitude, sigma=sigma
    +230                )
    +231
    +232            elif model == "Lorentz":
    +233                # sigma is half the FWHM (half width at half maximum)
    +234                sigma = self.fwhm / 2
    +235                amplitude = sigma * pi * self.abundance
    +236                model = models.LorentzianModel()
    +237                params = model.make_params(
    +238                    center=self.mz_exp, amplitude=amplitude, sigma=sigma
    +239                )
    +240
    +241            elif model == "Voigt":
    +242                # stardard deviation
    +243                sigma = self.fwhm / 3.6013
    +244                amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    +245                model = models.VoigtModel()
    +246                params = model.make_params(
    +247                    center=self.mz_exp, amplitude=amplitude, sigma=sigma, gamma=sigma
    +248                )
    +249            else:
    +250                raise LookupError("model lineshape not known or defined")
    +251
    +252            # calc_abundance = model.eval(params=params, x=mz_domain) #Same as initial fit, returned in fit_peak object
    +253            fit_peak = model.fit(abundance_domain, params=params, x=mz_domain)
    +254            return mz_domain, fit_peak
    +255
    +256        else:
    +257            raise LookupError(
    +258                "resolving power is not defined, try to use set_max_resolving_power()"
    +259            )
     
    -

    Lineshape analysis on a peak using lmfit module.

    +

    Lineshape analysis on a peak using lmfit module.

    Model and fit peak lineshape by defined function - using lmfit module -Does not oversample/resample/interpolate data points +Does not oversample/resample/interpolate data points Better to go back to time domain and perform more zero filling - if possible.

    Parameters
    @@ -2143,50 +2392,54 @@
    Notes
    -
    228    def voigt_pso(self,w, r, yoff, width, loc, a):
    -229        """ Voigt function for particle swarm optimisation (PSO) fitting
    -230
    -231        From https://github.com/pnnl/nmrfit/blob/master/nmrfit/equations.py.
    -232        Calculates a Voigt function over w based on the relevant properties of the distribution.
    -233
    -234        Parameters
    -235        ----------
    -236        w : ndarray
    -237            Array over which the Voigt function will be evaluated.
    -238        r : float
    -239            Ratio between the Guassian and Lorentzian functions.
    -240        yoff : float
    -241            Y-offset of the Voigt function.
    -242        width : float
    -243            The width of the Voigt function.
    -244        loc : float
    -245            Center of the Voigt function.
    -246        a : float
    -247            Area of the Voigt function.
    -248        Returns
    -249        -------
    -250        V : ndarray
    -251            Array defining the Voigt function over w.
    -252
    -253        References
    -254        ----------
    -255        1. https://github.com/pnnl/nmrfit 
    -256
    -257        Notes
    -258        -----
    -259        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    -260
    -261        """
    -262        # Lorentzian component
    -263        L = (2 / (pi * width)) * 1 / (1 + ((w - loc) / (0.5 * width))**2)
    -264
    -265        # Gaussian component
    -266        G = (2 / width) * sqrt(log(2) / pi) * exp(-((w - loc) / (width / (2 * sqrt(log(2)))))**2)
    -267
    -268        # Voigt body
    -269        V = (yoff + a) * (r * L + (1 - r) * G)
    -270
    -271        return V
    +            
    261    def voigt_pso(self, w, r, yoff, width, loc, a):
    +262        """Voigt function for particle swarm optimisation (PSO) fitting
    +263
    +264        From https://github.com/pnnl/nmrfit/blob/master/nmrfit/equations.py.
    +265        Calculates a Voigt function over w based on the relevant properties of the distribution.
    +266
    +267        Parameters
    +268        ----------
    +269        w : ndarray
    +270            Array over which the Voigt function will be evaluated.
    +271        r : float
    +272            Ratio between the Gaussian and Lorentzian functions.
    +273        yoff : float
    +274            Y-offset of the Voigt function.
    +275        width : float
    +276            The width of the Voigt function.
    +277        loc : float
    +278            Center of the Voigt function.
    +279        a : float
    +280            Area of the Voigt function.
    +281        Returns
    +282        -------
    +283        V : ndarray
    +284            Array defining the Voigt function over w.
    +285
    +286        References
    +287        ----------
    +288        1. https://github.com/pnnl/nmrfit
    +289
    +290        Notes
    +291        -----
    +292        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    +293
    +294        """
    +295        # Lorentzian component
    +296        L = (2 / (pi * width)) * 1 / (1 + ((w - loc) / (0.5 * width)) ** 2)
    +297
    +298        # Gaussian component
    +299        G = (
    +300            (2 / width)
    +301            * sqrt(log(2) / pi)
    +302            * exp(-(((w - loc) / (width / (2 * sqrt(log(2))))) ** 2))
    +303        )
    +304
    +305        # Voigt body
    +306        V = (yoff + a) * (r * L + (1 - r) * G)
    +307
    +308        return V
     
    @@ -2222,7 +2475,7 @@
    Returns
    References
      -
    1. https://github.com/pnnl/nmrfit
    2. +
    3. https://github.com/pnnl/nmrfit
    Notes
    @@ -2243,42 +2496,42 @@
    Notes
    -
    274    def objective_pso(self, x, w, u):
    -275        """ Objective function for particle swarm optimisation (PSO) fitting
    -276
    -277        The objective function used to fit supplied data.  Evaluates sum of squared differences between the fit and the data.
    -278
    -279        Parameters
    -280        ----------
    -281        x : list of floats
    -282            Parameter vector.
    -283        w : ndarray
    -284            Array of frequency data.
    -285        u : ndarray
    -286            Array of data to be fit.
    -287
    -288        Returns
    -289        -------
    -290        rmse : float
    -291            Root mean square error between the data and fit.
    -292
    -293        References
    -294        ----------
    -295        1. https://github.com/pnnl/nmrfit 
    -296
    -297        """
    -298        # global parameters
    -299        r, width, loc, a = x
    -300        yoff = 0
    -301
    -302        # calculate fit for V
    -303        V_fit = self.voigt_pso(w, r, yoff, width, loc, a)
    -304
    -305        # real component RMSE
    -306        rmse = sqrt(square((u - V_fit)).mean(axis=None))
    -307
    -308        # return the total RMSE
    -309        return rmse
    +            
    310    def objective_pso(self, x, w, u):
    +311        """Objective function for particle swarm optimisation (PSO) fitting
    +312
    +313        The objective function used to fit supplied data.  Evaluates sum of squared differences between the fit and the data.
    +314
    +315        Parameters
    +316        ----------
    +317        x : list of floats
    +318            Parameter vector.
    +319        w : ndarray
    +320            Array of frequency data.
    +321        u : ndarray
    +322            Array of data to be fit.
    +323
    +324        Returns
    +325        -------
    +326        rmse : float
    +327            Root mean square error between the data and fit.
    +328
    +329        References
    +330        ----------
    +331        1. https://github.com/pnnl/nmrfit
    +332
    +333        """
    +334        # global parameters
    +335        r, width, loc, a = x
    +336        yoff = 0
    +337
    +338        # calculate fit for V
    +339        V_fit = self.voigt_pso(w, r, yoff, width, loc, a)
    +340
    +341        # real component RMSE
    +342        rmse = sqrt(square((u - V_fit)).mean(axis=None))
    +343
    +344        # return the total RMSE
    +345        return rmse
     
    @@ -2324,52 +2577,57 @@
    References
    -
    311    def minimize_pso(self,lower, upper, w, u):
    -312        """ Minimization function for particle swarm optimisation (PSO) fitting
    -313
    -314        Minimizes the objective function using the particle swarm optimization algorithm.
    -315        Minimization function based on defined parameters   
    -316
    -317
    -318        Parameters
    -319        ----------
    -320        lower : list of floats
    -321            Lower bounds for the parameters.
    -322        upper : list of floats
    -323            Upper bounds for the parameters.
    -324        w : ndarray
    -325            Array of frequency data.
    -326        u : ndarray
    -327            Array of data to be fit.
    -328
    -329        Notes
    -330        -----
    -331        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    -332        Current parameters take ~2 seconds per peak.
    -333
    -334
    -335        References
    -336        ----------
    -337        1. https://github.com/pnnl/nmrfit 
    -338
    -339        """
    -340        #TODO - allow support to pass swarmsize, maxiter, omega, phip, phig parameters.
    -341        #TODO - Refactor PSO fitting into its own class?
    -342        
    -343        xopt, fopt = pyswarm.pso(self.objective_pso, lower, upper, args=(w, u),
    -344                                    swarmsize=1000,
    -345                                    maxiter=5000,
    -346                                    omega=-0.2134,
    -347                                    phip=-0.3344,
    -348                                    phig=2.3259)
    -349        return xopt, fopt
    +            
    347    def minimize_pso(self, lower, upper, w, u):
    +348        """Minimization function for particle swarm optimisation (PSO) fitting
    +349
    +350        Minimizes the objective function using the particle swarm optimization algorithm.
    +351        Minimization function based on defined parameters
    +352
    +353
    +354        Parameters
    +355        ----------
    +356        lower : list of floats
    +357            Lower bounds for the parameters.
    +358        upper : list of floats
    +359            Upper bounds for the parameters.
    +360        w : ndarray
    +361            Array of frequency data.
    +362        u : ndarray
    +363            Array of data to be fit.
    +364
    +365        Notes
    +366        -----
    +367        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    +368        Current parameters take ~2 seconds per peak.
    +369
    +370
    +371        References
    +372        ----------
    +373        1. https://github.com/pnnl/nmrfit
    +374
    +375        """
    +376        # TODO - allow support to pass swarmsize, maxiter, omega, phip, phig parameters.
    +377        # TODO - Refactor PSO fitting into its own class?
    +378
    +379        xopt, fopt = pyswarm.pso(
    +380            self.objective_pso,
    +381            lower,
    +382            upper,
    +383            args=(w, u),
    +384            swarmsize=1000,
    +385            maxiter=5000,
    +386            omega=-0.2134,
    +387            phip=-0.3344,
    +388            phig=2.3259,
    +389        )
    +390        return xopt, fopt
     

    Minimization function for particle swarm optimisation (PSO) fitting

    Minimizes the objective function using the particle swarm optimization algorithm. -Minimization function based on defined parameters

    +Minimization function based on defined parameters

    Parameters
    @@ -2409,68 +2667,86 @@
    References
    -
    351    def fit_peak_pso(self, mz_extend : int=6, upsample_multiplier : int=5):
    -352        """ Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting 
    -353
    -354        Function to fit a Voigt peakshape using particle swarm optimisation (PSO).
    -355        Should return better results than lmfit, but much more computationally expensive
    -356
    -357        Parameters
    -358        ----------
    -359        mz_extend : int, optional
    -360            extra points left and right of peak definition to include in fitting. Defaults to 6.
    -361        upsample_multiplier : int, optional
    -362            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 5.
    -363
    -364        Returns
    -365        -------
    -366        xopt : array
    -367            variables describing the voigt function.
    -368            G/L ratio, width (fwhm), apex (x-axis), area.
    -369            y-axis offset is fixed at 0 
    -370        fopt : float
    -371            objective score (rmse)
    -372        psfit : array
    -373            recalculated y values based on function and optimised fit
    -374        psfit_hdp : tuple of arrays
    -375            0 - linspace x-axis upsampled grid
    -376            1 - recalculated y values based on function and upsampled x-axis grid
    -377            Does not change results, but aids in visualisation of the 'true' voigt lineshape
    -378
    -379        Notes
    -380        -----
    -381        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    -382        """
    -383        # TODO - Add ability to pass pso args (i.e. swarm size, maxiter, omega, phig, etc)
    -384        # TODO: fix xopt. Magnitude mode data through CoreMS/Bruker starts at 0 but is noise centered well above 0.
    -385            # Thermo data is noise reduced by also noise subtracted, so starts at 0
    -386            # Absorption mode/phased data will have positive and negative components and may not be baseline corrected
    -387
    -388        start_index = self.peak_left_index - mz_extend  if not self.peak_left_index == 0 else 0
    -389        final_index = self.peak_right_index + mz_extend  if not self.peak_right_index == len(self._ms_parent.mz_exp_profile) else self.peak_right_index
    -390
    -391        # check if MSPeak contains the resolving power info
    -392        if self.resolving_power:
    -393            # full width half maximum distance
    -394            self.fwhm = (self.mz_exp / (self.resolving_power))
    -395
    -396            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    -397            abundance_domain = self._ms_parent.abundance_profile[start_index:final_index]
    -398            lower = [0, self.fwhm*0.8, (self.mz_exp-0.0005), 0]
    -399            upper = [1, self.fwhm*1.2, (self.mz_exp+0.0005), self.abundance/self.signal_to_noise]
    -400            xopt, fopt = self.minimize_pso(lower,upper,mz_domain,abundance_domain)
    -401            
    -402            psfit = self.voigt_pso(mz_domain,xopt[0],0,xopt[1],xopt[2],xopt[3])
    -403            psfit_hdp_x = linspace(min(mz_domain),max(mz_domain),num=len(mz_domain)*upsample_multiplier)
    -404            psfit_hdp = self.voigt_pso(psfit_hdp_x,xopt[0],0,xopt[1],xopt[2],xopt[3])
    -405            return xopt, fopt, psfit, (psfit_hdp_x, psfit_hdp)
    -406        else:
    -407            raise LookupError(
    -408                'resolving power is not defined, try to use set_max_resolving_power()')
    +            
    392    def fit_peak_pso(self, mz_extend: int = 6, upsample_multiplier: int = 5):
    +393        """Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting
    +394
    +395        Function to fit a Voigt peakshape using particle swarm optimisation (PSO).
    +396        Should return better results than lmfit, but much more computationally expensive
    +397
    +398        Parameters
    +399        ----------
    +400        mz_extend : int, optional
    +401            extra points left and right of peak definition to include in fitting. Defaults to 6.
    +402        upsample_multiplier : int, optional
    +403            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 5.
    +404
    +405        Returns
    +406        -------
    +407        xopt : array
    +408            variables describing the voigt function.
    +409            G/L ratio, width (fwhm), apex (x-axis), area.
    +410            y-axis offset is fixed at 0
    +411        fopt : float
    +412            objective score (rmse)
    +413        psfit : array
    +414            recalculated y values based on function and optimised fit
    +415        psfit_hdp : tuple of arrays
    +416            0 - linspace x-axis upsampled grid
    +417            1 - recalculated y values based on function and upsampled x-axis grid
    +418            Does not change results, but aids in visualisation of the 'true' voigt lineshape
    +419
    +420        Notes
    +421        -----
    +422        Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.
    +423        """
    +424        # TODO - Add ability to pass pso args (i.e. swarm size, maxiter, omega, phig, etc)
    +425        # TODO: fix xopt. Magnitude mode data through CoreMS/Bruker starts at 0 but is noise centered well above 0.
    +426        # Thermo data is noise reduced by also noise subtracted, so starts at 0
    +427        # Absorption mode/phased data will have positive and negative components and may not be baseline corrected
    +428
    +429        start_index = (
    +430            self.peak_left_index - mz_extend if not self.peak_left_index == 0 else 0
    +431        )
    +432        final_index = (
    +433            self.peak_right_index + mz_extend
    +434            if not self.peak_right_index == len(self._ms_parent.mz_exp_profile)
    +435            else self.peak_right_index
    +436        )
    +437
    +438        # check if MSPeak contains the resolving power info
    +439        if self.resolving_power:
    +440            # full width half maximum distance
    +441            self.fwhm = self.mz_exp / (self.resolving_power)
    +442
    +443            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    +444            abundance_domain = self._ms_parent.abundance_profile[
    +445                start_index:final_index
    +446            ]
    +447            lower = [0, self.fwhm * 0.8, (self.mz_exp - 0.0005), 0]
    +448            upper = [
    +449                1,
    +450                self.fwhm * 1.2,
    +451                (self.mz_exp + 0.0005),
    +452                self.abundance / self.signal_to_noise,
    +453            ]
    +454            xopt, fopt = self.minimize_pso(lower, upper, mz_domain, abundance_domain)
    +455
    +456            psfit = self.voigt_pso(mz_domain, xopt[0], 0, xopt[1], xopt[2], xopt[3])
    +457            psfit_hdp_x = linspace(
    +458                min(mz_domain), max(mz_domain), num=len(mz_domain) * upsample_multiplier
    +459            )
    +460            psfit_hdp = self.voigt_pso(
    +461                psfit_hdp_x, xopt[0], 0, xopt[1], xopt[2], xopt[3]
    +462            )
    +463            return xopt, fopt, psfit, (psfit_hdp_x, psfit_hdp)
    +464        else:
    +465            raise LookupError(
    +466                "resolving power is not defined, try to use set_max_resolving_power()"
    +467            )
     
    -

    Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting

    +

    Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting

    Function to fit a Voigt peakshape using particle swarm optimisation (PSO). Should return better results than lmfit, but much more computationally expensive

    @@ -2519,59 +2795,61 @@
    Notes
    -
    411    def voigt(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1):
    -412        """ [Legacy] Voigt lineshape analysis function
    -413        Legacy function for voigt lineshape analysis
    -414
    -415        Parameters
    -416        ----------
    -417        oversample_multiplier : int
    -418            factor to increase x-axis points by for simulation of fitted lineshape function
    -419        delta_rp : float
    -420            delta resolving power to add to resolving power
    -421        mz_overlay : int
    -422            extra points left and right of peak definition to include in fitting
    -423        
    -424        Returns
    -425        -------
    -426        mz_domain : ndarray
    -427            x-axis domain for fit
    -428        calc_abundance : ndarray
    -429            calculated abundance profile based on voigt function
    -430        """
    -431        
    -432        
    -433        if self.resolving_power:
    -434
    -435            # full width half maximum distance
    -436            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -437
    -438            # stardart deviation
    -439            sigma = self.fwhm / 3.6013
    -440
    -441            # half width baseline distance
    -442            
    -443            #mz_domain = linspace(self.mz_exp - hw_base_distance,
    -444            #                     self.mz_exp + hw_base_distance, datapoint)
    -445            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -446            
    -447            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -448            
    -449            #TODO derive amplitude
    -450            amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -451
    -452            model = models.VoigtModel()
    -453
    -454            params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma, gamma = sigma)
    -455
    -456            calc_abundance = model.eval(params=params, x=mz_domain)
    -457
    -458            return mz_domain, calc_abundance
    -459        
    -460        else:
    -461            
    -462            raise LookupError(
    -463                'resolving power is not defined, try to use set_max_resolving_power()')
    +            
    469    def voigt(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):
    +470        """[Legacy] Voigt lineshape analysis function
    +471        Legacy function for voigt lineshape analysis
    +472
    +473        Parameters
    +474        ----------
    +475        oversample_multiplier : int
    +476            factor to increase x-axis points by for simulation of fitted lineshape function
    +477        delta_rp : float
    +478            delta resolving power to add to resolving power
    +479        mz_overlay : int
    +480            extra points left and right of peak definition to include in fitting
    +481
    +482        Returns
    +483        -------
    +484        mz_domain : ndarray
    +485            x-axis domain for fit
    +486        calc_abundance : ndarray
    +487            calculated abundance profile based on voigt function
    +488        """
    +489
    +490        if self.resolving_power:
    +491            # full width half maximum distance
    +492            self.fwhm = self.mz_exp / (
    +493                self.resolving_power + delta_rp
    +494            )  # self.resolving_power)
    +495
    +496            # stardart deviation
    +497            sigma = self.fwhm / 3.6013
    +498
    +499            # half width baseline distance
    +500
    +501            # mz_domain = linspace(self.mz_exp - hw_base_distance,
    +502            #                     self.mz_exp + hw_base_distance, datapoint)
    +503            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    +504
    +505            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    +506
    +507            # TODO derive amplitude
    +508            amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    +509
    +510            model = models.VoigtModel()
    +511
    +512            params = model.make_params(
    +513                center=self.mz_exp, amplitude=amplitude, sigma=sigma, gamma=sigma
    +514            )
    +515
    +516            calc_abundance = model.eval(params=params, x=mz_domain)
    +517
    +518            return mz_domain, calc_abundance
    +519
    +520        else:
    +521            raise LookupError(
    +522                "resolving power is not defined, try to use set_max_resolving_power()"
    +523            )
     
    @@ -2612,63 +2890,68 @@
    Returns
    -
    465    def pseudovoigt(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1, fraction =0.5):
    -466        """ [Legacy] pseudovoigt lineshape function
    -467
    -468        Legacy function for pseudovoigt lineshape analysis. 
    -469        Note - Code may not be functional currently.
    -470
    -471        Parameters
    -472        ----------
    -473        oversample_multiplier : int, optional
    -474            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 1.
    -475        delta_rp : float, optional
    -476            delta resolving power to add to resolving power. Defaults to 0.
    -477        mz_overlay : int, optional
    -478            extra points left and right of peak definition to include in fitting. Defaults to 1.
    -479        fraction : float, optional
    -480            fraction of gaussian component in pseudovoigt function. Defaults to 0.5.
    -481
    -482        """
    -483        if self.resolving_power:
    -484
    -485            # full width half maximum distance
    -486            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -487
    -488            # stardart deviation
    -489            sigma = self.fwhm / 2
    -490
    -491            # half width baseline distance
    -492            
    -493            #mz_domain = linspace(self.mz_exp - hw_base_distance,
    -494            #                     self.mz_exp + hw_base_distance, datapoint)
    -495            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -496            
    -497            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -498            model = models.PseudoVoigtModel()
    -499            
    -500            # TODO derive amplitude
    -501            gamma = sigma
    -502            
    -503            amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -504            amplitude = (sqrt(pi/log(2)) * (pi*sigma*self.abundance)) /( (pi*(1-gamma)) + (sqrt(pi*log(2)) * gamma) )
    -505
    -506            params = model.make_params(center=self.mz_exp, sigma = sigma)
    -507
    -508            calc_abundance = model.eval(params=params, x=mz_domain)
    -509
    -510            return mz_domain, calc_abundance
    -511        
    -512        else:
    -513            
    -514            raise LookupError(
    -515                'resolving power is not defined, try to use set_max_resolving_power()')
    +            
    525    def pseudovoigt(
    +526        self, oversample_multiplier=1, delta_rp=0, mz_overlay=1, fraction=0.5
    +527    ):
    +528        """[Legacy] pseudovoigt lineshape function
    +529
    +530        Legacy function for pseudovoigt lineshape analysis.
    +531        Note - Code may not be functional currently.
    +532
    +533        Parameters
    +534        ----------
    +535        oversample_multiplier : int, optional
    +536            factor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 1.
    +537        delta_rp : float, optional
    +538            delta resolving power to add to resolving power. Defaults to 0.
    +539        mz_overlay : int, optional
    +540            extra points left and right of peak definition to include in fitting. Defaults to 1.
    +541        fraction : float, optional
    +542            fraction of gaussian component in pseudovoigt function. Defaults to 0.5.
    +543
    +544        """
    +545        if self.resolving_power:
    +546            # full width half maximum distance
    +547            self.fwhm = self.mz_exp / (
    +548                self.resolving_power + delta_rp
    +549            )  # self.resolving_power)
    +550
    +551            # stardart deviation
    +552            sigma = self.fwhm / 2
    +553
    +554            # half width baseline distance
    +555
    +556            # mz_domain = linspace(self.mz_exp - hw_base_distance,
    +557            #                     self.mz_exp + hw_base_distance, datapoint)
    +558            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    +559
    +560            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    +561            model = models.PseudoVoigtModel()
    +562
    +563            # TODO derive amplitude
    +564            gamma = sigma
    +565
    +566            amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    +567            amplitude = (sqrt(pi / log(2)) * (pi * sigma * self.abundance)) / (
    +568                (pi * (1 - gamma)) + (sqrt(pi * log(2)) * gamma)
    +569            )
    +570
    +571            params = model.make_params(center=self.mz_exp, sigma=sigma)
    +572
    +573            calc_abundance = model.eval(params=params, x=mz_domain)
    +574
    +575            return mz_domain, calc_abundance
    +576
    +577        else:
    +578            raise LookupError(
    +579                "resolving power is not defined, try to use set_max_resolving_power()"
    +580            )
     

    [Legacy] pseudovoigt lineshape function

    -

    Legacy function for pseudovoigt lineshape analysis. +

    Legacy function for pseudovoigt lineshape analysis. Note - Code may not be functional currently.

    Parameters
    @@ -2698,63 +2981,65 @@
    Parameters
    -
    518    def lorentz(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1):
    -519        """ [Legacy] Lorentz lineshape analysis function    
    -520        
    -521        Legacy function for lorentz lineshape analysis
    -522
    -523        Parameters
    -524        ----------
    -525        oversample_multiplier : int
    -526            factor to increase x-axis points by for simulation of fitted lineshape function
    -527        delta_rp : float
    -528            delta resolving power to add to resolving power
    -529        mz_overlay : int
    -530            extra points left and right of peak definition to include in fitting
    -531        
    -532        Returns
    -533        -------
    -534        mz_domain : ndarray
    -535            x-axis domain for fit
    -536        calc_abundance : ndarray
    -537            calculated abundance profile based on lorentz function
    -538        
    -539        """
    -540        if self.resolving_power:
    -541
    -542            # full width half maximum distance
    -543            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -544
    -545            # stardart deviation
    -546            sigma = self.fwhm / 2
    -547
    -548            # half width baseline distance
    -549            hw_base_distance = (8 * sigma)
    -550
    -551            #mz_domain = linspace(self.mz_exp - hw_base_distance,
    -552            #                     self.mz_exp + hw_base_distance, datapoint)
    -553            
    -554            
    -555            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -556            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -557            model = models.LorentzianModel()
    -558            
    -559            amplitude = sigma* pi * self.abundance
    -560
    -561            params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -562
    -563            calc_abundance = model.eval(params=params, x=mz_domain)
    -564
    -565            return mz_domain, calc_abundance
    -566        
    -567        else:
    -568            
    -569            raise LookupError(
    -570                'resolving power is not defined, try to use set_max_resolving_power()')
    +            
    582    def lorentz(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):
    +583        """[Legacy] Lorentz lineshape analysis function
    +584
    +585        Legacy function for lorentz lineshape analysis
    +586
    +587        Parameters
    +588        ----------
    +589        oversample_multiplier : int
    +590            factor to increase x-axis points by for simulation of fitted lineshape function
    +591        delta_rp : float
    +592            delta resolving power to add to resolving power
    +593        mz_overlay : int
    +594            extra points left and right of peak definition to include in fitting
    +595
    +596        Returns
    +597        -------
    +598        mz_domain : ndarray
    +599            x-axis domain for fit
    +600        calc_abundance : ndarray
    +601            calculated abundance profile based on lorentz function
    +602
    +603        """
    +604        if self.resolving_power:
    +605            # full width half maximum distance
    +606            self.fwhm = self.mz_exp / (
    +607                self.resolving_power + delta_rp
    +608            )  # self.resolving_power)
    +609
    +610            # stardart deviation
    +611            sigma = self.fwhm / 2
    +612
    +613            # half width baseline distance
    +614            hw_base_distance = 8 * sigma
    +615
    +616            # mz_domain = linspace(self.mz_exp - hw_base_distance,
    +617            #                     self.mz_exp + hw_base_distance, datapoint)
    +618
    +619            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    +620            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    +621            model = models.LorentzianModel()
    +622
    +623            amplitude = sigma * pi * self.abundance
    +624
    +625            params = model.make_params(
    +626                center=self.mz_exp, amplitude=amplitude, sigma=sigma
    +627            )
    +628
    +629            calc_abundance = model.eval(params=params, x=mz_domain)
    +630
    +631            return mz_domain, calc_abundance
    +632
    +633        else:
    +634            raise LookupError(
    +635                "resolving power is not defined, try to use set_max_resolving_power()"
    +636            )
     
    -

    [Legacy] Lorentz lineshape analysis function

    +

    [Legacy] Lorentz lineshape analysis function

    Legacy function for lorentz lineshape analysis

    @@ -2792,66 +3077,71 @@
    Returns
    -
    572    def gaussian(self, oversample_multiplier=1, delta_rp = 0, mz_overlay=1):
    -573        """ [Legacy] Gaussian lineshape analysis function
    -574        Legacy gaussian lineshape analysis function
    -575        
    -576        Parameters
    -577        ----------
    -578        oversample_multiplier : int
    -579            factor to increase x-axis points by for simulation of fitted lineshape function
    -580        delta_rp : float
    -581            delta resolving power to add to resolving power
    -582        mz_overlay : int
    -583            extra points left and right of peak definition to include in fitting
    -584
    -585        Returns
    -586        -------
    -587        mz_domain : ndarray 
    -588            x-axis domain for fit
    -589        calc_abundance : ndarray
    -590            calculated abundance profile based on gaussian function
    -591        
    -592
    -593        """
    -594
    -595        # check if MSPeak contains the resolving power info
    -596        if self.resolving_power:
    -597            # full width half maximum distance
    -598            self.fwhm = (self.mz_exp / (self.resolving_power + delta_rp))#self.resolving_power)
    -599
    -600            # stardart deviation
    -601            sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    -602
    -603            # half width baseline distance
    -604            #hw_base_distance = (3.2 * s)
    -605
    -606            #match_loz_factor = 3
    -607
    -608            #n_d = hw_base_distance * match_loz_factor
    -609
    -610            #mz_domain = linspace(
    -611            #    self.mz_exp - n_d, self.mz_exp + n_d, datapoint)
    -612
    -613            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)    
    -614            
    -615            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    -616            
    -617            #calc_abundance = norm.pdf(mz_domain, self.mz_exp, s)
    -618
    -619            model = models.GaussianModel()
    -620            
    -621            amplitude = (sqrt(2*pi)*sigma) * self.abundance
    -622
    -623            params = model.make_params(center=self.mz_exp, amplitude=amplitude, sigma = sigma)
    -624
    -625            calc_abundance = model.eval(params=params, x=mz_domain)
    -626            
    -627            return mz_domain, calc_abundance 
    -628
    -629        else:
    -630            raise LookupError(
    -631                'resolving power is not defined, try to use set_max_resolving_power()')
    +            
    638    def gaussian(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):
    +639        """[Legacy] Gaussian lineshape analysis function
    +640        Legacy gaussian lineshape analysis function
    +641
    +642        Parameters
    +643        ----------
    +644        oversample_multiplier : int
    +645            factor to increase x-axis points by for simulation of fitted lineshape function
    +646        delta_rp : float
    +647            delta resolving power to add to resolving power
    +648        mz_overlay : int
    +649            extra points left and right of peak definition to include in fitting
    +650
    +651        Returns
    +652        -------
    +653        mz_domain : ndarray
    +654            x-axis domain for fit
    +655        calc_abundance : ndarray
    +656            calculated abundance profile based on gaussian function
    +657
    +658
    +659        """
    +660
    +661        # check if MSPeak contains the resolving power info
    +662        if self.resolving_power:
    +663            # full width half maximum distance
    +664            self.fwhm = self.mz_exp / (
    +665                self.resolving_power + delta_rp
    +666            )  # self.resolving_power)
    +667
    +668            # standard deviation
    +669            sigma = self.fwhm / (2 * sqrt(2 * log(2)))
    +670
    +671            # half width baseline distance
    +672            # hw_base_distance = (3.2 * s)
    +673
    +674            # match_loz_factor = 3
    +675
    +676            # n_d = hw_base_distance * match_loz_factor
    +677
    +678            # mz_domain = linspace(
    +679            #    self.mz_exp - n_d, self.mz_exp + n_d, datapoint)
    +680
    +681            mz_domain = self.get_mz_domain(oversample_multiplier, mz_overlay)
    +682
    +683            # gaussian_pdf = lambda x0, x, s: (1/ math.sqrt(2*math.pi*math.pow(s,2))) * math.exp(-1 * math.pow(x-x0,2) / 2*math.pow(s,2) )
    +684
    +685            # calc_abundance = norm.pdf(mz_domain, self.mz_exp, s)
    +686
    +687            model = models.GaussianModel()
    +688
    +689            amplitude = (sqrt(2 * pi) * sigma) * self.abundance
    +690
    +691            params = model.make_params(
    +692                center=self.mz_exp, amplitude=amplitude, sigma=sigma
    +693            )
    +694
    +695            calc_abundance = model.eval(params=params, x=mz_domain)
    +696
    +697            return mz_domain, calc_abundance
    +698
    +699        else:
    +700            raise LookupError(
    +701                "resolving power is not defined, try to use set_max_resolving_power()"
    +702            )
     
    @@ -2892,44 +3182,53 @@
    Returns
    -
    633    def get_mz_domain(self, oversample_multiplier, mz_overlay):
    -634        """  [Legacy] function to resample/interpolate datapoints for lineshape analysis
    -635
    -636        This code is used for the legacy line fitting functions and not recommended.
    -637        Legacy function to support expanding mz domain for legacy lineshape functions
    -638
    -639        Parameters
    -640        ----------
    -641        oversample_multiplier : int
    -642            factor to increase x-axis points by for simulation of fitted lineshape function
    -643        mz_overlay : int
    -644            extra points left and right of peak definition to include in fitting
    -645        
    -646        Returns
    -647        -------
    -648        mz_domain : ndarray
    -649            x-axis domain for fit
    -650        
    -651        """
    -652        start_index = self.peak_left_index - mz_overlay  if not self.peak_left_index == 0 else 0
    -653        final_index = self.peak_right_index + mz_overlay  if not self.peak_right_index == len(self._ms_parent.mz_exp_profile) else self.peak_right_index
    -654
    -655        if oversample_multiplier == 1:
    -656
    -657            mz_domain = self._ms_parent.mz_exp_profile[start_index: final_index]
    -658            
    -659        else:
    -660            # we assume a linear correlation for m/z and datapoits 
    -661            # which is only true if the m/z range in narrow (within 1 m/z unit)
    -662            # this is not true for a wide m/z range
    -663                        
    -664            indexes = range(start_index, final_index+1)
    -665            mz = self._ms_parent.mz_exp_profile[indexes]
    -666            pol = poly1d(polyfit(indexes, mz, 1))
    -667            oversampled_indexes = linspace(start_index, final_index, (final_index-start_index) * oversample_multiplier)    
    -668            mz_domain = pol(oversampled_indexes)
    -669
    -670        return mz_domain
    +            
    704    def get_mz_domain(self, oversample_multiplier, mz_overlay):
    +705        """[Legacy] function to resample/interpolate datapoints for lineshape analysis
    +706
    +707        This code is used for the legacy line fitting functions and not recommended.
    +708        Legacy function to support expanding mz domain for legacy lineshape functions
    +709
    +710        Parameters
    +711        ----------
    +712        oversample_multiplier : int
    +713            factor to increase x-axis points by for simulation of fitted lineshape function
    +714        mz_overlay : int
    +715            extra points left and right of peak definition to include in fitting
    +716
    +717        Returns
    +718        -------
    +719        mz_domain : ndarray
    +720            x-axis domain for fit
    +721
    +722        """
    +723        start_index = (
    +724            self.peak_left_index - mz_overlay if not self.peak_left_index == 0 else 0
    +725        )
    +726        final_index = (
    +727            self.peak_right_index + mz_overlay
    +728            if not self.peak_right_index == len(self._ms_parent.mz_exp_profile)
    +729            else self.peak_right_index
    +730        )
    +731
    +732        if oversample_multiplier == 1:
    +733            mz_domain = self._ms_parent.mz_exp_profile[start_index:final_index]
    +734
    +735        else:
    +736            # we assume a linear correlation for m/z and datapoints
    +737            # which is only true if the m/z range is narrow (within 1 m/z unit)
    +738            # this is not true for a wide m/z range
    +739
    +740            indexes = range(start_index, final_index + 1)
    +741            mz = self._ms_parent.mz_exp_profile[indexes]
    +742            pol = poly1d(polyfit(indexes, mz, 1))
    +743            oversampled_indexes = linspace(
    +744                start_index,
    +745                final_index,
    +746                (final_index - start_index) * oversample_multiplier,
    +747            )
    +748            mz_domain = pol(oversampled_indexes)
    +749
    +750        return mz_domain
     
    @@ -2979,12 +3278,10 @@
    Returns
    -
    677    def molecular_formula_lowest_error(self):
    -678       """ Return the molecular formula with the smallest absolute mz error
    -679       
    -680       """
    -681       
    -682       return min(self.molecular_formulas, key=lambda m: abs(m.mz_error))
    +            
    758    def molecular_formula_lowest_error(self):
    +759        """Return the molecular formula with the smallest absolute mz error"""
    +760
    +761        return min(self.molecular_formulas, key=lambda m: abs(m.mz_error))
     
    @@ -3004,12 +3301,10 @@
    Returns
    -
    684    def molecular_formula_highest_prob_score(self):
    -685        """ Return the molecular formula with the highest confidence score score
    -686         
    -687        """
    -688       
    -689        return max(self.molecular_formulas, key=lambda m: abs(m.confidence_score))
    +            
    763    def molecular_formula_highest_prob_score(self):
    +764        """Return the molecular formula with the highest confidence score"""
    +765
    +766        return max(self.molecular_formulas, key=lambda m: abs(m.confidence_score))
     
    @@ -3029,42 +3324,50 @@
    Returns
    -
    691    def molecular_formula_earth_filter(self, lowest_error=True):
    -692        """ Filter molecular formula using the 'Earth' filter
    -693        
    -694        This function applies the Formularity-esque 'Earth' filter to possible molecular formula assignments.
    -695        Earth Filter:
    -696            O > 0 AND N <= 3 AND P <= 2 AND 3P <= O
    -697
    -698        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter. 
    -699        Otherwise, it will return all Earth-filter compliant formulas. 
    -700
    -701        Parameters
    -702        ----------
    -703        lowest_error : bool, optional.
    -704            Return only the lowest error formula which also fits the Earth filter. 
    -705            If False, return all Earth-filter compliant formulas. Default is True.
    -706
    -707        Returns
    -708        -------
    -709        list
    -710            List of molecular formula objects which fit the Earth filter
    -711
    -712        References
    -713        ----------
    -714        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    -715            Anal. Chem. 2017, 89, 23, 12659–12665
    -716            doi: 10.1021/acs.analchem.7b03318
    -717        """
    -718        
    -719        candidates = list(filter(lambda mf: mf.get("O") > 0 and mf.get("N") <=3 and mf.get("P") <= 2 and (3 * mf.get("P")) <= mf.get("O"), self.molecular_formulas))
    -720        if len(candidates) >0:
    -721            if lowest_error:
    -722                return min(candidates, key=lambda m: abs(m.mz_error))
    -723            else:
    -724                return candidates
    -725        else:
    -726            return candidates
    +            
    768    def molecular_formula_earth_filter(self, lowest_error=True):
    +769        """Filter molecular formula using the 'Earth' filter
    +770
    +771        This function applies the Formularity-esque 'Earth' filter to possible molecular formula assignments.
    +772        Earth Filter:
    +773            O > 0 AND N <= 3 AND P <= 2 AND 3P <= O
    +774
    +775        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter.
    +776        Otherwise, it will return all Earth-filter compliant formulas.
    +777
    +778        Parameters
    +779        ----------
    +780        lowest_error : bool, optional.
    +781            Return only the lowest error formula which also fits the Earth filter.
    +782            If False, return all Earth-filter compliant formulas. Default is True.
    +783
    +784        Returns
    +785        -------
    +786        list
    +787            List of molecular formula objects which fit the Earth filter
    +788
    +789        References
    +790        ----------
    +791        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    +792            Anal. Chem. 2017, 89, 23, 12659–12665
    +793            doi: 10.1021/acs.analchem.7b03318
    +794        """
    +795
    +796        candidates = list(
    +797            filter(
    +798                lambda mf: mf.get("O") > 0
    +799                and mf.get("N") <= 3
    +800                and mf.get("P") <= 2
    +801                and (3 * mf.get("P")) <= mf.get("O"),
    +802                self.molecular_formulas,
    +803            )
    +804        )
    +805        if len(candidates) > 0:
    +806            if lowest_error:
    +807                return min(candidates, key=lambda m: abs(m.mz_error))
    +808            else:
    +809                return candidates
    +810        else:
    +811            return candidates
     
    @@ -3074,14 +3377,14 @@
    Returns
    Earth Filter: O > 0 AND N <= 3 AND P <= 2 AND 3P <= O

    -

    If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter. -Otherwise, it will return all Earth-filter compliant formulas.

    +

    If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter. +Otherwise, it will return all Earth-filter compliant formulas.

    Parameters
    • lowest_error (bool, optional.): -Return only the lowest error formula which also fits the Earth filter. +Return only the lowest error formula which also fits the Earth filter. If False, return all Earth-filter compliant formulas. Default is True.
    @@ -3113,42 +3416,50 @@
    References
    -
    728    def molecular_formula_water_filter(self, lowest_error=True):
    -729        """ Filter molecular formula using the 'Water' filter
    -730
    -731        This function applies the Formularity-esque 'Water' filter to possible molecular formula assignments.
    -732        Water Filter:
    -733            O > 0 AND N <= 3 AND S <= 2 AND P <= 2
    -734        
    -735        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Water filter.
    -736        Otherwise, it will return all Water-filter compliant formulas.
    -737
    -738        Parameters
    -739        ----------
    -740        lowest_error : bool, optional
    -741            Return only the lowest error formula which also fits the Water filter.
    -742            If False, return all Water-filter compliant formulas. Defaults to 2
    -743
    -744        Returns 
    -745        -------
    -746        list
    -747            List of molecular formula objects which fit the Water filter
    -748
    -749        References
    -750        ----------
    -751        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    -752            Anal. Chem. 2017, 89, 23, 12659–12665
    -753            doi: 10.1021/acs.analchem.7b03318
    -754        """
    -755       
    -756        candidates = list(filter(lambda mf: mf.get("O") > 0 and mf.get("N") <=3 and mf.get("S") <=2 and  mf.get("P") <= 2, self.molecular_formulas))
    -757        if len(candidates) >0:
    -758            if lowest_error:
    -759                return min(candidates, key=lambda m: abs(m.mz_error))
    -760            else:
    -761                return candidates
    -762        else:
    -763            return candidates
    +            
    813    def molecular_formula_water_filter(self, lowest_error=True):
    +814        """Filter molecular formula using the 'Water' filter
    +815
    +816        This function applies the Formularity-esque 'Water' filter to possible molecular formula assignments.
    +817        Water Filter:
    +818            O > 0 AND N <= 3 AND S <= 2 AND P <= 2
    +819
    +820        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Water filter.
    +821        Otherwise, it will return all Water-filter compliant formulas.
    +822
    +823        Parameters
    +824        ----------
    +825        lowest_error : bool, optional
    +826            Return only the lowest error formula which also fits the Water filter.
    +827            If False, return all Water-filter compliant formulas. Defaults to True.
    +828
    +829        Returns
    +830        -------
    +831        list
    +832            List of molecular formula objects which fit the Water filter
    +833
    +834        References
    +835        ----------
    +836        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    +837            Anal. Chem. 2017, 89, 23, 12659–12665
    +838            doi: 10.1021/acs.analchem.7b03318
    +839        """
    +840
    +841        candidates = list(
    +842            filter(
    +843                lambda mf: mf.get("O") > 0
    +844                and mf.get("N") <= 3
    +845                and mf.get("S") <= 2
    +846                and mf.get("P") <= 2,
    +847                self.molecular_formulas,
    +848            )
    +849        )
    +850        if len(candidates) > 0:
    +851            if lowest_error:
    +852                return min(candidates, key=lambda m: abs(m.mz_error))
    +853            else:
    +854                return candidates
    +855        else:
    +856            return candidates
     
    @@ -3171,8 +3482,9 @@
    Parameters
    Returns
    -

    list - List of molecular formula objects which fit the Water filter

    +
      +
    • list: List of molecular formula objects which fit the Water filter
    • +
    References
    @@ -3196,44 +3508,52 @@
    References
    -
    765    def molecular_formula_air_filter(self, lowest_error=True):
    -766        """ Filter molecular formula using the 'Air' filter
    -767
    -768        This function applies the Formularity-esque 'Air' filter to possible molecular formula assignments.
    -769        Air Filter:
    -770            O > 0 AND N <= 3 AND S <= 1 AND P = 0 AND 3(S+N) <= O
    -771        
    -772        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Air filter.
    -773        Otherwise, it will return all Air-filter compliant formulas.
    -774
    -775        Parameters
    -776        ----------
    -777        lowest_error : bool, optional
    -778            Return only the lowest error formula which also fits the Air filter.
    -779            If False, return all Air-filter compliant formulas. Defaults to True.
    -780
    -781        Returns
    -782        -------
    -783        list
    -784            List of molecular formula objects which fit the Air filter
    -785            
    -786        References
    -787        ----------
    -788        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    -789            Anal. Chem. 2017, 89, 23, 12659–12665
    -790            doi: 10.1021/acs.analchem.7b03318
    -791        """
    -792
    -793       
    -794        candidates = list(filter(lambda mf: mf.get("O") > 0 and mf.get("N") <=2 and mf.get("S") <=1 and  mf.get("P") == 0 and 3* (mf.get("S") + mf.get("N")) <= mf.get("O"), self.molecular_formulas))
    -795        
    -796        if len(candidates) >0:
    -797            if lowest_error:
    -798                return min(candidates, key=lambda m: abs(m.mz_error))
    -799            else:
    -800                return candidates
    -801        else:
    -802            return candidates
    +            
    858    def molecular_formula_air_filter(self, lowest_error=True):
    +859        """Filter molecular formula using the 'Air' filter
    +860
    +861        This function applies the Formularity-esque 'Air' filter to possible molecular formula assignments.
    +862        Air Filter:
    +863            O > 0 AND N <= 2 AND S <= 1 AND P = 0 AND 3(S+N) <= O
    +864
    +865        If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Air filter.
    +866        Otherwise, it will return all Air-filter compliant formulas.
    +867
    +868        Parameters
    +869        ----------
    +870        lowest_error : bool, optional
    +871            Return only the lowest error formula which also fits the Air filter.
    +872            If False, return all Air-filter compliant formulas. Defaults to True.
    +873
    +874        Returns
    +875        -------
    +876        list
    +877            List of molecular formula objects which fit the Air filter
    +878
    +879        References
    +880        ----------
    +881        1. Nikola Tolic et al., "Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra"
    +882            Anal. Chem. 2017, 89, 23, 12659–12665
    +883            doi: 10.1021/acs.analchem.7b03318
    +884        """
    +885
    +886        candidates = list(
    +887            filter(
    +888                lambda mf: mf.get("O") > 0
    +889                and mf.get("N") <= 2
    +890                and mf.get("S") <= 1
    +891                and mf.get("P") == 0
    +892                and 3 * (mf.get("S") + mf.get("N")) <= mf.get("O"),
    +893                self.molecular_formulas,
    +894            )
    +895        )
    +896
    +897        if len(candidates) > 0:
    +898            if lowest_error:
    +899                return min(candidates, key=lambda m: abs(m.mz_error))
    +900            else:
    +901                return candidates
    +902        else:
    +903            return candidates
     
    @@ -3282,40 +3602,45 @@
    References
    -
    804    def cia_score_S_P_error(self):
    -805        """ Compound Identification Algorithm SP Error - Assignment Filter
    -806         
    -807        This function applies the Compound Identification Algorithm (CIA) SP Error filter to possible molecular formula assignments.
    -808
    -809        It takes the molecular formula with the lowest S+P count, and returns the formula with the lowest absolute error from this subset.
    -810        
    -811        Returns
    -812        -------
    -813        MolecularFormula
    -814            A single molecular formula which fits the rules of the CIA SP Error filter
    -815
    -816
    -817        References
    -818        ----------
    -819        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    -820            Anal. Chem. 2006, 78, 13, 4363–4373
    -821            doi: 10.1021/ac0600306
    -822        """
    -823        #case EFormulaScore.HAcap:
    -824
    -825        lowest_S_P_mf = min(self.molecular_formulas, key=lambda mf: mf.get('S') + mf.get('P'))
    -826        lowest_S_P_count = lowest_S_P_mf.get("S") + lowest_S_P_mf.get("P")
    -827        
    -828        list_same_s_p = list(filter(lambda mf: mf.get('S') + mf.get('P') == lowest_S_P_count, self.molecular_formulas))
    -829
    -830        #check if list is not empty
    -831        if list_same_s_p:
    -832        
    -833            return min(list_same_s_p, key=lambda m: abs(m.mz_error))
    -834        
    -835        else:
    -836        
    -837            return lowest_S_P_mf
    +            
    905    def cia_score_S_P_error(self):
    +906        """Compound Identification Algorithm SP Error - Assignment Filter
    +907
    +908        This function applies the Compound Identification Algorithm (CIA) SP Error filter to possible molecular formula assignments.
    +909
    +910        It takes the molecular formula with the lowest S+P count, and returns the formula with the lowest absolute error from this subset.
    +911
    +912        Returns
    +913        -------
    +914        MolecularFormula
    +915            A single molecular formula which fits the rules of the CIA SP Error filter
    +916
    +917
    +918        References
    +919        ----------
    +920        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    +921            Anal. Chem. 2006, 78, 13, 4363–4373
    +922            doi: 10.1021/ac0600306
    +923        """
    +924        # case EFormulaScore.HAcap:
    +925
    +926        lowest_S_P_mf = min(
    +927            self.molecular_formulas, key=lambda mf: mf.get("S") + mf.get("P")
    +928        )
    +929        lowest_S_P_count = lowest_S_P_mf.get("S") + lowest_S_P_mf.get("P")
    +930
    +931        list_same_s_p = list(
    +932            filter(
    +933                lambda mf: mf.get("S") + mf.get("P") == lowest_S_P_count,
    +934                self.molecular_formulas,
    +935            )
    +936        )
    +937
    +938        # check if list is not empty
    +939        if list_same_s_p:
    +940            return min(list_same_s_p, key=lambda m: abs(m.mz_error))
    +941
    +942        else:
    +943            return lowest_S_P_mf
     
    @@ -3353,54 +3678,70 @@
    References
    -
    839    def cia_score_N_S_P_error(self):
    -840        """ Compound Identification Algorithm NSP Error - Assignment Filter
    -841        
    -842        This function applies the Compound Identification Algorithm (CIA) NSP Error filter to possible molecular formula assignments.
    -843
    -844        It takes the molecular formula with the lowest N+S+P count, and returns the formula with the lowest absolute error from this subset.
    -845
    -846        Returns
    -847        -------
    -848        MolecularFormula
    -849            A single molecular formula which fits the rules of the CIA NSP Error filter
    -850
    -851        References
    -852        ----------
    -853        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    -854            Anal. Chem. 2006, 78, 13, 4363–4373
    -855            doi: 10.1021/ac0600306
    -856
    -857        Raises
    -858        -------
    -859        Exception
    -860            If no molecular formula are associated with mass spectrum peak.
    -861        """
    -862        #case EFormulaScore.HAcap:
    -863        if self.molecular_formulas:
    -864
    -865            lowest_N_S_P_mf = min(self.molecular_formulas, key=lambda mf: mf.get('N') + mf.get('S') + mf.get('P'))
    -866            lowest_N_S_P_count = lowest_N_S_P_mf.get("N") + lowest_N_S_P_mf.get("S") + lowest_N_S_P_mf.get("P")
    -867
    -868            list_same_N_S_P = list(filter(lambda mf: mf.get('N') + mf.get('S') + mf.get('P') == lowest_N_S_P_count, self.molecular_formulas))
    -869
    -870            if list_same_N_S_P:
    -871
    -872                SP_filtered_list =  list(filter(lambda mf: (mf.get("S") <= 3 ) and  (mf.get("P")  <= 1 ), list_same_N_S_P))
    -873                
    -874                if SP_filtered_list:
    -875                    
    -876                    return min(SP_filtered_list, key=lambda m: abs(m.mz_error)) 
    -877                
    -878                else:    
    -879                    
    -880                    return min(list_same_N_S_P, key=lambda m: abs(m.mz_error))            
    -881            
    -882            else:
    -883                
    -884                return lowest_N_S_P_mf 
    -885        else:
    -886            raise Exception("No molecular formula associated with the mass spectrum peak at m/z: %.6f" % self.mz_exp)
    +            
     945    def cia_score_N_S_P_error(self):
    + 946        """Compound Identification Algorithm NSP Error - Assignment Filter
    + 947
    + 948        This function applies the Compound Identification Algorithm (CIA) NSP Error filter to possible molecular formula assignments.
    + 949
    + 950        It takes the molecular formula with the lowest N+S+P count, and returns the formula with the lowest absolute error from this subset.
    + 951
    + 952        Returns
    + 953        -------
    + 954        MolecularFormula
    + 955            A single molecular formula which fits the rules of the CIA NSP Error filter
    + 956
    + 957        References
    + 958        ----------
    + 959        1. Elizabeth B. Kujawinski and Mark D. Behn, "Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter"
    + 960            Anal. Chem. 2006, 78, 13, 4363–4373
    + 961            doi: 10.1021/ac0600306
    + 962
    + 963        Raises
    + 964        -------
    + 965        Exception
    + 966            If no molecular formula are associated with mass spectrum peak.
    + 967        """
    + 968        # case EFormulaScore.HAcap:
    + 969        if self.molecular_formulas:
    + 970            lowest_N_S_P_mf = min(
    + 971                self.molecular_formulas,
    + 972                key=lambda mf: mf.get("N") + mf.get("S") + mf.get("P"),
    + 973            )
    + 974            lowest_N_S_P_count = (
    + 975                lowest_N_S_P_mf.get("N")
    + 976                + lowest_N_S_P_mf.get("S")
    + 977                + lowest_N_S_P_mf.get("P")
    + 978            )
    + 979
    + 980            list_same_N_S_P = list(
    + 981                filter(
    + 982                    lambda mf: mf.get("N") + mf.get("S") + mf.get("P")
    + 983                    == lowest_N_S_P_count,
    + 984                    self.molecular_formulas,
    + 985                )
    + 986            )
    + 987
    + 988            if list_same_N_S_P:
    + 989                SP_filtered_list = list(
    + 990                    filter(
    + 991                        lambda mf: (mf.get("S") <= 3) and (mf.get("P") <= 1),
    + 992                        list_same_N_S_P,
    + 993                    )
    + 994                )
    + 995
    + 996                if SP_filtered_list:
    + 997                    return min(SP_filtered_list, key=lambda m: abs(m.mz_error))
    + 998
    + 999                else:
    +1000                    return min(list_same_N_S_P, key=lambda m: abs(m.mz_error))
    +1001
    +1002            else:
    +1003                return lowest_N_S_P_mf
    +1004        else:
    +1005            raise Exception(
    +1006                "No molecular formula associated with the mass spectrum peak at m/z: %.6f"
    +1007                % self.mz_exp
    +1008            )
     
    diff --git a/docs/corems/ms_peak/factory/MSPeakClasses.html b/docs/corems/ms_peak/factory/MSPeakClasses.html index 3455ef0e..07647979 100644 --- a/docs/corems/ms_peak/factory/MSPeakClasses.html +++ b/docs/corems/ms_peak/factory/MSPeakClasses.html @@ -93,17 +93,17 @@

      1__author__ = "Yuri E. Corilo"
       2__date__ = "Jun 12, 2019"
       3
    -  4from copy import deepcopy
    -  5import math
    -  6from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula
    -  7from numpy import nan
    -  8from corems.ms_peak.calc.MSPeakCalc import MSPeakCalculation
    -  9from corems.mass_spectra.calc import SignalProcessing as sp
    - 10from numpy import NaN, power
    +  4import math
    +  5
    +  6from numpy import nan
    +  7
    +  8from corems.mass_spectra.calc import SignalProcessing as sp
    +  9from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula
    + 10from corems.ms_peak.calc.MSPeakCalc import MSPeakCalculation
      11
      12
      13class _MSPeak(MSPeakCalculation):
    - 14    """ A class representing a peak in a mass spectrum.
    + 14    """A class representing a peak in a mass spectrum.
      15
      16    Parameters:
      17    ----------
    @@ -219,8 +219,8 @@ 

    127 return self.molecular_formulas[position] 128 129 def change_kendrick_base(self, kendrick_dict_base): -130 """ Changes the kendrick base for the peak. -131 +130 """Changes the kendrick base for the peak. +131 132 Parameters: 133 ---------- 134 kendrick_dict_base : dict @@ -232,7 +232,7 @@

    140 ) 141 142 def add_molecular_formula(self, molecular_formula_obj): -143 """ Adds a molecular formula to the peak. +143 """Adds a molecular formula to the peak. 144 145 Parameters: 146 ---------- @@ -243,7 +243,7 @@

    151 ------- 152 MolecularFormula 153 The molecular formula object added. -154 +154 155 """ 156 # freeze state 157 molecular_formula_obj._mspeak_parent = self @@ -258,7 +258,7 @@

    166 return molecular_formula_obj 167 168 def remove_molecular_formula(self, mf_obj): -169 """ Removes a molecular formula from the peak. +169 """Removes a molecular formula from the peak. 170 171 Parameters: 172 ---------- @@ -268,315 +268,321 @@

    176 self.molecular_formulas.remove(mf_obj) 177 178 def clear_molecular_formulas(self): -179 """ Clears all molecular formulas associated with the peak. -180 """ -181 self.molecular_formulas = [] -182 -183 @property -184 def mz_exp(self): -185 """ The experimental m/z value of the peak.""" -186 if self.mz_cal: -187 return self.mz_cal -188 else: -189 return self._mz_exp -190 -191 @mz_exp.setter -192 def mz_exp(self, mz_exp): -193 """ Sets the experimental m/z value of the peak.""" -194 self._mz_exp = mz_exp -195 -196 @property -197 def area(self): -198 """ The area of the peak.""" -199 if self._ms_parent.is_centroid: -200 return nan -201 else: -202 return self.calc_area() -203 -204 @property -205 def nominal_mz_exp(self): -206 """ The experimental nominal (integer) m/z value of the peak.""" -207 return math.floor(self.mz_exp) -208 -209 @property -210 def kmd(self): -211 """ The Kendrick mass defect of the peak.""" -212 return self._kmd -213 -214 @property -215 def kendrick_mass(self): -216 """ The Kendrick mass of the peak.""" -217 return self._kendrick_mass -218 -219 @property -220 def knm(self): -221 """ The Kendrick nominal mass of the peak.""" -222 return self._nominal_km -223 -224 @property -225 def is_assigned(self) -> bool: -226 """ Whether the peak is assigned or not.""" -227 return bool(self.molecular_formulas) -228 -229 def plot_simulation( -230 self, -231 sim_type="lorentz", -232 ax=None, -233 color="green", -234 oversample_multiplier=1, -235 delta_rp=0, -236 mz_overlay=1, -237 ): -238 """ Plots the simulated peak. -239 -240 Parameters: -241 ---------- -242 sim_type : str, optional -243 The type of simulation to be plotted. -244 Default is "lorentz". -245 ax : matplotlib.axes, optional -246 The axes to plot the simulated peak. -247 Default is None. -248 color : str, optional -249 The color of the simulated peak. -250 Default is "green". -251 oversample_multiplier : int, optional -252 The oversample multiplier. -253 Default is 1. 
-254 delta_rp : int, optional -255 A delta value to the resolving power -256 Default is 0. -257 mz_overlay : int, optional -258 The mz overlay. -259 Default is 1. -260 -261 Returns: -262 ------- -263 matplotlib.axes -264 The axes where the simulated peak was plotted. -265 -266 """ -267 if self._ms_parent: -268 import matplotlib.pyplot as plt -269 -270 x, y = eval( -271 "self." -272 + sim_type -273 + "(oversample_multiplier=" -274 + str(oversample_multiplier) -275 + ", delta_rp=" -276 + str(delta_rp) -277 + ", mz_overlay=" -278 + str(mz_overlay) -279 + ")" -280 ) -281 -282 if ax is None: -283 ax = plt.gca() -284 ax.plot(x, y, color=color, label="Simulation") -285 ax.set(xlabel="m/z", ylabel="abundance") -286 -287 plt.legend() -288 return ax -289 -290 def plot( -291 self, ax=None, color :str="black", derivative : bool=True, deriv_color :str="red"): # pragma: no cover -292 """ Plots the peak. -293 -294 Parameters: -295 ---------- -296 ax : matplotlib.axes, optional -297 The axes to plot the peak. -298 Default is None. -299 color : str, optional -300 The color of the peak. -301 Default is "black". -302 derivative : bool, optional -303 Whether to plot the derivative of the peak. -304 Default is True. -305 deriv_color : str, optional -306 The color of the derivative of the peak. -307 Default is "red". -308 -309 Returns: -310 ------- -311 matplotlib.axes -312 The axes where the peak was plotted. 
-313 -314 """ -315 if self._ms_parent: -316 import matplotlib.pyplot as plt +179 """Clears all molecular formulas associated with the peak.""" +180 self.molecular_formulas = [] +181 +182 @property +183 def mz_exp(self): +184 """The experimental m/z value of the peak.""" +185 if self.mz_cal: +186 return self.mz_cal +187 else: +188 return self._mz_exp +189 +190 @mz_exp.setter +191 def mz_exp(self, mz_exp): +192 """Sets the experimental m/z value of the peak.""" +193 self._mz_exp = mz_exp +194 +195 @property +196 def area(self): +197 """The area of the peak.""" +198 if self._ms_parent.is_centroid: +199 return nan +200 else: +201 return self.calc_area() +202 +203 @property +204 def nominal_mz_exp(self): +205 """The experimental nominal (integer) m/z value of the peak.""" +206 return math.floor(self.mz_exp) +207 +208 @property +209 def kmd(self): +210 """The Kendrick mass defect of the peak.""" +211 return self._kmd +212 +213 @property +214 def kendrick_mass(self): +215 """The Kendrick mass of the peak.""" +216 return self._kendrick_mass +217 +218 @property +219 def knm(self): +220 """The Kendrick nominal mass of the peak.""" +221 return self._nominal_km +222 +223 @property +224 def is_assigned(self) -> bool: +225 """Whether the peak is assigned or not.""" +226 return bool(self.molecular_formulas) +227 +228 def plot_simulation( +229 self, +230 sim_type="lorentz", +231 ax=None, +232 color="green", +233 oversample_multiplier=1, +234 delta_rp=0, +235 mz_overlay=1, +236 ): +237 """Plots the simulated peak. +238 +239 Parameters: +240 ---------- +241 sim_type : str, optional +242 The type of simulation to be plotted. +243 Default is "lorentz". +244 ax : matplotlib.axes, optional +245 The axes to plot the simulated peak. +246 Default is None. +247 color : str, optional +248 The color of the simulated peak. +249 Default is "green". +250 oversample_multiplier : int, optional +251 The oversample multiplier. +252 Default is 1. 
+253 delta_rp : int, optional +254 A delta value to the resolving power +255 Default is 0. +256 mz_overlay : int, optional +257 The mz overlay. +258 Default is 1. +259 +260 Returns: +261 ------- +262 matplotlib.axes +263 The axes where the simulated peak was plotted. +264 +265 """ +266 if self._ms_parent: +267 import matplotlib.pyplot as plt +268 +269 x, y = eval( +270 "self." +271 + sim_type +272 + "(oversample_multiplier=" +273 + str(oversample_multiplier) +274 + ", delta_rp=" +275 + str(delta_rp) +276 + ", mz_overlay=" +277 + str(mz_overlay) +278 + ")" +279 ) +280 +281 if ax is None: +282 ax = plt.gca() +283 ax.plot(x, y, color=color, label="Simulation") +284 ax.set(xlabel="m/z", ylabel="abundance") +285 +286 plt.legend() +287 return ax +288 +289 def plot( +290 self, +291 ax=None, +292 color: str = "black", +293 derivative: bool = True, +294 deriv_color: str = "red", +295 ): # pragma: no cover +296 """Plots the peak. +297 +298 Parameters: +299 ---------- +300 ax : matplotlib.axes, optional +301 The axes to plot the peak. +302 Default is None. +303 color : str, optional +304 The color of the peak. +305 Default is "black". +306 derivative : bool, optional +307 Whether to plot the derivative of the peak. +308 Default is True. +309 deriv_color : str, optional +310 The color of the derivative of the peak. +311 Default is "red". +312 +313 Returns: +314 ------- +315 matplotlib.axes +316 The axes where the peak was plotted. 
317 -318 if ax is None: -319 ax = plt.gca() -320 x = self._ms_parent.mz_exp_profile[self.peak_left_index : self.peak_right_index] -321 y = self._ms_parent.abundance_profile[self.peak_left_index : self.peak_right_index] -322 -323 ax.plot(x, y, color=color, label="Data") -324 ax.set(xlabel="m/z", ylabel="abundance") -325 if derivative and not self._ms_parent.is_centroid: -326 dy = sp.derivate( -327 self._ms_parent.abundance_profile[ -328 self.peak_left_index : self.peak_right_index + 1 -329 ] -330 ) -331 ax.plot(x, dy, c=deriv_color) -332 else: -333 ax.plot( -334 (self.mz_exp, self.mz_exp), -335 (0, self.abundance), -336 color=color, -337 label="Data", +318 """ +319 if self._ms_parent: +320 import matplotlib.pyplot as plt +321 +322 if ax is None: +323 ax = plt.gca() +324 x = self._ms_parent.mz_exp_profile[ +325 self.peak_left_index : self.peak_right_index +326 ] +327 y = self._ms_parent.abundance_profile[ +328 self.peak_left_index : self.peak_right_index +329 ] +330 +331 ax.plot(x, y, color=color, label="Data") +332 ax.set(xlabel="m/z", ylabel="abundance") +333 if derivative and not self._ms_parent.is_centroid: +334 dy = sp.derivate( +335 self._ms_parent.abundance_profile[ +336 self.peak_left_index : self.peak_right_index + 1 +337 ] 338 ) -339 -340 # plt.legend() -341 -342 return ax -343 -344 else: -345 raise AttributeError("No parent mass spectrum object found to plot the peak.") -346 -347 @property -348 def best_molecular_formula_candidate(self): -349 """ The best molecular formula candidate for the peak. -350 -351 Returns a single best formula candidate based on the user defined score method. -352 Score method is set with: -353 molecular_search_settings.score_method -354 -355 Returns -356 ------- -357 MolecularFormula -358 The best molecular formula candidate for the peak. 
-359 -360 """ -361 if ( -362 self._ms_parent.molecular_search_settings.score_method -363 == "N_S_P_lowest_error" -364 ): -365 return self.cia_score_N_S_P_error() -366 -367 elif ( -368 self._ms_parent.molecular_search_settings.score_method == "S_P_lowest_error" -369 ): -370 return self.cia_score_S_P_error() -371 -372 elif self._ms_parent.molecular_search_settings.score_method == "lowest_error": -373 return self.molecular_formula_lowest_error() -374 -375 elif ( -376 self._ms_parent.molecular_search_settings.score_method == "air_filter_error" -377 ): -378 return self.molecular_formula_air_filter() -379 -380 elif ( -381 self._ms_parent.molecular_search_settings.score_method -382 == "water_filter_error" -383 ): -384 return self.molecular_formula_water_filter() -385 -386 elif ( -387 self._ms_parent.molecular_search_settings.score_method -388 == "earth_filter_error" -389 ): -390 return self.molecular_formula_earth_filter() -391 -392 elif self._ms_parent.molecular_search_settings.score_method == "prob_score": -393 return self.molecular_formula_highest_prob_score() -394 else: -395 raise TypeError( -396 "Unknown score method selected: % s, \ -397 Please check score_method at \ -398 encapsulation.settings.molecular_id.MolecularIDSettings.MolecularFormulaSearchSettings", -399 self._ms_parent.parameters.molecular_search.score_method, -400 ) +339 ax.plot(x, dy, c=deriv_color) +340 else: +341 ax.plot( +342 (self.mz_exp, self.mz_exp), +343 (0, self.abundance), +344 color=color, +345 label="Data", +346 ) +347 +348 # plt.legend() +349 +350 return ax +351 +352 else: +353 raise AttributeError( +354 "No parent mass spectrum object found to plot the peak." +355 ) +356 +357 @property +358 def best_molecular_formula_candidate(self): +359 """The best molecular formula candidate for the peak. +360 +361 Returns a single best formula candidate based on the user defined score method. 
+362 Score method is set with: +363 molecular_search_settings.score_method +364 +365 Returns +366 ------- +367 MolecularFormula +368 The best molecular formula candidate for the peak. +369 +370 """ +371 if ( +372 self._ms_parent.molecular_search_settings.score_method +373 == "N_S_P_lowest_error" +374 ): +375 return self.cia_score_N_S_P_error() +376 +377 elif ( +378 self._ms_parent.molecular_search_settings.score_method == "S_P_lowest_error" +379 ): +380 return self.cia_score_S_P_error() +381 +382 elif self._ms_parent.molecular_search_settings.score_method == "lowest_error": +383 return self.molecular_formula_lowest_error() +384 +385 elif ( +386 self._ms_parent.molecular_search_settings.score_method == "air_filter_error" +387 ): +388 return self.molecular_formula_air_filter() +389 +390 elif ( +391 self._ms_parent.molecular_search_settings.score_method +392 == "water_filter_error" +393 ): +394 return self.molecular_formula_water_filter() +395 +396 elif ( +397 self._ms_parent.molecular_search_settings.score_method +398 == "earth_filter_error" +399 ): +400 return self.molecular_formula_earth_filter() 401 -402 -403class ICRMassPeak(_MSPeak): -404 """A class representing a peak in an ICR mass spectrum. -405 -406 """ -407 def __init__(self, *args, ms_parent=None, exp_freq=None): -408 super().__init__(*args, exp_freq=exp_freq, ms_parent=ms_parent) -409 -410 def resolving_power_calc(self, B, T): -411 """ Calculate the theoretical resolving power of the peak. -412 -413 Parameters -414 ---------- -415 T: float -416 transient time -417 B: float -418 Magnetic Filed Strength (Tesla) -419 -420 Returns -421 ------- -422 float -423 Theoretical resolving power of the peak. -424 -425 References -426 ---------- -427 1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.) 
-428 DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K -429 """ -430 return (1.274e7 * self.ion_charge * B * T) / (self.mz_exp * self.ion_charge) -431 -432 def set_calc_resolving_power(self, B : float, T : float): -433 """ Set the resolving power of the peak to the calculated one. -434 """ -435 self.resolving_power = self.resolving_power_calc(B, T) -436 -437 def _mz_to_f_bruker(self): -438 """ [Not Functional] Convert a peak m/z value to frequency -439 -440 # Currently Broken - Not sure why -441 if self.mz_cal: -442 mz_val = self.mz_cal -443 else: -444 mz_val = self.mz_exp -445 Aterm, Bterm, Cterm = self._ms_parent.Aterm, self._ms_parent.Bterm, self._ms_parent.Cterm -446 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage -447 -448 if Cterm == 0: -449 -450 if Bterm == 0: -451 #uncalibrated data -452 freq_domain = Aterm / mz_val -453 -454 else: -455 -456 freq_domain = (Aterm / (mz_val)) - Bterm +402 elif self._ms_parent.molecular_search_settings.score_method == "prob_score": +403 return self.molecular_formula_highest_prob_score() +404 else: +405 raise TypeError( +406 "Unknown score method selected: % s, \ +407 Please check score_method at \ +408 encapsulation.settings.molecular_id.MolecularIDSettings.MolecularFormulaSearchSettings", +409 self._ms_parent.parameters.molecular_search.score_method, +410 ) +411 +412 +413class ICRMassPeak(_MSPeak): +414 """A class representing a peak in an ICR mass spectrum.""" +415 +416 def __init__(self, *args, ms_parent=None, exp_freq=None): +417 super().__init__(*args, exp_freq=exp_freq, ms_parent=ms_parent) +418 +419 def resolving_power_calc(self, B, T): +420 """Calculate the theoretical resolving power of the peak. +421 +422 Parameters +423 ---------- +424 T: float +425 transient time +426 B: float +427 Magnetic Filed Strength (Tesla) +428 +429 Returns +430 ------- +431 float +432 Theoretical resolving power of the peak. 
+433 +434 References +435 ---------- +436 1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.) +437 DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K +438 """ +439 return (1.274e7 * self.ion_charge * B * T) / (self.mz_exp * self.ion_charge) +440 +441 def set_calc_resolving_power(self, B: float, T: float): +442 """Set the resolving power of the peak to the calculated one.""" +443 self.resolving_power = self.resolving_power_calc(B, T) +444 +445 def _mz_to_f_bruker(self): +446 """[Not Functional] Convert a peak m/z value to frequency +447 +448 # Currently Broken - Not sure why +449 if self.mz_cal: +450 mz_val = self.mz_cal +451 else: +452 mz_val = self.mz_exp +453 Aterm, Bterm, Cterm = self._ms_parent.Aterm, self._ms_parent.Bterm, self._ms_parent.Cterm +454 # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage +455 +456 if Cterm == 0: 457 -458 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to -459 else: -460 -461 freq_domain = (Aterm / mz_val) + (Bterm / power(mz_val, 2)) + Cterm -462 -463 return freq_domain -464 """ -465 raise RuntimeError("Function not confirmed to work, disabled.") -466 -467class TOFMassPeak(_MSPeak): -468 """ A class representing a peak in a TOF mass spectrum. 
-469 -470 -471 """ -472 def __init__(self, *args, exp_freq=None): -473 super().__init__(*args, exp_freq=exp_freq) +458 if Bterm == 0: +459 #uncalibrated data +460 freq_domain = Aterm / mz_val +461 +462 else: +463 +464 freq_domain = (Aterm / (mz_val)) - Bterm +465 +466 # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to +467 else: +468 +469 freq_domain = (Aterm / mz_val) + (Bterm / power(mz_val, 2)) + Cterm +470 +471 return freq_domain +472 """ +473 raise RuntimeError("Function not confirmed to work, disabled.") 474 -475 def set_calc_resolving_power(self): -476 return 0 -477 +475 +476class TOFMassPeak(_MSPeak): +477 """A class representing a peak in a TOF mass spectrum.""" 478 -479class OrbiMassPeak(_MSPeak): -480 """ A class representing a peak in an Orbitrap mass spectrum. -481 -482 """ -483 def __init__(self, *args, exp_freq=None): -484 super().__init__(*args, exp_freq=exp_freq) +479 def __init__(self, *args, exp_freq=None): +480 super().__init__(*args, exp_freq=exp_freq) +481 +482 def set_calc_resolving_power(self): +483 return 0 +484 485 -486 def set_calc_resolving_power(self): -487 return 0 +486class OrbiMassPeak(_MSPeak): +487 """A class representing a peak in an Orbitrap mass spectrum.""" +488 +489 def __init__(self, *args, exp_freq=None): +490 super().__init__(*args, exp_freq=exp_freq) +491 +492 def set_calc_resolving_power(self): +493 return 0

    @@ -592,69 +598,67 @@

    -
    404class ICRMassPeak(_MSPeak):
    -405    """A class representing a peak in an ICR mass spectrum.
    -406    
    -407    """
    -408    def __init__(self, *args, ms_parent=None, exp_freq=None):
    -409        super().__init__(*args, exp_freq=exp_freq, ms_parent=ms_parent)
    -410
    -411    def resolving_power_calc(self, B, T):
    -412        """ Calculate the theoretical resolving power of the peak.
    -413        
    -414        Parameters
    -415        ----------
    -416        T: float
    -417            transient time
    -418        B: float
    -419            Magnetic Filed Strength (Tesla)
    -420        
    -421        Returns
    -422        -------
    -423        float
    -424            Theoretical resolving power of the peak.
    -425
    -426        References
    -427        ----------
    -428        1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)
    -429            DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    -430        """
    -431        return (1.274e7 * self.ion_charge * B * T) / (self.mz_exp * self.ion_charge)
    -432
    -433    def set_calc_resolving_power(self, B : float, T : float):
    -434        """ Set the resolving power of the peak to the calculated one.
    -435        """
    -436        self.resolving_power = self.resolving_power_calc(B, T)
    -437
    -438    def _mz_to_f_bruker(self):
    -439        """ [Not Functional] Convert a peak m/z value to frequency
    -440        
    -441        # Currently Broken - Not sure why
    -442        if self.mz_cal:
    -443            mz_val = self.mz_cal
    -444        else:
    -445            mz_val = self.mz_exp
    -446        Aterm, Bterm, Cterm = self._ms_parent.Aterm, self._ms_parent.Bterm, self._ms_parent.Cterm
    -447        # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage
    -448        
    -449        if Cterm == 0:
    -450            
    -451            if Bterm == 0:
    -452                #uncalibrated data
    -453                freq_domain = Aterm / mz_val
    -454                
    -455            else:
    -456                
    -457                freq_domain = (Aterm / (mz_val)) - Bterm
    +            
    414class ICRMassPeak(_MSPeak):
    +415    """A class representing a peak in an ICR mass spectrum."""
    +416
    +417    def __init__(self, *args, ms_parent=None, exp_freq=None):
    +418        super().__init__(*args, exp_freq=exp_freq, ms_parent=ms_parent)
    +419
    +420    def resolving_power_calc(self, B, T):
    +421        """Calculate the theoretical resolving power of the peak.
    +422
    +423        Parameters
    +424        ----------
    +425        T: float
    +426            transient time
    +427        B: float
    +428            Magnetic Filed Strength (Tesla)
    +429
    +430        Returns
    +431        -------
    +432        float
    +433            Theoretical resolving power of the peak.
    +434
    +435        References
    +436        ----------
    +437        1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)
    +438            DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    +439        """
    +440        return (1.274e7 * self.ion_charge * B * T) / (self.mz_exp * self.ion_charge)
    +441
    +442    def set_calc_resolving_power(self, B: float, T: float):
    +443        """Set the resolving power of the peak to the calculated one."""
    +444        self.resolving_power = self.resolving_power_calc(B, T)
    +445
    +446    def _mz_to_f_bruker(self):
    +447        """[Not Functional] Convert a peak m/z value to frequency
    +448
    +449        # Currently Broken - Not sure why
    +450        if self.mz_cal:
    +451            mz_val = self.mz_cal
    +452        else:
    +453            mz_val = self.mz_exp
    +454        Aterm, Bterm, Cterm = self._ms_parent.Aterm, self._ms_parent.Bterm, self._ms_parent.Cterm
    +455        # Check if the Bterm of Ledford equation scales with the ICR trap voltage or not then Bterm = Bterm*trap_voltage
    +456
    +457        if Cterm == 0:
     458
    -459        # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to
    -460        else:
    -461
    -462            freq_domain = (Aterm / mz_val) + (Bterm / power(mz_val, 2)) + Cterm
    -463
    -464        return freq_domain
    -465        """
    -466        raise RuntimeError("Function not confirmed to work, disabled.") 
    +459            if Bterm == 0:
    +460                #uncalibrated data
    +461                freq_domain = Aterm / mz_val
    +462
    +463            else:
    +464
    +465                freq_domain = (Aterm / (mz_val)) - Bterm
    +466
    +467        # @will I need you insight here, not sure what is the inverted ledford equation that Bruker refers to
    +468        else:
    +469
    +470            freq_domain = (Aterm / mz_val) + (Bterm / power(mz_val, 2)) + Cterm
    +471
    +472        return freq_domain
    +473        """
    +474        raise RuntimeError("Function not confirmed to work, disabled.")
     
    @@ -672,8 +676,8 @@

    -
    408    def __init__(self, *args, ms_parent=None, exp_freq=None):
    -409        super().__init__(*args, exp_freq=exp_freq, ms_parent=ms_parent)
    +            
    417    def __init__(self, *args, ms_parent=None, exp_freq=None):
    +418        super().__init__(*args, exp_freq=exp_freq, ms_parent=ms_parent)
     
    @@ -691,27 +695,27 @@

    -
    411    def resolving_power_calc(self, B, T):
    -412        """ Calculate the theoretical resolving power of the peak.
    -413        
    -414        Parameters
    -415        ----------
    -416        T: float
    -417            transient time
    -418        B: float
    -419            Magnetic Filed Strength (Tesla)
    -420        
    -421        Returns
    -422        -------
    -423        float
    -424            Theoretical resolving power of the peak.
    -425
    -426        References
    -427        ----------
    -428        1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)
    -429            DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    -430        """
    -431        return (1.274e7 * self.ion_charge * B * T) / (self.mz_exp * self.ion_charge)
    +            
    420    def resolving_power_calc(self, B, T):
    +421        """Calculate the theoretical resolving power of the peak.
    +422
    +423        Parameters
    +424        ----------
    +425        T: float
    +426            transient time
    +427        B: float
    +428            Magnetic Filed Strength (Tesla)
    +429
    +430        Returns
    +431        -------
    +432        float
    +433            Theoretical resolving power of the peak.
    +434
    +435        References
    +436        ----------
    +437        1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)
    +438            DOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    +439        """
    +440        return (1.274e7 * self.ion_charge * B * T) / (self.mz_exp * self.ion_charge)
     
    @@ -753,10 +757,9 @@
    References
    -
    433    def set_calc_resolving_power(self, B : float, T : float):
    -434        """ Set the resolving power of the peak to the calculated one.
    -435        """
    -436        self.resolving_power = self.resolving_power_calc(B, T)
    +            
    442    def set_calc_resolving_power(self, B: float, T: float):
    +443        """Set the resolving power of the peak to the calculated one."""
    +444        self.resolving_power = self.resolving_power_calc(B, T)
     
    @@ -837,16 +840,14 @@
    Inherited Members
    -
    468class TOFMassPeak(_MSPeak):
    -469    """ A class representing a peak in a TOF mass spectrum.
    -470    
    -471    
    -472    """
    -473    def __init__(self, *args, exp_freq=None):
    -474        super().__init__(*args, exp_freq=exp_freq)
    -475
    -476    def set_calc_resolving_power(self):
    -477        return 0
    +            
    477class TOFMassPeak(_MSPeak):
    +478    """A class representing a peak in a TOF mass spectrum."""
    +479
    +480    def __init__(self, *args, exp_freq=None):
    +481        super().__init__(*args, exp_freq=exp_freq)
    +482
    +483    def set_calc_resolving_power(self):
    +484        return 0
     
    @@ -864,8 +865,8 @@
    Inherited Members
    -
    473    def __init__(self, *args, exp_freq=None):
    -474        super().__init__(*args, exp_freq=exp_freq)
    +            
    480    def __init__(self, *args, exp_freq=None):
    +481        super().__init__(*args, exp_freq=exp_freq)
     
    @@ -883,8 +884,8 @@
    Inherited Members
    -
    476    def set_calc_resolving_power(self):
    -477        return 0
    +            
    483    def set_calc_resolving_power(self):
    +484        return 0
     
    @@ -963,15 +964,14 @@
    Inherited Members
    -
    480class OrbiMassPeak(_MSPeak):
    -481    """ A class representing a peak in an Orbitrap mass spectrum.
    -482    
    -483    """
    -484    def __init__(self, *args, exp_freq=None):
    -485        super().__init__(*args, exp_freq=exp_freq)
    -486
    -487    def set_calc_resolving_power(self):
    -488        return 0
    +            
    487class OrbiMassPeak(_MSPeak):
    +488    """A class representing a peak in an Orbitrap mass spectrum."""
    +489
    +490    def __init__(self, *args, exp_freq=None):
    +491        super().__init__(*args, exp_freq=exp_freq)
    +492
    +493    def set_calc_resolving_power(self):
    +494        return 0
     
    @@ -989,8 +989,8 @@
    Inherited Members
    -
    484    def __init__(self, *args, exp_freq=None):
    -485        super().__init__(*args, exp_freq=exp_freq)
    +            
    490    def __init__(self, *args, exp_freq=None):
    +491        super().__init__(*args, exp_freq=exp_freq)
     
    @@ -1008,8 +1008,8 @@
    Inherited Members
    -
    487    def set_calc_resolving_power(self):
    -488        return 0
    +            
    493    def set_calc_resolving_power(self):
    +494        return 0
     
    diff --git a/docs/corems/transient/calc/TransientCalc.html b/docs/corems/transient/calc/TransientCalc.html index 17febeb2..0181bf52 100644 --- a/docs/corems/transient/calc/TransientCalc.html +++ b/docs/corems/transient/calc/TransientCalc.html @@ -87,349 +87,362 @@

      1import gc
       2import warnings
       3
    -  4from numpy import hamming, hanning, blackman, zeros, fft, sqrt, arange, where, power, absolute, kaiser, pi, sin, linspace, ceil, log2
    -  5
    -  6__author__ = "Yuri E. Corilo"
    -  7__date__ = "Jun 12, 2019"
    -  8
    -  9
    - 10class TransientCalculations(object):
    - 11    """ Transient Calculations
    - 12    
    - 13    Parameters
    - 14    ----------
    - 15    parameters : corems.transient.parameters.TransientParameters
    - 16        The transient parameters
    - 17    bandwidth : float
    - 18        The bandwidth of the transient (Hz)
    - 19    number_data_points : int
    - 20        The number of data points of the transient
    - 21    exc_low_freq : float
    - 22        The low frequency of the excitation (Hz)
    - 23    exc_high_freq : float
    - 24        The high frequency of the excitation (Hz)
    - 25    
    - 26    Attributes
    - 27    ----------
    - 28    parameters : corems.transient.parameters.TransientParameters
    - 29        The transient parameters
    - 30    bandwidth : float
    - 31        The bandwidth of the transient (Hz)
    - 32    number_data_points : int
    - 33        The number of data points of the transient
    - 34    exc_low_freq : float
    - 35        The low frequency of the excitation (Hz)
    - 36    exc_high_freq : float
    - 37        The high frequency of the excitation (Hz)
    - 38    
    - 39    Methods
    - 40    -------
    - 41    * cal_transient_time().
    - 42        Calculate the time domain length of the transient
    - 43    * zero_fill(transient).
    - 44        Zero fill the transient
    - 45    * truncation(transient).
    - 46        Truncate the transient
    - 47    * apodization(transient).  
    - 48        Apodization of the transient
    - 49    * calculate_frequency_domain(number_data_points).
    - 50        Calculate the frequency domain (axis) of the transient
    - 51    * cut_freq_domain(freqdomain_X, freqdomain_Y).
    - 52        Cut the frequency domain of the transient
    - 53    * phase_and_absorption_mode_ft().
    - 54        [Not Functional] Produce a phased absorption mode FT spectrum
    - 55    * magnitude_mode_ft(transient).
    - 56        Perform magnitude mode FT of the transient
    - 57    * correct_dc_offset().
    - 58        [Not Yet Implemented] Correct the DC offset of the transient 
    - 59    
    - 60    """
    - 61
    - 62    
    - 63    def cal_transient_time(self):
    - 64        """ Calculate the time domain length of the transient 
    - 65        
    - 66        Returns
    - 67        -------
    - 68        float
    - 69            The time domain length of the transient (s)
    - 70        """
    - 71        return (1 / self.bandwidth) * ((self.number_data_points) / 2)
    - 72        
    - 73    def zero_fill(self, transient ):
    - 74        """ Zero fill the transient
    - 75        
    - 76        Parameters
    - 77        ----------
    - 78        transient : numpy.ndarray
    - 79            The transient data points
    - 80        
    +  4from numpy import (
    +  5    arange,
    +  6    blackman,
    +  7    ceil,
    +  8    fft,
    +  9    hamming,
    + 10    hanning,
    + 11    kaiser,
    + 12    linspace,
    + 13    log2,
    + 14    pi,
    + 15    power,
    + 16    sin,
    + 17    sqrt,
    + 18    where,
    + 19    zeros,
    + 20)
    + 21
    + 22__author__ = "Yuri E. Corilo"
    + 23__date__ = "Jun 12, 2019"
    + 24
    + 25
    + 26class TransientCalculations(object):
    + 27    """Transient Calculations
    + 28
    + 29    Parameters
    + 30    ----------
    + 31    parameters : corems.transient.parameters.TransientParameters
    + 32        The transient parameters
    + 33    bandwidth : float
    + 34        The bandwidth of the transient (Hz)
    + 35    number_data_points : int
    + 36        The number of data points of the transient
    + 37    exc_low_freq : float
    + 38        The low frequency of the excitation (Hz)
    + 39    exc_high_freq : float
    + 40        The high frequency of the excitation (Hz)
    + 41
    + 42    Attributes
    + 43    ----------
    + 44    parameters : corems.transient.parameters.TransientParameters
    + 45        The transient parameters
    + 46    bandwidth : float
    + 47        The bandwidth of the transient (Hz)
    + 48    number_data_points : int
    + 49        The number of data points of the transient
    + 50    exc_low_freq : float
    + 51        The low frequency of the excitation (Hz)
    + 52    exc_high_freq : float
    + 53        The high frequency of the excitation (Hz)
    + 54
    + 55    Methods
    + 56    -------
    + 57    * cal_transient_time().
    + 58        Calculate the time domain length of the transient
    + 59    * zero_fill(transient).
    + 60        Zero fill the transient
    + 61    * truncation(transient).
    + 62        Truncate the transient
    + 63    * apodization(transient).
    + 64        Apodization of the transient
    + 65    * calculate_frequency_domain(number_data_points).
    + 66        Calculate the frequency domain (axis) of the transient
    + 67    * cut_freq_domain(freqdomain_X, freqdomain_Y).
    + 68        Cut the frequency domain of the transient
    + 69    * phase_and_absorption_mode_ft().
    + 70        [Not Functional] Produce a phased absorption mode FT spectrum
    + 71    * magnitude_mode_ft(transient).
    + 72        Perform magnitude mode FT of the transient
    + 73    * correct_dc_offset().
    + 74        [Not Yet Implemented] Correct the DC offset of the transient
    + 75
    + 76    """
    + 77
    + 78    def cal_transient_time(self):
    + 79        """Calculate the time domain length of the transient
    + 80
      81        Returns
      82        -------
    - 83        numpy.ndarray
    - 84            The transient data points zerofilled
    - 85            
    - 86        Notes
    - 87        -----
    - 88        The number of zero fills is defined by the transient parameter number_of_zero_fills.
    - 89        The function first calculate the next power of two of the transient length and zero fills to that length, to take advantage of FFT algorithm.
    - 90            If the parameter next_power_of_two is set to False, the function will zero fill to the length of the original transient times the number of zero fills
    - 91
    - 92        """
    - 93        if self.parameters.next_power_of_two:
    - 94            exponent = int(ceil(log2(len(transient)*(self.parameters.number_of_zero_fills+1))))
    - 95            zeros_filled_transient = zeros(2**exponent)
    - 96        else:
    - 97            zeros_filled_transient = zeros(len(transient)*(self.parameters.number_of_zero_fills+1))
    - 98                
    - 99        zeros_filled_transient[0:len(transient)] = transient    
    -100        
    -101        del transient
    -102        
    -103        gc.collect()
    -104        
    -105        return  zeros_filled_transient 
    -106    
    -107    def truncation(self, transient):
    -108        """ Truncate the transient
    -109        
    -110        Parameters
    -111        ----------
    -112        transient : numpy.ndarray
    -113            The transient data points
    -114        
    -115        Returns
    -116        -------
    -117        numpy.ndarray
    -118            The truncated transient data points
    -119            
    -120        Notes
    -121        -----
    -122        The number of truncations is defined by the transient parameter number_of_truncations
    -123        """
    -124        
    -125        data_count = len(transient)
    -126            
    -127        for _ in range(self.parameters.number_of_truncations):
    -128        
    -129            data_count = int(data_count / 2)
    -130         
    -131        time_domain_truncated = transient[0:data_count]
    -132        
    -133        del transient
    -134                
    -135        gc.collect()  
    -136        
    -137        return time_domain_truncated
    -138    
    -139    def apodization(self, transient):
    -140        """ Apodization of the transient
    -141
    -142        Parameters
    -143        ----------
    -144        transient : numpy.ndarray
    -145            The transient data points
    -146        
    -147        Returns
    -148        -------
    -149        numpy.ndarray
    -150            The apodized transient data points
    -151        
    -152        Notes
    -153        -----
    -154        The apodization method is defined by the transient parameter apodization_method.
    -155        The following apodization methods are available:
    -156            Hamming,
    -157            Hanning,
    -158            Blackman,
    -159            Full-Sine,
    -160            Half-Sine,
    -161            Kaiser,
    -162            Half-Kaiser.
    -163        
    -164        For Kaiser and Half-Kaiser, an additional parameter 'beta' is required, set by the transient parameter kaiser_beta.
    -165
    -166        """
    -167        
    -168        apodi_method = self.parameters.apodization_method
    -169        beta = self.parameters.kaiser_beta
    -170        
    -171        length = len(transient)
    -172            
    -173        if apodi_method == "Hamming":
    -174                H_function = hamming(length)
    -175        elif apodi_method == "Hanning":
    -176                H_function = hanning(length)
    -177        elif apodi_method == "Blackman":
    -178                H_function = blackman(length)
    -179        elif apodi_method == "Full-Sine":
    -180                H_function = sin(linspace(0,pi,num=length))
    -181        elif apodi_method == "Half-Sine":
    -182                H_function = sin(linspace((pi/2),0,num=length))
    -183        elif apodi_method == "Kaiser":
    -184                H_function = kaiser(length,beta)
    -185        elif apodi_method == "Half-Kaiser":
    -186                H_function = kaiser(length*2,beta)[length:]
    -187            
    -188        S_x = transient * H_function
    -189        
    -190        del transient
    -191        gc.collect()  
    -192            
    -193        return S_x
    -194    
    -195    def calculate_frequency_domain(self, number_data_points):
    -196        """ Calculate the frequency domain (axis) of the transient
    -197
    -198        Parameters
    -199        ----------
    -200        number_data_points : int
    -201            The number of data points of the transient
    -202        
    -203        Returns
    -204        -------
    -205        numpy.ndarray
    -206            The frequency domain of the transient (Hz)
    -207        
    -208        
    -209        """
    -210        
    -211        qntpoints = arange(0,(number_data_points))
    -212        
    -213        factor_distancy = (self.bandwidth)/(number_data_points)  
    -214                
    -215        frequency_domain = qntpoints * factor_distancy
    -216        
    -217        del qntpoints   
    -218        del factor_distancy
    -219        gc.collect()  
    -220        
    -221        return frequency_domain  
    -222    
    -223    def cut_freq_domain(self, freqdomain_X, freqdomain_Y):
    -224        """ Cut the frequency domain of the transient
    + 83        float
    + 84            The time domain length of the transient (s)
    + 85        """
    + 86        return (1 / self.bandwidth) * ((self.number_data_points) / 2)
    + 87
    + 88    def zero_fill(self, transient):
    + 89        """Zero fill the transient
    + 90
    + 91        Parameters
    + 92        ----------
    + 93        transient : numpy.ndarray
    + 94            The transient data points
    + 95
    + 96        Returns
    + 97        -------
    + 98        numpy.ndarray
    + 99            The transient data points zerofilled
    +100
    +101        Notes
    +102        -----
    +103        The number of zero fills is defined by the transient parameter number_of_zero_fills.
    +104        The function first calculate the next power of two of the transient length and zero fills to that length, to take advantage of FFT algorithm.
    +105            If the parameter next_power_of_two is set to False, the function will zero fill to the length of the original transient times the number of zero fills
    +106
    +107        """
    +108        if self.parameters.next_power_of_two:
    +109            exponent = int(
    +110                ceil(log2(len(transient) * (self.parameters.number_of_zero_fills + 1)))
    +111            )
    +112            zeros_filled_transient = zeros(2**exponent)
    +113        else:
    +114            zeros_filled_transient = zeros(
    +115                len(transient) * (self.parameters.number_of_zero_fills + 1)
    +116            )
    +117
    +118        zeros_filled_transient[0 : len(transient)] = transient
    +119
    +120        del transient
    +121
    +122        gc.collect()
    +123
    +124        return zeros_filled_transient
    +125
    +126    def truncation(self, transient):
    +127        """Truncate the transient
    +128
    +129        Parameters
    +130        ----------
    +131        transient : numpy.ndarray
    +132            The transient data points
    +133
    +134        Returns
    +135        -------
    +136        numpy.ndarray
    +137            The truncated transient data points
    +138
    +139        Notes
    +140        -----
    +141        The number of truncations is defined by the transient parameter number_of_truncations
    +142        """
    +143
    +144        data_count = len(transient)
    +145
    +146        for _ in range(self.parameters.number_of_truncations):
    +147            data_count = int(data_count / 2)
    +148
    +149        time_domain_truncated = transient[0:data_count]
    +150
    +151        del transient
    +152
    +153        gc.collect()
    +154
    +155        return time_domain_truncated
    +156
    +157    def apodization(self, transient):
    +158        """Apodization of the transient
    +159
    +160        Parameters
    +161        ----------
    +162        transient : numpy.ndarray
    +163            The transient data points
    +164
    +165        Returns
    +166        -------
    +167        numpy.ndarray
    +168            The apodized transient data points
    +169
    +170        Notes
    +171        -----
    +172        The apodization method is defined by the transient parameter apodization_method.
    +173        The following apodization methods are available:
    +174            Hamming,
    +175            Hanning,
    +176            Blackman,
    +177            Full-Sine,
    +178            Half-Sine,
    +179            Kaiser,
    +180            Half-Kaiser.
    +181
    +182        For Kaiser and Half-Kaiser, an additional parameter 'beta' is required, set by the transient parameter kaiser_beta.
    +183
    +184        """
    +185
    +186        apodi_method = self.parameters.apodization_method
    +187        beta = self.parameters.kaiser_beta
    +188
    +189        length = len(transient)
    +190
    +191        if apodi_method == "Hamming":
    +192            H_function = hamming(length)
    +193        elif apodi_method == "Hanning":
    +194            H_function = hanning(length)
    +195        elif apodi_method == "Blackman":
    +196            H_function = blackman(length)
    +197        elif apodi_method == "Full-Sine":
    +198            H_function = sin(linspace(0, pi, num=length))
    +199        elif apodi_method == "Half-Sine":
    +200            H_function = sin(linspace((pi / 2), 0, num=length))
    +201        elif apodi_method == "Kaiser":
    +202            H_function = kaiser(length, beta)
    +203        elif apodi_method == "Half-Kaiser":
    +204            H_function = kaiser(length * 2, beta)[length:]
    +205
    +206        S_x = transient * H_function
    +207
    +208        del transient
    +209        gc.collect()
    +210
    +211        return S_x
    +212
    +213    def calculate_frequency_domain(self, number_data_points):
    +214        """Calculate the frequency domain (axis) of the transient
    +215
    +216        Parameters
    +217        ----------
    +218        number_data_points : int
    +219            The number of data points of the transient
    +220
    +221        Returns
    +222        -------
    +223        numpy.ndarray
    +224            The frequency domain of the transient (Hz)
     225
    -226        Parameters
    -227        ----------
    -228        freqdomain_X : numpy.ndarray
    -229            The frequency domain of the transient (Hz)
    -230        freqdomain_Y : numpy.ndarray
    -231            The frequency domain of the transient (Hz)
    -232        
    -233        Returns
    -234        -------
    -235        numpy.ndarray
    -236            The frequency domain of the transient (Hz)
    -237        numpy.ndarray
    -238            The frequency domain of the transient (Hz)
    -239        
    -240        
    -241        """
    -242        # If the mw_low and mw_high are set, the frequency domain is cut to the mw range
    -243        # this accounts for the detection settings, not the excitation settings.
    -244        # TODO: Implement this - right now the f to mz function is in the ms class, not the transient class, so it doesnt work.
    -245        #if (self._mw_low != 0) & (self._mw_high != 0):
    -246        #    high_freq = self._f_to_mz(self._mw_high)
    -247        #    low_freq = self._f_to_mz(self._mw_low)
    -248        #
    -249        #    final =  where(freqdomain_X < high_freq)[-1][-1]
    -250        #      start =  where(freqdomain_X > low_freq)[0][0]
    -251        #else:
    -252        if self._qpd_enabled == 1:
    -253            low_freq = self._exc_low_freq *2
    -254            high_freq = self._exc_high_freq *2
    -255        else:
    -256            low_freq = self._exc_low_freq
    -257            high_freq = self._exc_high_freq 
    +226
    +227        """
    +228
    +229        qntpoints = arange(0, (number_data_points))
    +230
    +231        factor_distancy = (self.bandwidth) / (number_data_points)
    +232
    +233        frequency_domain = qntpoints * factor_distancy
    +234
    +235        del qntpoints
    +236        del factor_distancy
    +237        gc.collect()
    +238
    +239        return frequency_domain
    +240
    +241    def cut_freq_domain(self, freqdomain_X, freqdomain_Y):
    +242        """Cut the frequency domain of the transient
    +243
    +244        Parameters
    +245        ----------
    +246        freqdomain_X : numpy.ndarray
    +247            The frequency domain of the transient (Hz)
    +248        freqdomain_Y : numpy.ndarray
    +249            The frequency domain of the transient (Hz)
    +250
    +251        Returns
    +252        -------
    +253        numpy.ndarray
    +254            The frequency domain of the transient (Hz)
    +255        numpy.ndarray
    +256            The frequency domain of the transient (Hz)
    +257
     258
    -259        if self._exc_low_freq > self._exc_high_freq:
    -260            # TODO: This needs to be tested
    -261            # I'm not sure that this is relevant anyway - the excitation pulse is ramped in frequency but the detection is simulatenous
    -262            warnings.warn("This is not tested. Please check the results.")
    -263            final =  where(freqdomain_X > low_freq)[0][0]
    -264            start =  where(freqdomain_X > high_freq)[0][0]
    -265
    -266        else:
    -267            
    -268            final =  where(freqdomain_X < high_freq)[-1][-1]
    -269            start =  where(freqdomain_X > low_freq)[0][0]
    -270        
    -271        
    -272        return freqdomain_X[start:final], freqdomain_Y[start:final]
    -273        #del freqdomain_X, freqdomain_Y
    -274        #gc.collect()
    -275    
    -276    def phase_and_absorption_mode_ft(self):
    -277        """ [Not Functional] Produce a phased absorption mode FT spectrum
    -278        
    -279        """
    -280        #anyone wants to play with this part please make yourself comfortable. I will:
    -281        pass 
    -282            
    -283    def perform_magniture_mode_ft(self, transient):
    -284        """ Perform magnitude mode FT of the transient
    -285
    -286        Parameters
    -287        ---------- 
    -288        transient : numpy.ndarray
    -289            The transient data points
    -290        
    -291        Returns
    -292        -------
    -293        numpy.ndarray
    -294            The frequency domain of the transient (Hz)
    -295        numpy.ndarray
    -296            The magnitude of the transient (a.u.)
    -297        
    -298        
    -299        """
    -300
    -301        
    -302        A = fft.rfft(transient)
    -303        
    -304        #A = fft.fft(transient)
    -305        #A = A[0:int(len(A)/2)]
    -306
    -307        factor = int(self.parameters.number_of_zero_fills-1)
    -308        if self.parameters.number_of_zero_fills:
    -309            if self.parameters.number_of_zero_fills == 1:
    -310                factor = 1/2
    -311                
    -312            else:
    -313                factor = int(1/self.parameters.number_of_zero_fills+1)
    -314                
    -315            Max_index = int(len(A)/factor)    
    -316        
    -317        else:
    -318            Max_index = int(len(A))
    -319        
    -320        A = A[0:Max_index]
    -321        
    -322        datapoints = len(A)
    -323        
    -324        freqdomain_X = self.calculate_frequency_domain(datapoints)
    -325        
    -326        magnitude_Y = sqrt((power(A.real,2)) + (power(A.imag,2)))
    -327        
    -328        freqdomain_X_cut, magnitude_Y_cut = self.cut_freq_domain(freqdomain_X, magnitude_Y)  
    -329        
    -330        del transient 
    -331        #del freqdomain_X
    -332        #del magnitude_Y
    -333        gc.collect()
    -334        
    -335        return freqdomain_X_cut, magnitude_Y_cut
    -336    
    -337    def correct_dc_offset(self):
    -338        """ [Not Yet Implemented] Correct the DC offset of the transient
    -339
    -340        A simple baseline correction to compensate for a DC offset in the recorded transient.
    -341        Not implemented.
    -342        
    -343        """
    -344        pass
    -345    
    -346    
    +259        """
    +260        # If the mw_low and mw_high are set, the frequency domain is cut to the mw range
    +261        # this accounts for the detection settings, not the excitation settings.
    +262        # TODO: Implement this - right now the f to mz function is in the ms class, not the transient class, so it doesnt work.
    +263        # if (self._mw_low != 0) & (self._mw_high != 0):
    +264        #    high_freq = self._f_to_mz(self._mw_high)
    +265        #    low_freq = self._f_to_mz(self._mw_low)
    +266        #
    +267        #    final =  where(freqdomain_X < high_freq)[-1][-1]
    +268        #      start =  where(freqdomain_X > low_freq)[0][0]
    +269        # else:
    +270        if self._qpd_enabled == 1:
    +271            low_freq = self._exc_low_freq * 2
    +272            high_freq = self._exc_high_freq * 2
    +273        else:
    +274            low_freq = self._exc_low_freq
    +275            high_freq = self._exc_high_freq
    +276
    +277        if self._exc_low_freq > self._exc_high_freq:
    +278            # TODO: This needs to be tested
    +279            # I'm not sure that this is relevant anyway - the excitation pulse is ramped in frequency but the detection is simulatenous
    +280            warnings.warn("This is not tested. Please check the results.")
    +281            final = where(freqdomain_X > low_freq)[0][0]
    +282            start = where(freqdomain_X > high_freq)[0][0]
    +283
    +284        else:
    +285            final = where(freqdomain_X < high_freq)[-1][-1]
    +286            start = where(freqdomain_X > low_freq)[0][0]
    +287
    +288        return freqdomain_X[start:final], freqdomain_Y[start:final]
    +289        # del freqdomain_X, freqdomain_Y
    +290        # gc.collect()
    +291
    +292    def phase_and_absorption_mode_ft(self):
    +293        """[Not Functional] Produce a phased absorption mode FT spectrum"""
    +294        # anyone wants to play with this part please make yourself comfortable. I will:
    +295        pass
    +296
    +297    def perform_magniture_mode_ft(self, transient):
    +298        """Perform magnitude mode FT of the transient
    +299
    +300        Parameters
    +301        ----------
    +302        transient : numpy.ndarray
    +303            The transient data points
    +304
    +305        Returns
    +306        -------
    +307        numpy.ndarray
    +308            The frequency domain of the transient (Hz)
    +309        numpy.ndarray
    +310            The magnitude of the transient (a.u.)
    +311
    +312
    +313        """
    +314
    +315        A = fft.rfft(transient)
    +316
    +317        # A = fft.fft(transient)
    +318        # A = A[0:int(len(A)/2)]
    +319
    +320        factor = int(self.parameters.number_of_zero_fills - 1)
    +321        if self.parameters.number_of_zero_fills:
    +322            if self.parameters.number_of_zero_fills == 1:
    +323                factor = 1 / 2
    +324
    +325            else:
    +326                factor = int(1 / self.parameters.number_of_zero_fills + 1)
    +327
    +328            Max_index = int(len(A) / factor)
    +329
    +330        else:
    +331            Max_index = int(len(A))
    +332
    +333        A = A[0:Max_index]
    +334
    +335        datapoints = len(A)
    +336
    +337        freqdomain_X = self.calculate_frequency_domain(datapoints)
    +338
    +339        magnitude_Y = sqrt((power(A.real, 2)) + (power(A.imag, 2)))
    +340
    +341        freqdomain_X_cut, magnitude_Y_cut = self.cut_freq_domain(
    +342            freqdomain_X, magnitude_Y
    +343        )
    +344
    +345        del transient
    +346        # del freqdomain_X
    +347        # del magnitude_Y
    +348        gc.collect()
    +349
    +350        return freqdomain_X_cut, magnitude_Y_cut
    +351
    +352    def correct_dc_offset(self):
    +353        """[Not Yet Implemented] Correct the DC offset of the transient
    +354
    +355        A simple baseline correction to compensate for a DC offset in the recorded transient.
    +356        Not implemented.
    +357
    +358        """
    +359        pass
     
    @@ -445,341 +458,340 @@

    -
     11class TransientCalculations(object):
    - 12    """ Transient Calculations
    - 13    
    - 14    Parameters
    - 15    ----------
    - 16    parameters : corems.transient.parameters.TransientParameters
    - 17        The transient parameters
    - 18    bandwidth : float
    - 19        The bandwidth of the transient (Hz)
    - 20    number_data_points : int
    - 21        The number of data points of the transient
    - 22    exc_low_freq : float
    - 23        The low frequency of the excitation (Hz)
    - 24    exc_high_freq : float
    - 25        The high frequency of the excitation (Hz)
    - 26    
    - 27    Attributes
    - 28    ----------
    - 29    parameters : corems.transient.parameters.TransientParameters
    - 30        The transient parameters
    - 31    bandwidth : float
    - 32        The bandwidth of the transient (Hz)
    - 33    number_data_points : int
    - 34        The number of data points of the transient
    - 35    exc_low_freq : float
    - 36        The low frequency of the excitation (Hz)
    - 37    exc_high_freq : float
    - 38        The high frequency of the excitation (Hz)
    - 39    
    - 40    Methods
    - 41    -------
    - 42    * cal_transient_time().
    - 43        Calculate the time domain length of the transient
    - 44    * zero_fill(transient).
    - 45        Zero fill the transient
    - 46    * truncation(transient).
    - 47        Truncate the transient
    - 48    * apodization(transient).  
    - 49        Apodization of the transient
    - 50    * calculate_frequency_domain(number_data_points).
    - 51        Calculate the frequency domain (axis) of the transient
    - 52    * cut_freq_domain(freqdomain_X, freqdomain_Y).
    - 53        Cut the frequency domain of the transient
    - 54    * phase_and_absorption_mode_ft().
    - 55        [Not Functional] Produce a phased absorption mode FT spectrum
    - 56    * magnitude_mode_ft(transient).
    - 57        Perform magnitude mode FT of the transient
    - 58    * correct_dc_offset().
    - 59        [Not Yet Implemented] Correct the DC offset of the transient 
    - 60    
    - 61    """
    - 62
    - 63    
    - 64    def cal_transient_time(self):
    - 65        """ Calculate the time domain length of the transient 
    - 66        
    - 67        Returns
    - 68        -------
    - 69        float
    - 70            The time domain length of the transient (s)
    - 71        """
    - 72        return (1 / self.bandwidth) * ((self.number_data_points) / 2)
    - 73        
    - 74    def zero_fill(self, transient ):
    - 75        """ Zero fill the transient
    - 76        
    - 77        Parameters
    - 78        ----------
    - 79        transient : numpy.ndarray
    - 80            The transient data points
    - 81        
    +            
     27class TransientCalculations(object):
    + 28    """Transient Calculations
    + 29
    + 30    Parameters
    + 31    ----------
    + 32    parameters : corems.transient.parameters.TransientParameters
    + 33        The transient parameters
    + 34    bandwidth : float
    + 35        The bandwidth of the transient (Hz)
    + 36    number_data_points : int
    + 37        The number of data points of the transient
    + 38    exc_low_freq : float
    + 39        The low frequency of the excitation (Hz)
    + 40    exc_high_freq : float
    + 41        The high frequency of the excitation (Hz)
    + 42
    + 43    Attributes
    + 44    ----------
    + 45    parameters : corems.transient.parameters.TransientParameters
    + 46        The transient parameters
    + 47    bandwidth : float
    + 48        The bandwidth of the transient (Hz)
    + 49    number_data_points : int
    + 50        The number of data points of the transient
    + 51    exc_low_freq : float
    + 52        The low frequency of the excitation (Hz)
    + 53    exc_high_freq : float
    + 54        The high frequency of the excitation (Hz)
    + 55
    + 56    Methods
    + 57    -------
    + 58    * cal_transient_time().
    + 59        Calculate the time domain length of the transient
    + 60    * zero_fill(transient).
    + 61        Zero fill the transient
    + 62    * truncation(transient).
    + 63        Truncate the transient
    + 64    * apodization(transient).
    + 65        Apodization of the transient
    + 66    * calculate_frequency_domain(number_data_points).
    + 67        Calculate the frequency domain (axis) of the transient
    + 68    * cut_freq_domain(freqdomain_X, freqdomain_Y).
    + 69        Cut the frequency domain of the transient
    + 70    * phase_and_absorption_mode_ft().
    + 71        [Not Functional] Produce a phased absorption mode FT spectrum
    + 72    * magnitude_mode_ft(transient).
    + 73        Perform magnitude mode FT of the transient
    + 74    * correct_dc_offset().
    + 75        [Not Yet Implemented] Correct the DC offset of the transient
    + 76
    + 77    """
    + 78
    + 79    def cal_transient_time(self):
    + 80        """Calculate the time domain length of the transient
    + 81
      82        Returns
      83        -------
    - 84        numpy.ndarray
    - 85            The transient data points zerofilled
    - 86            
    - 87        Notes
    - 88        -----
    - 89        The number of zero fills is defined by the transient parameter number_of_zero_fills.
    - 90        The function first calculate the next power of two of the transient length and zero fills to that length, to take advantage of FFT algorithm.
    - 91            If the parameter next_power_of_two is set to False, the function will zero fill to the length of the original transient times the number of zero fills
    - 92
    - 93        """
    - 94        if self.parameters.next_power_of_two:
    - 95            exponent = int(ceil(log2(len(transient)*(self.parameters.number_of_zero_fills+1))))
    - 96            zeros_filled_transient = zeros(2**exponent)
    - 97        else:
    - 98            zeros_filled_transient = zeros(len(transient)*(self.parameters.number_of_zero_fills+1))
    - 99                
    -100        zeros_filled_transient[0:len(transient)] = transient    
    -101        
    -102        del transient
    -103        
    -104        gc.collect()
    -105        
    -106        return  zeros_filled_transient 
    -107    
    -108    def truncation(self, transient):
    -109        """ Truncate the transient
    -110        
    -111        Parameters
    -112        ----------
    -113        transient : numpy.ndarray
    -114            The transient data points
    -115        
    -116        Returns
    -117        -------
    -118        numpy.ndarray
    -119            The truncated transient data points
    -120            
    -121        Notes
    -122        -----
    -123        The number of truncations is defined by the transient parameter number_of_truncations
    -124        """
    -125        
    -126        data_count = len(transient)
    -127            
    -128        for _ in range(self.parameters.number_of_truncations):
    -129        
    -130            data_count = int(data_count / 2)
    -131         
    -132        time_domain_truncated = transient[0:data_count]
    -133        
    -134        del transient
    -135                
    -136        gc.collect()  
    -137        
    -138        return time_domain_truncated
    -139    
    -140    def apodization(self, transient):
    -141        """ Apodization of the transient
    -142
    -143        Parameters
    -144        ----------
    -145        transient : numpy.ndarray
    -146            The transient data points
    -147        
    -148        Returns
    -149        -------
    -150        numpy.ndarray
    -151            The apodized transient data points
    -152        
    -153        Notes
    -154        -----
    -155        The apodization method is defined by the transient parameter apodization_method.
    -156        The following apodization methods are available:
    -157            Hamming,
    -158            Hanning,
    -159            Blackman,
    -160            Full-Sine,
    -161            Half-Sine,
    -162            Kaiser,
    -163            Half-Kaiser.
    -164        
    -165        For Kaiser and Half-Kaiser, an additional parameter 'beta' is required, set by the transient parameter kaiser_beta.
    -166
    -167        """
    -168        
    -169        apodi_method = self.parameters.apodization_method
    -170        beta = self.parameters.kaiser_beta
    -171        
    -172        length = len(transient)
    -173            
    -174        if apodi_method == "Hamming":
    -175                H_function = hamming(length)
    -176        elif apodi_method == "Hanning":
    -177                H_function = hanning(length)
    -178        elif apodi_method == "Blackman":
    -179                H_function = blackman(length)
    -180        elif apodi_method == "Full-Sine":
    -181                H_function = sin(linspace(0,pi,num=length))
    -182        elif apodi_method == "Half-Sine":
    -183                H_function = sin(linspace((pi/2),0,num=length))
    -184        elif apodi_method == "Kaiser":
    -185                H_function = kaiser(length,beta)
    -186        elif apodi_method == "Half-Kaiser":
    -187                H_function = kaiser(length*2,beta)[length:]
    -188            
    -189        S_x = transient * H_function
    -190        
    -191        del transient
    -192        gc.collect()  
    -193            
    -194        return S_x
    -195    
    -196    def calculate_frequency_domain(self, number_data_points):
    -197        """ Calculate the frequency domain (axis) of the transient
    -198
    -199        Parameters
    -200        ----------
    -201        number_data_points : int
    -202            The number of data points of the transient
    -203        
    -204        Returns
    -205        -------
    -206        numpy.ndarray
    -207            The frequency domain of the transient (Hz)
    -208        
    -209        
    -210        """
    -211        
    -212        qntpoints = arange(0,(number_data_points))
    -213        
    -214        factor_distancy = (self.bandwidth)/(number_data_points)  
    -215                
    -216        frequency_domain = qntpoints * factor_distancy
    -217        
    -218        del qntpoints   
    -219        del factor_distancy
    -220        gc.collect()  
    -221        
    -222        return frequency_domain  
    -223    
    -224    def cut_freq_domain(self, freqdomain_X, freqdomain_Y):
    -225        """ Cut the frequency domain of the transient
    + 84        float
    + 85            The time domain length of the transient (s)
    + 86        """
    + 87        return (1 / self.bandwidth) * ((self.number_data_points) / 2)
    + 88
    + 89    def zero_fill(self, transient):
    + 90        """Zero fill the transient
    + 91
    + 92        Parameters
    + 93        ----------
    + 94        transient : numpy.ndarray
    + 95            The transient data points
    + 96
    + 97        Returns
    + 98        -------
    + 99        numpy.ndarray
    +100            The transient data points zerofilled
    +101
    +102        Notes
    +103        -----
    +104        The number of zero fills is defined by the transient parameter number_of_zero_fills.
    +105        The function first calculate the next power of two of the transient length and zero fills to that length, to take advantage of FFT algorithm.
    +106            If the parameter next_power_of_two is set to False, the function will zero fill to the length of the original transient times the number of zero fills
    +107
    +108        """
    +109        if self.parameters.next_power_of_two:
    +110            exponent = int(
    +111                ceil(log2(len(transient) * (self.parameters.number_of_zero_fills + 1)))
    +112            )
    +113            zeros_filled_transient = zeros(2**exponent)
    +114        else:
    +115            zeros_filled_transient = zeros(
    +116                len(transient) * (self.parameters.number_of_zero_fills + 1)
    +117            )
    +118
    +119        zeros_filled_transient[0 : len(transient)] = transient
    +120
    +121        del transient
    +122
    +123        gc.collect()
    +124
    +125        return zeros_filled_transient
    +126
    +127    def truncation(self, transient):
    +128        """Truncate the transient
    +129
    +130        Parameters
    +131        ----------
    +132        transient : numpy.ndarray
    +133            The transient data points
    +134
    +135        Returns
    +136        -------
    +137        numpy.ndarray
    +138            The truncated transient data points
    +139
    +140        Notes
    +141        -----
    +142        The number of truncations is defined by the transient parameter number_of_truncations
    +143        """
    +144
    +145        data_count = len(transient)
    +146
    +147        for _ in range(self.parameters.number_of_truncations):
    +148            data_count = int(data_count / 2)
    +149
    +150        time_domain_truncated = transient[0:data_count]
    +151
    +152        del transient
    +153
    +154        gc.collect()
    +155
    +156        return time_domain_truncated
    +157
    +158    def apodization(self, transient):
    +159        """Apodization of the transient
    +160
    +161        Parameters
    +162        ----------
    +163        transient : numpy.ndarray
    +164            The transient data points
    +165
    +166        Returns
    +167        -------
    +168        numpy.ndarray
    +169            The apodized transient data points
    +170
    +171        Notes
    +172        -----
    +173        The apodization method is defined by the transient parameter apodization_method.
    +174        The following apodization methods are available:
    +175            Hamming,
    +176            Hanning,
    +177            Blackman,
    +178            Full-Sine,
    +179            Half-Sine,
    +180            Kaiser,
    +181            Half-Kaiser.
    +182
    +183        For Kaiser and Half-Kaiser, an additional parameter 'beta' is required, set by the transient parameter kaiser_beta.
    +184
    +185        """
    +186
    +187        apodi_method = self.parameters.apodization_method
    +188        beta = self.parameters.kaiser_beta
    +189
    +190        length = len(transient)
    +191
    +192        if apodi_method == "Hamming":
    +193            H_function = hamming(length)
    +194        elif apodi_method == "Hanning":
    +195            H_function = hanning(length)
    +196        elif apodi_method == "Blackman":
    +197            H_function = blackman(length)
    +198        elif apodi_method == "Full-Sine":
    +199            H_function = sin(linspace(0, pi, num=length))
    +200        elif apodi_method == "Half-Sine":
    +201            H_function = sin(linspace((pi / 2), 0, num=length))
    +202        elif apodi_method == "Kaiser":
    +203            H_function = kaiser(length, beta)
    +204        elif apodi_method == "Half-Kaiser":
    +205            H_function = kaiser(length * 2, beta)[length:]
    +206
    +207        S_x = transient * H_function
    +208
    +209        del transient
    +210        gc.collect()
    +211
    +212        return S_x
    +213
    +214    def calculate_frequency_domain(self, number_data_points):
    +215        """Calculate the frequency domain (axis) of the transient
    +216
    +217        Parameters
    +218        ----------
    +219        number_data_points : int
    +220            The number of data points of the transient
    +221
    +222        Returns
    +223        -------
    +224        numpy.ndarray
    +225            The frequency domain of the transient (Hz)
     226
    -227        Parameters
    -228        ----------
    -229        freqdomain_X : numpy.ndarray
    -230            The frequency domain of the transient (Hz)
    -231        freqdomain_Y : numpy.ndarray
    -232            The frequency domain of the transient (Hz)
    -233        
    -234        Returns
    -235        -------
    -236        numpy.ndarray
    -237            The frequency domain of the transient (Hz)
    -238        numpy.ndarray
    -239            The frequency domain of the transient (Hz)
    -240        
    -241        
    -242        """
    -243        # If the mw_low and mw_high are set, the frequency domain is cut to the mw range
    -244        # this accounts for the detection settings, not the excitation settings.
    -245        # TODO: Implement this - right now the f to mz function is in the ms class, not the transient class, so it doesnt work.
    -246        #if (self._mw_low != 0) & (self._mw_high != 0):
    -247        #    high_freq = self._f_to_mz(self._mw_high)
    -248        #    low_freq = self._f_to_mz(self._mw_low)
    -249        #
    -250        #    final =  where(freqdomain_X < high_freq)[-1][-1]
    -251        #      start =  where(freqdomain_X > low_freq)[0][0]
    -252        #else:
    -253        if self._qpd_enabled == 1:
    -254            low_freq = self._exc_low_freq *2
    -255            high_freq = self._exc_high_freq *2
    -256        else:
    -257            low_freq = self._exc_low_freq
    -258            high_freq = self._exc_high_freq 
    +227
    +228        """
    +229
    +230        qntpoints = arange(0, (number_data_points))
    +231
    +232        factor_distancy = (self.bandwidth) / (number_data_points)
    +233
    +234        frequency_domain = qntpoints * factor_distancy
    +235
    +236        del qntpoints
    +237        del factor_distancy
    +238        gc.collect()
    +239
    +240        return frequency_domain
    +241
    +242    def cut_freq_domain(self, freqdomain_X, freqdomain_Y):
    +243        """Cut the frequency domain of the transient
    +244
    +245        Parameters
    +246        ----------
    +247        freqdomain_X : numpy.ndarray
    +248            The frequency domain of the transient (Hz)
    +249        freqdomain_Y : numpy.ndarray
    +250            The frequency domain of the transient (Hz)
    +251
    +252        Returns
    +253        -------
    +254        numpy.ndarray
    +255            The frequency domain of the transient (Hz)
    +256        numpy.ndarray
    +257            The frequency domain of the transient (Hz)
    +258
     259
    -260        if self._exc_low_freq > self._exc_high_freq:
    -261            # TODO: This needs to be tested
    -262            # I'm not sure that this is relevant anyway - the excitation pulse is ramped in frequency but the detection is simulatenous
    -263            warnings.warn("This is not tested. Please check the results.")
    -264            final =  where(freqdomain_X > low_freq)[0][0]
    -265            start =  where(freqdomain_X > high_freq)[0][0]
    -266
    -267        else:
    -268            
    -269            final =  where(freqdomain_X < high_freq)[-1][-1]
    -270            start =  where(freqdomain_X > low_freq)[0][0]
    -271        
    -272        
    -273        return freqdomain_X[start:final], freqdomain_Y[start:final]
    -274        #del freqdomain_X, freqdomain_Y
    -275        #gc.collect()
    -276    
    -277    def phase_and_absorption_mode_ft(self):
    -278        """ [Not Functional] Produce a phased absorption mode FT spectrum
    -279        
    -280        """
    -281        #anyone wants to play with this part please make yourself comfortable. I will:
    -282        pass 
    -283            
    -284    def perform_magniture_mode_ft(self, transient):
    -285        """ Perform magnitude mode FT of the transient
    -286
    -287        Parameters
    -288        ---------- 
    -289        transient : numpy.ndarray
    -290            The transient data points
    -291        
    -292        Returns
    -293        -------
    -294        numpy.ndarray
    -295            The frequency domain of the transient (Hz)
    -296        numpy.ndarray
    -297            The magnitude of the transient (a.u.)
    -298        
    -299        
    -300        """
    -301
    -302        
    -303        A = fft.rfft(transient)
    -304        
    -305        #A = fft.fft(transient)
    -306        #A = A[0:int(len(A)/2)]
    -307
    -308        factor = int(self.parameters.number_of_zero_fills-1)
    -309        if self.parameters.number_of_zero_fills:
    -310            if self.parameters.number_of_zero_fills == 1:
    -311                factor = 1/2
    -312                
    -313            else:
    -314                factor = int(1/self.parameters.number_of_zero_fills+1)
    -315                
    -316            Max_index = int(len(A)/factor)    
    -317        
    -318        else:
    -319            Max_index = int(len(A))
    -320        
    -321        A = A[0:Max_index]
    -322        
    -323        datapoints = len(A)
    -324        
    -325        freqdomain_X = self.calculate_frequency_domain(datapoints)
    -326        
    -327        magnitude_Y = sqrt((power(A.real,2)) + (power(A.imag,2)))
    -328        
    -329        freqdomain_X_cut, magnitude_Y_cut = self.cut_freq_domain(freqdomain_X, magnitude_Y)  
    -330        
    -331        del transient 
    -332        #del freqdomain_X
    -333        #del magnitude_Y
    -334        gc.collect()
    -335        
    -336        return freqdomain_X_cut, magnitude_Y_cut
    -337    
    -338    def correct_dc_offset(self):
    -339        """ [Not Yet Implemented] Correct the DC offset of the transient
    -340
    -341        A simple baseline correction to compensate for a DC offset in the recorded transient.
    -342        Not implemented.
    -343        
    -344        """
    -345        pass
    +260        """
    +261        # If the mw_low and mw_high are set, the frequency domain is cut to the mw range
    +262        # this accounts for the detection settings, not the excitation settings.
    +263        # TODO: Implement this - right now the f to mz function is in the ms class, not the transient class, so it doesnt work.
    +264        # if (self._mw_low != 0) & (self._mw_high != 0):
    +265        #    high_freq = self._f_to_mz(self._mw_high)
    +266        #    low_freq = self._f_to_mz(self._mw_low)
    +267        #
    +268        #    final =  where(freqdomain_X < high_freq)[-1][-1]
    +269        #      start =  where(freqdomain_X > low_freq)[0][0]
    +270        # else:
    +271        if self._qpd_enabled == 1:
    +272            low_freq = self._exc_low_freq * 2
    +273            high_freq = self._exc_high_freq * 2
    +274        else:
    +275            low_freq = self._exc_low_freq
    +276            high_freq = self._exc_high_freq
    +277
    +278        if self._exc_low_freq > self._exc_high_freq:
    +279            # TODO: This needs to be tested
    +280            # I'm not sure that this is relevant anyway - the excitation pulse is ramped in frequency but the detection is simulatenous
    +281            warnings.warn("This is not tested. Please check the results.")
    +282            final = where(freqdomain_X > low_freq)[0][0]
    +283            start = where(freqdomain_X > high_freq)[0][0]
    +284
    +285        else:
    +286            final = where(freqdomain_X < high_freq)[-1][-1]
    +287            start = where(freqdomain_X > low_freq)[0][0]
    +288
    +289        return freqdomain_X[start:final], freqdomain_Y[start:final]
    +290        # del freqdomain_X, freqdomain_Y
    +291        # gc.collect()
    +292
    +293    def phase_and_absorption_mode_ft(self):
    +294        """[Not Functional] Produce a phased absorption mode FT spectrum"""
    +295        # anyone wants to play with this part please make yourself comfortable. I will:
    +296        pass
    +297
    +298    def perform_magniture_mode_ft(self, transient):
    +299        """Perform magnitude mode FT of the transient
    +300
    +301        Parameters
    +302        ----------
    +303        transient : numpy.ndarray
    +304            The transient data points
    +305
    +306        Returns
    +307        -------
    +308        numpy.ndarray
    +309            The frequency domain of the transient (Hz)
    +310        numpy.ndarray
    +311            The magnitude of the transient (a.u.)
    +312
    +313
    +314        """
    +315
    +316        A = fft.rfft(transient)
    +317
    +318        # A = fft.fft(transient)
    +319        # A = A[0:int(len(A)/2)]
    +320
    +321        factor = int(self.parameters.number_of_zero_fills - 1)
    +322        if self.parameters.number_of_zero_fills:
    +323            if self.parameters.number_of_zero_fills == 1:
    +324                factor = 1 / 2
    +325
    +326            else:
    +327                factor = int(1 / self.parameters.number_of_zero_fills + 1)
    +328
    +329            Max_index = int(len(A) / factor)
    +330
    +331        else:
    +332            Max_index = int(len(A))
    +333
    +334        A = A[0:Max_index]
    +335
    +336        datapoints = len(A)
    +337
    +338        freqdomain_X = self.calculate_frequency_domain(datapoints)
    +339
    +340        magnitude_Y = sqrt((power(A.real, 2)) + (power(A.imag, 2)))
    +341
    +342        freqdomain_X_cut, magnitude_Y_cut = self.cut_freq_domain(
    +343            freqdomain_X, magnitude_Y
    +344        )
    +345
    +346        del transient
    +347        # del freqdomain_X
    +348        # del magnitude_Y
    +349        gc.collect()
    +350
    +351        return freqdomain_X_cut, magnitude_Y_cut
    +352
    +353    def correct_dc_offset(self):
    +354        """[Not Yet Implemented] Correct the DC offset of the transient
    +355
    +356        A simple baseline correction to compensate for a DC offset in the recorded transient.
    +357        Not implemented.
    +358
    +359        """
    +360        pass
     
    @@ -824,7 +836,7 @@
    Methods
    Zero fill the transient
  • truncation(transient). Truncate the transient
  • -
  • apodization(transient).
    +
  • apodization(transient). Apodization of the transient
  • calculate_frequency_domain(number_data_points). Calculate the frequency domain (axis) of the transient
  • @@ -851,19 +863,19 @@
    Methods
    -
    64    def cal_transient_time(self):
    -65        """ Calculate the time domain length of the transient 
    -66        
    -67        Returns
    -68        -------
    -69        float
    -70            The time domain length of the transient (s)
    -71        """
    -72        return (1 / self.bandwidth) * ((self.number_data_points) / 2)
    +            
    79    def cal_transient_time(self):
    +80        """Calculate the time domain length of the transient
    +81
    +82        Returns
    +83        -------
    +84        float
    +85            The time domain length of the transient (s)
    +86        """
    +87        return (1 / self.bandwidth) * ((self.number_data_points) / 2)
     
    -

    Calculate the time domain length of the transient

    +

    Calculate the time domain length of the transient

    Returns
    @@ -885,39 +897,43 @@
    Returns
    -
     74    def zero_fill(self, transient ):
    - 75        """ Zero fill the transient
    - 76        
    - 77        Parameters
    - 78        ----------
    - 79        transient : numpy.ndarray
    - 80            The transient data points
    - 81        
    - 82        Returns
    - 83        -------
    - 84        numpy.ndarray
    - 85            The transient data points zerofilled
    - 86            
    - 87        Notes
    - 88        -----
    - 89        The number of zero fills is defined by the transient parameter number_of_zero_fills.
    - 90        The function first calculate the next power of two of the transient length and zero fills to that length, to take advantage of FFT algorithm.
    - 91            If the parameter next_power_of_two is set to False, the function will zero fill to the length of the original transient times the number of zero fills
    - 92
    - 93        """
    - 94        if self.parameters.next_power_of_two:
    - 95            exponent = int(ceil(log2(len(transient)*(self.parameters.number_of_zero_fills+1))))
    - 96            zeros_filled_transient = zeros(2**exponent)
    - 97        else:
    - 98            zeros_filled_transient = zeros(len(transient)*(self.parameters.number_of_zero_fills+1))
    - 99                
    -100        zeros_filled_transient[0:len(transient)] = transient    
    -101        
    -102        del transient
    -103        
    -104        gc.collect()
    -105        
    -106        return  zeros_filled_transient 
    +            
     89    def zero_fill(self, transient):
    + 90        """Zero fill the transient
    + 91
    + 92        Parameters
    + 93        ----------
    + 94        transient : numpy.ndarray
    + 95            The transient data points
    + 96
    + 97        Returns
    + 98        -------
    + 99        numpy.ndarray
    +100            The transient data points zerofilled
    +101
    +102        Notes
    +103        -----
    +104        The number of zero fills is defined by the transient parameter number_of_zero_fills.
    +105        The function first calculate the next power of two of the transient length and zero fills to that length, to take advantage of FFT algorithm.
    +106            If the parameter next_power_of_two is set to False, the function will zero fill to the length of the original transient times the number of zero fills
    +107
    +108        """
    +109        if self.parameters.next_power_of_two:
    +110            exponent = int(
    +111                ceil(log2(len(transient) * (self.parameters.number_of_zero_fills + 1)))
    +112            )
    +113            zeros_filled_transient = zeros(2**exponent)
    +114        else:
    +115            zeros_filled_transient = zeros(
    +116                len(transient) * (self.parameters.number_of_zero_fills + 1)
    +117            )
    +118
    +119        zeros_filled_transient[0 : len(transient)] = transient
    +120
    +121        del transient
    +122
    +123        gc.collect()
    +124
    +125        return zeros_filled_transient
     
    @@ -956,37 +972,36 @@
    Notes
    -
    108    def truncation(self, transient):
    -109        """ Truncate the transient
    -110        
    -111        Parameters
    -112        ----------
    -113        transient : numpy.ndarray
    -114            The transient data points
    -115        
    -116        Returns
    -117        -------
    -118        numpy.ndarray
    -119            The truncated transient data points
    -120            
    -121        Notes
    -122        -----
    -123        The number of truncations is defined by the transient parameter number_of_truncations
    -124        """
    -125        
    -126        data_count = len(transient)
    -127            
    -128        for _ in range(self.parameters.number_of_truncations):
    -129        
    -130            data_count = int(data_count / 2)
    -131         
    -132        time_domain_truncated = transient[0:data_count]
    -133        
    -134        del transient
    -135                
    -136        gc.collect()  
    -137        
    -138        return time_domain_truncated
    +            
    127    def truncation(self, transient):
    +128        """Truncate the transient
    +129
    +130        Parameters
    +131        ----------
    +132        transient : numpy.ndarray
    +133            The transient data points
    +134
    +135        Returns
    +136        -------
    +137        numpy.ndarray
    +138            The truncated transient data points
    +139
    +140        Notes
    +141        -----
    +142        The number of truncations is defined by the transient parameter number_of_truncations
    +143        """
    +144
    +145        data_count = len(transient)
    +146
    +147        for _ in range(self.parameters.number_of_truncations):
    +148            data_count = int(data_count / 2)
    +149
    +150        time_domain_truncated = transient[0:data_count]
    +151
    +152        del transient
    +153
    +154        gc.collect()
    +155
    +156        return time_domain_truncated
     
    @@ -1023,61 +1038,61 @@
    Notes
    -
    140    def apodization(self, transient):
    -141        """ Apodization of the transient
    -142
    -143        Parameters
    -144        ----------
    -145        transient : numpy.ndarray
    -146            The transient data points
    -147        
    -148        Returns
    -149        -------
    -150        numpy.ndarray
    -151            The apodized transient data points
    -152        
    -153        Notes
    -154        -----
    -155        The apodization method is defined by the transient parameter apodization_method.
    -156        The following apodization methods are available:
    -157            Hamming,
    -158            Hanning,
    -159            Blackman,
    -160            Full-Sine,
    -161            Half-Sine,
    -162            Kaiser,
    -163            Half-Kaiser.
    -164        
    -165        For Kaiser and Half-Kaiser, an additional parameter 'beta' is required, set by the transient parameter kaiser_beta.
    -166
    -167        """
    -168        
    -169        apodi_method = self.parameters.apodization_method
    -170        beta = self.parameters.kaiser_beta
    -171        
    -172        length = len(transient)
    -173            
    -174        if apodi_method == "Hamming":
    -175                H_function = hamming(length)
    -176        elif apodi_method == "Hanning":
    -177                H_function = hanning(length)
    -178        elif apodi_method == "Blackman":
    -179                H_function = blackman(length)
    -180        elif apodi_method == "Full-Sine":
    -181                H_function = sin(linspace(0,pi,num=length))
    -182        elif apodi_method == "Half-Sine":
    -183                H_function = sin(linspace((pi/2),0,num=length))
    -184        elif apodi_method == "Kaiser":
    -185                H_function = kaiser(length,beta)
    -186        elif apodi_method == "Half-Kaiser":
    -187                H_function = kaiser(length*2,beta)[length:]
    -188            
    -189        S_x = transient * H_function
    -190        
    -191        del transient
    -192        gc.collect()  
    -193            
    -194        return S_x
    +            
    158    def apodization(self, transient):
    +159        """Apodization of the transient
    +160
    +161        Parameters
    +162        ----------
    +163        transient : numpy.ndarray
    +164            The transient data points
    +165
    +166        Returns
    +167        -------
    +168        numpy.ndarray
    +169            The apodized transient data points
    +170
    +171        Notes
    +172        -----
    +173        The apodization method is defined by the transient parameter apodization_method.
    +174        The following apodization methods are available:
    +175            Hamming,
    +176            Hanning,
    +177            Blackman,
    +178            Full-Sine,
    +179            Half-Sine,
    +180            Kaiser,
    +181            Half-Kaiser.
    +182
    +183        For Kaiser and Half-Kaiser, an additional parameter 'beta' is required, set by the transient parameter kaiser_beta.
    +184
    +185        """
    +186
    +187        apodi_method = self.parameters.apodization_method
    +188        beta = self.parameters.kaiser_beta
    +189
    +190        length = len(transient)
    +191
    +192        if apodi_method == "Hamming":
    +193            H_function = hamming(length)
    +194        elif apodi_method == "Hanning":
    +195            H_function = hanning(length)
    +196        elif apodi_method == "Blackman":
    +197            H_function = blackman(length)
    +198        elif apodi_method == "Full-Sine":
    +199            H_function = sin(linspace(0, pi, num=length))
    +200        elif apodi_method == "Half-Sine":
    +201            H_function = sin(linspace((pi / 2), 0, num=length))
    +202        elif apodi_method == "Kaiser":
    +203            H_function = kaiser(length, beta)
    +204        elif apodi_method == "Half-Kaiser":
    +205            H_function = kaiser(length * 2, beta)[length:]
    +206
    +207        S_x = transient * H_function
    +208
    +209        del transient
    +210        gc.collect()
    +211
    +212        return S_x
     
    @@ -1124,33 +1139,33 @@
    Notes
    -
    196    def calculate_frequency_domain(self, number_data_points):
    -197        """ Calculate the frequency domain (axis) of the transient
    -198
    -199        Parameters
    -200        ----------
    -201        number_data_points : int
    -202            The number of data points of the transient
    -203        
    -204        Returns
    -205        -------
    -206        numpy.ndarray
    -207            The frequency domain of the transient (Hz)
    -208        
    -209        
    -210        """
    -211        
    -212        qntpoints = arange(0,(number_data_points))
    -213        
    -214        factor_distancy = (self.bandwidth)/(number_data_points)  
    -215                
    -216        frequency_domain = qntpoints * factor_distancy
    -217        
    -218        del qntpoints   
    -219        del factor_distancy
    -220        gc.collect()  
    -221        
    -222        return frequency_domain  
    +            
    214    def calculate_frequency_domain(self, number_data_points):
    +215        """Calculate the frequency domain (axis) of the transient
    +216
    +217        Parameters
    +218        ----------
    +219        number_data_points : int
    +220            The number of data points of the transient
    +221
    +222        Returns
    +223        -------
    +224        numpy.ndarray
    +225            The frequency domain of the transient (Hz)
    +226
    +227
    +228        """
    +229
    +230        qntpoints = arange(0, (number_data_points))
    +231
    +232        factor_distancy = (self.bandwidth) / (number_data_points)
    +233
    +234        frequency_domain = qntpoints * factor_distancy
    +235
    +236        del qntpoints
    +237        del factor_distancy
    +238        gc.collect()
    +239
    +240        return frequency_domain
     
    @@ -1183,58 +1198,56 @@
    Returns
    -
    224    def cut_freq_domain(self, freqdomain_X, freqdomain_Y):
    -225        """ Cut the frequency domain of the transient
    -226
    -227        Parameters
    -228        ----------
    -229        freqdomain_X : numpy.ndarray
    -230            The frequency domain of the transient (Hz)
    -231        freqdomain_Y : numpy.ndarray
    -232            The frequency domain of the transient (Hz)
    -233        
    -234        Returns
    -235        -------
    -236        numpy.ndarray
    -237            The frequency domain of the transient (Hz)
    -238        numpy.ndarray
    -239            The frequency domain of the transient (Hz)
    -240        
    -241        
    -242        """
    -243        # If the mw_low and mw_high are set, the frequency domain is cut to the mw range
    -244        # this accounts for the detection settings, not the excitation settings.
    -245        # TODO: Implement this - right now the f to mz function is in the ms class, not the transient class, so it doesnt work.
    -246        #if (self._mw_low != 0) & (self._mw_high != 0):
    -247        #    high_freq = self._f_to_mz(self._mw_high)
    -248        #    low_freq = self._f_to_mz(self._mw_low)
    -249        #
    -250        #    final =  where(freqdomain_X < high_freq)[-1][-1]
    -251        #      start =  where(freqdomain_X > low_freq)[0][0]
    -252        #else:
    -253        if self._qpd_enabled == 1:
    -254            low_freq = self._exc_low_freq *2
    -255            high_freq = self._exc_high_freq *2
    -256        else:
    -257            low_freq = self._exc_low_freq
    -258            high_freq = self._exc_high_freq 
    +            
    242    def cut_freq_domain(self, freqdomain_X, freqdomain_Y):
    +243        """Cut the frequency domain of the transient
    +244
    +245        Parameters
    +246        ----------
    +247        freqdomain_X : numpy.ndarray
    +248            The frequency domain of the transient (Hz)
    +249        freqdomain_Y : numpy.ndarray
    +250            The frequency domain of the transient (Hz)
    +251
    +252        Returns
    +253        -------
    +254        numpy.ndarray
    +255            The frequency domain of the transient (Hz)
    +256        numpy.ndarray
    +257            The frequency domain of the transient (Hz)
    +258
     259
    -260        if self._exc_low_freq > self._exc_high_freq:
    -261            # TODO: This needs to be tested
    -262            # I'm not sure that this is relevant anyway - the excitation pulse is ramped in frequency but the detection is simulatenous
    -263            warnings.warn("This is not tested. Please check the results.")
    -264            final =  where(freqdomain_X > low_freq)[0][0]
    -265            start =  where(freqdomain_X > high_freq)[0][0]
    -266
    -267        else:
    -268            
    -269            final =  where(freqdomain_X < high_freq)[-1][-1]
    -270            start =  where(freqdomain_X > low_freq)[0][0]
    -271        
    -272        
    -273        return freqdomain_X[start:final], freqdomain_Y[start:final]
    -274        #del freqdomain_X, freqdomain_Y
    -275        #gc.collect()
    +260        """
    +261        # If the mw_low and mw_high are set, the frequency domain is cut to the mw range
    +262        # this accounts for the detection settings, not the excitation settings.
    +263        # TODO: Implement this - right now the f to mz function is in the ms class, not the transient class, so it doesnt work.
    +264        # if (self._mw_low != 0) & (self._mw_high != 0):
    +265        #    high_freq = self._f_to_mz(self._mw_high)
    +266        #    low_freq = self._f_to_mz(self._mw_low)
    +267        #
    +268        #    final =  where(freqdomain_X < high_freq)[-1][-1]
    +269        #      start =  where(freqdomain_X > low_freq)[0][0]
    +270        # else:
    +271        if self._qpd_enabled == 1:
    +272            low_freq = self._exc_low_freq * 2
    +273            high_freq = self._exc_high_freq * 2
    +274        else:
    +275            low_freq = self._exc_low_freq
    +276            high_freq = self._exc_high_freq
    +277
    +278        if self._exc_low_freq > self._exc_high_freq:
    +279            # TODO: This needs to be tested
    +280            # I'm not sure that this is relevant anyway - the excitation pulse is ramped in frequency but the detection is simulatenous
    +281            warnings.warn("This is not tested. Please check the results.")
    +282            final = where(freqdomain_X > low_freq)[0][0]
    +283            start = where(freqdomain_X > high_freq)[0][0]
    +284
    +285        else:
    +286            final = where(freqdomain_X < high_freq)[-1][-1]
    +287            start = where(freqdomain_X > low_freq)[0][0]
    +288
    +289        return freqdomain_X[start:final], freqdomain_Y[start:final]
    +290        # del freqdomain_X, freqdomain_Y
    +291        # gc.collect()
     
    @@ -1270,12 +1283,10 @@
    Returns
    -
    277    def phase_and_absorption_mode_ft(self):
    -278        """ [Not Functional] Produce a phased absorption mode FT spectrum
    -279        
    -280        """
    -281        #anyone wants to play with this part please make yourself comfortable. I will:
    -282        pass 
    +            
    293    def phase_and_absorption_mode_ft(self):
    +294        """[Not Functional] Produce a phased absorption mode FT spectrum"""
    +295        # anyone wants to play with this part please make yourself comfortable. I will:
    +296        pass
     
    @@ -1295,68 +1306,71 @@
    Returns
    -
    284    def perform_magniture_mode_ft(self, transient):
    -285        """ Perform magnitude mode FT of the transient
    -286
    -287        Parameters
    -288        ---------- 
    -289        transient : numpy.ndarray
    -290            The transient data points
    -291        
    -292        Returns
    -293        -------
    -294        numpy.ndarray
    -295            The frequency domain of the transient (Hz)
    -296        numpy.ndarray
    -297            The magnitude of the transient (a.u.)
    -298        
    -299        
    -300        """
    -301
    -302        
    -303        A = fft.rfft(transient)
    -304        
    -305        #A = fft.fft(transient)
    -306        #A = A[0:int(len(A)/2)]
    -307
    -308        factor = int(self.parameters.number_of_zero_fills-1)
    -309        if self.parameters.number_of_zero_fills:
    -310            if self.parameters.number_of_zero_fills == 1:
    -311                factor = 1/2
    -312                
    -313            else:
    -314                factor = int(1/self.parameters.number_of_zero_fills+1)
    -315                
    -316            Max_index = int(len(A)/factor)    
    -317        
    -318        else:
    -319            Max_index = int(len(A))
    -320        
    -321        A = A[0:Max_index]
    -322        
    -323        datapoints = len(A)
    -324        
    -325        freqdomain_X = self.calculate_frequency_domain(datapoints)
    -326        
    -327        magnitude_Y = sqrt((power(A.real,2)) + (power(A.imag,2)))
    -328        
    -329        freqdomain_X_cut, magnitude_Y_cut = self.cut_freq_domain(freqdomain_X, magnitude_Y)  
    -330        
    -331        del transient 
    -332        #del freqdomain_X
    -333        #del magnitude_Y
    -334        gc.collect()
    -335        
    -336        return freqdomain_X_cut, magnitude_Y_cut
    +            
    298    def perform_magniture_mode_ft(self, transient):
    +299        """Perform magnitude mode FT of the transient
    +300
    +301        Parameters
    +302        ----------
    +303        transient : numpy.ndarray
    +304            The transient data points
    +305
    +306        Returns
    +307        -------
    +308        numpy.ndarray
    +309            The frequency domain of the transient (Hz)
    +310        numpy.ndarray
    +311            The magnitude of the transient (a.u.)
    +312
    +313
    +314        """
    +315
    +316        A = fft.rfft(transient)
    +317
    +318        # A = fft.fft(transient)
    +319        # A = A[0:int(len(A)/2)]
    +320
    +321        factor = int(self.parameters.number_of_zero_fills - 1)
    +322        if self.parameters.number_of_zero_fills:
    +323            if self.parameters.number_of_zero_fills == 1:
    +324                factor = 1 / 2
    +325
    +326            else:
    +327                factor = int(1 / self.parameters.number_of_zero_fills + 1)
    +328
    +329            Max_index = int(len(A) / factor)
    +330
    +331        else:
    +332            Max_index = int(len(A))
    +333
    +334        A = A[0:Max_index]
    +335
    +336        datapoints = len(A)
    +337
    +338        freqdomain_X = self.calculate_frequency_domain(datapoints)
    +339
    +340        magnitude_Y = sqrt((power(A.real, 2)) + (power(A.imag, 2)))
    +341
    +342        freqdomain_X_cut, magnitude_Y_cut = self.cut_freq_domain(
    +343            freqdomain_X, magnitude_Y
    +344        )
    +345
    +346        del transient
    +347        # del freqdomain_X
    +348        # del magnitude_Y
    +349        gc.collect()
    +350
    +351        return freqdomain_X_cut, magnitude_Y_cut
     

    Perform magnitude mode FT of the transient

    -

    Parameters

    +
    Parameters
    -

    transient : numpy.ndarray - The transient data points

    +
      +
    • transient (numpy.ndarray): +The transient data points
    • +
    Returns
    @@ -1379,14 +1393,14 @@
    Returns
    -
    338    def correct_dc_offset(self):
    -339        """ [Not Yet Implemented] Correct the DC offset of the transient
    -340
    -341        A simple baseline correction to compensate for a DC offset in the recorded transient.
    -342        Not implemented.
    -343        
    -344        """
    -345        pass
    +            
    353    def correct_dc_offset(self):
    +354        """[Not Yet Implemented] Correct the DC offset of the transient
    +355
    +356        A simple baseline correction to compensate for a DC offset in the recorded transient.
    +357        Not implemented.
    +358
    +359        """
    +360        pass
     
    diff --git a/docs/corems/transient/factory/TransientClasses.html b/docs/corems/transient/factory/TransientClasses.html index b16749b9..65c027bf 100644 --- a/docs/corems/transient/factory/TransientClasses.html +++ b/docs/corems/transient/factory/TransientClasses.html @@ -127,452 +127,453 @@

    8from corems.transient.calc.TransientCalc import TransientCalculations 9import matplotlib.pyplot as plt 10from copy import deepcopy - 11from corems.encapsulation.input.parameter_from_json import load_and_set_parameters_class, load_and_set_toml_parameters_class - 12 - 13 - 14__author__ = "Yuri E. Corilo" - 15__date__ = "Jun 19, 2019" + 11from corems.encapsulation.input.parameter_from_json import ( + 12 load_and_set_parameters_class, + 13 load_and_set_toml_parameters_class, + 14) + 15 16 - 17 - 18class Transient(TransientCalculations): - 19 """ The Transient object contains the transient data and the parameters used to process it - 20 - 21 Parameters - 22 ---------- - 23 data : numpy.ndarray - 24 Array with the transient data - 25 d_params : dict - 26 Dictionary with the parameters to be set - 27 - 28 Attributes - 29 ---------- - 30 calibration_terms : tuple - 31 Tuple with the calibration terms (A, B, C) - 32 bandwidth : float - 33 The bandwidth of the transient (Hz) - 34 number_data_points : int - 35 The number of data points of the transient - 36 polarity : int - 37 The polarity of the transient - 38 transient_time : float - 39 The time domain length of the transient - 40 d_params : dict - 41 Dictionary with the parameters to be set - 42 frequency_domain : numpy.ndarray - 43 Array with the frequency domain - 44 magnitude : numpy.ndarray - 45 Array with the magnitude - 46 _full_filename_path : str - 47 The full path of the transient file - 48 _exc_high_freq : float - 49 The high frequency of the excitation (Hz) - 50 _exc_low_freq : float - 51 The low frequency of the excitation (Hz) - 52 _parameters : corems.transient.parameters.TransientParameters - 53 The transient parameters - 54 _transient_data : numpy.ndarray - 55 Array with the transient data - 56 - 57 - 58 Methods - 59 ------- - 60 * get_frequency_domain(plot_result=True). 
- 61 Get the frequency domain and magnitude from the transient data - 62 * get_mass_spectrum(auto_process=True, plot_result=True, keep_profile=True). - 63 Get the mass spectrum from the transient data - 64 * set_processing_parameter(apodization_method, number_of_truncations, number_of_zero_fills). - 65 Set the processing parameters - 66 * scale_plot_size(factor=1.5). - 67 Scale the plot size by a factor - 68 * plot_transient(ax=None, c='k'). - 69 Plot the transient data - 70 * plot_zerofilled_transient(ax=None, c='k'). - 71 Plot the transient data with zero fill - 72 * plot_apodized_transient(ax=None, c='k'). - 73 Plot the transient data with apodization - 74 * plot_frequency_domain(ax=None, c='k'). - 75 Plot the frequency domain and magnitude - 76 * set_parameter_from_toml(parameters_path). - 77 Set the processing parameters from a toml file - 78 * set_parameter_from_json(parameters_path). - 79 Set the processing parameters from a json file - 80 - 81 - 82 - 83 """ - 84 - 85 - 86 def __init__(self, data, d_params): + 17__author__ = "Yuri E. 
Corilo" + 18__date__ = "Jun 19, 2019" + 19 + 20 + 21class Transient(TransientCalculations): + 22 """The Transient object contains the transient data and the parameters used to process it + 23 + 24 Parameters + 25 ---------- + 26 data : numpy.ndarray + 27 Array with the transient data + 28 d_params : dict + 29 Dictionary with the parameters to be set + 30 + 31 Attributes + 32 ---------- + 33 calibration_terms : tuple + 34 Tuple with the calibration terms (A, B, C) + 35 bandwidth : float + 36 The bandwidth of the transient (Hz) + 37 number_data_points : int + 38 The number of data points of the transient + 39 polarity : int + 40 The polarity of the transient + 41 transient_time : float + 42 The time domain length of the transient + 43 d_params : dict + 44 Dictionary with the parameters to be set + 45 frequency_domain : numpy.ndarray + 46 Array with the frequency domain + 47 magnitude : numpy.ndarray + 48 Array with the magnitude + 49 _full_filename_path : str + 50 The full path of the transient file + 51 _exc_high_freq : float + 52 The high frequency of the excitation (Hz) + 53 _exc_low_freq : float + 54 The low frequency of the excitation (Hz) + 55 _parameters : corems.transient.parameters.TransientParameters + 56 The transient parameters + 57 _transient_data : numpy.ndarray + 58 Array with the transient data + 59 + 60 + 61 Methods + 62 ------- + 63 * get_frequency_domain(plot_result=True). + 64 Get the frequency domain and magnitude from the transient data + 65 * get_mass_spectrum(auto_process=True, plot_result=True, keep_profile=True). + 66 Get the mass spectrum from the transient data + 67 * set_processing_parameter(apodization_method, number_of_truncations, number_of_zero_fills). + 68 Set the processing parameters + 69 * scale_plot_size(factor=1.5). + 70 Scale the plot size by a factor + 71 * plot_transient(ax=None, c='k'). + 72 Plot the transient data + 73 * plot_zerofilled_transient(ax=None, c='k'). 
+ 74 Plot the transient data with zero fill + 75 * plot_apodized_transient(ax=None, c='k'). + 76 Plot the transient data with apodization + 77 * plot_frequency_domain(ax=None, c='k'). + 78 Plot the frequency domain and magnitude + 79 * set_parameter_from_toml(parameters_path). + 80 Set the processing parameters from a toml file + 81 * set_parameter_from_json(parameters_path). + 82 Set the processing parameters from a json file + 83 + 84 + 85 + 86 """ 87 - 88 self._transient_data = data - 89 - 90 self.d_params = d_params - 91 - 92 self.frequency_domain = None - 93 - 94 self.magnitude = None - 95 - 96 self.__set__parameters__objects(d_params) - 97 - 98 self.__set__transient__time() - 99 -100 def __set__parameters__objects(self, d_params): -101 """ Set the parameters objects from the dictionary d_params -102 -103 Parameters -104 ---------- -105 d_params : dict -106 Dictionary with the parameters to be set -107 -108 """ -109 -110 self._full_filename_path = d_params.get("filename_path") -111 -112 self.calibration_terms = ( -113 d_params.get("Aterm"), -114 d_params.get("Bterm"), -115 d_params.get("Cterm"), -116 ) -117 -118 self._exc_high_freq = d_params.get("exc_high_freq") -119 -120 self._exc_low_freq = d_params.get("exc_low_freq") -121 -122 self._qpd_enabled = d_params.get("qpd_enabled") #Quadrupolar detection enabled -123 -124 self._mw_low = d_params.get("mw_low") # low mass for detection -125 -126 self._mw_high = d_params.get("mw_high") # high mass for detection -127 -128 self.bandwidth = d_params.get("bandwidth") -129 -130 self.number_data_points = d_params.get("number_data_points") -131 -132 self.polarity = int(d_params.get("polarity")) -133 -134 self.location = 220 -135 -136 self._parameters = deepcopy(MSParameters.transient) -137 -138 def scale_plot_size(self, factor=1.5): -139 """Scale the plot size by a factor -140 -141 Parameters -142 ---------- -143 factor : float, optional -144 The factor to scale the plot size, by default 1.5 -145 """ -146 -147 default_dpi 
= rcParamsDefault["figure.dpi"] -148 rcParams["figure.dpi"] = default_dpi * factor -149 -150 def __set__transient__time(self): -151 """ Set the transient time variable with the calculated length.""" -152 self.transient_time = self.cal_transient_time() -153 -154 def set_processing_parameter(self, apodization_method: str, number_of_truncations: int, number_of_zero_fills: int): -155 """ Set the processing parameters -156 -157 Parameters -158 ---------- -159 apodization_method : str -160 Apodization method to be used -161 number_of_truncations : int -162 Number of truncations to be used -163 number_of_zero_fills : int -164 Number of zero fills to be used -165 """ -166 -167 self.parameters.apodization_method = apodization_method -168 -169 self.parameters.number_of_truncations = number_of_truncations -170 -171 self.parameters.number_of_zero_fills = number_of_zero_fills + 88 def __init__(self, data, d_params): + 89 self._transient_data = data + 90 + 91 self.d_params = d_params + 92 + 93 self.frequency_domain = None + 94 + 95 self.magnitude = None + 96 + 97 self.__set__parameters__objects(d_params) + 98 + 99 self.__set__transient__time() +100 +101 def __set__parameters__objects(self, d_params): +102 """Set the parameters objects from the dictionary d_params +103 +104 Parameters +105 ---------- +106 d_params : dict +107 Dictionary with the parameters to be set +108 +109 """ +110 +111 self._full_filename_path = d_params.get("filename_path") +112 +113 self.calibration_terms = ( +114 d_params.get("Aterm"), +115 d_params.get("Bterm"), +116 d_params.get("Cterm"), +117 ) +118 +119 self._exc_high_freq = d_params.get("exc_high_freq") +120 +121 self._exc_low_freq = d_params.get("exc_low_freq") +122 +123 self._qpd_enabled = d_params.get("qpd_enabled") # Quadrupolar detection enabled +124 +125 self._mw_low = d_params.get("mw_low") # low mass for detection +126 +127 self._mw_high = d_params.get("mw_high") # high mass for detection +128 +129 self.bandwidth = d_params.get("bandwidth") 
+130 +131 self.number_data_points = d_params.get("number_data_points") +132 +133 self.polarity = int(d_params.get("polarity")) +134 +135 self.location = 220 +136 +137 self._parameters = deepcopy(MSParameters.transient) +138 +139 def scale_plot_size(self, factor=1.5): +140 """Scale the plot size by a factor +141 +142 Parameters +143 ---------- +144 factor : float, optional +145 The factor to scale the plot size, by default 1.5 +146 """ +147 +148 default_dpi = rcParamsDefault["figure.dpi"] +149 rcParams["figure.dpi"] = default_dpi * factor +150 +151 def __set__transient__time(self): +152 """Set the transient time variable with the calculated length.""" +153 self.transient_time = self.cal_transient_time() +154 +155 def set_processing_parameter( +156 self, +157 apodization_method: str, +158 number_of_truncations: int, +159 number_of_zero_fills: int, +160 ): +161 """Set the processing parameters +162 +163 Parameters +164 ---------- +165 apodization_method : str +166 Apodization method to be used +167 number_of_truncations : int +168 Number of truncations to be used +169 number_of_zero_fills : int +170 Number of zero fills to be used +171 """ 172 -173 @property -174 def parameters(self): -175 """ The transient parameters""" -176 return self._parameters -177 -178 @parameters.setter -179 def parameters(self, instance_TransientParameters): -180 self._parameters = instance_TransientParameters -181 -182 def set_parameter_from_toml(self, parameters_path): -183 """ Set the processing parameters from a toml file -184 """ -185 self._parameters = load_and_set_toml_parameters_class('Transient', self._parameters, parameters_path=parameters_path) -186 -187 def set_parameter_from_json(self, parameters_path): -188 """ Set the processing parameters from a json file -189 """ -190 self._parameters = load_and_set_parameters_class('Transient', self._parameters, parameters_path=parameters_path) -191 -192 def get_frequency_domain(self, plot_result=True): -193 """ Get the frequency domain and 
magnitude from the transient data -194 -195 Parameters -196 ---------- -197 plot_result : bool, optional -198 Plot the frequency domain and magnitude, by default True -199 -200 Returns -201 ------- -202 frequency_domain : numpy.ndarray -203 Array with the frequency domain -204 magnitude : numpy.ndarray -205 Array with the magnitude -206 """ +173 self.parameters.apodization_method = apodization_method +174 +175 self.parameters.number_of_truncations = number_of_truncations +176 +177 self.parameters.number_of_zero_fills = number_of_zero_fills +178 +179 @property +180 def parameters(self): +181 """The transient parameters""" +182 return self._parameters +183 +184 @parameters.setter +185 def parameters(self, instance_TransientParameters): +186 self._parameters = instance_TransientParameters +187 +188 def set_parameter_from_toml(self, parameters_path): +189 """Set the processing parameters from a toml file""" +190 self._parameters = load_and_set_toml_parameters_class( +191 "Transient", self._parameters, parameters_path=parameters_path +192 ) +193 +194 def set_parameter_from_json(self, parameters_path): +195 """Set the processing parameters from a json file""" +196 self._parameters = load_and_set_parameters_class( +197 "Transient", self._parameters, parameters_path=parameters_path +198 ) +199 +200 def get_frequency_domain(self, plot_result=True): +201 """Get the frequency domain and magnitude from the transient data +202 +203 Parameters +204 ---------- +205 plot_result : bool, optional +206 Plot the frequency domain and magnitude, by default True 207 -208 -209 if self.parameters.number_of_truncations > 0: -210 -211 new_time_domain = self.truncation(self._transient_data) -212 -213 else: -214 -215 new_time_domain = self._transient_data -216 -217 if self.parameters.apodization_method is not None: +208 Returns +209 ------- +210 frequency_domain : numpy.ndarray +211 Array with the frequency domain +212 magnitude : numpy.ndarray +213 Array with the magnitude +214 """ +215 +216 
if self.parameters.number_of_truncations > 0: +217 new_time_domain = self.truncation(self._transient_data) 218 -219 new_time_domain = self.apodization(new_time_domain) -220 -221 if plot_result: -222 -223 self._plot_transient(self._transient_data) +219 else: +220 new_time_domain = self._transient_data +221 +222 if self.parameters.apodization_method is not None: +223 new_time_domain = self.apodization(new_time_domain) 224 -225 self._plot_transient(new_time_domain) -226 -227 time_domain_y_zero_filled = self.zero_fill(new_time_domain) -228 -229 self.transient_time = self.transient_time * ( -230 self.parameters.number_of_zero_fills + 1 -231 ) -232 -233 if plot_result: -234 -235 self._plot_transient(time_domain_y_zero_filled) -236 -237 return self.perform_magniture_mode_ft(time_domain_y_zero_filled) -238 # return frequency_domain, magnitude -239 -240 def get_mass_spectrum(self, auto_process : bool=True, plot_result : bool=True, -241 keep_profile : bool=True) -> MassSpecfromFreq: -242 -243 """ Get the mass spectrum from the transient data -244 -245 Parameters -246 ---------- -247 auto_process : bool, optional -248 Process the transient data, by default True -249 plot_result : bool, optional -250 Plot the frequency domain and magnitude, by default True -251 keep_profile : bool, optional -252 Keep the profile data, by default True -253 -254 Returns -255 ------- -256 MassSpecfromFreq -257 Mass spectrum object -258 """ -259 -260 frequency_domain, magnitude = self.get_frequency_domain(plot_result=plot_result) -261 -262 if plot_result: -263 -264 self._plot_frequency_domain(frequency_domain, magnitude) -265 -266 self.d_params["filename"] = self.filename -267 self.d_params["dir_location"] = self.dir_location -268 -269 -270 return MassSpecfromFreq( -271 frequency_domain, magnitude, self.d_params, -272 auto_process=auto_process, keep_profile=keep_profile) -273 -274 @property -275 def filename(self): -276 -277 # return dirname(self._full_filename_path) -278 return 
basename(normpath(self._full_filename_path)) -279 -280 @property -281 def dir_location(self): -282 -283 return dirname(self._full_filename_path).strip( -284 basename(normpath(self._full_filename_path)) -285 ) -286 -287 @property -288 def A_therm(self): -289 -290 return self.calibration_terms[0] +225 if plot_result: +226 self._plot_transient(self._transient_data) +227 +228 self._plot_transient(new_time_domain) +229 +230 time_domain_y_zero_filled = self.zero_fill(new_time_domain) +231 +232 self.transient_time = self.transient_time * ( +233 self.parameters.number_of_zero_fills + 1 +234 ) +235 +236 if plot_result: +237 self._plot_transient(time_domain_y_zero_filled) +238 +239 return self.perform_magniture_mode_ft(time_domain_y_zero_filled) +240 # return frequency_domain, magnitude +241 +242 def get_mass_spectrum( +243 self, +244 auto_process: bool = True, +245 plot_result: bool = True, +246 keep_profile: bool = True, +247 ) -> MassSpecfromFreq: +248 """Get the mass spectrum from the transient data +249 +250 Parameters +251 ---------- +252 auto_process : bool, optional +253 Process the transient data, by default True +254 plot_result : bool, optional +255 Plot the frequency domain and magnitude, by default True +256 keep_profile : bool, optional +257 Keep the profile data, by default True +258 +259 Returns +260 ------- +261 MassSpecfromFreq +262 Mass spectrum object +263 """ +264 +265 frequency_domain, magnitude = self.get_frequency_domain(plot_result=plot_result) +266 +267 if plot_result: +268 self._plot_frequency_domain(frequency_domain, magnitude) +269 +270 self.d_params["filename"] = self.filename +271 self.d_params["dir_location"] = self.dir_location +272 +273 return MassSpecfromFreq( +274 frequency_domain, +275 magnitude, +276 self.d_params, +277 auto_process=auto_process, +278 keep_profile=keep_profile, +279 ) +280 +281 @property +282 def filename(self): +283 # return dirname(self._full_filename_path) +284 return basename(normpath(self._full_filename_path)) +285 
+286 @property +287 def dir_location(self): +288 return dirname(self._full_filename_path).strip( +289 basename(normpath(self._full_filename_path)) +290 ) 291 292 @property -293 def B_therm(self): -294 -295 return self.calibration_terms[1] -296 -297 @property -298 def C_therm(self): +293 def A_therm(self): +294 return self.calibration_terms[0] +295 +296 @property +297 def B_therm(self): +298 return self.calibration_terms[1] 299 -300 return self.calibration_terms[2] -301 -302 def _plot_frequency_domain(self, frequency_domain, magnitude): # pragma: no cover -303 """ Plot the frequency domain and magnitude -304 -305 Parameters -306 ---------- -307 frequency_domain : numpy.ndarray -308 Array with the frequency domain -309 magnitude : numpy.ndarray -310 Array with the magnitude -311 """ -312 -313 self.location += 1 -314 plt.subplot(self.location) -315 plt.plot(frequency_domain, magnitude, color="green") -316 plt.xlabel("Hz") -317 plt.ylabel("Magnitude") -318 # reset grid location index to 0 -319 self.location = 220 -320 # plt.show() -321 -322 def _plot_transient(self, transient_data): # pragma: no cover -323 """ Plot the transient data -324 -325 Parameters -326 ---------- -327 transient_data : numpy.ndarray -328 Array with the transient data -329 -330 """ +300 @property +301 def C_therm(self): +302 return self.calibration_terms[2] +303 +304 def _plot_frequency_domain(self, frequency_domain, magnitude): # pragma: no cover +305 """Plot the frequency domain and magnitude +306 +307 Parameters +308 ---------- +309 frequency_domain : numpy.ndarray +310 Array with the frequency domain +311 magnitude : numpy.ndarray +312 Array with the magnitude +313 """ +314 +315 self.location += 1 +316 plt.subplot(self.location) +317 plt.plot(frequency_domain, magnitude, color="green") +318 plt.xlabel("Hz") +319 plt.ylabel("Magnitude") +320 # reset grid location index to 0 +321 self.location = 220 +322 # plt.show() +323 +324 def _plot_transient(self, transient_data): # pragma: no cover +325 
"""Plot the transient data +326 +327 Parameters +328 ---------- +329 transient_data : numpy.ndarray +330 Array with the transient data 331 -332 self.location += 1 -333 # print( self.location) -334 time_axis = linspace(0, self.transient_time, num=len(transient_data)) -335 plt.subplot(self.location) -336 plt.plot(time_axis, transient_data, color="green") -337 plt.xlabel("Time (s)") -338 plt.ylabel("Magnitude") -339 # plt.show() -340 -341 def plot_transient(self, ax=None, c='k'): # pragma: no cover -342 """ Plot the transient data -343 -344 Parameters -345 ---------- -346 ax : matplotlib.axes, optional -347 Matplotlib axes object, by default None -348 c : str, optional -349 Color, by default 'k' -350 -351 Returns -352 ------- -353 matplotlib.axes -354 Matplotlib axes object -355 -356 """ +332 """ +333 +334 self.location += 1 +335 # print( self.location) +336 time_axis = linspace(0, self.transient_time, num=len(transient_data)) +337 plt.subplot(self.location) +338 plt.plot(time_axis, transient_data, color="green") +339 plt.xlabel("Time (s)") +340 plt.ylabel("Magnitude") +341 # plt.show() +342 +343 def plot_transient(self, ax=None, c="k"): # pragma: no cover +344 """Plot the transient data +345 +346 Parameters +347 ---------- +348 ax : matplotlib.axes, optional +349 Matplotlib axes object, by default None +350 c : str, optional +351 Color, by default 'k' +352 +353 Returns +354 ------- +355 matplotlib.axes +356 Matplotlib axes object 357 -358 # self.location +=1 -359 # print( self.location) -360 if ax is None: -361 ax = plt.gca() -362 time_axis = linspace(0, self.transient_time, num=len(self._transient_data)) -363 # plt.subplot(self.location) -364 ax.plot(time_axis, self._transient_data, color=c) -365 plt.xlabel("Time (s)") -366 plt.ylabel("Magnitude") -367 # plt.show() -368 return ax -369 -370 def plot_zerofilled_transient(self, ax=None, c='k'): # pragma: no cover -371 """ Plot the transient data with zero fill -372 -373 Parameters -374 ---------- -375 ax : 
matplotlib.axes, optional -376 Matplotlib axes object, by default None -377 c : str, optional -378 Color, by default 'k' -379 -380 Returns -381 ------- -382 matplotlib.axes -383 Matplotlib axes object -384 -385 """ -386 if ax is None: -387 ax = plt.gca() -388 new_time_domain = self.apodization(self._transient_data) -389 time_domain_y_zero_filled = self.zero_fill(new_time_domain) -390 self.transient_time = self.transient_time * ( -391 self.parameters.number_of_zero_fills + 1 -392 ) -393 time_axis = linspace(0, self.transient_time, num=len(time_domain_y_zero_filled)) -394 # plt.subplot(self.location) -395 ax.plot(time_axis, time_domain_y_zero_filled, color=c) -396 plt.xlabel("Time (s)") -397 plt.ylabel("Magnitude") -398 # plt.show() -399 return ax -400 -401 def plot_apodized_transient(self, ax=None, c='k'): # pragma: no cover -402 """ Plot the transient data with apodization -403 -404 Parameters -405 ---------- -406 ax : matplotlib.axes, optional -407 Matplotlib axes object, by default None -408 c : str, optional -409 Color, by default 'k' -410 -411 Returns -412 ------- -413 matplotlib.axes -414 Matplotlib axes object -415 -416 """ -417 # self.location +=1 -418 # print( self.location) -419 if ax is None: -420 ax = plt.gca() -421 new_time_domain = self.apodization(self._transient_data) -422 time_axis = linspace(0, self.transient_time, num=len(new_time_domain)) -423 # plt.subplot(self.location) -424 ax.plot(time_axis, new_time_domain, color=c) -425 plt.xlabel("Time (s)") -426 plt.ylabel("Magnitude") -427 # plt.show() -428 return ax -429 -430 -431 def plot_frequency_domain(self, ax=None, c='k'): # pragma: no cover -432 """ Plot the frequency domain and magnitude -433 -434 Parameters -435 ---------- -436 ax : matplotlib.axes, optional -437 Matplotlib axes object, by default None -438 c : str, optional -439 Color, by default 'k' -440 -441 Returns -442 ------- -443 matplotlib.axes -444 Matplotlib axes object -445 -446 """ -447 # self.location +=1 -448 # 
plt.subplot(self.location) -449 if ax is None: -450 ax = plt.gca() -451 frequency_domain, magnitude = self.get_frequency_domain(plot_result=False) -452 ax.plot(frequency_domain / 1000, magnitude, color=c) -453 plt.xlabel("KHz") -454 plt.ylabel("Magnitude") -455 # plt.show() -456 return ax +358 """ +359 +360 # self.location +=1 +361 # print( self.location) +362 if ax is None: +363 ax = plt.gca() +364 time_axis = linspace(0, self.transient_time, num=len(self._transient_data)) +365 # plt.subplot(self.location) +366 ax.plot(time_axis, self._transient_data, color=c) +367 plt.xlabel("Time (s)") +368 plt.ylabel("Magnitude") +369 # plt.show() +370 return ax +371 +372 def plot_zerofilled_transient(self, ax=None, c="k"): # pragma: no cover +373 """Plot the transient data with zero fill +374 +375 Parameters +376 ---------- +377 ax : matplotlib.axes, optional +378 Matplotlib axes object, by default None +379 c : str, optional +380 Color, by default 'k' +381 +382 Returns +383 ------- +384 matplotlib.axes +385 Matplotlib axes object +386 +387 """ +388 if ax is None: +389 ax = plt.gca() +390 new_time_domain = self.apodization(self._transient_data) +391 time_domain_y_zero_filled = self.zero_fill(new_time_domain) +392 self.transient_time = self.transient_time * ( +393 self.parameters.number_of_zero_fills + 1 +394 ) +395 time_axis = linspace(0, self.transient_time, num=len(time_domain_y_zero_filled)) +396 # plt.subplot(self.location) +397 ax.plot(time_axis, time_domain_y_zero_filled, color=c) +398 plt.xlabel("Time (s)") +399 plt.ylabel("Magnitude") +400 # plt.show() +401 return ax +402 +403 def plot_apodized_transient(self, ax=None, c="k"): # pragma: no cover +404 """Plot the transient data with apodization +405 +406 Parameters +407 ---------- +408 ax : matplotlib.axes, optional +409 Matplotlib axes object, by default None +410 c : str, optional +411 Color, by default 'k' +412 +413 Returns +414 ------- +415 matplotlib.axes +416 Matplotlib axes object +417 +418 """ +419 # 
self.location +=1 +420 # print( self.location) +421 if ax is None: +422 ax = plt.gca() +423 new_time_domain = self.apodization(self._transient_data) +424 time_axis = linspace(0, self.transient_time, num=len(new_time_domain)) +425 # plt.subplot(self.location) +426 ax.plot(time_axis, new_time_domain, color=c) +427 plt.xlabel("Time (s)") +428 plt.ylabel("Magnitude") +429 # plt.show() +430 return ax +431 +432 def plot_frequency_domain(self, ax=None, c="k"): # pragma: no cover +433 """Plot the frequency domain and magnitude +434 +435 Parameters +436 ---------- +437 ax : matplotlib.axes, optional +438 Matplotlib axes object, by default None +439 c : str, optional +440 Color, by default 'k' +441 +442 Returns +443 ------- +444 matplotlib.axes +445 Matplotlib axes object +446 +447 """ +448 # self.location +=1 +449 # plt.subplot(self.location) +450 if ax is None: +451 ax = plt.gca() +452 frequency_domain, magnitude = self.get_frequency_domain(plot_result=False) +453 ax.plot(frequency_domain / 1000, magnitude, color=c) +454 plt.xlabel("KHz") +455 plt.ylabel("Magnitude") +456 # plt.show() +457 return ax

    @@ -588,445 +589,443 @@

    -
     19class Transient(TransientCalculations):
    - 20    """ The Transient object contains the transient data and the parameters used to process it
    - 21   
    - 22    Parameters
    - 23    ----------
    - 24    data : numpy.ndarray
    - 25        Array with the transient data
    - 26    d_params : dict
    - 27        Dictionary with the parameters to be set
    - 28    
    - 29    Attributes
    - 30    ----------
    - 31    calibration_terms : tuple
    - 32        Tuple with the calibration terms (A, B, C)
    - 33    bandwidth : float
    - 34        The bandwidth of the transient (Hz)
    - 35    number_data_points : int
    - 36        The number of data points of the transient
    - 37    polarity : int
    - 38        The polarity of the transient     
    - 39    transient_time : float
    - 40        The time domain length of the transient
    - 41    d_params : dict
    - 42        Dictionary with the parameters to be set
    - 43    frequency_domain : numpy.ndarray
    - 44        Array with the frequency domain
    - 45    magnitude : numpy.ndarray
    - 46        Array with the magnitude
    - 47    _full_filename_path : str
    - 48        The full path of the transient file
    - 49    _exc_high_freq : float  
    - 50        The high frequency of the excitation (Hz)
    - 51    _exc_low_freq : float
    - 52        The low frequency of the excitation (Hz)
    - 53    _parameters : corems.transient.parameters.TransientParameters
    - 54        The transient parameters
    - 55    _transient_data : numpy.ndarray
    - 56        Array with the transient data
    - 57
    - 58
    - 59    Methods
    - 60    -------
    - 61    * get_frequency_domain(plot_result=True).
    - 62        Get the frequency domain and magnitude from the transient data
    - 63    * get_mass_spectrum(auto_process=True, plot_result=True, keep_profile=True).   
    - 64        Get the mass spectrum from the transient data
    - 65    * set_processing_parameter(apodization_method, number_of_truncations, number_of_zero_fills).   
    - 66        Set the processing parameters
    - 67    * scale_plot_size(factor=1.5).
    - 68        Scale the plot size by a factor
    - 69    * plot_transient(ax=None, c='k').
    - 70        Plot the transient data
    - 71    * plot_zerofilled_transient(ax=None, c='k').
    - 72        Plot the transient data with zero fill
    - 73    * plot_apodized_transient(ax=None, c='k').
    - 74        Plot the transient data with apodization
    - 75    * plot_frequency_domain(ax=None, c='k').
    - 76        Plot the frequency domain and magnitude
    - 77    * set_parameter_from_toml(parameters_path).
    - 78        Set the processing parameters from a toml file
    - 79    * set_parameter_from_json(parameters_path).
    - 80        Set the processing parameters from a json file
    - 81    
    - 82
    - 83    
    - 84    """
    - 85    
    - 86    
    - 87    def __init__(self, data, d_params):
    +            
     22class Transient(TransientCalculations):
    + 23    """The Transient object contains the transient data and the parameters used to process it
    + 24
    + 25    Parameters
    + 26    ----------
    + 27    data : numpy.ndarray
    + 28        Array with the transient data
    + 29    d_params : dict
    + 30        Dictionary with the parameters to be set
    + 31
    + 32    Attributes
    + 33    ----------
    + 34    calibration_terms : tuple
    + 35        Tuple with the calibration terms (A, B, C)
    + 36    bandwidth : float
    + 37        The bandwidth of the transient (Hz)
    + 38    number_data_points : int
    + 39        The number of data points of the transient
    + 40    polarity : int
    + 41        The polarity of the transient
    + 42    transient_time : float
    + 43        The time domain length of the transient
    + 44    d_params : dict
    + 45        Dictionary with the parameters to be set
    + 46    frequency_domain : numpy.ndarray
    + 47        Array with the frequency domain
    + 48    magnitude : numpy.ndarray
    + 49        Array with the magnitude
    + 50    _full_filename_path : str
    + 51        The full path of the transient file
    + 52    _exc_high_freq : float
    + 53        The high frequency of the excitation (Hz)
    + 54    _exc_low_freq : float
    + 55        The low frequency of the excitation (Hz)
    + 56    _parameters : corems.transient.parameters.TransientParameters
    + 57        The transient parameters
    + 58    _transient_data : numpy.ndarray
    + 59        Array with the transient data
    + 60
    + 61
    + 62    Methods
    + 63    -------
    + 64    * get_frequency_domain(plot_result=True).
    + 65        Get the frequency domain and magnitude from the transient data
    + 66    * get_mass_spectrum(auto_process=True, plot_result=True, keep_profile=True).
    + 67        Get the mass spectrum from the transient data
    + 68    * set_processing_parameter(apodization_method, number_of_truncations, number_of_zero_fills).
    + 69        Set the processing parameters
    + 70    * scale_plot_size(factor=1.5).
    + 71        Scale the plot size by a factor
    + 72    * plot_transient(ax=None, c='k').
    + 73        Plot the transient data
    + 74    * plot_zerofilled_transient(ax=None, c='k').
    + 75        Plot the transient data with zero fill
    + 76    * plot_apodized_transient(ax=None, c='k').
    + 77        Plot the transient data with apodization
    + 78    * plot_frequency_domain(ax=None, c='k').
    + 79        Plot the frequency domain and magnitude
    + 80    * set_parameter_from_toml(parameters_path).
    + 81        Set the processing parameters from a toml file
    + 82    * set_parameter_from_json(parameters_path).
    + 83        Set the processing parameters from a json file
    + 84
    + 85
    + 86
    + 87    """
      88
    - 89        self._transient_data = data
    - 90
    - 91        self.d_params = d_params
    - 92
    - 93        self.frequency_domain = None
    - 94
    - 95        self.magnitude = None
    - 96
    - 97        self.__set__parameters__objects(d_params)
    - 98
    - 99        self.__set__transient__time()
    -100
    -101    def __set__parameters__objects(self, d_params):
    -102        """ Set the parameters objects from the dictionary d_params
    -103
    -104        Parameters
    -105        ----------
    -106        d_params : dict
    -107            Dictionary with the parameters to be set
    -108        
    -109        """
    -110
    -111        self._full_filename_path = d_params.get("filename_path")
    -112
    -113        self.calibration_terms = (
    -114            d_params.get("Aterm"),
    -115            d_params.get("Bterm"),
    -116            d_params.get("Cterm"),
    -117        )
    -118
    -119        self._exc_high_freq = d_params.get("exc_high_freq")
    -120
    -121        self._exc_low_freq = d_params.get("exc_low_freq")
    -122
    -123        self._qpd_enabled = d_params.get("qpd_enabled") #Quadrupolar detection enabled
    -124
    -125        self._mw_low = d_params.get("mw_low") # low mass for detection
    -126
    -127        self._mw_high = d_params.get("mw_high") # high mass for detection
    -128
    -129        self.bandwidth = d_params.get("bandwidth")
    -130
    -131        self.number_data_points = d_params.get("number_data_points")
    -132
    -133        self.polarity = int(d_params.get("polarity"))
    -134
    -135        self.location = 220
    -136
    -137        self._parameters = deepcopy(MSParameters.transient)
    -138
    -139    def scale_plot_size(self, factor=1.5):
    -140        """Scale the plot size by a factor
    -141
    -142        Parameters
    -143        ----------
    -144        factor : float, optional
    -145            The factor to scale the plot size, by default 1.5
    -146        """
    -147        
    -148        default_dpi = rcParamsDefault["figure.dpi"]
    -149        rcParams["figure.dpi"] = default_dpi * factor
    -150
    -151    def __set__transient__time(self):
    -152        """ Set the transient time variable with the calculated length."""
    -153        self.transient_time = self.cal_transient_time()
    -154
    -155    def set_processing_parameter(self, apodization_method: str, number_of_truncations: int, number_of_zero_fills: int):
    -156        """ Set the processing parameters
    -157
    -158        Parameters
    -159        ----------
    -160        apodization_method : str
    -161            Apodization method to be used
    -162        number_of_truncations : int
    -163            Number of truncations to be used
    -164        number_of_zero_fills : int
    -165            Number of zero fills to be used
    -166        """
    -167
    -168        self.parameters.apodization_method = apodization_method
    -169
    -170        self.parameters.number_of_truncations = number_of_truncations
    -171
    -172        self.parameters.number_of_zero_fills = number_of_zero_fills
    + 89    def __init__(self, data, d_params):
    + 90        self._transient_data = data
    + 91
    + 92        self.d_params = d_params
    + 93
    + 94        self.frequency_domain = None
    + 95
    + 96        self.magnitude = None
    + 97
    + 98        self.__set__parameters__objects(d_params)
    + 99
    +100        self.__set__transient__time()
    +101
    +102    def __set__parameters__objects(self, d_params):
    +103        """Set the parameters objects from the dictionary d_params
    +104
    +105        Parameters
    +106        ----------
    +107        d_params : dict
    +108            Dictionary with the parameters to be set
    +109
    +110        """
    +111
    +112        self._full_filename_path = d_params.get("filename_path")
    +113
    +114        self.calibration_terms = (
    +115            d_params.get("Aterm"),
    +116            d_params.get("Bterm"),
    +117            d_params.get("Cterm"),
    +118        )
    +119
    +120        self._exc_high_freq = d_params.get("exc_high_freq")
    +121
    +122        self._exc_low_freq = d_params.get("exc_low_freq")
    +123
    +124        self._qpd_enabled = d_params.get("qpd_enabled")  # Quadrupolar detection enabled
    +125
    +126        self._mw_low = d_params.get("mw_low")  # low mass for detection
    +127
    +128        self._mw_high = d_params.get("mw_high")  # high mass for detection
    +129
    +130        self.bandwidth = d_params.get("bandwidth")
    +131
    +132        self.number_data_points = d_params.get("number_data_points")
    +133
    +134        self.polarity = int(d_params.get("polarity"))
    +135
    +136        self.location = 220
    +137
    +138        self._parameters = deepcopy(MSParameters.transient)
    +139
    +140    def scale_plot_size(self, factor=1.5):
    +141        """Scale the plot size by a factor
    +142
    +143        Parameters
    +144        ----------
    +145        factor : float, optional
    +146            The factor to scale the plot size, by default 1.5
    +147        """
    +148
    +149        default_dpi = rcParamsDefault["figure.dpi"]
    +150        rcParams["figure.dpi"] = default_dpi * factor
    +151
    +152    def __set__transient__time(self):
    +153        """Set the transient time variable with the calculated length."""
    +154        self.transient_time = self.cal_transient_time()
    +155
    +156    def set_processing_parameter(
    +157        self,
    +158        apodization_method: str,
    +159        number_of_truncations: int,
    +160        number_of_zero_fills: int,
    +161    ):
    +162        """Set the processing parameters
    +163
    +164        Parameters
    +165        ----------
    +166        apodization_method : str
    +167            Apodization method to be used
    +168        number_of_truncations : int
    +169            Number of truncations to be used
    +170        number_of_zero_fills : int
    +171            Number of zero fills to be used
    +172        """
     173
    -174    @property
    -175    def parameters(self):
    -176        """ The transient parameters"""
    -177        return self._parameters
    -178
    -179    @parameters.setter
    -180    def parameters(self, instance_TransientParameters):
    -181        self._parameters = instance_TransientParameters
    -182    
    -183    def set_parameter_from_toml(self, parameters_path):
    -184        """ Set the processing parameters from a toml file
    -185        """
    -186        self._parameters = load_and_set_toml_parameters_class('Transient', self._parameters, parameters_path=parameters_path)
    -187      
    -188    def set_parameter_from_json(self, parameters_path):
    -189        """ Set the processing parameters from a json file
    -190        """
    -191        self._parameters = load_and_set_parameters_class('Transient', self._parameters, parameters_path=parameters_path)
    -192    
    -193    def get_frequency_domain(self, plot_result=True):
    -194        """ Get the frequency domain and magnitude from the transient data
    -195
    -196        Parameters
    -197        ----------
    -198        plot_result : bool, optional
    -199            Plot the frequency domain and magnitude, by default True
    -200        
    -201        Returns
    -202        -------
    -203        frequency_domain : numpy.ndarray
    -204            Array with the frequency domain
    -205        magnitude : numpy.ndarray
    -206            Array with the magnitude
    -207        """
    +174        self.parameters.apodization_method = apodization_method
    +175
    +176        self.parameters.number_of_truncations = number_of_truncations
    +177
    +178        self.parameters.number_of_zero_fills = number_of_zero_fills
    +179
    +180    @property
    +181    def parameters(self):
    +182        """The transient parameters"""
    +183        return self._parameters
    +184
    +185    @parameters.setter
    +186    def parameters(self, instance_TransientParameters):
    +187        self._parameters = instance_TransientParameters
    +188
    +189    def set_parameter_from_toml(self, parameters_path):
    +190        """Set the processing parameters from a toml file"""
    +191        self._parameters = load_and_set_toml_parameters_class(
    +192            "Transient", self._parameters, parameters_path=parameters_path
    +193        )
    +194
    +195    def set_parameter_from_json(self, parameters_path):
    +196        """Set the processing parameters from a json file"""
    +197        self._parameters = load_and_set_parameters_class(
    +198            "Transient", self._parameters, parameters_path=parameters_path
    +199        )
    +200
    +201    def get_frequency_domain(self, plot_result=True):
    +202        """Get the frequency domain and magnitude from the transient data
    +203
    +204        Parameters
    +205        ----------
    +206        plot_result : bool, optional
    +207            Plot the frequency domain and magnitude, by default True
     208
    -209
    -210        if self.parameters.number_of_truncations > 0:
    -211
    -212            new_time_domain = self.truncation(self._transient_data)
    -213
    -214        else:
    -215
    -216            new_time_domain = self._transient_data
    -217
    -218        if self.parameters.apodization_method is not None:
    +209        Returns
    +210        -------
    +211        frequency_domain : numpy.ndarray
    +212            Array with the frequency domain
    +213        magnitude : numpy.ndarray
    +214            Array with the magnitude
    +215        """
    +216
    +217        if self.parameters.number_of_truncations > 0:
    +218            new_time_domain = self.truncation(self._transient_data)
     219
    -220            new_time_domain = self.apodization(new_time_domain)
    -221
    -222        if plot_result:
    -223
    -224            self._plot_transient(self._transient_data)
    +220        else:
    +221            new_time_domain = self._transient_data
    +222
    +223        if self.parameters.apodization_method is not None:
    +224            new_time_domain = self.apodization(new_time_domain)
     225
    -226            self._plot_transient(new_time_domain)
    -227
    -228        time_domain_y_zero_filled = self.zero_fill(new_time_domain)
    -229
    -230        self.transient_time = self.transient_time * (
    -231            self.parameters.number_of_zero_fills + 1
    -232        )
    -233
    -234        if plot_result:
    -235
    -236            self._plot_transient(time_domain_y_zero_filled)
    -237
    -238        return self.perform_magniture_mode_ft(time_domain_y_zero_filled)
    -239        # return frequency_domain, magnitude
    -240
    -241    def get_mass_spectrum(self, auto_process : bool=True, plot_result : bool=True,
    -242                         keep_profile : bool=True) -> MassSpecfromFreq:
    -243
    -244        """ Get the mass spectrum from the transient data
    -245
    -246        Parameters
    -247        ----------
    -248        auto_process : bool, optional
    -249            Process the transient data, by default True
    -250        plot_result : bool, optional
    -251            Plot the frequency domain and magnitude, by default True
    -252        keep_profile : bool, optional
    -253            Keep the profile data, by default True
    -254        
    -255        Returns
    -256        -------
    -257        MassSpecfromFreq
    -258            Mass spectrum object
    -259        """
    -260
    -261        frequency_domain, magnitude = self.get_frequency_domain(plot_result=plot_result)
    -262
    -263        if plot_result:
    -264
    -265            self._plot_frequency_domain(frequency_domain, magnitude)
    -266
    -267        self.d_params["filename"] = self.filename
    -268        self.d_params["dir_location"] = self.dir_location
    -269        
    -270        
    -271        return MassSpecfromFreq(
    -272            frequency_domain, magnitude, self.d_params, 
    -273            auto_process=auto_process, keep_profile=keep_profile)
    -274
    -275    @property
    -276    def filename(self):
    -277
    -278        # return dirname(self._full_filename_path)
    -279        return basename(normpath(self._full_filename_path))
    -280
    -281    @property
    -282    def dir_location(self):
    -283
    -284        return dirname(self._full_filename_path).strip(
    -285            basename(normpath(self._full_filename_path))
    -286        )
    -287
    -288    @property
    -289    def A_therm(self):
    -290
    -291        return self.calibration_terms[0]
    +226        if plot_result:
    +227            self._plot_transient(self._transient_data)
    +228
    +229            self._plot_transient(new_time_domain)
    +230
    +231        time_domain_y_zero_filled = self.zero_fill(new_time_domain)
    +232
    +233        self.transient_time = self.transient_time * (
    +234            self.parameters.number_of_zero_fills + 1
    +235        )
    +236
    +237        if plot_result:
    +238            self._plot_transient(time_domain_y_zero_filled)
    +239
    +240        return self.perform_magniture_mode_ft(time_domain_y_zero_filled)
    +241        # return frequency_domain, magnitude
    +242
    +243    def get_mass_spectrum(
    +244        self,
    +245        auto_process: bool = True,
    +246        plot_result: bool = True,
    +247        keep_profile: bool = True,
    +248    ) -> MassSpecfromFreq:
    +249        """Get the mass spectrum from the transient data
    +250
    +251        Parameters
    +252        ----------
    +253        auto_process : bool, optional
    +254            Process the transient data, by default True
    +255        plot_result : bool, optional
    +256            Plot the frequency domain and magnitude, by default True
    +257        keep_profile : bool, optional
    +258            Keep the profile data, by default True
    +259
    +260        Returns
    +261        -------
    +262        MassSpecfromFreq
    +263            Mass spectrum object
    +264        """
    +265
    +266        frequency_domain, magnitude = self.get_frequency_domain(plot_result=plot_result)
    +267
    +268        if plot_result:
    +269            self._plot_frequency_domain(frequency_domain, magnitude)
    +270
    +271        self.d_params["filename"] = self.filename
    +272        self.d_params["dir_location"] = self.dir_location
    +273
    +274        return MassSpecfromFreq(
    +275            frequency_domain,
    +276            magnitude,
    +277            self.d_params,
    +278            auto_process=auto_process,
    +279            keep_profile=keep_profile,
    +280        )
    +281
    +282    @property
    +283    def filename(self):
    +284        # return dirname(self._full_filename_path)
    +285        return basename(normpath(self._full_filename_path))
    +286
    +287    @property
    +288    def dir_location(self):
    +289        return dirname(self._full_filename_path).strip(
    +290            basename(normpath(self._full_filename_path))
    +291        )
     292
     293    @property
    -294    def B_therm(self):
    -295
    -296        return self.calibration_terms[1]
    -297
    -298    @property
    -299    def C_therm(self):
    +294    def A_therm(self):
    +295        return self.calibration_terms[0]
    +296
    +297    @property
    +298    def B_therm(self):
    +299        return self.calibration_terms[1]
     300
    -301        return self.calibration_terms[2]
    -302
    -303    def _plot_frequency_domain(self, frequency_domain, magnitude): # pragma: no cover
    -304        """ Plot the frequency domain and magnitude
    -305        
    -306        Parameters
    -307        ----------
    -308        frequency_domain : numpy.ndarray
    -309            Array with the frequency domain
    -310        magnitude : numpy.ndarray
    -311            Array with the magnitude
    -312        """
    -313
    -314        self.location += 1
    -315        plt.subplot(self.location)
    -316        plt.plot(frequency_domain, magnitude, color="green")
    -317        plt.xlabel("Hz")
    -318        plt.ylabel("Magnitude")
    -319        # reset grid location index to 0
    -320        self.location = 220
    -321        # plt.show()
    -322
    -323    def _plot_transient(self, transient_data): # pragma: no cover
    -324        """ Plot the transient data
    -325
    -326        Parameters
    -327        ----------
    -328        transient_data : numpy.ndarray
    -329            Array with the transient data
    -330        
    -331        """
    +301    @property
    +302    def C_therm(self):
    +303        return self.calibration_terms[2]
    +304
    +305    def _plot_frequency_domain(self, frequency_domain, magnitude):  # pragma: no cover
    +306        """Plot the frequency domain and magnitude
    +307
    +308        Parameters
    +309        ----------
    +310        frequency_domain : numpy.ndarray
    +311            Array with the frequency domain
    +312        magnitude : numpy.ndarray
    +313            Array with the magnitude
    +314        """
    +315
    +316        self.location += 1
    +317        plt.subplot(self.location)
    +318        plt.plot(frequency_domain, magnitude, color="green")
    +319        plt.xlabel("Hz")
    +320        plt.ylabel("Magnitude")
    +321        # reset grid location index to 0
    +322        self.location = 220
    +323        # plt.show()
    +324
    +325    def _plot_transient(self, transient_data):  # pragma: no cover
    +326        """Plot the transient data
    +327
    +328        Parameters
    +329        ----------
    +330        transient_data : numpy.ndarray
    +331            Array with the transient data
     332
    -333        self.location += 1
    -334        # print( self.location)
    -335        time_axis = linspace(0, self.transient_time, num=len(transient_data))
    -336        plt.subplot(self.location)
    -337        plt.plot(time_axis, transient_data, color="green")
    -338        plt.xlabel("Time (s)")
    -339        plt.ylabel("Magnitude")
    -340        # plt.show()
    -341
    -342    def plot_transient(self, ax=None, c='k'): # pragma: no cover
    -343        """ Plot the transient data
    -344        
    -345        Parameters
    -346        ----------
    -347        ax : matplotlib.axes, optional
    -348            Matplotlib axes object, by default None
    -349        c : str, optional
    -350            Color, by default 'k'
    -351
    -352        Returns
    -353        -------
    -354        matplotlib.axes
    -355            Matplotlib axes object
    -356        
    -357        """
    +333        """
    +334
    +335        self.location += 1
    +336        # print( self.location)
    +337        time_axis = linspace(0, self.transient_time, num=len(transient_data))
    +338        plt.subplot(self.location)
    +339        plt.plot(time_axis, transient_data, color="green")
    +340        plt.xlabel("Time (s)")
    +341        plt.ylabel("Magnitude")
    +342        # plt.show()
    +343
    +344    def plot_transient(self, ax=None, c="k"):  # pragma: no cover
    +345        """Plot the transient data
    +346
    +347        Parameters
    +348        ----------
    +349        ax : matplotlib.axes, optional
    +350            Matplotlib axes object, by default None
    +351        c : str, optional
    +352            Color, by default 'k'
    +353
    +354        Returns
    +355        -------
    +356        matplotlib.axes
    +357            Matplotlib axes object
     358
    -359        # self.location +=1
    -360        # print( self.location)
    -361        if ax is None:
    -362            ax = plt.gca()
    -363        time_axis = linspace(0, self.transient_time, num=len(self._transient_data))
    -364        # plt.subplot(self.location)
    -365        ax.plot(time_axis, self._transient_data, color=c)
    -366        plt.xlabel("Time (s)")
    -367        plt.ylabel("Magnitude")
    -368        # plt.show()
    -369        return ax
    -370
    -371    def plot_zerofilled_transient(self, ax=None, c='k'): # pragma: no cover
    -372        """ Plot the transient data with zero fill
    -373        
    -374        Parameters
    -375        ----------
    -376        ax : matplotlib.axes, optional
    -377            Matplotlib axes object, by default None
    -378        c : str, optional
    -379            Color, by default 'k'
    -380
    -381        Returns
    -382        -------
    -383        matplotlib.axes
    -384            Matplotlib axes object
    -385        
    -386        """
    -387        if ax is None:
    -388            ax = plt.gca()
    -389        new_time_domain = self.apodization(self._transient_data)
    -390        time_domain_y_zero_filled = self.zero_fill(new_time_domain)
    -391        self.transient_time = self.transient_time * (
    -392            self.parameters.number_of_zero_fills + 1
    -393        )
    -394        time_axis = linspace(0, self.transient_time, num=len(time_domain_y_zero_filled))
    -395        # plt.subplot(self.location)
    -396        ax.plot(time_axis, time_domain_y_zero_filled, color=c)
    -397        plt.xlabel("Time (s)")
    -398        plt.ylabel("Magnitude")
    -399        # plt.show()
    -400        return ax
    -401
    -402    def plot_apodized_transient(self, ax=None, c='k'):  # pragma: no cover
    -403        """ Plot the transient data with apodization
    -404
    -405        Parameters
    -406        ----------
    -407        ax : matplotlib.axes, optional
    -408            Matplotlib axes object, by default None
    -409        c : str, optional
    -410            Color, by default 'k'
    -411
    -412        Returns
    -413        -------
    -414        matplotlib.axes
    -415            Matplotlib axes object
    -416        
    -417        """
    -418        # self.location +=1
    -419        # print( self.location)
    -420        if ax is None:
    -421            ax = plt.gca()
    -422        new_time_domain = self.apodization(self._transient_data)
    -423        time_axis = linspace(0, self.transient_time, num=len(new_time_domain))
    -424        # plt.subplot(self.location)
    -425        ax.plot(time_axis, new_time_domain, color=c)
    -426        plt.xlabel("Time (s)")
    -427        plt.ylabel("Magnitude")
    -428        # plt.show()
    -429        return ax
    -430
    -431
    -432    def plot_frequency_domain(self, ax=None, c='k'):  # pragma: no cover
    -433        """ Plot the frequency domain and magnitude
    -434        
    -435        Parameters
    -436        ----------
    -437        ax : matplotlib.axes, optional
    -438            Matplotlib axes object, by default None
    -439        c : str, optional
    -440            Color, by default 'k'
    -441
    -442        Returns
    -443        -------
    -444        matplotlib.axes
    -445            Matplotlib axes object
    -446        
    -447        """
    -448        # self.location +=1
    -449        # plt.subplot(self.location)
    -450        if ax is None:
    -451            ax = plt.gca()
    -452        frequency_domain, magnitude = self.get_frequency_domain(plot_result=False)
    -453        ax.plot(frequency_domain / 1000, magnitude, color=c)
    -454        plt.xlabel("KHz")
    -455        plt.ylabel("Magnitude")
    -456        # plt.show()
    -457        return ax
    +359        """
    +360
    +361        # self.location +=1
    +362        # print( self.location)
    +363        if ax is None:
    +364            ax = plt.gca()
    +365        time_axis = linspace(0, self.transient_time, num=len(self._transient_data))
    +366        # plt.subplot(self.location)
    +367        ax.plot(time_axis, self._transient_data, color=c)
    +368        plt.xlabel("Time (s)")
    +369        plt.ylabel("Magnitude")
    +370        # plt.show()
    +371        return ax
    +372
    +373    def plot_zerofilled_transient(self, ax=None, c="k"):  # pragma: no cover
    +374        """Plot the transient data with zero fill
    +375
    +376        Parameters
    +377        ----------
    +378        ax : matplotlib.axes, optional
    +379            Matplotlib axes object, by default None
    +380        c : str, optional
    +381            Color, by default 'k'
    +382
    +383        Returns
    +384        -------
    +385        matplotlib.axes
    +386            Matplotlib axes object
    +387
    +388        """
    +389        if ax is None:
    +390            ax = plt.gca()
    +391        new_time_domain = self.apodization(self._transient_data)
    +392        time_domain_y_zero_filled = self.zero_fill(new_time_domain)
    +393        self.transient_time = self.transient_time * (
    +394            self.parameters.number_of_zero_fills + 1
    +395        )
    +396        time_axis = linspace(0, self.transient_time, num=len(time_domain_y_zero_filled))
    +397        # plt.subplot(self.location)
    +398        ax.plot(time_axis, time_domain_y_zero_filled, color=c)
    +399        plt.xlabel("Time (s)")
    +400        plt.ylabel("Magnitude")
    +401        # plt.show()
    +402        return ax
    +403
    +404    def plot_apodized_transient(self, ax=None, c="k"):  # pragma: no cover
    +405        """Plot the transient data with apodization
    +406
    +407        Parameters
    +408        ----------
    +409        ax : matplotlib.axes, optional
    +410            Matplotlib axes object, by default None
    +411        c : str, optional
    +412            Color, by default 'k'
    +413
    +414        Returns
    +415        -------
    +416        matplotlib.axes
    +417            Matplotlib axes object
    +418
    +419        """
    +420        # self.location +=1
    +421        # print( self.location)
    +422        if ax is None:
    +423            ax = plt.gca()
    +424        new_time_domain = self.apodization(self._transient_data)
    +425        time_axis = linspace(0, self.transient_time, num=len(new_time_domain))
    +426        # plt.subplot(self.location)
    +427        ax.plot(time_axis, new_time_domain, color=c)
    +428        plt.xlabel("Time (s)")
    +429        plt.ylabel("Magnitude")
    +430        # plt.show()
    +431        return ax
    +432
    +433    def plot_frequency_domain(self, ax=None, c="k"):  # pragma: no cover
    +434        """Plot the frequency domain and magnitude
    +435
    +436        Parameters
    +437        ----------
    +438        ax : matplotlib.axes, optional
    +439            Matplotlib axes object, by default None
    +440        c : str, optional
    +441            Color, by default 'k'
    +442
    +443        Returns
    +444        -------
    +445        matplotlib.axes
    +446            Matplotlib axes object
    +447
    +448        """
    +449        # self.location +=1
    +450        # plt.subplot(self.location)
    +451        if ax is None:
    +452            ax = plt.gca()
    +453        frequency_domain, magnitude = self.get_frequency_domain(plot_result=False)
    +454        ax.plot(frequency_domain / 1000, magnitude, color=c)
    +455        plt.xlabel("KHz")
    +456        plt.ylabel("Magnitude")
    +457        # plt.show()
    +458        return ax
     
    @@ -1077,9 +1076,9 @@
    Methods
    • get_frequency_domain(plot_result=True). Get the frequency domain and magnitude from the transient data
    • -
    • get_mass_spectrum(auto_process=True, plot_result=True, keep_profile=True).
      +
    • get_mass_spectrum(auto_process=True, plot_result=True, keep_profile=True). Get the mass spectrum from the transient data
    • -
    • set_processing_parameter(apodization_method, number_of_truncations, number_of_zero_fills).
      +
    • set_processing_parameter(apodization_method, number_of_truncations, number_of_zero_fills). Set the processing parameters
    • scale_plot_size(factor=1.5). Scale the plot size by a factor
    • @@ -1109,19 +1108,18 @@
      Methods
    -
    87    def __init__(self, data, d_params):
    -88
    -89        self._transient_data = data
    -90
    -91        self.d_params = d_params
    -92
    -93        self.frequency_domain = None
    -94
    -95        self.magnitude = None
    -96
    -97        self.__set__parameters__objects(d_params)
    -98
    -99        self.__set__transient__time()
    +            
     89    def __init__(self, data, d_params):
    + 90        self._transient_data = data
    + 91
    + 92        self.d_params = d_params
    + 93
    + 94        self.frequency_domain = None
    + 95
    + 96        self.magnitude = None
    + 97
    + 98        self.__set__parameters__objects(d_params)
    + 99
    +100        self.__set__transient__time()
     
    @@ -1172,17 +1170,17 @@
    Methods
    -
    139    def scale_plot_size(self, factor=1.5):
    -140        """Scale the plot size by a factor
    -141
    -142        Parameters
    -143        ----------
    -144        factor : float, optional
    -145            The factor to scale the plot size, by default 1.5
    -146        """
    -147        
    -148        default_dpi = rcParamsDefault["figure.dpi"]
    -149        rcParams["figure.dpi"] = default_dpi * factor
    +            
    140    def scale_plot_size(self, factor=1.5):
    +141        """Scale the plot size by a factor
    +142
    +143        Parameters
    +144        ----------
    +145        factor : float, optional
    +146            The factor to scale the plot size, by default 1.5
    +147        """
    +148
    +149        default_dpi = rcParamsDefault["figure.dpi"]
    +150        rcParams["figure.dpi"] = default_dpi * factor
     
    @@ -1209,24 +1207,29 @@
    Parameters
    -
    155    def set_processing_parameter(self, apodization_method: str, number_of_truncations: int, number_of_zero_fills: int):
    -156        """ Set the processing parameters
    -157
    -158        Parameters
    -159        ----------
    -160        apodization_method : str
    -161            Apodization method to be used
    -162        number_of_truncations : int
    -163            Number of truncations to be used
    -164        number_of_zero_fills : int
    -165            Number of zero fills to be used
    -166        """
    -167
    -168        self.parameters.apodization_method = apodization_method
    -169
    -170        self.parameters.number_of_truncations = number_of_truncations
    -171
    -172        self.parameters.number_of_zero_fills = number_of_zero_fills
    +            
    156    def set_processing_parameter(
    +157        self,
    +158        apodization_method: str,
    +159        number_of_truncations: int,
    +160        number_of_zero_fills: int,
    +161    ):
    +162        """Set the processing parameters
    +163
    +164        Parameters
    +165        ----------
    +166        apodization_method : str
    +167            Apodization method to be used
    +168        number_of_truncations : int
    +169            Number of truncations to be used
    +170        number_of_zero_fills : int
    +171            Number of zero fills to be used
    +172        """
    +173
    +174        self.parameters.apodization_method = apodization_method
    +175
    +176        self.parameters.number_of_truncations = number_of_truncations
    +177
    +178        self.parameters.number_of_zero_fills = number_of_zero_fills
     
    @@ -1270,10 +1273,11 @@
    Parameters
    -
    183    def set_parameter_from_toml(self, parameters_path):
    -184        """ Set the processing parameters from a toml file
    -185        """
    -186        self._parameters = load_and_set_toml_parameters_class('Transient', self._parameters, parameters_path=parameters_path)
    +            
    189    def set_parameter_from_toml(self, parameters_path):
    +190        """Set the processing parameters from a toml file"""
    +191        self._parameters = load_and_set_toml_parameters_class(
    +192            "Transient", self._parameters, parameters_path=parameters_path
    +193        )
     
    @@ -1293,10 +1297,11 @@
    Parameters
    -
    188    def set_parameter_from_json(self, parameters_path):
    -189        """ Set the processing parameters from a json file
    -190        """
    -191        self._parameters = load_and_set_parameters_class('Transient', self._parameters, parameters_path=parameters_path)
    +            
    195    def set_parameter_from_json(self, parameters_path):
    +196        """Set the processing parameters from a json file"""
    +197        self._parameters = load_and_set_parameters_class(
    +198            "Transient", self._parameters, parameters_path=parameters_path
    +199        )
     
    @@ -1316,53 +1321,47 @@
    Parameters
    -
    193    def get_frequency_domain(self, plot_result=True):
    -194        """ Get the frequency domain and magnitude from the transient data
    -195
    -196        Parameters
    -197        ----------
    -198        plot_result : bool, optional
    -199            Plot the frequency domain and magnitude, by default True
    -200        
    -201        Returns
    -202        -------
    -203        frequency_domain : numpy.ndarray
    -204            Array with the frequency domain
    -205        magnitude : numpy.ndarray
    -206            Array with the magnitude
    -207        """
    +            
    201    def get_frequency_domain(self, plot_result=True):
    +202        """Get the frequency domain and magnitude from the transient data
    +203
    +204        Parameters
    +205        ----------
    +206        plot_result : bool, optional
    +207            Plot the frequency domain and magnitude, by default True
     208
    -209
    -210        if self.parameters.number_of_truncations > 0:
    -211
    -212            new_time_domain = self.truncation(self._transient_data)
    -213
    -214        else:
    -215
    -216            new_time_domain = self._transient_data
    -217
    -218        if self.parameters.apodization_method is not None:
    +209        Returns
    +210        -------
    +211        frequency_domain : numpy.ndarray
    +212            Array with the frequency domain
    +213        magnitude : numpy.ndarray
    +214            Array with the magnitude
    +215        """
    +216
    +217        if self.parameters.number_of_truncations > 0:
    +218            new_time_domain = self.truncation(self._transient_data)
     219
    -220            new_time_domain = self.apodization(new_time_domain)
    -221
    -222        if plot_result:
    -223
    -224            self._plot_transient(self._transient_data)
    +220        else:
    +221            new_time_domain = self._transient_data
    +222
    +223        if self.parameters.apodization_method is not None:
    +224            new_time_domain = self.apodization(new_time_domain)
     225
    -226            self._plot_transient(new_time_domain)
    -227
    -228        time_domain_y_zero_filled = self.zero_fill(new_time_domain)
    -229
    -230        self.transient_time = self.transient_time * (
    -231            self.parameters.number_of_zero_fills + 1
    -232        )
    -233
    -234        if plot_result:
    -235
    -236            self._plot_transient(time_domain_y_zero_filled)
    -237
    -238        return self.perform_magniture_mode_ft(time_domain_y_zero_filled)
    -239        # return frequency_domain, magnitude
    +226        if plot_result:
    +227            self._plot_transient(self._transient_data)
    +228
    +229            self._plot_transient(new_time_domain)
    +230
    +231        time_domain_y_zero_filled = self.zero_fill(new_time_domain)
    +232
    +233        self.transient_time = self.transient_time * (
    +234            self.parameters.number_of_zero_fills + 1
    +235        )
    +236
    +237        if plot_result:
    +238            self._plot_transient(time_domain_y_zero_filled)
    +239
    +240        return self.perform_magniture_mode_ft(time_domain_y_zero_filled)
    +241        # return frequency_domain, magnitude
     
    @@ -1398,39 +1397,44 @@
    Returns
    -
    241    def get_mass_spectrum(self, auto_process : bool=True, plot_result : bool=True,
    -242                         keep_profile : bool=True) -> MassSpecfromFreq:
    -243
    -244        """ Get the mass spectrum from the transient data
    -245
    -246        Parameters
    -247        ----------
    -248        auto_process : bool, optional
    -249            Process the transient data, by default True
    -250        plot_result : bool, optional
    -251            Plot the frequency domain and magnitude, by default True
    -252        keep_profile : bool, optional
    -253            Keep the profile data, by default True
    -254        
    -255        Returns
    -256        -------
    -257        MassSpecfromFreq
    -258            Mass spectrum object
    -259        """
    -260
    -261        frequency_domain, magnitude = self.get_frequency_domain(plot_result=plot_result)
    -262
    -263        if plot_result:
    -264
    -265            self._plot_frequency_domain(frequency_domain, magnitude)
    -266
    -267        self.d_params["filename"] = self.filename
    -268        self.d_params["dir_location"] = self.dir_location
    -269        
    -270        
    -271        return MassSpecfromFreq(
    -272            frequency_domain, magnitude, self.d_params, 
    -273            auto_process=auto_process, keep_profile=keep_profile)
    +            
    243    def get_mass_spectrum(
    +244        self,
    +245        auto_process: bool = True,
    +246        plot_result: bool = True,
    +247        keep_profile: bool = True,
    +248    ) -> MassSpecfromFreq:
    +249        """Get the mass spectrum from the transient data
    +250
    +251        Parameters
    +252        ----------
    +253        auto_process : bool, optional
    +254            Process the transient data, by default True
    +255        plot_result : bool, optional
    +256            Plot the frequency domain and magnitude, by default True
    +257        keep_profile : bool, optional
    +258            Keep the profile data, by default True
    +259
    +260        Returns
    +261        -------
    +262        MassSpecfromFreq
    +263            Mass spectrum object
    +264        """
    +265
    +266        frequency_domain, magnitude = self.get_frequency_domain(plot_result=plot_result)
    +267
    +268        if plot_result:
    +269            self._plot_frequency_domain(frequency_domain, magnitude)
    +270
    +271        self.d_params["filename"] = self.filename
    +272        self.d_params["dir_location"] = self.dir_location
    +273
    +274        return MassSpecfromFreq(
    +275            frequency_domain,
    +276            magnitude,
    +277            self.d_params,
    +278            auto_process=auto_process,
    +279            keep_profile=keep_profile,
    +280        )
     
    @@ -1522,34 +1526,34 @@
    Returns
    -
    342    def plot_transient(self, ax=None, c='k'): # pragma: no cover
    -343        """ Plot the transient data
    -344        
    -345        Parameters
    -346        ----------
    -347        ax : matplotlib.axes, optional
    -348            Matplotlib axes object, by default None
    -349        c : str, optional
    -350            Color, by default 'k'
    -351
    -352        Returns
    -353        -------
    -354        matplotlib.axes
    -355            Matplotlib axes object
    -356        
    -357        """
    +            
    344    def plot_transient(self, ax=None, c="k"):  # pragma: no cover
    +345        """Plot the transient data
    +346
    +347        Parameters
    +348        ----------
    +349        ax : matplotlib.axes, optional
    +350            Matplotlib axes object, by default None
    +351        c : str, optional
    +352            Color, by default 'k'
    +353
    +354        Returns
    +355        -------
    +356        matplotlib.axes
    +357            Matplotlib axes object
     358
    -359        # self.location +=1
    -360        # print( self.location)
    -361        if ax is None:
    -362            ax = plt.gca()
    -363        time_axis = linspace(0, self.transient_time, num=len(self._transient_data))
    -364        # plt.subplot(self.location)
    -365        ax.plot(time_axis, self._transient_data, color=c)
    -366        plt.xlabel("Time (s)")
    -367        plt.ylabel("Magnitude")
    -368        # plt.show()
    -369        return ax
    +359        """
    +360
    +361        # self.location +=1
    +362        # print( self.location)
    +363        if ax is None:
    +364            ax = plt.gca()
    +365        time_axis = linspace(0, self.transient_time, num=len(self._transient_data))
    +366        # plt.subplot(self.location)
    +367        ax.plot(time_axis, self._transient_data, color=c)
    +368        plt.xlabel("Time (s)")
    +369        plt.ylabel("Magnitude")
    +370        # plt.show()
    +371        return ax
     
    @@ -1584,36 +1588,36 @@
    Returns
    -
    371    def plot_zerofilled_transient(self, ax=None, c='k'): # pragma: no cover
    -372        """ Plot the transient data with zero fill
    -373        
    -374        Parameters
    -375        ----------
    -376        ax : matplotlib.axes, optional
    -377            Matplotlib axes object, by default None
    -378        c : str, optional
    -379            Color, by default 'k'
    -380
    -381        Returns
    -382        -------
    -383        matplotlib.axes
    -384            Matplotlib axes object
    -385        
    -386        """
    -387        if ax is None:
    -388            ax = plt.gca()
    -389        new_time_domain = self.apodization(self._transient_data)
    -390        time_domain_y_zero_filled = self.zero_fill(new_time_domain)
    -391        self.transient_time = self.transient_time * (
    -392            self.parameters.number_of_zero_fills + 1
    -393        )
    -394        time_axis = linspace(0, self.transient_time, num=len(time_domain_y_zero_filled))
    -395        # plt.subplot(self.location)
    -396        ax.plot(time_axis, time_domain_y_zero_filled, color=c)
    -397        plt.xlabel("Time (s)")
    -398        plt.ylabel("Magnitude")
    -399        # plt.show()
    -400        return ax
    +            
    373    def plot_zerofilled_transient(self, ax=None, c="k"):  # pragma: no cover
    +374        """Plot the transient data with zero fill
    +375
    +376        Parameters
    +377        ----------
    +378        ax : matplotlib.axes, optional
    +379            Matplotlib axes object, by default None
    +380        c : str, optional
    +381            Color, by default 'k'
    +382
    +383        Returns
    +384        -------
    +385        matplotlib.axes
    +386            Matplotlib axes object
    +387
    +388        """
    +389        if ax is None:
    +390            ax = plt.gca()
    +391        new_time_domain = self.apodization(self._transient_data)
    +392        time_domain_y_zero_filled = self.zero_fill(new_time_domain)
    +393        self.transient_time = self.transient_time * (
    +394            self.parameters.number_of_zero_fills + 1
    +395        )
    +396        time_axis = linspace(0, self.transient_time, num=len(time_domain_y_zero_filled))
    +397        # plt.subplot(self.location)
    +398        ax.plot(time_axis, time_domain_y_zero_filled, color=c)
    +399        plt.xlabel("Time (s)")
    +400        plt.ylabel("Magnitude")
    +401        # plt.show()
    +402        return ax
     
    @@ -1648,34 +1652,34 @@
    Returns
    -
    402    def plot_apodized_transient(self, ax=None, c='k'):  # pragma: no cover
    -403        """ Plot the transient data with apodization
    -404
    -405        Parameters
    -406        ----------
    -407        ax : matplotlib.axes, optional
    -408            Matplotlib axes object, by default None
    -409        c : str, optional
    -410            Color, by default 'k'
    -411
    -412        Returns
    -413        -------
    -414        matplotlib.axes
    -415            Matplotlib axes object
    -416        
    -417        """
    -418        # self.location +=1
    -419        # print( self.location)
    -420        if ax is None:
    -421            ax = plt.gca()
    -422        new_time_domain = self.apodization(self._transient_data)
    -423        time_axis = linspace(0, self.transient_time, num=len(new_time_domain))
    -424        # plt.subplot(self.location)
    -425        ax.plot(time_axis, new_time_domain, color=c)
    -426        plt.xlabel("Time (s)")
    -427        plt.ylabel("Magnitude")
    -428        # plt.show()
    -429        return ax
    +            
    404    def plot_apodized_transient(self, ax=None, c="k"):  # pragma: no cover
    +405        """Plot the transient data with apodization
    +406
    +407        Parameters
    +408        ----------
    +409        ax : matplotlib.axes, optional
    +410            Matplotlib axes object, by default None
    +411        c : str, optional
    +412            Color, by default 'k'
    +413
    +414        Returns
    +415        -------
    +416        matplotlib.axes
    +417            Matplotlib axes object
    +418
    +419        """
    +420        # self.location +=1
    +421        # print( self.location)
    +422        if ax is None:
    +423            ax = plt.gca()
    +424        new_time_domain = self.apodization(self._transient_data)
    +425        time_axis = linspace(0, self.transient_time, num=len(new_time_domain))
    +426        # plt.subplot(self.location)
    +427        ax.plot(time_axis, new_time_domain, color=c)
    +428        plt.xlabel("Time (s)")
    +429        plt.ylabel("Magnitude")
    +430        # plt.show()
    +431        return ax
     
    @@ -1710,32 +1714,32 @@
    Returns
    -
    432    def plot_frequency_domain(self, ax=None, c='k'):  # pragma: no cover
    -433        """ Plot the frequency domain and magnitude
    -434        
    -435        Parameters
    -436        ----------
    -437        ax : matplotlib.axes, optional
    -438            Matplotlib axes object, by default None
    -439        c : str, optional
    -440            Color, by default 'k'
    -441
    -442        Returns
    -443        -------
    -444        matplotlib.axes
    -445            Matplotlib axes object
    -446        
    -447        """
    -448        # self.location +=1
    -449        # plt.subplot(self.location)
    -450        if ax is None:
    -451            ax = plt.gca()
    -452        frequency_domain, magnitude = self.get_frequency_domain(plot_result=False)
    -453        ax.plot(frequency_domain / 1000, magnitude, color=c)
    -454        plt.xlabel("KHz")
    -455        plt.ylabel("Magnitude")
    -456        # plt.show()
    -457        return ax
    +            
    433    def plot_frequency_domain(self, ax=None, c="k"):  # pragma: no cover
    +434        """Plot the frequency domain and magnitude
    +435
    +436        Parameters
    +437        ----------
    +438        ax : matplotlib.axes, optional
    +439            Matplotlib axes object, by default None
    +440        c : str, optional
    +441            Color, by default 'k'
    +442
    +443        Returns
    +444        -------
    +445        matplotlib.axes
    +446            Matplotlib axes object
    +447
    +448        """
    +449        # self.location +=1
    +450        # plt.subplot(self.location)
    +451        if ax is None:
    +452            ax = plt.gca()
    +453        frequency_domain, magnitude = self.get_frequency_domain(plot_result=False)
    +454        ax.plot(frequency_domain / 1000, magnitude, color=c)
    +455        plt.xlabel("KHz")
    +456        plt.ylabel("Magnitude")
    +457        # plt.show()
    +458        return ax
     
    diff --git a/docs/corems/transient/input/brukerSolarix.html b/docs/corems/transient/input/brukerSolarix.html index 585b5eb6..ad7c0e2e 100644 --- a/docs/corems/transient/input/brukerSolarix.html +++ b/docs/corems/transient/input/brukerSolarix.html @@ -92,23 +92,23 @@

    3from copy import deepcopy 4from datetime import datetime 5from pathlib import Path - 6 - 7from numpy import genfromtxt, fromstring, dtype, fromfile, frombuffer, float64, float32 - 8import pandas as pd + 6from xml.dom import minidom + 7 + 8from numpy import dtype, float32, float64, frombuffer, fromfile, fromstring, genfromtxt 9from s3path import S3Path - 10from xml.dom import minidom - 11 + 10 + 11from corems.encapsulation.factory.parameters import default_parameters 12from corems.transient.factory.TransientClasses import Transient - 13from corems.encapsulation.factory.parameters import default_parameters + 13 14 15class ReadBrukerSolarix(object): - 16 """ A class used to Read a single Transient from Bruker's FT-MS acquisition station (fid, or ser) - 17 + 16 """A class used to Read a single Transient from Bruker's FT-MS acquisition station (fid, or ser) + 17 18 Parameters 19 ---------- 20 d_directory_location : str 21 the full path of the .d folder - 22 + 22 23 Attributes 24 -------- 25 d_directory_location : str @@ -121,12 +121,12 @@

    32 the full path of the fid or ser file 33 scan_attr : str 34 the full path of the scan.xml file - 35 - 36 + 35 + 36 37 Methods 38 ------- 39 * get_transient(). - 40 Read the data and settings returning a Transient class + 40 Read the data and settings returning a Transient class 41 * get_scan_attr(). 42 Read the scan retention times, TIC values and scan indices. 43 * locate_file(folder, type_file_name). @@ -137,445 +137,429 @@

    48 Read and set the correct frequency limits for the spectrum 49 * get_excite_sweep_range(filename). 50 Determine excitation sweep range from ExciteSweep file - 51 + 51 52 """ - 53 - 54 def __enter__(self ): - 55 - 56 return self.get_transient() - 57 - 58 def __exit__(self, exc_type, exc_val, exc_tb): - 59 - 60 return False - 61 - 62 def __init__(self, d_directory_location): - 63 - 64 if isinstance(d_directory_location, str): - 65 d_directory_location = Path(d_directory_location) - 66 - 67 if not d_directory_location.exists(): - 68 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) - 69 - 70 self.d_directory_location = d_directory_location - 71 - 72 self.file_location = d_directory_location - 73 - 74 try: - 75 - 76 self.parameter_filename_location = self.locate_file( - 77 d_directory_location, "apexAcquisition.method" - 78 ) - 79 self.transient_data_path = d_directory_location / "fid" - 80 - 81 if not self.transient_data_path.exists(): + 53 + 54 def __enter__(self): + 55 return self.get_transient() + 56 + 57 def __exit__(self, exc_type, exc_val, exc_tb): + 58 return False + 59 + 60 def __init__(self, d_directory_location): + 61 if isinstance(d_directory_location, str): + 62 d_directory_location = Path(d_directory_location) + 63 + 64 if not d_directory_location.exists(): + 65 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) + 66 + 67 self.d_directory_location = d_directory_location + 68 + 69 self.file_location = d_directory_location + 70 + 71 try: + 72 self.parameter_filename_location = self.locate_file( + 73 d_directory_location, "apexAcquisition.method" + 74 ) + 75 self.transient_data_path = d_directory_location / "fid" + 76 + 77 if not self.transient_data_path.exists(): + 78 self.transient_data_path = d_directory_location / "ser" + 79 + 80 if not self.transient_data_path.exists(): + 81 raise FileNotFoundError("Could not locate transient data") 82 - 83 self.transient_data_path = d_directory_location / "ser" 
- 84 - 85 if not self.transient_data_path.exists(): - 86 - 87 raise FileNotFoundError("Could not locate transient data") - 88 - 89 else: - 90 # get scan attributes - 91 self.scan_attr = d_directory_location / "scan.xml" + 83 else: + 84 # get scan attributes + 85 self.scan_attr = d_directory_location / "scan.xml" + 86 + 87 except: + 88 raise FileExistsError( + 89 "%s does not seem to be a valid Solarix Mass Spectrum" + 90 % (d_directory_location) + 91 ) 92 - 93 except: - 94 - 95 raise FileExistsError( - 96 "%s does not seem to be a valid Solarix Mass Spectrum" - 97 % (d_directory_location) - 98 ) - 99 -100 def get_scan_attr(self): -101 """ Function to get the scan retention times, TIC values and scan indices. -102 -103 Gets information from scan.xml file in the bruker .d folder. -104 Note this file is only present in some .d format - e.g. for imaging mode data, it is not present. -105 -106 Returns -107 ------- -108 dict_scan_rt_tic : dict -109 a dictionary with scan number as key and rt and tic as values -110 """ -111 -112 from bs4 import BeautifulSoup -113 -114 try: -115 soup = BeautifulSoup(self.scan_attr.open(),'xml') -116 except: -117 raise FileNotFoundError("Dataset does not appear to contain a 'scan.xml' file or it is misformated") -118 -119 list_rt = [float(rt.text) for rt in soup.find_all('minutes')] -120 list_tic = [float(tic.text) for tic in soup.find_all('tic')] -121 list_scan = [int(scan.text) for scan in soup.find_all('count')] -122 -123 dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic))) -124 -125 return dict_scan_rt_tic -126 -127 -128 def get_transient(self, scan_number=1): -129 """ Function to get the transient data and parameters from a Bruker Solarix .d folder. -130 -131 Parameters -132 ---------- -133 scan_number : int -134 the scan number to be read. Default is 1. 
-135 -136 Returns -137 ------- -138 Transient -139 a transient object -140 """ + 93 def get_scan_attr(self): + 94 """Function to get the scan retention times, TIC values and scan indices. + 95 + 96 Gets information from scan.xml file in the bruker .d folder. + 97 Note this file is only present in some .d format - e.g. for imaging mode data, it is not present. + 98 + 99 Returns +100 ------- +101 dict_scan_rt_tic : dict +102 a dictionary with scan number as key and rt and tic as values +103 """ +104 +105 from bs4 import BeautifulSoup +106 +107 try: +108 soup = BeautifulSoup(self.scan_attr.open(), "xml") +109 except: +110 raise FileNotFoundError( +111 "Dataset does not appear to contain a 'scan.xml' file or it is misformated" +112 ) +113 +114 list_rt = [float(rt.text) for rt in soup.find_all("minutes")] +115 list_tic = [float(tic.text) for tic in soup.find_all("tic")] +116 list_scan = [int(scan.text) for scan in soup.find_all("count")] +117 +118 dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic))) +119 +120 return dict_scan_rt_tic +121 +122 def get_transient(self, scan_number=1): +123 """Function to get the transient data and parameters from a Bruker Solarix .d folder. +124 +125 Parameters +126 ---------- +127 scan_number : int +128 the scan number to be read. Default is 1. +129 +130 Returns +131 ------- +132 Transient +133 a transient object +134 """ +135 +136 file_d_params = self.parse_parameters(self.parameter_filename_location) +137 +138 self.fix_freq_limits(file_d_params) +139 +140 from sys import platform 141 -142 file_d_params = self.parse_parameters(self.parameter_filename_location) -143 -144 self.fix_freq_limits(file_d_params) -145 -146 from sys import platform -147 -148 if platform == "win32": -149 # Windows... 
-150 dt = dtype("l") -151 else: -152 dt = dtype("i") -153 -154 # get rt, scan, and tic from scan.xml file, otherwise using 0 defaults values -155 -156 output_parameters = deepcopy(default_parameters(self.d_directory_location)) +142 if platform == "win32": +143 # Windows... +144 dt = dtype("l") +145 else: +146 dt = dtype("i") +147 +148 # get rt, scan, and tic from scan.xml file, otherwise using 0 defaults values +149 +150 output_parameters = deepcopy(default_parameters(self.d_directory_location)) +151 +152 if self.transient_data_path.name == "ser": +153 if self.scan_attr.exists(): +154 dict_scan_rt_tic = self.get_scan_attr() +155 +156 output_parameters["scan_number"] = scan_number 157 -158 if self.transient_data_path.name == 'ser': -159 -160 if self.scan_attr.exists(): -161 -162 dict_scan_rt_tic = self.get_scan_attr() +158 output_parameters["rt"] = dict_scan_rt_tic.get(scan_number)[0] +159 +160 output_parameters["tic"] = dict_scan_rt_tic.get(scan_number)[1] +161 +162 output_parameters["analyzer"] = "ICR" 163 -164 output_parameters["scan_number"] = scan_number +164 output_parameters["label"] = "Bruker_Frequency" 165 -166 output_parameters["rt"] = dict_scan_rt_tic.get(scan_number)[0] +166 output_parameters["Aterm"] = float(file_d_params.get("ML1")) 167 -168 output_parameters["tic"] = dict_scan_rt_tic.get(scan_number)[1] -169 -170 output_parameters["analyzer"] = "ICR" +168 output_parameters["Bterm"] = float(file_d_params.get("ML2")) +169 +170 output_parameters["Cterm"] = float(file_d_params.get("ML3")) 171 -172 output_parameters["label"] = "Bruker_Frequency" +172 output_parameters["exc_high_freq"] = float(file_d_params.get("EXC_Freq_High")) 173 -174 output_parameters["Aterm"] = float(file_d_params.get("ML1")) -175 -176 output_parameters["Bterm"] = float(file_d_params.get("ML2")) -177 -178 output_parameters["Cterm"] = float(file_d_params.get("ML3")) +174 output_parameters["exc_low_freq"] = float(file_d_params.get("EXC_Freq_Low")) +175 try: +176 
output_parameters["qpd_enabled"] = float(file_d_params.get("QPD_Enabled")) +177 except TypeError: # for older datasets which dont have this variable +178 output_parameters["qpd_enabled"] = 0 179 -180 output_parameters["exc_high_freq"] = float(file_d_params.get("EXC_Freq_High")) +180 output_parameters["mw_low"] = float(file_d_params.get("MW_low")) 181 -182 output_parameters["exc_low_freq"] = float(file_d_params.get("EXC_Freq_Low")) -183 try: -184 output_parameters["qpd_enabled"] = float(file_d_params.get("QPD_Enabled")) -185 except TypeError: # for older datasets which dont have this variable -186 output_parameters["qpd_enabled"] = 0 +182 output_parameters["mw_high"] = float(file_d_params.get("MW_high")) +183 +184 output_parameters["bandwidth"] = float(file_d_params.get("SW_h")) +185 +186 output_parameters["number_data_points"] = int(file_d_params.get("TD")) 187 -188 output_parameters["mw_low"] = float(file_d_params.get("MW_low")) +188 output_parameters["polarity"] = str(file_d_params.get("Polarity")) 189 -190 output_parameters["mw_high"] = float(file_d_params.get("MW_high")) +190 output_parameters["acquisition_time"] = file_d_params.get("acquisition_time") 191 -192 output_parameters["bandwidth"] = float(file_d_params.get("SW_h")) +192 data_points = int(file_d_params.get("TD")) 193 -194 output_parameters["number_data_points"] = int(file_d_params.get("TD")) -195 -196 output_parameters["polarity"] = str(file_d_params.get("Polarity")) -197 -198 output_parameters["acquisition_time"] = file_d_params.get("acquisition_time") -199 -200 data_points = int(file_d_params.get("TD")) -201 -202 scan = output_parameters["scan_number"] -203 from io import BytesIO -204 if self.transient_data_path.name == 'ser': -205 -206 if isinstance(self.transient_data_path, S3Path): -207 databin = BytesIO(self.transient_data_path.open('rb').read()) -208 -209 else: -210 databin = self.transient_data_path.open('rb') -211 -212 databin.seek((scan-1)*4*data_points) -213 #read scan data and parse to 
32int struct -214 data = frombuffer(databin.read(4*data_points), dtype=dt) -215 -216 else: -217 -218 if isinstance(self.transient_data_path, S3Path): -219 data = frombuffer(self.transient_data_path.open('rb').read(), dtype=dt) -220 else: -221 data = fromfile(self.transient_data_path, dtype=dt) -222 -223 return Transient(data, output_parameters) -224 -225 # for key, values in default_parameters.items(): -226 # print(key, values) -227 def fix_freq_limits(self, d_parameters): -228 """ Function to read and set the correct frequency limits for the spectrum -229 -230 Notes -231 -------- -232 This is using the excitation limits from the apexAcquisition.method file, -233 which may not match the intended detection limits in edge cases. -234 In default acquisitions, excitation and detection are the same. -235 But, they may not be in some cases with selective excitation, custom excite waveforms, or in 2DMS applications. -236 -237 Parameters -238 ---------- -239 d_parameters : dict -240 a dictionary with the parameters from the apexAcquisition.method file -241 """ -242 -243 highfreq = float(d_parameters.get("EXC_Freq_High")) -244 -245 lowfreq = float(d_parameters.get("EXC_Freq_Low")) -246 -247 # CR for compatibility with Apex format as there is no EXciteSweep file -248 if not highfreq and lowfreq: -249 -250 excitation_sweep_filelocation = self.locate_file( -251 self.d_directory_location, "ExciteSweep" -252 ) -253 lowfreq, highfreq = self.get_excite_sweep_range( -254 excitation_sweep_filelocation -255 ) -256 d_parameters["EXC_Freq_High"] = highfreq -257 d_parameters["EXC_Freq_Low"] = lowfreq -258 -259 @staticmethod -260 def get_excite_sweep_range(filename): -261 """ Function to determine excitation sweep range from ExciteSweep file -262 -263 This looks at the first and last rows of the ExciteSweep file to determine the excitation frequency range. -264 Note that this assumes the excitation sweep was linear and the first and last rows are the lowest and highest frequencies. 
-265 This is presumably always true, but again may be incorrect for edge cases with custom excitation waveforms. -266 -267 Parameters -268 ---------- -269 filename : str -270 the full path to the ExciteSweep file -271 -272 """ -273 ExciteSweep_lines = genfromtxt(filename, comments="*", delimiter="\n") -274 # CR ready if we need the full array -275 highfreq = fromstring(ExciteSweep_lines[0]) -276 lowfreq = fromstring(ExciteSweep_lines[-1]) -277 -278 return lowfreq[0], highfreq[0] -279 -280 @staticmethod -281 def locate_file(folder, type_file_name='apexAcquisition.method'): -282 """ Function to locate a file in a folder +194 scan = output_parameters["scan_number"] +195 from io import BytesIO +196 +197 if self.transient_data_path.name == "ser": +198 if isinstance(self.transient_data_path, S3Path): +199 databin = BytesIO(self.transient_data_path.open("rb").read()) +200 +201 else: +202 databin = self.transient_data_path.open("rb") +203 +204 databin.seek((scan - 1) * 4 * data_points) +205 # read scan data and parse to 32int struct +206 data = frombuffer(databin.read(4 * data_points), dtype=dt) +207 +208 else: +209 if isinstance(self.transient_data_path, S3Path): +210 data = frombuffer(self.transient_data_path.open("rb").read(), dtype=dt) +211 else: +212 data = fromfile(self.transient_data_path, dtype=dt) +213 +214 return Transient(data, output_parameters) +215 +216 # for key, values in default_parameters.items(): +217 # print(key, values) +218 def fix_freq_limits(self, d_parameters): +219 """Function to read and set the correct frequency limits for the spectrum +220 +221 Notes +222 -------- +223 This is using the excitation limits from the apexAcquisition.method file, +224 which may not match the intended detection limits in edge cases. +225 In default acquisitions, excitation and detection are the same. +226 But, they may not be in some cases with selective excitation, custom excite waveforms, or in 2DMS applications. 
+227 +228 Parameters +229 ---------- +230 d_parameters : dict +231 a dictionary with the parameters from the apexAcquisition.method file +232 """ +233 +234 highfreq = float(d_parameters.get("EXC_Freq_High")) +235 +236 lowfreq = float(d_parameters.get("EXC_Freq_Low")) +237 +238 # CR for compatibility with Apex format as there is no EXciteSweep file +239 if not highfreq and lowfreq: +240 excitation_sweep_filelocation = self.locate_file( +241 self.d_directory_location, "ExciteSweep" +242 ) +243 lowfreq, highfreq = self.get_excite_sweep_range( +244 excitation_sweep_filelocation +245 ) +246 d_parameters["EXC_Freq_High"] = highfreq +247 d_parameters["EXC_Freq_Low"] = lowfreq +248 +249 @staticmethod +250 def get_excite_sweep_range(filename): +251 """Function to determine excitation sweep range from ExciteSweep file +252 +253 This looks at the first and last rows of the ExciteSweep file to determine the excitation frequency range. +254 Note that this assumes the excitation sweep was linear and the first and last rows are the lowest and highest frequencies. +255 This is presumably always true, but again may be incorrect for edge cases with custom excitation waveforms. 
+256 +257 Parameters +258 ---------- +259 filename : str +260 the full path to the ExciteSweep file +261 +262 """ +263 ExciteSweep_lines = genfromtxt(filename, comments="*", delimiter="\n") +264 # CR ready if we need the full array +265 highfreq = fromstring(ExciteSweep_lines[0]) +266 lowfreq = fromstring(ExciteSweep_lines[-1]) +267 +268 return lowfreq[0], highfreq[0] +269 +270 @staticmethod +271 def locate_file(folder, type_file_name="apexAcquisition.method"): +272 """Function to locate a file in a folder +273 +274 Find the full path of a specific file within the acquisition .d folder or subfolders +275 +276 Parameters +277 ---------- +278 folder : str +279 the full path to the folder +280 type_file_name : str +281 the name of the file to be located +282 Expected options: ExciteSweep or apexAcquisition.method 283 -284 Find the full path of a specific file within the acquisition .d folder or subfolders -285 -286 Parameters -287 ---------- -288 folder : str -289 the full path to the folder -290 type_file_name : str -291 the name of the file to be located -292 Expected options: ExciteSweep or apexAcquisition.method -293 -294 Returns -295 ------- -296 str -297 the full path to the file -298 -299 Notes -300 ----- -301 adapted from code from SPIKE library, https://github.com/spike-project/spike -302 -303 """ -304 -305 from pathlib import Path -306 -307 #directory_location = folder.glob( '**/*apexAcquisition.method') -308 directory_location = folder.glob( '**/*' + type_file_name) -309 result = list(directory_location) -310 if len(result) > 1: +284 Returns +285 ------- +286 str +287 the full path to the file +288 +289 Notes +290 ----- +291 adapted from code from SPIKE library, https://github.com/spike-project/spike +292 +293 """ +294 +295 from pathlib import Path +296 +297 # directory_location = folder.glob( '**/*apexAcquisition.method') +298 directory_location = folder.glob("**/*" + type_file_name) +299 result = list(directory_location) +300 if len(result) > 1: +301 
raise Exception( +302 "You have more than 1 %s file in the %s folder, using the first one" +303 % (type_file_name, folder) +304 ) +305 +306 elif len(result) == 0: +307 raise Exception( +308 "You don't have any %s file in the %s folder, please double check the path" +309 % (type_file_name, folder) +310 ) 311 -312 raise Exception( -313 "You have more than 1 %s file in the %s folder, using the first one" -314 % (type_file_name, folder) -315 ) -316 -317 elif len(result) == 0: -318 -319 raise Exception( -320 "You don't have any %s file in the %s folder, please double check the path" -321 % (type_file_name, folder) -322 ) -323 -324 return result[0] -325 -326 @staticmethod -327 def parse_parameters(parameters_filename): -328 """ Function to parse the parameters from apexAcquisition.method file -329 -330 Open the given file and retrieve all parameters from apexAcquisition.method -331 None is written when no value for value is found -332 -333 structure : <param name = "AMS_ActiveExclusion"><value>0</value></param> -334 -335 Parameters -336 ---------- -337 parameters_filename : str -338 the full path to the apexAcquisition.method file -339 -340 Returns -341 ------- -342 dict -343 a dictionary with the parameters and values -344 -345 Notes -346 ----- -347 Adapted from code from SPIKE library, https://github.com/spike-project/spike. 
-348 Code may not handle all possible parameters, but should be sufficient for most common use cases -349 """ -350 -351 #TODO: change to beautiful soup xml parsing -352 -353 -354 xmldoc = minidom.parse(parameters_filename.open()) -355 -356 x = xmldoc.documentElement -357 parameter_dict = {} -358 children = x.childNodes -359 for child in children: -360 # print( child.node) -361 if child.nodeName == 'methodmetadata': +312 return result[0] +313 +314 @staticmethod +315 def parse_parameters(parameters_filename): +316 """Function to parse the parameters from apexAcquisition.method file +317 +318 Open the given file and retrieve all parameters from apexAcquisition.method +319 None is written when no value for value is found +320 +321 structure : <param name = "AMS_ActiveExclusion"><value>0</value></param> +322 +323 Parameters +324 ---------- +325 parameters_filename : str +326 the full path to the apexAcquisition.method file +327 +328 Returns +329 ------- +330 dict +331 a dictionary with the parameters and values +332 +333 Notes +334 ----- +335 Adapted from code from SPIKE library, https://github.com/spike-project/spike. 
+336 Code may not handle all possible parameters, but should be sufficient for most common use cases +337 """ +338 +339 # TODO: change to beautiful soup xml parsing +340 +341 xmldoc = minidom.parse(parameters_filename.open()) +342 +343 x = xmldoc.documentElement +344 parameter_dict = {} +345 children = x.childNodes +346 for child in children: +347 # print( child.node) +348 if child.nodeName == "methodmetadata": +349 sections = child.childNodes +350 for section in sections: +351 for element in section.childNodes: +352 if element.nodeName == "date": +353 # if element.nodeName == "primarykey": +354 +355 date_time_str = element.childNodes[0].nodeValue +356 # parameter_dict["acquisition_time"] = pd.to_datetime(date_time_str, infer_datetime_format=True).to_pydatetime() +357 parameter_dict["acquisition_time"] = datetime.strptime( +358 date_time_str, "%b_%d_%Y %H:%M:%S.%f" +359 ) +360 +361 if child.nodeName == "reportinfo": 362 sections = child.childNodes 363 for section in sections: -364 for element in section.childNodes: -365 if element.nodeName == "date": -366 #if element.nodeName == "primarykey": -367 -368 date_time_str = (element.childNodes[0].nodeValue) -369 #parameter_dict["acquisition_time"] = pd.to_datetime(date_time_str, infer_datetime_format=True).to_pydatetime() -370 parameter_dict["acquisition_time"] = datetime.strptime(date_time_str, "%b_%d_%Y %H:%M:%S.%f") -371 -372 -373 if child.nodeName == "reportinfo": -374 sections = child.childNodes -375 for section in sections: -376 if section.nodeName == "section": -377 if section.getAttribute("title") == "Main": -378 for element in section.childNodes: -379 if element.nodeName == "section": -380 if element.getAttribute("title") == "Polarity": -381 if ( -382 str( -383 element.childNodes[1].getAttribute( -384 "value" -385 ) -386 ) -387 == "Negative" -388 ): -389 parameter_dict["Polarity"] = -1 -390 else: -391 parameter_dict["Polarity"] = 1 -392 -393 if child.nodeName == "paramlist": -394 params = child.childNodes -395 
for param in params: -396 # print( param.nodeName) -397 if param.nodeName == "param": -398 paramenter_label = str(param.getAttribute("name")) -399 for element in param.childNodes: -400 if element.nodeName == "value": -401 try: -402 parameter_value = str(element.firstChild.toxml()) -403 # print v -404 except: -405 parameter_value = None +364 if section.nodeName == "section": +365 if section.getAttribute("title") == "Main": +366 for element in section.childNodes: +367 if element.nodeName == "section": +368 if element.getAttribute("title") == "Polarity": +369 if ( +370 str( +371 element.childNodes[1].getAttribute( +372 "value" +373 ) +374 ) +375 == "Negative" +376 ): +377 parameter_dict["Polarity"] = -1 +378 else: +379 parameter_dict["Polarity"] = 1 +380 +381 if child.nodeName == "paramlist": +382 params = child.childNodes +383 for param in params: +384 # print( param.nodeName) +385 if param.nodeName == "param": +386 paramenter_label = str(param.getAttribute("name")) +387 for element in param.childNodes: +388 if element.nodeName == "value": +389 try: +390 parameter_value = str(element.firstChild.toxml()) +391 # print v +392 except: +393 parameter_value = None +394 +395 parameter_dict[paramenter_label] = parameter_value +396 +397 return parameter_dict +398 +399 def parse_sqlite(self, sqlite_filename="chromatography-data.sqlite"): +400 """ """ +401 import sqlite3 +402 +403 def read_sqlite_file(file_path, table_name): +404 """ +405 Read data from a SQLite database file and return it as a list of tuples 406 -407 parameter_dict[paramenter_label] = parameter_value -408 -409 return parameter_dict -410 -411 -412 def parse_sqlite(self, sqlite_filename="chromatography-data.sqlite"): -413 """ -414 -415 """ -416 import sqlite3 -417 -418 def read_sqlite_file(file_path, table_name): -419 """ -420 Read data from a SQLite database file and return it as a list of tuples -421 -422 Parameters -423 ---------- -424 file_path : str -425 the full path to the SQLite database file -426 
table_name : str -427 the name of the table to be read -428 -429 Returns -430 ------- -431 list -432 a list of tuples with the data from the table -433 """ -434 # Connect to the SQLite database file -435 conn = sqlite3.connect(file_path) -436 cursor = conn.cursor() -437 -438 # Execute a query to select data from a table (replace 'table_name' with your table's name) -439 query = f"SELECT * FROM {table_name}" -440 cursor.execute(query) -441 -442 # Fetch all rows from the result set -443 rows = cursor.fetchall() -444 stream = [] -445 # Print or process the fetched rows -446 for row in rows: -447 stream.append(row) -448 # print(row) # Print each row, you can also process it differently +407 Parameters +408 ---------- +409 file_path : str +410 the full path to the SQLite database file +411 table_name : str +412 the name of the table to be read +413 +414 Returns +415 ------- +416 list +417 a list of tuples with the data from the table +418 """ +419 # Connect to the SQLite database file +420 conn = sqlite3.connect(file_path) +421 cursor = conn.cursor() +422 +423 # Execute a query to select data from a table (replace 'table_name' with your table's name) +424 query = f"SELECT * FROM {table_name}" +425 cursor.execute(query) +426 +427 # Fetch all rows from the result set +428 rows = cursor.fetchall() +429 stream = [] +430 # Print or process the fetched rows +431 for row in rows: +432 stream.append(row) +433 # print(row) # Print each row, you can also process it differently +434 +435 # Close the cursor and the connection +436 cursor.close() +437 conn.close() +438 return stream +439 +440 def parse_binary(binary, type): +441 """ +442 Parse binary data from the sqlite data streams +443 """ +444 if type == "double": +445 data = frombuffer(binary, dtype=float64) +446 elif type == "float": +447 data = frombuffer(binary, dtype=float32) +448 return data 449 -450 # Close the cursor and the connection -451 cursor.close() -452 conn.close() -453 return stream -454 -455 def 
parse_binary(binary, type): -456 """ -457 Parse binary data from the sqlite data streams -458 """ -459 if type == "double": -460 data = frombuffer(binary, dtype=float64) -461 elif type == "float": -462 data = frombuffer(binary, dtype=float32) -463 return data -464 -465 sqlite_filelocation = self.locate_file( -466 self.d_directory_location, sqlite_filename -467 ) -468 table_name = "TraceSources" -469 trace_sources = read_sqlite_file(sqlite_filelocation, table_name) -470 table_name = "TraceChunks" -471 trace_chunks = read_sqlite_file(sqlite_filelocation, table_name) -472 times = [] -473 values = [] -474 trace_type = {} -475 -476 -477 for index, source in enumerate(trace_sources): -478 trace_id = source[0] -479 trace_type[source[1]] = {"times": [], "values": []} -480 for index, chunk in enumerate(trace_chunks): -481 id = chunk[0] -482 times = parse_binary(chunk[1], "double") -483 values = parse_binary(chunk[2], "float") -484 for time, value in zip(times, values): -485 if source[0] == id: -486 trace_type[source[1]]["times"].append(time) -487 trace_type[source[1]]["values"].append(value) -488 -489 return trace_type +450 sqlite_filelocation = self.locate_file( +451 self.d_directory_location, sqlite_filename +452 ) +453 table_name = "TraceSources" +454 trace_sources = read_sqlite_file(sqlite_filelocation, table_name) +455 table_name = "TraceChunks" +456 trace_chunks = read_sqlite_file(sqlite_filelocation, table_name) +457 times = [] +458 values = [] +459 trace_type = {} +460 +461 for index, source in enumerate(trace_sources): +462 trace_id = source[0] +463 trace_type[source[1]] = {"times": [], "values": []} +464 for index, chunk in enumerate(trace_chunks): +465 id = chunk[0] +466 times = parse_binary(chunk[1], "double") +467 values = parse_binary(chunk[2], "float") +468 for time, value in zip(times, values): +469 if source[0] == id: +470 trace_type[source[1]]["times"].append(time) +471 trace_type[source[1]]["values"].append(value) +472 +473 return trace_type

    @@ -592,13 +576,13 @@

     16class ReadBrukerSolarix(object):
    - 17    """    A class used to Read a single Transient from Bruker's FT-MS acquisition station (fid, or ser)
    - 18        
    + 17    """A class used to Read a single Transient from Bruker's FT-MS acquisition station (fid, or ser)
    + 18
      19    Parameters
      20    ----------
      21    d_directory_location : str
      22        the full path of the .d folder
    - 23    
    + 23
      24    Attributes
      25    --------
      26    d_directory_location : str
    @@ -611,12 +595,12 @@ 

    33 the full path of the fid or ser file 34 scan_attr : str 35 the full path of the scan.xml file - 36 - 37 + 36 + 37 38 Methods 39 ------- 40 * get_transient(). - 41 Read the data and settings returning a Transient class + 41 Read the data and settings returning a Transient class 42 * get_scan_attr(). 43 Read the scan retention times, TIC values and scan indices. 44 * locate_file(folder, type_file_name). @@ -627,445 +611,429 @@

    49 Read and set the correct frequency limits for the spectrum 50 * get_excite_sweep_range(filename). 51 Determine excitation sweep range from ExciteSweep file - 52 + 52 53 """ - 54 - 55 def __enter__(self ): - 56 - 57 return self.get_transient() - 58 - 59 def __exit__(self, exc_type, exc_val, exc_tb): - 60 - 61 return False - 62 - 63 def __init__(self, d_directory_location): - 64 - 65 if isinstance(d_directory_location, str): - 66 d_directory_location = Path(d_directory_location) - 67 - 68 if not d_directory_location.exists(): - 69 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) - 70 - 71 self.d_directory_location = d_directory_location - 72 - 73 self.file_location = d_directory_location - 74 - 75 try: - 76 - 77 self.parameter_filename_location = self.locate_file( - 78 d_directory_location, "apexAcquisition.method" - 79 ) - 80 self.transient_data_path = d_directory_location / "fid" - 81 - 82 if not self.transient_data_path.exists(): + 54 + 55 def __enter__(self): + 56 return self.get_transient() + 57 + 58 def __exit__(self, exc_type, exc_val, exc_tb): + 59 return False + 60 + 61 def __init__(self, d_directory_location): + 62 if isinstance(d_directory_location, str): + 63 d_directory_location = Path(d_directory_location) + 64 + 65 if not d_directory_location.exists(): + 66 raise FileNotFoundError("File does not exist: " + str(d_directory_location)) + 67 + 68 self.d_directory_location = d_directory_location + 69 + 70 self.file_location = d_directory_location + 71 + 72 try: + 73 self.parameter_filename_location = self.locate_file( + 74 d_directory_location, "apexAcquisition.method" + 75 ) + 76 self.transient_data_path = d_directory_location / "fid" + 77 + 78 if not self.transient_data_path.exists(): + 79 self.transient_data_path = d_directory_location / "ser" + 80 + 81 if not self.transient_data_path.exists(): + 82 raise FileNotFoundError("Could not locate transient data") 83 - 84 self.transient_data_path = d_directory_location / "ser" 
- 85 - 86 if not self.transient_data_path.exists(): - 87 - 88 raise FileNotFoundError("Could not locate transient data") - 89 - 90 else: - 91 # get scan attributes - 92 self.scan_attr = d_directory_location / "scan.xml" + 84 else: + 85 # get scan attributes + 86 self.scan_attr = d_directory_location / "scan.xml" + 87 + 88 except: + 89 raise FileExistsError( + 90 "%s does not seem to be a valid Solarix Mass Spectrum" + 91 % (d_directory_location) + 92 ) 93 - 94 except: - 95 - 96 raise FileExistsError( - 97 "%s does not seem to be a valid Solarix Mass Spectrum" - 98 % (d_directory_location) - 99 ) -100 -101 def get_scan_attr(self): -102 """ Function to get the scan retention times, TIC values and scan indices. -103 -104 Gets information from scan.xml file in the bruker .d folder. -105 Note this file is only present in some .d format - e.g. for imaging mode data, it is not present. -106 -107 Returns -108 ------- -109 dict_scan_rt_tic : dict -110 a dictionary with scan number as key and rt and tic as values -111 """ -112 -113 from bs4 import BeautifulSoup -114 -115 try: -116 soup = BeautifulSoup(self.scan_attr.open(),'xml') -117 except: -118 raise FileNotFoundError("Dataset does not appear to contain a 'scan.xml' file or it is misformated") -119 -120 list_rt = [float(rt.text) for rt in soup.find_all('minutes')] -121 list_tic = [float(tic.text) for tic in soup.find_all('tic')] -122 list_scan = [int(scan.text) for scan in soup.find_all('count')] -123 -124 dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic))) -125 -126 return dict_scan_rt_tic -127 -128 -129 def get_transient(self, scan_number=1): -130 """ Function to get the transient data and parameters from a Bruker Solarix .d folder. -131 -132 Parameters -133 ---------- -134 scan_number : int -135 the scan number to be read. Default is 1. 
-136 -137 Returns -138 ------- -139 Transient -140 a transient object -141 """ + 94 def get_scan_attr(self): + 95 """Function to get the scan retention times, TIC values and scan indices. + 96 + 97 Gets information from scan.xml file in the bruker .d folder. + 98 Note this file is only present in some .d format - e.g. for imaging mode data, it is not present. + 99 +100 Returns +101 ------- +102 dict_scan_rt_tic : dict +103 a dictionary with scan number as key and rt and tic as values +104 """ +105 +106 from bs4 import BeautifulSoup +107 +108 try: +109 soup = BeautifulSoup(self.scan_attr.open(), "xml") +110 except: +111 raise FileNotFoundError( +112 "Dataset does not appear to contain a 'scan.xml' file or it is misformated" +113 ) +114 +115 list_rt = [float(rt.text) for rt in soup.find_all("minutes")] +116 list_tic = [float(tic.text) for tic in soup.find_all("tic")] +117 list_scan = [int(scan.text) for scan in soup.find_all("count")] +118 +119 dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic))) +120 +121 return dict_scan_rt_tic +122 +123 def get_transient(self, scan_number=1): +124 """Function to get the transient data and parameters from a Bruker Solarix .d folder. +125 +126 Parameters +127 ---------- +128 scan_number : int +129 the scan number to be read. Default is 1. +130 +131 Returns +132 ------- +133 Transient +134 a transient object +135 """ +136 +137 file_d_params = self.parse_parameters(self.parameter_filename_location) +138 +139 self.fix_freq_limits(file_d_params) +140 +141 from sys import platform 142 -143 file_d_params = self.parse_parameters(self.parameter_filename_location) -144 -145 self.fix_freq_limits(file_d_params) -146 -147 from sys import platform -148 -149 if platform == "win32": -150 # Windows... 
-151 dt = dtype("l") -152 else: -153 dt = dtype("i") -154 -155 # get rt, scan, and tic from scan.xml file, otherwise using 0 defaults values -156 -157 output_parameters = deepcopy(default_parameters(self.d_directory_location)) +143 if platform == "win32": +144 # Windows... +145 dt = dtype("l") +146 else: +147 dt = dtype("i") +148 +149 # get rt, scan, and tic from scan.xml file, otherwise using 0 defaults values +150 +151 output_parameters = deepcopy(default_parameters(self.d_directory_location)) +152 +153 if self.transient_data_path.name == "ser": +154 if self.scan_attr.exists(): +155 dict_scan_rt_tic = self.get_scan_attr() +156 +157 output_parameters["scan_number"] = scan_number 158 -159 if self.transient_data_path.name == 'ser': -160 -161 if self.scan_attr.exists(): -162 -163 dict_scan_rt_tic = self.get_scan_attr() +159 output_parameters["rt"] = dict_scan_rt_tic.get(scan_number)[0] +160 +161 output_parameters["tic"] = dict_scan_rt_tic.get(scan_number)[1] +162 +163 output_parameters["analyzer"] = "ICR" 164 -165 output_parameters["scan_number"] = scan_number +165 output_parameters["label"] = "Bruker_Frequency" 166 -167 output_parameters["rt"] = dict_scan_rt_tic.get(scan_number)[0] +167 output_parameters["Aterm"] = float(file_d_params.get("ML1")) 168 -169 output_parameters["tic"] = dict_scan_rt_tic.get(scan_number)[1] -170 -171 output_parameters["analyzer"] = "ICR" +169 output_parameters["Bterm"] = float(file_d_params.get("ML2")) +170 +171 output_parameters["Cterm"] = float(file_d_params.get("ML3")) 172 -173 output_parameters["label"] = "Bruker_Frequency" +173 output_parameters["exc_high_freq"] = float(file_d_params.get("EXC_Freq_High")) 174 -175 output_parameters["Aterm"] = float(file_d_params.get("ML1")) -176 -177 output_parameters["Bterm"] = float(file_d_params.get("ML2")) -178 -179 output_parameters["Cterm"] = float(file_d_params.get("ML3")) +175 output_parameters["exc_low_freq"] = float(file_d_params.get("EXC_Freq_Low")) +176 try: +177 
output_parameters["qpd_enabled"] = float(file_d_params.get("QPD_Enabled")) +178 except TypeError: # for older datasets which dont have this variable +179 output_parameters["qpd_enabled"] = 0 180 -181 output_parameters["exc_high_freq"] = float(file_d_params.get("EXC_Freq_High")) +181 output_parameters["mw_low"] = float(file_d_params.get("MW_low")) 182 -183 output_parameters["exc_low_freq"] = float(file_d_params.get("EXC_Freq_Low")) -184 try: -185 output_parameters["qpd_enabled"] = float(file_d_params.get("QPD_Enabled")) -186 except TypeError: # for older datasets which dont have this variable -187 output_parameters["qpd_enabled"] = 0 +183 output_parameters["mw_high"] = float(file_d_params.get("MW_high")) +184 +185 output_parameters["bandwidth"] = float(file_d_params.get("SW_h")) +186 +187 output_parameters["number_data_points"] = int(file_d_params.get("TD")) 188 -189 output_parameters["mw_low"] = float(file_d_params.get("MW_low")) +189 output_parameters["polarity"] = str(file_d_params.get("Polarity")) 190 -191 output_parameters["mw_high"] = float(file_d_params.get("MW_high")) +191 output_parameters["acquisition_time"] = file_d_params.get("acquisition_time") 192 -193 output_parameters["bandwidth"] = float(file_d_params.get("SW_h")) +193 data_points = int(file_d_params.get("TD")) 194 -195 output_parameters["number_data_points"] = int(file_d_params.get("TD")) -196 -197 output_parameters["polarity"] = str(file_d_params.get("Polarity")) -198 -199 output_parameters["acquisition_time"] = file_d_params.get("acquisition_time") -200 -201 data_points = int(file_d_params.get("TD")) -202 -203 scan = output_parameters["scan_number"] -204 from io import BytesIO -205 if self.transient_data_path.name == 'ser': -206 -207 if isinstance(self.transient_data_path, S3Path): -208 databin = BytesIO(self.transient_data_path.open('rb').read()) -209 -210 else: -211 databin = self.transient_data_path.open('rb') -212 -213 databin.seek((scan-1)*4*data_points) -214 #read scan data and parse to 
32int struct -215 data = frombuffer(databin.read(4*data_points), dtype=dt) -216 -217 else: -218 -219 if isinstance(self.transient_data_path, S3Path): -220 data = frombuffer(self.transient_data_path.open('rb').read(), dtype=dt) -221 else: -222 data = fromfile(self.transient_data_path, dtype=dt) -223 -224 return Transient(data, output_parameters) -225 -226 # for key, values in default_parameters.items(): -227 # print(key, values) -228 def fix_freq_limits(self, d_parameters): -229 """ Function to read and set the correct frequency limits for the spectrum -230 -231 Notes -232 -------- -233 This is using the excitation limits from the apexAcquisition.method file, -234 which may not match the intended detection limits in edge cases. -235 In default acquisitions, excitation and detection are the same. -236 But, they may not be in some cases with selective excitation, custom excite waveforms, or in 2DMS applications. -237 -238 Parameters -239 ---------- -240 d_parameters : dict -241 a dictionary with the parameters from the apexAcquisition.method file -242 """ -243 -244 highfreq = float(d_parameters.get("EXC_Freq_High")) -245 -246 lowfreq = float(d_parameters.get("EXC_Freq_Low")) -247 -248 # CR for compatibility with Apex format as there is no EXciteSweep file -249 if not highfreq and lowfreq: -250 -251 excitation_sweep_filelocation = self.locate_file( -252 self.d_directory_location, "ExciteSweep" -253 ) -254 lowfreq, highfreq = self.get_excite_sweep_range( -255 excitation_sweep_filelocation -256 ) -257 d_parameters["EXC_Freq_High"] = highfreq -258 d_parameters["EXC_Freq_Low"] = lowfreq -259 -260 @staticmethod -261 def get_excite_sweep_range(filename): -262 """ Function to determine excitation sweep range from ExciteSweep file -263 -264 This looks at the first and last rows of the ExciteSweep file to determine the excitation frequency range. -265 Note that this assumes the excitation sweep was linear and the first and last rows are the lowest and highest frequencies. 
-266 This is presumably always true, but again may be incorrect for edge cases with custom excitation waveforms. -267 -268 Parameters -269 ---------- -270 filename : str -271 the full path to the ExciteSweep file -272 -273 """ -274 ExciteSweep_lines = genfromtxt(filename, comments="*", delimiter="\n") -275 # CR ready if we need the full array -276 highfreq = fromstring(ExciteSweep_lines[0]) -277 lowfreq = fromstring(ExciteSweep_lines[-1]) -278 -279 return lowfreq[0], highfreq[0] -280 -281 @staticmethod -282 def locate_file(folder, type_file_name='apexAcquisition.method'): -283 """ Function to locate a file in a folder +195 scan = output_parameters["scan_number"] +196 from io import BytesIO +197 +198 if self.transient_data_path.name == "ser": +199 if isinstance(self.transient_data_path, S3Path): +200 databin = BytesIO(self.transient_data_path.open("rb").read()) +201 +202 else: +203 databin = self.transient_data_path.open("rb") +204 +205 databin.seek((scan - 1) * 4 * data_points) +206 # read scan data and parse to 32int struct +207 data = frombuffer(databin.read(4 * data_points), dtype=dt) +208 +209 else: +210 if isinstance(self.transient_data_path, S3Path): +211 data = frombuffer(self.transient_data_path.open("rb").read(), dtype=dt) +212 else: +213 data = fromfile(self.transient_data_path, dtype=dt) +214 +215 return Transient(data, output_parameters) +216 +217 # for key, values in default_parameters.items(): +218 # print(key, values) +219 def fix_freq_limits(self, d_parameters): +220 """Function to read and set the correct frequency limits for the spectrum +221 +222 Notes +223 -------- +224 This is using the excitation limits from the apexAcquisition.method file, +225 which may not match the intended detection limits in edge cases. +226 In default acquisitions, excitation and detection are the same. +227 But, they may not be in some cases with selective excitation, custom excite waveforms, or in 2DMS applications. 
+228 +229 Parameters +230 ---------- +231 d_parameters : dict +232 a dictionary with the parameters from the apexAcquisition.method file +233 """ +234 +235 highfreq = float(d_parameters.get("EXC_Freq_High")) +236 +237 lowfreq = float(d_parameters.get("EXC_Freq_Low")) +238 +239 # CR for compatibility with Apex format as there is no EXciteSweep file +240 if not highfreq and lowfreq: +241 excitation_sweep_filelocation = self.locate_file( +242 self.d_directory_location, "ExciteSweep" +243 ) +244 lowfreq, highfreq = self.get_excite_sweep_range( +245 excitation_sweep_filelocation +246 ) +247 d_parameters["EXC_Freq_High"] = highfreq +248 d_parameters["EXC_Freq_Low"] = lowfreq +249 +250 @staticmethod +251 def get_excite_sweep_range(filename): +252 """Function to determine excitation sweep range from ExciteSweep file +253 +254 This looks at the first and last rows of the ExciteSweep file to determine the excitation frequency range. +255 Note that this assumes the excitation sweep was linear and the first and last rows are the lowest and highest frequencies. +256 This is presumably always true, but again may be incorrect for edge cases with custom excitation waveforms. 
+257 +258 Parameters +259 ---------- +260 filename : str +261 the full path to the ExciteSweep file +262 +263 """ +264 ExciteSweep_lines = genfromtxt(filename, comments="*", delimiter="\n") +265 # CR ready if we need the full array +266 highfreq = fromstring(ExciteSweep_lines[0]) +267 lowfreq = fromstring(ExciteSweep_lines[-1]) +268 +269 return lowfreq[0], highfreq[0] +270 +271 @staticmethod +272 def locate_file(folder, type_file_name="apexAcquisition.method"): +273 """Function to locate a file in a folder +274 +275 Find the full path of a specific file within the acquisition .d folder or subfolders +276 +277 Parameters +278 ---------- +279 folder : str +280 the full path to the folder +281 type_file_name : str +282 the name of the file to be located +283 Expected options: ExciteSweep or apexAcquisition.method 284 -285 Find the full path of a specific file within the acquisition .d folder or subfolders -286 -287 Parameters -288 ---------- -289 folder : str -290 the full path to the folder -291 type_file_name : str -292 the name of the file to be located -293 Expected options: ExciteSweep or apexAcquisition.method -294 -295 Returns -296 ------- -297 str -298 the full path to the file -299 -300 Notes -301 ----- -302 adapted from code from SPIKE library, https://github.com/spike-project/spike -303 -304 """ -305 -306 from pathlib import Path -307 -308 #directory_location = folder.glob( '**/*apexAcquisition.method') -309 directory_location = folder.glob( '**/*' + type_file_name) -310 result = list(directory_location) -311 if len(result) > 1: +285 Returns +286 ------- +287 str +288 the full path to the file +289 +290 Notes +291 ----- +292 adapted from code from SPIKE library, https://github.com/spike-project/spike +293 +294 """ +295 +296 from pathlib import Path +297 +298 # directory_location = folder.glob( '**/*apexAcquisition.method') +299 directory_location = folder.glob("**/*" + type_file_name) +300 result = list(directory_location) +301 if len(result) > 1: +302 
raise Exception( +303 "You have more than 1 %s file in the %s folder, using the first one" +304 % (type_file_name, folder) +305 ) +306 +307 elif len(result) == 0: +308 raise Exception( +309 "You don't have any %s file in the %s folder, please double check the path" +310 % (type_file_name, folder) +311 ) 312 -313 raise Exception( -314 "You have more than 1 %s file in the %s folder, using the first one" -315 % (type_file_name, folder) -316 ) -317 -318 elif len(result) == 0: -319 -320 raise Exception( -321 "You don't have any %s file in the %s folder, please double check the path" -322 % (type_file_name, folder) -323 ) -324 -325 return result[0] -326 -327 @staticmethod -328 def parse_parameters(parameters_filename): -329 """ Function to parse the parameters from apexAcquisition.method file -330 -331 Open the given file and retrieve all parameters from apexAcquisition.method -332 None is written when no value for value is found -333 -334 structure : <param name = "AMS_ActiveExclusion"><value>0</value></param> -335 -336 Parameters -337 ---------- -338 parameters_filename : str -339 the full path to the apexAcquisition.method file -340 -341 Returns -342 ------- -343 dict -344 a dictionary with the parameters and values -345 -346 Notes -347 ----- -348 Adapted from code from SPIKE library, https://github.com/spike-project/spike. 
-349 Code may not handle all possible parameters, but should be sufficient for most common use cases -350 """ -351 -352 #TODO: change to beautiful soup xml parsing -353 -354 -355 xmldoc = minidom.parse(parameters_filename.open()) -356 -357 x = xmldoc.documentElement -358 parameter_dict = {} -359 children = x.childNodes -360 for child in children: -361 # print( child.node) -362 if child.nodeName == 'methodmetadata': +313 return result[0] +314 +315 @staticmethod +316 def parse_parameters(parameters_filename): +317 """Function to parse the parameters from apexAcquisition.method file +318 +319 Open the given file and retrieve all parameters from apexAcquisition.method +320 None is written when no value for value is found +321 +322 structure : <param name = "AMS_ActiveExclusion"><value>0</value></param> +323 +324 Parameters +325 ---------- +326 parameters_filename : str +327 the full path to the apexAcquisition.method file +328 +329 Returns +330 ------- +331 dict +332 a dictionary with the parameters and values +333 +334 Notes +335 ----- +336 Adapted from code from SPIKE library, https://github.com/spike-project/spike. 
+337 Code may not handle all possible parameters, but should be sufficient for most common use cases +338 """ +339 +340 # TODO: change to beautiful soup xml parsing +341 +342 xmldoc = minidom.parse(parameters_filename.open()) +343 +344 x = xmldoc.documentElement +345 parameter_dict = {} +346 children = x.childNodes +347 for child in children: +348 # print( child.node) +349 if child.nodeName == "methodmetadata": +350 sections = child.childNodes +351 for section in sections: +352 for element in section.childNodes: +353 if element.nodeName == "date": +354 # if element.nodeName == "primarykey": +355 +356 date_time_str = element.childNodes[0].nodeValue +357 # parameter_dict["acquisition_time"] = pd.to_datetime(date_time_str, infer_datetime_format=True).to_pydatetime() +358 parameter_dict["acquisition_time"] = datetime.strptime( +359 date_time_str, "%b_%d_%Y %H:%M:%S.%f" +360 ) +361 +362 if child.nodeName == "reportinfo": 363 sections = child.childNodes 364 for section in sections: -365 for element in section.childNodes: -366 if element.nodeName == "date": -367 #if element.nodeName == "primarykey": -368 -369 date_time_str = (element.childNodes[0].nodeValue) -370 #parameter_dict["acquisition_time"] = pd.to_datetime(date_time_str, infer_datetime_format=True).to_pydatetime() -371 parameter_dict["acquisition_time"] = datetime.strptime(date_time_str, "%b_%d_%Y %H:%M:%S.%f") -372 -373 -374 if child.nodeName == "reportinfo": -375 sections = child.childNodes -376 for section in sections: -377 if section.nodeName == "section": -378 if section.getAttribute("title") == "Main": -379 for element in section.childNodes: -380 if element.nodeName == "section": -381 if element.getAttribute("title") == "Polarity": -382 if ( -383 str( -384 element.childNodes[1].getAttribute( -385 "value" -386 ) -387 ) -388 == "Negative" -389 ): -390 parameter_dict["Polarity"] = -1 -391 else: -392 parameter_dict["Polarity"] = 1 -393 -394 if child.nodeName == "paramlist": -395 params = child.childNodes -396 
for param in params: -397 # print( param.nodeName) -398 if param.nodeName == "param": -399 paramenter_label = str(param.getAttribute("name")) -400 for element in param.childNodes: -401 if element.nodeName == "value": -402 try: -403 parameter_value = str(element.firstChild.toxml()) -404 # print v -405 except: -406 parameter_value = None +365 if section.nodeName == "section": +366 if section.getAttribute("title") == "Main": +367 for element in section.childNodes: +368 if element.nodeName == "section": +369 if element.getAttribute("title") == "Polarity": +370 if ( +371 str( +372 element.childNodes[1].getAttribute( +373 "value" +374 ) +375 ) +376 == "Negative" +377 ): +378 parameter_dict["Polarity"] = -1 +379 else: +380 parameter_dict["Polarity"] = 1 +381 +382 if child.nodeName == "paramlist": +383 params = child.childNodes +384 for param in params: +385 # print( param.nodeName) +386 if param.nodeName == "param": +387 paramenter_label = str(param.getAttribute("name")) +388 for element in param.childNodes: +389 if element.nodeName == "value": +390 try: +391 parameter_value = str(element.firstChild.toxml()) +392 # print v +393 except: +394 parameter_value = None +395 +396 parameter_dict[paramenter_label] = parameter_value +397 +398 return parameter_dict +399 +400 def parse_sqlite(self, sqlite_filename="chromatography-data.sqlite"): +401 """ """ +402 import sqlite3 +403 +404 def read_sqlite_file(file_path, table_name): +405 """ +406 Read data from a SQLite database file and return it as a list of tuples 407 -408 parameter_dict[paramenter_label] = parameter_value -409 -410 return parameter_dict -411 -412 -413 def parse_sqlite(self, sqlite_filename="chromatography-data.sqlite"): -414 """ -415 -416 """ -417 import sqlite3 -418 -419 def read_sqlite_file(file_path, table_name): -420 """ -421 Read data from a SQLite database file and return it as a list of tuples -422 -423 Parameters -424 ---------- -425 file_path : str -426 the full path to the SQLite database file -427 
table_name : str -428 the name of the table to be read -429 -430 Returns -431 ------- -432 list -433 a list of tuples with the data from the table -434 """ -435 # Connect to the SQLite database file -436 conn = sqlite3.connect(file_path) -437 cursor = conn.cursor() -438 -439 # Execute a query to select data from a table (replace 'table_name' with your table's name) -440 query = f"SELECT * FROM {table_name}" -441 cursor.execute(query) -442 -443 # Fetch all rows from the result set -444 rows = cursor.fetchall() -445 stream = [] -446 # Print or process the fetched rows -447 for row in rows: -448 stream.append(row) -449 # print(row) # Print each row, you can also process it differently +408 Parameters +409 ---------- +410 file_path : str +411 the full path to the SQLite database file +412 table_name : str +413 the name of the table to be read +414 +415 Returns +416 ------- +417 list +418 a list of tuples with the data from the table +419 """ +420 # Connect to the SQLite database file +421 conn = sqlite3.connect(file_path) +422 cursor = conn.cursor() +423 +424 # Execute a query to select data from a table (replace 'table_name' with your table's name) +425 query = f"SELECT * FROM {table_name}" +426 cursor.execute(query) +427 +428 # Fetch all rows from the result set +429 rows = cursor.fetchall() +430 stream = [] +431 # Print or process the fetched rows +432 for row in rows: +433 stream.append(row) +434 # print(row) # Print each row, you can also process it differently +435 +436 # Close the cursor and the connection +437 cursor.close() +438 conn.close() +439 return stream +440 +441 def parse_binary(binary, type): +442 """ +443 Parse binary data from the sqlite data streams +444 """ +445 if type == "double": +446 data = frombuffer(binary, dtype=float64) +447 elif type == "float": +448 data = frombuffer(binary, dtype=float32) +449 return data 450 -451 # Close the cursor and the connection -452 cursor.close() -453 conn.close() -454 return stream -455 -456 def 
parse_binary(binary, type): -457 """ -458 Parse binary data from the sqlite data streams -459 """ -460 if type == "double": -461 data = frombuffer(binary, dtype=float64) -462 elif type == "float": -463 data = frombuffer(binary, dtype=float32) -464 return data -465 -466 sqlite_filelocation = self.locate_file( -467 self.d_directory_location, sqlite_filename -468 ) -469 table_name = "TraceSources" -470 trace_sources = read_sqlite_file(sqlite_filelocation, table_name) -471 table_name = "TraceChunks" -472 trace_chunks = read_sqlite_file(sqlite_filelocation, table_name) -473 times = [] -474 values = [] -475 trace_type = {} -476 -477 -478 for index, source in enumerate(trace_sources): -479 trace_id = source[0] -480 trace_type[source[1]] = {"times": [], "values": []} -481 for index, chunk in enumerate(trace_chunks): -482 id = chunk[0] -483 times = parse_binary(chunk[1], "double") -484 values = parse_binary(chunk[2], "float") -485 for time, value in zip(times, values): -486 if source[0] == id: -487 trace_type[source[1]]["times"].append(time) -488 trace_type[source[1]]["values"].append(value) -489 -490 return trace_type +451 sqlite_filelocation = self.locate_file( +452 self.d_directory_location, sqlite_filename +453 ) +454 table_name = "TraceSources" +455 trace_sources = read_sqlite_file(sqlite_filelocation, table_name) +456 table_name = "TraceChunks" +457 trace_chunks = read_sqlite_file(sqlite_filelocation, table_name) +458 times = [] +459 values = [] +460 trace_type = {} +461 +462 for index, source in enumerate(trace_sources): +463 trace_id = source[0] +464 trace_type[source[1]] = {"times": [], "values": []} +465 for index, chunk in enumerate(trace_chunks): +466 id = chunk[0] +467 times = parse_binary(chunk[1], "double") +468 values = parse_binary(chunk[2], "float") +469 for time, value in zip(times, values): +470 if source[0] == id: +471 trace_type[source[1]]["times"].append(time) +472 trace_type[source[1]]["values"].append(value) +473 +474 return trace_type

    @@ -1097,7 +1065,7 @@
    Methods
    • get_transient(). -Read the data and settings returning a Transient class
    • +Read the data and settings returning a Transient class
    • get_scan_attr(). Read the scan retention times, TIC values and scan indices.
    • locate_file(folder, type_file_name). @@ -1122,43 +1090,38 @@
      Methods
    -
    63    def __init__(self, d_directory_location):
    -64        
    -65        if isinstance(d_directory_location, str):
    -66            d_directory_location = Path(d_directory_location)
    -67        
    -68        if not d_directory_location.exists():
    -69            raise FileNotFoundError("File does not exist: " + str(d_directory_location))
    -70
    -71        self.d_directory_location = d_directory_location
    -72        
    -73        self.file_location = d_directory_location
    -74        
    -75        try:
    -76
    -77            self.parameter_filename_location = self.locate_file(
    -78                d_directory_location, "apexAcquisition.method"
    -79            )
    -80            self.transient_data_path = d_directory_location / "fid"
    -81            
    -82            if not self.transient_data_path.exists():
    +            
    61    def __init__(self, d_directory_location):
    +62        if isinstance(d_directory_location, str):
    +63            d_directory_location = Path(d_directory_location)
    +64
    +65        if not d_directory_location.exists():
    +66            raise FileNotFoundError("File does not exist: " + str(d_directory_location))
    +67
    +68        self.d_directory_location = d_directory_location
    +69
    +70        self.file_location = d_directory_location
    +71
    +72        try:
    +73            self.parameter_filename_location = self.locate_file(
    +74                d_directory_location, "apexAcquisition.method"
    +75            )
    +76            self.transient_data_path = d_directory_location / "fid"
    +77
    +78            if not self.transient_data_path.exists():
    +79                self.transient_data_path = d_directory_location / "ser"
    +80
    +81                if not self.transient_data_path.exists():
    +82                    raise FileNotFoundError("Could not locate transient data")
     83
    -84                self.transient_data_path = d_directory_location / "ser"
    -85
    -86                if not self.transient_data_path.exists():
    -87                    
    -88                    raise FileNotFoundError("Could not locate transient data")
    -89
    -90                else:
    -91                    # get scan attributes
    -92                    self.scan_attr = d_directory_location / "scan.xml"
    -93
    -94        except:
    -95            
    -96            raise FileExistsError(
    -97                "%s does not seem to be a valid Solarix Mass Spectrum"
    -98                % (d_directory_location)
    -99            )
    +84                else:
    +85                    # get scan attributes
    +86                    self.scan_attr = d_directory_location / "scan.xml"
    +87
    +88        except:
    +89            raise FileExistsError(
    +90                "%s does not seem to be a valid Solarix Mass Spectrum"
    +91                % (d_directory_location)
    +92            )
     
    @@ -1198,36 +1161,38 @@
    Methods
    -
    101    def get_scan_attr(self):
    -102        """ Function to get the scan retention times, TIC values and scan indices. 
    -103
    -104        Gets information from scan.xml file in the bruker .d folder.
    -105        Note this file is only present in some .d format - e.g. for imaging mode data, it is not present.
    -106        
    -107        Returns
    -108        -------
    -109        dict_scan_rt_tic : dict
    -110            a dictionary with scan number as key and rt and tic as values
    -111        """
    -112    
    -113        from bs4 import BeautifulSoup
    -114        
    -115        try: 
    -116            soup = BeautifulSoup(self.scan_attr.open(),'xml')
    -117        except:
    -118            raise FileNotFoundError("Dataset does not appear to contain a 'scan.xml' file or it is misformated")
    -119
    -120        list_rt = [float(rt.text) for rt in soup.find_all('minutes')]
    -121        list_tic = [float(tic.text) for tic in soup.find_all('tic')]
    -122        list_scan = [int(scan.text) for scan in soup.find_all('count')]
    -123
    -124        dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic)))
    -125        
    -126        return dict_scan_rt_tic
    +            
     94    def get_scan_attr(self):
    + 95        """Function to get the scan retention times, TIC values and scan indices.
    + 96
    + 97        Gets information from scan.xml file in the bruker .d folder.
    + 98        Note this file is only present in some .d format - e.g. for imaging mode data, it is not present.
    + 99
    +100        Returns
    +101        -------
    +102        dict_scan_rt_tic : dict
    +103            a dictionary with scan number as key and rt and tic as values
    +104        """
    +105
    +106        from bs4 import BeautifulSoup
    +107
    +108        try:
    +109            soup = BeautifulSoup(self.scan_attr.open(), "xml")
    +110        except:
    +111            raise FileNotFoundError(
    +112                "Dataset does not appear to contain a 'scan.xml' file or it is misformated"
    +113            )
    +114
    +115        list_rt = [float(rt.text) for rt in soup.find_all("minutes")]
    +116        list_tic = [float(tic.text) for tic in soup.find_all("tic")]
    +117        list_scan = [int(scan.text) for scan in soup.find_all("count")]
    +118
    +119        dict_scan_rt_tic = dict(zip(list_scan, zip(list_rt, list_tic)))
    +120
    +121        return dict_scan_rt_tic
     
    -

    Function to get the scan retention times, TIC values and scan indices.

    +

    Function to get the scan retention times, TIC values and scan indices.

    Gets information from scan.xml file in the bruker .d folder. Note this file is only present in some .d format - e.g. for imaging mode data, it is not present.

    @@ -1253,102 +1218,99 @@
    Returns
    -
    129    def get_transient(self, scan_number=1):
    -130        """ Function to get the transient data and parameters from a Bruker Solarix .d folder.
    -131        
    -132        Parameters
    -133        ----------
    -134        scan_number : int
    -135            the scan number to be read. Default is 1.
    -136        
    -137        Returns
    -138        -------
    -139        Transient
    -140            a transient object
    -141        """
    +            
    123    def get_transient(self, scan_number=1):
    +124        """Function to get the transient data and parameters from a Bruker Solarix .d folder.
    +125
    +126        Parameters
    +127        ----------
    +128        scan_number : int
    +129            the scan number to be read. Default is 1.
    +130
    +131        Returns
    +132        -------
    +133        Transient
    +134            a transient object
    +135        """
    +136
    +137        file_d_params = self.parse_parameters(self.parameter_filename_location)
    +138
    +139        self.fix_freq_limits(file_d_params)
    +140
    +141        from sys import platform
     142
    -143        file_d_params = self.parse_parameters(self.parameter_filename_location)
    -144
    -145        self.fix_freq_limits(file_d_params)
    -146
    -147        from sys import platform
    -148        
    -149        if platform == "win32":
    -150            # Windows...
    -151            dt = dtype("l")
    -152        else:
    -153            dt = dtype("i")
    -154
    -155        # get rt, scan, and tic from scan.xml file, otherwise  using 0 defaults values 
    -156        
    -157        output_parameters = deepcopy(default_parameters(self.d_directory_location))
    +143        if platform == "win32":
    +144            # Windows...
    +145            dt = dtype("l")
    +146        else:
    +147            dt = dtype("i")
    +148
    +149        # get rt, scan, and tic from scan.xml file, otherwise  using 0 defaults values
    +150
    +151        output_parameters = deepcopy(default_parameters(self.d_directory_location))
    +152
    +153        if self.transient_data_path.name == "ser":
    +154            if self.scan_attr.exists():
    +155                dict_scan_rt_tic = self.get_scan_attr()
    +156
    +157                output_parameters["scan_number"] = scan_number
     158
    -159        if self.transient_data_path.name == 'ser':
    -160            
    -161            if self.scan_attr.exists():
    -162                
    -163                dict_scan_rt_tic = self.get_scan_attr()
    +159                output_parameters["rt"] = dict_scan_rt_tic.get(scan_number)[0]
    +160
    +161                output_parameters["tic"] = dict_scan_rt_tic.get(scan_number)[1]
    +162
    +163        output_parameters["analyzer"] = "ICR"
     164
    -165                output_parameters["scan_number"] = scan_number
    +165        output_parameters["label"] = "Bruker_Frequency"
     166
    -167                output_parameters["rt"] = dict_scan_rt_tic.get(scan_number)[0]
    +167        output_parameters["Aterm"] = float(file_d_params.get("ML1"))
     168
    -169                output_parameters["tic"] = dict_scan_rt_tic.get(scan_number)[1]
    -170        
    -171        output_parameters["analyzer"] = "ICR"
    +169        output_parameters["Bterm"] = float(file_d_params.get("ML2"))
    +170
    +171        output_parameters["Cterm"] = float(file_d_params.get("ML3"))
     172
    -173        output_parameters["label"] = "Bruker_Frequency"
    +173        output_parameters["exc_high_freq"] = float(file_d_params.get("EXC_Freq_High"))
     174
    -175        output_parameters["Aterm"] = float(file_d_params.get("ML1"))
    -176
    -177        output_parameters["Bterm"] = float(file_d_params.get("ML2"))
    -178
    -179        output_parameters["Cterm"] = float(file_d_params.get("ML3"))
    +175        output_parameters["exc_low_freq"] = float(file_d_params.get("EXC_Freq_Low"))
    +176        try:
    +177            output_parameters["qpd_enabled"] = float(file_d_params.get("QPD_Enabled"))
    +178        except TypeError:  # for older datasets which dont have this variable
    +179            output_parameters["qpd_enabled"] = 0
     180
    -181        output_parameters["exc_high_freq"] = float(file_d_params.get("EXC_Freq_High"))
    +181        output_parameters["mw_low"] = float(file_d_params.get("MW_low"))
     182
    -183        output_parameters["exc_low_freq"] = float(file_d_params.get("EXC_Freq_Low"))
    -184        try:
    -185            output_parameters["qpd_enabled"] = float(file_d_params.get("QPD_Enabled"))
    -186        except TypeError: # for older datasets which dont have this variable
    -187            output_parameters["qpd_enabled"] = 0
    +183        output_parameters["mw_high"] = float(file_d_params.get("MW_high"))
    +184
    +185        output_parameters["bandwidth"] = float(file_d_params.get("SW_h"))
    +186
    +187        output_parameters["number_data_points"] = int(file_d_params.get("TD"))
     188
    -189        output_parameters["mw_low"] = float(file_d_params.get("MW_low"))
    +189        output_parameters["polarity"] = str(file_d_params.get("Polarity"))
     190
    -191        output_parameters["mw_high"] = float(file_d_params.get("MW_high"))
    +191        output_parameters["acquisition_time"] = file_d_params.get("acquisition_time")
     192
    -193        output_parameters["bandwidth"] = float(file_d_params.get("SW_h"))
    +193        data_points = int(file_d_params.get("TD"))
     194
    -195        output_parameters["number_data_points"] = int(file_d_params.get("TD"))
    -196
    -197        output_parameters["polarity"] = str(file_d_params.get("Polarity"))
    -198
    -199        output_parameters["acquisition_time"] = file_d_params.get("acquisition_time")
    -200
    -201        data_points = int(file_d_params.get("TD"))
    -202
    -203        scan = output_parameters["scan_number"]
    -204        from io import BytesIO
    -205        if self.transient_data_path.name == 'ser':
    -206            
    -207            if isinstance(self.transient_data_path, S3Path):
    -208                databin = BytesIO(self.transient_data_path.open('rb').read())
    -209            
    -210            else:
    -211                databin = self.transient_data_path.open('rb')
    -212               
    -213            databin.seek((scan-1)*4*data_points)
    -214            #read scan data and parse to 32int struct
    -215            data = frombuffer(databin.read(4*data_points), dtype=dt)
    -216        
    -217        else:
    -218            
    -219            if isinstance(self.transient_data_path, S3Path):
    -220                data = frombuffer(self.transient_data_path.open('rb').read(), dtype=dt)
    -221            else:
    -222                data = fromfile(self.transient_data_path, dtype=dt)
    -223        
    -224        return Transient(data, output_parameters)
    +195        scan = output_parameters["scan_number"]
    +196        from io import BytesIO
    +197
    +198        if self.transient_data_path.name == "ser":
    +199            if isinstance(self.transient_data_path, S3Path):
    +200                databin = BytesIO(self.transient_data_path.open("rb").read())
    +201
    +202            else:
    +203                databin = self.transient_data_path.open("rb")
    +204
    +205            databin.seek((scan - 1) * 4 * data_points)
    +206            # read scan data and parse to 32int struct
    +207            data = frombuffer(databin.read(4 * data_points), dtype=dt)
    +208
    +209        else:
    +210            if isinstance(self.transient_data_path, S3Path):
    +211                data = frombuffer(self.transient_data_path.open("rb").read(), dtype=dt)
    +212            else:
    +213                data = fromfile(self.transient_data_path, dtype=dt)
    +214
    +215        return Transient(data, output_parameters)
     
    @@ -1381,37 +1343,36 @@
    Returns
    -
    228    def fix_freq_limits(self, d_parameters):
    -229        """ Function to read and set the correct frequency limits for the spectrum
    -230        
    -231        Notes
    -232        --------
    -233        This is using the excitation limits from the apexAcquisition.method file,
    -234        which may not match the intended detection limits in edge cases. 
    -235        In default acquisitions, excitation and detection are the same. 
    -236        But, they may not be in some cases with selective excitation, custom excite waveforms, or in 2DMS applications.
    -237        
    -238        Parameters
    -239        ----------
    -240        d_parameters : dict
    -241            a dictionary with the parameters from the apexAcquisition.method file
    -242        """
    -243
    -244        highfreq = float(d_parameters.get("EXC_Freq_High"))
    -245
    -246        lowfreq = float(d_parameters.get("EXC_Freq_Low"))
    -247
    -248        # CR for compatibility with Apex format as there is no EXciteSweep file
    -249        if not highfreq and lowfreq:
    -250
    -251            excitation_sweep_filelocation = self.locate_file(
    -252                self.d_directory_location, "ExciteSweep"
    -253            )
    -254            lowfreq, highfreq = self.get_excite_sweep_range(
    -255                excitation_sweep_filelocation
    -256            )
    -257            d_parameters["EXC_Freq_High"] = highfreq
    -258            d_parameters["EXC_Freq_Low"] = lowfreq
    +            
    219    def fix_freq_limits(self, d_parameters):
    +220        """Function to read and set the correct frequency limits for the spectrum
    +221
    +222        Notes
    +223        --------
    +224        This is using the excitation limits from the apexAcquisition.method file,
    +225        which may not match the intended detection limits in edge cases.
    +226        In default acquisitions, excitation and detection are the same.
    +227        But, they may not be in some cases with selective excitation, custom excite waveforms, or in 2DMS applications.
    +228
    +229        Parameters
    +230        ----------
    +231        d_parameters : dict
    +232            a dictionary with the parameters from the apexAcquisition.method file
    +233        """
    +234
    +235        highfreq = float(d_parameters.get("EXC_Freq_High"))
    +236
    +237        lowfreq = float(d_parameters.get("EXC_Freq_Low"))
    +238
    +239        # CR for compatibility with Apex format as there is no EXciteSweep file
    +240        if not highfreq and lowfreq:
    +241            excitation_sweep_filelocation = self.locate_file(
    +242                self.d_directory_location, "ExciteSweep"
    +243            )
    +244            lowfreq, highfreq = self.get_excite_sweep_range(
    +245                excitation_sweep_filelocation
    +246            )
    +247            d_parameters["EXC_Freq_High"] = highfreq
    +248            d_parameters["EXC_Freq_Low"] = lowfreq
     
    @@ -1420,8 +1381,8 @@
    Returns
    Notes

    This is using the excitation limits from the apexAcquisition.method file, -which may not match the intended detection limits in edge cases. -In default acquisitions, excitation and detection are the same. +which may not match the intended detection limits in edge cases. +In default acquisitions, excitation and detection are the same. But, they may not be in some cases with selective excitation, custom excite waveforms, or in 2DMS applications.

    Parameters
    @@ -1446,26 +1407,26 @@
    Parameters
    -
    260    @staticmethod
    -261    def get_excite_sweep_range(filename):
    -262        """ Function to determine excitation sweep range from ExciteSweep file
    -263
    -264        This looks at the first and last rows of the ExciteSweep file to determine the excitation frequency range.
    -265        Note that this assumes the excitation sweep was linear and the first and last rows are the lowest and highest frequencies.
    -266        This is presumably always true, but again may be incorrect for edge cases with custom excitation waveforms.
    -267
    -268        Parameters
    -269        ----------
    -270        filename : str
    -271            the full path to the ExciteSweep file
    -272        
    -273        """
    -274        ExciteSweep_lines = genfromtxt(filename, comments="*", delimiter="\n")
    -275        # CR ready if we need the full array
    -276        highfreq = fromstring(ExciteSweep_lines[0])
    -277        lowfreq = fromstring(ExciteSweep_lines[-1])
    -278
    -279        return lowfreq[0], highfreq[0]
    +            
    250    @staticmethod
    +251    def get_excite_sweep_range(filename):
    +252        """Function to determine excitation sweep range from ExciteSweep file
    +253
    +254        This looks at the first and last rows of the ExciteSweep file to determine the excitation frequency range.
    +255        Note that this assumes the excitation sweep was linear and the first and last rows are the lowest and highest frequencies.
    +256        This is presumably always true, but again may be incorrect for edge cases with custom excitation waveforms.
    +257
    +258        Parameters
    +259        ----------
    +260        filename : str
    +261            the full path to the ExciteSweep file
    +262
    +263        """
    +264        ExciteSweep_lines = genfromtxt(filename, comments="*", delimiter="\n")
    +265        # CR ready if we need the full array
    +266        highfreq = fromstring(ExciteSweep_lines[0])
    +267        lowfreq = fromstring(ExciteSweep_lines[-1])
    +268
    +269        return lowfreq[0], highfreq[0]
     
    @@ -1497,51 +1458,49 @@
    Parameters
    -
    281    @staticmethod
    -282    def locate_file(folder, type_file_name='apexAcquisition.method'):
    -283        """ Function to locate a file in a folder
    +            
    271    @staticmethod
    +272    def locate_file(folder, type_file_name="apexAcquisition.method"):
    +273        """Function to locate a file in a folder
    +274
    +275        Find the full path of a specific file within the acquisition .d folder or subfolders
    +276
    +277        Parameters
    +278        ----------
    +279        folder : str
    +280            the full path to the folder
    +281        type_file_name : str
    +282            the name of the file to be located
    +283            Expected options: ExciteSweep or apexAcquisition.method
     284
    -285        Find the full path of a specific file within the acquisition .d folder or subfolders
    -286
    -287        Parameters
    -288        ----------
    -289        folder : str
    -290            the full path to the folder
    -291        type_file_name : str
    -292            the name of the file to be located
    -293            Expected options: ExciteSweep or apexAcquisition.method
    -294
    -295        Returns
    -296        -------
    -297        str
    -298            the full path to the file
    -299
    -300        Notes
    -301        -----
    -302        adapted from code from SPIKE library, https://github.com/spike-project/spike
    -303                
    -304        """
    -305        
    -306        from pathlib import Path
    -307               
    -308        #directory_location = folder.glob( '**/*apexAcquisition.method')
    -309        directory_location = folder.glob( '**/*' + type_file_name)
    -310        result = list(directory_location)
    -311        if len(result) > 1:
    +285        Returns
    +286        -------
    +287        str
    +288            the full path to the file
    +289
    +290        Notes
    +291        -----
    +292        adapted from code from SPIKE library, https://github.com/spike-project/spike
    +293
    +294        """
    +295
    +296        from pathlib import Path
    +297
    +298        # directory_location = folder.glob( '**/*apexAcquisition.method')
    +299        directory_location = folder.glob("**/*" + type_file_name)
    +300        result = list(directory_location)
    +301        if len(result) > 1:
    +302            raise Exception(
    +303                "You have more than 1 %s file in the %s folder, using the first one"
    +304                % (type_file_name, folder)
    +305            )
    +306
    +307        elif len(result) == 0:
    +308            raise Exception(
    +309                "You don't have any %s file in the  %s folder, please double check the path"
    +310                % (type_file_name, folder)
    +311            )
     312
    -313            raise Exception(
    -314                "You have more than 1 %s file in the %s folder, using the first one"
    -315                % (type_file_name, folder)
    -316            )
    -317
    -318        elif len(result) == 0:
    -319
    -320            raise Exception(
    -321                "You don't have any %s file in the  %s folder, please double check the path"
    -322                % (type_file_name, folder)
    -323            )
    -324
    -325        return result[0]
    +313        return result[0]
     
    @@ -1584,90 +1543,90 @@
    Notes
    -
    327    @staticmethod
    -328    def parse_parameters(parameters_filename):
    -329        """ Function to parse the parameters from apexAcquisition.method file
    -330
    -331        Open the given file and retrieve all parameters from apexAcquisition.method
    -332            None is written when no value for value is found
    -333            
    -334            structure : <param name = "AMS_ActiveExclusion"><value>0</value></param>
    -335        
    -336        Parameters
    -337        ----------
    -338        parameters_filename : str
    -339            the full path to the apexAcquisition.method file
    -340        
    -341        Returns
    -342        -------
    -343        dict
    -344            a dictionary with the parameters and values
    -345        
    -346        Notes
    -347        -----
    -348        Adapted from code from SPIKE library, https://github.com/spike-project/spike.
    -349        Code may not handle all possible parameters, but should be sufficient for most common use cases
    -350        """
    -351        
    -352        #TODO: change to beautiful soup xml parsing
    -353        
    -354        
    -355        xmldoc = minidom.parse(parameters_filename.open())
    -356
    -357        x = xmldoc.documentElement
    -358        parameter_dict = {}
    -359        children = x.childNodes
    -360        for child in children:
    -361            # print( child.node)
    -362            if child.nodeName == 'methodmetadata':
    +            
    315    @staticmethod
    +316    def parse_parameters(parameters_filename):
    +317        """Function to parse the parameters from apexAcquisition.method file
    +318
    +319        Open the given file and retrieve all parameters from apexAcquisition.method
    +320            None is written when no value for value is found
    +321
    +322            structure : <param name = "AMS_ActiveExclusion"><value>0</value></param>
    +323
    +324        Parameters
    +325        ----------
    +326        parameters_filename : str
    +327            the full path to the apexAcquisition.method file
    +328
    +329        Returns
    +330        -------
    +331        dict
    +332            a dictionary with the parameters and values
    +333
    +334        Notes
    +335        -----
    +336        Adapted from code from SPIKE library, https://github.com/spike-project/spike.
    +337        Code may not handle all possible parameters, but should be sufficient for most common use cases
    +338        """
    +339
    +340        # TODO: change to beautiful soup xml parsing
    +341
    +342        xmldoc = minidom.parse(parameters_filename.open())
    +343
    +344        x = xmldoc.documentElement
    +345        parameter_dict = {}
    +346        children = x.childNodes
    +347        for child in children:
    +348            # print( child.node)
    +349            if child.nodeName == "methodmetadata":
    +350                sections = child.childNodes
    +351                for section in sections:
    +352                    for element in section.childNodes:
    +353                        if element.nodeName == "date":
    +354                            # if element.nodeName == "primarykey":
    +355
    +356                            date_time_str = element.childNodes[0].nodeValue
    +357                            # parameter_dict["acquisition_time"] = pd.to_datetime(date_time_str, infer_datetime_format=True).to_pydatetime()
    +358                            parameter_dict["acquisition_time"] = datetime.strptime(
    +359                                date_time_str, "%b_%d_%Y %H:%M:%S.%f"
    +360                            )
    +361
    +362            if child.nodeName == "reportinfo":
     363                sections = child.childNodes
     364                for section in sections:
    -365                    for element in section.childNodes:
    -366                        if element.nodeName == "date":
    -367                        #if element.nodeName == "primarykey":
    -368                            
    -369                            date_time_str = (element.childNodes[0].nodeValue)
    -370                            #parameter_dict["acquisition_time"] = pd.to_datetime(date_time_str, infer_datetime_format=True).to_pydatetime()
    -371                            parameter_dict["acquisition_time"] = datetime.strptime(date_time_str, "%b_%d_%Y %H:%M:%S.%f")
    -372                            
    -373            
    -374            if child.nodeName == "reportinfo":
    -375                sections = child.childNodes
    -376                for section in sections:
    -377                    if section.nodeName == "section":
    -378                        if section.getAttribute("title") == "Main":
    -379                            for element in section.childNodes:
    -380                                if element.nodeName == "section":
    -381                                    if element.getAttribute("title") == "Polarity":
    -382                                        if (
    -383                                            str(
    -384                                                element.childNodes[1].getAttribute(
    -385                                                    "value"
    -386                                                )
    -387                                            )
    -388                                            == "Negative"
    -389                                        ):
    -390                                            parameter_dict["Polarity"] = -1
    -391                                        else:
    -392                                            parameter_dict["Polarity"] = 1
    -393
    -394            if child.nodeName == "paramlist":
    -395                params = child.childNodes
    -396                for param in params:
    -397                    # print( param.nodeName)
    -398                    if param.nodeName == "param":
    -399                        paramenter_label = str(param.getAttribute("name"))
    -400                        for element in param.childNodes:
    -401                            if element.nodeName == "value":
    -402                                try:
    -403                                    parameter_value = str(element.firstChild.toxml())
    -404                                    # print v
    -405                                except:
    -406                                    parameter_value = None
    -407
    -408                            parameter_dict[paramenter_label] = parameter_value
    -409
    -410        return parameter_dict
    +365                    if section.nodeName == "section":
    +366                        if section.getAttribute("title") == "Main":
    +367                            for element in section.childNodes:
    +368                                if element.nodeName == "section":
    +369                                    if element.getAttribute("title") == "Polarity":
    +370                                        if (
    +371                                            str(
    +372                                                element.childNodes[1].getAttribute(
    +373                                                    "value"
    +374                                                )
    +375                                            )
    +376                                            == "Negative"
    +377                                        ):
    +378                                            parameter_dict["Polarity"] = -1
    +379                                        else:
    +380                                            parameter_dict["Polarity"] = 1
    +381
    +382            if child.nodeName == "paramlist":
    +383                params = child.childNodes
    +384                for param in params:
    +385                    # print( param.nodeName)
    +386                    if param.nodeName == "param":
    +387                        paramenter_label = str(param.getAttribute("name"))
    +388                        for element in param.childNodes:
    +389                            if element.nodeName == "value":
    +390                                try:
    +391                                    parameter_value = str(element.firstChild.toxml())
    +392                                    # print v
    +393                                except:
    +394                                    parameter_value = None
    +395
    +396                            parameter_dict[paramenter_label] = parameter_value
    +397
    +398        return parameter_dict
     
    @@ -1711,84 +1670,81 @@
    Notes
    -
    413    def parse_sqlite(self, sqlite_filename="chromatography-data.sqlite"):
    -414        """
    -415        
    -416        """
    -417        import sqlite3
    -418
    -419        def read_sqlite_file(file_path, table_name):
    -420            """
    -421            Read data from a SQLite database file and return it as a list of tuples
    -422
    -423            Parameters
    -424            ----------
    -425            file_path : str
    -426                the full path to the SQLite database file
    -427            table_name : str
    -428                the name of the table to be read
    -429            
    -430            Returns
    -431            -------
    -432            list
    -433                a list of tuples with the data from the table
    -434            """
    -435             # Connect to the SQLite database file
    -436            conn = sqlite3.connect(file_path)
    -437            cursor = conn.cursor()
    -438
    -439            # Execute a query to select data from a table (replace 'table_name' with your table's name)
    -440            query = f"SELECT * FROM {table_name}"
    -441            cursor.execute(query)
    -442
    -443            # Fetch all rows from the result set
    -444            rows = cursor.fetchall()
    -445            stream = []
    -446            # Print or process the fetched rows
    -447            for row in rows:
    -448                stream.append(row)
    -449                # print(row)  # Print each row, you can also process it differently
    +            
    400    def parse_sqlite(self, sqlite_filename="chromatography-data.sqlite"):
    +401        """ """
    +402        import sqlite3
    +403
    +404        def read_sqlite_file(file_path, table_name):
    +405            """
    +406            Read data from a SQLite database file and return it as a list of tuples
    +407
    +408            Parameters
    +409            ----------
    +410            file_path : str
    +411                the full path to the SQLite database file
    +412            table_name : str
    +413                the name of the table to be read
    +414
    +415            Returns
    +416            -------
    +417            list
    +418                a list of tuples with the data from the table
    +419            """
    +420            # Connect to the SQLite database file
    +421            conn = sqlite3.connect(file_path)
    +422            cursor = conn.cursor()
    +423
    +424            # Execute a query to select data from a table (replace 'table_name' with your table's name)
    +425            query = f"SELECT * FROM {table_name}"
    +426            cursor.execute(query)
    +427
    +428            # Fetch all rows from the result set
    +429            rows = cursor.fetchall()
    +430            stream = []
    +431            # Print or process the fetched rows
    +432            for row in rows:
    +433                stream.append(row)
    +434                # print(row)  # Print each row, you can also process it differently
    +435
    +436            # Close the cursor and the connection
    +437            cursor.close()
    +438            conn.close()
    +439            return stream
    +440
    +441        def parse_binary(binary, type):
    +442            """
    +443            Parse binary data from the sqlite data streams
    +444            """
    +445            if type == "double":
    +446                data = frombuffer(binary, dtype=float64)
    +447            elif type == "float":
    +448                data = frombuffer(binary, dtype=float32)
    +449            return data
     450
    -451            # Close the cursor and the connection
    -452            cursor.close()
    -453            conn.close()
    -454            return stream
    -455        
    -456        def parse_binary(binary, type):
    -457            """
    -458            Parse binary data from the sqlite data streams
    -459            """
    -460            if type == "double":
    -461                data = frombuffer(binary, dtype=float64)
    -462            elif type == "float":
    -463                data = frombuffer(binary, dtype=float32)
    -464            return data
    -465        
    -466        sqlite_filelocation = self.locate_file(
    -467                self.d_directory_location, sqlite_filename
    -468            )
    -469        table_name = "TraceSources"
    -470        trace_sources = read_sqlite_file(sqlite_filelocation, table_name)
    -471        table_name = "TraceChunks"
    -472        trace_chunks = read_sqlite_file(sqlite_filelocation, table_name)
    -473        times = []
    -474        values = []
    -475        trace_type = {}
    -476
    -477
    -478        for index, source in enumerate(trace_sources):
    -479            trace_id = source[0]
    -480            trace_type[source[1]] = {"times": [], "values": []}
    -481            for index, chunk in enumerate(trace_chunks):
    -482                id = chunk[0]
    -483                times = parse_binary(chunk[1], "double")
    -484                values = parse_binary(chunk[2], "float")
    -485                for time, value in zip(times, values):
    -486                    if source[0] == id:
    -487                        trace_type[source[1]]["times"].append(time)
    -488                        trace_type[source[1]]["values"].append(value)
    -489
    -490        return trace_type
    +451        sqlite_filelocation = self.locate_file(
    +452            self.d_directory_location, sqlite_filename
    +453        )
    +454        table_name = "TraceSources"
    +455        trace_sources = read_sqlite_file(sqlite_filelocation, table_name)
    +456        table_name = "TraceChunks"
    +457        trace_chunks = read_sqlite_file(sqlite_filelocation, table_name)
    +458        times = []
    +459        values = []
    +460        trace_type = {}
    +461
    +462        for index, source in enumerate(trace_sources):
    +463            trace_id = source[0]
    +464            trace_type[source[1]] = {"times": [], "values": []}
    +465            for index, chunk in enumerate(trace_chunks):
    +466                id = chunk[0]
    +467                times = parse_binary(chunk[1], "double")
    +468                values = parse_binary(chunk[2], "float")
    +469                for time, value in zip(times, values):
    +470                    if source[0] == id:
    +471                        trace_type[source[1]]["times"].append(time)
    +472                        trace_type[source[1]]["values"].append(value)
    +473
    +474        return trace_type
     
    diff --git a/docs/corems/transient/input/midasDatFile.html b/docs/corems/transient/input/midasDatFile.html index c7576485..6782c9f4 100644 --- a/docs/corems/transient/input/midasDatFile.html +++ b/docs/corems/transient/input/midasDatFile.html @@ -81,172 +81,169 @@

    7__date__ = "Jun 19, 2019" 8 9 - 10 - 11class ReadMidasDatFile(): - 12 """ [Not Implemented] Reads MIDAS .dat files (binary transient data) - 13 - 14 This class will read .dat binary format transient data, e.g. midas format from Predator or Thermo datastations - 15 This code is not yet implemented and is not fully functional. - 16 - 17 Parameters - 18 ---------- - 19 filename_path : str - 20 The path to the .dat file - 21 - 22 Attributes - 23 ---------- - 24 filename_path : str - 25 The path to the .dat file - 26 d_params : dict - 27 A dictionary with the parameters of the .dat file - 28 transient_data : numpy.ndarray - 29 The transient data - 30 - 31 Methods - 32 ------- - 33 * read_file(). - 34 Reads the .dat file and returns the transient data and the parameters - 35 * get_transient_data(data_file, d_params). - 36 Reads the transient data from the .dat file - 37 * parse_parameter(f). - 38 Parses the parameters from the .dat file - 39 - 40 Raises - 41 ------ - 42 NotImplementedError - 43 This class is not yet implemented. - 44 - 45 """ - 46 - 47 def __init__(self, filename_path): - 48 - 49 raise NotImplementedError("This class is not yet implemented, if you want to use it please contact the author at corilo@pnnl.gov or feel free to implement it") + 10class ReadMidasDatFile: + 11 """[Not Implemented] Reads MIDAS .dat files (binary transient data) + 12 + 13 This class will read .dat binary format transient data, e.g. midas format from Predator or Thermo datastations + 14 This code is not yet implemented and is not fully functional. + 15 + 16 Parameters + 17 ---------- + 18 filename_path : str + 19 The path to the .dat file + 20 + 21 Attributes + 22 ---------- + 23 filename_path : str + 24 The path to the .dat file + 25 d_params : dict + 26 A dictionary with the parameters of the .dat file + 27 transient_data : numpy.ndarray + 28 The transient data + 29 + 30 Methods + 31 ------- + 32 * read_file(). 
+ 33 Reads the .dat file and returns the transient data and the parameters + 34 * get_transient_data(data_file, d_params). + 35 Reads the transient data from the .dat file + 36 * parse_parameter(f). + 37 Parses the parameters from the .dat file + 38 + 39 Raises + 40 ------ + 41 NotImplementedError + 42 This class is not yet implemented. + 43 + 44 """ + 45 + 46 def __init__(self, filename_path): + 47 raise NotImplementedError( + 48 "This class is not yet implemented, if you want to use it please contact the author at corilo@pnnl.gov or feel free to implement it" + 49 ) 50 if not path.isfile(filename_path): - 51 raise Exception("File does not exist: "+ filename_path) - 52 + 51 raise Exception("File does not exist: " + filename_path) + 52 53 self.filename_path = filename_path - 54 + 54 55 def read_file(self): - 56 """ Reads the .dat file and returns the transient data and the parameters - 57 + 56 """Reads the .dat file and returns the transient data and the parameters + 57 58 Returns 59 ------- 60 transient_data : numpy.ndarray 61 The transient data 62 d_params : dict 63 A dictionary with the parameters of the .dat file - 64 + 64 65 """ - 66 data_file = open(self.filename_path, 'rb') + 66 data_file = open(self.filename_path, "rb") 67 68 # modo_de_ions = "POSITIVE ION MODE" 69 d_params = self.parse_parameters(self.parameter_filename_location) - 70 + 70 71 transient_data = self.get_transient_data(data_file, d_params, d_params) - 72 + 72 73 return transient_data, d_params - 74 - 75 - 76 def get_transient_data(self, data_file, d_params): - 77 """ Reads the transient data from the .dat file - 78 - 79 Parameters - 80 ---------- - 81 data_file : file - 82 The .dat file - 83 d_params : dict - 84 A dictionary with the parameters of the .dat file - 85 - 86 Returns - 87 ------- - 88 myarray : numpy.ndarray - 89 The transient data - 90 """ - 91 - 92 #dt = np.dtype('<f') - 93 if d_params.get("storage_type").split()[0] == "int": - 94 dt = dtype('i2') - 95 - 96 else: - 97 dt = 
dtype('<f') - 98 #dt = np.dtype(int) - 99 -100 myarray = fromfile(data_file, dtype=dt) -101 -102 data_file.close() -103 -104 if d_params.get("storage_type").split()[0] == "int": -105 return myarray * d_params.get("VoltageScale") -106 -107 else: -108 -109 return myarray -110 -111 def parse_parameter(self, f): -112 """ Parses the parameters from the .dat file -113 -114 Parameters -115 ---------- -116 f : file -117 The .dat file -118 -119 Returns -120 ------- -121 output_parameters : dict -122 A dictionary with the parameters of the .dat file -123 """ -124 -125 output_parameters = {} -126 output_parameters["filename_path"] = self.d_directory_location -127 -128 line = f.readline() -129 -130 while line != "Data:\n": -131 -132 if line[0:8] == "highfreq": -133 final_frequency = float(line.split(":")[1]) -134 output_parameters["exc_high_freq"] = final_frequency -135 -136 elif line[0:7] == "lowfreq": -137 initial_frequency = float(line.split(":")[1]) -138 output_parameters["exc_low_freq"] = initial_frequency -139 -140 elif line[0:9] == "sweeprate": -141 sweeprate = float(line.split(":")[1]) -142 -143 output_parameters['sweeprate'] = sweeprate -144 -145 elif line[0:13] == "Source Coeff0": -146 Acoef = float(line.split(":")[1]) -147 output_parameters["Aterm"] = Acoef -148 #print f.readline() -149 elif line[0:13] == "Source Coeff1": -150 output_parameters["Bterm"] = "Bcoef" -151 -152 elif line[0:13] == "Voltage Scale": -153 voltage_scale = float(line.split(":")[1]) -154 output_parameters["VoltageScale"] = voltage_scale -155 -156 elif line[0:9] == "Bandwidth": -157 bandwidth = float(line.split(":")[1]) -158 output_parameters["bandwidth"] = bandwidth -159 -160 elif line[0:11] == "Data Points": -161 datapoints = float(line.split(":")[1]) -162 output_parameters["number_data_points"] = datapoints -163 -164 elif line[0:12] == "Storage Type": -165 storage_type = line.split(":")[1] -166 output_parameters["storage_type"] = storage_type -167 -168 elif line[0:12] == "Trap Voltage": -169 
trap_voltage = float(line.split(":")[1]) -170 #Bcoef = Bcoef*trap_voltage -171 output_parameters["trap_voltage"] = trap_voltage -172 -173 line = f.readline() -174 -175 return output_parameters + 74 + 75 def get_transient_data(self, data_file, d_params): + 76 """Reads the transient data from the .dat file + 77 + 78 Parameters + 79 ---------- + 80 data_file : file + 81 The .dat file + 82 d_params : dict + 83 A dictionary with the parameters of the .dat file + 84 + 85 Returns + 86 ------- + 87 myarray : numpy.ndarray + 88 The transient data + 89 """ + 90 + 91 # dt = np.dtype('<f') + 92 if d_params.get("storage_type").split()[0] == "int": + 93 dt = dtype("i2") + 94 + 95 else: + 96 dt = dtype("<f") + 97 # dt = np.dtype(int) + 98 + 99 myarray = fromfile(data_file, dtype=dt) +100 +101 data_file.close() +102 +103 if d_params.get("storage_type").split()[0] == "int": +104 return myarray * d_params.get("VoltageScale") +105 +106 else: +107 return myarray +108 +109 def parse_parameter(self, f): +110 """Parses the parameters from the .dat file +111 +112 Parameters +113 ---------- +114 f : file +115 The .dat file +116 +117 Returns +118 ------- +119 output_parameters : dict +120 A dictionary with the parameters of the .dat file +121 """ +122 +123 output_parameters = {} +124 output_parameters["filename_path"] = self.d_directory_location +125 +126 line = f.readline() +127 +128 while line != "Data:\n": +129 if line[0:8] == "highfreq": +130 final_frequency = float(line.split(":")[1]) +131 output_parameters["exc_high_freq"] = final_frequency +132 +133 elif line[0:7] == "lowfreq": +134 initial_frequency = float(line.split(":")[1]) +135 output_parameters["exc_low_freq"] = initial_frequency +136 +137 elif line[0:9] == "sweeprate": +138 sweeprate = float(line.split(":")[1]) +139 +140 output_parameters["sweeprate"] = sweeprate +141 +142 elif line[0:13] == "Source Coeff0": +143 Acoef = float(line.split(":")[1]) +144 output_parameters["Aterm"] = Acoef +145 # print f.readline() +146 elif 
line[0:13] == "Source Coeff1": +147 output_parameters["Bterm"] = "Bcoef" +148 +149 elif line[0:13] == "Voltage Scale": +150 voltage_scale = float(line.split(":")[1]) +151 output_parameters["VoltageScale"] = voltage_scale +152 +153 elif line[0:9] == "Bandwidth": +154 bandwidth = float(line.split(":")[1]) +155 output_parameters["bandwidth"] = bandwidth +156 +157 elif line[0:11] == "Data Points": +158 datapoints = float(line.split(":")[1]) +159 output_parameters["number_data_points"] = datapoints +160 +161 elif line[0:12] == "Storage Type": +162 storage_type = line.split(":")[1] +163 output_parameters["storage_type"] = storage_type +164 +165 elif line[0:12] == "Trap Voltage": +166 trap_voltage = float(line.split(":")[1]) +167 # Bcoef = Bcoef*trap_voltage +168 output_parameters["trap_voltage"] = trap_voltage +169 +170 line = f.readline() +171 +172 return output_parameters

    @@ -262,178 +259,176 @@

    -
     12class ReadMidasDatFile():
    - 13    """  [Not Implemented] Reads MIDAS .dat files (binary transient data)
    - 14
    - 15    This class will read .dat binary format transient data, e.g. midas format from Predator or Thermo datastations
    - 16    This code is not yet implemented and is not fully functional. 
    - 17    
    - 18    Parameters
    - 19    ----------
    - 20    filename_path : str
    - 21        The path to the .dat file
    - 22
    - 23    Attributes
    - 24    ----------
    - 25    filename_path : str
    - 26        The path to the .dat file
    - 27    d_params : dict
    - 28        A dictionary with the parameters of the .dat file
    - 29    transient_data : numpy.ndarray
    - 30        The transient data
    - 31    
    - 32    Methods
    - 33    -------
    - 34    * read_file().
    - 35        Reads the .dat file and returns the transient data and the parameters
    - 36    * get_transient_data(data_file, d_params).
    - 37        Reads the transient data from the .dat file
    - 38    * parse_parameter(f).
    - 39        Parses the parameters from the .dat file
    - 40
    - 41    Raises
    - 42    ------
    - 43    NotImplementedError
    - 44        This class is not yet implemented.  
    - 45
    - 46    """
    - 47
    - 48    def __init__(self, filename_path):
    - 49        
    - 50        raise NotImplementedError("This class is not yet implemented, if you want to use it please contact the author at corilo@pnnl.gov or feel free to implement it")
    +            
     11class ReadMidasDatFile:
    + 12    """[Not Implemented] Reads MIDAS .dat files (binary transient data)
    + 13
    + 14    This class will read .dat binary format transient data, e.g. midas format from Predator or Thermo datastations
    + 15    This code is not yet implemented and is not fully functional.
    + 16
    + 17    Parameters
    + 18    ----------
    + 19    filename_path : str
    + 20        The path to the .dat file
    + 21
    + 22    Attributes
    + 23    ----------
    + 24    filename_path : str
    + 25        The path to the .dat file
    + 26    d_params : dict
    + 27        A dictionary with the parameters of the .dat file
    + 28    transient_data : numpy.ndarray
    + 29        The transient data
    + 30
    + 31    Methods
    + 32    -------
    + 33    * read_file().
    + 34        Reads the .dat file and returns the transient data and the parameters
    + 35    * get_transient_data(data_file, d_params).
    + 36        Reads the transient data from the .dat file
    + 37    * parse_parameter(f).
    + 38        Parses the parameters from the .dat file
    + 39
    + 40    Raises
    + 41    ------
    + 42    NotImplementedError
    + 43        This class is not yet implemented.
    + 44
    + 45    """
    + 46
    + 47    def __init__(self, filename_path):
    + 48        raise NotImplementedError(
    + 49            "This class is not yet implemented, if you want to use it please contact the author at corilo@pnnl.gov or feel free to implement it"
    + 50        )
      51        if not path.isfile(filename_path):
    - 52            raise Exception("File does not exist: "+ filename_path)
    - 53        
    + 52            raise Exception("File does not exist: " + filename_path)
    + 53
      54        self.filename_path = filename_path
    - 55        
    + 55
      56    def read_file(self):
    - 57        """ Reads the .dat file and returns the transient data and the parameters
    - 58        
    + 57        """Reads the .dat file and returns the transient data and the parameters
    + 58
      59        Returns
      60        -------
      61        transient_data : numpy.ndarray
      62            The transient data
      63        d_params : dict
      64            A dictionary with the parameters of the .dat file
    - 65        
    + 65
      66        """
    - 67        data_file = open(self.filename_path, 'rb')
    + 67        data_file = open(self.filename_path, "rb")
      68
      69        # modo_de_ions = "POSITIVE ION MODE"
      70        d_params = self.parse_parameters(self.parameter_filename_location)
    - 71        
    + 71
      72        transient_data = self.get_transient_data(data_file, d_params, d_params)
    - 73        
    + 73
      74        return transient_data, d_params
    - 75        
    - 76    
    - 77    def get_transient_data(self, data_file, d_params):
    - 78        """ Reads the transient data from the .dat file
    - 79
    - 80        Parameters
    - 81        ----------
    - 82        data_file : file
    - 83            The .dat file
    - 84        d_params : dict
    - 85            A dictionary with the parameters of the .dat file
    - 86        
    - 87        Returns
    - 88        -------
    - 89        myarray : numpy.ndarray
    - 90            The transient data
    - 91        """
    - 92
    - 93        #dt = np.dtype('<f')
    - 94        if d_params.get("storage_type").split()[0] == "int":
    - 95            dt = dtype('i2')
    - 96            
    - 97        else:
    - 98            dt = dtype('<f')    
    - 99        #dt = np.dtype(int)
    -100        
    -101        myarray = fromfile(data_file, dtype=dt)
    -102        
    -103        data_file.close()
    -104
    -105        if d_params.get("storage_type").split()[0] == "int":
    -106            return myarray * d_params.get("VoltageScale")
    -107        
    -108        else:
    -109            
    -110            return myarray     
    -111        
    -112    def parse_parameter(self, f):
    -113        """ Parses the parameters from the .dat file
    -114        
    -115        Parameters
    -116        ----------
    -117        f : file
    -118            The .dat file
    -119            
    -120        Returns
    -121        -------
    -122        output_parameters : dict
    -123            A dictionary with the parameters of the .dat file
    -124        """
    -125        
    -126        output_parameters = {}
    -127        output_parameters["filename_path"] = self.d_directory_location
    -128        
    -129        line = f.readline()
    -130        
    -131        while line != "Data:\n":
    -132
    -133            if line[0:8] == "highfreq":
    -134                final_frequency = float(line.split(":")[1])
    -135                output_parameters["exc_high_freq"] = final_frequency
    -136                
    -137            elif line[0:7] == "lowfreq":
    -138                initial_frequency = float(line.split(":")[1])
    -139                output_parameters["exc_low_freq"] = initial_frequency
    -140                
    -141            elif line[0:9] == "sweeprate":
    -142                sweeprate = float(line.split(":")[1])
    -143               
    -144                output_parameters['sweeprate'] = sweeprate
    -145                
    -146            elif line[0:13] == "Source Coeff0":
    -147                Acoef = float(line.split(":")[1])
    -148                output_parameters["Aterm"] = Acoef
    -149                #print f.readline()
    -150            elif line[0:13] == "Source Coeff1":
    -151                output_parameters["Bterm"] = "Bcoef"
    -152                
    -153            elif line[0:13] == "Voltage Scale":
    -154                voltage_scale = float(line.split(":")[1])
    -155                output_parameters["VoltageScale"] = voltage_scale
    -156                
    -157            elif line[0:9] == "Bandwidth":
    -158                bandwidth = float(line.split(":")[1])
    -159                output_parameters["bandwidth"] = bandwidth
    -160                
    -161            elif line[0:11] == "Data Points":
    -162                datapoints = float(line.split(":")[1])
    -163                output_parameters["number_data_points"] = datapoints
    -164                
    -165            elif line[0:12] == "Storage Type":
    -166                storage_type = line.split(":")[1]
    -167                output_parameters["storage_type"] = storage_type
    -168                
    -169            elif line[0:12] == "Trap Voltage":
    -170                trap_voltage = float(line.split(":")[1])
    -171                #Bcoef = Bcoef*trap_voltage
    -172                output_parameters["trap_voltage"] = trap_voltage
    -173                
    -174            line = f.readline()
    -175            
    -176        return output_parameters
    + 75
    + 76    def get_transient_data(self, data_file, d_params):
    + 77        """Reads the transient data from the .dat file
    + 78
    + 79        Parameters
    + 80        ----------
    + 81        data_file : file
    + 82            The .dat file
    + 83        d_params : dict
    + 84            A dictionary with the parameters of the .dat file
    + 85
    + 86        Returns
    + 87        -------
    + 88        myarray : numpy.ndarray
    + 89            The transient data
    + 90        """
    + 91
    + 92        # dt = np.dtype('<f')
    + 93        if d_params.get("storage_type").split()[0] == "int":
    + 94            dt = dtype("i2")
    + 95
    + 96        else:
    + 97            dt = dtype("<f")
    + 98        # dt = np.dtype(int)
    + 99
    +100        myarray = fromfile(data_file, dtype=dt)
    +101
    +102        data_file.close()
    +103
    +104        if d_params.get("storage_type").split()[0] == "int":
    +105            return myarray * d_params.get("VoltageScale")
    +106
    +107        else:
    +108            return myarray
    +109
    +110    def parse_parameter(self, f):
    +111        """Parses the parameters from the .dat file
    +112
    +113        Parameters
    +114        ----------
    +115        f : file
    +116            The .dat file
    +117
    +118        Returns
    +119        -------
    +120        output_parameters : dict
    +121            A dictionary with the parameters of the .dat file
    +122        """
    +123
    +124        output_parameters = {}
    +125        output_parameters["filename_path"] = self.d_directory_location
    +126
    +127        line = f.readline()
    +128
    +129        while line != "Data:\n":
    +130            if line[0:8] == "highfreq":
    +131                final_frequency = float(line.split(":")[1])
    +132                output_parameters["exc_high_freq"] = final_frequency
    +133
    +134            elif line[0:7] == "lowfreq":
    +135                initial_frequency = float(line.split(":")[1])
    +136                output_parameters["exc_low_freq"] = initial_frequency
    +137
    +138            elif line[0:9] == "sweeprate":
    +139                sweeprate = float(line.split(":")[1])
    +140
    +141                output_parameters["sweeprate"] = sweeprate
    +142
    +143            elif line[0:13] == "Source Coeff0":
    +144                Acoef = float(line.split(":")[1])
    +145                output_parameters["Aterm"] = Acoef
    +146                # print f.readline()
    +147            elif line[0:13] == "Source Coeff1":
    +148                output_parameters["Bterm"] = "Bcoef"
    +149
    +150            elif line[0:13] == "Voltage Scale":
    +151                voltage_scale = float(line.split(":")[1])
    +152                output_parameters["VoltageScale"] = voltage_scale
    +153
    +154            elif line[0:9] == "Bandwidth":
    +155                bandwidth = float(line.split(":")[1])
    +156                output_parameters["bandwidth"] = bandwidth
    +157
    +158            elif line[0:11] == "Data Points":
    +159                datapoints = float(line.split(":")[1])
    +160                output_parameters["number_data_points"] = datapoints
    +161
    +162            elif line[0:12] == "Storage Type":
    +163                storage_type = line.split(":")[1]
    +164                output_parameters["storage_type"] = storage_type
    +165
    +166            elif line[0:12] == "Trap Voltage":
    +167                trap_voltage = float(line.split(":")[1])
    +168                # Bcoef = Bcoef*trap_voltage
    +169                output_parameters["trap_voltage"] = trap_voltage
    +170
    +171            line = f.readline()
    +172
    +173        return output_parameters
     

    [Not Implemented] Reads MIDAS .dat files (binary transient data)

    This class will read .dat binary format transient data, e.g. midas format from Predator or Thermo datastations -This code is not yet implemented and is not fully functional.

    +This code is not yet implemented and is not fully functional.

    Parameters
    @@ -482,12 +477,13 @@
    Raises
    -
    48    def __init__(self, filename_path):
    -49        
    -50        raise NotImplementedError("This class is not yet implemented, if you want to use it please contact the author at corilo@pnnl.gov or feel free to implement it")
    +            
    47    def __init__(self, filename_path):
    +48        raise NotImplementedError(
    +49            "This class is not yet implemented, if you want to use it please contact the author at corilo@pnnl.gov or feel free to implement it"
    +50        )
     51        if not path.isfile(filename_path):
    -52            raise Exception("File does not exist: "+ filename_path)
    -53        
    +52            raise Exception("File does not exist: " + filename_path)
    +53
     54        self.filename_path = filename_path
     
    @@ -518,23 +514,23 @@
    Raises
    56    def read_file(self):
    -57        """ Reads the .dat file and returns the transient data and the parameters
    -58        
    +57        """Reads the .dat file and returns the transient data and the parameters
    +58
     59        Returns
     60        -------
     61        transient_data : numpy.ndarray
     62            The transient data
     63        d_params : dict
     64            A dictionary with the parameters of the .dat file
    -65        
    +65
     66        """
    -67        data_file = open(self.filename_path, 'rb')
    +67        data_file = open(self.filename_path, "rb")
     68
     69        # modo_de_ions = "POSITIVE ION MODE"
     70        d_params = self.parse_parameters(self.parameter_filename_location)
    -71        
    +71
     72        transient_data = self.get_transient_data(data_file, d_params, d_params)
    -73        
    +73
     74        return transient_data, d_params
     
    @@ -564,40 +560,39 @@
    Returns
    -
     77    def get_transient_data(self, data_file, d_params):
    - 78        """ Reads the transient data from the .dat file
    - 79
    - 80        Parameters
    - 81        ----------
    - 82        data_file : file
    - 83            The .dat file
    - 84        d_params : dict
    - 85            A dictionary with the parameters of the .dat file
    - 86        
    - 87        Returns
    - 88        -------
    - 89        myarray : numpy.ndarray
    - 90            The transient data
    - 91        """
    - 92
    - 93        #dt = np.dtype('<f')
    - 94        if d_params.get("storage_type").split()[0] == "int":
    - 95            dt = dtype('i2')
    - 96            
    - 97        else:
    - 98            dt = dtype('<f')    
    - 99        #dt = np.dtype(int)
    -100        
    -101        myarray = fromfile(data_file, dtype=dt)
    -102        
    -103        data_file.close()
    -104
    -105        if d_params.get("storage_type").split()[0] == "int":
    -106            return myarray * d_params.get("VoltageScale")
    -107        
    -108        else:
    -109            
    -110            return myarray     
    +            
     76    def get_transient_data(self, data_file, d_params):
    + 77        """Reads the transient data from the .dat file
    + 78
    + 79        Parameters
    + 80        ----------
    + 81        data_file : file
    + 82            The .dat file
    + 83        d_params : dict
    + 84            A dictionary with the parameters of the .dat file
    + 85
    + 86        Returns
    + 87        -------
    + 88        myarray : numpy.ndarray
    + 89            The transient data
    + 90        """
    + 91
    + 92        # dt = np.dtype('<f')
    + 93        if d_params.get("storage_type").split()[0] == "int":
    + 94            dt = dtype("i2")
    + 95
    + 96        else:
    + 97            dt = dtype("<f")
    + 98        # dt = np.dtype(int)
    + 99
    +100        myarray = fromfile(data_file, dtype=dt)
    +101
    +102        data_file.close()
    +103
    +104        if d_params.get("storage_type").split()[0] == "int":
    +105            return myarray * d_params.get("VoltageScale")
    +106
    +107        else:
    +108            return myarray
     
    @@ -633,71 +628,70 @@
    Returns
    -
    112    def parse_parameter(self, f):
    -113        """ Parses the parameters from the .dat file
    -114        
    -115        Parameters
    -116        ----------
    -117        f : file
    -118            The .dat file
    -119            
    -120        Returns
    -121        -------
    -122        output_parameters : dict
    -123            A dictionary with the parameters of the .dat file
    -124        """
    -125        
    -126        output_parameters = {}
    -127        output_parameters["filename_path"] = self.d_directory_location
    -128        
    -129        line = f.readline()
    -130        
    -131        while line != "Data:\n":
    -132
    -133            if line[0:8] == "highfreq":
    -134                final_frequency = float(line.split(":")[1])
    -135                output_parameters["exc_high_freq"] = final_frequency
    -136                
    -137            elif line[0:7] == "lowfreq":
    -138                initial_frequency = float(line.split(":")[1])
    -139                output_parameters["exc_low_freq"] = initial_frequency
    -140                
    -141            elif line[0:9] == "sweeprate":
    -142                sweeprate = float(line.split(":")[1])
    -143               
    -144                output_parameters['sweeprate'] = sweeprate
    -145                
    -146            elif line[0:13] == "Source Coeff0":
    -147                Acoef = float(line.split(":")[1])
    -148                output_parameters["Aterm"] = Acoef
    -149                #print f.readline()
    -150            elif line[0:13] == "Source Coeff1":
    -151                output_parameters["Bterm"] = "Bcoef"
    -152                
    -153            elif line[0:13] == "Voltage Scale":
    -154                voltage_scale = float(line.split(":")[1])
    -155                output_parameters["VoltageScale"] = voltage_scale
    -156                
    -157            elif line[0:9] == "Bandwidth":
    -158                bandwidth = float(line.split(":")[1])
    -159                output_parameters["bandwidth"] = bandwidth
    -160                
    -161            elif line[0:11] == "Data Points":
    -162                datapoints = float(line.split(":")[1])
    -163                output_parameters["number_data_points"] = datapoints
    -164                
    -165            elif line[0:12] == "Storage Type":
    -166                storage_type = line.split(":")[1]
    -167                output_parameters["storage_type"] = storage_type
    -168                
    -169            elif line[0:12] == "Trap Voltage":
    -170                trap_voltage = float(line.split(":")[1])
    -171                #Bcoef = Bcoef*trap_voltage
    -172                output_parameters["trap_voltage"] = trap_voltage
    -173                
    -174            line = f.readline()
    -175            
    -176        return output_parameters
    +            
    110    def parse_parameter(self, f):
    +111        """Parses the parameters from the .dat file
    +112
    +113        Parameters
    +114        ----------
    +115        f : file
    +116            The .dat file
    +117
    +118        Returns
    +119        -------
    +120        output_parameters : dict
    +121            A dictionary with the parameters of the .dat file
    +122        """
    +123
    +124        output_parameters = {}
    +125        output_parameters["filename_path"] = self.d_directory_location
    +126
    +127        line = f.readline()
    +128
    +129        while line != "Data:\n":
    +130            if line[0:8] == "highfreq":
    +131                final_frequency = float(line.split(":")[1])
    +132                output_parameters["exc_high_freq"] = final_frequency
    +133
    +134            elif line[0:7] == "lowfreq":
    +135                initial_frequency = float(line.split(":")[1])
    +136                output_parameters["exc_low_freq"] = initial_frequency
    +137
    +138            elif line[0:9] == "sweeprate":
    +139                sweeprate = float(line.split(":")[1])
    +140
    +141                output_parameters["sweeprate"] = sweeprate
    +142
    +143            elif line[0:13] == "Source Coeff0":
    +144                Acoef = float(line.split(":")[1])
    +145                output_parameters["Aterm"] = Acoef
    +146                # print f.readline()
    +147            elif line[0:13] == "Source Coeff1":
    +148                output_parameters["Bterm"] = "Bcoef"
    +149
    +150            elif line[0:13] == "Voltage Scale":
    +151                voltage_scale = float(line.split(":")[1])
    +152                output_parameters["VoltageScale"] = voltage_scale
    +153
    +154            elif line[0:9] == "Bandwidth":
    +155                bandwidth = float(line.split(":")[1])
    +156                output_parameters["bandwidth"] = bandwidth
    +157
    +158            elif line[0:11] == "Data Points":
    +159                datapoints = float(line.split(":")[1])
    +160                output_parameters["number_data_points"] = datapoints
    +161
    +162            elif line[0:12] == "Storage Type":
    +163                storage_type = line.split(":")[1]
    +164                output_parameters["storage_type"] = storage_type
    +165
    +166            elif line[0:12] == "Trap Voltage":
    +167                trap_voltage = float(line.split(":")[1])
    +168                # Bcoef = Bcoef*trap_voltage
    +169                output_parameters["trap_voltage"] = trap_voltage
    +170
    +171            line = f.readline()
    +172
    +173        return output_parameters
     
    diff --git a/docs/search.js b/docs/search.js index 6556a3aa..b109fd32 100644 --- a/docs/search.js +++ b/docs/search.js @@ -1,6 +1,6 @@ window.pdocSearch = (function(){ /** elasticlunr - http://weixsong.github.io * Copyright (C) 2017 Oliver Nightingale * Copyright (C) 2017 Wei Song * MIT Licensed */!function(){function e(e){if(null===e||"object"!=typeof e)return e;var t=e.constructor();for(var n in e)e.hasOwnProperty(n)&&(t[n]=e[n]);return t}var t=function(e){var n=new t.Index;return n.pipeline.add(t.trimmer,t.stopWordFilter,t.stemmer),e&&e.call(n,n),n};t.version="0.9.5",lunr=t,t.utils={},t.utils.warn=function(e){return function(t){e.console&&console.warn&&console.warn(t)}}(this),t.utils.toString=function(e){return void 0===e||null===e?"":e.toString()},t.EventEmitter=function(){this.events={}},t.EventEmitter.prototype.addListener=function(){var e=Array.prototype.slice.call(arguments),t=e.pop(),n=e;if("function"!=typeof t)throw new TypeError("last argument must be a function");n.forEach(function(e){this.hasHandler(e)||(this.events[e]=[]),this.events[e].push(t)},this)},t.EventEmitter.prototype.removeListener=function(e,t){if(this.hasHandler(e)){var n=this.events[e].indexOf(t);-1!==n&&(this.events[e].splice(n,1),0==this.events[e].length&&delete this.events[e])}},t.EventEmitter.prototype.emit=function(e){if(this.hasHandler(e)){var t=Array.prototype.slice.call(arguments,1);this.events[e].forEach(function(e){e.apply(void 0,t)},this)}},t.EventEmitter.prototype.hasHandler=function(e){return e in this.events},t.tokenizer=function(e){if(!arguments.length||null===e||void 0===e)return[];if(Array.isArray(e)){var n=e.filter(function(e){return null===e||void 0===e?!1:!0});n=n.map(function(e){return t.utils.toString(e).toLowerCase()});var i=[];return n.forEach(function(e){var n=e.split(t.tokenizer.seperator);i=i.concat(n)},this),i}return 
e.toString().trim().toLowerCase().split(t.tokenizer.seperator)},t.tokenizer.defaultSeperator=/[\s\-]+/,t.tokenizer.seperator=t.tokenizer.defaultSeperator,t.tokenizer.setSeperator=function(e){null!==e&&void 0!==e&&"object"==typeof e&&(t.tokenizer.seperator=e)},t.tokenizer.resetSeperator=function(){t.tokenizer.seperator=t.tokenizer.defaultSeperator},t.tokenizer.getSeperator=function(){return t.tokenizer.seperator},t.Pipeline=function(){this._queue=[]},t.Pipeline.registeredFunctions={},t.Pipeline.registerFunction=function(e,n){n in t.Pipeline.registeredFunctions&&t.utils.warn("Overwriting existing registered function: "+n),e.label=n,t.Pipeline.registeredFunctions[n]=e},t.Pipeline.getRegisteredFunction=function(e){return e in t.Pipeline.registeredFunctions!=!0?null:t.Pipeline.registeredFunctions[e]},t.Pipeline.warnIfFunctionNotRegistered=function(e){var n=e.label&&e.label in this.registeredFunctions;n||t.utils.warn("Function is not registered with pipeline. This may cause problems when serialising the index.\n",e)},t.Pipeline.load=function(e){var n=new t.Pipeline;return e.forEach(function(e){var i=t.Pipeline.getRegisteredFunction(e);if(!i)throw new Error("Cannot load un-registered function: "+e);n.add(i)}),n},t.Pipeline.prototype.add=function(){var e=Array.prototype.slice.call(arguments);e.forEach(function(e){t.Pipeline.warnIfFunctionNotRegistered(e),this._queue.push(e)},this)},t.Pipeline.prototype.after=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i+1,0,n)},t.Pipeline.prototype.before=function(e,n){t.Pipeline.warnIfFunctionNotRegistered(n);var i=this._queue.indexOf(e);if(-1===i)throw new Error("Cannot find existingFn");this._queue.splice(i,0,n)},t.Pipeline.prototype.remove=function(e){var t=this._queue.indexOf(e);-1!==t&&this._queue.splice(t,1)},t.Pipeline.prototype.run=function(e){for(var t=[],n=e.length,i=this._queue.length,o=0;n>o;o++){for(var 
r=e[o],s=0;i>s&&(r=this._queue[s](r,o,e),void 0!==r&&null!==r);s++);void 0!==r&&null!==r&&t.push(r)}return t},t.Pipeline.prototype.reset=function(){this._queue=[]},t.Pipeline.prototype.get=function(){return this._queue},t.Pipeline.prototype.toJSON=function(){return this._queue.map(function(e){return t.Pipeline.warnIfFunctionNotRegistered(e),e.label})},t.Index=function(){this._fields=[],this._ref="id",this.pipeline=new t.Pipeline,this.documentStore=new t.DocumentStore,this.index={},this.eventEmitter=new t.EventEmitter,this._idfCache={},this.on("add","remove","update",function(){this._idfCache={}}.bind(this))},t.Index.prototype.on=function(){var e=Array.prototype.slice.call(arguments);return this.eventEmitter.addListener.apply(this.eventEmitter,e)},t.Index.prototype.off=function(e,t){return this.eventEmitter.removeListener(e,t)},t.Index.load=function(e){e.version!==t.version&&t.utils.warn("version mismatch: current "+t.version+" importing "+e.version);var n=new this;n._fields=e.fields,n._ref=e.ref,n.documentStore=t.DocumentStore.load(e.documentStore),n.pipeline=t.Pipeline.load(e.pipeline),n.index={};for(var i in e.index)n.index[i]=t.InvertedIndex.load(e.index[i]);return n},t.Index.prototype.addField=function(e){return this._fields.push(e),this.index[e]=new t.InvertedIndex,this},t.Index.prototype.setRef=function(e){return this._ref=e,this},t.Index.prototype.saveDocument=function(e){return this.documentStore=new t.DocumentStore(e),this},t.Index.prototype.addDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.addDoc(i,e),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));this.documentStore.addFieldLength(i,n,o.length);var r={};o.forEach(function(e){e in r?r[e]+=1:r[e]=1},this);for(var s in r){var 
u=r[s];u=Math.sqrt(u),this.index[n].addToken(s,{ref:i,tf:u})}},this),n&&this.eventEmitter.emit("add",e,this)}},t.Index.prototype.removeDocByRef=function(e){if(e&&this.documentStore.isDocStored()!==!1&&this.documentStore.hasDoc(e)){var t=this.documentStore.getDoc(e);this.removeDoc(t,!1)}},t.Index.prototype.removeDoc=function(e,n){if(e){var n=void 0===n?!0:n,i=e[this._ref];this.documentStore.hasDoc(i)&&(this.documentStore.removeDoc(i),this._fields.forEach(function(n){var o=this.pipeline.run(t.tokenizer(e[n]));o.forEach(function(e){this.index[n].removeToken(e,i)},this)},this),n&&this.eventEmitter.emit("remove",e,this))}},t.Index.prototype.updateDoc=function(e,t){var t=void 0===t?!0:t;this.removeDocByRef(e[this._ref],!1),this.addDoc(e,!1),t&&this.eventEmitter.emit("update",e,this)},t.Index.prototype.idf=function(e,t){var n="@"+t+"/"+e;if(Object.prototype.hasOwnProperty.call(this._idfCache,n))return this._idfCache[n];var i=this.index[t].getDocFreq(e),o=1+Math.log(this.documentStore.length/(i+1));return this._idfCache[n]=o,o},t.Index.prototype.getFields=function(){return this._fields.slice()},t.Index.prototype.search=function(e,n){if(!e)return[];e="string"==typeof e?{any:e}:JSON.parse(JSON.stringify(e));var i=null;null!=n&&(i=JSON.stringify(n));for(var o=new t.Configuration(i,this.getFields()).get(),r={},s=Object.keys(e),u=0;u0&&t.push(e);for(var i in n)"docs"!==i&&"df"!==i&&this.expandToken(e+i,t,n[i]);return t},t.InvertedIndex.prototype.toJSON=function(){return{root:this.root}},t.Configuration=function(e,n){var e=e||"";if(void 0==n||null==n)throw new Error("fields should not be null");this.config={};var i;try{i=JSON.parse(e),this.buildUserConfig(i,n)}catch(o){t.utils.warn("user configuration parse failed, will use default configuration"),this.buildDefaultConfig(n)}},t.Configuration.prototype.buildDefaultConfig=function(e){this.reset(),e.forEach(function(e){this.config[e]={boost:1,bool:"OR",expand:!1}},this)},t.Configuration.prototype.buildUserConfig=function(e,n){var 
i="OR",o=!1;if(this.reset(),"bool"in e&&(i=e.bool||i),"expand"in e&&(o=e.expand||o),"fields"in e)for(var r in e.fields)if(n.indexOf(r)>-1){var s=e.fields[r],u=o;void 0!=s.expand&&(u=s.expand),this.config[r]={boost:s.boost||0===s.boost?s.boost:1,bool:s.bool||i,expand:u}}else t.utils.warn("field name in user configuration not found in index instance fields");else this.addAllFields2UserConfig(i,o,n)},t.Configuration.prototype.addAllFields2UserConfig=function(e,t,n){n.forEach(function(n){this.config[n]={boost:1,bool:e,expand:t}},this)},t.Configuration.prototype.get=function(){return this.config},t.Configuration.prototype.reset=function(){this.config={}},lunr.SortedSet=function(){this.length=0,this.elements=[]},lunr.SortedSet.load=function(e){var t=new this;return t.elements=e,t.length=e.length,t},lunr.SortedSet.prototype.add=function(){var e,t;for(e=0;e1;){if(r===e)return o;e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o]}return r===e?o:-1},lunr.SortedSet.prototype.locationFor=function(e){for(var t=0,n=this.elements.length,i=n-t,o=t+Math.floor(i/2),r=this.elements[o];i>1;)e>r&&(t=o),r>e&&(n=o),i=n-t,o=t+Math.floor(i/2),r=this.elements[o];return r>e?o:e>r?o+1:void 0},lunr.SortedSet.prototype.intersect=function(e){for(var t=new lunr.SortedSet,n=0,i=0,o=this.length,r=e.length,s=this.elements,u=e.elements;;){if(n>o-1||i>r-1)break;s[n]!==u[i]?s[n]u[i]&&i++:(t.add(s[n]),n++,i++)}return t},lunr.SortedSet.prototype.clone=function(){var e=new lunr.SortedSet;return e.elements=this.toArray(),e.length=e.elements.length,e},lunr.SortedSet.prototype.union=function(e){var t,n,i;this.length>=e.length?(t=this,n=e):(t=e,n=this),i=t.clone();for(var o=0,r=n.toArray();o\"CoreMS

    \n\n
    \n\n
    \n
    \n\"CoreMS\n
    \n
    \n\n

    Table of Contents

    \n\n\n\n
    \n\n

    CoreMS

    \n\n

    CoreMS is a comprehensive mass spectrometry framework for software development and data analysis of small molecules analysis.

    \n\n

    Data handling and software development for modern mass spectrometry (MS) is an interdisciplinary endeavor requiring skills in computational science and a deep understanding of MS. To enable scientific software development to keep pace with fast improvements in MS technology, we have developed a Python software framework named CoreMS. The goal of the framework is to provide a fundamental, high-level basis for working with all mass spectrometry data types, allowing custom workflows for data signal processing, annotation, and curation. The data structures were designed with an intuitive, mass spectrometric hierarchical structure, thus allowing organized and easy access to the data and calculations. Moreover, CoreMS supports direct access for almost all vendors\u2019 data formats, allowing for the centralization and automation of all data processing workflows from the raw signal to data annotation and curation.

    \n\n

    CoreMS aims to provide

    \n\n
      \n
    • logical mass spectrometric data structure
    • \n
    • self-containing data and metadata storage
    • \n
    • modern molecular formulae assignment algorithms
    • \n
    • dynamic molecular search space database search and generator
    • \n
    \n\n
    \n\n

    Current Version

    \n\n

    2.2.1

    \n\n
    \n\n

    Main Developers/Contact

    \n\n\n\n
    \n\n

    Documentation

    \n\n

    API documentation can be found here.

    \n\n

    Overview slides can be found here.

    \n\n
    \n\n

    Contributing

    \n\n

    As an open source project, CoreMS welcomes contributions of all forms. Before contributing, please see our Dev Guide

    \n\n
    \n\n

    Data formats

    \n\n

    Data input formats

    \n\n
      \n
    • Bruker Solarix (CompassXtract)
    • \n
    • Bruker Solarix transients, ser and fid (FT magnitude mode only)
    • \n
    • ThermoFisher (.raw)
    • \n
    • Spectroswiss signal booster data-acquisition station (.hdf5)
    • \n
    • MagLab ICR data-acquisition station (FT and magnitude mode) (.dat)
    • \n
    • ANDI NetCDF for GC-MS (.cdf)
    • \n
    • mzml for LC-MS (.mzml)
    • \n
    • Generic mass list in profile and centroid mode (includes all delimiter types and Excel formats)
    • \n
    • CoreMS exported processed mass list files(excel, .csv, .txt, pandas dataframe as .pkl)
    • \n
    • CoreMS self-containing Hierarchical Data Format (.hdf5)
    • \n
    • Pandas Dataframe
    • \n
    • Support for cloud Storage using s3path.S3path(see examples of usage here: S3 Support)
    • \n
    \n\n

    Data output formats

    \n\n
      \n
    • Pandas data frame (can be saved using pickle, h5, etc)
    • \n
    • Text Files (.csv, tab separated .txt, etc)
    • \n
    • Microsoft Excel (xlsx)
    • \n
    • Automatic JSON for metadata storage and reuse
    • \n
    • Self-containing Hierarchical Data Format (.hdf5) including raw data and time-series data-point for processed data-sets with all associated metadata stored as json attributes
    • \n
    \n\n

    Data structure types

    \n\n
      \n
    • LC-MS
    • \n
    • GC-MS
    • \n
    • Transient
    • \n
    • Mass Spectra
    • \n
    • Mass Spectrum
    • \n
    • Mass Spectral Peak
    • \n
    • Molecular Formula
    • \n
    \n\n
    \n\n

    Available features

    \n\n

    FT-MS Signal Processing, Calibration, and Molecular Formula Search and Assignment

    \n\n
      \n
    • Apodization, Zerofilling, and Magnitude mode FT
    • \n
    • Manual and automatic noise threshold calculation
    • \n
    • Peak picking using apex quadratic fitting
    • \n
    • Experimental resolving power calculation
    • \n
    • Frequency and m/z domain calibration functions:
    • \n
    • LedFord equation
    • \n
    • Linear equation
    • \n
    • Quadratic equation
    • \n
    • Automatic search for the most abundant Ox homologue series
    • \n
    • Automatic local (SQLite) or external (PostgreSQL) database check, generation, and search
    • \n
    • Automatic molecular formulae assignments algorithm for ESI(-) MS for natural organic matter analysis
    • \n
    • Automatic fine isotopic structure calculation and search for all isotopes
    • \n
    • Flexible Kendrick normalization base
    • \n
    • Kendrick filter using density-based clustering
    • \n
    • Kendrick classification
    • \n
    • Heteroatoms classification and visualization
    • \n
    \n\n

    GC-MS Signal Processing, Calibration, and Compound Identification

    \n\n
      \n
    • Baseline detection, subtraction, smoothing
    • \n
    • m/z based Chromatogram Peak Deconvolution,
    • \n
    • Manual and automatic noise threshold calculation
    • \n
    • First and second derivatives peak picking methods
    • \n
    • Peak Area Calculation
    • \n
    • Retention Index Calibration
    • \n
    • Automatic local (SQLite) or external (MongoDB or PostgreSQL) database check, generation, and search
    • \n
    • Automatic molecular match algorithm with all spectral similarity methods
    • \n
    \n\n

    High Resolution Mass Spectrum Simulations

    \n\n
      \n
    • Peak shape (Lorentz, Gaussian, Voigt, and pseudo-Voigt)
    • \n
    • Peak fitting for peak shape definition
    • \n
    • Peak position as a function of data points, signal to noise and resolving power (Lorentz and Gaussian)
    • \n
    • Prediction of mass error distribution
    • \n
    • Calculated ICR Resolving Power based on magnetic field (B), and transient time(T)
    • \n
    \n\n

    LC-MS Signal Processing, Molecular Formula Search and Assignment, and Spectral Similarity Searches

    \n\n
      \n
    • Two dimensional (m/z and retention time) peak picking using persistent homology
    • \n
    • Smoothing, centroid detection, and integration of extracted ion chromatograms
    • \n
    • Peak shape metric calculations including half peak height, tailing factor, and dispersity index
    • \n
    • MS1 deconvolution of mass features
    • \n
    • Identification of 13C isotopes within the mass features
    • \n
    • Compatibility with molecular formula searching on MS1 or MS2 spectra
    • \n
    • Spectral search capability using entropy similarity
    • \n
    \n\n
    \n\n

    Installation

    \n\n
    \n
    pip install corems\n
    \n
    \n\n

    By default the molecular formula database will be generated using SQLite

    \n\n

    To use Postgresql the easiest way is to build a docker container:

    \n\n
    \n
    docker-compose up -d\n
    \n
    \n\n
      \n
    • Change the url_database on MSParameters.molecular_search.url_database to: \"postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp\"
    • \n
    • Set the url_database env variable COREMS_DATABASE_URL to: \"postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp\"
    • \n
    \n\n

    Thermo Raw File Access:

    \n\n

    To be able to open Thermo files, an installation of pythonnet is needed:

    \n\n
      \n
    • Windows: \n
      \n
      pip install pythonnet\n
      \n
    • \n
    \n\n
      \n
    • Mac and Linux:\n
      \n
      brew install mono\npip install pythonnet   \n
      \n
    • \n
    \n\n
    \n\n

    Docker stack

    \n\n

    Another option to use CoreMS is to run the docker stack that will start the CoreMS containers

    \n\n

    Molecular Database and Jupyter Notebook Docker Containers

    \n\n

    A docker container containing:

    \n\n
      \n
    • A custom python distribution with all dependencies installed
    • \n
    • A Jupyter notebook server with workflow examples
    • \n
    • A PostgreSQL database for the molecular formulae assignment
    • \n
    \n\n

    If you don't have docker installed, the easiest way is to install docker for desktop

    \n\n
      \n
    1. Start the containers using docker-compose (easiest way):

      \n\n

      On docker-compose-jupyter.yml there is a volume mapping for the tests_data directory with the data provided for testing, to change to your data location:

      \n\n
        \n
      • locate the volumes on docker-compose-jupyter.yml:

        \n\n
        \n
        volumes:\n  - ./tests/tests_data:/home/CoreMS/data\n
        \n
      • \n
      • change \"./tests/tests_data\" to your data directory location

        \n\n
        \n
        volumes:\n  - path_to_your_data_directory:/home/corems/data\n
        \n
      • \n
      • save the file and then call:

        \n\n
        \n
        docker-compose -f docker-compose-jupyter.yml up\n
        \n
      • \n
    2. \n
    3. Another option is to manually build the containers:

      \n\n
        \n
      • Build the corems image:

        \n\n
        \n
        docker build -t corems:local .\n
        \n
      • \n
      • Start the database container:

        \n\n
        \n
        docker-compose up -d   \n
        \n
      • \n
      • Start the Jupyter Notebook:

        \n\n
        \n
        docker run --rm -v ./data:/home/CoreMS/data corems:local\n
        \n
      • \n
      • Open your browser, copy and paste the URL address provided in the terminal: http://localhost:8888/?token=<token>.

      • \n
      • Open the CoreMS-Tutorial.ipynb

      • \n
    4. \n
    \n\n
    \n\n

    Example for FT-ICR Data Processing

    \n\n

    More examples can be found under the directory examples/scripts, examples/notebooks

    \n\n
      \n
    • Basic functionality example
    • \n
    \n\n
    \n
    from corems.transient.input.brukerSolarix import ReadBrukerSolarix\nfrom corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas\nfrom corems.mass_spectrum.output.export import HighResMassSpecExport\nfrom matplotlib import pyplot\n\nfile_path= 'tests/tests_data/ftms/ESI_NEG_SRFA.d'\n\n# Instatiate the Bruker Solarix reader with the filepath\nbruker_reader = ReadBrukerSolarix(file_path)\n\n# Use the reader to instatiate a transient object\nbruker_transient_obj = bruker_reader.get_transient()\n\n# Calculate the transient duration time\nT =  bruker_transient_obj.transient_time\n\n# Use the transient object to instatitate a mass spectrum object\nmass_spectrum_obj = bruker_transient_obj.get_mass_spectrum(plot_result=False, auto_process=True)\n\n# The following SearchMolecularFormulas function does the following\n# - searches monoisotopic molecular formulas for all mass spectral peaks\n# - calculates fine isotopic structure based on monoisotopic molecular formulas found and current dynamic range\n# - searches molecular formulas of correspondent calculated isotopologues\n# - settings are stored at SearchConfig.json and can be changed directly on the file or inside the framework class\n\nSearchMolecularFormulas(mass_spectrum_obj, first_hit=False).run_worker_mass_spectrum()\n\n# Iterate over mass spectral peaks objs within the mass_spectrum_obj\nfor mspeak in mass_spectrum_obj.sort_by_abundance():\n\n    # If there is at least one molecular formula associated, mspeak returns True\n    if  mspeak:\n\n        # Get the molecular formula with the highest mass accuracy\n        molecular_formula = mspeak.molecular_formula_lowest_error\n\n        # Plot mz and peak height\n        pyplot.plot(mspeak.mz_exp, mspeak.abundance, 'o', c='g')\n\n        # Iterate over all molecular formulas associated with the ms peaks obj\n        for molecular_formula in mspeak:\n\n            # Check if the molecular formula is a isotopologue\n            if 
molecular_formula.is_isotopologue:\n\n                # Access the molecular formula text representation and print\n                print (molecular_formula.string)\n\n                # Get 13C atoms count\n                print (molecular_formula['13C'])\n    else:\n        # Get mz and peak height\n        print(mspeak.mz_exp,mspeak.abundance)\n\n# Save data\n## to a csv file\nmass_spectrum_obj.to_csv("filename")\nmass_spectrum_obj.to_hdf("filename")\n# to pandas Datarame pickle\nmass_spectrum_obj.to_pandas("filename")\n\n# Extract data as a pandas Dataframe\ndf = mass_spectrum_obj.to_dataframe()\n
    \n
    \n\n
    \n\n

    UML Diagrams

    \n\n

    UML (unified modeling language) diagrams for Direct Infusion FT-MS and GC-MS classes can be found here.

    \n\n
    \n\n

    Citing CoreMS

    \n\n

    If you use CoreMS in your work, please use the following citation:

    \n\n

    Version 2.2.1 Release on GitHub, archived on Zenodo:

    \n\n

    \"DOI\"

    \n\n

    Yuri E. Corilo, William R. Kew, Lee Ann McCue (2021, March 27). EMSL-Computing/CoreMS: CoreMS 2.0.1 (Version v2.0.1), as developed on Github. Zenodo. http://doi.org/10.5281/zenodo.4641552

    \n\n

    ```

    \n\n
    \n\n

    This material was prepared as an account of work sponsored by an agency of the\nUnited States Government. Neither the United States Government nor the United\nStates Department of Energy, nor Battelle, nor any of their employees, nor any\njurisdiction or organization that has cooperated in the development of these\nmaterials, makes any warranty, express or implied, or assumes any legal\nliability or responsibility for the accuracy, completeness, or usefulness or\nany information, apparatus, product, software, or process disclosed, or\nrepresents that its use would not infringe privately owned rights.

    \n\n

    Reference herein to any specific commercial product, process, or service by\ntrade name, trademark, manufacturer, or otherwise does not necessarily\nconstitute or imply its endorsement, recommendation, or favoring by the United\nStates Government or any agency thereof, or Battelle Memorial Institute. The\nviews and opinions of authors expressed herein do not necessarily state or\nreflect those of the United States Government or any agency thereof.

    \n\n
                 PACIFIC NORTHWEST NATIONAL LABORATORY\n                          operated by\n                            BATTELLE\n                            for the\n               UNITED STATES DEPARTMENT OF ENERGY\n                under Contract DE-AC05-76RL01830\n
    \n"}, {"fullname": "corems.readme_path", "modulename": "corems", "qualname": "readme_path", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/heal742/LOCAL/corems_dev/corems/corems/../README.md'"}, {"fullname": "corems.timeit", "modulename": "corems", "qualname": "timeit", "kind": "function", "doc": "

    \n", "signature": "(method):", "funcdef": "def"}, {"fullname": "corems.SuppressPrints", "modulename": "corems", "qualname": "SuppressPrints", "kind": "class", "doc": "

    \n"}, {"fullname": "corems.chunks", "modulename": "corems", "qualname": "chunks", "kind": "function", "doc": "

    Yield successive n-sized chunks from lst.

    \n", "signature": "(lst, n):", "funcdef": "def"}, {"fullname": "corems.corems_md5", "modulename": "corems", "qualname": "corems_md5", "kind": "function", "doc": "

    \n", "signature": "(fname):", "funcdef": "def"}, {"fullname": "corems.chroma_peak", "modulename": "corems.chroma_peak", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.calc", "modulename": "corems.chroma_peak.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "GCPeakCalculation", "kind": "class", "doc": "

    Class for performing peak calculations in GC chromatography.

    \n\n
    Methods
    \n\n
      \n
    • calc_area(self, tic: List[float], dx: float) -> None: Calculate the area under the curve of the chromatogram.
    • \n
    • linear_ri(self, right_ri: float, left_ri: float, left_rt: float, right_rt: float) -> float: Calculate the retention index using linear interpolation.
    • \n
    • calc_ri(self, rt_ri_pairs: List[Tuple[float, float]]) -> int: Calculate the retention index based on the given retention time - retention index pairs.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation.calc_area", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "GCPeakCalculation.calc_area", "kind": "function", "doc": "

    Calculate the area under the curve of the chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • tic (List[float]):\nThe total ion current (TIC) values.
    • \n
    • dx (float):\nThe spacing between data points.
    • \n
    \n", "signature": "(self, tic: list[float], dx: float) -> None:", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation.linear_ri", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "GCPeakCalculation.linear_ri", "kind": "function", "doc": "

    Calculate the retention index using linear interpolation.

    \n\n
    Parameters
    \n\n
      \n
    • right_ri (float):\nThe retention index at the right reference point.
    • \n
    • left_ri (float):\nThe retention index at the left reference point.
    • \n
    • left_rt (float):\nThe retention time at the left reference point.
    • \n
    • right_rt (float):\nThe retention time at the right reference point.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The calculated retention index.
    • \n
    \n", "signature": "(\tself,\tright_ri: float,\tleft_ri: float,\tleft_rt: float,\tright_rt: float) -> float:", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation.calc_ri", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "GCPeakCalculation.calc_ri", "kind": "function", "doc": "

    Calculate the retention index based on the given retention time - retention index pairs.

    \n\n
    Parameters
    \n\n
      \n
    • rt_ri_pairs (List[Tuple[float, float]]):\nThe list of retention time - retention index pairs.
    • \n
    \n", "signature": "(self, rt_ri_pairs: list[tuple[float, float]]) -> None:", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation", "kind": "class", "doc": "

    Class for performing peak calculations in LC-MS mass spectrometry.

    \n\n

    This class is intended to be used as a mixin class for the LCMSMassFeature class.

    \n"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation.calc_dispersity_index", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation.calc_dispersity_index", "kind": "function", "doc": "

    Calculate the dispersity index of the mass feature.

    \n\n

    This function calculates the dispersity index of the mass feature and\nstores the result in the _dispersity_index attribute. The dispersity index is calculated as the standard\ndeviation of the retention times that account for 50% of the cumulative intensity, starting from the most\nintense point, as described in [1].

    \n\n
    Returns
    \n\n
      \n
    • None, stores the result in the _dispersity_index attribute of the class.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the EIC data are not available.
    • \n
    \n\n
    References
    \n\n

    1) Boiteau, Rene M., et al. \"Relating Molecular Properties to the Persistence of Marine Dissolved\nOrganic Matter with Liquid Chromatography\u2013Ultrahigh-Resolution Mass Spectrometry.\"\nEnvironmental Science & Technology 58.7 (2024): 3267-3277.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation.calc_fraction_height_width", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation.calc_fraction_height_width", "kind": "function", "doc": "

    Calculate the height width of the mass feature at a specific fraction of the maximum intensity.

    \n\n

    This function returns a tuple with the minimum and maximum half-height width based on scan resolution.

    \n\n
    Parameters
    \n\n
      \n
    • fraction (float):\nThe fraction of the maximum intensity to calculate the height width.\nFor example, 0.5 will calculate the half-height width.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Tuple[float, float, bool]: The minimum and maximum half-height width based on scan resolution (in minutes), and a boolean indicating if the width was estimated.
    • \n
    \n", "signature": "(self, fraction: float):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation.calc_half_height_width", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation.calc_half_height_width", "kind": "function", "doc": "

    Calculate the half-height width of the mass feature.

    \n\n

    This function calculates the half-height width of the mass feature and\nstores the result in the _half_height_width attribute

    \n\n
    Returns
    \n\n
      \n
    • None, stores the result in the _half_height_width attribute of the class.
    • \n
    \n", "signature": "(self, accept_estimated: bool = False):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation.calc_tailing_factor", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation.calc_tailing_factor", "kind": "function", "doc": "

    Calculate the peak asymmetry of the mass feature.

    \n\n

    This function calculates the peak asymmetry of the mass feature and\nstores the result in the _tailing_factor attribute.\nCalculations completed at 5% of the peak height in accordance with the USP tailing factor calculation.

    \n\n
    Returns
    \n\n
      \n
    • None, stores the result in the _tailing_factor attribute of the class.
    • \n
    \n\n
    References
    \n\n

    1) JIS K0124:2011 General rules for high performance liquid chromatography\n2) JIS K0214:2013 Technical terms for analytical chemistry

    \n", "signature": "(self, accept_estimated: bool = False):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory", "modulename": "corems.chroma_peak.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase", "kind": "class", "doc": "

    Base class for chromatographic peak (ChromaPeak) objects.

    \n\n
    Parameters
    \n\n
      \n
    • chromatogram_parent (Chromatogram):\nThe parent chromatogram object.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • start_index (int):\nThe start index of the peak.
    • \n
    • index (int):\nThe index of the peak.
    • \n
    • final_index (int):\nThe final index of the peak.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • start_scan (int):\nThe start scan of the peak.
    • \n
    • final_scan (int):\nThe final scan of the peak.
    • \n
    • apex_scan (int):\nThe apex scan of the peak.
    • \n
    • chromatogram_parent (Chromatogram):\nThe parent chromatogram object.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • _area (float):\nThe area of the peak.
    • \n
    \n\n
    Properties
    \n\n
      \n
    • retention_time : float.\nThe retention time of the peak.
    • \n
    • tic : float.\nThe total ion current of the peak.
    • \n
    • area : float.\nThe area of the peak.
    • \n
    • rt_list : list.\nThe list of retention times within the peak.
    • \n
    • tic_list : list.\nThe list of total ion currents within the peak.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • None
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.__init__", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tchromatogram_parent,\tmass_spectrum_obj,\tstart_index,\tindex,\tfinal_index)"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.start_scan", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.start_scan", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.final_scan", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.final_scan", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.apex_scan", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.apex_scan", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.chromatogram_parent", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.chromatogram_parent", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.mass_spectrum", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.retention_time", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.retention_time", "kind": "variable", "doc": "

    Retention Time

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.tic", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.tic", "kind": "variable", "doc": "

    Total Ion Current

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.area", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.area", "kind": "variable", "doc": "

    Peak Area

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.rt_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.rt_list", "kind": "variable", "doc": "

    Retention Time List

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.tic_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.tic_list", "kind": "variable", "doc": "

    Total Ion Current List

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature", "kind": "class", "doc": "

    Class representing a mass feature in a liquid chromatography (LC) chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • lcms_parent (LCMS):\nThe parent LCMSBase object.
    • \n
    • mz (float):\nThe observed mass to charge ratio of the feature.
    • \n
    • retention_time (float):\nThe retention time of the feature (in minutes), at the apex.
    • \n
    • intensity (float):\nThe intensity of the feature.
    • \n
    • apex_scan (int):\nThe scan number of the apex of the feature.
    • \n
    • persistence (float, optional):\nThe persistence of the feature. Default is None.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _mz_exp (float):\nThe observed mass to charge ratio of the feature.
    • \n
    • _mz_cal (float):\nThe calibrated mass to charge ratio of the feature.
    • \n
    • _retention_time (float):\nThe retention time of the feature (in minutes), at the apex.
    • \n
    • _apex_scan (int):\nThe scan number of the apex of the feature.
    • \n
    • _intensity (float):\nThe intensity of the feature.
    • \n
    • _persistence (float):\nThe persistence of the feature.
    • \n
    • _eic_data (EIC_Data):\nThe EIC data object associated with the feature.
    • \n
    • _dispersity_index (float):\nThe dispersity index of the feature.
    • \n
    • _half_height_width (numpy.ndarray):\nThe half height width of the feature (in minutes, as an array of min and max values).
    • \n
    • _tailing_factor (float):\nThe tailing factor of the feature. \n> 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
    • \n
    • _ms_deconvoluted_idx ([int]):\nThe indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
    • \n
    • is_calibrated (bool):\nIf True, the feature has been calibrated. Default is False.
    • \n
    • monoisotopic_mf_id (int):\nMass feature id that is the monoisotopic version of self.\nIf equal to self.id, then self is the monoisotopic feature. Default is None.
    • \n
    • isotopologue_type (str):\nThe isotopic class of the feature, i.e. \"13C1\", \"13C2\", \"13C1 37Cl1\" etc. \nDefault is None.
    • \n
    • ms2_scan_numbers (list):\nList of scan numbers of the MS2 spectra associated with the feature. \nDefault is an empty list.
    • \n
    • ms2_mass_spectra (dict):\nDictionary of MS2 spectra associated with the feature (key = scan number for DDA). \nDefault is an empty dictionary.
    • \n
    • ms2_similarity_results (list):\nList of MS2 similarity results associated with the mass feature. \nDefault is an empty list.
    • \n
    • id (int):\nThe ID of the feature, also the key in the parent LCMS object's \nmass_features dictionary.
    • \n
    • mass_spectrum_deconvoluted_parent (bool):\nIf True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
    • \n
    • associated_mass_features_deconvoluted (list):\nList of mass features associated with the deconvoluted mass spectrum. Default is an empty list.
    • \n
    \n", "bases": "ChromaPeakBase, corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.__init__", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tlcms_parent,\tmz: float,\tretention_time: float,\tintensity: float,\tapex_scan: int,\tpersistence: float = None,\tid: int = None)"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.monoisotopic_mf_id", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.monoisotopic_mf_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.isotopologue_type", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.isotopologue_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.ms2_scan_numbers", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.ms2_scan_numbers", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.ms2_mass_spectra", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.ms2_mass_spectra", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.ms2_similarity_results", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.ms2_similarity_results", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.mass_spectrum_deconvoluted_parent", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.mass_spectrum_deconvoluted_parent", "kind": "variable", "doc": "

    \n", "annotation": ": bool"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.associated_mass_features_deconvoluted", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.associated_mass_features_deconvoluted", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.update_mz", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.update_mz", "kind": "function", "doc": "

    Update the mass to charge ratio from the mass spectrum object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.plot", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.plot", "kind": "function", "doc": "

    Plot the mass feature.

    \n\n
    Parameters
    \n\n
      \n
    • to_plot (list, optional):\nList of strings specifying what to plot, any iteration of \n\"EIC\", \"MS2\", and \"MS1\". \nDefault is [\"EIC\", \"MS1\", \"MS2\"].
    • \n
    • return_fig (bool, optional):\nIf True, the figure is returned. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.figure.Figure or None: The figure object if return_fig is True. \nOtherwise None and the figure is displayed.
    • \n
    \n", "signature": "(self, to_plot=['EIC', 'MS1', 'MS2'], return_fig=True):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.mz", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.mz", "kind": "variable", "doc": "

    Mass to charge ratio of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.mass_spectrum_deconvoluted", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.mass_spectrum_deconvoluted", "kind": "variable", "doc": "

    Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.retention_time", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.retention_time", "kind": "variable", "doc": "

    Retention time of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.apex_scan", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.apex_scan", "kind": "variable", "doc": "

    Apex scan of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.intensity", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.intensity", "kind": "variable", "doc": "

    Intensity of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.persistence", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.persistence", "kind": "variable", "doc": "

    Persistence of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.eic_rt_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.eic_rt_list", "kind": "variable", "doc": "

    Retention time list between the beginning and end of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.eic_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.eic_list", "kind": "variable", "doc": "

    EIC List between the beginning and end of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.ms1_peak", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.ms1_peak", "kind": "variable", "doc": "

    MS1 peak from associated mass spectrum that is closest to the mass feature's m/z

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.tailing_factor", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.tailing_factor", "kind": "variable", "doc": "

    Tailing factor of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.dispersity_index", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.dispersity_index", "kind": "variable", "doc": "

    Dispersity index of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.half_height_width", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.half_height_width", "kind": "variable", "doc": "

    Half height width of the mass feature, average of min and max values, in minutes

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.best_ms2", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.best_ms2", "kind": "variable", "doc": "

    Points to the best representative MS2 mass spectrum

    \n\n
    Notes
    \n\n

    If there is only one MS2 mass spectrum, it will be returned\nIf there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score.\nIf there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power. Checks for and disqualifies possible chimeric spectra.

    \n\n
    Returns
    \n\n
      \n
    • MassSpectrum or None: The best MS2 mass spectrum.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak", "kind": "class", "doc": "

    Class representing a peak in a gas chromatography (GC) chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • chromatogram_parent (Chromatogram):\nThe parent chromatogram object.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object associated with the peak.
    • \n
    • indexes (tuple):\nThe indexes of the peak in the chromatogram.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _compounds (list):\nList of compounds associated with the peak.
    • \n
    • _ri (float or None):\nRetention index of the peak.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __len__(). Returns the number of compounds associated with the peak.
    • \n
    • __getitem__(position). Returns the compound at the specified position.
    • \n
    • remove_compound(compounds_obj). Removes the specified compound from the peak.
    • \n
    • clear_compounds(). Removes all compounds from the peak.
    • \n
    • add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.
    • \n
    • ri(). Returns the retention index of the peak.
    • \n
    • highest_ss_compound(). Returns the compound with the highest spectral similarity score.
    • \n
    • highest_score_compound(). Returns the compound with the highest similarity score.
    • \n
    • compound_names(). Returns a list of names of compounds associated with the peak.
    • \n
    \n", "bases": "ChromaPeakBase, corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.__init__", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.__init__", "kind": "function", "doc": "

    \n", "signature": "(chromatogram_parent, mass_spectrum_obj, indexes)"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.remove_compound", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.remove_compound", "kind": "function", "doc": "

    \n", "signature": "(self, compounds_obj):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.clear_compounds", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.clear_compounds", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.add_compound", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.add_compound", "kind": "function", "doc": "

    Adds a compound to the peak with the specified attributes.

    \n\n
    Parameters
    \n\n
      \n
    • compounds_dict (dict):\nDictionary containing the compound information.
    • \n
    • spectral_similarity_scores (dict):\nDictionary containing the spectral similarity scores.
    • \n
    • ri_score (float or None, optional):\nThe retention index score of the compound. Default is None.
    • \n
    • similarity_score (float or None, optional):\nThe similarity score of the compound. Default is None.
    • \n
    \n", "signature": "(\tself,\tcompounds_dict,\tspectral_similarity_scores,\tri_score=None,\tsimilarity_score=None):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.ri", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.ri", "kind": "variable", "doc": "

    Returns the retention index of the peak.

    \n\n
    Returns
    \n\n
      \n
    • float or None: The retention index of the peak.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.highest_ss_compound", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.highest_ss_compound", "kind": "variable", "doc": "

    Returns the compound with the highest spectral similarity score.

    \n\n
    Returns
    \n\n
      \n
    • LowResCompoundRef or None: The compound with the highest spectral similarity score.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.highest_score_compound", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.highest_score_compound", "kind": "variable", "doc": "

    Returns the compound with the highest similarity score.

    \n\n
    Returns
    \n\n
      \n
    • LowResCompoundRef or None: The compound with the highest similarity score.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.compound_names", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.compound_names", "kind": "variable", "doc": "

    Returns a list of names of compounds associated with the peak.

    \n\n
    Returns
    \n\n
      \n
    • list: List of names of compounds associated with the peak.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved", "kind": "class", "doc": "

    Represents a deconvolved peak in a chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • chromatogram_parent (Chromatogram):\nThe parent chromatogram object.
    • \n
    • mass_spectra (list):\nList of mass spectra associated with the peak.
    • \n
    • apex_index (int):\nIndex of the apex mass spectrum in the mass_spectra list.
    • \n
    • rt_list (list):\nList of retention times.
    • \n
    • tic_list (list):\nList of total ion currents.
    • \n
    \n", "bases": "GCPeak"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved.__init__", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved.__init__", "kind": "function", "doc": "

    \n", "signature": "(chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list)"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved.mass_spectra", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved.mass_spectra", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved.rt_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved.rt_list", "kind": "variable", "doc": "

    Get the list of retention times.

    \n\n
    Returns
    \n\n
      \n
    • list: The list of retention times.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved.tic_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved.tic_list", "kind": "variable", "doc": "

    Get the list of total ion currents.

    \n\n
    Returns
    \n\n
      \n
    • list: The list of total ion currents.
    • \n
    \n"}, {"fullname": "corems.encapsulation", "modulename": "corems.encapsulation", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.constant", "modulename": "corems.encapsulation.constant", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.constant.Labels", "modulename": "corems.encapsulation.constant", "qualname": "Labels", "kind": "class", "doc": "

    Class for Labels used in CoreMS

    \n\n

    These labels are used to define:

    \n\n
      \n
    • types of columns in plaintext data inputs,
    • \n
    • types of data/mass spectra
    • \n
    • types of assignment for ions
    • \n
    \n"}, {"fullname": "corems.encapsulation.constant.Labels.mz", "modulename": "corems.encapsulation.constant", "qualname": "Labels.mz", "kind": "variable", "doc": "

    \n", "default_value": "'m/z'"}, {"fullname": "corems.encapsulation.constant.Labels.abundance", "modulename": "corems.encapsulation.constant", "qualname": "Labels.abundance", "kind": "variable", "doc": "

    \n", "default_value": "'Peak Height'"}, {"fullname": "corems.encapsulation.constant.Labels.rp", "modulename": "corems.encapsulation.constant", "qualname": "Labels.rp", "kind": "variable", "doc": "

    \n", "default_value": "'Resolving Power'"}, {"fullname": "corems.encapsulation.constant.Labels.s2n", "modulename": "corems.encapsulation.constant", "qualname": "Labels.s2n", "kind": "variable", "doc": "

    \n", "default_value": "'S/N'"}, {"fullname": "corems.encapsulation.constant.Labels.label", "modulename": "corems.encapsulation.constant", "qualname": "Labels.label", "kind": "variable", "doc": "

    \n", "default_value": "'label'"}, {"fullname": "corems.encapsulation.constant.Labels.bruker_profile", "modulename": "corems.encapsulation.constant", "qualname": "Labels.bruker_profile", "kind": "variable", "doc": "

    \n", "default_value": "'Bruker_Profile'"}, {"fullname": "corems.encapsulation.constant.Labels.thermo_profile", "modulename": "corems.encapsulation.constant", "qualname": "Labels.thermo_profile", "kind": "variable", "doc": "

    \n", "default_value": "'Thermo_Profile'"}, {"fullname": "corems.encapsulation.constant.Labels.simulated_profile", "modulename": "corems.encapsulation.constant", "qualname": "Labels.simulated_profile", "kind": "variable", "doc": "

    \n", "default_value": "'Simulated Profile'"}, {"fullname": "corems.encapsulation.constant.Labels.booster_profile", "modulename": "corems.encapsulation.constant", "qualname": "Labels.booster_profile", "kind": "variable", "doc": "

    \n", "default_value": "'Booster Profile'"}, {"fullname": "corems.encapsulation.constant.Labels.bruker_frequency", "modulename": "corems.encapsulation.constant", "qualname": "Labels.bruker_frequency", "kind": "variable", "doc": "

    \n", "default_value": "'Bruker_Frequency'"}, {"fullname": "corems.encapsulation.constant.Labels.midas_frequency", "modulename": "corems.encapsulation.constant", "qualname": "Labels.midas_frequency", "kind": "variable", "doc": "

    \n", "default_value": "'Midas_Frequency'"}, {"fullname": "corems.encapsulation.constant.Labels.thermo_centroid", "modulename": "corems.encapsulation.constant", "qualname": "Labels.thermo_centroid", "kind": "variable", "doc": "

    \n", "default_value": "'Thermo_Centroid'"}, {"fullname": "corems.encapsulation.constant.Labels.corems_centroid", "modulename": "corems.encapsulation.constant", "qualname": "Labels.corems_centroid", "kind": "variable", "doc": "

    \n", "default_value": "'CoreMS_Centroid'"}, {"fullname": "corems.encapsulation.constant.Labels.gcms_centroid", "modulename": "corems.encapsulation.constant", "qualname": "Labels.gcms_centroid", "kind": "variable", "doc": "

    \n", "default_value": "'Thermo_Centroid'"}, {"fullname": "corems.encapsulation.constant.Labels.unassigned", "modulename": "corems.encapsulation.constant", "qualname": "Labels.unassigned", "kind": "variable", "doc": "

    \n", "default_value": "'unassigned'"}, {"fullname": "corems.encapsulation.constant.Labels.radical_ion", "modulename": "corems.encapsulation.constant", "qualname": "Labels.radical_ion", "kind": "variable", "doc": "

    \n", "default_value": "'RADICAL'"}, {"fullname": "corems.encapsulation.constant.Labels.protonated_de_ion", "modulename": "corems.encapsulation.constant", "qualname": "Labels.protonated_de_ion", "kind": "variable", "doc": "

    \n", "default_value": "'DE_OR_PROTONATED'"}, {"fullname": "corems.encapsulation.constant.Labels.protonated", "modulename": "corems.encapsulation.constant", "qualname": "Labels.protonated", "kind": "variable", "doc": "

    \n", "default_value": "'protonated'"}, {"fullname": "corems.encapsulation.constant.Labels.de_protonated", "modulename": "corems.encapsulation.constant", "qualname": "Labels.de_protonated", "kind": "variable", "doc": "

    \n", "default_value": "'de-protonated'"}, {"fullname": "corems.encapsulation.constant.Labels.adduct_ion", "modulename": "corems.encapsulation.constant", "qualname": "Labels.adduct_ion", "kind": "variable", "doc": "

    \n", "default_value": "'ADDUCT'"}, {"fullname": "corems.encapsulation.constant.Labels.neutral", "modulename": "corems.encapsulation.constant", "qualname": "Labels.neutral", "kind": "variable", "doc": "

    \n", "default_value": "'neutral'"}, {"fullname": "corems.encapsulation.constant.Labels.ion_type", "modulename": "corems.encapsulation.constant", "qualname": "Labels.ion_type", "kind": "variable", "doc": "

    \n", "default_value": "'IonType'"}, {"fullname": "corems.encapsulation.constant.Labels.ion_type_translate", "modulename": "corems.encapsulation.constant", "qualname": "Labels.ion_type_translate", "kind": "variable", "doc": "

    \n", "default_value": "{'protonated': 'DE_OR_PROTONATED', 'de-protonated': 'DE_OR_PROTONATED', 'radical': 'RADICAL', 'adduct': 'ADDUCT', 'ADDUCT': 'ADDUCT'}"}, {"fullname": "corems.encapsulation.constant.Atoms", "modulename": "corems.encapsulation.constant", "qualname": "Atoms", "kind": "class", "doc": "

    Class for Atoms in CoreMS

    \n\n

    This class includes key properties of atoms (and the electron) and isotopes, including their exact masses, relative abundances, and covalences. \nIt also associates which isotopes are for the same element, and provides an ordering of elements.

    \n\n

    IUPAC definition of monoisotopic mass is based on the most abundant isotopes of each element present.\nHere, we will use atom symbols with isotope numbers for all isotopes excluding the most abundant one.\nThis list has been corrected up to Iodine.

    \n\n
    References
    \n\n
      \n
    1. NIST - Last Accessed 2019-06-12\nhttps://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses
    2. \n
    \n"}, {"fullname": "corems.encapsulation.constant.Atoms.electron_mass", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.electron_mass", "kind": "variable", "doc": "

    \n", "default_value": "0.000548579909065"}, {"fullname": "corems.encapsulation.constant.Atoms.atomic_masses", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.atomic_masses", "kind": "variable", "doc": "

    \n", "default_value": "{'H': 1.00782503223, 'D': 2.01410177812, 'T': 3.0160492779, '3He': 3.0160293201, 'He': 4.00260325413, '6Li': 6.0151228874, 'Li': 7.0160034366, 'Be': 9.012183065, '10B': 10.01293695, 'B': 11.00930536, 'C': 12.0, '13C': 13.00335483507, '14C': 14.0032419884, 'N': 14.00307400443, '15N': 15.00010889888, 'O': 15.99491461957, '17O': 16.9991317565, '18O': 17.99915961286, 'F': 18.99840316273, 'Ne': 19.9924401762, '21Ne': 20.993846685, '22Ne': 21.991385114, 'Na': 22.989769282, 'Mg': 23.985041697, '25Mg': 24.985836976, '26Mg': 25.982592968, 'Al': 26.98153853, 'Si': 27.97692653465, '29Si': 28.9764946649, '30Si': 29.973770136, 'P': 30.97376199842, 'S': 31.9720711744, '33S': 32.9714589098, '34S': 33.967867004, '36S': 35.96708071, 'Cl': 34.968852682, '37Cl': 36.965902602, '36Ar': 35.967545105, '38Ar': 37.96273211, 'Ar': 39.9623831237, 'K': 38.9637064864, '40K': 39.963998166, '41K': 40.9618252579, 'Ca': 39.962590863, '42Ca': 41.95861783, '43Ca': 42.95876644, '44Ca': 43.95548156, '46Ca': 45.953689, '48Ca': 47.95252276, 'Sc': 44.95590828, '46Ti': 45.95262772, '47Ti': 46.95175879, 'Ti': 47.94794198, '49Ti': 48.94786568, '50Ti': 49.94478689, '50V': 49.94715601, 'V': 50.94395704, '50Cr': 49.94604183, 'Cr': 51.94050623, '53Cr': 52.94064815, '54Cr': 53.93887916, 'Mn': 54.93804391, '54Fe': 53.93960899, 'Fe': 55.93493633, '57Fe': 56.93539284, '58Fe': 57.93327443, 'Co': 58.93319429, 'Ni': 57.93534241, '60Ni': 59.93078588, '61Ni': 60.93105557, '62Ni': 61.92834537, '64Ni': 63.92796682, 'Cu': 62.92959772, '65Cu': 64.9277897, 'Zn': 63.92914201, '66Zn': 65.92603381, '67Zn': 66.92712775, '68Zn': 67.92484455, '70Zn': 69.9253192, 'Ga': 68.9255735, '71Ga': 70.92470258, '70Ge': 69.92424875, '72Ge': 71.922075826, '73Ge': 72.923458956, 'Ge': 73.921177761, '76Ge': 75.921402726, 'As': 74.92159457, '74Se': 73.922475934, '76Se': 75.919213704, '77Se': 76.919914154, '78Se': 77.91730928, 'Se': 79.9165218, '82Se': 81.9166995, 'Br': 78.9183376, '81Br': 80.9162897, '78Kr': 77.92036494, 
'80Kr': 79.91637808, '82Kr': 81.91348273, '83Kr': 82.91412716, 'Kr': 83.9114977282, '86Kr': 85.9106106269, 'Rb': 84.9117897379, '87Rb': 86.909180531, '84Sr': 83.9134191, '86Sr': 85.9092606, '87Sr': 86.9088775, 'Sr': 87.9056125, 'Y': 88.9058403, 'Zr': 89.9046977, '91Zr': 90.9056396, '92Zr': 91.9050347, '94Zr': 93.9063108, '96Zr': 95.9082714, 'Nb': 92.906373, '92Mo': 91.90680796, '94Mo': 93.9050849, '95Mo': 94.90583877, '96Mo': 95.90467612, '97Mo': 96.90601812, 'Mo': 97.90540482, '100Mo': 99.9074718, 'Tc': 98.9062508, '96Ru': 95.90759025, '98Ru': 97.9052868, '99Ru': 98.9059341, '100Ru': 99.9042143, '101Ru': 100.9055769, 'Ru': 101.9043441, '104Ru': 103.9054275, 'Rh': 102.905498, '102Pd': 101.9056022, '104Pd': 103.9040305, '105Pd': 104.9050796, 'Pd': 105.9034804, '108Pd': 107.9038916, '110Pd': 109.9051722, 'Ag': 106.9050916, '109Ag': 108.9047553, '106Cd': 105.9064599, '108Cd': 107.9041834, '110Cd': 109.90300661, '111Cd': 110.90418287, 'Cd': 111.90276287, '113Cd': 112.90440813, '114Cd': 113.90336509, '116Cd': 115.90476315, '113In': 112.90406184, 'In': 114.903878776, '112Sn': 111.90482387, '114Sn': 113.9027827, '115Sn': 114.903344699, '116Sn': 115.9017428, '117Sn': 116.90295398, '118Sn': 117.90160657, '119Sn': 118.90331117, 'Sn': 119.90220163, '122Sn': 121.9034438, '124Sn': 123.9052766, 'Sb': 120.903812, '123Sb': 122.9042132, '120Te': 119.9040593, '122Te': 121.9030435, '123Te': 122.9042698, '124Te': 123.9028171, '125Te': 124.9044299, '126Te': 125.9033109, '128Te': 127.90446128, 'Te': 129.906222748, 'I': 126.9044719, '124Xe': 123.905892, '126Xe': 125.9042983, '128Xe': 127.903531, '129Xe': 128.9047808611, '130Xe': 129.903509349, '131Xe': 130.90508406, 'Xe': 131.9041550856, '134Xe': 133.90539466, '136Xe': 135.907214484, 'Cs': 132.905451961, '130Ba': 129.9063207, '132Ba': 131.9050611, '134Ba': 133.90450818, '135Ba': 134.90568838, '136Ba': 135.90457573, '137Ba': 136.90582714, 'Ba': 137.905247, '138La': 137.9071149, 'La': 138.9063563, '136Ce': 135.90712921, '138Ce': 
137.905991, 'Ce': 139.9054431, '142Ce': 141.9092504, 'Pr': 140.9076576, 'Nd': 141.907729, '143Nd': 142.90982, '144Nd': 143.910093, '145Nd': 144.9125793, '146Nd': 145.9131226, '148Nd': 147.9168993, '150Nd': 149.9209022, '145Pm': 144.9127559, '147Pm': 146.915145, '144Sm': 143.9120065, '147Sm': 146.9149044, '148Sm': 147.9148292, '149Sm': 148.9171921, '150Sm': 149.9172829, 'Sm': 151.9197397, '154Sm': 153.9222169, '151Eu': 150.9198578, 'Eu': 152.921238, '152Gd': 151.9197995, '154Gd': 153.9208741, '155Gd': 154.9226305, '156Gd': 155.9221312, '157Gd': 156.9239686, 'Gd': 157.9241123, '160Gd': 159.9270624, 'Tb': 158.9253547, '156Dy': 155.9242847, '158Dy': 157.9244159, '160Dy': 159.9252046, '161Dy': 160.9269405, '162Dy': 161.9268056, '163Dy': 162.9287383, 'Dy': 163.9291819, 'Ho': 164.9303288, '162Er': 161.9287884, '164Er': 163.9292088, 'Er': 165.9302995, '167Er': 166.9320546, '168Er': 167.9323767, '170Er': 169.9354702, 'Tm': 168.9342179, '168Yb': 167.9338896, '170Yb': 169.9347664, '171Yb': 170.9363302, '172Yb': 171.9363859, '173Yb': 172.9382151, 'Yb': 173.9388664, '176Yb': 175.9425764, 'Lu': 174.9407752, '176Lu': 175.9426897, '174Hf': 173.9400461, '176Hf': 175.9414076, '177Hf': 176.9432277, '178Hf': 177.9437058, '179Hf': 178.9458232, 'Hf': 179.946557, '180Ta': 179.9474648, 'Ta': 180.9479958, '180W': 179.9467108, '182W': 181.94820394, '183W': 182.95022275, 'W': 183.95093092, '186W': 185.9543628, '185Re': 184.9529545, 'Re': 186.9557501, '184Os': 183.9524885, '186Os': 185.953835, '187Os': 186.9557474, '188Os': 187.9558352, '189Os': 188.9581442, '190Os': 189.9584437, '192Os': 191.961477, '191Ir': 190.9605893, 'Ir': 192.9629216, '190Pt': 189.9599297, '192Pt': 191.9610387, '194Pt': 193.9626809, 'Pt': 194.9647917, '196Pt': 195.96495209, '198Pt': 197.9678949, 'Au': 196.96656879, '196Hg': 195.9658326, '198Hg': 197.9667686, '199Hg': 198.96828064, '200Hg': 199.96832659, '201Hg': 200.97030284, 'Hg': 201.9706434, '204Hg': 203.97349398, '203Tl': 202.9723446, 'Tl': 204.9744278, '204Pb': 
203.973044, '206Pb': 205.9744657, '207Pb': 206.9758973, 'Pb': 207.9766525, 'Bi': 208.9803991, '209Po': 208.9824308, '210Po': 209.9828741, '210At': 209.9871479, '211At': 210.9874966, '211Rn': 210.9906011, '220Rn': 220.0113941, '222Rn': 222.0175782, '223Fr': 223.019736, '223Ra': 223.0185023, '224Ra': 224.020212, '226Ra': 226.0254103, '228Ra': 228.0310707, '227Ac': 227.0277523, '230Th': 230.0331341, 'Th': 232.0380558, 'Pa': 231.0358842, '233U': 233.0396355, '234U': 234.0409523, '235U': 235.0439301, '236U': 236.0455682, 'U': 238.0507884, '236Np': 236.04657, '237Np': 237.0481736, '238Pu': 238.0495601, '239Pu': 239.0521636, '240Pu': 240.0538138, '241Pu': 241.0568517, '242Pu': 242.0587428, '244Pu': 244.0642053, '241Am': 241.0568293, '243Am': 243.0613813, '243Cm': 243.0613893, '244Cm': 244.0627528, '245Cm': 245.0654915, '246Cm': 246.0672238, '247Cm': 247.0703541, '248Cm': 248.0723499, '247Bk': 247.0703073, '249Bk': 249.0749877, '249Cf': 249.0748539, '250Cf': 250.0764062, '251Cf': 251.0795886, '252Cf': 252.0816272, '252Es': 252.08298, '257Fm': 257.0951061, '258Md': 258.0984315, '260Md': 260.10365, '259No': 259.10103, '262Lr': 262.10961, '267Rf': 267.12179, '268Db': 268.12567, '271Sg': 271.13393, '272Bh': 272.13826, '270Hs': 270.13429, '276Mt': 276.15159, '281Ds': 281.16451, '280Rg': 280.16514, '285Cn': 285.17712, '284Nh': 284.17873, '289Fl': 289.19042, '288Mc': 288.19274, '293Lv': 293.20449, '292Ts': 292.20746, '294Og': 294.21392}"}, {"fullname": "corems.encapsulation.constant.Atoms.atoms_order", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.atoms_order", "kind": "variable", "doc": "

    \n", "default_value": "['C', 'H', 'O', 'N', 'P', 'S', 'F', 'Cl', 'Br', 'I', 'At', 'Li', 'Na', 'K', 'Rb', 'Cs', 'Fr', 'He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn', 'Be', 'B', 'Mg', 'Al', 'Si', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'Ba', 'La', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'Ra', 'Ac', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'D', '6Li', '10B', '13C', '15N', '17O', '18O', '22Ne', '25Mg', '26Mg', '29Si', '30Si', '33S', '34S', '36S', '37Cl', '40Ca', '41K', '44Ca', '46Ti', '47Ti', '49Ti', '50Cr', '50Ti', '50V', '53Cr', '54Cr', '54Fe', '57Fe', '58Fe', '60Ni', '61Ni', '62Ni', '65Cu', '66Zn', '67Zn', '68Zn', '70Ge', '71Ga', '72Ge', '73Ge', '76Ge', '76Se', '77Se', '78Se', '81Br', '80Kr', '82Kr', '82Se', '83Kr', '85Rb', '86Kr', '86Sr', '87Rb', '87Sr', '88Sr', '91Zr', '92Mo', '92Zr', '94Mo', '94Zr', '95Mo', '96Mo', '96Ru', '96Zr', '97Mo', '98Ru', '99Ru', '100Mo', '100Ru', '101Ru', '102Pd', '104Pd', '104Ru', '105Pd', '106Cd', '106Pd', '108Cd', '108Pd', '109Ag', '110Cd', '110Pd', '111Cd', '112Cd', '112Sn', '113Cd', '113In', '114Cd', '114Sn', '115In', '115Sn', '116Cd', '116Sn', '117Sn', '118Sn', '119Sn', '120Sn', '120Te', '121Sb', '122Sn', '122Te', '123Sb', '123Te', '124Sn', '124Te', '124Xe', '125Te', '126Te', '126Xe', '128Te', '128Xe', '129Xe', '130Ba', '130Te', '130Xe', '131Xe', '132Ba', '132Xe', '134Ba', '134Xe', '135Ba', '136Ba', '136Xe', '137Ba', '138Ba', '174Hf', '176Hf', '177Hf', '178Hf', '179Hf', '180Hf', '180W', '182W', '183W', '184Os', '184W', '185Re', '186Os', '186W', '187Os', '187Re', '188Os', '189Os', '190Os', '190Pt', '191Ir', '192Ir', '192Os', '192Pt', '194Pt', '195Pt', 
'196Hg', '196Pt', '198Hg', '198Pt', '199Hg', '200Hg', '201Hg', '202Hg', '203Tl', '204Hg', '204Pb', '205Tl', '206Pb', '207Pb', '208Pb']"}, {"fullname": "corems.encapsulation.constant.Atoms.atoms_covalence", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.atoms_covalence", "kind": "variable", "doc": "

    \n", "default_value": "{'C': 4, '13C': 4, 'N': 3, 'O': 2, 'S': 2, 'H': 1, 'F': (1, 0), 'Cl': (1, 0), 'Br': (1, 0), 'I': (1, 0), 'At': 1, 'Li': (1, 0), 'Na': (1, 0), 'K': (1, 0), 'Rb': 1, 'Cs': 1, 'Fr': 1, 'B': (4, 3, 2, 1), 'In': (3, 2, 1), 'Al': (3, 1, 2), 'P': (3, 5, 4, 2, 1), 'Ga': (3, 1, 2), 'Mg': (2, 1), 'Be': (2, 1), 'Ca': (2, 1), 'Sr': (2, 1), 'Ba': 2, 'Ra': 2, 'V': (5, 4, 3, 2, 1), 'Fe': (3, 2, 4, 5, 6), 'Si': (4, 3, 2), 'Sc': (3, 2, 1), 'Ti': (4, 3, 2, 1), 'Cr': (1, 2, 3, 4, 5, 6), 'Mn': (1, 2, 3, 4, 5, 6, 7), 'Co': (1, 2, 3, 4, 5), 'Ni': (1, 2, 3, 4), 'Cu': (2, 1, 3, 4), 'Zn': (2, 1), 'Ge': (4, 3, 2, 1), 'As': (5, 3, 2, 1), 'Se': (6, 4, 2, 1), 'Y': (3, 2, 1), 'Zr': (4, 3, 2, 1), 'Nb': (5, 4, 3, 2, 1), 'Mo': (6, 5, 4, 3, 2, 1), 'Tc': (7, 6, 5, 4, 3, 2, 1), 'Ru': (8, 7, 6, 5, 4, 3, 2, 1), 'Rh': (6, 5, 4, 3, 2, 1), 'Pd': (4, 2, 1), 'Ag': (0, 1, 2, 3, 4), 'Cd': (2, 1), 'Sn': (4, 2), 'Sb': (5, 3), 'Te': (6, 5, 4, 2), 'La': (3, 2), 'Hf': (4, 3, 2), 'Ta': (5, 4, 3, 2), 'W': (6, 5, 4, 3, 2, 1), 'Re': (4, 7, 6, 5, 3, 2, 1), 'Os': (4, 8, 7, 6, 5, 3, 2, 1), 'Ir': (4, 8, 6, 5, 3, 2, 1), 'Pt': (4, 6, 5, 3, 2, 1), 'Au': (3, 5, 2, 1), 'Hg': (1, 2, 4), 'Tl': (3, 1), 'Pb': (4, 2), 'Bi': (3, 1, 5), 'Po': (2, 4, 6), 'Ac': (3, 2)}"}, {"fullname": "corems.encapsulation.constant.Atoms.isotopic_abundance", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.isotopic_abundance", "kind": "variable", "doc": "

    \n", "default_value": "{'H': 0.999885, 'D': 0.000115, 'T': 0, '3He': 1.34e-06, 'He': 0.99999866, '6Li': 0.0759, 'Li': 0.9241, 'Be': 1.0, '10B': 0.199, 'B': 0.801, 'C': 0.9893, '13C': 0.0107, '14C': 0, 'N': 0.99636, '15N': 0.00364, 'O': 0.99757, '17O': 0.00038, '18O': 0.00205, 'F': 1.0, 'Ne': 0.9048, '21Ne': 0.0027, '22Ne': 0.0925, 'Na': 1.0, 'Mg': 0.7899, '25Mg': 0.1, '26Mg': 0.1101, 'Al': 1.0, 'Si': 0.92223, '29Si': 0.04685, '30Si': 0.03092, 'P': 1.0, 'S': 0.9499, '33S': 0.0075, '34S': 0.0425, '36S': 0.0001, 'Cl': 0.7576, '37Cl': 0.2424, '36Ar': 0.003336, '38Ar': 0.000629, 'Ar': 0.996035, 'K': 0.932581, '40K': 0.000117, '41K': 0.067302, 'Ca': 0.96941, '42Ca': 0.00647, '43Ca': 0.00135, '44Ca': 0.02086, '46Ca': 4e-05, '48Ca': 0.001872, 'Sc': 1.0, '46Ti': 0.0825, '47Ti': 0.0744, 'Ti': 0.7372, '49Ti': 0.0541, '50Ti': 0.0518, '50V': 0.0025, 'V': 0.9975, '50Cr': 0.04345, 'Cr': 0.83789, '53Cr': 0.09501, '54Cr': 0.02365, 'Mn': 1.0, '54Fe': 0.05845, 'Fe': 0.91754, '57Fe': 0.02119, '58Fe': 0.00282, 'Co': 1.0, 'Ni': 0.68077, '60Ni': 0.26223, '61Ni': 0.011399, '62Ni': 0.036346, '64Ni': 0.009255, 'Cu': 0.6915, '65Cu': 0.3085, 'Zn': 0.4917, '66Zn': 0.2773, '67Zn': 0.0404, '68Zn': 0.1845, '70Zn': 0.0061, 'Ga': 0.60108, '71Ga': 0.39892, '70Ge': 0.2057, '72Ge': 0.2745, '73Ge': 0.0775, 'Ge': 0.365, '76Ge': 0.0773, 'As': 1.0, '74Se': 0.0089, '76Se': 0.0937, '77Se': 0.0763, '78Se': 0.2377, 'Se': 0.4961, '82Se': 0.0873, 'Br': 0.5069, '81Br': 0.4931, '78Kr': 0.00355, '80Kr': 0.02286, '82Kr': 0.11593, '83Kr': 0.115, 'Kr': 0.56987, '86Kr': 0.17279, 'Rb': 0.7217, '87Rb': 0.2783, '84Sr': 0.0056, '86Sr': 0.0986, '87Sr': 0.07, 'Sr': 0.8258, 'Y': 1.0, 'Zr': 0.5145, '91Zr': 0.1122, '92Zr': 0.1715, '94Zr': 0.1738, '96Zr': 0.028, 'Nb': 1.0, '92Mo': 0.1453, '94Mo': 0.0915, '95Mo': 0.1584, '96Mo': 0.1667, '97Mo': 0.096, 'Mo': 0.2439, '100Mo': 0.0982, '99Tc': 0, '96Ru': 0.0554, '98Ru': 0.0187, '99Ru': 0.1276, '100Ru': 0.126, '101Ru': 0.1706, 'Ru': 0.3155, '104Ru': 0.1862, 'Rh': 1.0, '102Pd': 
0.0102, '104Pd': 0.1114, '105Pd': 0.2233, 'Pd': 0.2733, '108Pd': 0.2646, '110Pd': 0.1172, 'Ag': 0.51839, '109Ag': 0.48161, '106Cd': 0.0125, '108Cd': 0.0089, '110Cd': 0.1249, '111Cd': 0.128, 'Cd': 0.2413, '113Cd': 0.1222, '114Cd': 0.2873, '116Cd': 0.0749, '113In': 0.0429, 'In': 0.9571, '112Sn': 0.0097, '114Sn': 0.0066, '115Sn': 0.0034, '116Sn': 0.1454, '117Sn': 0.0768, '118Sn': 0.2422, '119Sn': 0.0859, 'Sn': 0.3258, '122Sn': 0.0463, '124Sn': 0.0579, 'Sb': 0.5721, '123Sb': 0.4279, '120Te': 0.0009, '122Te': 0.0255, '123Te': 0.0089, '124Te': 0.0474, '125Te': 0.0707, '126Te': 0.1884, '128Te': 0.3174, 'Te': 0.3408, 'I': 1.0, '124Xe': 0.000952, '126Xe': 0.00089, '128Xe': 0.019102, '129Xe': 0.264006, '130Xe': 0.04071, '131Xe': 0.212324, 'Xe': 0.269086, '134Xe': 0.104357, '136Xe': 0.088573, 'Cs': 1.0, '130Ba': 0.00106, '132Ba': 0.00101, '134Ba': 0.02417, '135Ba': 0.06592, '136Ba': 0.07854, '137Ba': 0.11232, 'Ba': 0.71698, '138La': 0.0008881, 'La': 0.9991119, '136Ce': 0.00185, '138Ce': 0.00251, 'Ce': 0.8845, '142Ce': 0.11114, 'Pr': 1.0, 'Nd': 0.27152, '143Nd': 0.12174, '144Nd': 0.23798, '145Nd': 0.08293, '146Nd': 0.17189, '148Nd': 0.05756, '150Nd': 0.05638, '145Pm': 0, '147Pm': 0, '144Sm': 0.0307, '147Sm': 0.1499, '148Sm': 0.1124, '149Sm': 0.1382, '150Sm': 0.0738, 'Sm': 0.2675, '154Sm': 0.2275, '151Eu': 0.4781, 'Eu': 0.5219, '152Gd': 0.002, '154Gd': 0.0218, '155Gd': 0.148, '156Gd': 0.2047, '157Gd': 0.1565, 'Gd': 0.2484, '160Gd': 0.2186, 'Tb': 1.0, '156Dy': 0.00056, '158Dy': 0.00095, '160Dy': 0.02329, '161Dy': 0.18889, '162Dy': 0.25475, '163Dy': 0.24896, 'Dy': 0.2826, 'Ho': 1.0, '162Er': 0.00139, '164Er': 0.01601, 'Er': 0.33503, '167Er': 0.22869, '168Er': 0.26978, '170Er': 0.1491, 'Tm': 1.0, '168Yb': 0.00123, '170Yb': 0.02982, '171Yb': 0.1409, '172Yb': 0.2168, '173Yb': 0.16103, 'Yb': 0.32026, '176Yb': 0.12996, 'Lu': 0.97401, '176Lu': 0.02599, '174Hf': 0.0016, '176Hf': 0.0526, '177Hf': 0.186, '178Hf': 0.2728, '179Hf': 0.1362, 'Hf': 0.3508, '180Ta': 0.0001201, 'Ta': 0.9998799, 
'180W': 0.0012, '182W': 0.265, '183W': 0.1431, 'W': 0.3064, '186W': 0.2843, '185Re': 0.374, 'Re': 0.626, '184Os': 0.0002, '186Os': 0.0159, '187Os': 0.0196, '188Os': 0.1324, '189Os': 0.1615, '190Os': 0.2626, 'Os': 0.4078, '191Ir': 0.373, 'Ir': 0.627, '190Pt': 0.00012, '192Pt': 0.00782, '194Pt': 0.3286, 'Pt': 0.3378, '196Pt': 0.2521, '198Pt': 0.07356, 'Au': 1.0, '196Hg': 0.0015, '198Hg': 0.0997, '199Hg': 0.16872, '200Hg': 0.231, '201Hg': 0.1318, 'Hg': 0.2986, '204Hg': 0.0687, '203Tl': 0.2952, 'Tl': 0.7048, '204Pb': 0.014, '206Pb': 0.241, '207Pb': 0.221, 'Pb': 0.524, 'Bi': 1.0, '209Po': 0, '210Po': 0, '210At': 0, '211At': 0, '211Rn': 0, '220Rn': 0, '222Rn': 0, '223Fr': 0, '223Ra': 0, '224Ra': 0, '226Ra': 0, '228Ra': 0, '227Ac': 0, '230Th': 0, 'Th': 1.0, 'Pa': 1.0, '233U': 0, '234U': 5.4e-05, '235U': 0.007204, '236U': 0, 'U': 0.992742, '236Np': 0, '237Np': 0, '238Pu': 0, '239Pu': 0, '240Pu': 0, '241Pu': 0, '242Pu': 0, '244Pu': 0, '241Am': 0, '243Am': 0, '243Cm': 0, '244Cm': 0, '245Cm': 0, '246Cm': 0, '247Cm': 0, '248Cm': 0, '247Bk': 0, '249Bk': 0, '249Cf': 0, '250Cf': 0, '251Cf': 0, '252Cf': 0, '252Es': 0, '257Fm': 0, '258Md': 0, '260Md': 0, '259No': 0, '262Lr': 0, '267Rf': 0, '268Db': 0, '271Sg': 0, '272Bh': 0, '270Hs': 0, '276Mt': 0, '281Ds': 0, '280Rg': 0, '285Cn': 0, '284Nh': 0, '289Fl': 0, '288Mc': 0, '293Lv': 0, '292Ts': 0, '294Og': 0}"}, {"fullname": "corems.encapsulation.constant.Atoms.isotopes", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.isotopes", "kind": "variable", "doc": "

    \n", "default_value": "{'H': ['Hydrogen', ['D', 'T']], 'He': ['Helium', ['3He']], 'Li': ['Lithium', ['6Li']], 'Be': ['Beryllium', [None]], 'B': ['Boron', ['10B']], 'C': ['Carbon', ['13C']], 'N': ['Nitrogen', ['15N']], 'O': ['Oxygen', ['18O', '17O']], 'F': ['Fluorine', [None]], 'Ne': ['Neon', ['22Ne', '21Ne']], 'Na': ['Sodium', [None]], 'Mg': ['Magnesium', ['26Mg', '25Mg']], 'Al': ['Aluminum', [None]], 'Si': ['Silicon', ['29Si', '30Si']], 'P': ['Phosphorus', [None]], 'S': ['Sulfur', ['34S', '33S', '36S']], 'Cl': ['Chlorine', ['37Cl']], 'Ar': ['Argon', ['36Ar', '38Ar']], 'K': ['Potassium', ['41K', '40K']], 'Ca': ['Calcium', ['44Ca', '48Ca', '43Ca', '42Ca', '46Ca']], 'Sc': ['Scandium', [None]], 'Ti': ['Titanium', ['46Ti', '47Ti', '49Ti', '50Ti']], 'V': ['Vanadium', ['50V']], 'Cr': ['Chromium', ['53Cr', '50Cr', '54Cr']], 'Mn': ['Manganese', [None]], 'Fe': ['Iron', ['54Fe', '57Fe', '58Fe']], 'Co': ['Cobalt', [None]], 'Ni': ['Nickel', ['60Ni', '62Ni', '61Ni', '64Ni']], 'Cu': ['Copper', ['65Cu']], 'Zn': ['Zinc', ['66Zn', '68Zn', '67Zn', '70Zn']], 'Ga': ['Gallium', ['71Ga']], 'Ge': ['Germanium', ['72Ge', '70Ge', '73Ge', '76Ge']], 'As': ['Arsenic', [None]], 'Se': ['Selenium', ['78Se', '76Se', '82Se', '77Se', '74Se']], 'Br': ['Bromine', ['81Br']], 'Kr': ['Krypton', ['86Kr', '82Kr', '83Kr', '80Kr']], 'Rb': ['Rubidium', ['87Rb']], 'Sr': ['Strontium', ['86Sr', '87Sr', '84Sr']], 'Y': ['Yttrium', [None]], 'Zr': ['Zirconium', ['94Zr', '92Zr', '91Zr', '96Zr']], 'Nb': ['Niobium', [None]], 'Mo': ['Molybdenum', ['96Mo', '95Mo', '92Mo', '100Mo', '97Mo', '94Mo']], 'Tc': ['Technetium', [None]], 'Ru': ['Ruthenium', ['104Ru', '101Ru', '99Ru', '100Ru', '96Ru', '98Ru']], 'Rh': ['Rhodium', [None]], 'Pd': ['Palladium', ['108Pd', '105Pd', '110Pd', '104Pd', '102Pd']], 'Ag': ['Silver', ['109Ag']], 'Cd': ['Cadmium', ['114Cd', '111Cd', '110Cd', '113Cd', '116Cd', '106Cd', '108Cd']], 'In': ['Indium', ['113In']], 'Sn': ['Tin', ['118Sn', '116Sn', '119Sn', '117Sn', '124Sn', '122Sn', '112Sn', 
'114Sn', '115Sn']], 'Sb': ['Antimony', ['123Sb']], 'Te': ['Tellurium', ['128Te', '126Te', '125Te', '124Te', '122Te', '123Te', '120Te']], 'I': ['Iodine', [None]], 'Xe': ['Xenon', ['129Xe', '131Xe', '134Xe', '136Xe', '130Xe', '128Xe']], 'Cs': ['Cesium', [None]], 'Ba': ['Barium', ['137Ba', '136Ba', '135Ba', '134Ba']], 'La': ['Lanthanum', ['138La']], 'Hf': ['Hafnium', ['178Hf', '177Hf', '179Hf', '176Hf']], 'Ta': ['Tantalum', ['180Ta']], 'W': ['Tungsten', ['186W', '182W', '183W']], 'Re': ['Rhenium', ['185Re']], 'Os': ['Osmium', ['190Os', '189Os', '188Os', '187Os', '186Os']], 'Ir': ['Iridium', ['191Ir']], 'Pt': ['Platinum', ['194Pt', '196Pt', '198Pt', '192Pt']], 'Au': ['Gold', [None]], 'Hg': ['Mercury', ['200Hg', '199Hg', '201Hg', '198Hg', '204Hg']], 'Tl': ['Thallium', ['203Tl']], 'Pb': ['Lead', ['206Pb', '207Pb', '204Pb']], 'Bi': ['Bismuth', [None]], 'Po': ['Polonium', [None]], 'At': ['Astatine', [None]], 'Rn': ['Radon', [None]], 'Fr': ['Francium', [None]], 'Ra': ['Radium', [None]], 'Ac': ['Actinium', [None]], 'Rf': ['Rutherfordium', [None]], 'Db': ['Dubnium', [None]], 'Sg': ['Seaborgium', [None]], 'Bh': ['Bohrium', [None]], 'Hs': ['Hassium', [None]], 'Mt': ['Meitnerium', [None]], 'Ds': ['Darmstadtium', [None]], 'Rg': ['Roentgenium', [None]], 'Cn': ['Copernicium', [None]], 'Nh': ['Nihonium', [None]], 'Fl': ['Flerovium', [None]], 'Mc': ['Moscovium', [None]], 'Lv': ['Livermorium', [None]], 'Ts': ['Tennessine', [None]], 'Og': ['Oganesson', [None]], 'Ce': ['Cerium', ['142Ce', '138Ce136Ce']], 'Pr': ['Praseodymium', [None]], 'Nd': ['Neodymium', [None]], 'Pm': ['Promethium', [None]], 'Sm': ['Samarium', [None]], 'Eu': ['Europium', [None]], 'Gd': ['Gadolinium', [None]], 'Tb': ['Terbium', [None]], 'Dy': ['Dysprosium', [None]], 'Ho': ['Holmium', [None]], 'Er': ['Erbium', [None]], 'Tm': ['Thulium', [None]], 'Yb': ['Ytterbium', [None]], 'Lu': ['Lutetium', ['176Lu']], 'Th': ['Thorium', [None]], 'Pa': ['Protactinium', [None]], 'U': ['Uranium', ['235U', '234U']], 'Np': ['Neptunium', 
[None]], 'Pu': ['Plutonium', [None]], 'Am': ['Americium', [None]], 'Cm': ['Curium', [None]], 'Bk': ['Berkelium', [None]], 'Cf': ['Californium', [None]], 'Es': ['Einsteinium', [None]], 'Fm': ['Fermium', [None]], 'Md': ['Mendelevium', [None]], 'No': ['Nobelium', [None]], 'Lr': ['Lawrencium', [None]]}"}, {"fullname": "corems.encapsulation.factory", "modulename": "corems.encapsulation.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.parameters", "modulename": "corems.encapsulation.factory.parameters", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.parameters.reset_ms_parameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "reset_ms_parameters", "kind": "function", "doc": "

    Reset the MSParameter class to the default values

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.reset_gcms_parameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "reset_gcms_parameters", "kind": "function", "doc": "

    Reset the GCMSParameters class to the default values

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.reset_lcms_parameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "reset_lcms_parameters", "kind": "function", "doc": "

    Reset the LCMSParameters class to the default values

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters", "kind": "class", "doc": "

    MSParameters class is used to store the parameters used for the processing of the mass spectrum

    \n\n

    Each attribute is a class that contains the parameters for the processing of the mass spectrum; see the corems.encapsulation.factory.processingSetting module for more details.

    \n\n
    Parameters
    \n\n
      \n
    • use_defaults (bool, optional):\nif True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • molecular_search (MolecularFormulaSearchSettings):\nMolecularFormulaSearchSettings object
    • \n
    • transient (TransientSetting):\nTransientSetting object
    • \n
    • mass_spectrum (MassSpectrumSetting):\nMassSpectrumSetting object
    • \n
    • ms_peak (MassSpecPeakSetting):\nMassSpecPeakSetting object
    • \n
    • data_input (DataInputSetting):\nDataInputSetting object
    • \n
    \n\n
    Notes
    \n\n

    One can use the use_defaults parameter to reset the parameters to the default values.\nAlternatively, to use the current values - modify the class's contents before instantiating the class.

    \n"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.__init__", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.__init__", "kind": "function", "doc": "

    \n", "signature": "(use_defaults=False)"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.molecular_search", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.molecular_search", "kind": "variable", "doc": "

    \n", "default_value": "MolecularFormulaSearchSettings(use_isotopologue_filter=False, isotopologue_filter_threshold=33.0, isotopologue_filter_atoms=('Cl', 'Br'), use_runtime_kendrick_filter=False, use_min_peaks_filter=True, min_peaks_per_class=15, url_database='postgresql+psycopg2://coremsappdb:coremsapppnnl@molformdb:5432/coremsapp', db_jobs=3, db_chunk_size=300, ion_charge=-1, min_hc_filter=0.3, max_hc_filter=3.0, min_oc_filter=0.0, max_oc_filter=1.2, min_op_filter=2.0, use_pah_line_rule=False, min_dbe=0.0, max_dbe=40.0, mz_error_score_weight=0.6, isotopologue_score_weight=0.4, adduct_atoms_neg=('Cl', 'Br'), adduct_atoms_pos=('Na', 'K'), score_methods=('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'), score_method='prob_score', output_min_score=0.1, output_score_method='All Candidates', isRadical=False, isProtonated=True, isAdduct=False, usedAtoms={'C': (1, 100), 'H': (1, 200)}, ion_types_excluded=[], ionization_type='ESI', min_ppm_error=-10.0, max_ppm_error=10.0, min_abun_error=-100.0, max_abun_error=100.0, mz_error_range=1.5, error_method='None', mz_error_average=0.0, used_atom_valences={'C': 4, '13C': 4, 'N': 3, 'O': 2, 'S': 2, 'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'At': 1, 'Li': 1, 'Na': 1, 'K': 1, 'Rb': 1, 'Cs': 1, 'Fr': 1, 'B': 4, 'In': 3, 'Al': 3, 'P': 3, 'Ga': 3, 'Mg': 2, 'Be': 2, 'Ca': 2, 'Sr': 2, 'Ba': 2, 'Ra': 2, 'V': 5, 'Fe': 3, 'Si': 4, 'Sc': 3, 'Ti': 4, 'Cr': 1, 'Mn': 1, 'Co': 1, 'Ni': 1, 'Cu': 2, 'Zn': 2, 'Ge': 4, 'As': 5, 'Se': 6, 'Y': 3, 'Zr': 4, 'Nb': 5, 'Mo': 6, 'Tc': 7, 'Ru': 8, 'Rh': 6, 'Pd': 4, 'Ag': 0, 'Cd': 2, 'Sn': 4, 'Sb': 5, 'Te': 6, 'La': 3, 'Hf': 4, 'Ta': 5, 'W': 6, 'Re': 4, 'Os': 4, 'Ir': 4, 'Pt': 4, 'Au': 3, 'Hg': 1, 'Tl': 3, 'Pb': 4, 'Bi': 3, 'Po': 2, 'Ac': 3})"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.transient", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.transient", 
"kind": "variable", "doc": "

    \n", "default_value": "TransientSetting(implemented_apodization_function=('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser'), apodization_method='Hanning', number_of_truncations=0, number_of_zero_fills=1, next_power_of_two=False, kaiser_beta=8.6)"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.mass_spectrum", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.mass_spectrum", "kind": "variable", "doc": "

    \n", "default_value": "MassSpectrumSetting(noise_threshold_method='log', noise_threshold_methods_implemented=('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'), noise_threshold_min_std=6, noise_threshold_min_s2n=4.0, noise_threshold_min_relative_abundance=6.0, noise_threshold_absolute_abundance=1000000.0, noise_threshold_log_nsigma=6, noise_threshold_log_nsigma_corr_factor=0.463, noise_threshold_log_nsigma_bins=500, noise_min_mz=50.0, noise_max_mz=1200.0, min_picking_mz=50.0, max_picking_mz=1200.0, picking_point_extrapolate=3, calib_minimize_method='Powell', calib_pol_order=2, max_calib_ppm_error=1.0, min_calib_ppm_error=-1.0, calib_sn_threshold=2.0, calibration_ref_match_method='legacy', calibration_ref_match_method_implemented=('legacy', 'merged'), calibration_ref_match_tolerance=0.003, calibration_ref_match_std_raw_error_limit=1.5, do_calibration=True, verbose_processing=True)"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.ms_peak", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.ms_peak", "kind": "variable", "doc": "

    \n", "default_value": "MassSpecPeakSetting(kendrick_base={'C': 1, 'H': 2}, kendrick_rounding_method='floor', implemented_kendrick_rounding_methods=('floor', 'ceil', 'round'), peak_derivative_threshold=0.0, peak_min_prominence_percent=0.1, min_peak_datapoints=5.0, peak_max_prominence_percent=0.1, peak_height_max_percent=10.0, legacy_resolving_power=True, legacy_centroid_polyfit=False)"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.data_input", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.data_input", "kind": "variable", "doc": "

    \n", "default_value": "DataInputSetting(header_translate={'m/z': 'm/z', 'mOz': 'm/z', 'Mass': 'm/z', 'Resolving Power': 'Resolving Power', 'Res.': 'Resolving Power', 'resolution': 'Resolving Power', 'Intensity': 'Peak Height', 'Peak Height': 'Peak Height', 'I': 'Peak Height', 'Abundance': 'Peak Height', 'abs_abu': 'Peak Height', 'Signal/Noise': 'S/N', 'S/N': 'S/N', 'sn': 'S/N'})"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.copy", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.copy", "kind": "function", "doc": "

    Create a copy of the MSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.print", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.print", "kind": "function", "doc": "

    Print the MSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters", "kind": "class", "doc": "

    GCMSParameters class is used to store the parameters used for the processing of the gas chromatograph mass spectrum

    \n\n

    Each attribute is a class that contains the parameters for the processing of the data; see the corems.encapsulation.factory.processingSetting module for more details.

    \n\n
    Parameters
    \n\n
      \n
    • use_defaults (bool, optional):\nif True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • molecular_search (MolecularFormulaSearchSettings):\nMolecularFormulaSearchSettings object
    • \n
    • gc_ms (GasChromatographSetting):\nGasChromatographSetting object
    • \n
    \n\n
    Notes
    \n\n

    One can use the use_defaults parameter to reset the parameters to the default values.\nAlternatively, to use the current values - modify the class's contents before instantiating the class.

    \n"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.__init__", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.__init__", "kind": "function", "doc": "

    \n", "signature": "(use_defaults=False)"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.molecular_search", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.molecular_search", "kind": "variable", "doc": "

    \n", "default_value": "CompoundSearchSettings(url_database='sqlite:///db/pnnl_lowres_gcms_compounds.sqlite', ri_search_range=35.0, rt_search_range=1.0, correlation_threshold=0.5, score_threshold=0.0, ri_spacing=200.0, ri_std=3.0, ri_calibration_compound_names=['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'], exploratory_mode=False, score_methods=('highest_sim_score', 'highest_ss'), output_score_method='All')"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.gc_ms", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.gc_ms", "kind": "variable", "doc": "

    \n", "default_value": "GasChromatographSetting(use_deconvolution=False, implemented_smooth_method=('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), smooth_window=5, smooth_method='savgol', savgol_pol_order=2, peak_derivative_threshold=0.0005, peak_height_max_percent=10.0, peak_max_prominence_percent=1.0, min_peak_datapoints=5.0, max_peak_width=0.1, noise_threshold_method='manual_relative_abundance', noise_threshold_methods_implemented=('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), std_noise_threshold=3, peak_height_min_percent=0.1, peak_min_prominence_percent=0.1, eic_signal_threshold=0.01, max_rt_distance=0.025, verbose_processing=True)"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.copy", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.copy", "kind": "function", "doc": "

    Create a copy of the GCMSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.print", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.print", "kind": "function", "doc": "

    Print the GCMSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters", "kind": "class", "doc": "

    LCMSParameters class is used to store the parameters used for the processing of the liquid chromatograph mass spectrum

    \n\n

    Each attribute is a class that contains the parameters for the processing of the data; see the corems.encapsulation.factory.processingSetting module for more details.

    \n\n
    Parameters
    \n\n
      \n
    • use_defaults (bool, optional):\nif True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • lc_ms (LiquidChromatographSetting):\nLiquidChromatographSetting object
    • \n
    • mass_spectrum (dict):\ndictionary with the mass spectrum parameters for ms1 and ms2, each value is a MSParameters object
    • \n
    \n\n
    Notes
    \n\n

    One can use the use_defaults parameter to reset the parameters to the default values.\nAlternatively, to use the current values - modify the class's contents before instantiating the class.

    \n"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.__init__", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.__init__", "kind": "function", "doc": "

    \n", "signature": "(use_defaults=False)"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.lc_ms", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.lc_ms", "kind": "variable", "doc": "

    \n", "default_value": "LiquidChromatographSetting(scans=(-1, -1), eic_tolerance_ppm=5.0, smooth_window=5, smooth_method='savgol', implemented_smooth_method=('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), savgol_pol_order=2, peak_height_max_percent=10.0, peak_max_prominence_percent=1.0, peak_derivative_threshold=0.0005, min_peak_datapoints=5.0, noise_threshold_method='manual_relative_abundance', noise_threshold_methods_implemented=('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), peak_height_min_percent=0.1, eic_signal_threshold=0.01, peak_picking_method='persistent homology', implemented_peak_picking_methods=('persistent homology',), mass_feature_cluster_mz_tolerance_rel=5e-06, mass_feature_cluster_rt_tolerance=0.3, ms1_scans_to_average=1, ms1_deconvolution_corr_min=0.8, ms2_dda_rt_tolerance=0.15, ms2_dda_mz_tolerance=0.05, ms2_min_fe_score=0.2, search_as_lipids=False, include_fragment_types=False, export_profile_spectra=False, export_eics=True, export_unprocessed_ms1=False, verbose_processing=True)"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.mass_spectrum", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.mass_spectrum", "kind": "variable", "doc": "

    \n", "default_value": "{'ms1': <corems.encapsulation.factory.parameters.MSParameters object>, 'ms2': <corems.encapsulation.factory.parameters.MSParameters object>}"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.copy", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.copy", "kind": "function", "doc": "

    Create a copy of the LCMSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.print", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.print", "kind": "function", "doc": "

    Print the LCMSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.default_parameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "default_parameters", "kind": "function", "doc": "

    Generate a parameters dictionary with the default parameters for data processing.\n To gather parameters from instrument data during the data parsing step, a parameters dictionary with the default parameters needs to be generated.\n This dictionary acts as a placeholder and is later used as an argument for all the class constructor methods during instantiation. \n The data gathered from the instrument is added to the class properties.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\npath to the file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • parameters (dict):\ndictionary with the default parameters for data processing
    • \n
    \n", "signature": "(file_location):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting", "modulename": "corems.encapsulation.factory.processingSetting", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting", "kind": "class", "doc": "

    Transient processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • implemented_apodization_function (tuple):\nAvailable apodization functions
    • \n
    • apodization_method (str):\nApodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
    • \n
    • number_of_truncations (int):\nHow many times to truncate the transient prior to Fourier transform
    • \n
    • number_of_zero_fills (int):\nHow many times to zero fill the transient prior to Fourier transform.
    • \n
    • next_power_of_two (bool):\nIf True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
    • \n
    • kaiser_beta (float):\nBeta parameter for Kaiser or Half-Kaiser apodization function. 0 is rectangular, 5 is similar to Hamming,\n6 is similar to Hanning, and 8.6 is similar to Blackman (from NumPy docs)
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\timplemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser'),\tapodization_method: str = 'Hanning',\tnumber_of_truncations: int = 0,\tnumber_of_zero_fills: int = 1,\tnext_power_of_two: bool = False,\tkaiser_beta: float = 8.6)"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.implemented_apodization_function", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.implemented_apodization_function", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser')"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.apodization_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.apodization_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'Hanning'"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.number_of_truncations", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.number_of_truncations", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "0"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.number_of_zero_fills", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.number_of_zero_fills", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.next_power_of_two", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.next_power_of_two", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.kaiser_beta", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.kaiser_beta", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "8.6"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting", "kind": "class", "doc": "

    Data input settings class

    \n\n
    Attributes
    \n\n
      \n
    • header_translate (dict):\nDictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(header_translate: dict = <factory>)"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.header_translate", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.header_translate", "kind": "variable", "doc": "

    \n", "annotation": ": dict"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.add_mz_label", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.add_mz_label", "kind": "function", "doc": "

    Add a label to the header_translate dictionary to be translated to the corems label for mz.

    \n", "signature": "(self, label):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.add_peak_height_label", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.add_peak_height_label", "kind": "function", "doc": "

    Add a label to the header_translate dictionary to be translated to the corems label for peak height.

    \n", "signature": "(self, label):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.add_sn_label", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.add_sn_label", "kind": "function", "doc": "

    Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.

    \n", "signature": "(self, label):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.add_resolving_power_label", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.add_resolving_power_label", "kind": "function", "doc": "

    Add a label to the header_translate dictionary to be translated to the corems label for resolving power.

    \n", "signature": "(self, label):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting", "kind": "class", "doc": "

    Liquid chromatograph processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • scans (list or tuple, optional):\nList of selected scans to average or a tuple containing the range to average. Default is (-1, -1).
    • \n
    • eic_tolerance_ppm (float, optional):\nMass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
    • \n
    • correct_eic_baseline (bool, optional):\nIf True, correct the baseline of the extracted ion chromatogram. Default is True.
    • \n
    • smooth_window (int, optional):\nWindow size for smoothing the ion chromatogram (extracted or total). Default is 5.
    • \n
    • smooth_method (str, optional):\nSmoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    • \n
    • implemented_smooth_method (tuple, optional):\nSmoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    • \n
    • savgol_pol_order (int, optional):\nPolynomial order for Savitzky-Golay smoothing. Default is 2.
    • \n
    • peak_height_max_percent (float, optional):\n1-100 % used for baseline detection; use 0.1 for second_derivative and 10 for other methods. Default is 10.
    • \n
    • peak_max_prominence_percent (float, optional):\n1-100 % used for baseline detection. Default is 1.
    • \n
    • peak_derivative_threshold (float, optional):\nThreshold for defining derivative crossing. Default is 0.0005.
    • \n
    • min_peak_datapoints (float, optional):\nMinimum number of data points to define a chromatographic peak. Default is 5.
    • \n
    • noise_threshold_method (str, optional):\nMethod for detecting noise threshold. Default is 'manual_relative_abundance'.
    • \n
    • noise_threshold_methods_implemented (tuple, optional):\nMethods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    • \n
    • peak_height_min_percent (float, optional):\n0-100 % used for peak detection. Default is 0.1.
    • \n
    • eic_signal_threshold (float, optional):\n0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    • \n
    • eic_buffer_time (float, optional):\nBuffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
    • \n
    • ph_smooth_it (int, optional):\nNumber of iterations to use for smoothing prior to finding mass features. \nCalled within the PHCalculations.find_mass_features_ph() method. Default is 1.
    • \n
    • ph_smooth_radius_mz (int, optional):\nRadius in m/z steps (not daltons) for smoothing prior to finding mass features. \nCalled within the PHCalculations.find_mass_features_ph() method. Default is 0.
    • \n
    • ph_smooth_radius_scan (int, optional):\nRadius in scan steps for smoothing prior to finding mass features. \nCalled within the PHCalculations.find_mass_features_ph() method. Default is 1.
    • \n
    • ph_inten_min_rel (float, optional):\nRelative minimum intensity to use for finding mass features. \nCalculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).\nCalled within the PHCalculations.find_mass_features_ph() method. Default is 0.001.
    • \n
    • ph_persis_min_rel (float, optional):\nRelative minimum persistence for retaining mass features. \nCalculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).\nShould be greater than or equal to ph_inten_min_rel.\nCalled within the PHCalculations.find_mass_features_ph() method. Default is 0.001.
    • \n
    • mass_feature_cluster_mz_tolerance_rel (float, optional):\nRelative m/z tolerance to use for clustering mass features. \nCalled with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.\nDefault is 5E-6 (5 ppm).
    • \n
    • mass_feature_cluster_rt_tolerance (float, optional):\nRetention time tolerance to use for clustering mass features, in minutes. \nCalled with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods. \nDefault is 0.3.
    • \n
    • ms1_scans_to_average (int, optional):\nNumber of MS1 scans to average for mass-feature associated m/zs. \nCalled within the LCMSBase.add_associated_ms1() method. Default is 1.
    • \n
    • ms1_deconvolution_corr_min (float, optional):\nMinimum correlation to use for deconvoluting MS1 mass features. \nCalled within the LCCalculations.deconvolute_ms1_mass_features() method. \nDefault is 0.8.
    • \n
    • ms2_dda_rt_tolerance (float, optional):\nRetention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
    • \n
    • ms2_dda_mz_tolerance (float, optional):\nMass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
    • \n
    • ms2_min_fe_score (float, optional):\nMinimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
    • \n
    • search_as_lipids (bool, optional):\nIf True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
    • \n
    • include_fragment_types (bool, optional):\nIf True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
    • \n
    • verbose_processing (bool, optional):\nIf True, print verbose processing information. Default is True.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tscans: list | tuple = (-1, -1),\teic_tolerance_ppm: float = 5,\tsmooth_window: int = 5,\tsmooth_method: str = 'savgol',\timplemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'),\tsavgol_pol_order: int = 2,\tpeak_height_max_percent: float = 10,\tpeak_max_prominence_percent: float = 1,\tpeak_derivative_threshold: float = 0.0005,\tmin_peak_datapoints: float = 5,\tnoise_threshold_method: str = 'manual_relative_abundance',\tnoise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'),\tpeak_height_min_percent: float = 0.1,\teic_signal_threshold: float = 0.01,\tpeak_picking_method: str = 'persistent homology',\timplemented_peak_picking_methods: tuple = ('persistent homology',),\tmass_feature_cluster_mz_tolerance_rel: float = 5e-06,\tmass_feature_cluster_rt_tolerance: float = 0.3,\tms1_scans_to_average: int = 1,\tms1_deconvolution_corr_min: float = 0.8,\tms2_dda_rt_tolerance: float = 0.15,\tms2_dda_mz_tolerance: float = 0.05,\tms2_min_fe_score: float = 0.2,\tsearch_as_lipids: bool = False,\tinclude_fragment_types: bool = False,\texport_profile_spectra: bool = False,\texport_eics: bool = True,\texport_unprocessed_ms1: bool = False,\tverbose_processing: bool = True)"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.scans", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.scans", "kind": "variable", "doc": "

    \n", "annotation": ": list | tuple", "default_value": "(-1, -1)"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.eic_tolerance_ppm", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.eic_tolerance_ppm", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.correct_eic_baseline", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.correct_eic_baseline", "kind": "variable", "doc": "

    \n", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.smooth_window", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.smooth_window", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.smooth_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.smooth_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'savgol'"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.implemented_smooth_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.implemented_smooth_method", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.savgol_pol_order", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.savgol_pol_order", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "2"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_height_max_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_height_max_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "10"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_max_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_max_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_derivative_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_derivative_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0005"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.min_peak_datapoints", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.min_peak_datapoints", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.noise_threshold_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.noise_threshold_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'manual_relative_abundance'"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.noise_threshold_methods_implemented", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.noise_threshold_methods_implemented", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_height_min_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_height_min_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.eic_signal_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.eic_signal_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.01"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.eic_buffer_time", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.eic_buffer_time", "kind": "variable", "doc": "

    \n", "default_value": "1.5"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_picking_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_picking_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'persistent homology'"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.implemented_peak_picking_methods", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.implemented_peak_picking_methods", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('persistent homology',)"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_smooth_it", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_smooth_it", "kind": "variable", "doc": "

    \n", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_smooth_radius_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_smooth_radius_mz", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_smooth_radius_scan", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_smooth_radius_scan", "kind": "variable", "doc": "

    \n", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_inten_min_rel", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_inten_min_rel", "kind": "variable", "doc": "

    \n", "default_value": "0.001"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_persis_min_rel", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_persis_min_rel", "kind": "variable", "doc": "

    \n", "default_value": "0.001"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.mass_feature_cluster_mz_tolerance_rel", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.mass_feature_cluster_mz_tolerance_rel", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5e-06"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.mass_feature_cluster_rt_tolerance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.mass_feature_cluster_rt_tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.3"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms1_scans_to_average", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms1_scans_to_average", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms1_deconvolution_corr_min", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms1_deconvolution_corr_min", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.8"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms2_dda_rt_tolerance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms2_dda_rt_tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.15"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms2_dda_mz_tolerance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms2_dda_mz_tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.05"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms2_min_fe_score", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms2_min_fe_score", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.2"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.search_as_lipids", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.search_as_lipids", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.include_fragment_types", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.include_fragment_types", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.export_profile_spectra", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.export_profile_spectra", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.export_eics", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.export_eics", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.export_unprocessed_ms1", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.export_unprocessed_ms1", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.verbose_processing", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.verbose_processing", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting", "kind": "class", "doc": "

    Mass spectrum processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • noise_threshold_method (str, optional):\nMethod for detecting noise threshold. Default is 'log'.
    • \n
    • noise_threshold_methods_implemented (tuple, optional):\nMethods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    • \n
    • noise_threshold_min_std (int, optional):\nMinimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    • \n
    • noise_threshold_min_s2n (float, optional):\nMinimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    • \n
    • noise_threshold_min_relative_abundance (float, optional):\nMinimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
    • \n
    • noise_threshold_absolute_abundance (float, optional):\nMinimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
    • \n
    • noise_threshold_log_nsigma (int, optional):\nNumber of standard deviations to use when using 'log' noise threshold method. Default is 6.
    • \n
    • noise_threshold_log_nsigma_corr_factor (float, optional):\nCorrection factor for log noise threshold method. Default is 0.463.
    • \n
    • noise_threshold_log_nsigma_bins (int, optional):\nNumber of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    • \n
    • noise_min_mz (float, optional):\nMinimum m/z to use for noise thresholding. Default is 50.0.
    • \n
    • noise_max_mz (float, optional):\nMaximum m/z to use for noise thresholding. Default is 1200.0.
    • \n
    • min_picking_mz (float, optional):\nMinimum m/z to use for peak picking. Default is 50.0.
    • \n
    • max_picking_mz (float, optional):\nMaximum m/z to use for peak picking. Default is 1200.0.
    • \n
    • picking_point_extrapolate (int, optional):\nHow many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.\nRecommend 3 for reduced profile data or if peak picking faults
    • \n
    • calib_minimize_method (str, optional):\nMinimization method to use for calibration. Default is 'Powell'.
    • \n
    • calib_pol_order (int, optional):\nPolynomial order to use for calibration. Default is 2.
    • \n
    • max_calib_ppm_error (float, optional):\nMaximum ppm error to use for calibration. Default is 1.0.
    • \n
    • min_calib_ppm_error (float, optional):\nMinimum ppm error to use for calibration. Default is -1.0.
    • \n
    • calib_sn_threshold (float, optional):\nSignal to noise threshold to use for calibration. Default is 2.0.
    • \n
    • calibration_ref_match_method (string, optional):\nMethod for matching reference masses with measured masses for recalibration. Default is 'legacy'.
    • \n
    • calibration_ref_match_tolerance (float, optional):\nIf using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
    • \n
    • do_calibration (bool, optional):\nIf True, perform calibration. Default is True.
    • \n
    • verbose_processing (bool, optional):\nIf True, print verbose processing information. Default is True.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tnoise_threshold_method: str = 'log',\tnoise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'),\tnoise_threshold_min_std: int = 6,\tnoise_threshold_min_s2n: float = 4,\tnoise_threshold_min_relative_abundance: float = 6,\tnoise_threshold_absolute_abundance: float = 1000000,\tnoise_threshold_log_nsigma: int = 6,\tnoise_threshold_log_nsigma_corr_factor: float = 0.463,\tnoise_threshold_log_nsigma_bins: int = 500,\tnoise_min_mz: float = 50.0,\tnoise_max_mz: float = 1200.0,\tmin_picking_mz: float = 50.0,\tmax_picking_mz: float = 1200.0,\tpicking_point_extrapolate: int = 3,\tcalib_minimize_method: str = 'Powell',\tcalib_pol_order: int = 2,\tmax_calib_ppm_error: float = 1.0,\tmin_calib_ppm_error: float = -1.0,\tcalib_sn_threshold: float = 2.0,\tcalibration_ref_match_method: str = 'legacy',\tcalibration_ref_match_method_implemented: tuple = ('legacy', 'merged'),\tcalibration_ref_match_tolerance: float = 0.003,\tcalibration_ref_match_std_raw_error_limit: float = 1.5,\tdo_calibration: bool = True,\tverbose_processing: bool = True)"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'log'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_methods_implemented", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_methods_implemented", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_min_std", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_min_std", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "6"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_min_s2n", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_min_s2n", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "4"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_min_relative_abundance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_min_relative_abundance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "6"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_absolute_abundance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_absolute_abundance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1000000"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_log_nsigma", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_log_nsigma", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "6"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_log_nsigma_corr_factor", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_log_nsigma_corr_factor", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.463"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_log_nsigma_bins", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_log_nsigma_bins", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "500"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_min_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_min_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "50.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_max_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_max_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1200.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.min_picking_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.min_picking_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "50.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.max_picking_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.max_picking_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1200.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.picking_point_extrapolate", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.picking_point_extrapolate", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calib_minimize_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calib_minimize_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'Powell'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calib_pol_order", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calib_pol_order", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "2"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.max_calib_ppm_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.max_calib_ppm_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.min_calib_ppm_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.min_calib_ppm_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "-1.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calib_sn_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calib_sn_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "2.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calibration_ref_match_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calibration_ref_match_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'legacy'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calibration_ref_match_method_implemented", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calibration_ref_match_method_implemented", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('legacy', 'merged')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calibration_ref_match_tolerance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calibration_ref_match_tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.003"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calibration_ref_match_std_raw_error_limit", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calibration_ref_match_std_raw_error_limit", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.5"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.do_calibration", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.do_calibration", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.verbose_processing", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.verbose_processing", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting", "kind": "class", "doc": "

    Mass spectrum peak processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • kendrick_base (Dict, optional):\nDictionary specifying the elements and their counts in the Kendrick base.\nDefaults to {'C': 1, 'H': 2}.
    • \n
    • kendrick_rounding_method (str, optional):\nMethod for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.\nDefaults to 'floor'.
    • \n
    • implemented_kendrick_rounding_methods (tuple):\nTuple of valid rounding methods for calculating the nominal Kendrick mass.\nDefaults to ('floor', 'ceil', 'round').
    • \n
    • peak_derivative_threshold (float, optional):\nThreshold for defining derivative crossing. Should be a value between 0 and 1.\nDefaults to 0.0.
    • \n
    • peak_min_prominence_percent (float, optional):\nMinimum prominence percentage used for peak detection. Should be a value between 1 and 100.\nDefaults to 0.1.
    • \n
    • min_peak_datapoints (float, optional):\nMinimum number of data points used for peak detection. Should be a value between 0 and infinity.\nDefaults to 5.
    • \n
    • peak_max_prominence_percent (float, optional):\nMaximum prominence percentage used for baseline detection. Should be a value between 1 and 100.\nDefaults to 0.1.
    • \n
    • peak_height_max_percent (float, optional):\nMaximum height percentage used for baseline detection. Should be a value between 1 and 100.\nDefaults to 10.
    • \n
    • legacy_resolving_power (bool, optional):\nFlag indicating whether to use the legacy (CoreMS v1) resolving power calculation.\nDefaults to True.
    • \n
    • legacy_centroid_polyfit (bool, optional):\nUse legacy (numpy polyfit) to fit centroid.\nDefaults to False.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tkendrick_base: Dict = <factory>,\tkendrick_rounding_method: str = 'floor',\timplemented_kendrick_rounding_methods: tuple = ('floor', 'ceil', 'round'),\tpeak_derivative_threshold: float = 0.0,\tpeak_min_prominence_percent: float = 0.1,\tmin_peak_datapoints: float = 5,\tpeak_max_prominence_percent: float = 0.1,\tpeak_height_max_percent: float = 10,\tlegacy_resolving_power: bool = True,\tlegacy_centroid_polyfit: bool = False)"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.kendrick_base", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.kendrick_base", "kind": "variable", "doc": "

    \n", "annotation": ": Dict"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.kendrick_rounding_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.kendrick_rounding_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'floor'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.implemented_kendrick_rounding_methods", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.implemented_kendrick_rounding_methods", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('floor', 'ceil', 'round')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.peak_derivative_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.peak_derivative_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.peak_min_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.peak_min_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.min_peak_datapoints", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.min_peak_datapoints", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.peak_max_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.peak_max_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.peak_height_max_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.peak_height_max_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "10"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.legacy_resolving_power", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.legacy_resolving_power", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.legacy_centroid_polyfit", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.legacy_centroid_polyfit", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting", "kind": "class", "doc": "

    Gas chromatograph processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • use_deconvolution (bool, optional):\nIf True, use deconvolution. Default is False.
    • \n
    • implemented_smooth_method (tuple, optional):\nSmoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    • \n
    • smooth_window (int, optional):\nWindow size for smoothing the ion chromatogram. Default is 5.
    • \n
    • smooth_method (str, optional):\nSmoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    • \n
    • savgol_pol_order (int, optional):\nPolynomial order for Savitzky-Golay smoothing. Default is 2.
    • \n
    • peak_derivative_threshold (float, optional):\nThreshold for defining derivative crossing. Should be a value between 0 and 1.\nDefaults to 0.0005.
    • \n
    • peak_height_max_percent (float, optional):\nMaximum height percentage used for baseline detection. Should be a value between 1 and 100.\nDefaults to 10.
    • \n
    • peak_max_prominence_percent (float, optional):\nMaximum prominence percentage used for baseline detection. Should be a value between 1 and 100.\nDefaults to 1.
    • \n
    • min_peak_datapoints (float, optional):\nMinimum number of data points used for peak detection. Should be a value between 0 and infinity.\nDefaults to 5.
    • \n
    • max_peak_width (float, optional):\nMaximum peak width used for peak detection. Should be a value between 0 and infinity.\nDefaults to 0.1.
    • \n
    • noise_threshold_method (str, optional):\nMethod for detecting noise threshold. Default is 'manual_relative_abundance'.
    • \n
    • noise_threshold_methods_implemented (tuple, optional):\nMethods for detecting the noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    • \n
    • std_noise_threshold (int, optional):\nDefault is 3.
    • \n
    • peak_height_min_percent (float, optional):\n0-100 % used for peak detection. Default is 0.1.
    • \n
    • peak_min_prominence_percent (float, optional):\n0-100 % used for peak detection. Default is 0.1.
    • \n
    • eic_signal_threshold (float, optional):\n0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    • \n
    • max_rt_distance (float, optional):\nMaximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
    • \n
    • verbose_processing (bool, optional):\nIf True, print verbose processing information. Default is True.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tuse_deconvolution: bool = False,\timplemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'),\tsmooth_window: int = 5,\tsmooth_method: str = 'savgol',\tsavgol_pol_order: int = 2,\tpeak_derivative_threshold: float = 0.0005,\tpeak_height_max_percent: float = 10,\tpeak_max_prominence_percent: float = 1,\tmin_peak_datapoints: float = 5,\tmax_peak_width: float = 0.1,\tnoise_threshold_method: str = 'manual_relative_abundance',\tnoise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'),\tstd_noise_threshold: int = 3,\tpeak_height_min_percent: float = 0.1,\tpeak_min_prominence_percent: float = 0.1,\teic_signal_threshold: float = 0.01,\tmax_rt_distance: float = 0.025,\tverbose_processing: bool = True)"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.use_deconvolution", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.use_deconvolution", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.implemented_smooth_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.implemented_smooth_method", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.smooth_window", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.smooth_window", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.smooth_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.smooth_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'savgol'"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.savgol_pol_order", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.savgol_pol_order", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "2"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_derivative_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_derivative_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0005"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_height_max_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_height_max_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "10"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_max_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_max_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.min_peak_datapoints", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.min_peak_datapoints", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.max_peak_width", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.max_peak_width", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.noise_threshold_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.noise_threshold_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'manual_relative_abundance'"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.noise_threshold_methods_implemented", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.noise_threshold_methods_implemented", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.std_noise_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.std_noise_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_height_min_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_height_min_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_min_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_min_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.eic_signal_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.eic_signal_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.01"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.max_rt_distance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.max_rt_distance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.025"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.verbose_processing", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.verbose_processing", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings", "kind": "class", "doc": "

    Settings for compound search

    \n\n
    Attributes
    \n\n
      \n
    • url_database (str, optional):\nURL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
    • \n
    • ri_search_range (float, optional):\nRetention index search range. Default is 35.
    • \n
    • rt_search_range (float, optional):\nRetention time search range, in minutes. Default is 1.0.
    • \n
    • correlation_threshold (float, optional):\nThreshold for correlation for spectral similarity. Default is 0.5.
    • \n
    • score_threshold (float, optional):\nThreshold for composite score. Default is 0.0.
    • \n
    • ri_spacing (float, optional):\nRetention index spacing. Default is 200.
    • \n
    • ri_std (float, optional):\nRetention index standard deviation. Default is 3.
    • \n
    • ri_calibration_compound_names (list, optional):\nList of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.__init__", "kind": "function", "doc": "

    \n", "signature": "(\turl_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres',\tri_search_range: float = 35,\trt_search_range: float = 1.0,\tcorrelation_threshold: float = 0.5,\tscore_threshold: float = 0.0,\tri_spacing: float = 200,\tri_std: float = 3,\tri_calibration_compound_names: List = <factory>,\texploratory_mode: bool = False,\tscore_methods: tuple = ('highest_sim_score', 'highest_ss'),\toutput_score_method: str = 'All')"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.url_database", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.url_database", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres'"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.ri_search_range", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.ri_search_range", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "35"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.rt_search_range", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.rt_search_range", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.correlation_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.correlation_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.5"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.score_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.score_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.ri_spacing", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.ri_spacing", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "200"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.ri_std", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.ri_std", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.ri_calibration_compound_names", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.ri_calibration_compound_names", "kind": "variable", "doc": "

    \n", "annotation": ": List"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.exploratory_mode", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.exploratory_mode", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.score_methods", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.score_methods", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('highest_sim_score', 'highest_ss')"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.output_score_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.output_score_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'All'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings", "kind": "class", "doc": "

    Settings for molecular searching

    \n\n

    These are used to generate the database entries, do not change.

    \n\n
    Attributes
    \n\n
      \n
    • usedAtoms (dict, optional):\nDictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    • \n
    • min_mz (float, optional):\nMinimum m/z to use for searching. Default is 50.0.
    • \n
    • max_mz (float, optional):\nMaximum m/z to use for searching. Default is 1200.0.
    • \n
    • min_dbe (float, optional):\nMinimum double bond equivalent to use for searching. Default is 0.
    • \n
    • max_dbe (float, optional):\nMaximum double bond equivalent to use for searching. Default is 50.
    • \n
    • use_pah_line_rule (bool, optional):\nIf True, use the PAH line rule. Default is False.
    • \n
    • isRadical (bool, optional):\nIf True, search for radical ions. Default is True.
    • \n
    • isProtonated (bool, optional):\nIf True, search for protonated ions. Default is True.
    • \n
    • url_database (str, optional):\nURL for the database. Default is None.
    • \n
    • db_jobs (int, optional):\nNumber of jobs to use for database queries. Default is 1.
    • \n
    • used_atom_valences (dict, optional):\nDictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.usedAtoms", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.usedAtoms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.min_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.min_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.max_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.max_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.min_dbe", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.min_dbe", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.max_dbe", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.max_dbe", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.use_pah_line_rule", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.use_pah_line_rule", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.isRadical", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.isRadical", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.isProtonated", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.isProtonated", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.url_database", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.url_database", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.db_jobs", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.db_jobs", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.used_atom_valences", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.used_atom_valences", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings", "kind": "class", "doc": "

    Settings for molecular searching

    \n\n
    Attributes
    \n\n
      \n
    • use_isotopologue_filter (bool, optional):\nIf True, use isotopologue filter. Default is False.
    • \n
    • isotopologue_filter_threshold (float, optional):\nThreshold for isotopologue filter. Default is 33.
    • \n
    • isotopologue_filter_atoms (tuple, optional):\nTuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    • \n
    • use_runtime_kendrick_filter (bool, optional):\nIf True, use runtime Kendrick filter. Default is False.
    • \n
    • use_min_peaks_filter (bool, optional):\nIf True, use minimum peaks filter. Default is True.
    • \n
    • min_peaks_per_class (int, optional):\nMinimum number of peaks per class. Default is 15.
    • \n
    • url_database (str, optional):\nURL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
    • \n
    • db_jobs (int, optional):\nNumber of jobs to use for database queries. Default is 3.
    • \n
    • db_chunk_size (int, optional):\nChunk size to use for database queries. Default is 300.
    • \n
    • ion_charge (int, optional):\nIon charge. Default is -1.
    • \n
    • min_hc_filter (float, optional):\nMinimum hydrogen to carbon ratio. Default is 0.3.
    • \n
    • max_hc_filter (float, optional):\nMaximum hydrogen to carbon ratio. Default is 3.
    • \n
    • min_oc_filter (float, optional):\nMinimum oxygen to carbon ratio. Default is 0.0.
    • \n
    • max_oc_filter (float, optional):\nMaximum oxygen to carbon ratio. Default is 1.2.
    • \n
    • min_op_filter (float, optional):\nMinimum oxygen to phosphorus ratio. Default is 2.
    • \n
    • use_pah_line_rule (bool, optional):\nIf True, use the PAH line rule. Default is False.
    • \n
    • min_dbe (float, optional):\nMinimum double bond equivalent to use for searching. Default is 0.
    • \n
    • max_dbe (float, optional):\nMaximum double bond equivalent to use for searching. Default is 40.
    • \n
    • mz_error_score_weight (float, optional):\nWeight for m/z error score to contribute to composite score. Default is 0.6.
    • \n
    • isotopologue_score_weight (float, optional):\nWeight for isotopologue score to contribute to composite score. Default is 0.4.
    • \n
    • adduct_atoms_neg (tuple, optional):\nTuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
    • \n
    • adduct_atoms_pos (tuple, optional):\nTuple of atoms to use in positive polarity. Default is ('Na', 'K').
    • \n
    • score_methods (tuple, optional):\nTuple of score methods that can be implemented. \nDefault is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
    • \n
    • score_method (str, optional):\nScore method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
    • \n
    • output_min_score (float, optional):\nMinimum score for output. Default is 0.1.
    • \n
    • output_score_method (str, optional):\nScore method to use for output. Default is 'All Candidates'.
    • \n
    • isRadical (bool, optional):\nIf True, search for radical ions. Default is False.
    • \n
    • isProtonated (bool, optional):\nIf True, search for protonated ions. Default is True.
    • \n
    • isAdduct (bool, optional):\nIf True, search for adduct ions. Default is False.
    • \n
    • usedAtoms (dict, optional):\nDictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    • \n
    • ion_types_excluded (list, optional):\nList of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
    • \n
    • ionization_type (str, optional):\nIonization type. Default is 'ESI'.
    • \n
    • min_ppm_error (float, optional):\nMinimum ppm error. Default is -10.0.
    • \n
    • max_ppm_error (float, optional):\nMaximum ppm error. Default is 10.0.
    • \n
    • min_abun_error (float, optional):\nMinimum abundance error for isotopologue search. Default is -100.0.
    • \n
    • max_abun_error (float, optional):\nMaximum abundance error for isotopologue search. Default is 100.0.
    • \n
    • mz_error_range (float, optional):\nm/z error range. Default is 1.5.
    • \n
    • error_method (str, optional):\nError method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
    • \n
    • mz_error_average (float, optional):\nm/z error average. Default is 0.0.
    • \n
    • used_atom_valences (dict, optional):\nDictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tuse_isotopologue_filter: bool = False,\tisotopologue_filter_threshold: float = 33,\tisotopologue_filter_atoms: tuple = ('Cl', 'Br'),\tuse_runtime_kendrick_filter: bool = False,\tuse_min_peaks_filter: bool = True,\tmin_peaks_per_class: int = 15,\turl_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp',\tdb_jobs: int = 3,\tdb_chunk_size: int = 300,\tion_charge: int = -1,\tmin_hc_filter: float = 0.3,\tmax_hc_filter: float = 3,\tmin_oc_filter: float = 0.0,\tmax_oc_filter: float = 1.2,\tmin_op_filter: float = 2,\tuse_pah_line_rule: bool = False,\tmin_dbe: float = 0,\tmax_dbe: float = 40,\tmz_error_score_weight: float = 0.6,\tisotopologue_score_weight: float = 0.4,\tadduct_atoms_neg: tuple = ('Cl', 'Br'),\tadduct_atoms_pos: tuple = ('Na', 'K'),\tscore_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'),\tscore_method: str = 'prob_score',\toutput_min_score: float = 0.1,\toutput_score_method: str = 'All Candidates',\tisRadical: bool = False,\tisProtonated: bool = True,\tisAdduct: bool = False,\tusedAtoms: dict = <factory>,\tion_types_excluded: list = <factory>,\tionization_type: str = 'ESI',\tmin_ppm_error: float = -10.0,\tmax_ppm_error: float = 10.0,\tmin_abun_error: float = -100.0,\tmax_abun_error: float = 100.0,\tmz_error_range: float = 1.5,\terror_method: str = 'None',\tmz_error_average: float = 0.0,\tused_atom_valences: dict = <factory>)"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.use_isotopologue_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.use_isotopologue_filter", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isotopologue_filter_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isotopologue_filter_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "33"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isotopologue_filter_atoms", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isotopologue_filter_atoms", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('Cl', 'Br')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.use_runtime_kendrick_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.use_runtime_kendrick_filter", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.use_min_peaks_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.use_min_peaks_filter", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_peaks_per_class", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_peaks_per_class", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "15"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.url_database", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.url_database", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.db_jobs", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.db_jobs", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.db_chunk_size", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.db_chunk_size", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "300"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.ion_charge", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.ion_charge", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "-1"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_hc_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_hc_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.3"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_hc_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_hc_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_oc_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_oc_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_oc_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_oc_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.2"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_op_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_op_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "2"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.use_pah_line_rule", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.use_pah_line_rule", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_dbe", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_dbe", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_dbe", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_dbe", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "40"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.mz_error_score_weight", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.mz_error_score_weight", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.6"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isotopologue_score_weight", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isotopologue_score_weight", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.4"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.adduct_atoms_neg", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.adduct_atoms_neg", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('Cl', 'Br')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.adduct_atoms_pos", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.adduct_atoms_pos", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('Na', 'K')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.score_methods", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.score_methods", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.score_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.score_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'prob_score'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.output_min_score", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.output_min_score", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.output_score_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.output_score_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'All Candidates'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isRadical", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isRadical", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isProtonated", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isProtonated", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isAdduct", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isAdduct", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.usedAtoms", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.usedAtoms", "kind": "variable", "doc": "

    \n", "annotation": ": dict"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.ion_types_excluded", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.ion_types_excluded", "kind": "variable", "doc": "

    \n", "annotation": ": list"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.ionization_type", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.ionization_type", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'ESI'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_ppm_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_ppm_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "-10.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_ppm_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_ppm_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "10.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_abun_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_abun_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "-100.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_abun_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_abun_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "100.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.mz_error_range", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.mz_error_range", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.5"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.error_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.error_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'None'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.mz_error_average", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.mz_error_average", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.used_atom_valences", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.used_atom_valences", "kind": "variable", "doc": "

    \n", "annotation": ": dict"}, {"fullname": "corems.encapsulation.input", "modulename": "corems.encapsulation.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.input.parameter_from_json", "modulename": "corems.encapsulation.input.parameter_from_json", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_toml_parameters_ms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_toml_parameters_ms", "kind": "function", "doc": "

    Load parameters from a toml file and set the parameters in the mass_spec_obj

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec_obj (MassSpectrum):\ncorems MassSpectrum object
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(mass_spec_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_parameters_ms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_parameters_ms", "kind": "function", "doc": "

    Load parameters from a json file and set the parameters in the mass_spec_obj

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec_obj (MassSpectrum):\ncorems MassSpectrum object
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(mass_spec_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_toml_parameters_gcms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_toml_parameters_gcms", "kind": "function", "doc": "

    Load parameters from a toml file and set the parameters in the GCMS object

    \n\n
    Parameters
    \n\n
      \n
    • gcms_obj (GCMSBase):\ncorems GCMSBase object
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(gcms_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_parameters_gcms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_parameters_gcms", "kind": "function", "doc": "

    Load parameters from a json file and set the parameters in the GCMS object

    \n\n
    Parameters
    \n\n
      \n
    • gcms_obj (GCMSBase):\ncorems GCMSBase object
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(gcms_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_json_parameters_lcms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_json_parameters_lcms", "kind": "function", "doc": "

    Load parameters from a json file and set the parameters in the LCMS object

    \n\n
    Parameters
    \n\n
      \n
    • lcms_obj (LCMSBase):\ncorems LCMSBase object
    • \n
    • parameters_path (str):\npath to the parameters file saved as a .json, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(lcms_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_toml_parameters_lcms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_toml_parameters_lcms", "kind": "function", "doc": "

    Load parameters from a toml file and set the parameters in the LCMS object

    \n\n
    Parameters
    \n\n
      \n
    • lcms_obj (LCMSBase):\ncorems LCMSBase object
    • \n
    • parameters_path (str):\npath to the parameters file saved as a .toml, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(lcms_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_toml_parameters_class", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_toml_parameters_class", "kind": "function", "doc": "

    Load parameters from a toml file and set the parameters in the instance_parameters_class

    \n\n
    Parameters
    \n\n
      \n
    • parameter_label (str):\nlabel of the parameters in the toml file
    • \n
    • instance_parameters_class (object):\ninstance of the parameters class
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n\n
    Returns
    \n\n
      \n
    • object: instance of the parameters class
    • \n
    \n", "signature": "(parameter_label, instance_parameters_class, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_parameters_class", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_parameters_class", "kind": "function", "doc": "

    Load parameters from a json file and set the parameters in the instance_parameters_class

    \n\n
    Parameters
    \n\n
      \n
    • parameter_label (str):\nlabel of the parameters in the json file
    • \n
    • instance_parameters_class (object):\ninstance of the parameters class
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n\n
    Returns
    \n\n
      \n
    • object: instance of the parameters class
    • \n
    \n", "signature": "(parameter_label, instance_parameters_class, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output", "modulename": "corems.encapsulation.output", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.output.parameter_to_dict", "modulename": "corems.encapsulation.output.parameter_to_dict", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_all_default_data", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_all_default_data", "kind": "function", "doc": "

    Return a dictionary with all default parameters for MS and GCMS

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_data_lcms", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_data_lcms", "kind": "function", "doc": "

    Return a dictionary with all parameters for LCMSBase object

    \n\n
    Parameters
    \n\n
      \n
    • lcms_obj (LCMSBase):\nLCMSBase object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: dictionary with all parameters for LCMSBase object
    • \n
    \n", "signature": "(lcms_obj):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_lcms_default_data", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_lcms_default_data", "kind": "function", "doc": "

    Return a dictionary with all default parameters for LCMS

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_data_ms", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_data_ms", "kind": "function", "doc": "

    Return a dictionary with all parameters for MassSpectrum object

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec (MassSpectrum):\nMassSpectrum object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: dictionary with all parameters for MassSpectrum object
    • \n
    \n", "signature": "(mass_spec):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_ms_default_data", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_ms_default_data", "kind": "function", "doc": "

    Return a dictionary with all default parameters for MS including data input

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_gcms_default_data", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_gcms_default_data", "kind": "function", "doc": "

    Return a dictionary with all default parameters for GCMS

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_data_gcms", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_data_gcms", "kind": "function", "doc": "

    Return a dictionary with all parameters for GCMS

    \n", "signature": "(gcms):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json", "modulename": "corems.encapsulation.output.parameter_to_json", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_all_settings_json", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_all_settings_json", "kind": "function", "doc": "

    Write JSON file into current directory with all the default settings for the CoreMS package.

    \n\n

    Parameters:

    \n\n

    filename : str, optional\n The name of the JSON file to be created. Default is 'SettingsCoreMS.json'.\nfile_path : str or Path, optional\n The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.

    \n", "signature": "(filename='SettingsCoreMS.json', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_ms_settings_json", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_ms_settings_json", "kind": "function", "doc": "

    Write JSON file into current directory with all the mass spectrum default settings for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    • \n
    • file_path (str or Path, optional):\nThe path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.json', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_gcms_settings_json", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_gcms_settings_json", "kind": "function", "doc": "

    Write JSON file into current directory containing the default GCMS settings data.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    • \n
    • file_path (str or Path-like object, optional):\nThe path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.json', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_all_settings_toml", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_all_settings_toml", "kind": "function", "doc": "

    Write TOML file into the specified file path or the current directory with all the default settings for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    • \n
    • file_path (str or Path, optional):\nThe path where the TOML file will be saved. If not provided, the file will be saved in the current directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.toml', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_ms_settings_toml", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_ms_settings_toml", "kind": "function", "doc": "

    Write TOML file into the current directory with all the mass spectrum default settings for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the TOML file to be created. Default is 'SettingsCoreMS.toml'.
    • \n
    • file_path (str or Path, optional):\nThe path where the TOML file should be saved. If not provided, the file will be saved in the current working directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.toml', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_gcms_settings_toml", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_gcms_settings_toml", "kind": "function", "doc": "

    Write TOML file into current directory containing the default GCMS settings data.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    • \n
    • file_path (str or Path, optional):\nThe path where the TOML file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.toml', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_lcms_settings_json", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_lcms_settings_json", "kind": "function", "doc": "

    Write JSON file into current directory with all the LCMS settings data for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the JSON file. Defaults to 'SettingsCoreMS.json'.
    • \n
    • file_path (str or Path, optional):\nThe path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    • lcms_obj (object, optional):\nThe LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.json', file_path=None, lcms_obj=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_lcms_settings_toml", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_lcms_settings_toml", "kind": "function", "doc": "

    Write TOML file into current directory with all the LCMS settings data for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    • \n
    • file_path (str or Path, optional):\nThe path where the TOML file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    • lcms_obj (object, optional):\nThe LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.toml', file_path=None, lcms_obj=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra", "modulename": "corems.mass_spectra", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc", "modulename": "corems.mass_spectra.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Calc", "modulename": "corems.mass_spectra.calc.GC_Calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations", "kind": "class", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations.calibrate_ri", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations.calibrate_ri", "kind": "function", "doc": "

    \n", "signature": "(self, ref_dict, cal_file_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations.smooth_tic", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations.smooth_tic", "kind": "function", "doc": "

    \n", "signature": "(self, tic):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations.centroid_detector", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations.centroid_detector", "kind": "function", "doc": "

    \n", "signature": "(self, tic, rt):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations.remove_outliers", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations.remove_outliers", "kind": "function", "doc": "

    \n", "signature": "(self, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution", "kind": "class", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.run_deconvolution", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.run_deconvolution", "kind": "function", "doc": "

    \n", "signature": "(self, plot_res=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.centroid_detector", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.centroid_detector", "kind": "function", "doc": "

    this function has been replaced with sp.peak_picking_first_derivative\nand is not used

    \n", "signature": "(self, tic, rt):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.ion_extracted_chroma", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.ion_extracted_chroma", "kind": "function", "doc": "

    \n", "signature": "(self, mass_spectra_obj):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.hc", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.hc", "kind": "function", "doc": "

    \n", "signature": "(self, X, Y, max_rt_distance=0.025):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.find_peaks_entity", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.find_peaks_entity", "kind": "function", "doc": "

    combine eic with matching rt apexes

    \n", "signature": "(self, eic_dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.mass_spec_factory", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.mass_spec_factory", "kind": "function", "doc": "

    \n", "signature": "(self, rt, datadict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.smooth_signal", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.smooth_signal", "kind": "function", "doc": "

    \n", "signature": "(self, signal):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.add_gcpeak", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.add_gcpeak", "kind": "function", "doc": "

    \n", "signature": "(\tself,\tnew_apex_index,\tstart_rt,\tfinal_rt,\tpeak_rt,\tsmoothed_tic,\tdatadict,\tplot_res):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.deconvolution", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.deconvolution", "kind": "function", "doc": "

    \n", "signature": "(self, peaks_entity_data, plot_res):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.quadratic_interpolation", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.quadratic_interpolation", "kind": "function", "doc": "

    \n", "signature": "(self, rt_list, tic_list, apex_index):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_RI_Calibration", "modulename": "corems.mass_spectra.calc.GC_RI_Calibration", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_RI_Calibration.get_rt_ri_pairs", "modulename": "corems.mass_spectra.calc.GC_RI_Calibration", "qualname": "get_rt_ri_pairs", "kind": "function", "doc": "

    \n", "signature": "(gcms_ref_obj, sql_obj=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch", "modulename": "corems.mass_spectra.calc.MZSearch", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults", "kind": "class", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.__init__", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.__init__", "kind": "function", "doc": "

    \n", "signature": "(calculated_mz: float, exp_mz: float, error: float, tolerance: float)"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.calculated_mz", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.calculated_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.exp_mz", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.exp_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.error", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.error", "kind": "variable", "doc": "

    \n", "annotation": ": float"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.tolerance", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch", "kind": "class", "doc": "

    A class that represents a thread of control.

    \n\n

    This class can be safely subclassed in a limited fashion. There are two ways\nto specify the activity: by passing a callable object to the constructor, or\nby overriding the run() method in a subclass.

    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.__init__", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.__init__", "kind": "function", "doc": "
    Parameters
    \n\n
      \n
    • calculated_mzs ([float] calculated m/z):

    • \n
    • exp_mzs ([float] experimental m/z):

    • \n
    • method (string,):\nppm or ppb

    • \n
    • call run to trigger the m/z search algorithm
    • \n
    • or start if using it as thread
    • \n
    \n", "signature": "(\texp_mzs: List[float],\tcalculated_mzs: List[float],\ttolerance,\tmethod='ppm',\taverage_target_mz=True)"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.method", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.method", "kind": "variable", "doc": "

    method: string,\n ppm or ppb

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.results", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.results", "kind": "variable", "doc": "

    {calculated_mz: [SearchResults]}\ncontains the results of the search

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.averaged_target_mz", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.averaged_target_mz", "kind": "variable", "doc": "

    [float]\ncontains the average target m/z to be searched against

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.calculated_mzs", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.calculated_mzs", "kind": "variable", "doc": "

    [float]\ncontains the mz target to be searched against

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.exp_mzs", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.exp_mzs", "kind": "variable", "doc": "

    [float]\ncontains the exp mz to be searched against

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.tolerance", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.tolerance", "kind": "variable", "doc": "

    method: string,\n ppm or ppb

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.colapse_calculated", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.colapse_calculated", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.run", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.run", "kind": "function", "doc": "

    Method representing the thread's activity.

    \n\n

    You may override this method in a subclass. The standard run() method\ninvokes the callable object passed to the object's constructor as the\ntarget argument, if any, with sequential and keyword arguments taken\nfrom the args and kwargs arguments, respectively.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.calc_mz_error", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.calc_mz_error", "kind": "function", "doc": "
    Parameters
    \n\n
      \n
    • calculated_mz (float,):

    • \n
    • exp_mz (float):

    • \n
    • method (string,):\nppm or ppb

    • \n
    \n", "signature": "(calculated_mz, exp_mz, method='ppm'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.check_ppm_error", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.check_ppm_error", "kind": "function", "doc": "

    \n", "signature": "(tolerance, error):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.get_nominal_exp", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.get_nominal_exp", "kind": "function", "doc": "

    \n", "signature": "(self, exp_mzs) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.search_mz", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.search_mz", "kind": "function", "doc": "

    \n", "signature": "(self, results, dict_nominal_exp_mz, calculated_mz, offset) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing", "modulename": "corems.mass_spectra.calc.SignalProcessing", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.peak_detector", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "peak_detector", "kind": "function", "doc": "

    Find peaks by detecting minima in the first derivative of the data\nUsed in LC/GC data processing

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • max_tic (float):\nmaximum value of the data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: tuple of indexes of the start, apex and final points of the peak
    • \n
    \n", "signature": "(tic, max_tic):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.find_nearest_scan", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "find_nearest_scan", "kind": "function", "doc": "

    Find nearest data point in a list of nodes (derivated data)\nin LC/GC this is 'scan', in MS this is 'm/z' data point

    \n\n
    Parameters
    \n\n
      \n
    • data (float):\ndata point to find the nearest node
    • \n
    • nodes (array):\narray of nodes to search for the nearest node
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: nearest node to the data point
    • \n
    \n", "signature": "(data, nodes):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.check_corrected_abundance", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "check_corrected_abundance", "kind": "function", "doc": "

    Check the corrected abundance of the peak

    \n\n
    Parameters
    \n\n
      \n
    • closest_left (int):\nindex of the closest left node
    • \n
    • closest_right (int):\nindex of the closest right node
    • \n
    • apex_index (int):\nindex of the apex node
    • \n
    • signal (array):\narray of data points to find the peaks
    • \n
    • max_signal (float):\nmaximum value of the data points
    • \n
    • signal_threshold (float):\nthreshold for the signal
    • \n
    • abun_norm (float):\nabundance normalization factor
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: corrected abundance of the peak
    • \n
    \n", "signature": "(\tclosest_left,\tclosest_right,\tapex_index,\tsignal,\tmax_signal,\tsignal_threshold,\tabun_norm):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.peak_picking_first_derivative", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "peak_picking_first_derivative", "kind": "function", "doc": "

    Find peaks by detecting minima in the first derivative of the data\nUsed in LC/GC and MS data processing\nOptional baseline correction, then peak apex detection via 1st derivative.\nFor each apex the peak datapoints surrounding the apex are determined. \nSome basic thresholding is applied (signal, number of datapoints, etc).

    \n\n
    Parameters
    \n\n
      \n
    • domain (array):\narray of data points to find the peaks
    • \n
    • signal (array):\narray of data points to find the peaks
    • \n
    • max_height (float):\nmaximum height of the peak
    • \n
    • max_prominence (float):\nmaximum prominence of the peak
    • \n
    • max_signal (float):\nmaximum signal of the peak
    • \n
    • min_peak_datapoints (int):\nminimum number of data points in the peak
    • \n
    • peak_derivative_threshold (float):\nthreshold for the peak derivative
    • \n
    • signal_threshold (float):\nthreshold for the signal
    • \n
    • correct_baseline (bool):\nflag to correct the baseline
    • \n
    • plot_res (bool):\nflag to plot the results
    • \n
    • abun_norm (float):\nabundance normalization factor
    • \n
    • check_abundance (bool):\nflag to check the abundance
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: tuple of indexes of the start, apex and final points of the peak
    • \n
    \n", "signature": "(\tdomain,\tsignal,\tmax_height,\tmax_prominence,\tmax_signal,\tmin_peak_datapoints,\tpeak_derivative_threshold,\tsignal_threshold=0.1,\tcorrect_baseline=True,\tplot_res=False,\tabun_norm=100,\tcheck_abundance=False,\tapex_indexes=[]):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.find_minima", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "find_minima", "kind": "function", "doc": "

    Find the index of the local minima in the given TIC intensity array.

    \n\n

    Parameters:

    \n\n

    index: int \n The starting index to search for the minima.\ntic: list\n TIC data points\nright : bool, optional\n Determines the direction of the search. If True, search to the right of the index. If False, search to the left of the index. Default is True.

    \n\n

    Returns:

    \n\n

    int\n The index of the local minima in the TIC array.

    \n", "signature": "(index, tic, right=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.derivate", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "derivate", "kind": "function", "doc": "

    Calculate derivative of the data points. \nReplaces nan with infinity

    \n\n
    Parameters
    \n\n
      \n
    • data_array (array):\narray of data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • array: array of the derivative of the data points
    • \n
    \n", "signature": "(data_array):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.minima_detector", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "minima_detector", "kind": "function", "doc": "

    Minima detector for the TIC data points.

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • max_tic (float):\nmaximum value of the data points
    • \n
    • peak_height_max_percent (float):\nmaximum height of the peak
    • \n
    • peak_max_prominence_percent (float):\nmaximum prominence of the peak
    • \n
    \n\n
    Returns
    \n\n
      \n
    • generator: generator of the indexes of the minima in the TIC array
    • \n
    \n", "signature": "(tic, max_tic, peak_height_max_percent, peak_max_prominence_percent):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.baseline_detector", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "baseline_detector", "kind": "function", "doc": "

    Baseline detector for the TIC data points.\nFor LC/GC data processing

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • rt (array):\narray of retention time data points
    • \n
    • peak_height_max_percent (float):\nmaximum height of the peak
    • \n
    • peak_max_prominence_percent (float):\nmaximum prominence of the peak
    • \n
    • do_interpolation (bool, optional):\nflag to interpolate the data points. Default is True
    • \n
    • Returns
    • \n
    • -------
    • \n
    • array: array of the baseline corrected data points
    • \n
    \n", "signature": "(\ttic,\trt,\tpeak_height_max_percent,\tpeak_max_prominence_percent,\tdo_interpolation=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.peak_detector_generator", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "peak_detector_generator", "kind": "function", "doc": "

    Peak detector generator for the TIC data points.

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • stds (float):\nstandard deviation
    • \n
    • method (str):\nmethod to detect the peaks\nAvailable methods: 'manual_relative_abundance', 'auto_relative_abundance', 'second_derivative'
    • \n
    • rt (array):\narray of retention time data points
    • \n
    • max_height (float):\nmaximum height of the peak
    • \n
    • min_height (float):\nminimum height of the peak
    • \n
    • max_prominence (float):\nmaximum prominence of the peak
    • \n
    • min_datapoints (int):\nminimum number of data points in the peak
    • \n
    \n\n
    Returns
    \n\n
      \n
    • generator: generator of the indexes of the peaks in the TIC array
    • \n
    \n", "signature": "(\ttic,\tstds,\tmethod,\trt,\tmax_height,\tmin_height,\tmax_prominence,\tmin_datapoints):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.smooth_signal", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "smooth_signal", "kind": "function", "doc": "

    Smooth the data using a window with requested size.

    \n\n

    This method is based on the convolution of a scaled window with the signal.\nThe signal is prepared by introducing reflected copies of the signal \n(with the window size) in both ends so that transient parts are minimized\nin the beginning and end part of the output signal.

    \n\n
    Parameters
    \n\n
      \n
    • x (array):\nthe input signal
    • \n
    • window_len (int):\nthe dimension of the smoothing window; should be an odd integer
    • \n
    • window (str):\nthe type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
    • \n
    • pol_order (int):\nthe order of the polynomial to fit the data
    • \n
    • implemented_smooth_method (list):\nlist of implemented smoothing methods
    • \n
    \n\n
    Returns
    \n\n

    y: array\n the smoothed signal

    \n\n

    Notes:

    \n\n

    See also: numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve\nscipy.signal.savgol_filter

    \n", "signature": "(x, window_len, window, pol_order, implemented_smooth_method):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.second_derivative_threshold", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "second_derivative_threshold", "kind": "function", "doc": "

    Second derivative threshold for the TIC data points.\nFor LC/GC data processing

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • stds (float):\nstandard deviation
    • \n
    • rt (array):\narray of retention time data points
    • \n
    • peak_height_max_percent (float):\nmaximum height of the peak
    • \n
    \n\n
    Returns
    \n\n
      \n
    • array: array of the indexes of the data points to remove
    • \n
    \n", "signature": "(tic, stds, rt, peak_height_max_percent, peak_max_prominence_percent):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc", "modulename": "corems.mass_spectra.calc.lc_calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.lc_calc.find_closest", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "find_closest", "kind": "function", "doc": "

    Find the index of closest value in A to each value in target.

    \n\n
    Parameters
    \n\n
      \n
    • A (~numpy.array):\nThe array to search (blueprint). A must be sorted.
    • \n
    • target (~numpy.array):\nThe array of values to search for. target must be sorted.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ~numpy.array: The indices of the closest values in A to each value in target.
    • \n
    \n", "signature": "(A, target):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations", "kind": "class", "doc": "

    Methods for performing LC calculations on mass spectra data.

    \n\n
    Notes
    \n\n

    This class is intended to be used as a mixin for the LCMSBase class.

    \n\n
    Methods
    \n\n
      \n
    • get_max_eic(eic_data).\nReturns the maximum EIC value from the given EIC data. A static method.
    • \n
    • smooth_tic(tic).\nSmooths the TIC data using the specified smoothing method and settings.
    • \n
    • eic_centroid_detector(rt, eic, max_eic).\nPerforms EIC centroid detection on the given EIC data.
    • \n
    • find_nearest_scan(rt).\nFinds the nearest scan to the given retention time.
    • \n
    • get_average_mass_spectrum(scan_list, apex_scan, spectrum_mode=\"profile\", ms_level=1, auto_process=True, use_parser=False, perform_checks=True, polarity=None).\nReturns an averaged mass spectrum object.
    • \n
    • find_mass_features(ms_level=1).\nFind regions of interest for a given MS level (default is MS1).
    • \n
    • integrate_mass_features(drop_if_fail=True, drop_duplicates=True, ms_level=1).\nIntegrate mass features of interest and extract EICs.
    • \n
    • find_c13_mass_features().\nEvaluate mass features and mark likely C13 isotopes.
    • \n
    • deconvolute_ms1_mass_features().\nDeconvolute mass features' ms1 mass spectra.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.get_max_eic", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.get_max_eic", "kind": "function", "doc": "

    Returns the maximum EIC value from the given EIC data.

    \n\n
    Notes
    \n\n

    This is a static method.

    \n\n
    Parameters
    \n\n
      \n
    • eic_data (dict):\nA dictionary containing EIC data.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The maximum EIC value.
    • \n
    \n", "signature": "(eic_data: dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.smooth_tic", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.smooth_tic", "kind": "function", "doc": "

    Smooths the TIC or EIC data using the specified smoothing method and settings.

    \n\n
    Parameters
    \n\n
      \n
    • tic (numpy.ndarray):\nThe TIC (or EIC) data to be smoothed.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The smoothed TIC data.
    • \n
    \n", "signature": "(self, tic):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.eic_centroid_detector", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.eic_centroid_detector", "kind": "function", "doc": "

    Performs EIC centroid detection on the given EIC data.

    \n\n
    Parameters
    \n\n
      \n
    • rt (numpy.ndarray):\nThe retention time data.
    • \n
    • eic (numpy.ndarray):\nThe EIC data.
    • \n
    • max_eic (float):\nThe maximum EIC value.
    • \n
    • apex_indexes (list, optional):\nThe apexes of the EIC peaks. Defaults to [], which means that the apexes will be calculated by the function.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The indexes of left, apex, and right limits as a generator.
    • \n
    \n", "signature": "(self, rt, eic, max_eic, apex_indexes=[]):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.find_nearest_scan", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.find_nearest_scan", "kind": "function", "doc": "

    Finds the nearest scan to the given retention time.

    \n\n
    Parameters
    \n\n
      \n
    • rt (float):\nThe retention time (in minutes) to find the nearest scan for.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The scan number of the nearest scan.
    • \n
    \n", "signature": "(self, rt):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.add_peak_metrics", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.add_peak_metrics", "kind": "function", "doc": "

    Add peak metrics to the mass features.

    \n\n

    This function calculates the peak metrics for each mass feature and adds them to the mass feature objects.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.get_average_mass_spectrum", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.get_average_mass_spectrum", "kind": "function", "doc": "

    Returns an averaged mass spectrum object

    \n\n
    Parameters
    \n\n
      \n
    • scan_list (list):\nList of scan numbers to average.
    • \n
    • apex_scan (int):\nNumber of the apex scan
    • \n
    • spectrum_mode (str, optional):\nThe spectrum mode to use. Defaults to \"profile\". Note that only \"profile\" mode is supported for averaging.
    • \n
    • ms_level (int, optional):\nThe MS level to use. Defaults to 1.
    • \n
    • auto_process (bool, optional):\nIf True, the averaged mass spectrum will be auto-processed. Defaults to True.
    • \n
    • use_parser (bool, optional):\nIf True, the mass spectra will be obtained from the parser. Defaults to False.
    • \n
    • perform_checks (bool, optional):\nIf True, the function will check if the data are within the ms_unprocessed dictionary and are the correct mode. Defaults to True. Only set to False if you are sure the data are profile, and (if not using the parser) are in the ms_unprocessed dictionary! ms_unprocessed dictionary also must be indexed on scan
    • \n
    • polarity (int, optional):\nThe polarity of the mass spectra (1 or -1). If not set, the polarity will be determined from the dataset. Defaults to None. (fastest if set to -1 or 1)
    • \n
    • ms_params (MSParameters, optional):\nThe mass spectrum parameters to use. If not set (None), the globally set parameters will be used. Defaults to None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpectrumProfile: The averaged mass spectrum object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the spectrum mode is not \"profile\".\nIf the MS level is not found in the unprocessed mass spectra dictionary.\nIf not all scan numbers are found in the unprocessed mass spectra dictionary.
    • \n
    \n", "signature": "(\tself,\tscan_list,\tapex_scan,\tspectrum_mode='profile',\tms_level=1,\tauto_process=True,\tuse_parser=False,\tperform_checks=True,\tpolarity=None,\tms_params=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.find_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.find_mass_features", "kind": "function", "doc": "

    Find mass features within an LCMSBase object

    \n\n

    Note that this is a wrapper function that calls the find_mass_features_ph function, but can be extended to support other peak picking methods in the future.

    \n\n
    Parameters
    \n\n
      \n
    • ms_level (int, optional):\nThe MS level to use for peak picking. Default is 1.
    • \n
    • grid (bool, optional):\nIf True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded); used for persistent homology peak picking. Default is True.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no MS level data is found on the object.\nIf persistent homology peak picking is attempted on non-profile mode data.\nIf data is not gridded and grid is False.\nIf peak picking method is not implemented.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but assigns the mass_features and eics attributes to the object.
    • \n
    \n", "signature": "(self, ms_level=1, grid=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.integrate_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.integrate_mass_features", "kind": "function", "doc": "

    Integrate mass features and extract EICs.

    \n\n

    Populates the _eics attribute on the LCMSBase object for each unique mz in the mass_features dataframe and adds data (start_scan, final_scan, area) to the mass_features attribute.

    \n\n
    Parameters
    \n\n
      \n
    • drop_if_fail (bool, optional):\nWhether to drop mass features if the EIC limit calculations fail.\nDefault is True.
    • \n
    • drop_duplicates (bool, optional):\nWhether to drop mass features that appear to be duplicates \n(i.e., mz is similar to another mass feature and limits of the EIC are similar or encapsulating).\nDefault is True.
    • \n
    • ms_level (int, optional):\nThe MS level to use. Default is 1.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass features are found.\nIf no MS level data is found for the given MS level (either in data or in the scan data)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the eics attribute on the LCMSBase object and adds data (start_scan, final_scan, area) to the mass_features attribute.
    • \n
    \n\n
    Notes
    \n\n

    drop_if_fail is useful for discarding mass features that do not have good shapes, usually due to a detection on a shoulder of a peak or a noisy region (especially if minimal smoothing is used during mass feature detection).

    \n", "signature": "(self, drop_if_fail=True, drop_duplicates=True, ms_level=1):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.find_c13_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.find_c13_mass_features", "kind": "function", "doc": "

    Mark likely C13 isotopes and connect to monoisotopic mass features.

    \n\n
    Returns
    \n\n
      \n
    • None, but populates the monoisotopic_mf_id and isotopologue_type attributes to the individual LCMSMassFeatures within the mass_features attribute of the LCMSBase object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass features are found.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.deconvolute_ms1_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.deconvolute_ms1_mass_features", "kind": "function", "doc": "

    Deconvolute MS1 mass features

    \n\n

    Deconvolute mass features ms1 spectrum based on the correlation of all masses within a spectrum over the EIC of the mass features

    \n\n
    Parameters
    \n\n
      \n
    • None
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but assigns the _ms_deconvoluted_idx, mass_spectrum_deconvoluted_parent,
    • \n
    • and associated_mass_features_deconvoluted attributes to the mass features in the
    • \n
    • mass_features attribute of the LCMSBase object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass features are found, must run find_mass_features() first.\nIf no EICs are found, did you run integrate_mass_features() first?
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations", "kind": "class", "doc": "

    Methods for performing calculations related to 2D peak picking via persistent homology on LCMS data.

    \n\n
    Notes
    \n\n

    This class is intended to be used as a mixin for the LCMSBase class.

    \n\n
    Methods
    \n\n
      \n
    • sparse_mean_filter(idx, V, radius=[0, 1, 1]).\nSparse implementation of a mean filter.
    • \n
    • embed_unique_indices(a).\nCreates an array of indices, sorted by unique element.
    • \n
    • sparse_upper_star(idx, V).\nSparse implementation of an upper star filtration.
    • \n
    • check_if_grid(data).\nCheck if the data is gridded in mz space.
    • \n
    • grid_data(data).\nGrid the data in the mz dimension.
    • \n
    • find_mass_features_ph(ms_level=1, grid=True).\nFind mass features within an LCMSBase object using persistent homology.
    • \n
    • cluster_mass_features(drop_children=True).\nCluster regions of interest.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.sparse_mean_filter", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.sparse_mean_filter", "kind": "function", "doc": "

    Sparse implementation of a mean filter.

    \n\n
    Parameters
    \n\n
      \n
    • idx (~numpy.array):\nEdge indices for each dimension (MxN).
    • \n
    • V (~numpy.array):\nArray of intensity data (Mx1).
    • \n
    • radius (float or list):\nRadius of the sparse filter in each dimension. Values less than\nzero indicate no connectivity in that dimension.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ~numpy.array: Filtered intensities (Mx1).
    • \n
    \n\n
    Notes
    \n\n

    This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos.\nThis is a static method.

    \n", "signature": "(idx, V, radius=[0, 1, 1]):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.embed_unique_indices", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.embed_unique_indices", "kind": "function", "doc": "

    Creates an array of indices, sorted by unique element.

    \n\n
    Parameters
    \n\n
      \n
    • a (~numpy.array):\nArray of unique elements (Mx1).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ~numpy.array: Array of indices (Mx1).
    • \n
    \n\n
    Notes
    \n\n

    This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos\nThis is a static method.

    \n", "signature": "(a):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.sparse_upper_star", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.sparse_upper_star", "kind": "function", "doc": "

    Sparse implementation of an upper star filtration.

    \n\n
    Parameters
    \n\n
      \n
    • idx (~numpy.array):\nEdge indices for each dimension (MxN).
    • \n
    • V (~numpy.array):\nArray of intensity data (Mx1).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • idx (~numpy.array):\nIndex of filtered points (Mx1).
    • \n
    • persistence (~numpy.array):\nPersistence of each filtered point (Mx1).
    • \n
    \n\n
    Notes
    \n\n

    This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos

    \n", "signature": "(self, idx, V):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.check_if_grid", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.check_if_grid", "kind": "function", "doc": "

    Check if the data are gridded in mz space.

    \n\n
    Parameters
    \n\n
      \n
    • data (DataFrame):\nDataFrame containing the mass spectrometry data. Needs to have mz and scan columns.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • bool: True if the data is gridded in the mz direction, False otherwise.
    • \n
    \n\n
    Notes
    \n\n

    This function is used within the grid_data function and the find_mass_features function and is not intended to be called directly.

    \n", "signature": "(self, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.grid_data", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.grid_data", "kind": "function", "doc": "

    Grid the data in the mz dimension.

    \n\n

    Data must be gridded prior to persistent homology calculations.

    \n\n
    Parameters
    \n\n
      \n
    • data (DataFrame):\nThe input data containing mz, scan, scan_time, and intensity columns.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The gridded data with mz, scan, scan_time, and intensity columns.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If gridding fails.
    • \n
    \n", "signature": "(self, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.find_mass_features_ph", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.find_mass_features_ph", "kind": "function", "doc": "

    Find mass features within an LCMSBase object using persistent homology.

    \n\n

    Assigns the mass_features attribute to the object (a dictionary of LCMSMassFeature objects, keyed by mass feature id)

    \n\n
    Parameters
    \n\n
      \n
    • ms_level (int, optional):\nThe MS level to use. Default is 1.
    • \n
    • grid (bool, optional):\nIf True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded). Default is True.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no MS level data is found on the object.\nIf data is not gridded and grid is False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but assigns the mass_features attribute to the object.
    • \n
    \n\n
    Notes
    \n\n

    This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos

    \n", "signature": "(self, ms_level=1, grid=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.cluster_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.cluster_mass_features", "kind": "function", "doc": "

    Cluster mass features

    \n\n

    Based on their proximity in the mz and scan_time dimensions, prioritizes the mass features with the highest persistence.

    \n\n
    Parameters
    \n\n
      \n
    • drop_children (bool, optional):\nWhether to drop the mass features that are not cluster parents. Default is True.
    • \n
    • sort_by (str, optional):\nThe column to sort the mass features by, this will determine which mass features get rolled up into a parent mass feature. Default is \"persistence\".
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass features are found.\nIf too many mass features are found.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None if drop_children is True, otherwise returns a list of mass feature ids that are not cluster parents.
    • \n
    \n", "signature": "(self, drop_children=True, sort_by='persistence'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory", "modulename": "corems.mass_spectra.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class", "modulename": "corems.mass_spectra.factory.GC_Class", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase", "kind": "class", "doc": "

    Base class for GC-MS data processing.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str, pathlib.Path, or s3path.S3Path):\nPath object containing the file location.
    • \n
    • analyzer (str, optional):\nName of the analyzer. Defaults to 'Unknown'.
    • \n
    • instrument_label (str, optional):\nLabel of the instrument. Defaults to 'Unknown'.
    • \n
    • sample_name (str, optional):\nName of the sample. If not provided, it is derived from the file location.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (pathlib.Path):\nPath object containing the file location.
    • \n
    • sample_name (str):\nName of the sample.
    • \n
    • analyzer (str):\nName of the analyzer.
    • \n
    • instrument_label (str):\nLabel of the instrument.
    • \n
    • gcpeaks (list):\nList of GCPeak objects.
    • \n
    • ri_pairs_ref (None):\nReference retention index pairs.
    • \n
    • cal_file_path (None):\nCalibration file path.
    • \n
    • _parameters (GCMSParameters):\nGC-MS parameters.
    • \n
    • _retention_time_list (list):\nList of retention times.
    • \n
    • _scans_number_list (list):\nList of scan numbers.
    • \n
    • _tic_list (list):\nList of total ion chromatogram values.
    • \n
    • _ms (dict):\nDictionary containing all mass spectra.
    • \n
    • _processed_tic (list):\nList of processed total ion chromatogram values.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • process_chromatogram(plot_res=False). Process the chromatogram.
    • \n
    • plot_gc_peaks(ax=None, color='red'). Plot the GC peaks.
    • \n
    \n", "bases": "corems.mass_spectra.calc.GC_Calc.GC_Calculations, corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.__init__", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None)"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.file_location", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.analyzer", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.instrument_label", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.gcpeaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.gcpeaks", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.ri_pairs_ref", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.ri_pairs_ref", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.cal_file_path", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.cal_file_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.process_chromatogram", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.process_chromatogram", "kind": "function", "doc": "

    Process the chromatogram.

    \n\n

    This method processes the chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • plot_res (bool, optional):\nIf True, plot the results. Defaults to False.
    • \n
    \n", "signature": "(self, plot_res=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.add_mass_spectrum", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.add_mass_spectrum", "kind": "function", "doc": "

    Add a mass spectrum to the GC-MS object.

    \n\n

    This method adds a mass spectrum to the GC-MS object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec (MassSpectrum):\nMass spectrum to be added.
    • \n
    \n", "signature": "(self, mass_spec):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.set_tic_list_from_data", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.set_tic_list_from_data", "kind": "function", "doc": "

    Set the total ion chromatogram list from the mass spectra data within the GC-MS data object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.set_retention_time_from_data", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.set_retention_time_from_data", "kind": "function", "doc": "

    Set the retention time list from the mass spectra data within the GC-MS data object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.set_scans_number_from_data", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.set_scans_number_from_data", "kind": "function", "doc": "

    Set the scan number list from the mass spectra data within the GC-MS data object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.parameter", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.parameter", "kind": "variable", "doc": "

    GCMS Parameters

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.molecular_search_settings", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.molecular_search_settings", "kind": "variable", "doc": "

    Molecular Search Settings

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.chromatogram_settings", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.chromatogram_settings", "kind": "variable", "doc": "

    Chromatogram Settings

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.scans_number", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.scans_number", "kind": "variable", "doc": "

    Scans Number

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.retention_time", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.retention_time", "kind": "variable", "doc": "

    Retention Time

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.processed_tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.processed_tic", "kind": "variable", "doc": "

    Processed Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.tic", "kind": "variable", "doc": "

    Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.max_tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.max_tic", "kind": "variable", "doc": "

    Maximum Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.min_tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.min_tic", "kind": "variable", "doc": "

    Minimum Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.dynamic_range", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.dynamic_range", "kind": "variable", "doc": "

    Dynamic Range of the Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.matched_peaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.matched_peaks", "kind": "variable", "doc": "

    Matched Peaks

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.sorted_gcpeaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.sorted_gcpeaks", "kind": "variable", "doc": "

    Sorted GC Peaks, by retention time

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.unique_metabolites", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.unique_metabolites", "kind": "variable", "doc": "

    Unique Metabolites

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.metabolites_data", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.metabolites_data", "kind": "variable", "doc": "

    Metabolites Data

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.no_matched_peaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.no_matched_peaks", "kind": "variable", "doc": "

    Peaks with no Matched Metabolites

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_gc_peaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_gc_peaks", "kind": "function", "doc": "

    Plot the GC peaks.

    \n\n

    This method plots the GC peaks.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the GC peaks. Defaults to None.
    • \n
    • color (str, optional):\nColor of the GC peaks. Defaults to 'red'.
    • \n
    \n", "signature": "(self, ax=None, color='red'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_excel", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_excel", "kind": "function", "doc": "

    Export the GC-MS data to an Excel file.

    \n\n

    This method exports the GC-MS data to an Excel file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str, pathlib.Path, or s3path.S3Path):\nPath object containing the file location.
    • \n
    • write_mode (str, optional):\nWrite mode. Defaults to 'ab'.
    • \n
    • write_metadata (bool, optional):\nIf True, write the metadata. Defaults to True.
    • \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(\tself,\tout_file_path,\twrite_mode='ab',\twrite_metadata=True,\tid_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_csv", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_csv", "kind": "function", "doc": "

    Export the GC-MS data to a CSV file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str, pathlib.Path, or s3path.S3Path):\nPath object containing the file location.
    • \n
    • separate_output (bool, optional):\nIf True, separate the output. Defaults to False.
    • \n
    • write_metadata (bool, optional):\nIf True, write the metadata. Defaults to True.
    • \n
    \n", "signature": "(\tself,\tout_file_path,\tseparate_output=False,\twrite_metadata=True,\tid_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_pandas", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_pandas", "kind": "function", "doc": "

    Export the GC-MS data to a Pandas dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str, pathlib.Path, or s3path.S3Path):\nPath object containing the file location.
    • \n
    • write_metadata (bool, optional):\nIf True, write the metadata. Defaults to True.
    • \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(self, out_file_path, write_metadata=True, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_dataframe", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_dataframe", "kind": "function", "doc": "

    Export the GC-MS data to a Pandas dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.processing_stats", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.processing_stats", "kind": "function", "doc": "

    Return the processing statistics.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.parameters_json", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.parameters_json", "kind": "function", "doc": "

    Return the parameters in JSON format.

    \n\n
    Parameters
    \n\n
      \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    • output_path (str, optional):\nPath object containing the file location. Defaults to \" \".
    • \n
    \n", "signature": "(self, id_label='corems:', output_path=' '):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_json", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_json", "kind": "function", "doc": "

    Export the GC-MS data to a JSON file.

    \n\n
    Parameters
    \n\n
      \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_hdf", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_hdf", "kind": "function", "doc": "

    Export the GC-MS data to a HDF file.

    \n\n
    Parameters
    \n\n
      \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_chromatogram", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_chromatogram", "kind": "function", "doc": "

    Plot the chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the chromatogram. Defaults to None.
    • \n
    • color (str, optional):\nColor of the chromatogram. Defaults to 'blue'.
    • \n
    \n", "signature": "(self, ax=None, color='blue'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_smoothed_chromatogram", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_smoothed_chromatogram", "kind": "function", "doc": "

    Plot the smoothed chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the smoothed chromatogram. Defaults to None.
    • \n
    • color (str, optional):\nColor of the smoothed chromatogram. Defaults to 'green'.
    • \n
    \n", "signature": "(self, ax=None, color='green'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_detected_baseline", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_detected_baseline", "kind": "function", "doc": "

    Plot the detected baseline.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the detected baseline. Defaults to None.
    • \n
    • color (str, optional):\nColor of the detected baseline. Defaults to 'blue'.
    • \n
    \n", "signature": "(self, ax=None, color='blue'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_baseline_subtraction", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_baseline_subtraction", "kind": "function", "doc": "

    Plot the baseline subtraction.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the baseline subtraction. Defaults to None.
    • \n
    • color (str, optional):\nColor of the baseline subtraction. Defaults to 'black'.
    • \n
    \n", "signature": "(self, ax=None, color='black'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.peaks_rt_tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.peaks_rt_tic", "kind": "function", "doc": "

    Return the peaks, retention time, and total ion chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • json_string (bool, optional):\nIf True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False.
    • \n
    \n", "signature": "(self, json_string=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_processed_chromatogram", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_processed_chromatogram", "kind": "function", "doc": "

    Plot the processed chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the processed chromatogram. Defaults to None.
    • \n
    • color (str, optional):\nColor of the processed chromatogram. Defaults to 'black'.
    • \n
    \n", "signature": "(self, ax=None, color='black'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.chromat_data", "modulename": "corems.mass_spectra.factory.chromat_data", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data", "kind": "class", "doc": "

    A class to represent total ion chromatogram data.

    \n\n

    scans: [int]\n original scan numbers\ntime: [floats]\n list of retention times\ntic: [floats]\n total ion current [chromatogram]\nbpc: [floats]\n base peak [chromatogram]\napexes: [int]
    \n original thermo apex scan number after peak picking

    \n"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.__init__", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tscans: List[int] = <factory>,\ttime: List[float] = <factory>,\ttic: List[float] = <factory>,\tbpc: List[float] = <factory>,\tapexes: List[int] = <factory>)"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.scans", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.scans", "kind": "variable", "doc": "

    \n", "annotation": ": List[int]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.time", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.time", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.tic", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.tic", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.bpc", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.bpc", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.apexes", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.apexes", "kind": "variable", "doc": "

    \n", "annotation": ": List[int]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data", "kind": "class", "doc": "

    A class to represent extracted ion chromatogram data.

    \n\n

    scans: [int]\n original scan numbers\ntime: [floats]\n list of retention times\neic: [floats]\n extracted ion chromatogram\neic_smoothed: [floats]\n extracted ion chromatogram smoothed\napexes: [int]\n original apex scan number after peak picking\nareas: [floats]\n area under the curve for each apex

    \n"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.__init__", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tscans: List[int] = <factory>,\ttime: List[float] = <factory>,\teic: List[float] = <factory>,\teic_smoothed: List[float] = <factory>,\tapexes: List[int] = <factory>,\tareas: List[float] = <factory>)"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.scans", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.scans", "kind": "variable", "doc": "

    \n", "annotation": ": List[int]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.time", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.time", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.eic", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.eic", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.eic_smoothed", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.eic_smoothed", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.apexes", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.apexes", "kind": "variable", "doc": "

    \n", "annotation": ": List[int]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.areas", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.areas", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.lc_class", "modulename": "corems.mass_spectra.factory.lc_class", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase", "kind": "class", "doc": "

    Base class for mass spectra objects.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe location of the file containing the mass spectra data.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
    • \n
    • instrument_label (str, optional):\nThe type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
    • \n
    • sample_name (str, optional):\nThe name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
    • \n
    • spectra_parser (object, optional):\nThe spectra parser object used to create the mass spectra object. Defaults to None.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • spectra_parser_class (class):\nThe class of the spectra parser used to create the mass spectra object.
    • \n
    • file_location (str or Path):\nThe location of the file containing the mass spectra data.
    • \n
    • sample_name (str):\nThe name of the sample; defaults to the file name if not provided to the parser.
    • \n
    • analyzer (str):\nThe type of analyzer used to generate the mass spectra data. Derived from the spectra parser.
    • \n
    • instrument_label (str):\nThe type of instrument used to generate the mass spectra data. Derived from the spectra parser.
    • \n
    • _scan_info (dict):\nA dictionary containing the scan data with columns for scan number, scan time, ms level, precursor m/z,\nscan text, and scan window (lower and upper).\nAssociated with the property scan_df, which returns a pandas DataFrame or can set this attribute from a pandas DataFrame.
    • \n
    • _ms (dict):\nA dictionary containing mass spectra for the dataset, keys of dictionary are scan numbers. Initialized as an empty dictionary.
    • \n
    • _ms_unprocessed (dictionary of pandas.DataFrames or None):\nA dictionary of unprocessed mass spectra data, as an (optional) intermediate data product for peak picking.\nKey is ms_level, and value is dataframe with columns for scan number, m/z, and intensity. Default is None.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • add_mass_spectra(scan_list, spectrum_mode: str = 'profile', use_parser = True, auto_process=True).\nAdd mass spectra (or a single mass spectrum) to _ms slot, from a list of scans
    • \n
    • get_time_of_scan_id(scan).\nReturns the scan time for the specified scan number.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.__init__", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None,\tspectra_parser=None)"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.file_location", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.analyzer", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.instrument_label", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.add_mass_spectrum", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.add_mass_spectrum", "kind": "function", "doc": "

    Adds a mass spectrum to the dataset.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec (MassSpectrum):\nThe corems MassSpectrum object to be added to the dataset.
    • \n
    \n\n
    Notes
    \n\n

    This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.

    \n", "signature": "(self, mass_spec):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.add_mass_spectra", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.add_mass_spectra", "kind": "function", "doc": "

    Add mass spectra to _ms dictionary, from a list of scans or single scan

    \n\n
    Notes
    \n\n

    The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.

    \n\n
    Parameters
    \n\n
      \n
    • scan_list (list of ints):\nList of scans to use to populate _ms slot
    • \n
    • spectrum_mode (str or None):\nThe spectrum mode to use for the mass spectra.\nIf None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types).\nDefaults to None.
    • \n
    • ms_level (int, optional):\nThe MS level to use for the mass spectra.\nThis is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects.\nDefaults to 1.
    • \n
    • use_parser (bool):\nWhether to use the mass spectra parser to get the mass spectra. Defaults to True.
    • \n
    • auto_process (bool):\nWhether to auto-process the mass spectra. Defaults to True.
    • \n
    • ms_params (MSParameters or None):\nThe mass spectrum parameters to use for the mass spectra. If None, uses the globally set MSParameters.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • TypeError: If scan_list is not a list of ints
    • \n
    • ValueError: If polarity is not 'positive' or 'negative'\nIf ms_level is not 1 or 2
    • \n
    \n", "signature": "(\tself,\tscan_list,\tspectrum_mode=None,\tms_level=1,\tuse_parser=True,\tauto_process=True,\tms_params=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.get_time_of_scan_id", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.get_time_of_scan_id", "kind": "function", "doc": "

    Returns the scan time for the specified scan number.

    \n\n
    Parameters
    \n\n
      \n
    • scan (int):\nThe scan number of the desired scan time.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The scan time for the specified scan number (in minutes).
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no scan time is found for the specified scan number.
    • \n
    \n", "signature": "(self, scan):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.scan_df", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.scan_df", "kind": "variable", "doc": "

    pandas.DataFrame : A pandas DataFrame containing the scan info data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper).

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase", "kind": "class", "doc": "

    A class representing a liquid chromatography-mass spectrometry (LC-MS) data object.

    \n\n

    This class is not intended to be instantiated directly, but rather to be instantiated by an appropriate mass spectra parser using the get_lcms_obj() method.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe location of the file containing the mass spectra data.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
    • \n
    • instrument_label (str, optional):\nThe type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
    • \n
    • sample_name (str, optional):\nThe name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
    • \n
    • spectra_parser (object, optional):\nThe spectra parser object used to create the mass spectra object. Defaults to None.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • polarity (str):\nThe polarity of the ionization mode used for the dataset.
    • \n
    • _parameters (LCMSParameters):\nThe parameters used for all methods called on the LCMSBase object. Set upon instantiation from LCMSParameters.
    • \n
    • _retention_time_list (numpy.ndarray):\nAn array of retention times for the dataset.
    • \n
    • _scans_number_list (list):\nA list of scan numbers for the dataset.
    • \n
    • _tic_list (numpy.ndarray):\nAn array of total ion current (TIC) values for the dataset.
    • \n
    • eics (dict):\nA dictionary containing extracted ion chromatograms (EICs) for the dataset.\nKey is the mz of the EIC. Initialized as an empty dictionary.
    • \n
    • mass_features (dictionary of LCMSMassFeature objects):\nA dictionary containing mass features for the dataset.\nKey is mass feature ID. Initialized as an empty dictionary.
    • \n
    • spectral_search_results (dictionary of MS2SearchResults objects):\nA dictionary containing spectral search results for the dataset.\nKey is scan number : precursor mz. Initialized as an empty dictionary.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_parameters_json().\nReturns the parameters used for the LC-MS analysis in JSON format.
    • \n
    • add_associated_ms2_dda(auto_process=True, use_parser=True, spectrum_mode=None, ms_params_key='ms2', scan_filter=None)\nAdds which MS2 scans are associated with each mass feature to the\nmass_features dictionary and optionally adds the MS2 spectra to the _ms dictionary.
    • \n
    • add_associated_ms1(auto_process=True, use_parser=True, spectrum_mode=None)\nAdds the MS1 spectra associated with each mass feature to the\nmass_features dictionary and adds the MS1 spectra to the _ms dictionary.
    • \n
    • mass_features_to_df()\nReturns a pandas dataframe summarizing the mass features in the dataset.
    • \n
    • set_tic_list_from_data(overwrite=False)\nSets the TIC list from the mass spectrum objects within the _ms dictionary.
    • \n
    • set_retention_time_from_data(overwrite=False)\nSets the retention time list from the data in the _ms dictionary.
    • \n
    • set_scans_number_from_data(overwrite=False)\nSets the scan number list from the data in the _ms dictionary.
    • \n
    \n", "bases": "MassSpectraBase, corems.mass_spectra.calc.lc_calc.LCCalculations, corems.mass_spectra.calc.lc_calc.PHCalculations, corems.molecular_id.search.lcms_spectral_search.LCMSSpectralSearch"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.__init__", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None,\tspectra_parser=None)"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.polarity", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.polarity", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.eics", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.eics", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.mass_features", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.mass_features", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.spectral_search_results", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.spectral_search_results", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.get_parameters_json", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.get_parameters_json", "kind": "function", "doc": "

    Returns the parameters stored for the LC-MS object in JSON format.

    \n\n
    Returns
    \n\n
      \n
    • str: The parameters used for the LC-MS analysis in JSON format.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.remove_unprocessed_data", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.remove_unprocessed_data", "kind": "function", "doc": "

    Removes the unprocessed data from the LCMSBase object.

    \n\n
    Parameters
    \n\n
      \n
    • ms_level (int, optional):\nThe MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If ms_level is not 1 or 2.
    • \n
    \n\n
    Notes
    \n\n

    This method is useful for freeing up memory after the data has been processed.

    \n", "signature": "(self, ms_level=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.add_associated_ms2_dda", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.add_associated_ms2_dda", "kind": "function", "doc": "

    Add MS2 spectra associated with mass features to the dataset.

    \n\n

    Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)

    \n\n
    Parameters
    \n\n
      \n
    • auto_process (bool, optional):\nIf True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
    • \n
    • use_parser (bool, optional):\nIf True, invoke the spectra parser to get the MS2 spectra. Default is True.
    • \n
    • spectrum_mode (str or None, optional):\nThe spectrum mode to use for the mass spectra. If None, method will use the spectrum mode\nfrom the spectra parser to ascertain the spectrum mode (this allows for mixed types).\nDefaults to None. (faster if defined, otherwise will check each scan)
    • \n
    • ms_params_key (string, optional):\nThe key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute.\nDefaults to 'ms2'.
    • \n
    • scan_filter (str):\nA string to filter the scans to add to the _ms dictionary. If None, all scans are added. Defaults to None.\n\"hcd\" will pull out only HCD scans.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If mass_features is not set, must run find_mass_features() first.\nIf no MS2 scans are found in the dataset.\nIf no precursor m/z values are found in MS2 scans, not a DDA dataset.
    • \n
    \n", "signature": "(\tself,\tauto_process=True,\tuse_parser=True,\tspectrum_mode=None,\tms_params_key='ms2',\tscan_filter=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.add_associated_ms1", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.add_associated_ms1", "kind": "function", "doc": "

    Add MS1 spectra associated with mass features to the dataset.

    \n\n
    Parameters
    \n\n
      \n
    • auto_process (bool, optional):\nIf True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
    • \n
    • use_parser (bool, optional):\nIf True, invoke the spectra parser to get the MS1 spectra. Default is True.
    • \n
    • spectrum_mode (str or None, optional):\nThe spectrum mode to use for the mass spectra. If None, method will use the spectrum mode\nfrom the spectra parser to ascertain the spectrum mode (this allows for mixed types).\nDefaults to None. (faster if defined, otherwise will check each scan)
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If mass_features is not set, must run find_mass_features() first.\nIf apex scans are not profile mode, all apex scans must be profile mode for averaging.\nIf number of scans to average is not 1 or an integer with an integer median (i.e. 3, 5, 7, 9).\nIf deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
    • \n
    \n", "signature": "(self, auto_process=True, use_parser=True, spectrum_mode=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.mass_features_to_df", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.mass_features_to_df", "kind": "function", "doc": "

    Returns a pandas dataframe summarizing the mass features.

    \n\n

    The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity,\npersistence, area, monoisotopic_mf_id, and isotopologue_type. The index is set to mf_id (mass feature ID).

    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A pandas dataframe of mass features with the following columns:\nmf_id, mz, apex_scan, scan_time, intensity, persistence, area.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.mass_features_ms1_annot_to_df", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.mass_features_ms1_annot_to_df", "kind": "function", "doc": "

    Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.

    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A pandas dataframe of MS1 annotations for the mass features in the dataset.\nThe index is set to mf_id (mass feature ID)
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Warning: If no MS1 annotations were found for the mass features in the dataset.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.mass_features_ms2_annot_to_df", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.mass_features_ms2_annot_to_df", "kind": "function", "doc": "

    Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_metadata (dict of MolecularMetadata objects):\nA dictionary of MolecularMetadata objects, keyed by metabref_mol_id. Defaults to None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A pandas dataframe of MS2 annotations for the mass features in the dataset, \nand optionally molecular metadata. The index is set to mf_id (mass feature ID)
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Warning: If no MS2 annotations were found for the mass features in the dataset.
    • \n
    \n", "signature": "(self, molecular_metadata=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.set_tic_list_from_data", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.set_tic_list_from_data", "kind": "function", "doc": "

    Sets the TIC list from the mass spectrum objects within the _ms dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • overwrite (bool, optional):\nIf True, overwrites the TIC list if it is already set. Defaults to False.
    • \n
    \n\n
    Notes
    \n\n

    If the _ms dictionary is incomplete, sets the TIC list to an empty list.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass spectra are found in the dataset.\nIf the TIC list is already set and overwrite is False.
    • \n
    \n", "signature": "(self, overwrite=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.set_retention_time_from_data", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.set_retention_time_from_data", "kind": "function", "doc": "

    Sets the retention time list from the data in the _ms dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • overwrite (bool, optional):\nIf True, overwrites the retention time list if it is already set. Defaults to False.
    • \n
    \n\n
    Notes
    \n\n

    If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass spectra are found in the dataset.\nIf the retention time list is already set and overwrite is False.
    • \n
    \n", "signature": "(self, overwrite=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.set_scans_number_from_data", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.set_scans_number_from_data", "kind": "function", "doc": "

    Sets the scan number list from the data in the _ms dictionary.

    \n\n
    Notes
    \n\n

    If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass spectra are found in the dataset.\nIf the scan number list is already set and overwrite is False.
    • \n
    \n", "signature": "(self, overwrite=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.ms1_scans", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.ms1_scans", "kind": "variable", "doc": "

    list : A list of MS1 scan numbers for the dataset.

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.parameters", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.parameters", "kind": "variable", "doc": "

    LCMSParameters : The parameters used for the LC-MS analysis.

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.scans_number", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.scans_number", "kind": "variable", "doc": "

    list : A list of scan numbers for the dataset.

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.retention_time", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.retention_time", "kind": "variable", "doc": "

    numpy.ndarray : An array of retention times for the dataset.

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.tic", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.tic", "kind": "variable", "doc": "

    numpy.ndarray : An array of TIC values for the dataset.

    \n"}, {"fullname": "corems.mass_spectra.input", "modulename": "corems.mass_spectra.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF", "modulename": "corems.mass_spectra.input.andiNetCDF", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF", "kind": "class", "doc": "

    A class for reading AndiNetCDF files and extracting mass spectra data.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe location of the AndiNetCDF file.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used (default is 'Quadruple').
    • \n
    • instrument_label (str, optional):\nThe label of the instrument (default is 'GCMS-Agilent').
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the data (default is True).
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (Path):\nThe path to the AndiNetCDF file.
    • \n
    • net_cdf_obj (Dataset):\nThe NetCDF dataset object.
    • \n
    • ionization_type (str):\nThe ionization type used in the experiment.
    • \n
    • experiment_type (str):\nThe type of experiment.
    • \n
    • list_scans (range):\nThe range of scan numbers in the dataset.
    • \n
    • initial_scan_number (int):\nThe number of the initial scan.
    • \n
    • final_scan_number (int):\nThe number of the final scan.
    • \n
    • analyzer (str):\nThe type of analyzer used.
    • \n
    • instrument_label (str):\nThe label of the instrument.
    • \n
    • gcms (GCMSBase):\nThe GCMSBase object for storing mass spectra data.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • polarity(). \nGet the polarity of the ionization.
    • \n
    • get_mass_spectrum(mz, abun, rp, d_params). \nAdd a mass spectrum to the GCMSBase object.
    • \n
    • run(). \nPopulate the GCMSBase object with mass spectra data.
    • \n
    • import_mass_spectra(d_params). \nImport mass spectra data from the AndiNetCDF file.
    • \n
    • get_gcms_obj(). \nGet the GCMSBase object.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.__init__", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(\tfile_location: str | pathlib.Path,\tanalyzer='Quadruple',\tinstrument_label='GCMS-Agilent',\tauto_process=True)"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.ionization_type", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.ionization_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.experiment_type", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.experiment_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.list_scans", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.list_scans", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.initial_scan_number", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.initial_scan_number", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.final_scan_number", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.final_scan_number", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.analyzer", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.instrument_label", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.gcms", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.gcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.polarity", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.polarity", "kind": "variable", "doc": "

    Get the polarity of the ionization.

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.get_mass_spectrum", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.get_mass_spectrum", "kind": "function", "doc": "

    Add a mass spectrum to the GCMSBase object.

    \n\n
    Parameters
    \n\n
      \n
    • mz (array-like):\nThe m/z values of the mass spectrum.
    • \n
    • abun (array-like):\nThe abundance values of the mass spectrum.
    • \n
    • rp (array-like):\nThe resolution values of the mass spectrum.
    • \n
    • d_params (dict):\nAdditional parameters for the mass spectrum.
    • \n
    \n", "signature": "(self, mz, abun, rp, d_params):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.run", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.run", "kind": "function", "doc": "

    Populate the GCMSBase object with mass spectra data.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.import_mass_spectra", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.import_mass_spectra", "kind": "function", "doc": "

    Import mass spectra data from the AndiNetCDF file.

    \n\n
    Parameters
    \n\n
      \n
    • d_params (dict):\nAdditional parameters for the mass spectra.
    • \n
    \n", "signature": "(self, d_params):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.get_gcms_obj", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.get_gcms_obj", "kind": "function", "doc": "

    Get the GCMSBase object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5", "modulename": "corems.mass_spectra.input.boosterHDF5", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra", "kind": "class", "doc": "

    Class for reading HDF5 files containing booster mass spectra.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (Path or S3Path):\nThe full path to the HDF5 file.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used for the mass spectra. Defaults to \"ICR\".
    • \n
    • instrument_label (str, optional):\nThe label of the instrument. Defaults to \"21T\".
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectra. Defaults to True.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.__init__", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.__init__", "kind": "function", "doc": "

    Initialize the ReadHDF_BoosterMassSpectra class.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (Path or S3Path):\nThe full path to the HDF5 file.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used for the mass spectra. Defaults to \"ICR\".
    • \n
    • instrument_label (str, optional):\nThe label of the instrument. Defaults to \"21T\".
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectra. Defaults to True.
    • \n
    \n", "signature": "(\tfile_location: pathlib.Path | s3path.S3Path,\tanalyzer='ICR',\tinstrument_label='21T',\tauto_process=True)"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.lcms", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.lcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.hdf_obj", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.hdf_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.list_scans", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.list_scans", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.initial_scan_number", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.initial_scan_number", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.final_scan_number", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.final_scan_number", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.file_location", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.auto_process", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.auto_process", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.analyzer", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.instrument_label", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.get_polarity", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.get_polarity", "kind": "function", "doc": "

    Get the polarity of a scan.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (Path or S3Path):\nThe full path to the HDF5 file.
    • \n
    • scan (int):\nThe scan number.
    • \n
    \n", "signature": "(self, file_location: pathlib.Path | s3path.S3Path, scan: int):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.get_attr_data", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.get_attr_data", "kind": "function", "doc": "

    Get the attribute data of a scan.

    \n\n
    Parameters
    \n\n
      \n
    • scan (int):\nThe scan number.
    • \n
    • attr_srt (str):\nThe attribute name.
    • \n
    \n", "signature": "(self, scan, attr_srt):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.import_mass_spectra", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.import_mass_spectra", "kind": "function", "doc": "

    Import the mass spectra from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • d_params (dict):\nThe parameters for importing the mass spectra.
    • \n
    \n", "signature": "(self, d_params: dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.get_mass_spectrum", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.get_mass_spectrum", "kind": "function", "doc": "

    Get the mass spectrum for a scan.

    \n\n
    Parameters
    \n\n
      \n
    • scan (int):\nThe scan number.
    • \n
    • d_params (dict):\nThe parameters for creating the mass spectrum.
    • \n
    \n", "signature": "(self, scan: int, d_params: dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.run", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.run", "kind": "function", "doc": "

    Run the thread to create the LCMS object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.get_lcms_obj", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.get_lcms_obj", "kind": "function", "doc": "

    Get the LCMS object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix", "modulename": "corems.mass_spectra.input.brukerSolarix", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra", "kind": "class", "doc": "

    Class for reading Bruker Solarix Transient Mass Spectra.

    \n\n
    Parameters
    \n\n
      \n
    • d_directory_location (str, pathlib.Path, or s3path.S3Path):\nPath object from pathlib containing the file location.
    • \n
    • analyzer (str, optional):\nType of analyzer used in the mass spectrometer. Defaults to \"ICR\".
    • \n
    • instrument_label (str, optional):\nLabel for the instrument. Defaults to \"15T\".
    • \n
    • auto_process (bool, optional):\nFlag indicating whether to automatically process the mass spectra. Defaults to True.
    • \n
    • keep_profile (bool, optional):\nFlag indicating whether to keep the profile data in the mass spectra. Defaults to False.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.__init__", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(\td_directory_location: str | pathlib.Path | s3path.S3Path,\tanalyzer='ICR',\tinstrument_label='15T',\tauto_process=True,\tkeep_profile=False)"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.scan_attr", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.scan_attr", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.lcms", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.lcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.auto_process", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.auto_process", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.keep_profile", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.keep_profile", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.get_scan_attr", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.get_scan_attr", "kind": "function", "doc": "

    Get the scan attributes from the scan.xml file.

    \n\n
    Returns
    \n\n
      \n
    • dict: Dictionary containing the scan number as key and a tuple of retention time and TIC as value.
    • \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.import_mass_spectra", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.import_mass_spectra", "kind": "function", "doc": "

    Import the mass spectra from the scan.xml file.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.get_mass_spectrum", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.get_mass_spectrum", "kind": "function", "doc": "

    Get the mass spectrum for a given scan number.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int):\nScan number.
    • \n
    \n", "signature": "(self, scan_number: int):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.run", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.run", "kind": "function", "doc": "

    Run the import_mass_spectra method.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.get_lcms_obj", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.get_lcms_obj", "kind": "function", "doc": "

    Get the LCMSBase object.

    \n\n
    Raises
    \n\n
      \n
    • Exception: If the LCMSBase object is empty.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5", "modulename": "corems.mass_spectra.input.corems_hdf5", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra", "kind": "class", "doc": "

    Class to read CoreMS HDF5 files and populate a LCMS or MassSpectraBase object.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\nThe location of the HDF5 file to read, including the suffix.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (str):\nThe location of the HDF5 file to read.
    • \n
    • h5pydata (h5py.File):\nThe HDF5 file object.
    • \n
    • scans (list):\nA list of the location of individual mass spectra within the HDF5 file.
    • \n
    • scan_number_list (list):\nA list of the scan numbers of the mass spectra within the HDF5 file.
    • \n
    • parameters_location (str):\nThe location of the parameters file (json or toml).
    • \n
    \n\n
    Methods
    \n\n
      \n
    • import_mass_spectra(mass_spectra).\nImports all mass spectra from the HDF5 file onto the LCMS or MassSpectraBase object.
    • \n
    • get_mass_spectrum_from_scan(scan_number).\nReturn mass spectrum data object from scan number.
    • \n
    • load().\nPlaceholder method to meet the requirements of the SpectraParserInterface.
    • \n
    • run(mass_spectra).\nRuns the importer functions to populate a LCMS or MassSpectraBase object.
    • \n
    • import_scan_info(mass_spectra).\nImports the scan info from the HDF5 file to populate the _scan_info attribute\non the LCMS or MassSpectraBase object
    • \n
    • import_ms_unprocessed(mass_spectra).\nImports the unprocessed mass spectra from the HDF5 file to populate the\n_ms_unprocessed attribute on the LCMS or MassSpectraBase object
    • \n
    • import_parameters(mass_spectra).\nImports the parameters from the HDF5 file to populate the parameters\nattribute on the LCMS or MassSpectraBase object
    • \n
    • import_mass_features(mass_spectra).\nImports the mass features from the HDF5 file to populate the mass_features\nattribute on the LCMS or MassSpectraBase object
    • \n
    • import_eics(mass_spectra).\nImports the extracted ion chromatograms from the HDF5 file to populate the\neics attribute on the LCMS or MassSpectraBase object
    • \n
    • import_spectral_search_results(mass_spectra).\nImports the spectral search results from the HDF5 file to populate the\nspectral_search_results attribute on the LCMS or MassSpectraBase object
    • \n
    • get_mass_spectra_obj().\nReturn mass spectra data object, populating the _ms list on the LCMS or\nMassSpectraBase object from the HDF5 file
    • \n
    • get_lcms_obj().\nReturn LCMSBase object, populating the majority of the attributes on the\nLCMS object from the HDF5 file
    • \n
    \n", "bases": "corems.mass_spectra.input.parserbase.SpectraParserInterface, corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum, threading.Thread"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.__init__", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(file_location: str)"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.scans", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.scans", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.scan_number_list", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.scan_number_list", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_mass_spectrum_from_scan", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_mass_spectrum_from_scan", "kind": "function", "doc": "

    Return mass spectrum data object from scan number.

    \n", "signature": "(self, scan_number):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.load", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.load", "kind": "function", "doc": "

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_ms_raw", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_ms_raw", "kind": "function", "doc": "

    \n", "signature": "(self, spectra=None, scan_df=None) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_scan_df", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_scan_df", "kind": "function", "doc": "

    Return scan data as a pandas DataFrame.

    \n", "signature": "(self) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.run", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.run", "kind": "function", "doc": "

    Runs the importer functions to populate a LCMS or MassSpectraBase object.

    \n\n
    Notes
    \n\n

    The following functions are run in order, if the HDF5 file contains the necessary data:

    \n\n
      \n
    1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object.
    2. \n
    3. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object.
    4. \n
    5. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object.
    6. \n
    7. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object.
    8. \n
    9. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object.
    10. \n
    11. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object.
    12. \n
    13. import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object.
    14. \n
    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase or MassSpectraBase):\nThe LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes.
    • \n
    • load_raw (bool):\nIf True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates several attributes on the LCMS or MassSpectraBase object.
    • \n
    \n", "signature": "(self, mass_spectra, load_raw=True) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_mass_spectra", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_mass_spectra", "kind": "function", "doc": "

    Imports all mass spectra from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with mass spectra.
    • \n
    • load_raw (bool):\nIf True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the '_ms' list on the LCMSBase or MassSpectraBase
    • \n
    • object with mass spectra from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra, load_raw=True) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_scan_info", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_scan_info", "kind": "function", "doc": "

    Imports the scan info from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with scan info.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a pandas DataFrame of the 'scan_info' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_ms_unprocessed", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_ms_unprocessed", "kind": "function", "doc": "

    Imports the unprocessed mass spectra from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with unprocessed mass spectra.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a dictionary of the 'ms_unprocessed' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_parameters", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_parameters", "kind": "function", "doc": "

    Imports the parameters from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with parameters.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase
    • \n
    • object with a dictionary of the 'parameters' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_mass_features", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_mass_features", "kind": "function", "doc": "

    Imports the mass features from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with mass features.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a dictionary of the 'mass_features' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_eics", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_eics", "kind": "function", "doc": "

    Imports the extracted ion chromatograms from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a dictionary of the 'eics' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_spectral_search_results", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_spectral_search_results", "kind": "function", "doc": "

    Imports the spectral search results from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with spectral search results.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a dictionary of the 'spectral_search_results' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_mass_spectra_obj", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_mass_spectra_obj", "kind": "function", "doc": "

    Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • load_raw (bool):\nIf True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True.
    • \n
    \n", "signature": "(\tself,\tload_raw=True) -> corems.mass_spectra.factory.lc_class.MassSpectraBase:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_lcms_obj", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_lcms_obj", "kind": "function", "doc": "

    Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • load_raw (bool):\nIf True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    • \n
    • use_original_parser (bool):\nIf True, use the original parser to populate the LCMS object. Default is True.
    • \n
    • raw_file_path (str):\nThe location of the raw file to parse if attempting to use original parser.\nDefault is None, which attempts to get the raw file path from the HDF5 file.\nIf the original file path has moved, this parameter can be used to specify the new location.
    • \n
    \n", "signature": "(\tself,\tload_raw=True,\tuse_original_parser=True,\traw_file_path=None) -> corems.mass_spectra.factory.lc_class.LCMSBase:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.add_original_parser", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.add_original_parser", "kind": "function", "doc": "

    Add the original parser to the mass spectra object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (MassSpectraBase | LCMSBase):\nThe MassSpectraBase or LCMSBase object to add the original parser to.
    • \n
    • raw_file_path (str):\nThe location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra, raw_file_path=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList", "modulename": "corems.mass_spectra.input.massList", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText", "kind": "class", "doc": "

    Class for reading CoreMS mass spectra from a text file.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str, pathlib.Path, or s3path.S3Path):\nPath object from pathlib containing the file location
    • \n
    • analyzer (str, optional):\nName of the analyzer, by default 'Unknown'
    • \n
    • instrument_label (str, optional):\nLabel of the instrument, by default 'Unknown'
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • lcms (LCMSBase):\nLCMSBase object for storing the mass spectra data.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_scans_filename(). Get the filenames of all the scan files associated with the CoreMS file.
    • \n
    • set_filepath_datatype_and_delimiter(file_path_obj). Set the file path, data type, and delimiter based on the file path object.
    • \n
    • import_mass_spectra(). Import the mass spectra from the scan files and add them to the LCMSBase object.
    • \n
    • run(). Run the import_mass_spectra method to create the LCMSBase object.
    • \n
    • get_lcms_obj(). Get the LCMSBase object.
    • \n
    \n", "bases": "corems.mass_spectrum.input.massList.ReadCoremsMasslist, threading.Thread"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.__init__", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(file_location, analyzer='Unknown', instrument_label='Unknown')"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.lcms", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.lcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.get_scans_filename", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.get_scans_filename", "kind": "function", "doc": "

    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.set_filepath_datatype_and_delimiter", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.set_filepath_datatype_and_delimiter", "kind": "function", "doc": "

    \n", "signature": "(self, file_path_obj) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.import_mass_spectra", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.import_mass_spectra", "kind": "function", "doc": "

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.run", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.run", "kind": "function", "doc": "

    Creates the LCMS object and imports mass spectra.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.get_lcms_obj", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.get_lcms_obj", "kind": "function", "doc": "

    Returns the LCMSBase object associated with the massList.

    \n\n

    If the LCMSBase object is already initialized, it is returned.\nOtherwise, an exception is raised.

    \n\n

    Raises:\n Exception: If the LCMSBase object is not initialized.

    \n", "signature": "(self) -> corems.mass_spectra.factory.lc_class.LCMSBase:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml", "modulename": "corems.mass_spectra.input.mzml", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser", "kind": "class", "doc": "

    A class for parsing mzml spectrometry data files into MassSpectraBase or LCMSBase objects

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe path to the mzML file to be parsed.
    • \n
    • analyzer (str, optional):\nThe type of mass analyzer used in the instrument. Default is \"Unknown\".
    • \n
    • instrument_label (str, optional):\nThe name of the instrument used to acquire the data. Default is \"Unknown\".
    • \n
    • sample_name (str, optional):\nThe name of the sample being analyzed. If not provided, the stem of the file_location path will be used.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (Path):\nThe path to the mzML file being parsed.
    • \n
    • analyzer (str):\nThe type of mass analyzer used in the instrument.
    • \n
    • instrument_label (str):\nThe name of the instrument used to acquire the data.
    • \n
    • sample_name (str):\nThe name of the sample being analyzed.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • load().\nLoad mzML file using pymzml.run.Reader and return the data as a numpy array.
    • \n
    • run(spectra=True).\nParses the mzml file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe.
    • \n
    • get_mass_spectrum_from_scan(scan_number, spectrum_mode, auto_process=True).\nParses the mzml file and returns a MassSpecBase object from a single scan.
    • \n
    • get_mass_spectra_obj().\nParses the mzml file and instantiates a MassSpectraBase object.
    • \n
    • get_lcms_obj().\nParses the mzml file and instantiates an LCMSBase object.
    • \n
    \n\n

    Inherits from SpectraParserInterface.

    \n", "bases": "corems.mass_spectra.input.parserbase.SpectraParserInterface"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.__init__", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None)"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.file_location", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.analyzer", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.instrument_label", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.load", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.load", "kind": "function", "doc": "

    Load mzML file using pymzml.run.Reader and return the data as a numpy array.

    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The mass spectra data as a numpy array.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_scan_df", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_scan_df", "kind": "function", "doc": "

    Return scan data as a pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • data (pymzml.run.Reader):\nThe mass spectra data.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    • \n
    \n", "signature": "(self, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_ms_raw", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_ms_raw", "kind": "function", "doc": "

    Return a dictionary of mass spectra data as a pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • spectra (str):\nWhich mass spectra data to include in the output. \nOptions: None, \"ms1\", \"ms2\", \"all\".
    • \n
    • scan_df (pandas.DataFrame):\nScan dataframe. Output from get_scan_df().
    • \n
    • data (pymzml.run.Reader):\nThe mass spectra data.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary containing the mass spectra data as pandas DataFrames, with keys corresponding to the MS level.
    • \n
    \n", "signature": "(self, spectra, scan_df, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.run", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.run", "kind": "function", "doc": "

    Parse the mzML file and return a dictionary of spectra dataframes and a scan metadata dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • spectra (str, optional):\nWhich mass spectra data to include in the output. Default is \"all\".\nOther options: None, \"ms1\", \"ms2\".
    • \n
    • scan_df (pandas.DataFrame, optional):\nScan dataframe. If not provided, the scan dataframe is created from the mzML file.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing two elements:\n
        \n
      • A dictionary containing the mass spectra data as numpy arrays, with keys corresponding to the MS level.
      • \n
      • A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
      • \n
    • \n
    \n", "signature": "(self, spectra='all', scan_df=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_mass_spectrum_from_scan", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_mass_spectrum_from_scan", "kind": "function", "doc": "

    Instantiate a mass spectrum object from the mzML file.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int):\nThe scan number to be parsed.
    • \n
    • spectrum_mode (str):\nThe type of spectrum to instantiate. Must be 'profile' or 'centroid'.
    • \n
    • polarity (int):\nThe polarity of the scan. Must be -1 or 1.
    • \n
    • auto_process (bool, optional):\nIf True, process the mass spectrum. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile | MassSpecCentroid: The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    • \n
    \n", "signature": "(self, scan_number, spectrum_mode, auto_process=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_mass_spectra_obj", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_mass_spectra_obj", "kind": "function", "doc": "

    Instantiate a MassSpectraBase object from the mzML file.

    \n\n
    Returns
    \n\n
      \n
    • MassSpectraBase: The MassSpectra object containing the parsed mass spectra.
      \nThe object is instantiated with the mzML file, analyzer, instrument, sample name, and scan dataframe.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_lcms_obj", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_lcms_obj", "kind": "function", "doc": "

    Instantiates an LCMSBase object from the mzML file.

    \n\n
    Parameters
    \n\n
      \n
    • spectra (str, optional):\nWhich mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • LCMSBase: LCMS object containing mass spectra data. \nThe object is instantiated with the mzML file, analyzer, instrument, sample name, scan dataframe, \nand mz dataframe(s), as well as lists of scan numbers, retention times, and TICs.
    • \n
    \n", "signature": "(self, spectra='all'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase", "modulename": "corems.mass_spectra.input.parserbase", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface", "kind": "class", "doc": "

    Interface for parsing mass spectra data into MassSpectraBase objects.

    \n\n
    Methods
    \n\n
      \n
    • load().\nLoad mass spectra data.
    • \n
    • run().\nParse mass spectra data.
    • \n
    • get_mass_spectra_obj().\nReturn MassSpectraBase object with several attributes populated
    • \n
    • get_mass_spectrum_from_scan(scan_number).\nReturn MassSpecBase data object from scan number.
    • \n
    \n\n
    Notes
    \n\n

    This is an abstract class and should not be instantiated directly.

    \n", "bases": "abc.ABC"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.load", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.load", "kind": "function", "doc": "

    Load mass spectra data.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.run", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.run", "kind": "function", "doc": "

    Parse mass spectra data.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.get_scan_df", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.get_scan_df", "kind": "function", "doc": "

    Return scan data as a pandas DataFrame.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.get_ms_raw", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.get_ms_raw", "kind": "function", "doc": "

    Return a dictionary of mass spectra data as a pandas DataFrame.

    \n", "signature": "(self, spectra, scan_df):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.get_mass_spectra_obj", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.get_mass_spectra_obj", "kind": "function", "doc": "

    Return mass spectra data object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.get_mass_spectrum_from_scan", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.get_mass_spectrum_from_scan", "kind": "function", "doc": "

    Return mass spectrum data object from scan number.

    \n", "signature": "(self, scan_number, spectrum_mode, auto_process=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader", "modulename": "corems.mass_spectra.input.rawFileReader", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass", "kind": "class", "doc": "

    Class for parsing Thermo Raw files and extracting information from them.

    \n\n

    Parameters:

    \n\n

    file_location : str or pathlib.Path or s3path.S3Path\n Thermo Raw file path or S3 path.

    \n\n

    Attributes:

    \n\n

    file_path : str or pathlib.Path or s3path.S3Path\n The file path of the Thermo Raw file.\nparameters : LCMSParameters\n The LCMS parameters for the Thermo Raw file.\nchromatogram_settings : LiquidChromatographSetting\n The chromatogram settings for the Thermo Raw file.\nscans : list or tuple\n The selected scans for the Thermo Raw file.\nstart_scan : int\n The starting scan number for the Thermo Raw file.\nend_scan : int\n The ending scan number for the Thermo Raw file.

    \n\n

    Methods:

    \n\n
      \n
    • set_msordertype(scanFilter, mstype: str = 'ms1') -> scanFilter\nConvert the user-passed MS Type string to a Thermo MSOrderType object.
    • \n
    • get_creation_time() -> datetime.datetime\nExtract the creation date stamp from the .RAW file and return it as a formatted datetime object.
    • \n
    • remove_temp_file()\nRemove the temporary file if the path is from S3Path.
    • \n
    • get_polarity_mode(scan_number: int) -> int\nGet the polarity mode for the given scan number.
    • \n
    • get_filter_for_scan_num(scan_number: int) -> List[str]\nGet the filter for the given scan number.
    • \n
    • check_full_scan(scan_number: int) -> bool\nCheck if the given scan number is a full scan.
    • \n
    • get_all_filters() -> Tuple[Dict[int, str], List[str]]\nGet all scan filters for the Thermo Raw file.
    • \n
    • get_scan_header(scan: int) -> Dict[str, Any]\nGet the full dictionary of scan header metadata for the given scan number.
    • \n
    • get_rt_time_from_trace(trace) -> Tuple[List[float], List[float], List[int]]\nGet the retention time, intensity, and scan number from the given trace.
    • \n
    • get_eics(target_mzs: List[float], tic_data: Dict[str, Any], ms_type: str = 'MS !d',\n peak_detection: bool = True, smooth: bool = True, plot: bool = False,\n ax: Optional[matplotlib.axes.Axes] = None, legend: bool = False) -> Tuple[Dict[float, EIC_Data], matplotlib.axes.Axes]\nGet the extracted ion chromatograms (EICs) for the target m/z values.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.__init__", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.__init__", "kind": "function", "doc": "

    file_location: str, pathlib.Path or s3path.S3Path\nThermo Raw file path

    \n", "signature": "(file_location)"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.iRawDataPlus", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.iRawDataPlus", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.res", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.res", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.file_path", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.file_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.iFileHeader", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.iFileHeader", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.parameters", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.parameters", "kind": "variable", "doc": "

    Get or set the LCMSParameters object.

    \n", "annotation": ": corems.encapsulation.factory.parameters.LCMSParameters"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.chromatogram_settings", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.chromatogram_settings", "kind": "variable", "doc": "

    Get or set the LiquidChromatographSetting object.

    \n", "annotation": ": corems.encapsulation.factory.processingSetting.LiquidChromatographSetting"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.scans", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.scans", "kind": "variable", "doc": "

    scans : list or tuple\nIf list uses Thermo AverageScansInScanRange for selected scans, otherwise uses Thermo AverageScans for a scan range

    \n", "annotation": ": list | tuple"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.start_scan", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.start_scan", "kind": "variable", "doc": "

    Get the starting scan number for the Thermo Raw file.

    \n", "annotation": ": int"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.end_scan", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.end_scan", "kind": "variable", "doc": "

    Get the ending scan number for the Thermo Raw file.

    \n", "annotation": ": int"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.set_msordertype", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.set_msordertype", "kind": "function", "doc": "

    Function to convert user passed string MS Type to Thermo MSOrderType object\nLimited to MS1 through MS10.

    \n\n

    Parameters:

    \n\n

    scanFilter : Thermo.ScanFilter\n The scan filter object.\nmstype : str, optional\n The MS Type string, by default 'ms1'

    \n", "signature": "(self, scanFilter, mstype: str = 'ms1'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_creation_time", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_creation_time", "kind": "function", "doc": "

    Extract the creation date stamp from the .RAW file\nReturn formatted creation date stamp.

    \n", "signature": "(self) -> datetime.datetime:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.remove_temp_file", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.remove_temp_file", "kind": "function", "doc": "

    If the path is an S3Path, the data cannot be serialized to io.ByteStream and\na temporary copy is stored in the temp dir.\nUse this function only at the end of your execution script;\nsome LCMS class methods depend on this file.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_polarity_mode", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_polarity_mode", "kind": "function", "doc": "

    Get the polarity mode for the given scan number.

    \n\n

    Parameters:

    \n\n

    scan_number : int\n The scan number.

    \n\n

    Raises:

    \n\n

    Exception\n If the polarity mode is unknown.

    \n", "signature": "(self, scan_number: int) -> int:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_filter_for_scan_num", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_filter_for_scan_num", "kind": "function", "doc": "

    Returns the closest matching run time that corresponds to scan_number for the current\ncontroller. This function is only supported for MS device controllers.\ne.g. ['FTMS', '-', 'p', 'NSI', 'Full', 'ms', '[200.00-1000.00]']

    \n\n

    Parameters:

    \n\n

    scan_number : int\n The scan number.

    \n", "signature": "(self, scan_number: int) -> System.Collections.Generic.List[String]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.check_full_scan", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.check_full_scan", "kind": "function", "doc": "

    \n", "signature": "(self, scan_number: int) -> bool:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_all_filters", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_all_filters", "kind": "function", "doc": "

    Get all scan filters.\nThis function is only supported for MS device controllers.\ne.g. ['FTMS', '-', 'p', 'NSI', 'Full', 'ms', '[200.00-1000.00]']

    \n", "signature": "(self) -> Tuple[Dict[int, str], System.Collections.Generic.List[String]]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_scan_header", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_scan_header", "kind": "function", "doc": "

    Get full dictionary of scan header meta data, i.e. AGC status, ion injection time, etc.

    \n\n

    Parameters:

    \n\n

    scan : int\n The scan number.

    \n", "signature": "(self, scan: int) -> Dict[str, Any]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_rt_time_from_trace", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_rt_time_from_trace", "kind": "function", "doc": "

    trace: ThermoFisher.CommonCore.Data.Business.ChromatogramSignal

    \n", "signature": "(\ttrace) -> Tuple[System.Collections.Generic.List[Double], System.Collections.Generic.List[Double], System.Collections.Generic.List[Int32]]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_eics", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_eics", "kind": "function", "doc": "

    ms_type: str ('MS', 'MS2')\nstart_scan: int default -1 will select the lowest available\nend_scan: int default -1 will select the highest available

    \n\n

    returns:

    \n\n
    chroma: dict{target_mz: EIC_Data(\n                            Scans: [int]\n                                original thermo scan numbers\n                            Time: [floats]\n                                list of retention times\n                            TIC: [floats]\n                                total ion chromatogram\n                            Apexes: [int]\n                                original thermo apex scan number after peak picking\n                            )\n
    \n", "signature": "(\tself,\ttarget_mzs: System.Collections.Generic.List[Double],\ttic_data: Dict[str, Any],\tms_type='MS !d',\tpeak_detection=True,\tsmooth=True,\tplot=False,\tax: Optional[matplotlib.axes._axes.Axes] = None,\tlegend=False) -> Tuple[Dict[float, corems.mass_spectra.factory.chromat_data.EIC_Data], matplotlib.axes._axes.Axes]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_tic", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_tic", "kind": "function", "doc": "

    ms_type: str ('MS !d', 'MS2', None)\n if you use None you get all scans.\npeak_detection: bool\nsmooth: bool\nplot: bool\nax: matplotlib axis object\ntrace_type: str ('TIC','BPC')

    \n\n

    returns:\n chroma: dict\n {\n Scan: [int]\n original thermo scan numbers\n Time: [floats]\n list of retention times\n TIC: [floats]\n total ion chromatogram\n Apexes: [int]\n original thermo apex scan number after peak picking\n }

    \n", "signature": "(\tself,\tms_type='MS !d',\tpeak_detection=True,\tsmooth=True,\tplot=False,\tax=None,\ttrace_type='TIC') -> Tuple[corems.mass_spectra.factory.chromat_data.TIC_Data, matplotlib.axes._axes.Axes]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_average_mass_spectrum", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_average_mass_spectrum", "kind": "function", "doc": "

    Averages mass spectra over a scan range using Thermo's AverageScansInScanRange method\nor a scan list using Thermo's AverageScans method\nspectrum_mode: str\n centroid or profile mass spectrum\nauto_process: bool\n If true performs peak picking, and noise threshold calculation after creation of mass spectrum object\nms_type: str\n String of form 'ms1' or 'ms2' or 'MS3' etc. Valid up to MS10.\n Internal function converts to Thermo MSOrderType class.

    \n", "signature": "(\tself,\tspectrum_mode: str = 'profile',\tauto_process: bool = True,\tppm_tolerance: float = 5.0,\tms_type: str = 'MS1') -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile | corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.set_metadata", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.set_metadata", "kind": "function", "doc": "

    Collect metadata to be ingested in the mass spectrum object

    \n\n

    scans_list: list[int] or false\nlastScanNumber: int\nfirstScanNumber: int

    \n", "signature": "(\tself,\tfirstScanNumber=0,\tlastScanNumber=0,\tscans_list=False,\tlabel='Thermo_Profile'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_centroid_msms_data", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_centroid_msms_data", "kind": "function", "doc": "

    Deprecated since version 2.0:\nThis function will be removed in CoreMS 2.0. Please use get_average_mass_spectrum() instead for similar functionality.

    \n", "signature": "(self, scan):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_average_mass_spectrum_by_scanlist", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_average_mass_spectrum_by_scanlist", "kind": "function", "doc": "

    Averages selected scans mass spectra using Thermo's AverageScans method\nscans_list: list[int]\nauto_process: bool\n If true performs peak picking, and noise threshold calculation after creation of mass spectrum object\nReturns:\n MassSpecProfile

    \n\n

    Deprecated since version 2.0.

    \n\n

    This function will be removed in CoreMS 2.0. Please use get_average_mass_spectrum() instead for similar functionality.

    \n", "signature": "(\tself,\tscans_list: System.Collections.Generic.List[Int32],\tauto_process: bool = True,\tppm_tolerance: float = 5.0) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader", "kind": "class", "doc": "

    A class for parsing Thermo RAW mass spectrometry data files and instantiating MassSpectraBase or LCMSBase objects

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe path to the RAW file to be parsed.
    • \n
    • analyzer (str, optional):\nThe type of mass analyzer used in the instrument. Default is \"Unknown\".
    • \n
    • instrument_label (str, optional):\nThe name of the instrument used to acquire the data. Default is \"Unknown\".
    • \n
    • sample_name (str, optional):\nThe name of the sample being analyzed. If not provided, the stem of the file_location path will be used.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (Path):\nThe path to the RAW file being parsed.
    • \n
    • analyzer (str):\nThe type of mass analyzer used in the instrument.
    • \n
    • instrument_label (str):\nThe name of the instrument used to acquire the data.
    • \n
    • sample_name (str):\nThe name of the sample being analyzed.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run(spectra='all', scan_df=None).\nParses the RAW file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe.
    • \n
    • get_mass_spectrum_from_scan(scan_number, spectrum_mode, auto_process=True).\nParses the RAW file and returns a MassSpecBase object from a single scan.
    • \n
    • get_mass_spectra_obj().\nParses the RAW file and instantiates a MassSpectraBase object.
    • \n
    • get_lcms_obj().\nParses the RAW file and instantiates an LCMSBase object.
    • \n
    • get_icr_transient_times().\nReturn a list for transient time targets for all scans, or selected scans range
    • \n
    \n\n

    Inherits from ThermoBaseClass and SpectraParserInterface

    \n", "bases": "ThermoBaseClass, corems.mass_spectra.input.parserbase.SpectraParserInterface"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.__init__", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.__init__", "kind": "function", "doc": "

    file_location: str, pathlib.Path or s3path.S3Path\nThermo Raw file path

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None)"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.file_location", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.analyzer", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.instrument_label", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.load", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.load", "kind": "function", "doc": "

    Load mass spectra data.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_scan_df", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_scan_df", "kind": "function", "doc": "

    Return scan data as a pandas DataFrame.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_ms_raw", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_ms_raw", "kind": "function", "doc": "

    Return a dictionary of mass spectra data as a pandas DataFrame.

    \n", "signature": "(self, spectra, scan_df):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.run", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.run", "kind": "function", "doc": "

    Extracts mass spectra data from a raw file.

    \n\n
    Parameters
    \n\n
      \n
    • spectra (str, optional):\nWhich mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2.
    • \n
    • scan_df (pandas.DataFrame, optional):\nScan dataframe. If not provided, the scan dataframe is created from the RAW file.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing two elements:\n
        \n
      • A dictionary containing mass spectra data, separated by MS level.
      • \n
      • A pandas DataFrame containing scan information, including scan number, scan time, TIC, MS level,\nscan text, scan window lower and upper bounds, polarity, and precursor m/z (if applicable).
      • \n
    • \n
    \n", "signature": "(self, spectra='all', scan_df=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_mass_spectrum_from_scan", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_mass_spectrum_from_scan", "kind": "function", "doc": "

    Instantiate a MassSpecBase object from a single scan number from the binary file, currently only supports profile mode.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int):\nThe scan number to extract the mass spectrum from.
    • \n
    • polarity (int):\nThe polarity of the scan. 1 for positive mode, -1 for negative mode.
    • \n
    • spectrum_mode (str):\nThe type of mass spectrum to extract. Must be 'profile' or 'centroid'.
    • \n
    • auto_process (bool, optional):\nIf True, perform peak picking and noise threshold calculation after creating the mass spectrum object. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile | MassSpecCentroid: The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    • \n
    \n", "signature": "(self, scan_number, spectrum_mode, auto_process=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_mass_spectra_obj", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_mass_spectra_obj", "kind": "function", "doc": "

    Instantiate a MassSpectraBase object from the binary data file.

    \n\n
    Returns
    \n\n
      \n
    • MassSpectraBase: The MassSpectra object containing the parsed mass spectra. The object is instantiated with the RAW file, analyzer, instrument, sample name, and scan dataframe.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_lcms_obj", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_lcms_obj", "kind": "function", "doc": "

    Instantiates an LCMSBase object from the RAW file.

    \n\n
    Parameters
    \n\n
      \n
    • verbose (bool, optional):\nIf True, print progress messages. Default is True.
    • \n
    • spectra (str, optional):\nWhich mass spectra data to include in the output. Default is \"all\". Other options: \"none\", \"ms1\", \"ms2\".
    • \n
    \n\n
    Returns
    \n\n
      \n
    • LCMSBase: LCMS object containing mass spectra data. The object is instantiated with the file location, analyzer, instrument, sample name, scan info, mz dataframe (as specified), polarity, as well as the attributes holding the scans, retention times, and tics.
    • \n
    \n", "signature": "(self, spectra='all'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_icr_transient_times", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_icr_transient_times", "kind": "function", "doc": "

    Return a list for transient time targets for all scans, or selected scans range

    \n\n
    Notes
    \n\n

    Resolving Power and Transient time targets based on 7T FT-ICR MS system

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output", "modulename": "corems.mass_spectra.output", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export", "modulename": "corems.mass_spectra.output.export", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.ion_type_dict", "modulename": "corems.mass_spectra.output.export", "qualname": "ion_type_dict", "kind": "variable", "doc": "

    \n", "default_value": "{'M+': [{}, {}], 'protonated': [{'H': 1}, {}], '[M+H]+': [{'H': 1}, {}], '[M+NH4]+': [{'N': 1, 'H': 4}, {}], '[M+Na]+': [{'Na': 1}, {}], '[M+K]+': [{'K': 1}, {}], '[M+2Na+Cl]+': [{'Na': 2, 'Cl': 1}, {}], '[M+2Na-H]+': [{'Na': 2}, {'H': 1}], '[M+C2H3Na2O2]+': [{'C': 2, 'H': 3, 'Na': 2, 'O': 2}, {}], '[M+C4H10N3]+': [{'C': 4, 'H': 10, 'N': 3}, {}], '[M+NH4+ACN]+': [{'C': 2, 'H': 7, 'N': 2}, {}], '[M+H-H2O]+': [{}, {'H': 1, 'O': 1}], 'de-protonated': [{}, {'H': 1}], '[M-H]-': [{}, {'H': 1}], '[M+Cl]-': [{'Cl': 1}, {}], '[M+HCOO]-': [{'C': 1, 'H': 1, 'O': 2}, {}], '[M+CH3COO]-': [{'C': 2, 'H': 3, 'O': 2}, {}], '[M+2NaAc+Cl]-': [{'Na': 2, 'C': 2, 'H': 3, 'O': 2, 'Cl': 1}, {}], '[M+K-2H]-': [{'K': 1}, {'H': 2}], '[M+Na-2H]-': [{'Na': 1}, {'H': 2}]}"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport", "kind": "class", "doc": "

    A class to export low resolution GC-MS data.

    \n\n

    This class provides methods to export low resolution GC-MS data to various formats such as Excel, CSV, HDF5, and Pandas DataFrame.

    \n\n

    Parameters:

    \n\n

    out_file_path : str\n The output file path.\ngcms : object\n The low resolution GCMS object.

    \n\n

    Attributes:

    \n\n

    output_file : Path\n The output file path as a Path object.\ngcms : object\n The low resolution GCMS object.

    \n\n

    Methods:

    \n\n
      \n
    • get_pandas_df(id_label=\"corems:\"). Get the exported data as a Pandas DataFrame.
    • \n
    • get_json(nan=False, id_label=\"corems:\"). Get the exported data as a JSON string.
    • \n
    • to_pandas(write_metadata=True, id_label=\"corems:\"). Export the data to a Pandas DataFrame and save it as a pickle file.
    • \n
    • to_excel(write_mode='a', write_metadata=True, id_label=\"corems:\"),\nExport the data to an Excel file.
    • \n
    • to_csv(separate_output=False, write_mode=\"w\", write_metadata=True, id_label=\"corems:\").\nExport the data to a CSV file.
    • \n
    • to_hdf(id_label=\"corems:\").\nExport the data to an HDF5 file.
    • \n
    • get_data_stats(gcms).\nGet statistics about the GCMS data.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.__init__", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.__init__", "kind": "function", "doc": "

    \n", "signature": "(out_file_path, gcms)"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.output_file", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.output_file", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.gcms", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.gcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_pandas_df", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_pandas_df", "kind": "function", "doc": "

    Get the exported data as a Pandas DataFrame.

    \n\n

    Parameters:

    \n\n

    id_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n\n

    Returns:

    \n\n

    DataFrame\n The exported data as a Pandas DataFrame.

    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_json", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_json", "kind": "function", "doc": "

    Get the exported data as a JSON string.

    \n\n

    Parameters:

    \n\n

    nan : bool, optional\n Whether to include NaN values in the JSON string. Default is False.\nid_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n", "signature": "(self, nan=False, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.to_pandas", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.to_pandas", "kind": "function", "doc": "

    Export the data to a Pandas DataFrame and save it as a pickle file.

    \n\n

    Parameters:

    \n\n

    write_metadata : bool, optional\n Whether to write metadata to the output file.\nid_label : str, optional\n The ID label for the data.

    \n", "signature": "(self, write_metadata=True, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.to_excel", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.to_excel", "kind": "function", "doc": "

    Export the data to an Excel file.

    \n\n

    Parameters:

    \n\n

    write_mode : str, optional\n The write mode for the Excel file. Default is 'a' (append).\nwrite_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.\nid_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n", "signature": "(self, write_mode='a', write_metadata=True, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.to_csv", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.to_csv", "kind": "function", "doc": "

    Export the data to a CSV file.

    \n\n

    Parameters:

    \n\n

    separate_output : bool, optional\n Whether to separate the output into multiple files. Default is False.\nwrite_mode : str, optional\n The write mode for the CSV file. Default is 'w' (write).\nwrite_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.\nid_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n", "signature": "(\tself,\tseparate_output=False,\twrite_mode='w',\twrite_metadata=True,\tid_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.to_hdf", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.to_hdf", "kind": "function", "doc": "

    Export the data to an HDF5 file.

    \n\n

    Parameters:

    \n\n

    id_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_data_stats", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_data_stats", "kind": "function", "doc": "

    Get statistics about the GCMS data.

    \n\n

    Parameters:

    \n\n

    gcms : object\n The low resolution GCMS object.

    \n\n

    Returns:

    \n\n

    dict\n A dictionary containing the data statistics.

    \n", "signature": "(self, gcms):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_calibration_stats", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_calibration_stats", "kind": "function", "doc": "

    Get statistics about the GC-MS calibration.

    \n\n

    Parameters:

    \n", "signature": "(self, gcms, id_label):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_blank_stats", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_blank_stats", "kind": "function", "doc": "

    Get statistics about the GC-MS blank.

    \n", "signature": "(self, gcms):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_instrument_metadata", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_instrument_metadata", "kind": "function", "doc": "

    Get metadata about the GC-MS instrument.

    \n", "signature": "(self, gcms):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_data_metadata", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_data_metadata", "kind": "function", "doc": "

    Get metadata about the GC-MS data.

    \n\n

    Parameters:

    \n\n

    gcms : object\n The low resolution GCMS object.\nid_label : str\n The ID label for the data.\noutput_path : str\n The output file path.

    \n\n

    Returns:

    \n\n

    dict\n A dictionary containing the data metadata.

    \n", "signature": "(self, gcms, id_label, output_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_parameters_json", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_parameters_json", "kind": "function", "doc": "

    Get the parameters as a JSON string.

    \n\n

    Parameters:

    \n\n

    gcms : GCMS object\n The low resolution GCMS object.\nid_label : str\n The ID label for the data.\noutput_path : str\n The output file path.

    \n\n

    Returns:

    \n\n

    str\n The parameters as a JSON string.

    \n", "signature": "(self, gcms, id_label, output_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.write_settings", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.write_settings", "kind": "function", "doc": "

    Write the settings to a JSON file.

    \n\n

    Parameters:

    \n\n

    output_path : str\n The output file path.\ngcms : GCMS object\n The low resolution GCMS object.\nid_label : str\n The ID label for the data. Default is \"emsl:\".

    \n", "signature": "(self, output_path, gcms, id_label='emsl:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_list_dict_data", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_list_dict_data", "kind": "function", "doc": "

    Get the exported data as a list of dictionaries.

    \n\n

    Parameters:

    \n\n

    gcms : object\n The low resolution GCMS object.\ninclude_no_match : bool, optional\n Whether to include no match data. Default is True.\nno_match_inline : bool, optional\n Whether to include no match data inline. Default is False.

    \n\n

    Returns:

    \n\n

    list\n The exported data as a list of dictionaries.

    \n", "signature": "(self, gcms, include_no_match=True, no_match_inline=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport", "kind": "class", "doc": "

    A class to export high resolution mass spectra data.

    \n\n

    This class provides methods to export high resolution mass spectra data to various formats\nsuch as Excel, CSV, HDF5, and Pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str | Path):\nThe output file path.
    • \n
    • mass_spectra (object):\nThe high resolution mass spectra object.
    • \n
    • output_type (str, optional):\nThe output type. Default is 'excel'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • output_file (Path):\nThe output file path without suffix
    • \n
    • dir_loc (Path):\nThe directory location for the output file.\nBy default this will be the output_file + \".corems\", and all output files will be\nwritten into this location.
    • \n
    • mass_spectra (MassSpectraBase):\nThe high resolution mass spectra object.
    • \n
    \n", "bases": "corems.mass_spectrum.output.export.HighResMassSpecExport"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.__init__", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(out_file_path, mass_spectra, output_type='excel')"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.dir_loc", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.dir_loc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.output_file", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.output_file", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.mass_spectra", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.mass_spectra", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.atoms_order_list", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.atoms_order_list", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.get_pandas_df", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.get_pandas_df", "kind": "function", "doc": "

    Get the mass spectra as a list of Pandas DataFrames.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.to_pandas", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.to_pandas", "kind": "function", "doc": "

    Export the data to a Pandas DataFrame and save it as a pickle file.

    \n\n

    Parameters:

    \n\n

    write_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.

    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.to_excel", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.to_excel", "kind": "function", "doc": "

    Export the data to an Excel file.

    \n\n

    Parameters:

    \n\n

    write_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.

    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.to_csv", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.to_csv", "kind": "function", "doc": "

    Export the data to a CSV file.

    \n\n

    Parameters:

    \n\n

    write_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.

    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.get_mass_spectra_attrs", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.get_mass_spectra_attrs", "kind": "function", "doc": "

    Get the mass spectra attributes as a JSON string.

    \n\n

    Parameters:

    \n\n

    mass_spectra : object\n The high resolution mass spectra object.

    \n\n

    Returns:

    \n\n

    str\n The mass spectra attributes as a JSON string.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.to_hdf", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.to_hdf", "kind": "function", "doc": "

    Export the data to an HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • overwrite (bool, optional):\nWhether to overwrite the output file. Default is False.
    • \n
    • export_raw (bool, optional):\nWhether to export the raw mass spectra data. Default is True.
    • \n
    \n", "signature": "(self, overwrite=False, export_raw=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LCMSExport", "modulename": "corems.mass_spectra.output.export", "qualname": "LCMSExport", "kind": "class", "doc": "

    A class to export high resolution LC-MS data.

    \n\n

    This class provides methods to export high resolution LC-MS data to HDF5.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str | Path):\nThe output file path, do not include the file extension.
    • \n
    • lcms_object (LCMSBase):\nThe high resolution lc-ms object.
    • \n
    \n", "bases": "HighResMassSpectraExport"}, {"fullname": "corems.mass_spectra.output.export.LCMSExport.__init__", "modulename": "corems.mass_spectra.output.export", "qualname": "LCMSExport.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(out_file_path, mass_spectra)"}, {"fullname": "corems.mass_spectra.output.export.LCMSExport.to_hdf", "modulename": "corems.mass_spectra.output.export", "qualname": "LCMSExport.to_hdf", "kind": "function", "doc": "

    Export the data to an HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • overwrite (bool, optional):\nWhether to overwrite the output file. Default is False.
    • \n
    • save_parameters (bool, optional):\nWhether to save the parameters as a separate json or toml file. Default is True.
    • \n
    • parameter_format (str, optional):\nThe format to save the parameters in. Default is 'toml'.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If parameter_format is not 'json' or 'toml'.
    • \n
    \n", "signature": "(self, overwrite=False, save_parameters=True, parameter_format='toml'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport", "kind": "class", "doc": "

    A class to export lipidomics data.

    \n\n

    This class provides methods to export lipidomics data to various formats and summarize the lipid report.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str | Path):\nThe output file path, do not include the file extension.
    • \n
    • mass_spectra (object):\nThe high resolution mass spectra object.
    • \n
    \n", "bases": "LCMSExport"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.__init__", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(out_file_path, mass_spectra)"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.ion_type_dict", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.ion_type_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.get_ion_formula", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.get_ion_formula", "kind": "function", "doc": "

    From a neutral formula and an ion type, return the formula of the ion.

    \n\n
    Notes
    \n\n

    This is a static method.\nIf the neutral_formula is not a string, this method will return None.

    \n\n
    Parameters
    \n\n
      \n
    • neutral_formula (str):\nThe neutral formula, this should be a string form from the MolecularFormula class\n(e.g. 'C2 H4 O2', isotopes OK), or simple string (e.g. 'C2H4O2', no isotope handling in this case).\nIn the case of a simple string, the atoms are parsed based on the presence of capital letters,\ne.g. MgCl2 is parsed as 'Mg Cl2'.
    • \n
    • ion_type (str):\nThe ion type, e.g. 'protonated', '[M+H]+', '[M+Na]+', etc.\nSee the self.ion_type_dict for the available ion types.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: The formula of the ion as a string (like 'C2 H4 O2'); or None if the neutral_formula is not a string.
    • \n
    \n", "signature": "(neutral_formula, ion_type):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.get_isotope_type", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.get_isotope_type", "kind": "function", "doc": "

    From an ion formula, return the 13C isotope type of the ion.

    \n\n
    Notes
    \n\n

    This is a static method.\nIf the ion_formula is not a string, this method will return None.\nThis is currently only functional for 13C isotopes.

    \n\n
    Parameters
    \n\n
      \n
    • ion_formula (str):\nThe formula of the ion, expected to be a string like 'C2 H4 O2'.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: The isotope type of the ion, e.g. '13C1', '13C2', etc; or None if the ion_formula does not contain a 13C isotope.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the ion_formula is not a string.
    • \n
    \n", "signature": "(ion_formula):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.clean_ms1_report", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.clean_ms1_report", "kind": "function", "doc": "

    Clean the MS1 report.

    \n\n
    Parameters
    \n\n
      \n
    • ms1_summary_full (DataFrame):\nThe full MS1 summary DataFrame.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The cleaned MS1 summary DataFrame.
    • \n
    \n", "signature": "(self, ms1_summary_full):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.summarize_lipid_report", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.summarize_lipid_report", "kind": "function", "doc": "

    Summarize the lipid report.

    \n\n
    Parameters
    \n\n
      \n
    • ms2_annot (DataFrame):\nThe MS2 annotation DataFrame with all annotations.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The summarized lipid report.
    • \n
    \n", "signature": "(self, ms2_annot):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.clean_ms2_report", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.clean_ms2_report", "kind": "function", "doc": "

    Clean the MS2 report.

    \n\n
    Parameters
    \n\n
      \n
    • lipid_summary (DataFrame):\nThe full lipid summary DataFrame.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The cleaned lipid summary DataFrame.
    • \n
    \n", "signature": "(self, lipid_summary):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.to_report", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.to_report", "kind": "function", "doc": "

    Create a report of the mass features and their annotations.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_metadata (dict, optional):\nThe molecular metadata. Default is None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The report of the mass features and their annotations.
    • \n
    \n\n
    Notes
    \n\n

    The report will contain the mass features and their annotations from MS1 and MS2 (if available).

    \n", "signature": "(self, molecular_metadata=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.report_to_csv", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.report_to_csv", "kind": "function", "doc": "

    Create a report of the mass features and their annotations and save it as a CSV file.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_metadata (dict, optional):\nThe molecular metadata. Default is None.
    • \n
    \n", "signature": "(self, molecular_metadata=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum", "modulename": "corems.mass_spectrum", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc", "modulename": "corems.mass_spectrum.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "kind": "module", "doc": "

    Created on March 23 2023

    \n\n

    @author: Will Kew

    \n\n

    Modules for automatic mass internal recalibration

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration", "kind": "class", "doc": "

    This class is designed for high resolution (FTICR, Orbitrap) data of complex mixtures, e.g. organic matter.

    \n\n

    The tool first does a broad mass range search for the most commonly expected ion type (i.e. CHO, deprotonated - for negative ESI)\nAnd then the assigned data mass error distribution is searched, with a gaussian fit to the most prominent range. \nThis tool works when the data are of sufficient quality, and not outwith the typical expected range of the mass analyzer\nIt presumes the mean error is out by 0-several ppm, but that the spread of error values is modest (<2ppm)

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nCoreMS mass spectrum object
    • \n
    • plot (bool, optional):\nWhether to plot the error distribution. The default is False.
    • \n
    • docker (bool, optional):\nWhether to use the docker database. The default is True. If not, it uses a dynamically generated sqlite database.
    • \n
    • ppmFWHMprior (float, optional):\nThe FWHM of the prior distribution (ppm). The default is 3.
    • \n
    • ppmRangeprior (float, optional):\nThe range of the prior distribution (ppm). The default is 15.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • determine_error_boundaries(). Determine the error boundaries for recalibration space.
    • \n
    \n\n
    Notes
    \n\n

    This initialisation function creates a copy of the MassSpectrum object to avoid over-writing assignments. \nPossible future task is to make the base class copyable.

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.__init__", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmass_spectrum,\tplot: bool = False,\tdocker: bool = True,\tppmFWHMprior: float = 3,\tppmRangeprior: float = 15)"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.mass_spectrum", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.plot", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.plot", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.docker", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.docker", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.ppmFWHMprior", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.ppmFWHMprior", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.ppmRangeprior", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.ppmRangeprior", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.set_uncal_settings", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.set_uncal_settings", "kind": "function", "doc": "

    Set uncalibrated formula search settings

    \n\n

    This function serves the uncalibrated data (hence broad error tolerance).\nIt only allows CHO formulas with the deprotonated ion type, as is most common for SRFA in ESI negative mode.

    \n\n

    This will not work for positive mode data, or for other ion types, or other expected elemental searches.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.positive_search_settings", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.positive_search_settings", "kind": "function", "doc": "

    Set the positive mode elemental search settings

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.get_error_range", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.get_error_range", "kind": "function", "doc": "

    Get the error range from the error distribution

    \n\n

    Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species.

    \n\n
    Parameters
    \n\n
      \n
    • errors (list):\nlist of the errors of the assigned species (ppm)
    • \n
    • ppmFWHMprior (float, optional):\nThe FWHM of the prior distribution (ppm). The default is 3.
    • \n
    • plot_logic (bool, optional):\nWhether to plot the error distribution. The default is False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mean_error (float):\nmean mass error of the Gaussian distribution (ppm)
    • \n
    • fwhm_error (float):\nfull width half max of the gaussian error distribution (ppm)
    • \n
    • ppm_thresh (list):\nrecommended thresholds for the recalibration parameters (ppm)\nConsists of [mean_error-fwhm_error,mean_error+fwhm_error]
    • \n
    \n", "signature": "(errors: list, ppmFWHMprior: float = 3, plot_logic: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.determine_error_boundaries", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.determine_error_boundaries", "kind": "function", "doc": "

    Determine the error boundaries for recalibration space

    \n\n

    This is the main function in this class\nSets the Molecular Formulas search settings, performs the initial formula search\nConverts the data to a dataframe, and gets the error range\nReturns the error thresholds.

    \n\n
    Returns
    \n\n
      \n
    • mean_error (float):\nmean mass error of the Gaussian distribution (ppm)
    • \n
    • fwhm_error (float):\nfull width half max of the gaussian error distribution (ppm)
    • \n
    • ppm_thresh (list):\nrecommended thresholds for the recalibration parameters (ppm)\nConsists of [mean_error-fwhm_error,mean_error+fwhm_error]
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration", "modulename": "corems.mass_spectrum.calc.Calibration", "kind": "module", "doc": "

    Created on Wed May 13 02:16:09 2020

    \n\n

    @author: Will Kew

    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration", "kind": "class", "doc": "

    MzDomainCalibration class for recalibrating mass spectra

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (CoreMS MassSpectrum Object):\nThe mass spectrum to be calibrated.
    • \n
    • ref_masslist (str):\nThe path to a reference mass list.
    • \n
    • mzsegment (tuple of floats, optional):\nThe mz range to recalibrate, or None. Used for calibration of specific parts of the mz domain at a time.\nFuture work - allow multiple mzsegments to be passed.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum (CoreMS MassSpectrum Object):\nThe mass spectrum to be calibrated.
    • \n
    • mzsegment (tuple of floats or None):\nThe mz range to recalibrate, or None.
    • \n
    • ref_mass_list_path (str or Path):\nThe path to the reference mass list.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run(). \nMain function to run this class.
    • \n
    • load_ref_mass_list(). \nLoad reference mass list (Bruker format).
    • \n
    • gen_ref_mass_list_from_assigned(min_conf=0.7). \nGenerate reference mass list from assigned masses.
    • \n
    • find_calibration_points(df_ref, calib_ppm_error_threshold=(-1, 1), calib_snr_threshold=5). \nFind calibration points in the mass spectrum based on the reference mass list.
    • \n
    • robust_calib(param, cal_peaks_mz, cal_refs_mz, order=1). \nRecalibration function.
    • \n
    • recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=1, diagnostic=False). \nMain recalibration function which uses a robust linear regression.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.__init__", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.__init__", "kind": "function", "doc": "

    \n", "signature": "(mass_spectrum, ref_masslist, mzsegment=None)"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.mass_spectrum", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.mzsegment", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.mzsegment", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.ref_mass_list_path", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.ref_mass_list_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.load_ref_mass_list", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.load_ref_mass_list", "kind": "function", "doc": "

    Load reference mass list (Bruker format)

    \n\n

    Loads in a reference mass list from a .ref file\nNote that some versions of Bruker's software produce .ref files with a different format. \nAs such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format. \nCoreMS includes an example .ref file with the correct format for reference.

    \n\n
    Returns
    \n\n
      \n
    • df_ref (Pandas DataFrame):\nreference mass list object.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.gen_ref_mass_list_from_assigned", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.gen_ref_mass_list_from_assigned", "kind": "function", "doc": "

    Generate reference mass list from assigned masses

    \n\n

    This function will generate a ref mass dataframe object from an assigned corems mass spec obj\nusing assigned masses above a certain minimum confidence threshold.

    \n\n

    This function needs to be retested, and its coverage in the unit tests should be checked.

    \n\n
    Parameters
    \n\n
      \n
    • min_conf (float, optional):\nminimum confidence score. The default is 0.7.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • df_ref (Pandas DataFrame):\nreference mass list - based on calculated masses.
    • \n
    \n", "signature": "(self, min_conf: float = 0.7):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.find_calibration_points", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.find_calibration_points", "kind": "function", "doc": "

    Function to find calibration points in the mass spectrum

    \n\n

    Based on the reference mass list.

    \n\n
    Parameters
    \n\n
      \n
    • df_ref (Pandas DataFrame):\nreference mass list for recalibration.
    • \n
    • calib_ppm_error_threshold (tuple of floats, optional):\nppm error for finding calibration masses in the spectrum. The default is -1,1.\nNote: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6. \n Some software does this the other way around and value signs must be inverted for that to work.
    • \n
    • calib_snr_threshold (float, optional):\nsnr threshold for finding calibration masses in the spectrum. The default is 5.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • cal_peaks_mz (list of floats):\nmasses of measured ions to use in calibration routine
    • \n
    • cal_refs_mz (list of floats):\nreference mz values of found calibration points.
    • \n
    \n", "signature": "(\tself,\tdf_ref,\tcalib_ppm_error_threshold: tuple[float, float] = (-1, 1),\tcalib_snr_threshold: float = 5,\tcalibration_ref_match_method: str = 'legacy',\tcalibration_ref_match_tolerance: float = 0.003,\tcalibration_ref_match_std_raw_error_limit: float = 1.5):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.robust_calib", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.robust_calib", "kind": "function", "doc": "

    Recalibration function

    \n\n

    Computes the rms of m/z errors to minimize when calibrating.\nThis is adapted from spike.

    \n\n
    Parameters
    \n\n
      \n
    • param (list of floats):\ngenerated by minimize function from scipy optimize.
    • \n
    • cal_peaks_mz (list of floats):\nmasses of measured peaks to use in mass calibration.
    • \n
    • cal_refs_mz (list of floats):\nreference mz values of found calibration points.
    • \n
    • order (int, optional):\norder of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • rmserror (float):\nroot mean square mass error for calibration points.
    • \n
    \n", "signature": "(\tself,\tparam: list[float],\tcal_peaks_mz: list[float],\tcal_refs_mz: list[float],\torder: int = 1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.recalibrate_mass_spectrum", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.recalibrate_mass_spectrum", "kind": "function", "doc": "

    Main recalibration function which uses a robust linear regression

    \n\n

    This function performs the recalibration of the mass spectrum object. \nIt iteratively applies

    \n\n
    Parameters
    \n\n
      \n
    • cal_peaks_mz (list of float):\nmasses of measured peaks to use in mass calibration.
    • \n
    • cal_refs_mz (list of float):\nreference mz values of found calibration points.
    • \n
    • order (int, optional):\norder of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mass_spectrum (CoreMS mass spectrum object):\nCalibrated mass spectrum object
    • \n
    \n\n
    Notes
    \n\n

    This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method.

    \n\n
    References
    \n\n
      \n
    1. Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A. \nSPIKE a Processing Software dedicated to Fourier Spectroscopies \nhttps://arxiv.org/abs/1608.06777 (2016)
    2. \n
    3. SPIKE - https://github.com/spike-project/spike
    4. \n
    \n", "signature": "(\tself,\tcal_peaks_mz: list[float],\tcal_refs_mz: list[float],\torder: int = 1,\tdiagnostic: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.run", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.run", "kind": "function", "doc": "

    Run the calibration routine

    \n\n

    This function runs the calibration routine.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration", "kind": "class", "doc": "

    Frequency Domain Calibration class for mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • selected_mass_peaks (list):\nList of selected mass peaks.
    • \n
    • include_isotopologue (bool, optional):\nFlag to include isotopologues, by default False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mz_exp (ndarray):\nArray of experimental m/z values.
    • \n
    • mz_calc (ndarray):\nArray of calculated m/z values.
    • \n
    • freq_exp (ndarray):\nArray of experimental frequencies.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • freq_exp_ms (ndarray):\nArray of experimental frequencies for mass spectrum.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • recal_mass_spec(mz_domain, Aterm, Bterm, Cterm). \nRecalibrate the mass spectrum with the given parameters.
    • \n
    • linear(). \nPerform linear calibration.
    • \n
    • quadratic(iteration=False). \nPerform quadratic calibration.
    • \n
    • ledford_calibration(iteration=False). \nPerform Ledford calibration.
    • \n
    • step_fit(steps=4).
      \nPerform step fit calibration.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.__init__", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.__init__", "kind": "function", "doc": "

    \n", "signature": "(mass_spectrum, selected_mass_peaks, include_isotopologue=False)"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.selected_mspeaks", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.selected_mspeaks", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.mz_exp", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.mz_exp", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.mz_calc", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.mz_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.freq_exp", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.freq_exp", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.mass_spectrum", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.freq_exp_ms", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.freq_exp_ms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.recal_mass_spec", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.recal_mass_spec", "kind": "function", "doc": "

    Recalibrate the mass spectrum with the given parameters.

    \n\n
    Parameters
    \n\n
      \n
    • mz_domain (ndarray):\nArray of m/z values for recalibration.
    • \n
    • Aterm (float):\nAterm parameter for recalibration.
    • \n
    • Bterm (float):\nBterm parameter for recalibration.
    • \n
    • Cterm (float):\nCterm parameter for recalibration.
    • \n
    \n", "signature": "(self, mz_domain, Aterm, Bterm, Cterm):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.linear", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.linear", "kind": "function", "doc": "

    Perform linear calibration.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.quadratic", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.quadratic", "kind": "function", "doc": "

    Perform quadratic calibration.

    \n\n
    Parameters
    \n\n
      \n
    • iteration (bool, optional):\nFlag to perform iterative calibration, by default False.
    • \n
    \n", "signature": "(self, iteration: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.ledford_calibration", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.ledford_calibration", "kind": "function", "doc": "

    Perform Ledford calibration.

    \n\n
    Parameters
    \n\n
      \n
    • iteration (bool, optional):\nFlag to perform iterative calibration, by default False.
    • \n
    \n", "signature": "(self, iteration: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.step_fit", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.step_fit", "kind": "function", "doc": "

    Perform step fit calibration.

    \n\n
    Parameters
    \n\n
      \n
    • steps (int, optional):\nNumber of steps for step fit calibration, by default 4.
    • \n
    \n", "signature": "(self, steps: int = 4):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping", "kind": "class", "doc": "

    Class for Kendrick grouping of mass spectra.

    \n\n
    Methods
    \n\n
      \n
    • mz_odd_even_index_lists(). \nGet odd and even indexes lists.
    • \n
    • calc_error(current, test). \nCalculate the error between two values.
    • \n
    • populate_kendrick_index_dict_error(list_indexes, sort=True). \nPopulate the Kendrick index dictionary based on error.
    • \n
    • populate_kendrick_index_dict_rounding(list_indexes, sort=True). \nPopulate the Kendrick index dictionary based on rounding.
    • \n
    • sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index). \nSort the Kendrick index dictionary based on abundance.
    • \n
    • kendrick_groups_indexes(sort=True). \nGet the Kendrick groups indexes dictionary.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.mz_odd_even_index_lists", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.mz_odd_even_index_lists", "kind": "function", "doc": "

    Get odd and even indexes lists.

    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the lists of even and odd indexes.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.calc_error", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.calc_error", "kind": "function", "doc": "

    Calculate the error between two values.

    \n\n
    Parameters
    \n\n
      \n
    • current (float):\nThe current value.
    • \n
    • test (float):\nThe test value.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The calculated error.
    • \n
    \n", "signature": "(self, current: float, test: float):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.populate_kendrick_index_dict_error", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.populate_kendrick_index_dict_error", "kind": "function", "doc": "

    Populate the Kendrick index dictionary based on error.

    \n\n
    Parameters
    \n\n
      \n
    • list_indexes (list):\nThe list of indexes.
    • \n
    • sort (bool, optional):\nWhether to sort the dictionary by abundance (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The Kendrick index dictionary.
    • \n
    \n", "signature": "(self, list_indexes: list, sort: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.populate_kendrick_index_dict_rounding", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.populate_kendrick_index_dict_rounding", "kind": "function", "doc": "

    Populate the Kendrick index dictionary based on rounding.

    \n\n
    Parameters
    \n\n
      \n
    • list_indexes (list):\nThe list of indexes.
    • \n
    • sort (bool, optional):\nWhether to sort the dictionary by abundance (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The Kendrick index dictionary.
    • \n
    \n", "signature": "(self, list_indexes: list, sort: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.sort_abundance_kendrick_dict", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.sort_abundance_kendrick_dict", "kind": "function", "doc": "

    Sort the Kendrick index dictionary based on abundance.

    \n\n
    Parameters
    \n\n
      \n
    • even_kendrick_group_index (dict):\nThe Kendrick index dictionary for even indexes.
    • \n
    • odd_kendrick_group_index (dict):\nThe Kendrick index dictionary for odd indexes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The sorted Kendrick index dictionary.
    • \n
    \n", "signature": "(\tself,\teven_kendrick_group_index: dict,\todd_kendrick_group_index: dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.kendrick_groups_indexes", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.kendrick_groups_indexes", "kind": "function", "doc": "

    Get the Kendrick groups indexes dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • sort (bool, optional):\nWhether to sort the dictionary by abundance (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The Kendrick groups indexes dictionary.
    • \n
    \n", "signature": "(self, sort: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction", "kind": "class", "doc": "

    Class for mass error prediction.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (list):\nList of mass spectrum objects.
    • \n
    • mz_overlay (int, optional):\nThe mz overlay value for peak simulation. Default is 10.
    • \n
    • rp_increments (int, optional):\nThe resolving power increments for peak simulation. Default is 10000.
    • \n
    • base_line_target (float, optional):\nThe target value for the baseline resolution. Default is 0.01.
    • \n
    • max_interation (int, optional):\nThe maximum number of iterations for peak simulation. Default is 1000.
    • \n
    • interpolation (str, optional):\nThe interpolation method for missing data. Default is 'linear'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum_obj (list):\nList of mass spectrum objects.
    • \n
    • mz_overlay (int):\nThe mz overlay value for peak simulation.
    • \n
    • rp_increments (int):\nThe resolving power increments for peak simulation.
    • \n
    • base_line_target (float):\nThe target value for the baseline resolution.
    • \n
    • max_interation (int):\nThe maximum number of iterations for peak simulation.
    • \n
    • df (DataFrame or None):\nThe calculated error distribution dataframe.
    • \n
    • interpolation (str):\nThe interpolation method for missing data.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run().\nRuns the mass error prediction calculation.
    • \n
    • get_results().\nReturns the calculated error distribution dataframe.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.__init__", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(\tmass_spectrum,\tmz_overlay=10,\trp_increments=10000,\tbase_line_target: float = 0.01,\tmax_interation=1000,\tinterpolation='linear')"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.mass_spectrum_obj", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.mass_spectrum_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.mz_overlay", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.mz_overlay", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.rp_increments", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.rp_increments", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.base_line_target", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.base_line_target", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.max_interation", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.max_interation", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.df", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.df", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.interpolation", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.interpolation", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.run", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.run", "kind": "function", "doc": "

    Runs the mass error prediction calculation.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.get_results", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.get_results", "kind": "function", "doc": "

    Returns the calculated error distribution dataframe.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.calc_error_dist", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.calc_error_dist", "kind": "function", "doc": "

    Calculate the error distribution.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.sum_data", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.sum_data", "kind": "function", "doc": "

    Sum the abundances of the simulated peaks.

    \n\n
    Parameters
    \n\n
      \n
    • tuple_mz_abun_list (tuple):\nA tuple containing the mz and abundance lists.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the summed mz and abundance lists.
    • \n
    \n", "signature": "(self, tuple_mz_abun_list: tuple):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.calc_error", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.calc_error", "kind": "function", "doc": "

    Calculate the error between two values.

    \n\n
    Parameters
    \n\n
      \n
    • mass_ref (float):\nThe reference value.
    • \n
    • mass_sim (float):\nThe simulated value.
    • \n
    • factor (float):\nThe factor to multiply the error by.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The calculated error.
    • \n
    \n", "signature": "(self, mass_ref, mass_sim, factor):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.find_peak_apex", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.find_peak_apex", "kind": "function", "doc": "

    Find the peak apex.

    \n\n
    Parameters
    \n\n
      \n
    • mz (array):\nThe mz array.
    • \n
    • abund (array):\nThe abundance array.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the peak apex mass and abundance.
    • \n
    \n", "signature": "(self, mz, abund):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.find_peak_valley", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.find_peak_valley", "kind": "function", "doc": "

    Find the peak valley.

    \n\n
    Parameters
    \n\n
      \n
    • mz (array):\nThe mz array.
    • \n
    • abund (array):\nThe abundance array.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the peak valley mz and abundance.
    • \n
    \n", "signature": "(self, mz, abund):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc", "kind": "class", "doc": "

    Class for Mass Spectrum Calculations

    \n\n

    Class including numerical calculations related to the mass spectrum class.\nInherits PeakPicking and NoiseThresholdCalc, ensuring their methods are \navailable to the instantiated mass spectrum class object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nCoreMS mass spectrum object
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • All Attributes are derived from the MassSpecBase Class
    • \n
    \n\n
    Methods
    \n\n
      \n
    • check_mspeaks(). \nCheck if the mspeaks attribute is populated
    • \n
    • sort_by_abundance(). \nSort the mspeaks by abundance
    • \n
    • percentile_assigned(report_error=False). \nCalculate the percentage of assigned peaks
    • \n
    • resolving_power_calc(B, T). \nCalculate the resolving power
    • \n
    • number_average_molecular_weight(profile=False). \nCalculate the number average molecular weight
    • \n
    • weight_average_molecular_weight(profile=False). \nCalculate the weight average molecular weight
    • \n
    \n", "bases": "corems.mass_spectrum.calc.PeakPicking.PeakPicking, corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc.percentile_assigned", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc.percentile_assigned", "kind": "function", "doc": "

    Percentage of peaks which are assigned

    \n\n
    Parameters
    \n\n
      \n
    • report_error (bool, optional):\nReport the error of the assigned peaks. Default is False.
    • \n
    \n", "signature": "(self, report_error: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc.resolving_power_calc", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc.resolving_power_calc", "kind": "function", "doc": "

    Calculate the theoretical resolving power

    \n\n

    Calls on the MSPeak object function to calculate the resolving power of a peak, this calcs for all peaks in a spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • T (float):\ntransient time
    • \n
    • B (float):\nMagnetic Field Strength (Tesla)
    • \n
    \n\n
    References
    \n\n
      \n
    1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)\nDOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    2. \n
    \n", "signature": "(self, B: float, T: float):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc.number_average_molecular_weight", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc.number_average_molecular_weight", "kind": "function", "doc": "

    Average molecular weight calculation

    \n\n
    Parameters
    \n\n
      \n
    • profile (bool, optional):\nis data profile or centroid mode. The default is False (e.g. Centroid data)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The average molecular weight.
    • \n
    \n", "signature": "(self, profile: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc.weight_average_molecular_weight", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc.weight_average_molecular_weight", "kind": "function", "doc": "

    Weighted Average molecular weight calculation

    \n\n
    Returns
    \n\n
      \n
    • float: The weight average molecular weight.
    • \n
    \n", "signature": "(self, profile: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "kind": "module", "doc": "

    Created on June 2nd 2023

    \n\n

    @author: Will Kew

    \n\n

    Module for mean resolving power filtration\nBased upon the work in:

    \n\n

    Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. \nFourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. \nRapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940

    \n\n

    Calculates a m/z normalised resolving power, fits a gaussian distribution to this, and then filters out peaks which are outside of the user defined number of standard deviations

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter", "kind": "class", "doc": "

    Class for mean resolving power filtration.

    \n\n

    This module implements a mean resolving power filter based on the work described in [1].

    \n\n

    The MeanResolvingPowerFilter class provides methods to calculate the m/z normalized resolving power, fit a Gaussian distribution to it, and filter out peaks that are outside of the user-defined number of standard deviations.

    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum (object) (The mass spectrum object.):

    • \n
    • ndeviations (int) (The number of standard deviations used for filtering.):

    • \n
    • plot (bool) (Flag indicating whether to plot the results.):

    • \n
    • guess_pars (bool) (Flag indicating whether to guess the parameters for the Gaussian model.):

    • \n
    \n\n
    Methods
    \n\n
      \n
    • extract_peaks(): Extracts the peaks from the mass spectrum.
    • \n
    • normalise_rps(tmpdf_ms): Normalizes the resolving powers to be independent of m/z.
    • \n
    • calculate_distribution(tmpdf_ms): Calculates the distribution of the resolving powers.
    • \n
    • create_index_list_to_remove(tmpdf_ms, rps_thresh): Creates an index list of peaks to remove based on the calculated thresholds.
    • \n
    • main(): Executes the main filtering process and returns the index list of peaks to remove.
    • \n
    \n\n
    References
    \n\n
      \n
    1. Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P. \nFourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry. \nRapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940
    2. \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.__init__", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmass_spectrum,\tndeviations: float = 3,\tplot: bool = False,\tguess_pars: bool = False)"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.mass_spectrum", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.plot", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.plot", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.ndeviations", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.ndeviations", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.guess_pars", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.guess_pars", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.extract_peaks", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.extract_peaks", "kind": "function", "doc": "

    Extracts the peaks from the mass spectrum.

    \n\n
    Returns
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame containing the extracted peaks.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.normalise_rps", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.normalise_rps", "kind": "function", "doc": "

    Normalizes the resolving powers to be independent of m/z.

    \n\n
    Parameters
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame containing the extracted peaks.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame with the resolving powers normalized.
    • \n
    \n", "signature": "(self, tmpdf_ms):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.calculate_distribution", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.calculate_distribution", "kind": "function", "doc": "

    Calculates the distribution of the resolving powers.

    \n\n
    Parameters
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame containing the extracted peaks with normalized resolving powers.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • rps_thresh (list):\nA list of the calculated thresholds for filtering.
    • \n
    \n", "signature": "(self, tmpdf_ms):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.create_index_list_to_remove", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.create_index_list_to_remove", "kind": "function", "doc": "

    Creates an index list of peaks to remove based on the calculated thresholds.

    \n\n
    Parameters
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame containing the extracted peaks with normalized resolving powers.
    • \n
    • rps_thresh (list):\nA list of the calculated thresholds for filtering.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • index_to_keep (list):\nA list of indices of peaks to keep.
    • \n
    \n", "signature": "(self, tmpdf_ms, rps_thresh: list):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.main", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.main", "kind": "function", "doc": "

    Executes the main filtering process and returns the index list of peaks to remove.

    \n\n
    Returns
    \n\n
      \n
    • index_to_remove (list):\nA list of indices of peaks to remove.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc", "kind": "class", "doc": "

    Class for noise threshold calculation.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • settings (MSParameters):\nThe mass spectrum parameters object.
    • \n
    • is_centroid (bool):\nFlag indicating whether the mass spectrum is centroid or profile.
    • \n
    • baseline_noise (float):\nThe baseline noise.
    • \n
    • baseline_noise_std (float):\nThe baseline noise standard deviation.
    • \n
    • max_signal_to_noise (float):\nThe maximum signal to noise.
    • \n
    • max_abundance (float):\nThe maximum abundance.
    • \n
    • abundance (np.array):\nThe abundance array.
    • \n
    • abundance_profile (np.array):\nThe abundance profile array.
    • \n
    • mz_exp (np.array):\nThe experimental m/z array.
    • \n
    • mz_exp_profile (np.array):\nThe experimental m/z profile array.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • None
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_noise_threshold(). Get the noise threshold.
    • \n
    • cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    • \n
    • get_noise_average(ymincentroid). \nGet the average noise and standard deviation.
    • \n
    • get_abundance_minima_centroid(abun_cut)\nGet the abundance minima for centroid data.
    • \n
    • run_log_noise_threshold_calc(). \nRun the log noise threshold calculation.
    • \n
    • run_noise_threshold_calc(). \nRun the noise threshold calculation.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.get_noise_threshold", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.get_noise_threshold", "kind": "function", "doc": "

    Get the noise threshold.

    \n\n
    Returns
    \n\n
      \n
    • Tuple[Tuple[float, float], Tuple[float, float]]: A tuple containing the m/z and abundance noise thresholds.\n(min_mz, max_mz), (noise_threshold, noise_threshold)
    • \n
    \n", "signature": "(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.cut_mz_domain_noise", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.cut_mz_domain_noise", "kind": "function", "doc": "

    Cut the m/z domain to the noise threshold regions.

    \n\n
    Returns
    \n\n
      \n
    • Tuple[np.array, np.array]: A tuple containing the m/z and abundance arrays of the truncated spectrum region.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.get_noise_average", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.get_noise_average", "kind": "function", "doc": "

    Get the average noise and standard deviation.

    \n\n
    Parameters
    \n\n
      \n
    • ymincentroid (np.array):\nThe ymincentroid array.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Tuple[float, float]: A tuple containing the average noise and standard deviation.
    • \n
    \n", "signature": "(self, ymincentroid):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.get_abundance_minima_centroid", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.get_abundance_minima_centroid", "kind": "function", "doc": "

    Get the abundance minima for centroid data.

    \n\n
    Parameters
    \n\n
      \n
    • abun_cut (np.array):\nThe abundance cut array.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • np.array: The abundance minima array.
    • \n
    \n", "signature": "(self, abun_cut):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.run_log_noise_threshold_calc", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.run_log_noise_threshold_calc", "kind": "function", "doc": "

    Run the log noise threshold calculation.

    \n\n
    Returns
    \n\n
      \n
    • Tuple[float, float]: A tuple containing the average noise and standard deviation.
    • \n
    \n\n
    Notes
    \n\n

    Method for estimating the noise based on decimal log of all the data points

    \n\n

    Idea is that you calculate a histogram of the log10(abundance) values. \nThe maximum of the histogram == the standard deviation of the noise.

    \n\n

    For aFT data it is a gaussian distribution of noise - not implemented here!\nFor mFT data it is a Rayleigh distribution, and the value is actually 10^(abu_max)*0.463.

    \n\n

    See the publication cited above for the derivation of this.

    \n\n
    References
    \n\n
      \n
    1. dx.doi.org/10.1021/ac403278t | Anal. Chem. 2014, 86, 3308\u22123316
    2. \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.run_noise_threshold_calc", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.run_noise_threshold_calc", "kind": "function", "doc": "

    Runs noise threshold calculation (not log based method)

    \n\n
    Returns
    \n\n
      \n
    • Tuple[float, float]: A tuple containing the average noise and standard deviation.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "kind": "module", "doc": "

    This code is for Bayesian estimation of the noise levels. \nIt is not implemented or used in the current code base. \nThe packages it uses are not part of the requirements. \nIf you want to use it, you will need to install them manually.

    \n"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes.BayesNoiseCalc", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "qualname": "BayesNoiseCalc", "kind": "class", "doc": "

    Class for noise threshold calculation.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • settings (MSParameters):\nThe mass spectrum parameters object.
    • \n
    • is_centroid (bool):\nFlag indicating whether the mass spectrum is centroid or profile.
    • \n
    • baseline_noise (float):\nThe baseline noise.
    • \n
    • baseline_noise_std (float):\nThe baseline noise standard deviation.
    • \n
    • max_signal_to_noise (float):\nThe maximum signal to noise.
    • \n
    • max_abundance (float):\nThe maximum abundance.
    • \n
    • abundance (np.array):\nThe abundance array.
    • \n
    • abundance_profile (np.array):\nThe abundance profile array.
    • \n
    • mz_exp (np.array):\nThe experimental m/z array.
    • \n
    • mz_exp_profile (np.array):\nThe experimental m/z profile array.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • None
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_noise_threshold(). Get the noise threshold.
    • \n
    • cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    • \n
    • get_noise_average(ymincentroid). \nGet the average noise and standard deviation.
    • \n
    • get_abundance_minima_centroid(abun_cut)\nGet the abundance minima for centroid data.
    • \n
    • run_log_noise_threshold_calc(). \nRun the log noise threshold calculation.
    • \n
    • run_noise_threshold_calc(). \nRun the noise threshold calculation.
    • \n
    \n", "bases": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes.BayesNoiseCalc.from_posterior", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "qualname": "BayesNoiseCalc.from_posterior", "kind": "function", "doc": "

    Legacy code for Bayesian efforts - not used.

    \n\n

    pymc3 is not installed by default; \n if you plan to use it, manual installation of the pymc3 \n package is needed before using this method

    \n", "signature": "(self, param, samples):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes.BayesNoiseCalc.error_model_from_trace", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "qualname": "BayesNoiseCalc.error_model_from_trace", "kind": "function", "doc": "

    Legacy code for Bayesian efforts - not used.

    \n\n

    pymc3 is not installed by default; \n if you plan to use it, manual installation of the pymc3 \n package is needed before using this method

    \n", "signature": "(self, trace, ymincentroid):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes.BayesNoiseCalc.simple_model_error_dist", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "qualname": "BayesNoiseCalc.simple_model_error_dist", "kind": "function", "doc": "

    Legacy code for Bayesian efforts - not used.

    \n\n

    pymc3 is not installed by default; \n if you plan to use it, manual installation of the pymc3 \n package is needed before using this method

    \n", "signature": "(self, ymincentroid):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "kind": "module", "doc": "

    @author: Yuri E. Corilo\n@date: Jun 27, 2019

    \n"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking", "kind": "class", "doc": "

    Class for peak picking.

    \n\n
    Parameters
    \n\n
      \n
    • None
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • None
    • \n
    \n\n
    Methods
    \n\n
      \n
    • prepare_peak_picking_data().\nPrepare the mz, abundance, and frequency data for peak picking.
    • \n
    • cut_mz_domain_peak_picking().\nCut the m/z domain for peak picking.
    • \n
    • extrapolate_axes_for_pp(mz=None, abund=None, freq=None).\nExtrapolate the m/z axis and fill the abundance axis with 0s.
    • \n
    • do_peak_picking().\nPerform peak picking.
    • \n
    • find_minima(apex_index, abundance, len_abundance, right=True).\nFind the minima of a peak.
    • \n
    • linear_fit_calc(intes, massa, index_term, index_sign).\nAlgebraic solution to a linear fit.
    • \n
    • calculate_resolving_power(intes, massa, current_index).\nCalculate the resolving power of a peak.
    • \n
    • cal_minima(mass, abun).\nCalculate the minima of a peak.
    • \n
    • calc_centroid(mass, abund, freq).\nCalculate the centroid of a peak.
    • \n
    • get_threshold(intes).\nGet the intensity threshold for peak picking.
    • \n
    • algebraic_quadratic(list_mass, list_y).\nFind the apex of a peak - algebraically.
    • \n
    • find_apex_fit_quadratic(mass, abund, freq, current_index).\nFind the apex of a peak.
    • \n
    • check_prominence(abun, current_index, len_abundance, peak_height_diff).\nCheck the prominence of a peak.
    • \n
    • use_the_max(mass, abund, current_index, len_abundance, peak_height_diff).\nUse the max peak height as the centroid.
    • \n
    • calc_centroid_legacy(mass, abund, freq).\nLegacy centroid calculation. Deprecated - for deletion.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.prepare_peak_picking_data", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.prepare_peak_picking_data", "kind": "function", "doc": "

    Prepare the data for peak picking.

    \n\n

    This function will prepare the m/z, abundance, and frequency data for peak picking according to the settings.

    \n\n
    Returns
    \n\n
      \n
    • mz (ndarray):\nThe m/z axis.
    • \n
    • abundance (ndarray):\nThe abundance axis.
    • \n
    • freq (ndarray or None):\nThe frequency axis, if available.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.cut_mz_domain_peak_picking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.cut_mz_domain_peak_picking", "kind": "function", "doc": "

    Cut the m/z domain for peak picking.

    \n\n

    Simplified function

    \n\n
    Returns
    \n\n
      \n
    • mz_domain_X_low_cutoff (ndarray):\nThe m/z values within the specified range.
    • \n
    • mz_domain_low_Y_cutoff (ndarray):\nThe abundance values within the specified range.
    • \n
    • freq_domain_low_Y_cutoff (ndarray or None):\nThe frequency values within the specified range, if available.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.legacy_cut_mz_domain_peak_picking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.legacy_cut_mz_domain_peak_picking", "kind": "function", "doc": "

    Cut the m/z domain for peak picking.\nDEPRECATED

    \n\n
    Returns
    \n\n
      \n
    • mz_domain_X_low_cutoff (ndarray):\nThe m/z values within the specified range.
    • \n
    • mz_domain_low_Y_cutoff (ndarray):\nThe abundance values within the specified range.
    • \n
    • freq_domain_low_Y_cutoff (ndarray or None):\nThe frequency values within the specified range, if available.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.extrapolate_axis", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.extrapolate_axis", "kind": "function", "doc": "

    This function will extrapolate an input array in both directions by N pts.

    \n\n
    Parameters
    \n\n
      \n
    • initial_array (ndarray):\nThe input array.
    • \n
    • pts (int):\nThe number of points to extrapolate.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ndarray: The extrapolated array.
    • \n
    \n\n
    Notes
    \n\n

    This is a static method.

    \n", "signature": "(initial_array, pts):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.extrapolate_axes_for_pp", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.extrapolate_axes_for_pp", "kind": "function", "doc": "

    Extrapolate the m/z axis and fill the abundance axis with 0s.

    \n\n
    Parameters
    \n\n
      \n
    • mz (ndarray or None):\nThe m/z axis, if available. If None, the experimental m/z axis is used.
    • \n
    • abund (ndarray or None):\nThe abundance axis, if available. If None, the experimental abundance axis is used.
    • \n
    • freq (ndarray or None):\nThe frequency axis, if available. If None, the experimental frequency axis is used.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz (ndarray):\nThe extrapolated m/z axis.
    • \n
    • abund (ndarray):\nThe abundance axis with 0s filled.
    • \n
    • freq (ndarray or None):\nThe extrapolated frequency axis, if available.
    • \n
    \n\n
    Notes
    \n\n

    This function will extrapolate the mz axis by the number of datapoints specified in the settings,\nand fill the abundance axis with 0s. \nThis should prevent peak picking issues at the spectrum edge.

    \n", "signature": "(self, mz=None, abund=None, freq=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.do_peak_picking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.do_peak_picking", "kind": "function", "doc": "

    Perform peak picking.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.find_minima", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.find_minima", "kind": "function", "doc": "

    Find the minima of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • apex_index (int):\nThe index of the peak apex.
    • \n
    • abundance (ndarray):\nThe abundance values.
    • \n
    • len_abundance (int):\nThe length of the abundance array.
    • \n
    • right (bool, optional):\nFlag indicating whether to search for minima to the right of the apex (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The index of the minima.
    • \n
    \n", "signature": "(self, apex_index, abundance, len_abundance, right=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.linear_fit_calc", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.linear_fit_calc", "kind": "function", "doc": "

    Algebraic solution to a linear fit - roughly 25-50x faster than numpy polyfit when passing only two vals and doing a 1st order fit

    \n\n
    Parameters
    \n\n
      \n
    • intes (ndarray):\nThe intensity values.
    • \n
    • massa (ndarray):\nThe mass values.
    • \n
    • index_term (int):\nThe index of the current term.
    • \n
    • index_sign (str):\nThe index sign
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ndarray: The coefficients of the linear fit.
    • \n
    \n\n
    Notes
    \n\n

    This is a static method.

    \n", "signature": "(intes, massa, index_term, index_sign):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.calculate_resolving_power", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.calculate_resolving_power", "kind": "function", "doc": "

    Calculate the resolving power of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • intes (ndarray):\nThe intensity values.
    • \n
    • massa (ndarray):\nThe mass values.
    • \n
    • current_index (int):\nThe index of the current peak.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The resolving power of the peak.
    • \n
    \n\n
    Notes
    \n\n

    This is a conservative calculation of resolving power,\nthe peak needs to be resolved at least at the half-maximum magnitude,\notherwise, the combined full width at half maximum is used to calculate resolving power.

    \n", "signature": "(self, intes, massa, current_index):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.cal_minima", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.cal_minima", "kind": "function", "doc": "

    Calculate the minima of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • mass (ndarray):\nThe mass values.
    • \n
    • abun (ndarray):\nThe abundance values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ndarray or None: The mass values at the minima, if found.
    • \n
    \n", "signature": "(self, mass, abun):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.calc_centroid", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.calc_centroid", "kind": "function", "doc": "

    Calculate the centroid of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • mass (ndarray):\nThe mass values.
    • \n
    • abund (ndarray):\nThe abundance values.
    • \n
    • freq (ndarray or None):\nThe frequency values, if available.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None
    • \n
    \n", "signature": "(self, mass, abund, freq):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.get_threshold", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.get_threshold", "kind": "function", "doc": "

    Get the intensity threshold for peak picking.

    \n\n
    Parameters
    \n\n
      \n
    • intes (ndarray):\nThe intensity values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The intensity threshold.
    • \n
    • float: The factor to multiply the intensity threshold by.
    • \n
    \n", "signature": "(self, intes):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.algebraic_quadratic", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.algebraic_quadratic", "kind": "function", "doc": "

    Find the apex of a peak - algebraically. \nFaster than using numpy polyfit by ~28x per fit.

    \n\n
    Parameters
    \n\n
      \n
    • list_mass (ndarray):\nlist of m/z values (3 points)
    • \n
    • list_y (ndarray):\nlist of abundance values (3 points)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • a, b, c (float):\ncoefficients of the quadratic equation.
    • \n
    \n\n
    Notes
    \n\n

    This is a static method.

    \n", "signature": "(list_mass, list_y):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.find_apex_fit_quadratic", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.find_apex_fit_quadratic", "kind": "function", "doc": "

    Find the apex of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • mass (ndarray):\nThe mass values.
    • \n
    • abund (ndarray):\nThe abundance values.
    • \n
    • freq (ndarray or None):\nThe frequency values, if available.
    • \n
    • current_index (int):\nThe index of the current peak.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The m/z value of the peak apex.
    • \n
    • float: The frequency value of the peak apex, if available.
    • \n
    • float: The abundance value of the peak apex.
    • \n
    \n", "signature": "(self, mass, abund, freq, current_index):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.check_prominence", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.check_prominence", "kind": "function", "doc": "

    Check the prominence of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • abun (ndarray):\nThe abundance values.
    • \n
    • current_index (int):\nThe index of the current peak.
    • \n
    • len_abundance (int):\nThe length of the abundance array.
    • \n
    • peak_height_diff (function):\nThe function to calculate the peak height difference.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple or False: A tuple containing the indexes of the peak, if the prominence is above the threshold.\nOtherwise, False.
    • \n
    \n", "signature": "(self, abun, current_index, len_abundance, peak_height_diff) -> tuple:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.use_the_max", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.use_the_max", "kind": "function", "doc": "

    Use the max peak height as the centroid

    \n\n
    Parameters
    \n\n
      \n
    • mass (ndarray):\nThe mass values.
    • \n
    • abund (ndarray):\nThe abundance values.
    • \n
    • current_index (int):\nThe index of the current peak.
    • \n
    • len_abundance (int):\nThe length of the abundance array.
    • \n
    • peak_height_diff (function):\nThe function to calculate the peak height difference.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The m/z value of the peak apex.
    • \n
    • float: The abundance value of the peak apex.
    • \n
    • tuple or None: A tuple containing the indexes of the peak, if the prominence is above the threshold.\nOtherwise, None.
    • \n
    \n", "signature": "(self, mass, abund, current_index, len_abundance, peak_height_diff):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.calc_centroid_legacy", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.calc_centroid_legacy", "kind": "function", "doc": "

    Legacy centroid calculation\nDeprecated - for deletion.

    \n", "signature": "(self, mass, abund, freq):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory", "modulename": "corems.mass_spectrum.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.overrides", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "overrides", "kind": "function", "doc": "

    Checks if the method overrides a method from an interface class.

    \n", "signature": "(interface_class):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase", "kind": "class", "doc": "

    A mass spectrum base class, stores the profile data and instrument settings.

    \n\n

    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.\n_mspeaks is populated under the hood by calling process_mass_spec method.\nIteration is null if _mspeaks is empty.

    \n\n
    Parameters
    \n\n
      \n
    • mz_exp (array_like):\nThe m/z values of the mass spectrum.
    • \n
    • abundance (array_like):\nThe abundance values of the mass spectrum.
    • \n
    • d_params (dict):\nA dictionary of parameters for the mass spectrum.
    • \n
    • **kwargs: Additional keyword arguments.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mspeaks (list):\nA list of mass peaks.
    • \n
    • is_calibrated (bool):\nWhether the mass spectrum is calibrated.
    • \n
    • is_centroid (bool):\nWhether the mass spectrum is centroided.
    • \n
    • has_frequency (bool):\nWhether the mass spectrum has a frequency domain.
    • \n
    • calibration_order (None or int):\nThe order of the mass spectrum's calibration.
    • \n
    • calibration_points (None or ndarray):\nThe calibration points of the mass spectrum.
    • \n
    • calibration_ref_mzs (None or ndarray):\nThe reference m/z values of the mass spectrum's calibration.
    • \n
    • calibration_meas_mzs (None or ndarray):\nThe measured m/z values of the mass spectrum's calibration.
    • \n
    • calibration_RMS (None or float):\nThe root mean square of the mass spectrum's calibration.
    • \n
    • calibration_segment (None or CalibrationSegment):\nThe calibration segment of the mass spectrum.
    • \n
    • _abundance (ndarray):\nThe abundance values of the mass spectrum.
    • \n
    • _mz_exp (ndarray):\nThe m/z values of the mass spectrum.
    • \n
    • _mspeaks (list):\nA list of mass peaks.
    • \n
    • _dict_nominal_masses_indexes (dict):\nA dictionary of nominal masses and their indexes.
    • \n
    • _baseline_noise (float):\nThe baseline noise of the mass spectrum.
    • \n
    • _baseline_noise_std (float):\nThe standard deviation of the baseline noise of the mass spectrum.
    • \n
    • _dynamic_range (float or None):\nThe dynamic range of the mass spectrum.
    • \n
    • _transient_settings (None or TransientSettings):\nThe transient settings of the mass spectrum.
    • \n
    • _frequency_domain (None or FrequencyDomain):\nThe frequency domain of the mass spectrum.
    • \n
    • _mz_cal_profile (None or MzCalibrationProfile):\nThe m/z calibration profile of the mass spectrum.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • process_mass_spec(). Main function to process the mass spectrum, \nincluding calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
    • \n
    \n\n

    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()

    \n", "bases": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc, corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(mz_exp, abundance, d_params, **kwargs)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mspeaks", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mspeaks", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.is_calibrated", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.is_calibrated", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.is_centroid", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.is_centroid", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.has_frequency", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.has_frequency", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_order", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_order", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_points", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_points", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_ref_mzs", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_ref_mzs", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_meas_mzs", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_meas_mzs", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_RMS", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_RMS", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_segment", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_segment", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_raw_error_median", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_raw_error_median", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_raw_error_stdev", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_raw_error_stdev", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.set_indexes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.set_indexes", "kind": "function", "doc": "

    Set the mass spectrum to iterate over only the selected MSpeaks indexes.

    \n\n
    Parameters
    \n\n
      \n
    • list_indexes (list of int):\nA list of integers representing the indexes of the MSpeaks to iterate over.
    • \n
    \n", "signature": "(self, list_indexes):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.reset_indexes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.reset_indexes", "kind": "function", "doc": "

    Reset the mass spectrum to iterate over all MSpeaks objects.

    \n\n

    This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.\nIt also sets the index of each MSpeak object to its corresponding position in the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.add_mspeak", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.add_mspeak", "kind": "function", "doc": "

    Add a new MSPeak object to the MassSpectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • ion_charge (int):\nThe ion charge of the MSPeak.
    • \n
    • mz_exp (float):\nThe experimental m/z value of the MSPeak.
    • \n
    • abundance (float):\nThe abundance of the MSPeak.
    • \n
    • resolving_power (float):\nThe resolving power of the MSPeak.
    • \n
    • signal_to_noise (float):\nThe signal-to-noise ratio of the MSPeak.
    • \n
    • massspec_indexes (list):\nA list of indexes of the MSPeak in the MassSpectrum object.
    • \n
    • exp_freq (float, optional):\nThe experimental frequency of the MSPeak. Defaults to None.
    • \n
    • ms_parent (MSParent, optional):\nThe MSParent object associated with the MSPeak. Defaults to None.
    • \n
    \n", "signature": "(\tself,\tion_charge,\tmz_exp,\tabundance,\tresolving_power,\tsignal_to_noise,\tmassspec_indexes,\texp_freq=None,\tms_parent=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.reset_cal_therms", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.reset_cal_therms", "kind": "function", "doc": "

    Reset calibration terms and recalculate the mass-to-charge ratio and abundance.

    \n\n
    Parameters
    \n\n
      \n
    • Aterm (float):\nThe A-term calibration coefficient.
    • \n
    • Bterm (float):\nThe B-term calibration coefficient.
    • \n
    • C (float):\nThe C-term calibration coefficient.
    • \n
    • fas (float, optional):\nThe frequency amplitude scaling factor. Default is 0.
    • \n
    \n", "signature": "(self, Aterm, Bterm, C, fas=0):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.clear_molecular_formulas", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.clear_molecular_formulas", "kind": "function", "doc": "

    Clear the molecular formulas for all mspeaks in the MassSpectrum.

    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.process_mass_spec", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.process_mass_spec", "kind": "function", "doc": "

    Process the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • keep_profile (bool, optional):\nWhether to keep the profile data after processing. Defaults to True.
    • \n
    \n\n
    Notes
    \n\n

    This method does the following:

    \n\n
      \n
    • calculates the noise threshold
    • \n
    • does peak picking (creates mspeak_objs)
    • \n
    • resets the mspeak_obj indexes
    • \n
    \n", "signature": "(self, keep_profile=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.cal_noise_threshold", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.cal_noise_threshold", "kind": "function", "doc": "

    Calculate the noise threshold of the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.parameters", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.parameters", "kind": "variable", "doc": "

    Return the parameters of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.set_parameter_from_json", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.set_parameter_from_json", "kind": "function", "doc": "

    Set the parameters of the mass spectrum from a JSON file.

    \n\n
    Parameters
    \n\n
      \n
    • parameters_path (str):\nThe path to the JSON file containing the parameters.
    • \n
    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.set_parameter_from_toml", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.set_parameter_from_toml", "kind": "function", "doc": "

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mspeaks_settings", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mspeaks_settings", "kind": "variable", "doc": "

    Return the MS peak settings of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.settings", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.settings", "kind": "variable", "doc": "

    Return the settings of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.molecular_search_settings", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.molecular_search_settings", "kind": "variable", "doc": "

    Return the molecular search settings of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_cal_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_cal_profile", "kind": "variable", "doc": "

    Return the calibrated m/z profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_cal", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_cal", "kind": "variable", "doc": "

    Return the calibrated m/z values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_exp", "kind": "variable", "doc": "

    Return the experimental m/z values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.freq_exp_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.freq_exp_profile", "kind": "variable", "doc": "

    Return the experimental frequency profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.freq_exp_pp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.freq_exp_pp", "kind": "variable", "doc": "

    Return the experimental frequency values of the mass spectrum that are used for peak picking.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_exp_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_exp_profile", "kind": "variable", "doc": "

    Return the experimental m/z profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_exp_pp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_exp_pp", "kind": "variable", "doc": "

    Return the experimental m/z values of the mass spectrum that are used for peak picking.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.abundance_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.abundance_profile", "kind": "variable", "doc": "

    Return the abundance profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.abundance_profile_pp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.abundance_profile_pp", "kind": "variable", "doc": "

    Return the abundance profile of the mass spectrum that is used for peak picking.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.abundance", "kind": "variable", "doc": "

    Return the abundance values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.freq_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.freq_exp", "kind": "function", "doc": "

    Return the experimental frequency values of the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.resolving_power", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.resolving_power", "kind": "variable", "doc": "

    Return the resolving power values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.signal_to_noise", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.signal_to_noise", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.nominal_mz", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.nominal_mz", "kind": "variable", "doc": "

    Return the nominal m/z values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.get_mz_and_abundance_peaks_tuples", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.get_mz_and_abundance_peaks_tuples", "kind": "function", "doc": "

    Return a list of tuples containing the m/z and abundance values of the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.kmd", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.kmd", "kind": "variable", "doc": "

    Return the Kendrick mass defect values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.kendrick_mass", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.kendrick_mass", "kind": "variable", "doc": "

    Return the Kendrick mass values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.max_mz_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.max_mz_exp", "kind": "variable", "doc": "

    Return the maximum experimental m/z value of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.min_mz_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.min_mz_exp", "kind": "variable", "doc": "

    Return the minimum experimental m/z value of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.max_abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.max_abundance", "kind": "variable", "doc": "

    Return the maximum abundance value of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.max_signal_to_noise", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.max_signal_to_noise", "kind": "variable", "doc": "

    Return the maximum signal-to-noise ratio of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.most_abundant_mspeak", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.most_abundant_mspeak", "kind": "variable", "doc": "

    Return the most abundant MSpeak object of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.min_abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.min_abundance", "kind": "variable", "doc": "

    Return the minimum abundance value of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.dynamic_range", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.dynamic_range", "kind": "variable", "doc": "

    Return the dynamic range of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.baseline_noise", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.baseline_noise", "kind": "variable", "doc": "

    Return the baseline noise of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.baseline_noise_std", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.baseline_noise_std", "kind": "variable", "doc": "

    Return the standard deviation of the baseline noise of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.Aterm", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.Aterm", "kind": "variable", "doc": "

    Return the A-term calibration coefficient of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.Bterm", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.Bterm", "kind": "variable", "doc": "

    Return the B-term calibration coefficient of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.Cterm", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.Cterm", "kind": "variable", "doc": "

    Return the C-term calibration coefficient of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filename", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filename", "kind": "variable", "doc": "

    Return the filename of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.dir_location", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.dir_location", "kind": "variable", "doc": "

    Return the directory location of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.sort_by_mz", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.sort_by_mz", "kind": "function", "doc": "

    Sort the mass spectrum by m/z values.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.sort_by_abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.sort_by_abundance", "kind": "function", "doc": "

    Sort the mass spectrum by abundance values.

    \n", "signature": "(self, reverse=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.tic", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.tic", "kind": "variable", "doc": "

    Return the total ion current of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.check_mspeaks_warning", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.check_mspeaks_warning", "kind": "function", "doc": "

    Check if the mass spectrum has MSpeaks objects.

    \n\n
    Raises
    \n\n
      \n
    • Warning: If the mass spectrum has no MSpeaks objects.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.check_mspeaks", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.check_mspeaks", "kind": "function", "doc": "

    Check if the mass spectrum has MSpeaks objects.

    \n\n
    Raises
    \n\n
      \n
    • Exception: If the mass spectrum has no MSpeaks objects.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.remove_assignment_by_index", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.remove_assignment_by_index", "kind": "function", "doc": "

    Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.

    \n\n
    Parameters
    \n\n
      \n
    • indexes (list of int):\nA list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
    • \n
    \n", "signature": "(self, indexes):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_index", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_index", "kind": "function", "doc": "

    Filter the mass spectrum by the specified indexes.

    \n\n
    Parameters
    \n\n
      \n
    • list_indexes (list of int):\nA list of indexes of the MSpeaks objects to drop.
    • \n
    \n", "signature": "(self, list_indexes):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_mz", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_mz", "kind": "function", "doc": "

    Filter the mass spectrum by the specified m/z range.

    \n\n
    Parameters
    \n\n
      \n
    • min_mz (float):\nThe minimum m/z value to keep.
    • \n
    • max_mz (float):\nThe maximum m/z value to keep.
    • \n
    \n", "signature": "(self, min_mz, max_mz):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_s2n", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_s2n", "kind": "function", "doc": "

    Filter the mass spectrum by the specified signal-to-noise ratio range.

    \n\n
    Parameters
    \n\n
      \n
    • min_s2n (float):\nThe minimum signal-to-noise ratio to keep.
    • \n
    • max_s2n (float, optional):\nThe maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
    • \n
    \n", "signature": "(self, min_s2n, max_s2n=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_abundance", "kind": "function", "doc": "

    Filter the mass spectrum by the specified abundance range.

    \n\n
    Parameters
    \n\n
      \n
    • min_abund (float):\nThe minimum abundance to keep.
    • \n
    • max_abund (float, optional):\nThe maximum abundance to keep. Defaults to False (no maximum).
    • \n
    \n", "signature": "(self, min_abund, max_abund=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_max_resolving_power", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_max_resolving_power", "kind": "function", "doc": "

    Filter the mass spectrum by the specified maximum resolving power.

    \n\n
    Parameters
    \n\n
      \n
    • B (float):

    • \n
    • T (float):

    • \n
    \n", "signature": "(self, B, T):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_mean_resolving_power", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_mean_resolving_power", "kind": "function", "doc": "

    Filter the mass spectrum by the specified mean resolving power.

    \n\n
    Parameters
    \n\n
      \n
    • ndeviations (float, optional):\nThe number of standard deviations to use for filtering. Defaults to 3.
    • \n
    • plot (bool, optional):\nWhether to plot the resolving power distribution. Defaults to False.
    • \n
    • guess_pars (bool, optional):\nWhether to guess the parameters for the Gaussian model. Defaults to False.
    • \n
    \n", "signature": "(self, ndeviations=3, plot=False, guess_pars=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_min_resolving_power", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_min_resolving_power", "kind": "function", "doc": "

    Filter the mass spectrum by the specified minimum resolving power.

    \n\n
    Parameters
    \n\n
      \n
    • B (float):

    • \n
    • T (float):

    • \n
    \n", "signature": "(self, B, T):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_noise_threshold", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_noise_threshold", "kind": "function", "doc": "

    Filter the mass spectrum by the noise threshold.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.find_peaks", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.find_peaks", "kind": "function", "doc": "

    Find the peaks of the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.change_kendrick_base_all_mspeaks", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.change_kendrick_base_all_mspeaks", "kind": "function", "doc": "

    Change the Kendrick base of all MSpeaks objects.

    \n\n
    Parameters
    \n\n
      \n
    • kendrick_dict_base (dict):\nA dictionary of the Kendrick base to change to.
    • \n
    \n\n
    Notes
    \n\n

    Example of the kendrick_dict_base parameter: kendrick_dict_base = {\"C\": 1, \"H\": 2} or {\"C\": 1, \"H\": 1, \"O\": 1}, etc.

    \n", "signature": "(self, kendrick_dict_base):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.get_nominal_mz_first_last_indexes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.get_nominal_mz_first_last_indexes", "kind": "function", "doc": "

    Return the first and last indexes of the MSpeaks objects with the specified nominal mass.

    \n\n
    Parameters
    \n\n
      \n
    • nominal_mass (int):\nThe nominal mass to get the indexes for.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
    • \n
    \n", "signature": "(self, nominal_mass):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.get_masses_count_by_nominal_mass", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.get_masses_count_by_nominal_mass", "kind": "function", "doc": "

    Return a dictionary of the nominal masses and their counts.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.datapoints_count_by_nominal_mz", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.datapoints_count_by_nominal_mz", "kind": "function", "doc": "

    Return a dictionary of the nominal masses and their counts.

    \n\n
    Parameters
    \n\n
      \n
    • mz_overlay (float, optional):\nThe m/z overlay to use for counting. Defaults to 0.1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary of the nominal masses and their counts.
    • \n
    \n", "signature": "(self, mz_overlay=0.1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.get_nominal_mass_indexes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.get_nominal_mass_indexes", "kind": "function", "doc": "

    Return the indexes of the MSpeaks objects with the specified nominal mass.

    \n\n
    Parameters
    \n\n
      \n
    • nominal_mass (int):\nThe nominal mass to get the indexes for.
    • \n
    • overlay (float, optional):\nThe m/z overlay to use for counting. Defaults to 0.1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • generator: A generator of the indexes of the MSpeaks objects with the specified nominal mass.
    • \n
    \n", "signature": "(self, nominal_mass, overlay=0.1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.plot_centroid", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.plot_centroid", "kind": "function", "doc": "

    Plot the centroid data of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nThe matplotlib axes to plot on. Defaults to None.
    • \n
    • c (str, optional):\nThe color to use for the plot. Defaults to 'g' (green).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes.Axes: The matplotlib axes containing the plot.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If no centroid data is found.
    • \n
    \n", "signature": "(self, ax=None, c='g'):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.plot_profile_and_noise_threshold", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.plot_profile_and_noise_threshold", "kind": "function", "doc": "

    Plot the profile data and noise threshold of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nThe matplotlib axes to plot on. Defaults to None.
    • \n
    • legend (bool, optional):\nWhether to show the legend. Defaults to False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes.Axes: The matplotlib axes containing the plot.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If no noise threshold is found.
    • \n
    \n", "signature": "(self, ax=None, legend=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.plot_mz_domain_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.plot_mz_domain_profile", "kind": "function", "doc": "

    Plot the m/z domain profile of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • color (str, optional):\nThe color to use for the plot. Defaults to 'green'.
    • \n
    • ax (matplotlib.axes.Axes, optional):\nThe matplotlib axes to plot on. Defaults to None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes.Axes: The matplotlib axes containing the plot.
    • \n
    \n", "signature": "(self, color='green', ax=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_excel", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_excel", "kind": "function", "doc": "

    Export the mass spectrum to an Excel file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe path to the Excel file to export to.
    • \n
    • write_metadata (bool, optional):\nWhether to write the metadata to the Excel file. Defaults to True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None
    • \n
    \n", "signature": "(self, out_file_path, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_hdf", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_hdf", "kind": "function", "doc": "

    Export the mass spectrum to an HDF file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe path to the HDF file to export to.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None
    • \n
    \n", "signature": "(self, out_file_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_csv", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_csv", "kind": "function", "doc": "

    Export the mass spectrum to a CSV file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe path to the CSV file to export to.
    • \n
    • write_metadata (bool, optional):\nWhether to write the metadata to the CSV file. Defaults to True.
    • \n
    \n", "signature": "(self, out_file_path, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_pandas", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_pandas", "kind": "function", "doc": "

    Export the mass spectrum to a Pandas dataframe with pkl extension.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe path to the pickle (.pkl) file to export to.
    • \n
    • write_metadata (bool, optional):\nWhether to write the metadata alongside the pickle file. Defaults to True.
    • \n
    \n", "signature": "(self, out_file_path, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_dataframe", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_dataframe", "kind": "function", "doc": "

    Return the mass spectrum as a Pandas dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • additional_columns (list, optional):\nA list of additional columns to include in the dataframe. Defaults to None.\nSuitable columns are: \"Aromaticity Index\", \"Aromaticity Index (modified)\", and \"NOSC\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: The mass spectrum as a Pandas dataframe.
    • \n
    \n", "signature": "(self, additional_columns=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_json", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_json", "kind": "function", "doc": "

    Return the mass spectrum as a JSON file.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.parameters_json", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.parameters_json", "kind": "function", "doc": "

    Return the parameters of the mass spectrum as a JSON string.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.parameters_toml", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.parameters_toml", "kind": "function", "doc": "

    Return the parameters of the mass spectrum as a TOML string.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecProfile", "kind": "class", "doc": "

    A mass spectrum class when the entry point is on profile format

    \n\n
    Notes
    \n\n

    Stores the profile data and instrument settings. \nIteration over a list of MSPeaks classes stored at the _mspeaks attributes.\n_mspeaks is populated under the hood by calling process_mass_spec method.\nIteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().

    \n\n
    Parameters
    \n\n
      \n
    • data_dict (dict):\nA dictionary containing the profile data.
    • \n
    • d_params (dict{str: float, int or str}):\ncontains the instrument settings and processing settings
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Defaults to True.
    • \n
    \n\n
    Attributes
    \n\n

    _abundance : ndarray\n The abundance values of the mass spectrum.\n_mz_exp : ndarray\n The m/z values of the mass spectrum.\n_mspeaks : list\n A list of mass peaks.

    \n\n
    Methods
    \n\n
      \n
    • process_mass_spec(). Process the mass spectrum.
    • \n
    \n\n

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()

    \n", "bases": "MassSpecBase"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecProfile.__init__", "kind": "function", "doc": "

    \n", "signature": "(data_dict, d_params, auto_process=True)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq", "kind": "class", "doc": "

    A mass spectrum class when data entry is on frequency domain

    \n\n
    Notes
    \n\n
      \n
    • Transform to m/z based on the settings stored at d_params
    • \n
    • Stores the profile data and instrument settings
    • \n
    • Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    • \n
    • _mspeaks is populated under the hood by calling process_mass_spec method
    • \n
    • iteration is null if _mspeaks is empty
    • \n
    \n\n
    Parameters
    \n\n
      \n
    • frequency_domain (list(float)):\nall datapoints in frequency domain in Hz
    • \n
    • magnitude (list(float)):\nall datapoints for the magnitude of each frequency datapoint
    • \n
    • d_params (dict{str: float, int or str}):\ncontains the instrument settings and processing settings
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Defaults to True.
    • \n
    • keep_profile (bool, optional):\nWhether to keep the profile data. Defaults to True.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • has_frequency (bool):\nWhether the mass spectrum has frequency data.
    • \n
    • _frequency_domain (list(float)):\nFrequency domain in Hz
    • \n
    • label (str):\nstore label (Bruker, Midas Transient, see Labels class). It is used across distinct processing points
    • \n
    • _abundance (ndarray):\nThe abundance values of the mass spectrum.
    • \n
    • _mz_exp (ndarray):\nThe m/z values of the mass spectrum.
    • \n
    • _mspeaks (list):\nA list of mass peaks.
    • \n
    • See Also (all the attributes of MassSpecBase class):
    • \n
    \n\n
    Methods
    \n\n
      \n
    • _set_mz_domain().\ncalculates the m_z based on the setting of d_params
    • \n
    • process_mass_spec(). Process the mass spectrum.
    • \n
    \n\n

    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()

    \n", "bases": "MassSpecBase"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfrequency_domain,\tmagnitude,\td_params,\tauto_process=True,\tkeep_profile=True)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.has_frequency", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.has_frequency", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.magnetron_frequency", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.magnetron_frequency", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.magnetron_frequency_sigma", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.magnetron_frequency_sigma", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.transient_settings", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.transient_settings", "kind": "variable", "doc": "

    Return the transient settings of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.calc_magnetron_freq", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.calc_magnetron_freq", "kind": "function", "doc": "

    Calculates the magnetron frequency of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • max_magnetron_freq (float, optional):\nThe maximum magnetron frequency. Defaults to 50.
    • \n
    • magnetron_freq_bins (int, optional):\nThe number of bins to use for the histogram. Defaults to 300.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None
    • \n
    \n\n
    Notes
    \n\n

    Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.\nA histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.\nA Gaussian model is fit to this histogram - the center value of this is (statistically, probably) the magnetron frequency.\nThis appears to work well for nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.

    \n", "signature": "(self, max_magnetron_freq=50, magnetron_freq_bins=300):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid", "kind": "class", "doc": "

    A mass spectrum class when the entry point is on centroid format

    \n\n
    Notes
    \n\n
      \n
    • Stores the centroid data and instrument settings
    • \n
    • Simulate profile data based on Gaussian or Lorentzian peak shape
    • \n
    • Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    • \n
    • _mspeaks is populated under the hood by calling process_mass_spec method
    • \n
    • iteration is null if _mspeaks is empty
    • \n
    \n\n
    Parameters
    \n\n
      \n
    • data_dict (dict{str: numpy array float64}):\ncontains keys [m/z, Abundance, Resolving Power, S/N]
    • \n
    • d_params (dict{str: float, int or str}):\ncontains the instrument settings and processing settings
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Defaults to True.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • label (str):\nstore label (Bruker, Midas Transient, see Labels class)
    • \n
    • _baseline_noise (float):\nstore baseline noise
    • \n
    • _baseline_noise_std (float):\nstore baseline noise std
    • \n
    • _abundance (ndarray):\nThe abundance values of the mass spectrum.
    • \n
    • _mz_exp (ndarray):\nThe m/z values of the mass spectrum.
    • \n
    • _mspeaks (list):\nA list of mass peaks.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • process_mass_spec().\nProcess the mass spectrum. Overridden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
    • \n
    • __simulate_profile__data__().\nSimulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.
    • \n
    \n\n

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()

    \n", "bases": "MassSpecBase"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.__init__", "kind": "function", "doc": "

    \n", "signature": "(data_dict, d_params, auto_process=True)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.is_centroid", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.is_centroid", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.data_dict", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.data_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.mz_exp_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.mz_exp_profile", "kind": "variable", "doc": "

    Return the m/z profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.abundance_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.abundance_profile", "kind": "variable", "doc": "

    Return the abundance profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.tic", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.tic", "kind": "variable", "doc": "

    Return the total ion current of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.process_mass_spec", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.process_mass_spec", "kind": "function", "doc": "

    Process the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes", "kind": "class", "doc": "

    A mass spectrum class when the entry point is on low resolution centroid format

    \n\n
    Notes
    \n\n

    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead

    \n\n
    Parameters
    \n\n
      \n
    • data_dict (dict{str: numpy array float64}):\ncontains keys [m/z, Abundance, Resolving Power, S/N]
    • \n
    • d_params (dict{str: float, int or str}):\ncontains the instrument settings and processing settings
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _processed_tic (float):\nstore processed total ion current
    • \n
    • _abundance (ndarray):\nThe abundance values of the mass spectrum.
    • \n
    • _mz_exp (ndarray):\nThe m/z values of the mass spectrum.
    • \n
    \n", "bases": "MassSpecCentroid"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.__init__", "kind": "function", "doc": "

    \n", "signature": "(data_dict, d_params)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.mz_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.mz_exp", "kind": "variable", "doc": "

    Return the m/z values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.abundance", "kind": "variable", "doc": "

    Return the abundance values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.processed_tic", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.processed_tic", "kind": "variable", "doc": "

    Return the processed total ion current of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.tic", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.tic", "kind": "variable", "doc": "

    Return the total ion current of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.mz_abun_tuples", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.mz_abun_tuples", "kind": "variable", "doc": "

    Return the m/z and abundance values of the mass spectrum as a list of tuples.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.mz_abun_dict", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.mz_abun_dict", "kind": "variable", "doc": "

    Return the m/z and abundance values of the mass spectrum as a dictionary.

    \n"}, {"fullname": "corems.mass_spectrum.input", "modulename": "corems.mass_spectrum.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass", "modulename": "corems.mass_spectrum.input.baseClass", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass", "kind": "class", "doc": "

    The MassListBaseClass object reads mass list data types and returns the mass spectrum obj

    \n\n
    Parameters
    \n\n
      \n
    • file_location (Path or S3Path):\nFull data path.
    • \n
    • isCentroid (bool, optional):\nDetermines the mass spectrum data structure. If set to True, it assumes centroid mode. If set to False, it assumes profile mode and attempts to peak pick. Default is True.
    • \n
    • analyzer (str, optional):\nThe analyzer used for the mass spectrum. Default is 'Unknown'.
    • \n
    • instrument_label (str, optional):\nThe label of the instrument used for the mass spectrum. Default is 'Unknown'.
    • \n
    • sample_name (str, optional):\nThe name of the sample. Default is None.
    • \n
    • header_lines (int, optional):\nThe number of lines to skip in the file, including the column labels line. Default is 0.
    • \n
    • isThermoProfile (bool, optional):\nDetermines the number of expected columns in the file. If set to True, only m/z and intensity columns are expected. Signal-to-noise ratio (S/N) and resolving power (RP) will be calculated based on the data. Default is False.
    • \n
    • headerless (bool, optional):\nIf True, assumes that there are no headers present in the file (e.g., a .xy file from Bruker) and assumes two columns: m/z and intensity. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • parameters (DataInputSetting):\nThe data input settings for the mass spectrum.
    • \n
    • data_type (str):\nThe type of data in the file.
    • \n
    • delimiter (str):\nThe delimiter used to read text-based files.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • set_parameter_from_toml(parameters_path). Sets the data input settings from a TOML file.
    • \n
    • set_parameter_from_json(parameters_path). Sets the data input settings from a JSON file.
    • \n
    • get_dataframe(). Reads the file and returns the data as a pandas DataFrame.
    • \n
    • load_settings(mass_spec_obj, output_parameters). Loads the settings for the mass spectrum.
    • \n
    • get_output_parameters(polarity, scan_index=0). Returns the output parameters for the mass spectrum.
    • \n
    • clean_data_frame(dataframe). Cleans the data frame by removing columns that are not in the expected columns set.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.__init__", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location: pathlib.Path | s3path.S3Path,\tisCentroid: bool = True,\tanalyzer: str = 'Unknown',\tinstrument_label: str = 'Unknown',\tsample_name: str = None,\theader_lines: int = 0,\tisThermoProfile: bool = False,\theaderless: bool = False)"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.file_location", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.header_lines", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.header_lines", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.isCentroid", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.isCentroid", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.isThermoProfile", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.isThermoProfile", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.headerless", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.headerless", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.analyzer", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.instrument_label", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.sample_name", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.sample_name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.parameters", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.parameters", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.set_parameter_from_toml", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.set_parameter_from_toml", "kind": "function", "doc": "

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.set_parameter_from_json", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.set_parameter_from_json", "kind": "function", "doc": "

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.data_type", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.data_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.delimiter", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.delimiter", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.encoding_detector", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.encoding_detector", "kind": "function", "doc": "

    Detects the encoding of a file.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\nThe location of the file to be analyzed.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: The detected encoding of the file.
    • \n
    \n", "signature": "(self, file_location) -> str:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.set_data_type", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.set_data_type", "kind": "function", "doc": "

    Set the data type and delimiter based on the file extension.

    \n\n
    Raises
    \n\n
      \n
    • TypeError: If the data type could not be automatically recognized.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.get_dataframe", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.get_dataframe", "kind": "function", "doc": "

    Get the data as a pandas DataFrame.

    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: The data as a pandas DataFrame.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • TypeError: If the data type is not supported.
    • \n
    \n", "signature": "(self) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.load_settings", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.load_settings", "kind": "function", "doc": "

    TODO loading output parameters from json file is not functional

    \n\n

    Load settings from a JSON file and apply them to the given mass_spec_obj.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec_obj (MassSpec):\nThe mass spectrum object to apply the settings to.
    • \n
    \n", "signature": "(self, mass_spec_obj, output_parameters):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.get_output_parameters", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.get_output_parameters", "kind": "function", "doc": "

    Get the output parameters for the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (int):\nThe polarity of the mass spectrum +1 or -1.
    • \n
    • scan_index (int, optional):\nThe index of the scan. Default is 0.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary containing the output parameters.
    • \n
    \n", "signature": "(self, polarity: int, scan_index: int = 0) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.clean_data_frame", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.clean_data_frame", "kind": "function", "doc": "

    Clean the input dataframe by removing columns that are not expected.

    \n\n
    Parameters
    \n\n
      \n
    • dataframe (pandas.DataFrame):\nThe input dataframe to be cleaned.
    • \n
    \n", "signature": "(self, dataframe):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.check_columns", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.check_columns", "kind": "function", "doc": "

    Check if the given header labels match the expected columns.

    \n\n
    Parameters
    \n\n
      \n
    • header_labels (list):\nThe header labels to be checked.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If any expected column is not found in the header labels.
    • \n
    \n", "signature": "(self, header_labels: list[str]):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.read_xml_peaks", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.read_xml_peaks", "kind": "function", "doc": "

    Read peaks from a Bruker .xml file and return a pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • data (str):\nThe path to the .xml file.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A DataFrame containing the peak data with columns: 'm/z', 'I', 'Resolving Power', 'Area', 'S/N', 'fwhm'.
    • \n
    \n", "signature": "(self, data: str) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.get_xml_polarity", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.get_xml_polarity", "kind": "function", "doc": "

    Get the polarity from an XML peaklist.

    \n\n
    Returns
    \n\n
      \n
    • int: The polarity of the XML peaklist. Returns -1 for negative polarity, +1 for positive polarity.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If the data type is not XML peaklist in Bruker format or if the polarity is unhandled.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5", "modulename": "corems.mass_spectrum.input.boosterHDF5", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum", "kind": "class", "doc": "

    The ReadHDF_BoosterMassSpectrum class parses the mass spectrum data from an HDF file and generates a mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\nThe path to the HDF file.
    • \n
    • isCentroid (bool, optional):\nSpecifies whether the mass spectrum is centroided or not. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • polarity (int):\nThe polarity of the mass spectrum.
    • \n
    • h5pydata (h5py.File):\nThe HDF file object.
    • \n
    • scans (list):\nThe list of scan names in the HDF file.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_data_profile(mz, abundance, auto_process). Returns a MassSpecProfile object from the given m/z and abundance arrays.
    • \n
    • get_attr_data(scan, attr_srt). Returns the attribute value for the given scan and attribute name.
    • \n
    • get_polarity(file_location). Returns the polarity of the mass spectrum.
    • \n
    • get_mass_spectrum(auto_process). Returns the mass spectrum as a MassSpecProfile object.
    • \n
    • get_output_parameters(). Returns the default output parameters for the mass spectrum.
    • \n
    \n", "bases": "corems.mass_spectrum.input.baseClass.MassListBaseClass"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.__init__", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.__init__", "kind": "function", "doc": "

    \n", "signature": "(file_location, isCentroid=False)"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.polarity", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.polarity", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_data_profile", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_data_profile", "kind": "function", "doc": "

    Returns a MassSpecProfile object from the given m/z and abundance arrays.

    \n\n
    Parameters
    \n\n
      \n
    • mz (array_like):\nThe m/z values.
    • \n
    • abundance (array_like):\nThe abundance values.
    • \n
    • auto_process (bool):\nSpecifies whether to automatically process the mass spectrum.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile: The MassSpecProfile object.
    • \n
    \n", "signature": "(\tself,\tmz,\tabundance,\tauto_process) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_attr_data", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_attr_data", "kind": "function", "doc": "

    Returns the attribute value for the given scan and attribute name.

    \n\n
    Parameters
    \n\n
      \n
    • scan (int):\nThe scan index.
    • \n
    • attr_srt (str):\nThe attribute name.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • object: The attribute value.
    • \n
    \n", "signature": "(self, scan, attr_srt):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_polarity", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_polarity", "kind": "function", "doc": "

    Returns the polarity of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\nThe path to the HDF file.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The polarity of the mass spectrum.
    • \n
    \n", "signature": "(self, file_location: str | s3path.S3Path) -> int:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_mass_spectrum", "kind": "function", "doc": "

    Returns the mass spectrum as a MassSpecProfile object.

    \n\n
    Parameters
    \n\n
      \n
    • auto_process (bool, optional):\nSpecifies whether to automatically process the mass spectrum. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile: The MassSpecProfile object.
    • \n
    \n", "signature": "(\tself,\tauto_process=True) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_output_parameters", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_output_parameters", "kind": "function", "doc": "

    Returns the default output parameters for the mass spectrum.

    \n\n
    Returns
    \n\n
      \n
    • dict: The default output parameters.
    • \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5", "modulename": "corems.mass_spectrum.input.coremsHDF5", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum", "kind": "class", "doc": "

    Class for reading mass spectrum data from a CoreMS HDF5 file.

    \n\n
    Attributes
    \n\n
      \n
    • h5pydata (h5py.File):\nThe HDF5 file object.
    • \n
    • scans (list):\nList of scan labels in the HDF5 file.
    • \n
    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or S3Path):\nThe path to the CoreMS HDF5 file.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • load_raw_data(mass_spectrum, scan_index=0). Load raw data into the mass spectrum object.
    • \n
    • get_mass_spectrum(scan_number=0, time_index=-1, auto_process=True, load_settings=True, load_raw=True). Get a mass spectrum object.
    • \n
    • load_settings(mass_spectrum, scan_index=0, time_index=-1). Load settings into the mass spectrum object.
    • \n
    • get_dataframe(scan_index=0, time_index=-1). Get a pandas DataFrame representing the mass spectrum.
    • \n
    • get_time_index_to_pull(scan_label, time_index). Get the time index to pull from the HDF5 file.
    • \n
    • get_high_level_attr_data(attr_str). Get high-level attribute data from the HDF5 file.
    • \n
    • get_scan_group_attr_data(scan_index, time_index, attr_group, attr_srt=None). Get scan group attribute data from the HDF5 file.
    • \n
    • get_raw_data_attr_data(scan_index, attr_group, attr_str). Get raw data attribute data from the HDF5 file.
    • \n
    • get_output_parameters(polarity, scan_index=0). Get the output parameters for the mass spectrum.
    • \n
    \n", "bases": "corems.mass_spectrum.input.massList.ReadCoremsMasslist"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.__init__", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.__init__", "kind": "function", "doc": "

    \n", "signature": "(file_location)"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.h5pydata", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.h5pydata", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.scans", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.scans", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.load_raw_data", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.load_raw_data", "kind": "function", "doc": "

    Load raw data into the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpecCentroid):\nThe mass spectrum object to load the raw data into.
    • \n
    • scan_index (int, optional):\nThe index of the scan to load the raw data from. Default is 0.
    • \n
    \n", "signature": "(self, mass_spectrum, scan_index=0):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_mass_spectrum", "kind": "function", "doc": "

    Instantiate a mass spectrum object from the CoreMS HDF5 file. \nNote that this always returns a centroid mass spectrum object; functionality for profile and\nfrequency mass spectra is not yet implemented.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int, optional):\nThe index of the scan to retrieve the mass spectrum from. Default is 0.
    • \n
    • time_index (int, optional):\nThe index of the time point to retrieve the mass spectrum from. Default is -1.
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Default is True.
    • \n
    • load_settings (bool, optional):\nWhether to load the settings into the mass spectrum object. Default is True.
    • \n
    • load_raw (bool, optional):\nWhether to load the raw data into the mass spectrum object. Default is True.
    • \n
    • load_molecular_formula (bool, optional):\nWhether to load the molecular formula into the mass spectrum object.\nDefault is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecCentroid: The mass spectrum object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the CoreMS file is not valid.\nIf the mass spectrum has not been processed and load_molecular_formula is True.
    • \n
    \n", "signature": "(\tself,\tscan_number=0,\ttime_index=-1,\tauto_process=True,\tload_settings=True,\tload_raw=True,\tload_molecular_formula=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.load_settings", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.load_settings", "kind": "function", "doc": "

    Load settings into the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpecCentroid):\nThe mass spectrum object to load the settings into.
    • \n
    • scan_index (int, optional):\nThe index of the scan to load the settings from. Default is 0.
    • \n
    • time_index (int, optional):\nThe index of the time point to load the settings from. Default is -1.
    • \n
    \n", "signature": "(self, mass_spectrum, scan_index=0, time_index=-1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_dataframe", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_dataframe", "kind": "function", "doc": "

    Get a pandas DataFrame representing the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • scan_index (int, optional):\nThe index of the scan to retrieve the DataFrame from. Default is 0.
    • \n
    • time_index (int, optional):\nThe index of the time point to retrieve the DataFrame from. Default is -1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The pandas DataFrame representing the mass spectrum.
    • \n
    \n", "signature": "(self, scan_index=0, time_index=-1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_time_index_to_pull", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_time_index_to_pull", "kind": "function", "doc": "

    Get the time index to pull from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • scan_label (str):\nThe label of the scan.
    • \n
    • time_index (int):\nThe index of the time point.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: The time index to pull.
    • \n
    \n", "signature": "(self, scan_label, time_index):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_high_level_attr_data", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_high_level_attr_data", "kind": "function", "doc": "

    Get high-level attribute data from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • attr_str (str):\nThe attribute string.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The attribute data.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • KeyError: If the attribute string is not found in the HDF5 file.
    • \n
    \n", "signature": "(self, attr_str):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_scan_group_attr_data", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_scan_group_attr_data", "kind": "function", "doc": "

    Get scan group attribute data from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • scan_index (int):\nThe index of the scan.
    • \n
    • time_index (int):\nThe index of the time point.
    • \n
    • attr_group (str):\nThe attribute group.
    • \n
    • attr_srt (str, optional):\nThe attribute string. Default is None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The attribute data.
    • \n
    \n\n
    Notes
    \n\n

    This method retrieves attribute data from the HDF5 file for a specific scan and time point.\nThe attribute data is stored in the specified attribute group.\nIf an attribute string is provided, only the corresponding attribute value is returned.\nIf no attribute string is provided, all attribute data in the group is returned as a dictionary.

    \n", "signature": "(self, scan_index, time_index, attr_group, attr_srt=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_raw_data_attr_data", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_raw_data_attr_data", "kind": "function", "doc": "

    Get raw data attribute data from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • scan_index (int):\nThe index of the scan.
    • \n
    • attr_group (str):\nThe attribute group.
    • \n
    • attr_str (str):\nThe attribute string.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The attribute data.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • KeyError: If the attribute string is not found in the attribute group.
    • \n
    \n\n
    Notes
    \n\n

    This method retrieves the attribute data associated with a specific scan, attribute group, and attribute string\nfrom the HDF5 file. It returns the attribute data as a dictionary.

    \n\n

    Example usage:

    \n\n
    \n
    >>> data = get_raw_data_attr_data(0, "group1", "attribute1")\n>>> print(data)\n{'key1': 'value1', 'key2': 'value2'}\n
    \n
    \n", "signature": "(self, scan_index, attr_group, attr_str):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_output_parameters", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_output_parameters", "kind": "function", "doc": "

    Get the output parameters for the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (str):\nThe polarity of the mass spectrum.
    • \n
    • scan_index (int, optional):\nThe index of the scan. Default is 0.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The output parameters.
    • \n
    \n", "signature": "(self, polarity, scan_index=0):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.massList", "modulename": "corems.mass_spectrum.input.massList", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.massList.ReadCoremsMasslist", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadCoremsMasslist", "kind": "class", "doc": "

    The ReadCoremsMasslist object reads processed mass list data types\nand returns the mass spectrum obj with the molecular formula obj

    \n\n

    Only available for centroid mass spectrum type: it will ignore the parameter isCentroid.\nPlease see MassListBaseClass for more details.

    \n", "bases": "corems.mass_spectrum.input.baseClass.MassListBaseClass"}, {"fullname": "corems.mass_spectrum.input.massList.ReadCoremsMasslist.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadCoremsMasslist.get_mass_spectrum", "kind": "function", "doc": "

    Get the mass spectrum object from the processed mass list data.

    \n\n
    Parameters
    \n\n
      \n
    • loadSettings (bool, optional):\nWhether to load the settings for the mass spectrum. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecCentroid: The mass spectrum object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the input file is not a valid CoreMS file.
    • \n
    \n", "signature": "(\tself,\tloadSettings: bool = True) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.massList.ReadCoremsMasslist.add_molecular_formula", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadCoremsMasslist.add_molecular_formula", "kind": "function", "doc": "

    Add molecular formula information to the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec_obj (MassSpecCentroid):\nThe mass spectrum object to add the molecular formula to.
    • \n
    • dataframe (pandas.DataFrame):\nThe processed mass list data.
    • \n
    \n", "signature": "(self, mass_spec_obj, dataframe):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.massList.ReadMassList", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadMassList", "kind": "class", "doc": "

    The ReadMassList object reads unprocessed mass list data types\nand returns the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • MassListBaseClass (class):\nThe base class for reading mass list data types.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.
    • \n
    \n", "bases": "corems.mass_spectrum.input.baseClass.MassListBaseClass"}, {"fullname": "corems.mass_spectrum.input.massList.ReadMassList.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadMassList.get_mass_spectrum", "kind": "function", "doc": "

    Reads mass list data types and returns the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (int):\nThe polarity of the mass spectrum (+1 or -1).
    • \n
    • scan (int, optional):\nThe scan number of the mass spectrum (default is 0).
    • \n
    • auto_process (bool, optional):\nFlag indicating whether to automatically process the mass spectrum (default is True).
    • \n
    • loadSettings (bool, optional):\nFlag indicating whether to load settings for the mass spectrum (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mass_spec (MassSpecCentroid or MassSpecProfile):\nThe mass spectrum object.
    • \n
    \n", "signature": "(\tself,\tpolarity: int,\tscan: int = 0,\tauto_process: bool = True,\tloadSettings: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.massList.ReadBrukerXMLList", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadBrukerXMLList", "kind": "class", "doc": "

    The ReadBrukerXMLList object reads Bruker XML objects\nand returns the mass spectrum object.\nSee MassListBaseClass for details

    \n\n
    Parameters
    \n\n
      \n
    • MassListBaseClass (class):\nThe base class for reading mass list data types and returning the mass spectrum object.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.
    • \n
    \n", "bases": "corems.mass_spectrum.input.baseClass.MassListBaseClass"}, {"fullname": "corems.mass_spectrum.input.massList.ReadBrukerXMLList.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadBrukerXMLList.get_mass_spectrum", "kind": "function", "doc": "

    Reads mass list data types and returns the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (int, optional):\nThe polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
    • \n
    • scan (int, optional):\nThe scan number of the mass spectrum. Default is 0.
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Default is True.
    • \n
    • loadSettings (bool, optional):\nWhether to load the settings for the mass spectrum. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mass_spec (MassSpecCentroid):\nThe mass spectrum object representing the centroided mass spectrum.
    • \n
    \n", "signature": "(\tself,\tpolarity: bool = None,\tscan: int = 0,\tauto_process: bool = True,\tloadSettings: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.numpyArray", "modulename": "corems.mass_spectrum.input.numpyArray", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.numpyArray.ms_from_array_profile", "modulename": "corems.mass_spectrum.input.numpyArray", "qualname": "ms_from_array_profile", "kind": "function", "doc": "

    Create a MassSpecProfile object from an array of m/z values and abundance values.

    \n\n
    Parameters
    \n\n
      \n
    • mz (numpy.ndarray):\nArray of m/z values.
    • \n
    • abundance (numpy.ndarray):\nArray of abundance values.
    • \n
    • dataname (str):\nName of the data.
    • \n
    • polarity (int, optional):\nPolarity of the data. The default is -1.
    • \n
    • auto_process (bool, optional):\nFlag to automatically process the data. The default is True.
    • \n
    • data_type (str, optional):\nType of the data. The default is Labels.simulated_profile.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile: The created MassSpecProfile object.
    • \n
    \n", "signature": "(\tmz,\tabundance,\tdataname: str,\tpolarity: int = -1,\tauto_process: bool = True,\tdata_type: str = 'Simulated Profile'):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.numpyArray.ms_from_array_centroid", "modulename": "corems.mass_spectrum.input.numpyArray", "qualname": "ms_from_array_centroid", "kind": "function", "doc": "

    Create a MassSpecCentroid object from an array of m/z values, abundance values, resolving power, and signal-to-noise ratio.

    \n\n
    Parameters
    \n\n
      \n
    • mz (numpy.ndarray):\nArray of m/z values.
    • \n
    • abundance (numpy.ndarray):\nArray of abundance values.
    • \n
    • rp (list(float)):\nList of resolving power values.
    • \n
    • s2n (list(float)):\nList of signal-to-noise ratio values.
    • \n
    • dataname (str):\nName of the data.
    • \n
    • polarity (int, optional):\nPolarity of the data. The default is -1.
    • \n
    • auto_process (bool, optional):\nFlag to automatically process the data. The default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecCentroid: The created MassSpecCentroid object.
    • \n
    \n", "signature": "(\tmz,\tabundance,\trp: list[float],\ts2n: list[float],\tdataname: str,\tpolarity: int = -1,\tauto_process: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.numpyArray.get_output_parameters", "modulename": "corems.mass_spectrum.input.numpyArray", "qualname": "get_output_parameters", "kind": "function", "doc": "

    Generate the output parameters for creating a MassSpecProfile or MassSpecCentroid object.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (int):\nPolarity of the data.
    • \n
    • file_location (str):\nFile location.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: Output parameters.
    • \n
    \n", "signature": "(polarity: int, file_location: str):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.win_only", "modulename": "corems.mass_spectrum.input.win_only", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output", "modulename": "corems.mass_spectrum.output", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export", "modulename": "corems.mass_spectrum.output.export", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport", "kind": "class", "doc": "

    A class for exporting high-resolution mass spectra.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe output file path.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    • output_type (str, optional):\nThe type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • output_file (Path):\nThe output file path.
    • \n
    • output_type (str):\nThe type of output file.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    • atoms_order_list (list):\nThe list of assigned atoms in the order specified by Atoms.atoms_order list.
    • \n
    • columns_label (list):\nThe column labels in order.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • save().\nSave the mass spectrum data to the output file.
    • \n
    • run().\nRun the export process.
    • \n
    • get_pandas_df().\nReturns the mass spectrum data as a pandas DataFrame.
    • \n
    • write_settings(output_path, mass_spectrum).\nWrites the settings of the mass spectrum to a JSON file.
    • \n
    • to_pandas(write_metadata=True).\nExports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    • \n
    • to_excel(write_metadata=True).\nExports the mass spectrum data to an Excel file.
    • \n
    • to_csv(write_metadata=True).\nExports the mass spectrum data to a CSV file.
    • \n
    • to_json().\nExports the mass spectrum data to a JSON string.
    • \n
    • to_hdf().\nExports the mass spectrum data to an HDF5 file.
    • \n
    • parameters_to_toml().\nConverts the mass spectrum parameters to a TOML string.
    • \n
    • parameters_to_json().\nConverts the mass spectrum parameters to a JSON string.
    • \n
    • get_mass_spec_attrs(mass_spectrum).\nReturns the mass spectrum attributes as a dictionary.
    • \n
    • get_all_used_atoms_in_order(mass_spectrum).\nReturns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    • \n
    • list_dict_to_list(mass_spectrum, is_hdf5=False).\nReturns the mass spectrum data as a list of dictionaries.
    • \n
    • get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False).\nReturns the mass spectrum data as a list of dictionaries.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.__init__", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(out_file_path, mass_spectrum, output_type='excel')"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.output_file", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.output_file", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.output_type", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.output_type", "kind": "variable", "doc": "

    Returns the output type of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.mass_spectrum", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.atoms_order_list", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.atoms_order_list", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.save", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.save", "kind": "function", "doc": "

    Save the mass spectrum data to the output file.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the output type is not supported.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.run", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.run", "kind": "function", "doc": "

    Run the export process.

    \n\n

    This method is called when the thread starts.\nIt calls the save method to perform the export.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.get_pandas_df", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.get_pandas_df", "kind": "function", "doc": "

    Returns the mass spectrum data as a pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • additional_columns (list, optional):\nAdditional columns to include in the DataFrame. Defaults to None.\nSuitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The mass spectrum data as a pandas DataFrame.
    • \n
    \n", "signature": "(self, additional_columns=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.write_settings", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.write_settings", "kind": "function", "doc": "

    Writes the settings of the mass spectrum to a JSON file.

    \n\n
    Parameters
    \n\n
      \n
    • output_path (str):\nThe output file path.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    \n", "signature": "(self, output_path, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_pandas", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_pandas", "kind": "function", "doc": "

    Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.

    \n\n
    Parameters
    \n\n
      \n
    • write_metadata (bool, optional):\nWhether to write the metadata to a JSON file. Defaults to True.
    • \n
    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_excel", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_excel", "kind": "function", "doc": "

    Exports the mass spectrum data to an Excel file.

    \n\n
    Parameters
    \n\n
      \n
    • write_metadata (bool, optional):\nWhether to write the metadata to a JSON file. Defaults to True.
    • \n
    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_csv", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_csv", "kind": "function", "doc": "

    Exports the mass spectrum data to a CSV file.

    \n\n
    Parameters
    \n\n
      \n
    • write_metadata (bool, optional):\nWhether to write the metadata to a JSON file. Defaults to True.
    • \n
    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_json", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_json", "kind": "function", "doc": "

    Exports the mass spectrum data to a JSON string.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.add_mass_spectrum_to_hdf5", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.add_mass_spectrum_to_hdf5", "kind": "function", "doc": "

    Adds the mass spectrum data to an HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • hdf_handle (h5py.File):\nThe HDF5 file handle.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to add to the HDF5 file.
    • \n
    • group_key (str):\nThe group key (where to add the mass spectrum data within the HDF5 file).
    • \n
    • mass_spectra_group (h5py.Group, optional):\nThe mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
    • \n
    • export_raw (bool, optional):\nWhether to export the raw data. Defaults to True. \nIf False, only the processed data (peaks) is exported (essentially centroided data).
    • \n
    \n", "signature": "(\tself,\thdf_handle,\tmass_spectrum,\tgroup_key,\tmass_spectra_group=None,\texport_raw=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_hdf", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_hdf", "kind": "function", "doc": "

    Exports the mass spectrum data to an HDF5 file.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.parameters_to_toml", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.parameters_to_toml", "kind": "function", "doc": "

    Converts the mass spectrum parameters to a TOML string.

    \n\n
    Returns
    \n\n
      \n
    • str: The TOML string of the mass spectrum parameters.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.parameters_to_json", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.parameters_to_json", "kind": "function", "doc": "

    Converts the mass spectrum parameters to a JSON string.

    \n\n
    Returns
    \n\n
      \n
    • str: The JSON string of the mass spectrum parameters.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.get_mass_spec_attrs", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.get_mass_spec_attrs", "kind": "function", "doc": "

    Returns the mass spectrum attributes as a dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The mass spectrum attributes.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.get_all_used_atoms_in_order", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.get_all_used_atoms_in_order", "kind": "function", "doc": "

    Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: The list of assigned atoms in the order specified by Atoms.atoms_order list.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.list_dict_to_list", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.list_dict_to_list", "kind": "function", "doc": "

    Returns the mass spectrum data as a list of dictionaries.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    • is_hdf5 (bool, optional):\nWhether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: The mass spectrum data as a list of dictionaries.
    • \n
    \n", "signature": "(self, mass_spectrum, is_hdf5=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.get_list_dict_data", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.get_list_dict_data", "kind": "function", "doc": "

    Returns the mass spectrum data as a list of dictionaries.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    • include_no_match (bool, optional):\nWhether to include unassigned (no match) data. Defaults to True.
    • \n
    • include_isotopologues (bool, optional):\nWhether to include isotopologues. Defaults to True.
    • \n
    • isotopologue_inline (bool, optional):\nWhether to include isotopologues inline. Defaults to True.
    • \n
    • no_match_inline (bool, optional):\nWhether to include unassigned (no match) data inline. Defaults to False.
    • \n
    • is_hdf5 (bool, optional):\nWhether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: The mass spectrum data as a list of dictionaries.
    • \n
    \n", "signature": "(\tself,\tmass_spectrum,\tinclude_no_match=True,\tinclude_isotopologues=True,\tisotopologue_inline=True,\tno_match_inline=False,\tis_hdf5=False,\tadditional_columns=None):", "funcdef": "def"}, {"fullname": "corems.molecular_formula", "modulename": "corems.molecular_formula", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.calc", "modulename": "corems.molecular_formula.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.isospec_version", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "isospec_version", "kind": "variable", "doc": "

    \n", "default_value": "'2.2.2'"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "MolecularFormulaCalc", "kind": "class", "doc": "

    Class of calculations related to molecular formula

    \n\n

    This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula

    \n\n
    Attributes
    \n\n
      \n
    • mz_calc (float):\nThe m/z value of the molecular formula.
    • \n
    • neutral_mass (float):\nThe neutral mass of the molecular formula.
    • \n
    • ion_charge (int):\nThe ion charge of the molecular formula.
    • \n
    • _external_mz (float):\nThe externally provided m/z value of the molecular formula.
    • \n
    • _d_molecular_formula (dict):\nThe dictionary representation of the molecular formula.
    • \n
    • _mspeak_parent (object):\nThe parent MS peak object associated with the molecular formula.
    • \n
    • _assignment_mass_error (float):\nThe mass error of the molecular formula.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • _calc_resolving_power_low_pressure(B, T) \nCalculate the resolving power at low pressure.
    • \n
    • _calc_resolving_power_high_pressure(B, T)\nCalculate the resolving power at high pressure.
    • \n
    • _adduct_mz(adduct_atom, ion_charge)\nGet the m/z value of an adducted ion version of the molecular formula.
    • \n
    • _protonated_mz(ion_charge)\nGet the m/z value of a protonated or deprotonated ion version of the molecular formula.
    • \n
    • _radical_mz(ion_charge)\nGet the m/z value of a radical ion version of the molecular formula.
    • \n
    • _neutral_mass()\nGet the neutral mass of the molecular formula.
    • \n
    • _calc_mz()\nGet the m/z value of the molecular formula.
    • \n
    • _calc_assignment_mass_error(method='ppm')\nCalculate the mass error of the molecular formula.
    • \n
    • _calc_mz_confidence(mean=0)\nCalculate the m/z confidence of the molecular formula.
    • \n
    • _calc_isotopologue_confidence()\nCalculate the isotopologue confidence of the molecular formula.
    • \n
    • normalize_distance(dist, dist_range)\nNormalize the distance value.
    • \n
    • subtract_formula(formula_obj, formated=True)\nSubtract a formula from the current formula object.
    • \n
    • _calc_average_mz_score()\nCalculate the average m/z error score of the molecular formula identification, including the isotopologues.
    • \n
    \n"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc.normalize_distance", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "MolecularFormulaCalc.normalize_distance", "kind": "function", "doc": "

    Normalize the distance value.

    \n\n
    Parameters
    \n\n
      \n
    • dist (float):\nThe distance value to be normalized.
    • \n
    • dist_range (list):\nThe range of the distance value.
    • \n
    \n", "signature": "(self, dist, dist_range):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc.subtract_formula", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "MolecularFormulaCalc.subtract_formula", "kind": "function", "doc": "

    Subtract a formula from the current formula object

    \n\n
    Parameters
    \n\n
      \n
    • formula_obj (MolecularFormula):\nMolecularFormula object to be subtracted from the current formula object
    • \n
    • formated (bool, optional):\nIf True, returns the formula in string format, by default True
    • \n
    \n", "signature": "(self, formula_obj, formated=True):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc.dbe_ai", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "MolecularFormulaCalc.dbe_ai", "kind": "variable", "doc": "

    Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens.

    \n"}, {"fullname": "corems.molecular_formula.factory", "modulename": "corems.molecular_formula.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase", "kind": "class", "doc": "

    Base class for representing a molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_formula (dict, list, str):\nThe molecular formula.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • ion_type (str, optional):\nThe ion type. Defaults to None.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom. Defaults to None.
    • \n
    • mspeak_parent (_MSPeak, optional):\nThe parent mass spectrum peak object instance. Defaults to None.
    • \n
    • external_mz (float, optional):\nThe external m/z value. Defaults to None.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • TypeError: If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or 'ADDUCT'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • isotopologue_count_percentile (float):\nThe isotopologue count percentile.
    • \n
    • O_C (float):\nThe O/C ratio.
    • \n
    • H_C (float):\nThe H/C ratio.
    • \n
    • dbe (float):\nThe double bond equivalent.
    • \n
    • mz_nominal_calc (int):\nThe nominal m/z value.
    • \n
    • mz_error (float):\nThe m/z error.
    • \n
    • mz_calc (float):\nThe m/z value.
    • \n
    • protonated_mz (float):\nThe protonated or deprotonated m/z value.
    • \n
    • radical_mz (float):\nThe radical m/z value.
    • \n
    • neutral_mass (float):\nThe neutral mass.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • atoms (list):\nThe atoms in the molecular formula.
    • \n
    • confidence_score (float):\nThe confidence score of the molecular formula identification.
    • \n
    • isotopologue_similarity (float):\nThe isotopologue similarity score of the molecular formula identification.
    • \n
    • average_mz_error_score (float):\nThe average m/z error score of the molecular formula identification, including the isotopologues.
    • \n
    • mz_error_score (float):\nThe m/z error score of the molecular formula identification.
    • \n
    • kmd (float):\nThe Kendrick mass defect (KMD).
    • \n
    • kendrick_mass (float):\nThe Kendrick mass.
    • \n
    • knm (float):\nThe nominal Kendrick mass.
    • \n
    • string (str):\nThe molecular formula string.
    • \n
    • string_formated (str):\nThe molecular formula string formatted with subscripts and superscripts.
    • \n
    • class_label (str):\nThe class label.
    • \n
    • class_dict (dict):\nThe class dictionary.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • change_kendrick_base(kendrick_dict_base).\nChange the Kendrick base.
    • \n
    • isotopologues(min_abundance, current_mono_abundance, dynamic_range).\nCalculate the isotopologues.
    • \n
    • atoms_qnt(atom).\nGet the atom quantity.
    • \n
    • atoms_symbol(atom).\nGet the atom symbol without the mass number.
    • \n
    • to_dict().\nGet the molecular formula as a dictionary.
    • \n
    • to_list().\nGet the molecular formula as a list.
    • \n
    \n", "bases": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.__init__", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmolecular_formula,\tion_charge,\tion_type=None,\tadduct_atom=None,\tmspeak_parent=None,\texternal_mz=None)"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.is_isotopologue", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.is_isotopologue", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.expected_isotopologues", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.expected_isotopologues", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mspeak_mf_isotopologues_indexes", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mspeak_mf_isotopologues_indexes", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.get", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.get", "kind": "function", "doc": "

    Get the atom quantity of a specific atom.

    \n\n
    Parameters
    \n\n
      \n
    • atom (str):\nThe atom symbol.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The atom quantity.
    • \n
    \n", "signature": "(self, atom):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.split", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.split", "kind": "function", "doc": "

    Splits the molecular formula string.

    \n\n
    Parameters
    \n\n
      \n
    • delimiters (list):\nThe list of delimiters.
    • \n
    • string (str):\nThe molecular formula string.
    • \n
    • maxsplit (int, optional):\nThe maximum number of splits. Defaults to 0.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: The molecular formula list.
    • \n
    \n\n
    Notes
    \n\n

    Does not work when the formula has atoms with the same characters in a row that belong to different atoms, e.g. C10H21NNa.

    \n", "signature": "(self, delimiters, string, maxsplit=0):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.isotopologue_count_percentile", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.isotopologue_count_percentile", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.O_C", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.O_C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.H_C", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.H_C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.A_I", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.A_I", "kind": "variable", "doc": "

    Aromaticity index

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.A_I_mod", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.A_I_mod", "kind": "variable", "doc": "

    Modified aromaticity index

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.nosc", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.nosc", "kind": "variable", "doc": "

    Nominal oxidation state of carbon

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.dbe", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.dbe", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mz_nominal_calc", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mz_nominal_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mz_error", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mz_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mz_calc", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mz_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.protonated_mz", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.protonated_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.radical_mz", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.radical_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.neutral_mass", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.neutral_mass", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.adduct_mz", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.adduct_mz", "kind": "function", "doc": "

    Get m/z of an adducted ion version of the molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • adduct_atom (str):\nThe adduct atom.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The m/z value of the adducted ion version of the molecular formula.
    • \n
    \n", "signature": "(self, adduct_atom):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.ion_type", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.ion_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.ion_charge", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.ion_charge", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.atoms", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.atoms", "kind": "variable", "doc": "

    Get the atoms in the molecular formula.

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.confidence_score", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.confidence_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.isotopologue_similarity", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.isotopologue_similarity", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.average_mz_error_score", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.average_mz_error_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mz_error_score", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mz_error_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.kmd", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.kmd", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.kendrick_mass", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.kendrick_mass", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.knm", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.knm", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.change_kendrick_base", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.change_kendrick_base", "kind": "function", "doc": "

    Change the Kendrick base.

    \n\n
    Parameters
    \n\n
      \n
    • kendrick_dict_base (dict):\nThe Kendrick base dictionary. Ex: {\"C\": 1, \"H\": 2}
    • \n
    \n", "signature": "(self, kendrick_dict_base):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.isotopologues", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.isotopologues", "kind": "function", "doc": "

    Calculate the isotopologues for a given molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • min_abundance (float):\nThe minimum abundance.
    • \n
    • current_mono_abundance (float):\nThe current monoisotopic abundance.
    • \n
    • dynamic_range (float):\nThe dynamic range.
    • \n
    \n\n
    Yields
    \n\n
      \n
    • MolecularFormulaIsotopologue: The molecular formula isotopologue.
    • \n
    \n\n
    Notes
    \n\n

    This calculation ignores the hydrogen isotopes.

    \n", "signature": "(self, min_abundance, current_mono_abundance, dynamic_range):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.atoms_qnt", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.atoms_qnt", "kind": "function", "doc": "

    Get the atom quantity of a specific atom in the molecular formula.

    \n", "signature": "(self, atom):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.atoms_symbol", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.atoms_symbol", "kind": "function", "doc": "

    Get the atom symbol without the mass number.

    \n", "signature": "(self, atom):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.string", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.string", "kind": "variable", "doc": "

    Returns the molecular formula as a string.

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.string_formated", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.string_formated", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.to_dict", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.to_dict", "kind": "function", "doc": "

    Returns the molecular formula as a dictionary.

    \n\n
    Returns
    \n\n
      \n
    • dict: The molecular formula as a dictionary.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.to_list", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.to_list", "kind": "function", "doc": "

    Returns the molecular formula as a list.

    \n\n
    Returns
    \n\n
      \n
    • list: The molecular formula as a list.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If the molecular formula identification was not performed yet.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.class_label", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.class_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.class_dict", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.class_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue", "kind": "class", "doc": "

    Class for representing a molecular formula isotopologue.

    \n\n
    Parameters
    \n\n
      \n
    • _d_molecular_formula (dict):\nThe molecular formula as a dictionary.
    • \n
    • prob_ratio (float):\nThe probability ratio.
    • \n
    • mono_abundance (float):\nThe monoisotopic abundance.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • mspeak_parent (object, optional):\nThe parent mass spectrum peak object instance. Defaults to None.
    • \n
    • ion_type (str, optional):\nThe ion type. Defaults to None.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom. Defaults to None.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • prob_ratio (float):\nThe probability ratio.
    • \n
    • abundance_calc (float):\nThe calculated abundance.
    • \n
    • area_error (float):\nThe area error.
    • \n
    • abundance_error (float):\nThe abundance error.
    • \n
    • is_isotopologue (bool):\nThe isotopologue flag. Defaults to True.
    • \n
    • mspeak_index_mono_isotopic (int):\nThe index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    • \n
    • mono_isotopic_formula_index (int):\nThe index of the monoisotopic formula in the molecular formula list. Defaults to None.
    • \n
    \n", "bases": "MolecularFormulaBase"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.__init__", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.__init__", "kind": "function", "doc": "

    \n", "signature": "(\t_d_molecular_formula,\tprob_ratio,\tmono_abundance,\tion_charge,\tmspeak_parent=None,\tion_type=None,\tadduct_atom=None)"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.prob_ratio", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.prob_ratio", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.abundance_calc", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.abundance_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.is_isotopologue", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.is_isotopologue", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.mspeak_index_mono_isotopic", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.mspeak_index_mono_isotopic", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.mono_isotopic_formula_index", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.mono_isotopic_formula_index", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.area_error", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.area_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.abundance_error", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.abundance_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula", "kind": "class", "doc": "

    Class for representing a molecular formula associated with a molecule in a LCMS library reference.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_formula (dict, list, str):\nThe molecular formula.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • ion_type (str, optional):\nThe ion type. Defaults to None.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom. Defaults to None.
    • \n
    • mspeak_parent (object, optional):\nThe parent mass spectrum peak object instance. Defaults to None.
    • \n
    • name (str, optional):\nThe name of the reference molecule. Defaults to None.
    • \n
    • kegg_id (str, optional):\nThe KEGG ID of the reference molecule. Defaults to None.
    • \n
    • cas (str, optional):\nThe CAS number of the reference molecule. Defaults to None.
    • \n
    \n", "bases": "MolecularFormulaBase"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula.__init__", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmolecular_formula,\tion_charge,\tion_type=None,\tadduct_atom=None,\tmspeak_parent=None,\tname=None,\tkegg_id=None,\tcas=None)"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula.name", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula.kegg_id", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula.kegg_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula.cas", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula.cas", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormula", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormula", "kind": "class", "doc": "

    General class for representing a molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_formula (dict, list, str):\nThe molecular formula.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • ion_type (str, optional):\nThe ion type. Defaults to None.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom. Defaults to None.
    • \n
    • mspeak_parent (object, optional):\nThe parent mass spectrum peak object instance. Defaults to None.
    • \n
    • external_mz (float, optional):\nThe external m/z value. Defaults to False.
    • \n
    \n", "bases": "MolecularFormulaBase"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormula.__init__", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormula.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmolecular_formula,\tion_charge,\tion_type=None,\tadduct_atom=None,\tmspeak_parent=None,\texternal_mz=False)"}, {"fullname": "corems.molecular_formula.input", "modulename": "corems.molecular_formula.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref", "modulename": "corems.molecular_formula.input.masslist_ref", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy", "kind": "class", "doc": "

    Proxy class for MolecularFormulaLink to be used in the molecular formula ref file import

    \n\n
    Parameters
    \n\n
      \n
    • molecular_formula (MolecularFormula | LCMSLibRefMolecularFormula):\ncorems MolecularFormula or LCMSLibRefMolecularFormula object
    • \n
    • mz (float):\ntarget m/z
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • C (int):\nnumber of carbon atoms
    • \n
    • H (int):\nnumber of hydrogen atoms
    • \n
    • H_C (float):\nratio of hydrogen to carbon atoms
    • \n
    • class_label (str):\nmolecular formula class label
    • \n
    • mz_calc (float):\ncalculated m/z
    • \n
    • dbe (int):\ndouble bond equivalent
    • \n
    • formula_dict (dict):\nmolecular formula dictionary
    • \n
    \n\n
    Methods
    \n\n
      \n
    • to_dict(). \nreturn molecular formula dictionary
    • \n
    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.__init__", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.__init__", "kind": "function", "doc": "

    \n", "signature": "(molecular_formula, mz)"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.C", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.H", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.H", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.H_C", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.H_C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.class_label", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.class_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.mz_calc", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.mz_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.dbe", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.dbe", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.formula_dict", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.formula_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.to_dict", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.to_dict", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef", "kind": "class", "doc": "

    Import Mass List from Reference File

    \n\n
    Parameters
    \n\n
      \n
    • ref_file_location (str):\npath to the reference file
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • ref_file_location (str):\npath to the reference file
    • \n
    \n\n
    Methods
    \n\n
      \n
    • molecular_formula_ref(mz, molecular_formula). \nReturn MolecularFormulaLinkProxy object
    • \n
    • from_lcms_lib_file(ion_charge, ion_types).\nReturn Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file
    • \n
    • from_bruker_ref_file().\nReturn List[MolecularFormula] from Bruker reference file
    • \n
    • from_corems_ref_file(delimiter).\nReturn List[MolecularFormula] from CoreMS reference file
    • \n
    • split(delimiters, string, maxsplit).\nSplits a string using a list of delimiters.
    • \n
    • mformula_s_to_dict(s_mformulatring, iontype).\nConverts a molecular formula string to a dict
    • \n
    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.__init__", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.__init__", "kind": "function", "doc": "

    \n", "signature": "(ref_file_location)"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.ref_file_location", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.ref_file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.molecular_formula_ref", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.molecular_formula_ref", "kind": "function", "doc": "

    Instantiate a MolecularFormulaLinkProxy object

    \n\n
    Parameters
    \n\n
      \n
    • mz (float):\ntarget m/z
    • \n
    • molecular_formula (MolecularFormula | LCMSLibRefMolecularFormula):\ncorems MolecularFormula or LCMSLibRefMolecularFormula object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MolecularFormulaLinkProxy: MolecularFormulaLinkProxy object
    • \n
    \n", "signature": "(self, mz, molecular_formula):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.from_lcms_lib_file", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.from_lcms_lib_file", "kind": "function", "doc": "

    Create a dictionary of LCMSLibRefMolecularFormula objects from LCMS library reference file

    \n\n
    Parameters
    \n\n
      \n
    • ion_charge (float):\nion charge
    • \n
    • ion_types (List[str]):\nlist of ion types
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Dict: Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file. m/z is the target m/z; standard_name is the name of the molecular standard mix; MolecularFormula is the corems molecular formula class
    • \n
    \n", "signature": "(\tself,\tion_charge: float,\tion_types: List[str]) -> Dict[str, Dict[float, List[corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula]]]:", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.from_bruker_ref_file", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.from_bruker_ref_file", "kind": "function", "doc": "

    Create a list of MolecularFormula objects from Bruker reference file

    \n\n
    Returns
    \n\n
      \n
    • List[MolecularFormula]: List of MolecularFormula objects from Bruker reference file
    • \n
    \n", "signature": "(\tself) -> List[corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormula]:", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.from_corems_ref_file", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.from_corems_ref_file", "kind": "function", "doc": "

    Create a list of MolecularFormula objects from CoreMS reference file

    \n\n

    Not being used

    \n\n
    Parameters
    \n\n
      \n
    • delimiter (str):\ndelimiter used in the reference file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • List[MolecularFormula]: List of MolecularFormula objects from CoreMS reference file
    • \n
    \n", "signature": "(self, delimiter='\\t'):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.split", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.split", "kind": "function", "doc": "

    Splits a string using a list of delimiters.

    \n\n

    Does not work when formula has atoms with same characters, i.e - C10H21NNa

    \n\n
    Parameters
    \n\n
      \n
    • delimiters (list):\nlist of delimiters
    • \n
    • string (str):\nstring to be split
    • \n
    • maxsplit (int, optional):\nmaximum number of splits. Default is 0
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: list of strings obtained after splitting the string
    • \n
    • list: list of counts obtained after splitting the string
    • \n
    \n", "signature": "(self, delimiters, string, maxsplit=0):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.mformula_s_to_dict", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.mformula_s_to_dict", "kind": "function", "doc": "

    Converts a molecular formula string to a dict

    \n\n
    Parameters
    \n\n
      \n
    • s_mformulatring (str):\nmolecular formula string, i.e. 'C10H21NNa'
    • \n
    • iontype (str, optional):\nion type. Default is 'unknown'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: molecular formula dictionary
    • \n
    \n\n
    Notes
    \n\n

    Does not work if the atomic mass number is passed i.e. 37Cl, 81Br, convention follow the light isotope labeling 35Cl is Cl, 12C is C, etc.\nIf you need to use heavy isotopes please use another reference file format that separate the formula string by a blank space and parse it using the function corems_ref_file

    \n\n
    Raises
    \n\n
      \n
    • TypeError: Atom does not exist in Atoms.atoms_order list
    • \n
    • Exception: Empty molecular formula
    • \n
    \n", "signature": "(self, s_mformulatring, iontype='unknown'):", "funcdef": "def"}, {"fullname": "corems.molecular_id", "modulename": "corems.molecular_id", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc", "modulename": "corems.molecular_id.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.ClusterFilter", "modulename": "corems.molecular_id.calc.ClusterFilter", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter", "kind": "class", "doc": "

    Class for filtering and clustering mass spectra data using various algorithms.

    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nMass spectrum object.
    • \n
    • ms_peaks (list):\nList of mass peaks.
    • \n
    • ms_peak_indexes (list):\nList of peak indexes.
    • \n
    • min_samples (int):\nMinimum number of samples in a cluster.
    • \n
    • eps (float):\nThe maximum distance between two samples for one to be considered as in the neighborhood of the other.
    • \n
    • bandwidth (float):\nBandwidth used in MeanShift algorithm.
    • \n
    • quantile (float):\nQuantile used in estimate_bandwidth function.
    • \n
    • n_samples (int):\nNumber of samples used in estimate_bandwidth function.
    • \n
    • bin_seeding (bool):\nIf true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized.
    • \n
    • min_peaks_per_class (int):\nMinimum number of peaks per class.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_mass_error_matrix_data(ms_peaks).\nGet the mass error matrix data from a list of mass peaks.
    • \n
    • get_kendrick_matrix_data(mass_spectrum).\nGet the Kendrick matrix data from a mass spectrum.
    • \n
    • filter_kendrick(mass_spectrum).\nFilter the mass spectrum data using the Kendrick algorithm.
    • \n
    • filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj).\nFilter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
    • \n
    • remove_assignment_by_mass_error(mass_spectrum).\nRemove assignments from the mass spectrum based on mass error.
    • \n
    \n"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.get_mass_error_matrix_data", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.get_mass_error_matrix_data", "kind": "function", "doc": "

    Get the mass error matrix data from a list of mass peaks.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peaks (list):\nList of mass peaks.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matrix_data (ndarray):\nMatrix data containing mass and error values.
    • \n
    • list_indexes_mass_spec (list):\nList of indexes of mass peaks in the original mass spectrum.
    • \n
    \n", "signature": "(self, ms_peaks):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.get_kendrick_matrix_data", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.get_kendrick_matrix_data", "kind": "function", "doc": "

    Get the Kendrick matrix data from a mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nMass spectrum object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matrix_data (ndarray):\nMatrix data containing Kendrick mass and Kendrick mass defect values.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.filter_kendrick", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.filter_kendrick", "kind": "function", "doc": "

    Filter the mass spectrum data using the Kendrick algorithm.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nMass spectrum object.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.filter_kendrick_by_index", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.filter_kendrick_by_index", "kind": "function", "doc": "

    Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peak_indexes (list):\nList of peak indexes.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nMass spectrum object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • noise_idx (list):\nList of indexes of noise points in the mass spectrum.
    • \n
    \n", "signature": "(self, ms_peak_indexes, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.remove_assignment_by_mass_error", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.remove_assignment_by_mass_error", "kind": "function", "doc": "

    Remove assignments from the mass spectrum based on mass error.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nMass spectrum object.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.KendrickGroup", "modulename": "corems.molecular_id.calc.KendrickGroup", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.MolecularFilter", "modulename": "corems.molecular_id.calc.MolecularFilter", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.MolecularFilter.MolecularFormulaSearchFilters", "modulename": "corems.molecular_id.calc.MolecularFilter", "qualname": "MolecularFormulaSearchFilters", "kind": "class", "doc": "

    Class containing static methods for filtering molecular formulas in a mass spectrum.

    \n\n
    Methods
    \n\n
      \n
    • filter_kendrick(ms_peak_indexes, mass_spectrum_obj).
      \nApply Kendrick filter to the mass spectrum.
    • \n
    • check_min_peaks(ms_peak_indexes, mass_spectrum_obj).
      \nCheck if the number of peaks per class meets the minimum requirement.
    • \n
    • filter_isotopologue(ms_peak_indexes, mass_spectrum_obj).
      \nApply isotopologue filter to the mass spectrum.
    • \n
    \n"}, {"fullname": "corems.molecular_id.calc.MolecularFilter.MolecularFormulaSearchFilters.filter_kendrick", "modulename": "corems.molecular_id.calc.MolecularFilter", "qualname": "MolecularFormulaSearchFilters.filter_kendrick", "kind": "function", "doc": "

    Apply Kendrick filter to the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • filtered_ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects after applying the Kendrick filter.
    • \n
    \n", "signature": "(ms_peak_indexes, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.MolecularFilter.MolecularFormulaSearchFilters.check_min_peaks", "modulename": "corems.molecular_id.calc.MolecularFilter", "qualname": "MolecularFormulaSearchFilters.check_min_peaks", "kind": "function", "doc": "

    Check if the number of peaks per class meets the minimum requirement.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n", "signature": "(ms_peak_indexes, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.MolecularFilter.MolecularFormulaSearchFilters.filter_isotopologue", "modulename": "corems.molecular_id.calc.MolecularFilter", "qualname": "MolecularFormulaSearchFilters.filter_isotopologue", "kind": "function", "doc": "

    Apply isotopologue filter to the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • filtered_ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects after applying the isotopologue filter.
    • \n
    \n", "signature": "(ms_peak_indexes, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.methods_name", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "methods_name", "kind": "variable", "doc": "

    \n", "default_value": "{'chebyshev_distance': 'Chebyshev Distance', 'squared_euclidean_distance': 'Squared Euclidean Distance', 'fidelity_similarity': 'Fidelity Similarity', 'matusita_distance': 'Matusita Distance', 'squared_chord_distance': 'Squared-chord Distance', 'harmonic_mean_similarity': 'Harmonic mean Distance', 'Pearson_chi_squared_distance': 'Pearson Chi Squared Distance', 'Neyman_chi_squared_distance': 'Neyman Chi Squared Distance', 'probabilistic_symmetric_chi_squared_distance': 'Probabilistic symmetric X2 Distance', 'topsoe_distance': 'Topsoe Distance', 'chernoff_distance': 'Chernoff Distance', 'ruzicka_distance': 'Ruzicka Distance', 'roberts_distance': 'Roberts Distance', 'motyka_distance': 'Motyka Distance', 'canberra_distance': 'Canberra Distance', 'canberra_metric': 'Canberra Metric', 'kulczynski_1_distance': 'Kulczynski 1 Distance', 'lorentzian_distance': 'Lorentzian Distance', 'clark_distance': 'Clark Distance', 'hellinger_distance': 'Hellinger Distance', 'whittaker_index_of_association_distance': 'Whittaker index of association Distance', 'spectral_contrast_angle_distance': 'Spectral Contrast Angle', 'wave_hedges_distance': 'Wave Hedges Distance', 'dice_similarity': 'Dice Similarity', 'inner_product_distance': 'Inner Product Distance', 'divergence_distance': 'Divergence Distance', 'jensen_difference_distance': 'Jensen Differences Distance', 'kumar_johnson_distance': 'Kumar Johnson Distance', 'avg_l_distance': 'Avg (L1, L8) Distance', 'vicis_wave_hadges_distance': 'Vicis Wave Hadges Distance', 'vicis_symmetric_chi_squared_1_distance': 'Vicis-Symmetric X2 1 Distance', 'vicis_symmetric_chi_squared_2_distance': 'Vicis-Symmetric X2 2 Distance', 'vicis_symmetric_chi_squared_3_distance': 'Vicis-Symmetric X2 3 Distance', 'max_symmetric_chi_squared_distance': 'Max Symmetric Chi Squared Distance', 'min_symmetric_chi_squared_distance': 'Min Symmetric Chi Squared Distance', 'additive_sym_chi_sq': 'Additive Symmetric Chi Squared', 
'bhattacharya_distance': 'Battacharya Distance', 'generalized_ochiai_index': 'Generalized Ochiai Index', 'gower_distance': 'Gower Distance', 'impr_sqrt_cosine_sim': 'Improved Square Root Cosine Similarity', 'intersection_sim': 'Intersection Similarity', 'j_divergence': 'J Divergence', 'jensen_shannon_index': 'Jensen Shannon Index', 'k_divergence': 'K Divergence', 'VW6': 'VW6', 'VW5': 'VW5', 'VW4': 'VW4', 'VW3': 'VW3', 'VW2': 'VW2', 'VW1': 'VW1', 'taneja_divergence': 'Taneja Divergence', 'symmetric_chi_squared_distance': 'Symmetric Chi Squared Distance', 'squared_chi_squared_distance': 'Squared Chi Squared Distance', 'square_root_cosine_correlation': 'Square Root Cosine Correlation', 'sorensen_distance': 'Sorensen Distance', 'Minokowski_3': 'Minokowski 3 Distance', 'Minokowski_4': 'Minokowski 4 Distance', 'kumarjohnson_divergence': 'Kumar Johnson Divergence', 'kumarhassebrook_similarity': 'Kumar Hassebrook Similarity', 'kullbackleibler_divergence': 'Kullback Leibler Divergence', 'soergel_distance': 'Soergel Distance'}"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.methods_scale", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "methods_scale", "kind": "variable", "doc": "

    \n", "default_value": "{'entropy': [0, 1.3862943611198906], 'weighted_entropy': [0, 1.3862943611198906], 'absolute_value': [0, 2], 'avg_l': [0, 1.5], 'bhattacharya_1': [0, 2.4674011002723395], 'bhattacharya_2': [0, inf], 'canberra': [0, inf], 'clark': [0, inf], 'divergence': [0, inf], 'euclidean': [0, 1.4142135623730951], 'hellinger': [0, inf], 'improved_similarity': [0, inf], 'lorentzian': [0, inf], 'manhattan': [0, 2], 'matusita': [0, 1.4142135623730951], 'mean_character': [0, 2], 'motyka': [-0.5, 0], 'ms_for_id': [-inf, 0], 'ms_for_id_v1': [0, inf], 'pearson_correlation': [-1, 1], 'penrose_shape': [0, 1.4142135623730951], 'penrose_size': [0, inf], 'probabilistic_symmetric_chi_squared': [0, 1], 'similarity_index': [0, inf], 'squared_chord': [0, 2], 'squared_euclidean': [0, 2], 'symmetric_chi_squared': [0, 0.7071067811865476], 'topsoe': [0, 1.4142135623730951], 'vicis_symmetric_chi_squared_3': [0, 2], 'wave_hedges': [0, inf], 'whittaker_index_of_association': [0, inf]}"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity", "kind": "class", "doc": "

    Class containing methods for calculating spectral similarity between two mass spectra.

    \n\n
    Parameters
    \n\n
      \n
    • ms_mz_abun_dict (dict):\nDictionary of mass to abundance values for the experimental mass spectrum.
    • \n
    • ref_obj (dict):\nDictionary of mass to abundance values for the reference mass spectrum.
    • \n
    • norm_func (function):\nFunction to normalize the abundance values.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • normalize_func (function):\nFunction to normalize the abundance values.
    • \n
    • ms_mz_abun_dict (dict):\nDictionary of mass to abundance values for the experimental mass spectrum.
    • \n
    • ref_obj (dict):\nDictionary of mass to abundance values for the reference mass spectrum.
    • \n
    • exp_abun (list):\nList of abundance values for the experimental mass spectrum.
    • \n
    • exp_mz (list):\nList of mass values for the experimental mass spectrum.
    • \n
    • ref_mz (list):\nList of mass values for the reference mass spectrum.
    • \n
    • ref_abun (list):\nList of abundance values for the reference mass spectrum.
    • \n
    • ref_mz_abun_dict (dict):\nDictionary of mass to abundance values for the reference mass spectrum.
    • \n
    • df (DataFrame):\nDataFrame containing the experimental and reference mass spectrum data.
    • \n
    • zero_filled_u_l (tuple):\nTuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    • \n
    • common_mz_values (list):\nList of common mass values between the experimental and reference mass spectra.
    • \n
    • n_x_y (int):\nNumber of common mass values between the experimental and reference mass spectra.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • nan_fill(df, fill_with=0).\nFill missing mass values with a given value.
    • \n
    • normalize(x, y, norm_func=sum).\nNormalize the abundance values.
    • \n
    • weighted_cosine_correlation(a=0.5, b=1.3, nanfill=1e-10).\nCalculate the weighted cosine correlation between the experimental and reference mass spectra.
    • \n
    • cosine_correlation().\nCalculate the cosine correlation between the experimental and reference mass spectra.
    • \n
    • stein_scott().\nCalculate the Stein-Scott similarity between the experimental and reference mass spectra.
    • \n
    • pearson_correlation().\nCalculate the Pearson correlation between the experimental and reference mass spectra.
    • \n
    • spearman_correlation().\nCalculate the Spearman correlation between the experimental and reference mass spectra.
    • \n
    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.__init__", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.__init__", "kind": "function", "doc": "

    \n", "signature": "(ms_mz_abun_dict, ref_obj, norm_func=<built-in function sum>)"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.normalize_func", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.normalize_func", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ms_mz_abun_dict", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ms_mz_abun_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ref_obj", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ref_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.exp_abun", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.exp_abun", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.exp_mz", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.exp_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ref_mz", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ref_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ref_abun", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ref_abun", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ref_mz_abun_dict", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ref_mz_abun_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.df", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.df", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.zero_filled_u_l", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.zero_filled_u_l", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.common_mz_values", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.common_mz_values", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.n_x_y", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.n_x_y", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.nan_fill", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.nan_fill", "kind": "function", "doc": "

    Fill missing mass values with a given value.

    \n\n
    Parameters
    \n\n
      \n
    • df (DataFrame):\nDataFrame containing the experimental and reference mass spectrum data.
    • \n
    • fill_with (float):\nValue to fill missing mass values with.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • x (list):\nList of abundance values for the experimental mass spectrum.
    • \n
    • y (list):\nList of abundance values for the reference mass spectrum.
    • \n
    \n", "signature": "(self, df, fill_with=0):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.normalize", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.normalize", "kind": "function", "doc": "

    Normalize the abundance values.

    \n\n
    Parameters
    \n\n
      \n
    • x (list):\nList of abundance values for the experimental mass spectrum.
    • \n
    • y (list):\nList of abundance values for the reference mass spectrum.
    • \n
    • norm_func (function):\nFunction to normalize the abundance values.\nDefault is sum
    • \n
    \n\n
    Returns
    \n\n
      \n
    • u_l (tuple):\nTuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    • \n
    \n", "signature": "(self, x, y, norm_func=<built-in function sum>):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.weighted_cosine_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.weighted_cosine_correlation", "kind": "function", "doc": "

    Calculate the weighted cosine correlation between the experimental and reference mass spectra.

    \n\n
    Parameters
    \n\n
      \n
    • a (float):\nWeighting factor for the abundance values.\nDefault is 0.5
    • \n
    • b (float):\nWeighting factor for the mass values.\nDefault is 1.3
    • \n
    • nanfill (float):\nValue to fill missing mass values with.\nDefault is 1e-10
    • \n
    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nWeighted cosine correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self, a=0.5, b=1.3, nanfill=1e-10):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.cosine_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.cosine_correlation", "kind": "function", "doc": "

    Calculate the cosine correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nCosine correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.stein_scott", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.stein_scott", "kind": "function", "doc": "

    Calculate the Stein-Scott similarity between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • s_ss_x_y (float):\nStein-Scott similarity between the experimental and reference mass spectra.
    • \n
    • s_ss_x_y_nist (float):\nStein-Scott similarity between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.pearson_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.pearson_correlation", "kind": "function", "doc": "

    Calculate the Pearson correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nPearson correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.spearman_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.spearman_correlation", "kind": "function", "doc": "

    Calculate the Spearman correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nSpearman correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.kendall_tau", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.kendall_tau", "kind": "function", "doc": "

    Calculate the Kendall's tau correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nKendall's tau correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.dft_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.dft_correlation", "kind": "function", "doc": "

    Calculate the DFT correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nDFT correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.dwt_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.dwt_correlation", "kind": "function", "doc": "

    Calculate the DWT correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nDWT correlation between the experimental and reference mass spectra.
    • \n
    \n\n
    Notes
    \n\n

    This function requires the PyWavelets library to be installed. \n This is not a default requirement as this function is not widely used.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.euclidean_distance", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.euclidean_distance", "kind": "function", "doc": "

    Calculate the Euclidean distance between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nEuclidean distance between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.manhattan_distance", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.manhattan_distance", "kind": "function", "doc": "

    Calculate the Manhattan distance between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nManhattan distance between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.jaccard_distance", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.jaccard_distance", "kind": "function", "doc": "

    Calculate the Jaccard distance between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nJaccard distance between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.extra_distances", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.extra_distances", "kind": "function", "doc": "

    Function to calculate distances using additional metrics defined in math_distance.py

    \n\n

    Currently, calculates all distances.

    \n\n
    Returns
    \n\n
      \n
    • dict_res (dict):\nDictionary containing the distances between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance", "modulename": "corems.molecular_id.calc.math_distance", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.math_distance.entropy_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "entropy_distance", "kind": "function", "doc": "

    Calculate entropy distance between two vectors

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Entropy distance between v and y
    • \n
    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.weighted_entropy_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "weighted_entropy_distance", "kind": "function", "doc": "

    Calculate weighted entropy distance between two vectors

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Weighted entropy distance between v and y
    • \n
    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.chebyshev_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "chebyshev_distance", "kind": "function", "doc": "

    Chebyshev distance

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Chebyshev distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\underset{i}{\\max}{(|v_{i}\\ -\\ y_{i}|)}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.squared_euclidean_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "squared_euclidean_distance", "kind": "function", "doc": "

    Squared Euclidean distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Squared Euclidean distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum(v_{i}-y_{i})^2$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.fidelity_similarity", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "fidelity_similarity", "kind": "function", "doc": "

    Fidelity similarity:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Fidelity similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\sqrt{v_{i}y_{i}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.matusita_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "matusita_distance", "kind": "function", "doc": "

    Matusita distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Matusita distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt{\\sum(\\sqrt{v_{i}}-\\sqrt{y_{i}})^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.squared_chord_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "squared_chord_distance", "kind": "function", "doc": "

    Squared-chord distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Squared-chord distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum(\\sqrt{v_{i}}-\\sqrt{y_{i}})^2$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.bhattacharya_1_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "bhattacharya_1_distance", "kind": "function", "doc": "

    Bhattacharya 1 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Bhattacharya 1 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$(\\arccos{(\\sum\\sqrt{v_{i}y_{i}})})^2$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.bhattacharya_2_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "bhattacharya_2_distance", "kind": "function", "doc": "

    Bhattacharya 2 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Bhattacharya 2 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$-\\ln{(\\sum\\sqrt{v_{i}y_{i}})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.harmonic_mean_similarity", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "harmonic_mean_similarity", "kind": "function", "doc": "

    Harmonic mean similarity:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Harmonic mean similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$%1-2\\sum(\\frac{v_{i}y_{i}}{v_{i}+y_{i}})\n2\\sum(\\frac{v_{i}y_{i}}{v_{i}+y_{i}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.chernoff_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "chernoff_distance", "kind": "function", "doc": "

    Chernoff distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Chernoff distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\max{(-\\ln\\sum(v_{i}^ty_{i}^{1-t})^{1-t})},\\ t=0.1,\\ 0\\le\\ t<1$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.ruzicka_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "ruzicka_distance", "kind": "function", "doc": "

    Ruzicka distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Ruzicka distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{\\sum{|v_{i}-y_{i}|}}{\\sum{\\max(v_{i},y_{i})}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.roberts_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "roberts_distance", "kind": "function", "doc": "

    Roberts distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Roberts distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1-\\sum\\frac{(v_{i}+y_{i})\\frac{\\min{(v_{i},y_{i})}}{\\max{(v_{i},y_{i})}}}{\\sum(v_{i}+y_{i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.intersection_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "intersection_distance", "kind": "function", "doc": "

    Intersection distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Intersection distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1-\\frac{\\sum\\min{(v_{i},y_{i})}}{\\min(\\sum{v_{i}},\\sum{y_{i}})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.motyka_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "motyka_distance", "kind": "function", "doc": "

    Motyka distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Motyka distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$-\\frac{\\sum\\min{(y_{i},v_{i})}}{\\sum(y_{i}+v_{i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.canberra_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "canberra_distance", "kind": "function", "doc": "

    Canberra distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Canberra distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$%\\sum\\frac{|v_{i}-y_{i}|}{|v_{i}|+|y_{i}|}\n\\sum_{i}\\frac{|y_{i} - v_{i}|}{y_{i} + v_{i}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.canberra_metric", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "canberra_metric", "kind": "function", "doc": "

    Canberra Metric

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Canberra metric between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{\\sum_{i}I(v_{i}\\neq 0)}\\sum_{i}\\frac{|y_{i}-v_{i}|}{(y_{i}+v_{i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kulczynski_1_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kulczynski_1_distance", "kind": "function", "doc": "

    Kulczynski 1 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Kulczynski 1 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{\\sum{|v_i-y_i|}}{\\sum\\min(v_i,y_i)}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.baroni_urbani_buser_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "baroni_urbani_buser_distance", "kind": "function", "doc": "

    Baroni-Urbani-Buser distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Baroni-Urbani-Buser distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1-\\frac{\\sum\\min{(v_i,y_i)}+\\sqrt{\\sum\\min{(v_i,y_i)}\\sum(\\max{(v)}-\\max{(v_i,y_i)})}}{\\sum{\\max{(v_i,y_i)}+\\sqrt{\\sum{\\min{(v_i,y_i)}\\sum(\\max{(v)}-\\max{(v_i,y_i)})}}}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.penrose_size_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "penrose_size_distance", "kind": "function", "doc": "

    Penrose size distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Penrose size distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt N\\sum{|y_i-v_i|}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.mean_character_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "mean_character_distance", "kind": "function", "doc": "

    Mean character distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Mean character distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{N}\\sum{|y_i-v_i|}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.lorentzian_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "lorentzian_distance", "kind": "function", "doc": "

    Lorentzian distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Lorentzian distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum{\\ln(1+|v_i-y_i|)}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.penrose_shape_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "penrose_shape_distance", "kind": "function", "doc": "

    Penrose shape distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Penrose shape distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt{\\sum((v_i-\\bar{v})-(y_i-\\bar{y}))^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.clark_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "clark_distance", "kind": "function", "doc": "

    Clark distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Clark distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$%(\\frac{1}{N}\\sum(\\frac{v_i-y_i}{|v_i|+|y_i|})^2)^\\frac{1}{2}\n\\sqrt{\\sum(\\frac{|v_i-y_i|}{v_i+y_i})^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.hellinger_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "hellinger_distance", "kind": "function", "doc": "

    Hellinger distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Hellinger distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$%\\sqrt{2\\sum(\\sqrt{\\frac{v_i}{\\bar{v}}}-\\sqrt{\\frac{y_i}{\\bar{y}}})^2}\n\\sqrt{2\\sum(\\sqrt{v_i}-\\sqrt{y_i})^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.whittaker_index_of_association_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "whittaker_index_of_association_distance", "kind": "function", "doc": "

    Whittaker index of association distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Whittaker index of association distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{2}\\sum|\\frac{v_i}{\\bar{v}}-\\frac{y_i}{\\bar{y}}|$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.similarity_index_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "similarity_index_distance", "kind": "function", "doc": "

    Similarity Index Distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Similarity Index Distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt{\\frac{\\sum{\\frac{v_i-y_i}{y_i}}^2}{N}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.improved_similarity_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "improved_similarity_distance", "kind": "function", "doc": "

    Improved Similarity Index:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Improved Similarity Index between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt{\\frac{1}{N}\\sum{\\frac{y_i-v_i}{y_i+v_i}}^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.absolute_value_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "absolute_value_distance", "kind": "function", "doc": "

    Absolute Value Distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Absolute Value Distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac { \\sum(|y_i-v_i|)}{\\sum v_i}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.spectral_contrast_angle_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "spectral_contrast_angle_distance", "kind": "function", "doc": "

    Spectral Contrast Angle:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Spectral Contrast Angle between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1 - \\frac{\\sum{y_iv_i}}{\\sqrt{\\sum y_i^2\\sum v_i^2}}\n\\arccos(\\frac{\\sum_{P}y_{p}^* v_{p}^*}{\\sqrt{\\sum_{P}y_{p}^{*2} \\sum_{P}v_{p}^{*2}}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.wave_hedges_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "wave_hedges_distance", "kind": "function", "doc": "

    Wave Hedges distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Wave Hedges distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{|v_i-y_i|}{\\max{(v_i,y_i)}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.dice_similarity", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "dice_similarity", "kind": "function", "doc": "

    Dice similarity:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Dice similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{\\sum(v_i-y_i)^2}{\\sum v_i^2+\\sum y_i^2}\n\\frac{2 * \\sum_{i}v_{i}y_{i}}{\\sum_{i}y_{i}^2 + \\sum_{i}v_{i}^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.inner_product_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "inner_product_distance", "kind": "function", "doc": "

    Inner Product distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Inner product distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1-\\sum{v_iy_i}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.divergence_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "divergence_distance", "kind": "function", "doc": "

    Divergence distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Divergence distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$2\\sum\\frac{(v_i-y_i)^2}{(v_i+y_i)^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.jensen_difference_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "jensen_difference_distance", "kind": "function", "doc": "

    Jensen difference:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Jensen difference distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum[\\frac{1}{2}(v_i\\ln{v_i}+y_i\\ln{y_i})-(\\frac{v_i+y_i}{2})\\ln{(\\frac{v_i+y_i}{2})}]$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kumar_johnson_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kumar_johnson_distance", "kind": "function", "doc": "

    Kumar-Johnson distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Kumar Johnson distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{(v_i^2-y_i^2)^2}{2(v_iy_i)^\\frac{3}{2}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.avg_l_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "avg_l_distance", "kind": "function", "doc": "

    Avg (L1, L\u221e) distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Average L distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{2}(\\sum|v_i-y_i|+\\underset{i}{\\max}{|v_i-y_i|})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.vicis_wave_hadges_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "vicis_wave_hadges_distance", "kind": "function", "doc": "

    Vicis-Wave Hadges distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Vicis Wave Hadges distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{|v_i-y_i|}{\\min{(v_i,\\ y_i)}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.vicis_symmetric_chi_squared_1_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "vicis_symmetric_chi_squared_1_distance", "kind": "function", "doc": "

    Vicis-Symmetric \u03c72 1 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Vici Symmetric \u03c72 1 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{(v_i-y_i)^2}{\\min{(v_i,y_i)^2}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.vicis_symmetric_chi_squared_2_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "vicis_symmetric_chi_squared_2_distance", "kind": "function", "doc": "

    Vicis-Symmetric \u03c72 2 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Vicis Symmetric \u03c72 2 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{(v_i-y_i)^2}{\\min{(v_i,y_i)}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.vicis_symmetric_chi_squared_3_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "vicis_symmetric_chi_squared_3_distance", "kind": "function", "doc": "

    Vicis-Symmetric \u03c72 3 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Vici Symmetric \u03c72 3 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{(v_i-y_i)^2}{\\max{(v_i,y_i)}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.max_symmetric_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "max_symmetric_chi_squared_distance", "kind": "function", "doc": "

    Max-Symmetric \u03c72 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Max-Symmetric \u03c72 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\max{(\\sum\\frac{(v_i-y_i)^2}{v_i},\\sum\\frac{(v_i-y_i)^2}{y_i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.min_symmetric_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "min_symmetric_chi_squared_distance", "kind": "function", "doc": "

    Min-Symmetric \u03c72 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Min-Symmetric \u03c72 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\min{(\\sum\\frac{(v_i-y_i)^2}{v_i},\\sum\\frac{(v_i-y_i)^2}{y_i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.additive_sym_chi_sq", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "additive_sym_chi_sq", "kind": "function", "doc": "

    Additive Symmetric \u03c72 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Additive Symmetric \u03c72 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum_{i}\\frac{(y_{i} - v_{i})^2(y_{i}+v_{i})}{y_{i}v_{i}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.bhattacharya_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "bhattacharya_distance", "kind": "function", "doc": "

    Bhattacharya Distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Bhattcharya distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$-ln(\\sum_{i}\\sqrt{y_{i}v_{i}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.generalized_ochiai_index", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "generalized_ochiai_index", "kind": "function", "doc": "

    Generalized Ochiai Index

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Generalized Ochiai Index between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1 - \\frac{\\sum_{i}min(y_{i}, v_{i})}{\\sqrt{\\sum_{i}y_{i} \\sum_{i}v_{i}}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.gower_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "gower_distance", "kind": "function", "doc": "

    Gower Distance

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Gower distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{N}\\sum_{i}|y_{i} - v_{i}|$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.impr_sqrt_cosine_sim", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "impr_sqrt_cosine_sim", "kind": "function", "doc": "

    Improved Square Root Cosine Similarity

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Improved Square Root Cosine Similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{\\sum_{i}\\sqrt{y_{i}v_{i}}}{\\sum_{i}\\sqrt{y_{i}}\\sum_{i}\\sqrt{v_{i}}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.intersection_sim", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "intersection_sim", "kind": "function", "doc": "

    Intersection Similarity

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Intersection Similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum_{i}min(y_{i}, v_{i})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.j_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "j_divergence", "kind": "function", "doc": "

    J Divergence

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: J Divergence between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum_{i}(y_{i} - v_{i}) ln(\\frac{y_{i}}{v_{i}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.jensen_shannon_index", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "jensen_shannon_index", "kind": "function", "doc": "

    Jensen-Shannon Index

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Jensen Shannon Index between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{2}[\\sum_{i}y_{i}ln(\\frac{2y_{i}}{y_{i} + v_{i}}) + \\sum_{i}v_{i}ln(\\frac{2v_{i}}{y_{i}+v_{i}})]$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.k_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "k_divergence", "kind": "function", "doc": "

    K-Divergence

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: K-Divergence between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum_{i}y_{i}ln(\\frac{2y_{i}}{y_{i} + v_{i}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.topsoe_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "topsoe_distance", "kind": "function", "doc": "

    Topsoe distance

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Topsoe distance between v and y
    • \n
    • Notes
    • \n
    • -----
    • \n
    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.probabilistic_symmetric_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "probabilistic_symmetric_chi_squared_distance", "kind": "function", "doc": "

    Fixed\n\"I commented out the previous one; please review\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW6", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW6", "kind": "function", "doc": "

    \"appears to be the same as max_symmetric_chi_squared_distance\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW5", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW5", "kind": "function", "doc": "

    \"appears to be the same as max_symmetric_chi_squared_distance\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW4", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW4", "kind": "function", "doc": "

    \"Tecnically the Symmetric chi2 eq63\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW3", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW3", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW2", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW2", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW1", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW1", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.taneja_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "taneja_divergence", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.symmetric_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "symmetric_chi_squared_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.squared_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "squared_chi_squared_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.square_root_cosine_correlation", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "square_root_cosine_correlation", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.sorensen_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "sorensen_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.Pearson_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "Pearson_chi_squared_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.Neyman_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "Neyman_chi_squared_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.Minokowski_3", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "Minokowski_3", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.Minokowski_4", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "Minokowski_4", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kumarjohnson_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kumarjohnson_divergence", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kumarhassebrook_similarity", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kumarhassebrook_similarity", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kullbackleibler_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kullbackleibler_divergence", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.soergel_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "soergel_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory", "modulename": "corems.molecular_id.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL", "modulename": "corems.molecular_id.factory.EI_SQL", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Base", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Base", "kind": "class", "doc": "

    The base class of the class hierarchy.

    \n\n

    When called, it accepts no arguments and returns a new featureless\ninstance that has no instance attributes and cannot be given any.

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Base.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Base.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Base.registry", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Base.registry", "kind": "variable", "doc": "

    \n", "default_value": "<sqlalchemy.orm.decl_api.registry object>"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Base.metadata", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Base.metadata", "kind": "variable", "doc": "

    \n", "default_value": "MetaData()"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar", "kind": "class", "doc": "

    This class is used to store the metadata of the compounds in the database

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe id of the compound.
    • \n
    • cas (str):\nThe CAS number of the compound.
    • \n
    • inchikey (str):\nThe InChiKey of the compound.
    • \n
    • inchi (str):\nThe InChi of the compound.
    • \n
    • chebi (str):\nThe ChEBI ID of the compound.
    • \n
    • smiles (str):\nThe SMILES of the compound.
    • \n
    • kegg (str):\nThe KEGG ID of the compound.
    • \n
    • iupac_name (str):\nThe IUPAC name of the compound.
    • \n
    • traditional_name (str):\nThe traditional name of the compound.
    • \n
    • common_name (str):\nThe common name of the compound.
    • \n
    • data_id (int):\nThe id of the compound in the molecularData table.
    • \n
    • data (LowResolutionEICompound):\nThe compound object.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.cas", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.cas", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.inchikey", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.inchikey", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.inchi", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.inchi", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.chebi", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.chebi", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.smiles", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.smiles", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.kegg", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.kegg", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.iupac_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.iupac_name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.traditional_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.traditional_name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.common_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.common_name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.data_id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.data_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.data", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.data", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound", "kind": "class", "doc": "

    This class is used to store the molecular and spectral data of the compounds in the low res EI database

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe id of the compound.
    • \n
    • name (str):\nThe name of the compound.
    • \n
    • classify (str):\nThe classification of the compound.
    • \n
    • formula (str):\nThe formula of the compound.
    • \n
    • ri (float):\nThe retention index of the compound.
    • \n
    • retention_time (float):\nThe retention time of the compound.
    • \n
    • source (str):\nThe source of the compound.
    • \n
    • casno (str):\nThe CAS number of the compound.
    • \n
    • comment (str):\nThe comment of the compound.
    • \n
    • source_temp_c (float):\nThe source temperature of the spectra.
    • \n
    • ev (float):\nThe electron volts of the spectra.
    • \n
    • peaks_count (int):\nThe number of peaks in the spectra.
    • \n
    • mz (numpy.ndarray):\nThe m/z values of the spectra.
    • \n
    • abundance (numpy.ndarray):\nThe abundance values of the spectra.
    • \n
    • metadatar (Metadatar):\nThe metadata object.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.__init__", "kind": "function", "doc": "

    \n", "signature": "(**dict_data)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.classify", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.classify", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.formula", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.formula", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.ri", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.ri", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.retention_time", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.retention_time", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.source", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.source", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.casno", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.casno", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.comment", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.comment", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.derivativenum", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.derivativenum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.derivatization", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.derivatization", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.source_temp_c", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.source_temp_c", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.ev", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.ev", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.peaks_count", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.peaks_count", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.mz", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.abundance", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.abundance", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.metadatar", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.metadatar", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata", "kind": "class", "doc": "

    Dataclass for the Metabolite Metadata

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe id of the compound.
    • \n
    • cas (str):\nThe CAS number of the compound.
    • \n
    • inchikey (str):\nThe InChiKey of the compound.
    • \n
    • inchi (str):\nThe InChi of the compound.
    • \n
    • chebi (str):\nThe ChEBI ID of the compound.
    • \n
    • smiles (str):\nThe SMILES of the compound.
    • \n
    • kegg (str):\nThe KEGG ID of the compound.
    • \n
    • iupac_name (str):\nThe IUPAC name of the compound.
    • \n
    • traditional_name (str):\nThe traditional name of the compound.
    • \n
    • common_name (str):\nThe common name of the compound.
    • \n
    • data_id (int):\nThe id of the compound in the molecularData table.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tid: int,\tcas: str,\tinchikey: str,\tinchi: str,\tchebi: str,\tsmiles: str,\tkegg: str,\tdata_id: int,\tiupac_name: str,\ttraditional_name: str,\tcommon_name: str)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.id", "kind": "variable", "doc": "

    \n", "annotation": ": int"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.cas", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.cas", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.inchikey", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.inchikey", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.inchi", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.inchi", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.chebi", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.chebi", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.smiles", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.smiles", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.kegg", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.kegg", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.data_id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.data_id", "kind": "variable", "doc": "

    \n", "annotation": ": int"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.iupac_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.iupac_name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.traditional_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.traditional_name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.common_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.common_name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef", "kind": "class", "doc": "

    Dataclass for the Low Resolution Compound Reference

    \n\n

    This class is used to store the molecular and spectral data of the compounds in the low res EI database

    \n\n
    Parameters
    \n\n
      \n
    • compounds_dict (dict):\nA dictionary representing the compound.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe id of the compound.
    • \n
    • name (str):\nThe name of the compound.
    • \n
    • ri (str):\nThe retention index of the compound.
    • \n
    • retention_time (str):\nThe retention time of the compound.
    • \n
    • casno (str):\nThe CAS number of the compound.
    • \n
    • comment (str):\nThe comment of the compound.
    • \n
    • peaks_count (int):\nThe number of peaks in the spectra.
    • \n
    • classify (str):\nThe classification of the compound.
    • \n
    • derivativenum (str):\nThe derivative number of the compound.
    • \n
    • derivatization (str):\nThe derivatization applied to the compound.
    • \n
    • mz (numpy.ndarray):\nThe m/z values of the spectra.
    • \n
    • abundance (numpy.ndarray):\nThe abundance values of the spectra.
    • \n
    • source_temp_c (float):\nThe source temperature of the spectra.
    • \n
    • ev (float):\nThe electron volts of the spectra.
    • \n
    • formula (str):\nThe formula of the compound.
    • \n
    • source (str):\nThe source of the spectra data.
    • \n
    • classify (str):\nThe classification of the compound.
    • \n
    • metadata (MetaboliteMetadata):\nThe metadata object.
    • \n
    • similarity_score (float):\nThe similarity score of the compound.
    • \n
    • ri_score (float):\nThe RI score of the compound.
    • \n
    • spectral_similarity_score (float):\nThe spectral similarity score of the compound.
    • \n
    • spectral_similarity_scores (dict):\nThe spectral similarity scores of the compound.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.__init__", "kind": "function", "doc": "

    \n", "signature": "(compounds_dict)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.ri", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.ri", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.retention_time", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.retention_time", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.casno", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.casno", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.comment", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.comment", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.peaks_count", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.peaks_count", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.classify", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.classify", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.derivativenum", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.derivativenum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.derivatization", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.derivatization", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.mz", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.abundance", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.abundance", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.source_temp_c", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.source_temp_c", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.ev", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.ev", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.formula", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.formula", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.source", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.source", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.similarity_score", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.similarity_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.ri_score", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.ri_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.spectral_similarity_score", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.spectral_similarity_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.spectral_similarity_scores", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.spectral_similarity_scores", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite", "kind": "class", "doc": "

    A class for interacting with a SQLite database for low-resolution EI compounds.

    \n\n
    Parameters
    \n\n
      \n
    • url (str, optional):\nThe URL of the SQLite database. Default is 'sqlite://'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • engine (sqlalchemy.engine.Engine):\nThe SQLAlchemy engine for connecting to the database.
    • \n
    • session (sqlalchemy.orm.Session):\nThe SQLAlchemy session for executing database operations.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __init__(self, url='sqlite://').\nInitializes the EI_LowRes_SQLite object.
    • \n
    • __exit__(self, exc_type, exc_val, exc_tb).\nCloses the database connection.
    • \n
    • init_engine(self, url).\nInitializes the SQLAlchemy engine.
    • \n
    • __enter__(self).\nReturns the EI_LowRes_SQLite object.
    • \n
    • add_compound_list(self, data_dict_list).\nAdds a list of compounds to the database.
    • \n
    • add_compound(self, data_dict).\nAdds a single compound to the database.
    • \n
    • commit(self).\nCommits the changes to the database.
    • \n
    • row_to_dict(self, row).\nConverts a database row to a dictionary.
    • \n
    • get_all(self).\nRetrieves all compounds from the database.
    • \n
    • query_min_max_rt(self, min_max_rt).\nQueries compounds based on retention time range.
    • \n
    • query_min_max_ri(self, min_max_ri).\nQueries compounds based on RI range.
    • \n
    • query_names_and_rt(self, min_max_rt, compound_names).\nQueries compounds based on compound names and retention time range.
    • \n
    • query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).\nQueries compounds based on RI range and retention time range.
    • \n
    • delete_compound(self, compound).\nDeletes a compound from the database.
    • \n
    • purge(self).\nDeletes all compounds from the database table.
    • \n
    • clear_data(self).\nClears all tables in the database.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.__init__", "kind": "function", "doc": "

    \n", "signature": "(url='sqlite://')"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.engine", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.engine", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.session", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.session", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.init_engine", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.init_engine", "kind": "function", "doc": "

    Initializes the SQLAlchemy engine.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nThe URL of the SQLite database.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • sqlalchemy.engine.Engine: The SQLAlchemy engine for connecting to the database.
    • \n
    \n", "signature": "(self, url):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.add_compound_list", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.add_compound_list", "kind": "function", "doc": "

    Adds a list of compounds to the database.

    \n\n
    Parameters
    \n\n
      \n
    • data_dict_list (list of dict):\nA list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self, data_dict_list):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.add_compound", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.add_compound", "kind": "function", "doc": "

    Adds a single compound to the database.

    \n\n
    Parameters
    \n\n
      \n
    • data_dict (dict):\nA dictionary representing the compound.
    • \n
    \n", "signature": "(self, data_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.commit", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.commit", "kind": "function", "doc": "

    Commits the changes to the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.row_to_dict", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.row_to_dict", "kind": "function", "doc": "

    Converts a database row to a dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • row (sqlalchemy.engine.row.Row):\nA row from the database.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary representing the compound.
    • \n
    \n", "signature": "(self, row):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.get_all", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.get_all", "kind": "function", "doc": "

    Retrieves all compounds from the database.

    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.query_min_max_rt", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.query_min_max_rt", "kind": "function", "doc": "

    Queries compounds based on retention time range.

    \n\n
    Parameters
    \n\n
      \n
    • min_max_rt (tuple):\nA tuple containing the minimum and maximum retention time values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self, min_max_rt):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.query_min_max_ri", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.query_min_max_ri", "kind": "function", "doc": "

    Queries compounds based on RI range.

    \n\n
    Parameters
    \n\n
      \n
    • min_max_ri (tuple):\nA tuple containing the minimum and maximum RI values.
    • \n
    \n", "signature": "(self, min_max_ri):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.query_names_and_rt", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.query_names_and_rt", "kind": "function", "doc": "

    Queries compounds based on compound names and retention time range.

    \n\n
    Parameters
    \n\n
      \n
    • min_max_rt (tuple):\nA tuple containing the minimum and maximum retention time values.
    • \n
    • compound_names (list):\nA list of compound names.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self, min_max_rt, compound_names):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.query_min_max_ri_and_rt", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.query_min_max_ri_and_rt", "kind": "function", "doc": "

    Queries compounds based on RI range and retention time range.

    \n\n
    Parameters
    \n\n
      \n
    • min_max_ri (tuple):\nA tuple containing the minimum and maximum RI values.
    • \n
    • min_max_rt (tuple):\nA tuple containing the minimum and maximum retention time values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self, min_max_ri, min_max_rt):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.delete_compound", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.delete_compound", "kind": "function", "doc": "

    Deletes a compound from the database.

    \n\n
    Parameters
    \n\n
      \n
    • compound (LowResolutionEICompound):\nA compound object.
    • \n
    \n", "signature": "(self, compound):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.purge", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.purge", "kind": "function", "doc": "

    Deletes all compounds from the database table.

    \n\n
    Notes
    \n\n

    Careful, this will delete the entire database table.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.clear_data", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.clear_data", "kind": "function", "doc": "

    Clears all tables in the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.profiled", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "profiled", "kind": "function", "doc": "

    A context manager for profiling.

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.insert_database_worker", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "insert_database_worker", "kind": "function", "doc": "

    Inserts data into the database.

    \n", "signature": "(args):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations", "kind": "class", "doc": "

    A class for generating molecular formula combinations.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_search_settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • sql_db (MolForm_SQL):\nThe SQLite database object.
    • \n
    • len_existing_classes (int):\nThe number of existing classes in the SQLite database.
    • \n
    • odd_ch_id (list):\nA list of odd carbon and hydrogen atom IDs.
    • \n
    • odd_ch_dict (list):\nA list of odd carbon and hydrogen atom dictionaries.
    • \n
    • odd_ch_mass (list):\nA list of odd carbon and hydrogen atom masses.
    • \n
    • odd_ch_dbe (list):\nA list of odd carbon and hydrogen atom double bond equivalents.
    • \n
    • even_ch_id (list):\nA list of even carbon and hydrogen atom IDs.
    • \n
    • even_ch_dict (list):\nA list of even carbon and hydrogen atom dictionaries.
    • \n
    • even_ch_mass (list):\nA list of even carbon and hydrogen atom masses.
    • \n
    • even_ch_dbe (list):\nA list of even carbon and hydrogen atom double bond equivalents.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • cProfile_worker(args)
      \nA cProfile worker for the get_mol_formulas function.
    • \n
    • check_database_get_class_list(molecular_search_settings)
      \nChecks if the database has all the classes; if not, creates the missing classes.
    • \n
    • get_carbonsHydrogens(settings, odd_even)
      \nRetrieves carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.
    • \n
    • add_carbonsHydrogens(settings, existing_classes_objs)
      \nAdds carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.
    • \n
    • runworker(molecular_search_settings)
      \nRuns the molecular formula lookup table worker.
    • \n
    • get_classes_in_order(molecular_search_settings)
      \nGets the classes in order.
    • \n
    • sort_classes(atoms_in_order, combination_dict)
      \nSorts the classes in order.
    • \n
    • get_fixed_initial_number_of_hydrogen(min_h, odd_even)
      \nGets the fixed initial number of hydrogen atoms.
    • \n
    • calc_mz(datadict, class_mass=0)
      \nCalculates the mass-to-charge ratio (m/z) of a molecular formula.
    • \n
    • calc_dbe_class(datadict)
      \nCalculates the double bond equivalent (DBE) of a molecular formula.
    • \n
    • populate_combinations(classe_tuple, settings)
      \nPopulates the combinations.
    • \n
    • get_or_add(SomeClass, kw)
      \nGets or adds a class.
    • \n
    • get_mol_formulas(odd_even_tag, classe_tuple, settings)
      \nGets the molecular formulas.
    • \n
    • get_h_odd_or_even(class_dict)
      \nGets the hydrogen odd or even.
    • \n
    • get_total_halogen_atoms(class_dict)
      \nGets the total number of halogen atoms.
    • \n
    • get_total_hetero_valence(class_dict)\nGets the total valence of heteroatoms other than N, F, Cl, and Br.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.__init__", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.__init__", "kind": "function", "doc": "

    \n", "signature": "(sql_db=None)"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.cProfile_worker", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.cProfile_worker", "kind": "function", "doc": "

    cProfile worker for the get_mol_formulas function

    \n", "signature": "(self, args):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.check_database_get_class_list", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.check_database_get_class_list", "kind": "function", "doc": "

    Check if the database has all the classes; if not, create the missing classes.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_search_settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: list of tuples with the class name and the class dictionary
    • \n
    \n", "signature": "(self, molecular_search_settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_carbonsHydrogens", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_carbonsHydrogens", "kind": "function", "doc": "

    Retrieve carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.

    \n\n
    Parameters
    \n\n
      \n
    • settings (object):\nAn object containing user-defined settings.
    • \n
    • odd_even (str):\nA string indicating whether to retrieve even or odd hydrogen atoms.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of CarbonHydrogen objects that satisfy the specified conditions.
    • \n
    \n", "signature": "(self, settings, odd_even):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.add_carbonsHydrogens", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.add_carbonsHydrogens", "kind": "function", "doc": "

    Add carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.

    \n\n
    Parameters
    \n\n
      \n
    • settings (object):\nAn object containing user-defined settings.
    • \n
    • existing_classes_objs (list):\nA list of HeteroAtoms objects.
    • \n
    \n", "signature": "(self, settings, existing_classes_objs):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.runworker", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.runworker", "kind": "function", "doc": "

    Run the molecular formula lookup table worker.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_search_settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of tuples with the class name and the class dictionary.
    • \n
    \n", "signature": "(*args, **kw):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_classes_in_order", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_classes_in_order", "kind": "function", "doc": "

    Get the classes in order

    \n\n
    Parameters
    \n\n
      \n
    • molecular_search_settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary of classes in order.\nstructure is ('HC', {'HC': 1})
    • \n
    \n", "signature": "(self, molecular_search_settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.sort_classes", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.sort_classes", "kind": "function", "doc": "

    Sort the classes in order

    \n\n
    Parameters
    \n\n
      \n
    • atoms_in_order (list):\nA list of atoms in order.
    • \n
    • combination_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary of classes in order.
    • \n
    \n", "signature": "(atoms_in_order, combination_dict) -> Dict[str, Dict[str, int]]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_fixed_initial_number_of_hydrogen", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_fixed_initial_number_of_hydrogen", "kind": "function", "doc": "

    Get the fixed initial number of hydrogen atoms

    \n\n
    Parameters
    \n\n
      \n
    • min_h (int):\nThe minimum number of hydrogen atoms.
    • \n
    • odd_even (str):\nA string indicating whether to retrieve even or odd hydrogen atoms.
    • \n
    \n", "signature": "(min_h, odd_even):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.calc_mz", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.calc_mz", "kind": "function", "doc": "

    Calculate the mass-to-charge ratio (m/z) of a molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • datadict (dict):\nA dictionary of classes.
    • \n
    • class_mass (int):\nThe mass of the class.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The mass-to-charge ratio (m/z) of a molecular formula.
    • \n
    \n", "signature": "(self, datadict, class_mass=0):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.calc_dbe_class", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.calc_dbe_class", "kind": "function", "doc": "

    Calculate the double bond equivalent (DBE) of a molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • datadict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The double bond equivalent (DBE) of a molecular formula.
    • \n
    \n", "signature": "(self, datadict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.populate_combinations", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.populate_combinations", "kind": "function", "doc": "

    Populate the combinations

    \n\n
    Parameters
    \n\n
      \n
    • classe_tuple (tuple):\nA tuple containing the class name, the class dictionary, and the class ID.
    • \n
    • settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of molecular formula data dictionaries.
    • \n
    \n", "signature": "(self, classe_tuple, settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_or_add", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_or_add", "kind": "function", "doc": "

    Get or add a class

    \n\n
    Parameters
    \n\n
      \n
    • SomeClass (object):\nA class object.
    • \n
    • kw (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • object: A class object.
    • \n
    \n", "signature": "(self, SomeClass, kw):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_mol_formulas", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_mol_formulas", "kind": "function", "doc": "

    Get the molecular formulas

    \n\n
    Parameters
    \n\n
      \n
    • odd_even_tag (str):\nA string indicating whether to retrieve even or odd hydrogen atoms.
    • \n
    • classe_tuple (tuple):\nA tuple containing the class name, the class dictionary, and the class ID.

    • \n
    • settings (object):\nAn object containing user-defined settings.

    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of molecular formula data dictionaries.
    • \n
    \n", "signature": "(self, odd_even_tag, classe_tuple, settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_h_odd_or_even", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_h_odd_or_even", "kind": "function", "doc": "

    Get the hydrogen odd or even

    \n\n
    Parameters
    \n\n
      \n
    • class_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: A string indicating whether to retrieve even or odd hydrogen atoms.
    • \n
    \n", "signature": "(self, class_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_total_heteroatoms", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_total_heteroatoms", "kind": "function", "doc": "

    Get the total number of heteroatoms other than N, F, Cl, Br

    \n\n
    Parameters
    \n\n
      \n
    • class_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The total number of heteroatoms.
    • \n
    \n", "signature": "(class_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_total_hetero_valence", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_total_hetero_valence", "kind": "function", "doc": "

    Get the total valence of heteroatoms other than N, F, Cl, Br

    \n\n
    Parameters
    \n\n
      \n
    • class_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The total heteroatom valence.
    • \n
    \n", "signature": "(class_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_total_halogen_atoms", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_total_halogen_atoms", "kind": "function", "doc": "

    Get the total number of halogen atoms

    \n\n
    Parameters
    \n\n
      \n
    • class_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The total number of halogen atoms.
    • \n
    \n", "signature": "(class_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification", "modulename": "corems.molecular_id.factory.classification", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.flatten_list", "modulename": "corems.molecular_id.factory.classification", "qualname": "flatten_list", "kind": "function", "doc": "

    \n", "signature": "(l):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification", "kind": "class", "doc": "

    Class for grouping mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc.)

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • choose_molecular_formula (bool, optional):\nIf True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered. Default is True.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If no molecular formula is associated with any mspeak objects.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _ms_grouped_class (dict):\nA dictionary of classes and a list of ms_peak objects.
    • \n
    • choose_mf (bool):\nIf True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered.
    • \n
    • total_peaks (int):\nThe total number of peaks.
    • \n
    • sum_abundance (float):\nThe sum of the abundance of all peaks.
    • \n
    • min_max_mz (tuple):\nThe minimum and maximum mz values.
    • \n
    • min_max_abundance (tuple):\nThe minimum and maximum abundance values.
    • \n
    • min_ppm_error (float):\nThe minimum ppm error.
    • \n
    • max_ppm_error (float):\nThe maximum ppm error.
    • \n
    • all_identified_atoms (list):\nA list of all identified atoms.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __len__().\nReturns the number of classes.
    • \n
    • __getitem__(classe)
      \nReturns the molecular formula list for specified class.
    • \n
    • __iter__().\nReturns an iterator over the keys of the dictionary.
    • \n
    • get_classes(threshold_perc=1, isotopologue=True).\nReturns a list of classes with abundance percentile above threshold.
    • \n
    • molecular_formula_string(classe).\nReturns a list of molecular formula strings for specified class.
    • \n
    • molecular_formula(classe).\nReturns a list of molecular formula for specified class.
    • \n
    • carbon_number(classe).\nReturns a list of carbon number for specified class.
    • \n
    • atom_count(atom, classe).\nReturns a list of atom count for specified class.
    • \n
    • dbe(classe).\nReturns a list of DBE for specified class.
    • \n
    • atoms_ratio(classe, numerator, denominator).\nReturns a list of atoms ratio for specified class.
    • \n
    • mz_exp(classe).\nReturns a list of experimental mz for specified class.
    • \n
    • abundance(classe).\nReturns a list of abundance for specified class.
    • \n
    • mz_error(classe).\nReturns a list of mz error for specified class.
    • \n
    • mz_calc(classe).\nReturns a list of calculated mz for specified class.
    • \n
    • peaks_count_percentile(classe).\nReturns the peaks count percentile of a specific class.
    • \n
    • abundance_count_percentile(classe).\nReturns the abundance percentile of a specific class.
    • \n
    • mz_exp_assigned().\nReturns a list of experimental mz for all assigned classes.
    • \n
    • abundance_assigned().\nReturns a list of abundance for all classes.
    • \n
    • mz_exp_all().\nReturns a list of mz for all classes.
    • \n
    \n", "bases": "collections.abc.Mapping"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.__init__", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.__init__", "kind": "function", "doc": "

    \n", "signature": "(mass_spectrum, choose_molecular_formula=True)"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.choose_mf", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.choose_mf", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.total_peaks", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.total_peaks", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.sum_abundance", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.sum_abundance", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.min_max_mz", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.min_max_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.min_max_abundance", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.min_max_abundance", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.min_ppm_error", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.min_ppm_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.max_ppm_error", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.max_ppm_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.all_identified_atoms", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.all_identified_atoms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.get_classes", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.get_classes", "kind": "function", "doc": "

    Return a list of classes with abundance percentile above threshold

    \n", "signature": "(self, threshold_perc=1, isotopologue=True):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.molecular_formula_string", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.molecular_formula_string", "kind": "function", "doc": "

    Return a list of molecular formula strings for the specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.molecular_formula", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.molecular_formula", "kind": "function", "doc": "

    Return a list of molecular formulas for the specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.carbon_number", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.carbon_number", "kind": "function", "doc": "

    Return a list of carbon numbers for the specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.atom_count", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.atom_count", "kind": "function", "doc": "

    Return a list of atom counts for the specified class

    \n", "signature": "(self, atom, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.dbe", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.dbe", "kind": "function", "doc": "

    Return a list of DBE for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.atoms_ratio", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.atoms_ratio", "kind": "function", "doc": "

    Return a list of atom ratios for the specified class

    \n", "signature": "(self, classe, numerator, denominator):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_exp", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_exp", "kind": "function", "doc": "

    Return a list of experimental mz for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.abundance", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.abundance", "kind": "function", "doc": "

    Return a list of abundance for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_error", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_error", "kind": "function", "doc": "

    Return a list of mz error for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_calc", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_calc", "kind": "function", "doc": "

    Return a list of calculated mz for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.peaks_count_percentile", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.peaks_count_percentile", "kind": "function", "doc": "

    Return the peaks count percentile of a specific class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.abundance_count_percentile", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.abundance_count_percentile", "kind": "function", "doc": "

    Return the abundance percentile of a specific class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_exp_assigned", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_exp_assigned", "kind": "function", "doc": "

    Return a list of experimental mz for all assigned classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.abundance_assigned", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.abundance_assigned", "kind": "function", "doc": "

    Return a list of abundance for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_exp_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_exp_all", "kind": "function", "doc": "

    Return a list of mz for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_error_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_error_all", "kind": "function", "doc": "

    Return a list of mz error for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.carbon_number_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.carbon_number_all", "kind": "function", "doc": "

    Return a list of carbon number for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.dbe_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.dbe_all", "kind": "function", "doc": "

    Return a list of DBE for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.atoms_ratio_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.atoms_ratio_all", "kind": "function", "doc": "

    Return a list of atoms ratio for all classes

    \n", "signature": "(self, numerator, denominator):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.to_dataframe", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.to_dataframe", "kind": "function", "doc": "

    Return a pandas dataframe with all the data from the class

    \n\n
    Parameters
    \n\n
      \n
    • include_isotopologue (bool, optional):\nInclude isotopologues, by default False
    • \n
    • abundance_perc_threshold (int, optional):\nAbundance percentile threshold, by default 5
    • \n
    • include_unassigned (bool, optional):\nInclude unassigned peaks, by default False
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: Pandas dataframe with all the data from the class
    • \n
    \n", "signature": "(\tself,\tinclude_isotopologue=False,\tabundance_perc_threshold=5,\tinclude_unassigned=False):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_ms_assigned_unassigned", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_ms_assigned_unassigned", "kind": "function", "doc": "

    Plot stick mass spectrum of all classes

    \n\n
    Parameters
    \n\n
      \n
    • assigned_color (str, optional):\nMatplotlib color for the assigned peaks, by default \"b\"
    • \n
    • unassigned_color (str, optional):\nMatplotlib color for the unassigned peaks, by default \"r\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    \n", "signature": "(self, assigned_color='b', unassigned_color='r'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_mz_error", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_mz_error", "kind": "function", "doc": "

    Plot mz error scatter plot of all classes

    \n\n
    Parameters
    \n\n
      \n
    • color (str, optional):\nMatplotlib color, by default \"g\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    \n", "signature": "(self, color='g'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_mz_error_class", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_mz_error_class", "kind": "function", "doc": "

    Plot mz error scatter plot of a specific class

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nClass name
    • \n
    • color (str, optional):\nMatplotlib color, by default \"g\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    \n", "signature": "(self, classe, color='g'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_ms_class", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_ms_class", "kind": "function", "doc": "

    Plot stick mass spectrum of a specific class

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nClass name
    • \n
    • color (str, optional):\nMatplotlib color, by default \"g\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    \n", "signature": "(self, classe, color='g'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_van_krevelen", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_van_krevelen", "kind": "function", "doc": "

    Plot Van Krevelen Diagram

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nClass name
    • \n
    • max_hc (float, optional):\nMax H/C ratio, by default 2.5
    • \n
    • max_oc (float, optional):\nMax O/C ratio, by default 2
    • \n
    • ticks_number (int, optional):\nNumber of ticks, by default 5
    • \n
    • color (str, optional):\nMatplotlib color, by default \"viridis\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    • abun_perc (float):\nClass percentile of the relative abundance
    • \n
    \n", "signature": "(self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color='viridis'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_dbe_vs_carbon_number", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_dbe_vs_carbon_number", "kind": "function", "doc": "

    Plot DBE vs Carbon Number

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nClass name
    • \n
    • max_c (int, optional):\nMax Carbon Number, by default 50
    • \n
    • max_dbe (int, optional):\nMax DBE, by default 40
    • \n
    • dbe_incr (int, optional):\nDBE increment, by default 5
    • \n
    • c_incr (int, optional):\nCarbon Number increment, by default 10
    • \n
    • color (str, optional):\nMatplotlib color, by default \"viridis\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    • abun_perc (float):\nClass percentile of the relative abundance
    • \n
    \n", "signature": "(\tself,\tclasse,\tmax_c=50,\tmax_dbe=40,\tdbe_incr=5,\tc_incr=10,\tcolor='viridis'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata", "kind": "class", "doc": "

    Dataclass for the Lipid Metadata

    \n\n
    Parameters
    \n\n
      \n
    • name (str):\nThe name of the lipid, using the LIPID MAPS nomenclature
    • \n
    • casno (str):\nThe CAS number of the lipid
    • \n
    • formula (str):\nThe molecular formula of the lipid
    • \n
    • pubchem_id (str):\nThe PubChem ID of the lipid
    • \n
    • structure_level (str):\nThe structure level of the lipid, following the LIPID MAPS classification
    • \n
    • lipid_summed_name (str):\nThe summed name of the lipid, aka lipid species, \nfollowing the LIPID MAPS classification
    • \n
    • lipid_subclass (str):\nThe subclass of the lipid, following the LIPID MAPS classification
    • \n
    • lipid_class (str):\nThe class of the lipid, following the LIPID MAPS classification
    • \n
    • lipid_category (str):\nThe category of the lipid, following the LIPID MAPS classification
    • \n
    \n", "bases": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.__init__", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tid: int,\tcas: str,\tinchikey: str,\tinchi: str,\tchebi: str,\tsmiles: str,\tkegg: str,\tdata_id: int,\tiupac_name: str,\ttraditional_name: str,\tcommon_name: str,\tname: str,\tcasno: str,\tformula: str,\tpubchem_id: str,\tstructure_level: str,\tlipid_summed_name: str,\tlipid_subclass: str,\tlipid_class: str,\tlipid_category: str)"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.name", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.casno", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.casno", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.formula", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.formula", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.pubchem_id", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.pubchem_id", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.structure_level", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.structure_level", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.lipid_summed_name", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.lipid_summed_name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.lipid_subclass", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.lipid_subclass", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.lipid_class", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.lipid_class", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.lipid_category", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.lipid_category", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.molecularSQL", "modulename": "corems.molecular_id.factory.molecularSQL", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.Base", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "Base", "kind": "class", "doc": "

    The base class of the class hierarchy.

    \n\n

    When called, it accepts no arguments and returns a new featureless\ninstance that has no instance attributes and cannot be given any.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.Base.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "Base.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.Base.registry", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "Base.registry", "kind": "variable", "doc": "

    \n", "default_value": "<sqlalchemy.orm.decl_api.registry object>"}, {"fullname": "corems.molecular_id.factory.molecularSQL.Base.metadata", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "Base.metadata", "kind": "variable", "doc": "

    \n", "default_value": "MetaData()"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms", "kind": "class", "doc": "

    HeteroAtoms class for the heteroAtoms table in the SQLite database.

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe primary key for the table.
    • \n
    • name (str):\nThe name of the heteroAtoms class.
    • \n
    • halogensCount (int):\nThe number of halogens in the heteroAtoms class.
    • \n
    • carbonHydrogen (relationship):\nThe relationship to the carbonHydrogen table.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __repr__()\nReturns the string representation of the object.
    • \n
    • to_dict()\nReturns the heteroAtoms class as a dictionary.
    • \n
    • halogens_count()\nReturns the number of halogens as a float.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.id", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.name", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.halogensCount", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.halogensCount", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.carbonHydrogen", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.carbonHydrogen", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.halogens_count", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.halogens_count", "kind": "variable", "doc": "

    Returns the number of halogens as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.to_dict", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.to_dict", "kind": "function", "doc": "

    Returns the heteroAtoms class as a dictionary.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen", "kind": "class", "doc": "

    CarbonHydrogen class for the carbonHydrogen table in the SQLite database.

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe primary key for the table.
    • \n
    • C (int):\nThe number of carbon atoms.
    • \n
    • H (int):\nThe number of hydrogen atoms.
    • \n
    • heteroAtoms (relationship):\nThe relationship to the heteroAtoms table.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __repr__()\nReturns the string representation of the object.
    • \n
    • mass()\nReturns the mass of the carbonHydrogen class as a float.
    • \n
    • c()\nReturns the number of carbon atoms as a float.
    • \n
    • h()\nReturns the number of hydrogen atoms as a float.
    • \n
    • dbe()\nReturns the double bond equivalent as a float.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.id", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.C", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.H", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.H", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.heteroAtoms", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.heteroAtoms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.mass", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.mass", "kind": "variable", "doc": "

    Returns the mass of the carbonHydrogen class as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.c", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.c", "kind": "variable", "doc": "

    Returns the number of carbon atoms as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.h", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.h", "kind": "variable", "doc": "

    Returns the number of hydrogen atoms as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.dbe", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.dbe", "kind": "variable", "doc": "

    Returns the double bond equivalent as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink", "kind": "class", "doc": "

    MolecularFormulaLink class for the molecularformula table in the SQLite database.

    \n\n
    Attributes
    \n\n
      \n
    • heteroAtoms_id (int):\nThe foreign key for the heteroAtoms table.
    • \n
    • carbonHydrogen_id (int):\nThe foreign key for the carbonHydrogen table.
    • \n
    • mass (float):\nThe mass of the molecular formula.
    • \n
    • DBE (float):\nThe double bond equivalent of the molecular formula.
    • \n
    • carbonHydrogen (relationship):\nThe relationship to the carbonHydrogen table.
    • \n
    • heteroAtoms (relationship):\nThe relationship to the heteroAtoms table.
    • \n
    • C (association_proxy):\nThe association proxy for the carbonHydrogen table.
    • \n
    • H (association_proxy):\nThe association proxy for the carbonHydrogen table.
    • \n
    • classe (association_proxy):\nThe association proxy for the heteroAtoms table.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __repr__()\nReturns the string representation of the object.
    • \n
    • to_dict()\nReturns the molecular formula as a dictionary.
    • \n
    • formula_string()\nReturns the molecular formula as a string.
    • \n
    • classe_string()\nReturns the heteroAtoms class as a string.
    • \n
    • _adduct_mz(ion_charge, adduct_atom)\nReturns the m/z of the adduct ion as a float.
    • \n
    • _protonated_mz(ion_charge)\nReturns the m/z of the protonated ion as a float.
    • \n
    • _radical_mz(ion_charge)\nReturns the m/z of the radical ion as a float.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.heteroAtoms_id", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.heteroAtoms_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.carbonHydrogen_id", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.carbonHydrogen_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.mass", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.mass", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.DBE", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.DBE", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.carbonHydrogen", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.carbonHydrogen", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.heteroAtoms", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.heteroAtoms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.C", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.C", "kind": "variable", "doc": "

    A descriptor that presents a read/write view of an object attribute.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.H", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.H", "kind": "variable", "doc": "

    A descriptor that presents a read/write view of an object attribute.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.classe", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.classe", "kind": "variable", "doc": "

    A descriptor that presents a read/write view of an object attribute.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.to_dict", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.to_dict", "kind": "function", "doc": "

    Returns the molecular formula as a dictionary.

    \n\n
    Returns
    \n\n
      \n
    • dict: The molecular formula as a dictionary.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.formula_string", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.formula_string", "kind": "variable", "doc": "

    Returns the molecular formula as a string.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.classe_string", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.classe_string", "kind": "variable", "doc": "

    Returns the heteroAtoms class as a string.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL", "kind": "class", "doc": "

    MolForm_SQL class for the SQLite database.

    \n\n
    Attributes
    \n\n
      \n
    • engine (sqlalchemy.engine.base.Engine):\nThe SQLAlchemy engine.
    • \n
    • session (sqlalchemy.orm.session.Session):\nThe SQLAlchemy session.
    • \n
    • type (str):\nThe type of database.
    • \n
    • chunks_count (int):\nThe number of chunks to use when querying the database.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __init__(url=None, echo=False)\nInitializes the database.
    • \n
    • __exit__(exc_type, exc_val, exc_tb)\nCloses the database.
    • \n
    • initiate_database(url, database_name)\nCreates the database.
    • \n
    • commit()\nCommits the session.
    • \n
    • init_engine(url)\nInitializes the SQLAlchemy engine.
    • \n
    • __enter__()

    • \n
    • get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None)\nReturns a dictionary of molecular formulas.

    • \n
    • check_entry(classe, ion_type, molecular_search_settings)\nChecks if a molecular formula is in the database.
    • \n
    • get_all_classes()\nReturns a list of all classes in the database.
    • \n
    • get_all()\nReturns a list of all molecular formulas in the database.
    • \n
    • delete_entry(row)\nDeletes a molecular formula from the database.
    • \n
    • purge(cls)\nDeletes all molecular formulas from the database.
    • \n
    • clear_data()\nClears the database.
    • \n
    • close(commit=True)\nCloses the database.
    • \n
    • add_engine_pidguard(engine)\nAdds multiprocessing guards.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.__init__", "kind": "function", "doc": "

    \n", "signature": "(url=None, echo=False)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.engine", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.engine", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.session", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.session", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.initiate_database", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.initiate_database", "kind": "function", "doc": "

    Creates the database.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nThe URL for the database.
    • \n
    • database_name (str):\nThe name of the database.
    • \n
    \n", "signature": "(self, url, database_name):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.commit", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.commit", "kind": "function", "doc": "

    Commits the session.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.init_engine", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.init_engine", "kind": "function", "doc": "

    Initializes the SQLAlchemy engine.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nThe URL for the database.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • sqlalchemy.engine.base.Engine: The SQLAlchemy engine.
    • \n
    \n", "signature": "(self, url):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.get_dict_by_classes", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.get_dict_by_classes", "kind": "function", "doc": "

    Returns a dictionary of molecular formulas.

    \n\n
    Parameters
    \n\n
      \n
    • classes (list):\nThe list of classes.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • nominal_mzs (list):\nThe list of nominal m/z values.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • molecular_search_settings (MolecularFormulaSearchSettings):\nThe molecular formula search settings.
    • \n
    • adducts (list, optional):\nThe list of adducts. Default is None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The dictionary of molecular formulas.
    • \n
    \n\n
    Notes
    \n\n

    Known issue when using SQLite:\nif the number of classes and nominal m/z values is higher than 999, the query will fail.\nSolution: use Postgres or split the query.

    \n", "signature": "(\tself,\tclasses,\tion_type,\tnominal_mzs,\tion_charge,\tmolecular_search_settings,\tadducts=None):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.check_entry", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.check_entry", "kind": "function", "doc": "

    Checks if a molecular formula is in the database.

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nThe class of the molecular formula.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • molecular_search_settings (MolecularFormulaSearchSettings):\nThe molecular formula search settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • sqlalchemy.orm.query.Query: The query.
    • \n
    \n", "signature": "(self, classe, ion_type, molecular_search_settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.get_all_classes", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.get_all_classes", "kind": "function", "doc": "

    Returns a list of all classes in the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.get_all", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.get_all", "kind": "function", "doc": "

    Returns a list of all molecular formulas in the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.delete_entry", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.delete_entry", "kind": "function", "doc": "

    Deletes a molecular formula from the database.

    \n", "signature": "(self, row):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.purge", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.purge", "kind": "function", "doc": "

    Deletes all molecular formulas from the database.

    \n\n
    Notes
    \n\n

    Careful, this will delete the entire database table

    \n", "signature": "(self, cls):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.clear_data", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.clear_data", "kind": "function", "doc": "

    Clears the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.close", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.close", "kind": "function", "doc": "

    Closes the database.

    \n\n
    Parameters
    \n\n
      \n
    • commit (bool, optional):\nWhether to commit the session. Default is True.
    • \n
    \n", "signature": "(self, commit=True):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.add_engine_pidguard", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.add_engine_pidguard", "kind": "function", "doc": "

    Adds multiprocessing guards.

    \n\n

    Forces a connection to be reconnected if it is detected\nas having been shared to a sub-process.

    \n\n
    Parameters
    \n\n
      \n
    • engine (sqlalchemy.engine.base.Engine):\nThe SQLAlchemy engine.
    • \n
    \n", "signature": "(self, engine):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results", "modulename": "corems.molecular_id.factory.spectrum_search_results", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults", "kind": "class", "doc": "

    Class for storing Search Results for a single Spectrum Query

    \n\n
    Parameters
    \n\n
      \n
    • query_spectrum (MassSpectrum):\nThe queried mass spectrum
    • \n
    • precursor_mz (float, optional):\nThe queried precursor_mz. None is interpreted as an open query, i.e. no precursor_mz
    • \n
    • spectral_similarity_search_results (dict):\nThe search results for the queried spectrum, which will be unpacked into attributes
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • query_spectrum (MassSpectrum):\nThe queried mass spectrum
    • \n
    • query_spectrum_id (int):\nThe id of the queried spectrum (the scan number within a MassSpectra object)
    • \n
    • precursor_mz (float):\nThe precursor m/z of the queried spectrum
    • \n
    \n\n
    Other Possible Attributes
    \n\n

    ref_mol_id : str\n The id of the molecule associated with the query spectrum in reference database\nref_ms_id : str\n The id of the query spectrum in reference database\nref_precursor_mz : float\n The precursor mass of the query spectrum\nprecursor_mz_error_ppm : float\n The ppm error between the query spectrum and the reference spectrum\nentropy_similarity : float\n The entropy similarity between the query spectrum and the reference spectrum\nref_ion_type : str\n The ion type of the reference spectrum, i.e. [M+H]+, [M+Na]+, etc.\nquery_mz_in_ref_n : list\n The number of query m/z peaks that are in the reference spectrum\nquery_mz_in_ref_fract : float\n The fraction of query m/z peaks that are in the reference spectrum\nquery_frag_types : list\n The fragment types of the query spectrum that are in the reference spectrum,\n i.e. LSF (lipid species fragments) or MSF (molecular species fragments),\n generally used only for lipidomics\nref_mz_in_query_n : list\n The number of reference m/z peaks that are in the query spectrum\nref_mz_in_query_fract : float\n The fraction of reference m/z peaks that are in the query spectrum\nref_frag_types : list\n The fragment types of the reference spectrum,\n i.e. LSF (lipid species fragments) or MSF (molecular species fragments),\n generally used only for lipidomics

    \n\n
    Methods
    \n\n

    *to_dataframe().\n Convert the SpectrumSearchResults to a pandas DataFrame

    \n"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults.__init__", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults.__init__", "kind": "function", "doc": "

    \n", "signature": "(query_spectrum, precursor_mz, spectral_similarity_search_results)"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults.query_spectrum", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults.query_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults.precursor_mz", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults.precursor_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults.to_dataframe", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults.to_dataframe", "kind": "function", "doc": "

    Convert the SpectrumSearchResults to a pandas DataFrame

    \n\n
    Parameters
    \n\n
      \n
    • cols_to_drop (list, optional):\nA list of columns to drop from the DataFrame. Default is None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A DataFrame with the SpectrumSearchResults attributes as columns
    • \n
    \n", "signature": "(self, cols_to_drop=None):", "funcdef": "def"}, {"fullname": "corems.molecular_id.input", "modulename": "corems.molecular_id.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.input.nistMSI", "modulename": "corems.molecular_id.input.nistMSI", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI", "kind": "class", "doc": "

    A class for reading NIST MSI files and storing the data in a SQLite database.

    \n\n
    Parameters
    \n\n
      \n
    • file_path (str):\nThe path to the NIST MSI file.
    • \n
    • url (str, optional):\nThe URL for the SQLite database. Default is 'sqlite://'.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileExistsError: If the specified file does not exist.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_path (str):\nThe path to the NIST MSI file.
    • \n
    • url (str):\nThe URL for the SQLite database.
    • \n
    • sqlLite_obj (EI_LowRes_SQLite):\nThe SQLite object for storing the compound data.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run().\nRuns the thread and initializes the SQLite object.
    • \n
    • get_sqlLite_obj().\nReturns the SQLite object.
    • \n
    • get_compound_data_dict_list().\nParses the NIST MSI file and returns a list of compound data dictionaries.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.__init__", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(file_path, url='sqlite://')"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.file_path", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.file_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.url", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.url", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.run", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.run", "kind": "function", "doc": "

    Runs the thread and initializes the SQLite object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.get_sqlLite_obj", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.get_sqlLite_obj", "kind": "function", "doc": "

    Returns the SQLite object.

    \n\n
    Returns
    \n\n
      \n
    • EI_LowRes_SQLite: The SQLite object for storing the compound data.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.get_compound_data_dict_list", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.get_compound_data_dict_list", "kind": "function", "doc": "

    Parses the NIST MSI file and returns a list of compound data dictionaries.

    \n\n
    Returns
    \n\n
      \n
    • list: A list of compound data dictionaries.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search", "modulename": "corems.molecular_id.search", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.compoundSearch", "modulename": "corems.molecular_id.search.compoundSearch", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch", "kind": "class", "doc": "

    A class representing a low-resolution mass spectral match.

    \n\n
    Parameters
    \n\n
      \n
    • gcms_obj (object):\nThe GC-MS object.
    • \n
    • sql_obj (object, optional):\nThe SQL object for database operations. Default is None.
    • \n
    • calibration (bool, optional):\nFlag indicating if the match is for calibration. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • gcms_obj (object):\nThe GC-MS object.
    • \n
    • sql_obj (object):\nThe SQL object for database operations.
    • \n
    • calibration (bool):\nFlag indicating if the match is for calibration.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • metabolite_detector_score(gc_peak, ref_obj, spectral_simi).\nCalculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
    • \n
    • run().\nRuns the low-resolution mass spectral match.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.__init__", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(gcms_obj, sql_obj=None, calibration=False)"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.gcms_obj", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.gcms_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.calibration", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.calibration", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.metabolite_detector_score", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.metabolite_detector_score", "kind": "function", "doc": "

    Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.

    \n\n
    Parameters
    \n\n
      \n
    • gc_peak (object):\nThe GC peak object.
    • \n
    • ref_obj (object):\nThe reference object.
    • \n
    • spectral_simi (object):\nThe spectral similarity object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the spectral similarity scores, RI score, and similarity score.
    • \n
    \n", "signature": "(self, gc_peak, ref_obj, spectral_simi):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.run", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.run", "kind": "function", "doc": "

    Runs the low-resolution mass spectral match.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces", "modulename": "corems.molecular_id.search.database_interfaces", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface", "kind": "class", "doc": "

    Base class that facilitates connection to spectral reference databases,\nsuch as EMSL's Metabolomics Reference Database (MetabRef).

    \n", "bases": "abc.ABC"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.__init__", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.__init__", "kind": "function", "doc": "

    Initialize instance.

    \n\n
    Parameters
    \n\n
      \n
    • key (str):\nToken key.
    • \n
    \n", "signature": "(key=None)"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.key", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.key", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.set_token", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.set_token", "kind": "function", "doc": "

    Set environment variable for MetabRef database token.

    \n\n
    Parameters
    \n\n
      \n
    • path (str):\nPath to token.
    • \n
    \n", "signature": "(self, path):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.get_token", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.get_token", "kind": "function", "doc": "

    Get environment variable for database token.

    \n\n
    Returns
    \n\n
      \n
    • str: Token string.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.get_header", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.get_header", "kind": "function", "doc": "

    Access stored database token and prepare as header.

    \n\n
    Returns
    \n\n
      \n
    • str: Header string.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.get_query", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.get_query", "kind": "function", "doc": "

    Request payload from URL according to get protocol.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nURL for request.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: Response as JSON.
    • \n
    \n", "signature": "(self, url):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.post_query", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.post_query", "kind": "function", "doc": "

    Request payload from URL according to post protocol.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nURL for request.
    • \n
    • variable (str):\nVariable to query.
    • \n
    • values (str):\nSpecific values of variable to query.
    • \n
    • tolerance (str):\nQuery tolerance relative to values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: Response as JSON.
    • \n
    \n", "signature": "(self, url, variable, values, tolerance):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefInterface", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefInterface", "kind": "class", "doc": "

    Interface to the Metabolomics Reference Database.

    \n", "bases": "SpectralDatabaseInterface"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefInterface.__init__", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefInterface.__init__", "kind": "function", "doc": "

    Initialize instance.

    \n", "signature": "()"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefInterface.spectrum_to_array", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefInterface.spectrum_to_array", "kind": "function", "doc": "

    Convert MetabRef-formatted spectrum to array.

    \n\n
    Parameters
    \n\n
      \n
    • spectrum (str):\nMetabRef spectrum, i.e. list of (m/z,abundance) pairs.
    • \n
    • normalize (bool):\nNormalize the spectrum by its magnitude.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ~numpy.array: Array of shape (N, 2), with m/z in the first column and abundance in\nthe second.
    • \n
    \n", "signature": "(self, spectrum, normalize=True):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface", "kind": "class", "doc": "

    Interface to the Metabolomics Reference Database for GC-MS data.

    \n", "bases": "MetabRefInterface"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.__init__", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.__init__", "kind": "function", "doc": "

    Initialize instance.

    \n", "signature": "()"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.GCMS_LIBRARY_URL", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.GCMS_LIBRARY_URL", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.FAMES_URL", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.FAMES_URL", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.available_formats", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.available_formats", "kind": "function", "doc": "

    View list of available formats.

    \n\n
    Returns
    \n\n
      \n
    • list: Format map keys.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.get_library", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.get_library", "kind": "function", "doc": "

    Request MetabRef GC/MS library.

    \n\n
    Parameters
    \n\n
      \n
    • format (str):\nFormat of requested library, i.e. \"json\", \"sql\", \"flashentropy\".\nSee available_formats method for aliases.
    • \n
    • normalize (bool):\nNormalize the spectrum by its magnitude.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Library in requested format.
    • \n
    \n", "signature": "(self, format='json', normalize=False):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.get_fames", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.get_fames", "kind": "function", "doc": "

    Request MetabRef GC/MS FAMEs library.

    \n\n
    Parameters
    \n\n
      \n
    • format (str):\nFormat of requested library, i.e. \"json\", \"sql\", \"flashentropy\".\nSee available_formats method for aliases.
    • \n
    • normalize (bool):\nNormalize the spectrum by its magnitude.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Library in requested format.
    • \n
    \n", "signature": "(self, format='json', normalize=False):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface", "kind": "class", "doc": "

    Interface to the Metabolomics Reference Database for LC-MS data.

    \n", "bases": "MetabRefInterface"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.__init__", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.__init__", "kind": "function", "doc": "

    Initialize instance.

    \n", "signature": "()"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.PRECURSOR_MZ_URL", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.PRECURSOR_MZ_URL", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.PRECURSOR_MZ_ALL_URL", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.PRECURSOR_MZ_ALL_URL", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.query_by_precursor", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.query_by_precursor", "kind": "function", "doc": "

    Query MetabRef by precursor m/z values.

    \n\n
    Parameters
    \n\n
      \n
    • mz_list (list):\nList of precursor m/z values.
    • \n
    • polarity (str):\nIonization polarity, either \"positive\" or \"negative\".
    • \n
    • mz_tol_ppm (float):\nTolerance in ppm for each precursor m/z value.\nUsed for retrieving potential matches from the database.
    • \n
    • mz_tol_da_api (float, optional):\nMaximum tolerance between precursor m/z values for API search, in daltons.\nUsed to group similar mzs into a single API query for speed. Default is 0.2.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: List of library entries in original JSON format.
    • \n
    \n", "signature": "(self, mz_list, polarity, mz_tol_ppm, mz_tol_da_api=0.2):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.request_all_precursors", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.request_all_precursors", "kind": "function", "doc": "

    Request all precursor m/z values from MetabRef.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (str):\nIonization polarity, either \"positive\" or \"negative\".
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: List of all precursor m/z values.
    • \n
    \n", "signature": "(self, polarity):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.get_lipid_library", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.get_lipid_library", "kind": "function", "doc": "

    Request MetabRef lipid library.

    \n\n
    Parameters
    \n\n
      \n
    • mz_list (list):\nList of precursor m/z values.
    • \n
    • polarity (str):\nIonization polarity, either \"positive\" or \"negative\".
    • \n
    • mz_tol_ppm (float):\nTolerance in ppm for each precursor m/z value.\nUsed for retrieving from a potential match from database.
    • \n
    • mz_tol_da_api (float, optional):\nMaximum tolerance between precursor m/z values for API search, in daltons.\nUsed to group similar mzs into a single API query for speed. Default is 0.2.
    • \n
    • format (str, optional):\nFormat of requested library, i.e. \"json\", \"sql\", \"flashentropy\".\nSee available_formats method for aliases. Default is \"json\".
    • \n
    • normalize (bool, optional):\nNormalize the spectrum by its magnitude. Default is True.
    • \n
    • fe_kwargs (dict, optional):\nKeyword arguments for FlashEntropy search. Default is {}.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: Library in requested format and lipid metadata as a LipidMetadata dataclass.
    • \n
    \n", "signature": "(\tself,\tmz_list,\tpolarity,\tmz_tol_ppm,\tmz_tol_da_api=0.2,\tformat='json',\tnormalize=True,\tfe_kwargs={}):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks", "kind": "class", "doc": "

    Class to find Oxygen peaks in a mass spectrum for formula assignment search

    \n\n

    Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter\nReturns a list of MSPeak class containing the possible Molecular Formula class objects.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nThis is where we store MassSpec class obj,
    • \n
    • lookupTableSettings (MolecularLookupTableSettings class):\nThis is where we store MolecularLookupTableSettings class obj
    • \n
    • min_O , max_O (int):\nminimum and maximum of Oxygen to allow the software to look for\nit will override the settings at lookupTableSettings.usedAtoms\ndefault min = 1, max = 22
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nThis is where we store MassSpec class obj,
    • \n
    • lookupTableSettings (MolecularLookupTableSettings class):\nThis is where we store MolecularLookupTableSettings class obj
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run().\nwill be called when the instantiated class method start is called
    • \n
    • get_list_found_peaks().\nreturns a list of MSpeaks classes containing all the MolecularFormula candidates inside the MSPeak\nfor more details of the structure see MSPeak class and MolecularFormula class
    • \n
    • set_mass_spec_indexes_by_found_peaks().\nset the mass spectrum to iterate over only the selected indexes
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.__init__", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(\tmass_spectrum_obj,\tsql_db: bool = False,\tmin_O: int = 1,\tmax_O: int = 22)"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.mass_spectrum_obj", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.mass_spectrum_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.min_0", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.min_0", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.max_O", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.max_O", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.run", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.run", "kind": "function", "doc": "

    Run the thread

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.find_most_abundant_formula", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.find_most_abundant_formula", "kind": "function", "doc": "

    Find the most abundant formula in the mass spectrum

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nMass spectrum object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MolecularFormula class obj: most abundant MolecularFormula with the lowest mass error
    • \n
    \n", "signature": "(self, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.find_most_abundant_formula_test", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.find_most_abundant_formula_test", "kind": "function", "doc": "

    [Test function] Find the most abundant formula in the mass spectrum

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nMass spectrum object
    • \n
    • settings (MolecularSearchSettings class):\nMolecular search settings object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MolecularFormula class obj: most abundant MolecularFormula with the lowest mass error
    • \n
    \n", "signature": "(self, mass_spectrum_obj, settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.find_series_mspeaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.find_series_mspeaks", "kind": "function", "doc": "

    Find a series of abundant peaks in the mass spectrum for a given molecular formula

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nMass spectrum object
    • \n
    • molecular_formula_obj_reference (MolecularFormula class):\nMolecular formula object
    • \n
    • deltamz (float):\ndelta m/z to look for peaks
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: list of MSpeak class objects
    • \n
    \n", "signature": "(self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.get_list_found_peaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.get_list_found_peaks", "kind": "function", "doc": "

    Get the list of found peaks

    \n\n
    Returns
    \n\n
      \n
    • list: list of MSpeak class objects
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.set_mass_spec_indexes_by_found_peaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.set_mass_spec_indexes_by_found_peaks", "kind": "function", "doc": "

    Set the mass spectrum to iterate over only the selected indexes.

    \n\n
    Notes
    \n\n

    Warning!!!!\nset the mass spectrum to iterate over only the selected indexes\ndo not forget to call mass_spectrum_obj.reset_indexes after the job is done

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.lcms_spectral_search", "modulename": "corems.molecular_id.search.lcms_spectral_search", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.lcms_spectral_search.LCMSSpectralSearch", "modulename": "corems.molecular_id.search.lcms_spectral_search", "qualname": "LCMSSpectralSearch", "kind": "class", "doc": "

    Methods for searching LCMS spectra.

    \n\n

    This class is designed to be a mixin class for the ~corems.mass_spectra.factory.lc_class.LCMSBase class.

    \n"}, {"fullname": "corems.molecular_id.search.lcms_spectral_search.LCMSSpectralSearch.get_more_match_quals", "modulename": "corems.molecular_id.search.lcms_spectral_search", "qualname": "LCMSSpectralSearch.get_more_match_quals", "kind": "function", "doc": "

    Return additional match qualities between query and library entry.

    \n\n
    Parameters
    \n\n
      \n
    • query_mz_arr (np.array):\nArray of query spectrum. Shape (N, 2), with m/z in the first column\nand abundance in the second.
    • \n
    • lib_entry (dict):\nLibrary spectrum entry, with 'mz' key containing the spectrum in\nthe format (mz, abundance),(mz, abundance), i.e. from MetabRef.
    • \n
    • mz_tol_da (float, optional):\nTolerance in Da for matching peaks (in MS2). Default is 0.1.
    • \n
    • include_fragment_types (bool, optional):\nIf True, include fragment type comparisons in output.\nDefaults to False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: Tuple of (query_in_lib, query_in_lib_fract, lib_in_query, lib_in_query_fract, query_frags, lib_frags, lib_precursor_mz).
    • \n
    \n\n
    Notes
    \n\n

    query_in_lib : int\n Number of peaks in query that are present in the library entry (within mz_tol_da).\nquery_in_lib_fract : float\n Fraction of peaks in query that are present in the library entry (within mz_tol_da).\nlib_in_query : int\n Number of peaks in the library entry that are present in the query (within mz_tol_da).\nlib_in_query_fract : float\n Fraction of peaks in the library entry that are present in the query (within mz_tol_da).\nquery_frags : list\n List of unique fragment types present in the query, generally 'MLF' or 'LSF' or both.\nlib_frags : list\n List of unique fragment types present in the library entry, generally 'MLF' or 'LSF' or both.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If library entry does not have 'fragment_types' key and include_fragment_types is True.
    • \n
    \n", "signature": "(query_mz_arr, lib_entry, mz_tol_da=0.1, include_fragment_types=False):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.lcms_spectral_search.LCMSSpectralSearch.fe_search", "modulename": "corems.molecular_id.search.lcms_spectral_search", "qualname": "LCMSSpectralSearch.fe_search", "kind": "function", "doc": "

    Search LCMS spectra using a FlashEntropy approach.

    \n\n
    Parameters
    \n\n
      \n
    • scan_list (list):\nList of scan numbers to search.
    • \n
    • fe_lib (~ms_entropy.FlashEntropySearch):\nFlashEntropy Search instance.
    • \n
    • precursor_mz_list (list, optional):\nList of precursor m/z values to search, by default [], which implies\nmatched with mass features; to enable this use_mass_features must be True.
    • \n
    • use_mass_features (bool, optional):\nIf True, use mass features to get precursor m/z values, by default True.\nIf True, will add search results to mass features' ms2_similarity_results attribute.
    • \n
    • peak_sep_da (float, optional):\nMinimum separation between m/z peaks spectra in Da. This needs to match the\napproximate resolution of the search spectra and the FlashEntropySearch\ninstance, by default 0.01.
    • \n
    • get_additional_metrics (bool, optional):\nIf True, get additional metrics from FlashEntropy search, by default True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but adds results to self.spectral_search_results and associates these
    • \n
    • spectral_search_results with mass_features within the self.mass_features dictionary.
    • \n
    \n", "signature": "(\tself,\tscan_list,\tfe_lib,\tprecursor_mz_list=[],\tuse_mass_features=True,\tpeak_sep_da=0.01,\tget_additional_metrics=True):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.last_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "last_error", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.last_dif", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "last_dif", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.closest_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "closest_error", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.error_average", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "error_average", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.nbValues", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "nbValues", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas", "kind": "class", "doc": "

    Class for searching molecular formulas in a mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • sql_db (MolForm_SQL, optional):\nThe SQL database object, by default None.
    • \n
    • first_hit (bool, optional):\nFlag to indicate whether to skip peaks that already have a molecular formula assigned, by default False.
    • \n
    • find_isotopologues (bool, optional):\nFlag to indicate whether to find isotopologues, by default True.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • sql_db (MolForm_SQL):\nThe SQL database object.
    • \n
    • first_hit (bool):\nFlag to indicate whether to skip peaks that already have a molecular formula assigned.
    • \n
    • find_isotopologues (bool):\nFlag to indicate whether to find isotopologues.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run_search().\nRun the molecular formula search.
    • \n
    • run_worker_mass_spectrum().\nRun the molecular formula search on the mass spectrum object.
    • \n
    • run_worker_ms_peaks().\nRun the molecular formula search on the given list of mass spectrum peaks.
    • \n
    • database_to_dict().\nConvert the database results to a dictionary.
    • \n
    • run_molecular_formula().\nRun the molecular formula search on the given list of mass spectrum peaks.
    • \n
    • search_mol_formulas().\nSearch for molecular formulas in the mass spectrum.
    • \n
    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.__init__", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmass_spectrum_obj,\tsql_db=None,\tfirst_hit: bool = False,\tfind_isotopologues: bool = True)"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.first_hit", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.first_hit", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.find_isotopologues", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.find_isotopologues", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.mass_spectrum_obj", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.mass_spectrum_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.run_search", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.run_search", "kind": "function", "doc": "

    Run the molecular formula search.

    \n\n
    Parameters
    \n\n
      \n
    • mspeaks (list of MSPeak):\nThe list of mass spectrum peaks.
    • \n
    • query (dict):\nThe query dictionary containing the possible molecular formulas.
    • \n
    • min_abundance (float):\nThe minimum abundance threshold.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom, by default None.
    • \n
    \n", "signature": "(\tself,\tmspeaks: list,\tquery: dict,\tmin_abundance: float,\tion_type: str,\tion_charge: int,\tadduct_atom=None):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.run_worker_mass_spectrum", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.run_worker_mass_spectrum", "kind": "function", "doc": "

    Run the molecular formula search on the mass spectrum object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.run_worker_ms_peaks", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.run_worker_ms_peaks", "kind": "function", "doc": "

    Run the molecular formula search on the given list of mass spectrum peaks.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peaks (list of MSPeak):\nThe list of mass spectrum peaks.
    • \n
    \n", "signature": "(self, ms_peaks):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.database_to_dict", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.database_to_dict", "kind": "function", "doc": "

    Convert the database results to a dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • classe_str_list (list):\nThe list of class strings.
    • \n
    • nominal_mzs (list):\nThe list of nominal m/z values.
    • \n
    • mf_search_settings (MolecularFormulaSearchSettings):\nThe molecular formula search settings.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The dictionary containing the database results.
    • \n
    \n", "signature": "(classe_str_list, nominal_mzs, mf_search_settings, ion_charge):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.run_molecular_formula", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.run_molecular_formula", "kind": "function", "doc": "

    Run the molecular formula search on the given list of mass spectrum peaks.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peaks (list of MSPeak):\nThe list of mass spectrum peaks.
    • \n
    \n", "signature": "(*args, **kw):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.search_mol_formulas", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.search_mol_formulas", "kind": "function", "doc": "

    Search for molecular formulas in the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • possible_formulas_list (list of MolecularFormula):\nThe list of possible molecular formulas.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • neutral_molform (bool, optional):\nFlag to indicate whether the molecular formulas are neutral, by default True.
    • \n
    • find_isotopologues (bool, optional):\nFlag to indicate whether to find isotopologues, by default True.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom, by default None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list of MSPeak: The list of mass spectrum peaks with assigned molecular formulas.
    • \n
    \n", "signature": "(\tself,\tpossible_formulas_list: List[corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormula],\tion_type: str,\tneutral_molform=True,\tfind_isotopologues=True,\tadduct_atom=None) -> List[corems.ms_peak.factory.MSPeakClasses._MSPeak]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker", "kind": "class", "doc": "

    Class for searching molecular formulas in a mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • find_isotopologues (bool, optional):\nFlag to indicate whether to find isotopologues, by default True.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • find_isotopologues (bool):\nFlag to indicate whether to find isotopologues.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • reset_error().\nReset the error variables.
    • \n
    • set_last_error().\nSet the last error.
    • \n
    • find_formulas().\nFind the formulas.
    • \n
    • calc_error().\nCalculate the error.
    • \n
    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.__init__", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.__init__", "kind": "function", "doc": "

    \n", "signature": "(find_isotopologues=True)"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.find_isotopologues", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.find_isotopologues", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.reset_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.reset_error", "kind": "function", "doc": "

    Reset the error variables.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n", "signature": "(self, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.set_last_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.set_last_error", "kind": "function", "doc": "

    Set the last error.

    \n\n
    Parameters
    \n\n
      \n
    • error (float):\nThe error.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n", "signature": "(self, error, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.calc_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.calc_error", "kind": "function", "doc": "

    Calculate the error.

    \n\n
    Parameters
    \n\n
      \n
    • mz_exp (float):\nThe experimental m/z value.
    • \n
    • mz_calc (float):\nThe calculated m/z value.
    • \n
    • method (str, optional):\nThe method, by default 'ppm'.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If the method is not ppm or ppb.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The error.
    • \n
    \n", "signature": "(mz_exp, mz_calc, method='ppm'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.find_formulas", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.find_formulas", "kind": "function", "doc": "

    Find the formulas.

    \n\n
    Parameters
    \n\n
      \n
    • formulas (list of MolecularFormula):\nThe list of molecular formulas.
    • \n
    • min_abundance (float):\nThe minimum abundance threshold.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • ms_peak (MSPeak):\nThe mass spectrum peak.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom, by default None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list of MSPeak: The list of mass spectrum peaks with assigned molecular formulas.
    • \n
    \n\n
    Notes
    \n\n

    Uses the closest error for the next search (this is not ideal, it needs to use confidence \nmetric to choose the right candidate then propagate the error using the error from the best candidate).\nIt needs to add s/n to the equation.\nIt needs optimization to define the mz_error_range within a m/z unit since it is directly proportional \nwith the mass, and inversely proportional to the rp. It's not linear, i.e., sigma mass.\nThe idea is to correlate sigma to resolving power, signal to noise and sample complexity per mz unit.\nMethod='distance'

    \n", "signature": "(\tself,\tformulas,\tmin_abundance,\tmass_spectrum_obj,\tms_peak,\tion_type,\tion_charge,\tadduct_atom=None):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC", "kind": "class", "doc": "

    Class for searching molecular formulas in a LC object.

    \n\n
    Parameters
    \n\n
      \n
    • lcms_obj (LC):\nThe LC object.
    • \n
    • sql_db (MolForm_SQL, optional):\nThe SQL database object, by default None.
    • \n
    • first_hit (bool, optional):\nFlag to indicate whether to skip peaks that already have a molecular formula assigned, by default False.
    • \n
    • find_isotopologues (bool, optional):\nFlag to indicate whether to find isotopologues, by default True.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run_untargeted_worker_ms1().\nRun untargeted molecular formula search on the ms1 mass spectrum.
    • \n
    • run_target_worker_ms1().\nRun targeted molecular formula search on the ms1 mass spectrum.
    • \n
    \n", "bases": "SearchMolecularFormulas"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.__init__", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.__init__", "kind": "function", "doc": "

    \n", "signature": "(lcms_obj, sql_db=None, first_hit=False, find_isotopologues=True)"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.first_hit", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.first_hit", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.find_isotopologues", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.find_isotopologues", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.lcms_obj", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.lcms_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.run_untargeted_worker_ms1", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.run_untargeted_worker_ms1", "kind": "function", "doc": "

    Run untargeted molecular formula search on the ms1 mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.run_target_worker_ms1", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.run_target_worker_ms1", "kind": "function", "doc": "

    Run targeted molecular formula search on the ms1 mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment", "modulename": "corems.molecular_id.search.priorityAssignment", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment", "kind": "class", "doc": "

    A class for assigning priority to oxygen classes in a molecular search.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • sql_db (bool, optional):\nWhether to use an SQL database. The default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • sql_db (MolForm_SQL):\nThe SQL database object.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run().\nRun the priority assignment process.
    • \n
    • create_data_base().\nCreate the molecular database for the specified heteroatomic classes.
    • \n
    • run_worker_mass_spectrum(assign_classes_order_tuples).\nRun the molecular formula search for each class in the specified order.
    • \n
    • get_dict_molecular_database(classe_str_list).\nGet the molecular database as a dictionary.
    • \n
    • ox_classes_and_peaks_in_order_().\nGet the oxygen classes and associated peaks in order.
    • \n
    • get_classes_in_order(dict_ox_class_and_ms_peak)
      \nGet the classes in order.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.__init__", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(mass_spectrum_obj, sql_db=False)"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.mass_spectrum_obj", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.mass_spectrum_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.run", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.run", "kind": "function", "doc": "

    Run the priority assignment process.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.create_data_base", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.create_data_base", "kind": "function", "doc": "

    Create the molecular database for the specified heteroatomic classes.

    \n\n
    Returns
    \n\n
      \n
    • assign_classes_order_str_dict_tuple_ (list):\nA list of tuples containing the class names and dictionaries of class attributes.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.run_worker_mass_spectrum", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.run_worker_mass_spectrum", "kind": "function", "doc": "

    Run the molecular formula search for each class in the specified order.

    \n\n
    Parameters
    \n\n
      \n
    • assign_classes_order_tuples (list):\nA list of tuples containing the class names and dictionaries of class attributes.
    • \n
    \n", "signature": "(self, assign_classes_order_tuples):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.get_dict_molecular_database", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.get_dict_molecular_database", "kind": "function", "doc": "

    Get the molecular database as a dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • classe_str_list (list):\nA list of class names.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary containing the molecular database.
    • \n
    \n", "signature": "(self, classe_str_list):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.ox_classes_and_peaks_in_order_", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.ox_classes_and_peaks_in_order_", "kind": "function", "doc": "

    Get the oxygen classes and associated peaks in order.

    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary containing the oxygen classes and associated peaks.
    • \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.get_classes_in_order", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.get_classes_in_order", "kind": "function", "doc": "

    Get the classes in order.

    \n\n
    Parameters
    \n\n
      \n
    • dict_ox_class_and_ms_peak (dict):\nA dictionary containing the oxygen classes and associated peaks.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of tuples containing the class names and dictionaries of class attributes.
    • \n
    \n\n
    Notes
    \n\n

    structure is \n ('HC', {'HC': 1})

    \n", "signature": "(self, dict_ox_class_and_ms_peak) -> [(<class 'str'>, <class 'dict'>)]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.get_class_strings_dict", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.get_class_strings_dict", "kind": "function", "doc": "

    Get the class strings and dictionaries.

    \n\n
    Parameters
    \n\n
      \n
    • all_atoms_tuples (tuple):\nA tuple containing the atoms.
    • \n
    • atoms_in_order (list):\nA list of atoms in order.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of tuples containing the class strings and dictionaries.
    • \n
    \n", "signature": "(all_atoms_tuples, atoms_in_order) -> [(<class 'str'>, <class 'dict'>)]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.combine_ox_class_with_other", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.combine_ox_class_with_other", "kind": "function", "doc": "

    Combine the oxygen classes with other classes.

    \n\n
    Parameters
    \n\n
      \n
    • atoms_in_order (list):\nA list of atoms in order.
    • \n
    • classes_strings_dict_tuples (list):

    • \n
    • dict_ox_class_and_ms_peak (dict):\nA dictionary containing the oxygen classes and associated peaks.

    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries.
    • \n
    \n", "signature": "(\tatoms_in_order,\tclasses_strings_dict_tuples,\tdict_ox_class_and_ms_peak) -> [<class 'dict'>]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.sort_classes", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.sort_classes", "kind": "function", "doc": "

    Sort the classes.

    \n\n
    Parameters
    \n\n
      \n
    • atoms_in_order (list):\nA list of atoms in order.
    • \n
    • combination_tuples (list):
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of tuples containing the class strings and dictionaries.
    • \n
    \n", "signature": "(atoms_in_order, combination_tuples) -> [(<class 'str'>, <class 'dict'>)]:", "funcdef": "def"}, {"fullname": "corems.ms_peak", "modulename": "corems.ms_peak", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.calc", "modulename": "corems.ms_peak.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc", "modulename": "corems.ms_peak.calc.MSPeakCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation", "kind": "class", "doc": "

    Class to perform calculations on MSPeak objects.

    \n\n

    This class provides methods to perform various calculations on MSPeak objects, such as calculating Kendrick Mass Defect (KMD) and Kendrick Mass (KM), calculating peak area, and fitting peak lineshape using different models.

    \n\n
    Parameters
    \n\n
      \n
    • None
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _ms_parent (MSParent):\nThe parent MSParent object associated with the MSPeakCalculation object.
    • \n
    • mz_exp (float):\nThe experimental m/z value of the peak.
    • \n
    • peak_left_index (int):\nThe start scan index of the peak.
    • \n
    • peak_right_index (int):\nThe final scan index of the peak.
    • \n
    • resolving_power (float):\nThe resolving power of the peak.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • _calc_kmd(dict_base).\nCalculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula.
    • \n
    • calc_area().\nCalculate the peak area using numpy's trapezoidal fit.
    • \n
    • fit_peak(mz_extend=6, delta_rp=0, model='Gaussian').\nPerform lineshape analysis on a peak using lmfit module.
    • \n
    • voigt_pso(w, r, yoff, width, loc, a).\nCalculate the Voigt function for particle swarm optimization (PSO) fitting.
    • \n
    • objective_pso(x, w, u).\nCalculate the objective function for PSO fitting.
    • \n
    • minimize_pso(lower, upper, w, u).\nMinimize the objective function using the particle swarm optimization algorithm.
    • \n
    • fit_peak_pso(mz_extend=6, upsample_multiplier=5).\nPerform lineshape analysis on a peak using particle swarm optimization (PSO) fitting.
    • \n
    • voigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1).\n[Legacy] Perform voigt lineshape analysis on a peak.
    • \n
    • pseudovoigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1, fraction=0.5).\n[Legacy] Perform pseudovoigt lineshape analysis on a peak.
    • \n
    • lorentz(oversample_multiplier=1, delta_rp=0, mz_overlay=1).\n[Legacy] Perform lorentz lineshape analysis on a peak.
    • \n
    • gaussian(oversample_multiplier=1, delta_rp=0, mz_overlay=1).\n[Legacy] Perform gaussian lineshape analysis on a peak.
    • \n
    • get_mz_domain(oversample_multiplier, mz_overlay).\n[Legacy] Resample/interpolate datapoints for lineshape analysis.
    • \n
    • number_possible_assignments().\nReturn the number of possible molecular formula assignments for the peak.
    • \n
    • molecular_formula_lowest_error().\nReturn the molecular formula with the smallest absolute mz error.
    • \n
    • molecular_formula_highest_prob_score().\nReturn the molecular formula with the highest confidence score.
    • \n
    • molecular_formula_earth_filter(lowest_error=True).\nFilter molecular formula using the 'Earth' filter.
    • \n
    • molecular_formula_water_filter(lowest_error=True).\nFilter molecular formula using the 'Water' filter.
    • \n
    • molecular_formula_air_filter(lowest_error=True).\nFilter molecular formula using the 'Air' filter.
    • \n
    • cia_score_S_P_error().\nCompound Identification Algorithm SP Error - Assignment Filter.
    • \n
    • cia_score_N_S_P_error().\nCompound Identification Algorithm NSP Error - Assignment Filter.
    • \n
    \n"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.calc_area", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.calc_area", "kind": "function", "doc": "

    Calculate the peak area using numpy's trapezoidal fit

    \n\n

    uses provided mz_domain to accurately integrate areas independent of digital resolution

    \n\n
    Returns
    \n\n
      \n
    • float: peak area
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.fit_peak", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.fit_peak", "kind": "function", "doc": "

    Lineshape analysis on a peak using lmfit module.

    \n\n

    Model and fit peak lineshape by defined function - using lmfit module\nDoes not oversample/resample/interpolate data points \nBetter to go back to time domain and perform more zero filling - if possible.

    \n\n
    Parameters
    \n\n
      \n
    • mz_extend (int):\nextra points left and right of peak definition to include in fitting
    • \n
    • delta_rp (float):\ndelta resolving power to add to resolving power
    • \n
    • model (str):\nType of lineshape model to use.\nModels allowed: Gaussian, Lorentz, Voigt
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    • fit_peak (lmfit object):\nfit results object from lmfit module
    • \n
    \n\n
    Notes
    \n\n

    Returns the calculated mz domain, initial defined abundance profile, and the fit peak results object from lmfit module\nmz_extend here extends the x-axis domain so that we have sufficient points either side of the apex to fit.\nTakes about 10ms per peak

    \n", "signature": "(self, mz_extend=6, delta_rp=0, model='Gaussian'):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.voigt_pso", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.voigt_pso", "kind": "function", "doc": "

    Voigt function for particle swarm optimisation (PSO) fitting

    \n\n

    From https://github.com/pnnl/nmrfit/blob/master/nmrfit/equations.py.\nCalculates a Voigt function over w based on the relevant properties of the distribution.

    \n\n
    Parameters
    \n\n
      \n
    • w (ndarray):\nArray over which the Voigt function will be evaluated.
    • \n
    • r (float):\nRatio between the Gaussian and Lorentzian functions.
    • \n
    • yoff (float):\nY-offset of the Voigt function.
    • \n
    • width (float):\nThe width of the Voigt function.
    • \n
    • loc (float):\nCenter of the Voigt function.
    • \n
    • a (float):\nArea of the Voigt function.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • V (ndarray):\nArray defining the Voigt function over w.
    • \n
    \n\n
    References
    \n\n
      \n
    1. https://github.com/pnnl/nmrfit
    2. \n
    \n\n
    Notes
    \n\n

    Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.

    \n", "signature": "(self, w, r, yoff, width, loc, a):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.objective_pso", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.objective_pso", "kind": "function", "doc": "

    Objective function for particle swarm optimisation (PSO) fitting

    \n\n

    The objective function used to fit supplied data. Evaluates sum of squared differences between the fit and the data.

    \n\n
    Parameters
    \n\n
      \n
    • x (list of floats):\nParameter vector.
    • \n
    • w (ndarray):\nArray of frequency data.
    • \n
    • u (ndarray):\nArray of data to be fit.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • rmse (float):\nRoot mean square error between the data and fit.
    • \n
    \n\n
    References
    \n\n
      \n
    1. https://github.com/pnnl/nmrfit
    2. \n
    \n", "signature": "(self, x, w, u):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.minimize_pso", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.minimize_pso", "kind": "function", "doc": "

    Minimization function for particle swarm optimisation (PSO) fitting

    \n\n

    Minimizes the objective function using the particle swarm optimization algorithm.\nMinimization function based on defined parameters

    \n\n
    Parameters
    \n\n
      \n
    • lower (list of floats):\nLower bounds for the parameters.
    • \n
    • upper (list of floats):\nUpper bounds for the parameters.
    • \n
    • w (ndarray):\nArray of frequency data.
    • \n
    • u (ndarray):\nArray of data to be fit.
    • \n
    \n\n
    Notes
    \n\n

    Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.\nCurrent parameters take ~2 seconds per peak.

    \n\n
    References
    \n\n
      \n
    1. https://github.com/pnnl/nmrfit
    2. \n
    \n", "signature": "(self, lower, upper, w, u):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.fit_peak_pso", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.fit_peak_pso", "kind": "function", "doc": "

    Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting

    \n\n

    Function to fit a Voigt peakshape using particle swarm optimisation (PSO).\nShould return better results than lmfit, but much more computationally expensive

    \n\n
    Parameters
    \n\n
      \n
    • mz_extend (int, optional):\nextra points left and right of peak definition to include in fitting. Defaults to 6.
    • \n
    • upsample_multiplier (int, optional):\nfactor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 5.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • xopt (array):\nvariables describing the voigt function.\nG/L ratio, width (fwhm), apex (x-axis), area.\ny-axis offset is fixed at 0
    • \n
    • fopt (float):\nobjective score (rmse)
    • \n
    • psfit (array):\nrecalculated y values based on function and optimised fit
    • \n
    • psfit_hdp (tuple of arrays):\n0 - linspace x-axis upsampled grid\n1 - recalculated y values based on function and upsampled x-axis grid\nDoes not change results, but aids in visualisation of the 'true' voigt lineshape
    • \n
    \n\n
    Notes
    \n\n

    Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.

    \n", "signature": "(self, mz_extend: int = 6, upsample_multiplier: int = 5):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.voigt", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.voigt", "kind": "function", "doc": "

    [Legacy] Voigt lineshape analysis function\nLegacy function for voigt lineshape analysis

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int):\nfactor to increase x-axis points by for simulation of fitted lineshape function
    • \n
    • delta_rp (float):\ndelta resolving power to add to resolving power
    • \n
    • mz_overlay (int):\nextra points left and right of peak definition to include in fitting
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    • calc_abundance (ndarray):\ncalculated abundance profile based on voigt function
    • \n
    \n", "signature": "(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.pseudovoigt", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.pseudovoigt", "kind": "function", "doc": "

    [Legacy] pseudovoigt lineshape function

    \n\n

    Legacy function for pseudovoigt lineshape analysis. \nNote - Code may not be functional currently.

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int, optional):\nfactor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 1.
    • \n
    • delta_rp (float, optional):\ndelta resolving power to add to resolving power. Defaults to 0.
    • \n
    • mz_overlay (int, optional):\nextra points left and right of peak definition to include in fitting. Defaults to 1.
    • \n
    • fraction (float, optional):\nfraction of gaussian component in pseudovoigt function. Defaults to 0.5.
    • \n
    \n", "signature": "(\tself,\toversample_multiplier=1,\tdelta_rp=0,\tmz_overlay=1,\tfraction=0.5):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.lorentz", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.lorentz", "kind": "function", "doc": "

    [Legacy] Lorentz lineshape analysis function

    \n\n

    Legacy function for lorentz lineshape analysis

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int):\nfactor to increase x-axis points by for simulation of fitted lineshape function
    • \n
    • delta_rp (float):\ndelta resolving power to add to resolving power
    • \n
    • mz_overlay (int):\nextra points left and right of peak definition to include in fitting
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    • calc_abundance (ndarray):\ncalculated abundance profile based on lorentz function
    • \n
    \n", "signature": "(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.gaussian", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.gaussian", "kind": "function", "doc": "

    [Legacy] Gaussian lineshape analysis function\nLegacy gaussian lineshape analysis function

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int):\nfactor to increase x-axis points by for simulation of fitted lineshape function
    • \n
    • delta_rp (float):\ndelta resolving power to add to resolving power
    • \n
    • mz_overlay (int):\nextra points left and right of peak definition to include in fitting
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    • calc_abundance (ndarray):\ncalculated abundance profile based on gaussian function
    • \n
    \n", "signature": "(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.get_mz_domain", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.get_mz_domain", "kind": "function", "doc": "

    [Legacy] function to resample/interpolate datapoints for lineshape analysis

    \n\n

    This code is used for the legacy line fitting functions and not recommended.\nLegacy function to support expanding mz domain for legacy lineshape functions

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int):\nfactor to increase x-axis points by for simulation of fitted lineshape function
    • \n
    • mz_overlay (int):\nextra points left and right of peak definition to include in fitting
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    \n", "signature": "(self, oversample_multiplier, mz_overlay):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.number_possible_assignments", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.number_possible_assignments", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_lowest_error", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_lowest_error", "kind": "function", "doc": "

    Return the molecular formula with the smallest absolute mz error

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_highest_prob_score", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_highest_prob_score", "kind": "function", "doc": "

    Return the molecular formula with the highest confidence score

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_earth_filter", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_earth_filter", "kind": "function", "doc": "

    Filter molecular formula using the 'Earth' filter

    \n\n

    This function applies the Formularity-esque 'Earth' filter to possible molecular formula assignments.\nEarth Filter:\n O > 0 AND N <= 3 AND P <= 2 AND 3P <= O

    \n\n

    If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter. \nOtherwise, it will return all Earth-filter compliant formulas.

    \n\n
    Parameters
    \n\n
      \n
    • lowest_error (bool, optional):\nReturn only the lowest error formula which also fits the Earth filter. \nIf False, return all Earth-filter compliant formulas. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: List of molecular formula objects which fit the Earth filter
    • \n
    \n\n
    References
    \n\n
      \n
    1. Nikola Tolic et al., \"Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra\"\nAnal. Chem. 2017, 89, 23, 12659\u201312665\ndoi: 10.1021/acs.analchem.7b03318
    2. \n
    \n", "signature": "(self, lowest_error=True):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_water_filter", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_water_filter", "kind": "function", "doc": "

    Filter molecular formula using the 'Water' filter

    \n\n

    This function applies the Formularity-esque 'Water' filter to possible molecular formula assignments.\nWater Filter:\n O > 0 AND N <= 3 AND S <= 2 AND P <= 2

    \n\n

    If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Water filter.\nOtherwise, it will return all Water-filter compliant formulas.

    \n\n
    Parameters
    \n\n
      \n
    • lowest_error (bool, optional):\nReturn only the lowest error formula which also fits the Water filter.\nIf False, return all Water-filter compliant formulas. Defaults to True.
    • \n
    \n\n
    Returns
    \n\n

    list\n List of molecular formula objects which fit the Water filter

    \n\n
    References
    \n\n
      \n
    1. Nikola Tolic et al., \"Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra\"\nAnal. Chem. 2017, 89, 23, 12659\u201312665\ndoi: 10.1021/acs.analchem.7b03318
    2. \n
    \n", "signature": "(self, lowest_error=True):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_air_filter", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_air_filter", "kind": "function", "doc": "

    Filter molecular formula using the 'Air' filter

    \n\n

    This function applies the Formularity-esque 'Air' filter to possible molecular formula assignments.\nAir Filter:\n O > 0 AND N <= 3 AND S <= 1 AND P = 0 AND 3(S+N) <= O

    \n\n

    If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Air filter.\nOtherwise, it will return all Air-filter compliant formulas.

    \n\n
    Parameters
    \n\n
      \n
    • lowest_error (bool, optional):\nReturn only the lowest error formula which also fits the Air filter.\nIf False, return all Air-filter compliant formulas. Defaults to True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: List of molecular formula objects which fit the Air filter
    • \n
    \n\n
    References
    \n\n
      \n
    1. Nikola Tolic et al., \"Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra\"\nAnal. Chem. 2017, 89, 23, 12659\u201312665\ndoi: 10.1021/acs.analchem.7b03318
    2. \n
    \n", "signature": "(self, lowest_error=True):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.cia_score_S_P_error", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.cia_score_S_P_error", "kind": "function", "doc": "

    Compound Identification Algorithm SP Error - Assignment Filter

    \n\n

    This function applies the Compound Identification Algorithm (CIA) SP Error filter to possible molecular formula assignments.

    \n\n

    It takes the molecular formula with the lowest S+P count, and returns the formula with the lowest absolute error from this subset.

    \n\n
    Returns
    \n\n
      \n
    • MolecularFormula: A single molecular formula which fits the rules of the CIA SP Error filter
    • \n
    \n\n
    References
    \n\n
      \n
    1. Elizabeth B. Kujawinski and Mark D. Behn, \"Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter\"\nAnal. Chem. 2006, 78, 13, 4363\u20134373\ndoi: 10.1021/ac0600306
    2. \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.cia_score_N_S_P_error", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.cia_score_N_S_P_error", "kind": "function", "doc": "

    Compound Identification Algorithm NSP Error - Assignment Filter

    \n\n

    This function applies the Compound Identification Algorithm (CIA) NSP Error filter to possible molecular formula assignments.

    \n\n

    It takes the molecular formula with the lowest N+S+P count, and returns the formula with the lowest absolute error from this subset.

    \n\n
    Returns
    \n\n
      \n
    • MolecularFormula: A single molecular formula which fits the rules of the CIA NSP Error filter
    • \n
    \n\n
    References
    \n\n
      \n
    1. Elizabeth B. Kujawinski and Mark D. Behn, \"Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter\"\nAnal. Chem. 2006, 78, 13, 4363\u20134373\ndoi: 10.1021/ac0600306
    2. \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If no molecular formula are associated with mass spectrum peak.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.factory", "modulename": "corems.ms_peak.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses", "modulename": "corems.ms_peak.factory.MSPeakClasses", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.ICRMassPeak", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "ICRMassPeak", "kind": "class", "doc": "

    A class representing a peak in an ICR mass spectrum.

    \n", "bases": "_MSPeak"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.ICRMassPeak.__init__", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "ICRMassPeak.__init__", "kind": "function", "doc": "

    \n", "signature": "(*args, ms_parent=None, exp_freq=None)"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.ICRMassPeak.resolving_power_calc", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "ICRMassPeak.resolving_power_calc", "kind": "function", "doc": "

    Calculate the theoretical resolving power of the peak.

    \n\n
    Parameters
    \n\n
      \n
    • T (float):\ntransient time
    • \n
    • B (float):\nMagnetic Field Strength (Tesla)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Theoretical resolving power of the peak.
    • \n
    \n\n
    References
    \n\n
      \n
    1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)\nDOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    2. \n
    \n", "signature": "(self, B, T):", "funcdef": "def"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.ICRMassPeak.set_calc_resolving_power", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "ICRMassPeak.set_calc_resolving_power", "kind": "function", "doc": "

    Set the resolving power of the peak to the calculated one.

    \n", "signature": "(self, B: float, T: float):", "funcdef": "def"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.TOFMassPeak", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "TOFMassPeak", "kind": "class", "doc": "

    A class representing a peak in a TOF mass spectrum.

    \n", "bases": "_MSPeak"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.TOFMassPeak.__init__", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "TOFMassPeak.__init__", "kind": "function", "doc": "

    \n", "signature": "(*args, exp_freq=None)"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.TOFMassPeak.set_calc_resolving_power", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "TOFMassPeak.set_calc_resolving_power", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.OrbiMassPeak", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "OrbiMassPeak", "kind": "class", "doc": "

    A class representing a peak in an Orbitrap mass spectrum.

    \n", "bases": "_MSPeak"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.OrbiMassPeak.__init__", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "OrbiMassPeak.__init__", "kind": "function", "doc": "

    \n", "signature": "(*args, exp_freq=None)"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.OrbiMassPeak.set_calc_resolving_power", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "OrbiMassPeak.set_calc_resolving_power", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient", "modulename": "corems.transient", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.calc", "modulename": "corems.transient.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.calc.TransientCalc", "modulename": "corems.transient.calc.TransientCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations", "kind": "class", "doc": "

    Transient Calculations

    \n\n
    Parameters
    \n\n
      \n
    • parameters (corems.transient.parameters.TransientParameters):\nThe transient parameters
    • \n
    • bandwidth (float):\nThe bandwidth of the transient (Hz)
    • \n
    • number_data_points (int):\nThe number of data points of the transient
    • \n
    • exc_low_freq (float):\nThe low frequency of the excitation (Hz)
    • \n
    • exc_high_freq (float):\nThe high frequency of the excitation (Hz)
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • parameters (corems.transient.parameters.TransientParameters):\nThe transient parameters
    • \n
    • bandwidth (float):\nThe bandwidth of the transient (Hz)
    • \n
    • number_data_points (int):\nThe number of data points of the transient
    • \n
    • exc_low_freq (float):\nThe low frequency of the excitation (Hz)
    • \n
    • exc_high_freq (float):\nThe high frequency of the excitation (Hz)
    • \n
    \n\n
    Methods
    \n\n
      \n
    • cal_transient_time().\nCalculate the time domain length of the transient
    • \n
    • zero_fill(transient).\nZero fill the transient
    • \n
    • truncation(transient).\nTruncate the transient
    • \n
    • apodization(transient).
      \nApodization of the transient
    • \n
    • calculate_frequency_domain(number_data_points).\nCalculate the frequency domain (axis) of the transient
    • \n
    • cut_freq_domain(freqdomain_X, freqdomain_Y).\nCut the frequency domain of the transient
    • \n
    • phase_and_absorption_mode_ft().\n[Not Functional] Produce a phased absorption mode FT spectrum
    • \n
    • magnitude_mode_ft(transient).\nPerform magnitude mode FT of the transient
    • \n
    • correct_dc_offset().\n[Not Yet Implemented] Correct the DC offset of the transient
    • \n
    \n"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.cal_transient_time", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.cal_transient_time", "kind": "function", "doc": "

    Calculate the time domain length of the transient

    \n\n
    Returns
    \n\n
      \n
    • float: The time domain length of the transient (s)
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.zero_fill", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.zero_fill", "kind": "function", "doc": "

    Zero fill the transient

    \n\n
    Parameters
    \n\n
      \n
    • transient (numpy.ndarray):\nThe transient data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The transient data points zerofilled
    • \n
    \n\n
    Notes
    \n\n

    The number of zero fills is defined by the transient parameter number_of_zero_fills.\nThe function first calculate the next power of two of the transient length and zero fills to that length, to take advantage of FFT algorithm.\n If the parameter next_power_of_two is set to False, the function will zero fill to the length of the original transient times the number of zero fills

    \n", "signature": "(self, transient):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.truncation", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.truncation", "kind": "function", "doc": "

    Truncate the transient

    \n\n
    Parameters
    \n\n
      \n
    • transient (numpy.ndarray):\nThe transient data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The truncated transient data points
    • \n
    \n\n
    Notes
    \n\n

    The number of truncations is defined by the transient parameter number_of_truncations

    \n", "signature": "(self, transient):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.apodization", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.apodization", "kind": "function", "doc": "

    Apodization of the transient

    \n\n
    Parameters
    \n\n
      \n
    • transient (numpy.ndarray):\nThe transient data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The apodized transient data points
    • \n
    \n\n
    Notes
    \n\n

    The apodization method is defined by the transient parameter apodization_method.\nThe following apodization methods are available:\n Hamming,\n Hanning,\n Blackman,\n Full-Sine,\n Half-Sine,\n Kaiser,\n Half-Kaiser.

    \n\n

    For Kaiser and Half-Kaiser, an additional parameter 'beta' is required, set by the transient parameter kaiser_beta.

    \n", "signature": "(self, transient):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.calculate_frequency_domain", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.calculate_frequency_domain", "kind": "function", "doc": "

    Calculate the frequency domain (axis) of the transient

    \n\n
    Parameters
    \n\n
      \n
    • number_data_points (int):\nThe number of data points of the transient
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The frequency domain of the transient (Hz)
    • \n
    \n", "signature": "(self, number_data_points):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.cut_freq_domain", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.cut_freq_domain", "kind": "function", "doc": "

    Cut the frequency domain of the transient

    \n\n
    Parameters
    \n\n
      \n
    • freqdomain_X (numpy.ndarray):\nThe frequency domain of the transient (Hz)
    • \n
    • freqdomain_Y (numpy.ndarray):\nThe frequency domain of the transient (Hz)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The frequency domain of the transient (Hz)
    • \n
    • numpy.ndarray: The frequency domain of the transient (Hz)
    • \n
    \n", "signature": "(self, freqdomain_X, freqdomain_Y):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.phase_and_absorption_mode_ft", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.phase_and_absorption_mode_ft", "kind": "function", "doc": "

    [Not Functional] Produce a phased absorption mode FT spectrum

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.perform_magniture_mode_ft", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.perform_magniture_mode_ft", "kind": "function", "doc": "

    Perform magnitude mode FT of the transient

    \n\n

    Parameters

    \n\n

    transient : numpy.ndarray\n The transient data points

    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The frequency domain of the transient (Hz)
    • \n
    • numpy.ndarray: The magnitude of the transient (a.u.)
    • \n
    \n", "signature": "(self, transient):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.correct_dc_offset", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.correct_dc_offset", "kind": "function", "doc": "

    [Not Yet Implemented] Correct the DC offset of the transient

    \n\n

    A simple baseline correction to compensate for a DC offset in the recorded transient.\nNot implemented.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.factory", "modulename": "corems.transient.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses", "modulename": "corems.transient.factory.TransientClasses", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient", "kind": "class", "doc": "

    The Transient object contains the transient data and the parameters used to process it

    \n\n
    Parameters
    \n\n
      \n
    • data (numpy.ndarray):\nArray with the transient data
    • \n
    • d_params (dict):\nDictionary with the parameters to be set
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • calibration_terms (tuple):\nTuple with the calibration terms (A, B, C)
    • \n
    • bandwidth (float):\nThe bandwidth of the transient (Hz)
    • \n
    • number_data_points (int):\nThe number of data points of the transient
    • \n
    • polarity (int):\nThe polarity of the transient
    • \n
    • transient_time (float):\nThe time domain length of the transient
    • \n
    • d_params (dict):\nDictionary with the parameters to be set
    • \n
    • frequency_domain (numpy.ndarray):\nArray with the frequency domain
    • \n
    • magnitude (numpy.ndarray):\nArray with the magnitude
    • \n
    • _full_filename_path (str):\nThe full path of the transient file
    • \n
    • _exc_high_freq (float):\nThe high frequency of the excitation (Hz)
    • \n
    • _exc_low_freq (float):\nThe low frequency of the excitation (Hz)
    • \n
    • _parameters (corems.transient.parameters.TransientParameters):\nThe transient parameters
    • \n
    • _transient_data (numpy.ndarray):\nArray with the transient data
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_frequency_domain(plot_result=True).\nGet the frequency domain and magnitude from the transient data
    • \n
    • get_mass_spectrum(auto_process=True, plot_result=True, keep_profile=True).
      \nGet the mass spectrum from the transient data
    • \n
    • set_processing_parameter(apodization_method, number_of_truncations, number_of_zero_fills).
      \nSet the processing parameters
    • \n
    • scale_plot_size(factor=1.5).\nScale the plot size by a factor
    • \n
    • plot_transient(ax=None, c='k').\nPlot the transient data
    • \n
    • plot_zerofilled_transient(ax=None, c='k').\nPlot the transient data with zero fill
    • \n
    • plot_apodized_transient(ax=None, c='k').\nPlot the transient data with apodization
    • \n
    • plot_frequency_domain(ax=None, c='k').\nPlot the frequency domain and magnitude
    • \n
    • set_parameter_from_toml(parameters_path).\nSet the processing parameters from a toml file
    • \n
    • set_parameter_from_json(parameters_path).\nSet the processing parameters from a json file
    • \n
    \n", "bases": "corems.transient.calc.TransientCalc.TransientCalculations"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.__init__", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.__init__", "kind": "function", "doc": "

    \n", "signature": "(data, d_params)"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.d_params", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.d_params", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.frequency_domain", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.frequency_domain", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.magnitude", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.magnitude", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.scale_plot_size", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.scale_plot_size", "kind": "function", "doc": "

    Scale the plot size by a factor

    \n\n
    Parameters
    \n\n
      \n
    • factor (float, optional):\nThe factor to scale the plot size, by default 1.5
    • \n
    \n", "signature": "(self, factor=1.5):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.set_processing_parameter", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.set_processing_parameter", "kind": "function", "doc": "

    Set the processing parameters

    \n\n
    Parameters
    \n\n
      \n
    • apodization_method (str):\nApodization method to be used
    • \n
    • number_of_truncations (int):\nNumber of truncations to be used
    • \n
    • number_of_zero_fills (int):\nNumber of zero fills to be used
    • \n
    \n", "signature": "(\tself,\tapodization_method: str,\tnumber_of_truncations: int,\tnumber_of_zero_fills: int):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.parameters", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.parameters", "kind": "variable", "doc": "

    The transient parameters

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.set_parameter_from_toml", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.set_parameter_from_toml", "kind": "function", "doc": "

    Set the processing parameters from a toml file

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.set_parameter_from_json", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.set_parameter_from_json", "kind": "function", "doc": "

    Set the processing parameters from a json file

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.get_frequency_domain", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.get_frequency_domain", "kind": "function", "doc": "

    Get the frequency domain and magnitude from the transient data

    \n\n
    Parameters
    \n\n
      \n
    • plot_result (bool, optional):\nPlot the frequency domain and magnitude, by default True
    • \n
    \n\n
    Returns
    \n\n
      \n
    • frequency_domain (numpy.ndarray):\nArray with the frequency domain
    • \n
    • magnitude (numpy.ndarray):\nArray with the magnitude
    • \n
    \n", "signature": "(self, plot_result=True):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.get_mass_spectrum", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.get_mass_spectrum", "kind": "function", "doc": "

    Get the mass spectrum from the transient data

    \n\n
    Parameters
    \n\n
      \n
    • auto_process (bool, optional):\nProcess the transient data, by default True
    • \n
    • plot_result (bool, optional):\nPlot the frequency domain and magnitude, by default True
    • \n
    • keep_profile (bool, optional):\nKeep the profile data, by default True
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecfromFreq: Mass spectrum object
    • \n
    \n", "signature": "(\tself,\tauto_process: bool = True,\tplot_result: bool = True,\tkeep_profile: bool = True) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq:", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.filename", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.filename", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.dir_location", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.dir_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.A_therm", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.A_therm", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.B_therm", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.B_therm", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.C_therm", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.C_therm", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.plot_transient", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.plot_transient", "kind": "function", "doc": "

    Plot the transient data

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes, optional):\nMatplotlib axes object, by default None
    • \n
    • c (str, optional):\nColor, by default 'k'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes: Matplotlib axes object
    • \n
    \n", "signature": "(self, ax=None, c='k'):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.plot_zerofilled_transient", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.plot_zerofilled_transient", "kind": "function", "doc": "

    Plot the transient data with zero fill

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes, optional):\nMatplotlib axes object, by default None
    • \n
    • c (str, optional):\nColor, by default 'k'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes: Matplotlib axes object
    • \n
    \n", "signature": "(self, ax=None, c='k'):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.plot_apodized_transient", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.plot_apodized_transient", "kind": "function", "doc": "

    Plot the transient data with apodization

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes, optional):\nMatplotlib axes object, by default None
    • \n
    • c (str, optional):\nColor, by default 'k'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes: Matplotlib axes object
    • \n
    \n", "signature": "(self, ax=None, c='k'):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.plot_frequency_domain", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.plot_frequency_domain", "kind": "function", "doc": "

    Plot the frequency domain and magnitude

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes, optional):\nMatplotlib axes object, by default None
    • \n
    • c (str, optional):\nColor, by default 'k'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes: Matplotlib axes object
    • \n
    \n", "signature": "(self, ax=None, c='k'):", "funcdef": "def"}, {"fullname": "corems.transient.input", "modulename": "corems.transient.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.input.brukerSolarix", "modulename": "corems.transient.input.brukerSolarix", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix", "kind": "class", "doc": "

    A class used to Read a single Transient from Bruker's FT-MS acquisition station (fid, or ser)

    \n\n
    Parameters
    \n\n
      \n
    • d_directory_location (str):\nthe full path of the .d folder
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • d_directory_location (str):\nthe full path of the .d folder
    • \n
    • file_location (str):\nthe full path of the .d folder
    • \n
    • parameter_filename_location (str):\nthe full path of the apexAcquisition.method file
    • \n
    • transient_data_path (str):\nthe full path of the fid or ser file
    • \n
    • scan_attr (str):\nthe full path of the scan.xml file
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_transient().\nRead the data and settings returning a Transient class
    • \n
    • get_scan_attr().\nRead the scan retention times, TIC values and scan indices.
    • \n
    • locate_file(folder, type_file_name).\nFind the full path of a specific file within the acquisition .d folder or subfolders
    • \n
    • parse_parameters(parameters_filename).\nOpen the given file and retrieve all parameters from apexAcquisition.method
    • \n
    • fix_freq_limits(d_parameters).\nRead and set the correct frequency limits for the spectrum
    • \n
    • get_excite_sweep_range(filename).\nDetermine excitation sweep range from ExciteSweep file
    • \n
    \n"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.__init__", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.__init__", "kind": "function", "doc": "

    \n", "signature": "(d_directory_location)"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.d_directory_location", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.d_directory_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.file_location", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.get_scan_attr", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.get_scan_attr", "kind": "function", "doc": "

    Function to get the scan retention times, TIC values and scan indices.

    \n\n

    Gets information from scan.xml file in the bruker .d folder.\nNote this file is only present in some .d format - e.g. for imaging mode data, it is not present.

    \n\n
    Returns
    \n\n
      \n
    • dict_scan_rt_tic (dict):\na dictionary with scan number as key and rt and tic as values
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.get_transient", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.get_transient", "kind": "function", "doc": "

    Function to get the transient data and parameters from a Bruker Solarix .d folder.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int):\nthe scan number to be read. Default is 1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Transient: a transient object
    • \n
    \n", "signature": "(self, scan_number=1):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.fix_freq_limits", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.fix_freq_limits", "kind": "function", "doc": "

    Function to read and set the correct frequency limits for the spectrum

    \n\n
    Notes
    \n\n

    This is using the excitation limits from the apexAcquisition.method file,\nwhich may not match the intended detection limits in edge cases. \nIn default acquisitions, excitation and detection are the same. \nBut, they may not be in some cases with selective excitation, custom excite waveforms, or in 2DMS applications.

    \n\n
    Parameters
    \n\n
      \n
    • d_parameters (dict):\na dictionary with the parameters from the apexAcquisition.method file
    • \n
    \n", "signature": "(self, d_parameters):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.get_excite_sweep_range", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.get_excite_sweep_range", "kind": "function", "doc": "

    Function to determine excitation sweep range from ExciteSweep file

    \n\n

    This looks at the first and last rows of the ExciteSweep file to determine the excitation frequency range.\nNote that this assumes the excitation sweep was linear and the first and last rows are the lowest and highest frequencies.\nThis is presumably always true, but again may be incorrect for edge cases with custom excitation waveforms.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str):\nthe full path to the ExciteSweep file
    • \n
    \n", "signature": "(filename):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.locate_file", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.locate_file", "kind": "function", "doc": "

    Function to locate a file in a folder

    \n\n

    Find the full path of a specific file within the acquisition .d folder or subfolders

    \n\n
    Parameters
    \n\n
      \n
    • folder (str):\nthe full path to the folder
    • \n
    • type_file_name (str):\nthe name of the file to be located\nExpected options: ExciteSweep or apexAcquisition.method
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: the full path to the file
    • \n
    \n\n
    Notes
    \n\n

    adapted from code from SPIKE library, https://github.com/spike-project/spike

    \n", "signature": "(folder, type_file_name='apexAcquisition.method'):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.parse_parameters", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.parse_parameters", "kind": "function", "doc": "

    Function to parse the parameters from apexAcquisition.method file

    \n\n

    Open the given file and retrieve all parameters from apexAcquisition.method\n None is written when no value for value is found

    \n\n
    structure : <param name = \"AMS_ActiveExclusion\"><value>0</value></param>\n
    \n\n
    Parameters
    \n\n
      \n
    • parameters_filename (str):\nthe full path to the apexAcquisition.method file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: a dictionary with the parameters and values
    • \n
    \n\n
    Notes
    \n\n

    Adapted from code from SPIKE library, https://github.com/spike-project/spike.\nCode may not handle all possible parameters, but should be sufficient for most common use cases

    \n", "signature": "(parameters_filename):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.parse_sqlite", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.parse_sqlite", "kind": "function", "doc": "

    \n", "signature": "(self, sqlite_filename='chromatography-data.sqlite'):", "funcdef": "def"}, {"fullname": "corems.transient.input.midasDatFile", "modulename": "corems.transient.input.midasDatFile", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile", "kind": "class", "doc": "

    [Not Implemented] Reads MIDAS .dat files (binary transient data)

    \n\n

    This class will read .dat binary format transient data, e.g. midas format from Predator or Thermo datastations\nThis code is not yet implemented and is not fully functional.

    \n\n
    Parameters
    \n\n
      \n
    • filename_path (str):\nThe path to the .dat file
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • filename_path (str):\nThe path to the .dat file
    • \n
    • d_params (dict):\nA dictionary with the parameters of the .dat file
    • \n
    • transient_data (numpy.ndarray):\nThe transient data
    • \n
    \n\n
    Methods
    \n\n
      \n
    • read_file().\nReads the .dat file and returns the transient data and the parameters
    • \n
    • get_transient_data(data_file, d_params).\nReads the transient data from the .dat file
    • \n
    • parse_parameter(f).\nParses the parameters from the .dat file
    • \n
    \n\n
    Raises
    \n\n
      \n
    • NotImplementedError: This class is not yet implemented.
    • \n
    \n"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.__init__", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.__init__", "kind": "function", "doc": "

    \n", "signature": "(filename_path)"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.filename_path", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.filename_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.read_file", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.read_file", "kind": "function", "doc": "

    Reads the .dat file and returns the transient data and the parameters

    \n\n
    Returns
    \n\n
      \n
    • transient_data (numpy.ndarray):\nThe transient data
    • \n
    • d_params (dict):\nA dictionary with the parameters of the .dat file
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.get_transient_data", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.get_transient_data", "kind": "function", "doc": "

    Reads the transient data from the .dat file

    \n\n
    Parameters
    \n\n
      \n
    • data_file (file):\nThe .dat file
    • \n
    • d_params (dict):\nA dictionary with the parameters of the .dat file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • myarray (numpy.ndarray):\nThe transient data
    • \n
    \n", "signature": "(self, data_file, d_params):", "funcdef": "def"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.parse_parameter", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.parse_parameter", "kind": "function", "doc": "

    Parses the parameters from the .dat file

    \n\n
    Parameters
    \n\n
      \n
    • f (file):\nThe .dat file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • output_parameters (dict):\nA dictionary with the parameters of the .dat file
    • \n
    \n", "signature": "(self, f):", "funcdef": "def"}]; + /** pdoc search index */const docs = [{"fullname": "corems", "modulename": "corems", "kind": "module", "doc": "

    \"CoreMS

    \n\n
    \n\n
    \n
    \n\"DOI\"\n
    \n
    \n\n

    Table of Contents

    \n\n\n\n
    \n\n

    CoreMS

    \n\n

    CoreMS is a comprehensive mass spectrometry framework for software development and data analysis of small molecules analysis.

    \n\n

    Data handling and software development for modern mass spectrometry (MS) is an interdisciplinary endeavor requiring skills in computational science and a deep understanding of MS. To enable scientific software development to keep pace with fast improvements in MS technology, we have developed a Python software framework named CoreMS. The goal of the framework is to provide a fundamental, high-level basis for working with all mass spectrometry data types, allowing custom workflows for data signal processing, annotation, and curation. The data structures were designed with an intuitive, mass spectrometric hierarchical structure, thus allowing organized and easy access to the data and calculations. Moreover, CoreMS supports direct access for almost all vendors\u2019 data formats, allowing for the centralization and automation of all data processing workflows from the raw signal to data annotation and curation.

    \n\n

    CoreMS aims to provide

    \n\n
      \n
    • logical mass spectrometric data structure
    • \n
    • self-containing data and metadata storage
    • \n
    • modern molecular formulae assignment algorithms
    • \n
    • dynamic molecular search space database search and generator
    • \n
    \n\n
    \n\n

    Current Version

    \n\n

    3.0.0

    \n\n
    \n\n

    Main Developers/Contact

    \n\n\n\n
    \n\n

    Documentation

    \n\n

    API documentation can be found here.

    \n\n

    Overview slides can be found here.

    \n\n
    \n\n

    Contributing

    \n\n

    As an open source project, CoreMS welcomes contributions of all forms. Before contributing, please see our Dev Guide

    \n\n
    \n\n

    Data formats

    \n\n

    Data input formats

    \n\n
      \n
    • Bruker Solarix (CompassXtract)
    • \n
    • Bruker Solarix transients, ser and fid (FT magnitude mode only)
    • \n
    • ThermoFisher (.raw)
    • \n
    • Spectroswiss signal booster data-acquisition station (.hdf5)
    • \n
    • MagLab ICR data-acquisition station (FT and magnitude mode) (.dat)
    • \n
    • ANDI NetCDF for GC-MS (.cdf)
    • \n
    • mzml for LC-MS (.mzml)
    • \n
    • Generic mass list in profile and centroid mode (includes all delimiter types and Excel formats)
    • \n
    • CoreMS exported processed mass list files (Excel, .csv, .txt, pandas dataframe as .pkl)
    • \n
    • CoreMS self-containing Hierarchical Data Format (.hdf5)
    • \n
    • Pandas Dataframe
    • \n
    • Support for cloud Storage using s3path.S3path
    • \n
    \n\n

    Data output formats

    \n\n
      \n
    • Pandas data frame (can be saved using pickle, h5, etc)
    • \n
    • Text Files (.csv, tab separated .txt, etc)
    • \n
    • Microsoft Excel (xlsx)
    • \n
    • Automatic JSON for metadata storage and reuse
    • \n
    • Self-containing Hierarchical Data Format (.hdf5) including raw data and time-series data-point for processed data-sets with all associated metadata stored as json attributes
    • \n
    \n\n

    Data structure types

    \n\n
      \n
    • LC-MS
    • \n
    • GC-MS
    • \n
    • Transient
    • \n
    • Mass Spectra
    • \n
    • Mass Spectrum
    • \n
    • Mass Spectral Peak
    • \n
    • Molecular Formula
    • \n
    \n\n
    \n\n

    Available features

    \n\n

    FT-MS Signal Processing, Calibration, and Molecular Formula Search and Assignment

    \n\n
      \n
    • Apodization, Zerofilling, and Magnitude mode FT
    • \n
    • Manual and automatic noise threshold calculation
    • \n
    • Peak picking using apex quadratic fitting
    • \n
    • Experimental resolving power calculation
    • \n
    • Frequency and m/z domain calibration functions:
    • \n
    • LedFord equation
    • \n
    • Linear equation
    • \n
    • Quadratic equation
    • \n
    • Automatic search for the most abundant Ox homologue series
    • \n
    • Automatic local (SQLite) or external (PostgreSQL) database check, generation, and search
    • \n
    • Automatic molecular formulae assignments algorithm for ESI(-) MS for natural organic matter analysis
    • \n
    • Automatic fine isotopic structure calculation and search for all isotopes
    • \n
    • Flexible Kendrick normalization base
    • \n
    • Kendrick filter using density-based clustering
    • \n
    • Kendrick classification
    • \n
    • Heteroatoms classification and visualization
    • \n
    \n\n

    GC-MS Signal Processing, Calibration, and Compound Identification

    \n\n
      \n
    • Baseline detection, subtraction, smoothing
    • \n
    • m/z based Chromatogram Peak Deconvolution
    • \n
    • Manual and automatic noise threshold calculation
    • \n
    • First and second derivatives peak picking methods
    • \n
    • Peak Area Calculation
    • \n
    • Retention Index Calibration
    • \n
    • Automatic local (SQLite) or external (MongoDB or PostgreSQL) database check, generation, and search
    • \n
    • Automatic molecular match algorithm with all spectral similarity methods
    • \n
    \n\n

    High Resolution Mass Spectrum Simulations

    \n\n
      \n
    • Peak shape (Lorentz, Gaussian, Voigt, and pseudo-Voigt)
    • \n
    • Peak fitting for peak shape definition
    • \n
    • Peak position as a function of data points, signal to noise and resolving power (Lorentz and Gaussian)
    • \n
    • Prediction of mass error distribution
    • \n
    • Calculated ICR Resolving Power based on magnetic field (B), and transient time(T)
    • \n
    \n\n

    LC-MS Signal Processing, Molecular Formula Search and Assignment, and Spectral Similarity Searches

    \n\n
      \n
    • Two dimensional (m/z and retention time) peak picking using persistent homology
    • \n
    • Smoothing, centroid detection, and integration of extracted ion chromatograms
    • \n
    • Peak shape metric calculations including half peak height, tailing factor, and dispersity index
    • \n
    • MS1 deconvolution of mass features
    • \n
    • Identification of 13C isotopes within the mass features
    • \n
    • Compatibility with molecular formula searching on MS1 or MS2 spectra
    • \n
    • Spectral search capability using entropy similarity
    • \n
    \n\n
    \n\n

    Installation

    \n\n
    \n
    pip install corems\n
    \n
    \n\n

    By default the molecular formula database will be generated using SQLite

    \n\n

    To use Postgresql the easiest way is to build a docker container:

    \n\n
    \n
    docker-compose up -d\n
    \n
    \n\n
      \n
    • Change the url_database on MSParameters.molecular_search.url_database to: \"postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp\"
    • \n
    • Set the url_database env variable COREMS_DATABASE_URL to: \"postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp\"
    • \n
    \n\n

    Thermo Raw File Access:

    \n\n

    To open Thermo files, an installation of pythonnet is needed:

    \n\n
      \n
    • Windows: \n
      \n
      pip install pythonnet\n
      \n
    • \n
    \n\n
      \n
    • Mac and Linux:\n
      \n
      brew install mono\npip install pythonnet   \n
      \n
    • \n
    \n\n
    \n\n

    Docker stack

    \n\n

    Another option to use CoreMS is to run the docker stack that will start the CoreMS containers

    \n\n

    Molecular Database and Jupyter Notebook Docker Containers

    \n\n

    A docker container containing:

    \n\n
      \n
    • A custom python distribution with all dependencies installed
    • \n
    • A Jupyter notebook server with workflow examples
    • \n
    • A PostgreSQL database for the molecular formulae assignment
    • \n
    \n\n

    If you don't have docker installed, the easiest way is to install docker for desktop

    \n\n
      \n
    1. Start the containers using docker-compose (easiest way):

      \n\n

      On docker-compose-jupyter.yml there is a volume mapping for the tests_data directory with the data provided for testing, to change to your data location:

      \n\n
        \n
      • locate the volumes on docker-compose-jupyter.yml:

        \n\n
        \n
        volumes:\n  - ./tests/tests_data:/home/CoreMS/data\n
        \n
      • \n
      • change \"./tests/tests_data\" to your data directory location

        \n\n
        \n
        volumes:\n  - path_to_your_data_directory:/home/corems/data\n
        \n
      • \n
      • save the file and then call:

        \n\n
        \n
        docker-compose -f docker-compose-jupyter.yml up\n
        \n
      • \n
    2. \n
    3. Another option is to manually build the containers:

      \n\n
        \n
      • Build the corems image:

        \n\n
        \n
        docker build -t corems:local .\n
        \n
      • \n
      • Start the database container:

        \n\n
        \n
        docker-compose up -d   \n
        \n
      • \n
      • Start the Jupyter Notebook:

        \n\n
        \n
        docker run --rm -v ./data:/home/CoreMS/data corems:local\n
        \n
      • \n
      • Open your browser, copy and paste the URL address provided in the terminal: http://localhost:8888/?token=<token>.

      • \n
      • Open the CoreMS-Tutorial.ipynb

      • \n
    4. \n
    \n\n
    \n\n

    Example for FT-ICR Data Processing

    \n\n

    More examples can be found under the directory examples/scripts, examples/notebooks

    \n\n
      \n
    • Basic functionality example
    • \n
    \n\n
    \n
    from corems.transient.input.brukerSolarix import ReadBrukerSolarix\nfrom corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas\nfrom corems.mass_spectrum.output.export import HighResMassSpecExport\nfrom matplotlib import pyplot\n\nfile_path= 'tests/tests_data/ftms/ESI_NEG_SRFA.d'\n\n# Instatiate the Bruker Solarix reader with the filepath\nbruker_reader = ReadBrukerSolarix(file_path)\n\n# Use the reader to instatiate a transient object\nbruker_transient_obj = bruker_reader.get_transient()\n\n# Calculate the transient duration time\nT =  bruker_transient_obj.transient_time\n\n# Use the transient object to instatitate a mass spectrum object\nmass_spectrum_obj = bruker_transient_obj.get_mass_spectrum(plot_result=False, auto_process=True)\n\n# The following SearchMolecularFormulas function does the following\n# - searches monoisotopic molecular formulas for all mass spectral peaks\n# - calculates fine isotopic structure based on monoisotopic molecular formulas found and current dynamic range\n# - searches molecular formulas of correspondent calculated isotopologues\n# - settings are stored at SearchConfig.json and can be changed directly on the file or inside the framework class\n\nSearchMolecularFormulas(mass_spectrum_obj, first_hit=False).run_worker_mass_spectrum()\n\n# Iterate over mass spectral peaks objs within the mass_spectrum_obj\nfor mspeak in mass_spectrum_obj.sort_by_abundance():\n\n    # If there is at least one molecular formula associated, mspeak returns True\n    if  mspeak:\n\n        # Get the molecular formula with the highest mass accuracy\n        molecular_formula = mspeak.molecular_formula_lowest_error\n\n        # Plot mz and peak height\n        pyplot.plot(mspeak.mz_exp, mspeak.abundance, 'o', c='g')\n\n        # Iterate over all molecular formulas associated with the ms peaks obj\n        for molecular_formula in mspeak:\n\n            # Check if the molecular formula is a isotopologue\n            if 
molecular_formula.is_isotopologue:\n\n                # Access the molecular formula text representation and print\n                print (molecular_formula.string)\n\n                # Get 13C atoms count\n                print (molecular_formula['13C'])\n    else:\n        # Get mz and peak height\n        print(mspeak.mz_exp,mspeak.abundance)\n\n# Save data\n## to a csv file\nmass_spectrum_obj.to_csv("filename")\nmass_spectrum_obj.to_hdf("filename")\n# to pandas Datarame pickle\nmass_spectrum_obj.to_pandas("filename")\n\n# Extract data as a pandas Dataframe\ndf = mass_spectrum_obj.to_dataframe()\n
    \n
    \n\n
    \n\n

    UML Diagrams

    \n\n

    UML (unified modeling language) diagrams for Direct Infusion FT-MS and GC-MS classes can be found here.

    \n\n
    \n\n

    Citing CoreMS

    \n\n

    If you use CoreMS in your work, please use the following citation:

    \n\n

    Version 3.0.0 Release on GitHub, archived on Zenodo:

    \n\n

    \"DOI\"

    \n\n

    Yuri E. Corilo, William R. Kew, Lee Ann McCue, Katherine R. Heal, James C. Carr (2024, October 29). EMSL-Computing/CoreMS: CoreMS 3.0.0 (Version v3.0.0), as developed on Github. Zenodo. http://doi.org/10.5281/zenodo.14009575

    \n\n

    ```

    \n\n
    \n\n

    This material was prepared as an account of work sponsored by an agency of the\nUnited States Government. Neither the United States Government nor the United\nStates Department of Energy, nor Battelle, nor any of their employees, nor any\njurisdiction or organization that has cooperated in the development of these\nmaterials, makes any warranty, express or implied, or assumes any legal\nliability or responsibility for the accuracy, completeness, or usefulness or\nany information, apparatus, product, software, or process disclosed, or\nrepresents that its use would not infringe privately owned rights.

    \n\n

    Reference herein to any specific commercial product, process, or service by\ntrade name, trademark, manufacturer, or otherwise does not necessarily\nconstitute or imply its endorsement, recommendation, or favoring by the United\nStates Government or any agency thereof, or Battelle Memorial Institute. The\nviews and opinions of authors expressed herein do not necessarily state or\nreflect those of the United States Government or any agency thereof.

    \n\n
                 PACIFIC NORTHWEST NATIONAL LABORATORY\n                          operated by\n                            BATTELLE\n                            for the\n               UNITED STATES DEPARTMENT OF ENERGY\n                under Contract DE-AC05-76RL01830\n
    \n"}, {"fullname": "corems.readme_path", "modulename": "corems", "qualname": "readme_path", "kind": "variable", "doc": "

    \n", "default_value": "'/Users/heal742/LOCAL/corems_dev/corems/corems/../README.md'"}, {"fullname": "corems.timeit", "modulename": "corems", "qualname": "timeit", "kind": "function", "doc": "

    \n", "signature": "(method):", "funcdef": "def"}, {"fullname": "corems.SuppressPrints", "modulename": "corems", "qualname": "SuppressPrints", "kind": "class", "doc": "

    \n"}, {"fullname": "corems.chunks", "modulename": "corems", "qualname": "chunks", "kind": "function", "doc": "

    Yield successive n-sized chunks from lst.

    \n", "signature": "(lst, n):", "funcdef": "def"}, {"fullname": "corems.corems_md5", "modulename": "corems", "qualname": "corems_md5", "kind": "function", "doc": "

    \n", "signature": "(fname):", "funcdef": "def"}, {"fullname": "corems.chroma_peak", "modulename": "corems.chroma_peak", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.calc", "modulename": "corems.chroma_peak.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "GCPeakCalculation", "kind": "class", "doc": "

    Class for performing peak calculations in GC chromatography.

    \n\n
    Methods
    \n\n
      \n
    • calc_area(self, tic: List[float], dx: float) -> None: Calculate the area under the curve of the chromatogram.
    • \n
    • linear_ri(self, right_ri: float, left_ri: float, left_rt: float, right_rt: float) -> float: Calculate the retention index using linear interpolation.
    • \n
    • calc_ri(self, rt_ri_pairs: List[Tuple[float, float]]) -> None: Calculate the retention index based on the given retention time - retention index pairs.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation.calc_area", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "GCPeakCalculation.calc_area", "kind": "function", "doc": "

    Calculate the area under the curve of the chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • tic (List[float]):\nThe total ion current (TIC) values.
    • \n
    • dx (float):\nThe spacing between data points.
    • \n
    \n", "signature": "(self, tic: list[float], dx: float) -> None:", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation.linear_ri", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "GCPeakCalculation.linear_ri", "kind": "function", "doc": "

    Calculate the retention index using linear interpolation.

    \n\n
    Parameters
    \n\n
      \n
    • right_ri (float):\nThe retention index at the right reference point.
    • \n
    • left_ri (float):\nThe retention index at the left reference point.
    • \n
    • left_rt (float):\nThe retention time at the left reference point.
    • \n
    • right_rt (float):\nThe retention time at the right reference point.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The calculated retention index.
    • \n
    \n", "signature": "(\tself,\tright_ri: float,\tleft_ri: float,\tleft_rt: float,\tright_rt: float) -> float:", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation.calc_ri", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "GCPeakCalculation.calc_ri", "kind": "function", "doc": "

    Calculate the retention index based on the given retention time - retention index pairs.

    \n\n
    Parameters
    \n\n
      \n
    • rt_ri_pairs (List[Tuple[float, float]]):\nThe list of retention time - retention index pairs.
    • \n
    \n", "signature": "(self, rt_ri_pairs: list[tuple[float, float]]) -> None:", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation", "kind": "class", "doc": "

    Class for performing peak calculations in LC-MS mass spectrometry.

    \n\n

    This class is intended to be used as a mixin class for the LCMSMassFeature class.

    \n"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation.calc_dispersity_index", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation.calc_dispersity_index", "kind": "function", "doc": "

    Calculate the dispersity index of the mass feature.

    \n\n

    This function calculates the dispersity index of the mass feature and\nstores the result in the _dispersity_index attribute. The dispersity index is calculated as the standard\ndeviation of the retention times that account for 50% of the cumulative intensity, starting from the most\nintense point, as described in [1].

    \n\n
    Returns
    \n\n
      \n
    • None, stores the result in the _dispersity_index attribute of the class.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the EIC data are not available.
    • \n
    \n\n
    References
    \n\n

    1) Boiteau, Rene M., et al. \"Relating Molecular Properties to the Persistence of Marine Dissolved\nOrganic Matter with Liquid Chromatography\u2013Ultrahigh-Resolution Mass Spectrometry.\"\nEnvironmental Science & Technology 58.7 (2024): 3267-3277.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation.calc_fraction_height_width", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation.calc_fraction_height_width", "kind": "function", "doc": "

    Calculate the height width of the mass feature at a specific fraction of the maximum intensity.

    \n\n

    This function returns a tuple with the minimum and maximum half-height width based on scan resolution.

    \n\n
    Parameters
    \n\n
      \n
    • fraction (float):\nThe fraction of the maximum intensity to calculate the height width.\nFor example, 0.5 will calculate the half-height width.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Tuple[float, float, bool]: The minimum and maximum half-height width based on scan resolution (in minutes), and a boolean indicating if the width was estimated.
    • \n
    \n", "signature": "(self, fraction: float):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation.calc_half_height_width", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation.calc_half_height_width", "kind": "function", "doc": "

    Calculate the half-height width of the mass feature.

    \n\n

    This function calculates the half-height width of the mass feature and\nstores the result in the _half_height_width attribute

    \n\n
    Returns
    \n\n
      \n
    • None, stores the result in the _half_height_width attribute of the class.
    • \n
    \n", "signature": "(self, accept_estimated: bool = False):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation.calc_tailing_factor", "modulename": "corems.chroma_peak.calc.ChromaPeakCalc", "qualname": "LCMSMassFeatureCalculation.calc_tailing_factor", "kind": "function", "doc": "

    Calculate the peak asymmetry of the mass feature.

    \n\n

    This function calculates the peak asymmetry of the mass feature and\nstores the result in the _tailing_factor attribute.\nCalculations completed at 5% of the peak height in accordance with the USP tailing factor calculation.

    \n\n
    Returns
    \n\n
      \n
    • None, stores the result in the _tailing_factor attribute of the class.
    • \n
    \n\n
    References
    \n\n

    1) JIS K0124:2011 General rules for high performance liquid chromatography\n2) JIS K0214:2013 Technical terms for analytical chemistry

    \n", "signature": "(self, accept_estimated: bool = False):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory", "modulename": "corems.chroma_peak.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase", "kind": "class", "doc": "

    Base class for chromatographic peak (ChromaPeak) objects.

    \n\n
    Parameters
    \n\n
      \n
    • chromatogram_parent (Chromatogram):\nThe parent chromatogram object.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • start_index (int):\nThe start index of the peak.
    • \n
    • index (int):\nThe index of the peak.
    • \n
    • final_index (int):\nThe final index of the peak.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • start_scan (int):\nThe start scan of the peak.
    • \n
    • final_scan (int):\nThe final scan of the peak.
    • \n
    • apex_scan (int):\nThe apex scan of the peak.
    • \n
    • chromatogram_parent (Chromatogram):\nThe parent chromatogram object.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • _area (float):\nThe area of the peak.
    • \n
    \n\n
    Properties
    \n\n
      \n
    • retention_time : float.\nThe retention time of the peak.
    • \n
    • tic : float.\nThe total ion current of the peak.
    • \n
    • area : float.\nThe area of the peak.
    • \n
    • rt_list : list.\nThe list of retention times within the peak.
    • \n
    • tic_list : list.\nThe list of total ion currents within the peak.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • None
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.__init__", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tchromatogram_parent,\tmass_spectrum_obj,\tstart_index,\tindex,\tfinal_index)"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.start_scan", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.start_scan", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.final_scan", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.final_scan", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.apex_scan", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.apex_scan", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.chromatogram_parent", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.chromatogram_parent", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.mass_spectrum", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.retention_time", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.retention_time", "kind": "variable", "doc": "

    Retention Time

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.tic", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.tic", "kind": "variable", "doc": "

    Total Ion Current

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.area", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.area", "kind": "variable", "doc": "

    Peak Area

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.rt_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.rt_list", "kind": "variable", "doc": "

    Retention Time List

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.ChromaPeakBase.tic_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "ChromaPeakBase.tic_list", "kind": "variable", "doc": "

    Total Ion Current List

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature", "kind": "class", "doc": "

    Class representing a mass feature in a liquid chromatography (LC) chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • lcms_parent (LCMS):\nThe parent LCMSBase object.
    • \n
    • mz (float):\nThe observed mass to charge ratio of the feature.
    • \n
    • retention_time (float):\nThe retention time of the feature (in minutes), at the apex.
    • \n
    • intensity (float):\nThe intensity of the feature.
    • \n
    • apex_scan (int):\nThe scan number of the apex of the feature.
    • \n
    • persistence (float, optional):\nThe persistence of the feature. Default is None.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _mz_exp (float):\nThe observed mass to charge ratio of the feature.
    • \n
    • _mz_cal (float):\nThe calibrated mass to charge ratio of the feature.
    • \n
    • _retention_time (float):\nThe retention time of the feature (in minutes), at the apex.
    • \n
    • _apex_scan (int):\nThe scan number of the apex of the feature.
    • \n
    • _intensity (float):\nThe intensity of the feature.
    • \n
    • _persistence (float):\nThe persistence of the feature.
    • \n
    • _eic_data (EIC_Data):\nThe EIC data object associated with the feature.
    • \n
    • _dispersity_index (float):\nThe dispersity index of the feature.
    • \n
    • _half_height_width (numpy.ndarray):\nThe half height width of the feature (in minutes, as an array of min and max values).
    • \n
    • _tailing_factor (float):\nThe tailing factor of the feature.\n> 1 indicates tailing, < 1 indicates fronting, = 1 indicates symmetrical peak.
    • \n
    • _ms_deconvoluted_idx ([int]):\nThe indexes of the mass_spectrum attribute in the deconvoluted mass spectrum.
    • \n
    • is_calibrated (bool):\nIf True, the feature has been calibrated. Default is False.
    • \n
    • monoisotopic_mf_id (int):\nMass feature id that is the monoisotopic version of self.\nIf equal to self.id, then self is the monoisotopic feature. Default is None.
    • \n
    • isotopologue_type (str):\nThe isotopic class of the feature, i.e. \"13C1\", \"13C2\", \"13C1 37Cl1\" etc.\nDefault is None.
    • \n
    • ms2_scan_numbers (list):\nList of scan numbers of the MS2 spectra associated with the feature.\nDefault is an empty list.
    • \n
    • ms2_mass_spectra (dict):\nDictionary of MS2 spectra associated with the feature (key = scan number for DDA).\nDefault is an empty dictionary.
    • \n
    • ms2_similarity_results (list):\nList of MS2 similarity results associated with the mass feature.\nDefault is an empty list.
    • \n
    • id (int):\nThe ID of the feature, also the key in the parent LCMS object's\nmass_features dictionary.
    • \n
    • mass_spectrum_deconvoluted_parent (bool):\nIf True, the mass feature corresponds to the most intense peak in the deconvoluted mass spectrum. Default is None.
    • \n
    • associated_mass_features_deconvoluted (list):\nList of mass features associated with the deconvoluted mass spectrum. Default is an empty list.
    • \n
    \n", "bases": "ChromaPeakBase, corems.chroma_peak.calc.ChromaPeakCalc.LCMSMassFeatureCalculation"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.__init__", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tlcms_parent,\tmz: float,\tretention_time: float,\tintensity: float,\tapex_scan: int,\tpersistence: float = None,\tid: int = None)"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.monoisotopic_mf_id", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.monoisotopic_mf_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.isotopologue_type", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.isotopologue_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.ms2_scan_numbers", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.ms2_scan_numbers", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.ms2_mass_spectra", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.ms2_mass_spectra", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.ms2_similarity_results", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.ms2_similarity_results", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.mass_spectrum_deconvoluted_parent", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.mass_spectrum_deconvoluted_parent", "kind": "variable", "doc": "

    \n", "annotation": ": bool"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.associated_mass_features_deconvoluted", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.associated_mass_features_deconvoluted", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.update_mz", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.update_mz", "kind": "function", "doc": "

    Update the mass to charge ratio from the mass spectrum object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.plot", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.plot", "kind": "function", "doc": "

    Plot the mass feature.

    \n\n
    Parameters
    \n\n
      \n
    • to_plot (list, optional):\nList of strings specifying what to plot, any iteration of\n\"EIC\", \"MS2\", and \"MS1\".\nDefault is [\"EIC\", \"MS1\", \"MS2\"].
    • \n
    • return_fig (bool, optional):\nIf True, the figure is returned. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.figure.Figure or None: The figure object if return_fig is True.\nOtherwise None and the figure is displayed.
    • \n
    \n", "signature": "(self, to_plot=['EIC', 'MS1', 'MS2'], return_fig=True):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.mz", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.mz", "kind": "variable", "doc": "

    Mass to charge ratio of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.mass_spectrum_deconvoluted", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.mass_spectrum_deconvoluted", "kind": "variable", "doc": "

    Returns the deconvoluted mass spectrum object associated with the mass feature, if deconvolution has been performed.

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.retention_time", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.retention_time", "kind": "variable", "doc": "

    Retention time of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.apex_scan", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.apex_scan", "kind": "variable", "doc": "

    Apex scan of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.intensity", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.intensity", "kind": "variable", "doc": "

    Intensity of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.persistence", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.persistence", "kind": "variable", "doc": "

    Persistence of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.eic_rt_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.eic_rt_list", "kind": "variable", "doc": "

    Retention time list between the beginning and end of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.eic_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.eic_list", "kind": "variable", "doc": "

    EIC List between the beginning and end of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.ms1_peak", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.ms1_peak", "kind": "variable", "doc": "

    MS1 peak from associated mass spectrum that is closest to the mass feature's m/z

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.tailing_factor", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.tailing_factor", "kind": "variable", "doc": "

    Tailing factor of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.dispersity_index", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.dispersity_index", "kind": "variable", "doc": "

    Dispersity index of the mass feature

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.half_height_width", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.half_height_width", "kind": "variable", "doc": "

    Half height width of the mass feature, average of min and max values, in minutes

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.LCMSMassFeature.best_ms2", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "LCMSMassFeature.best_ms2", "kind": "variable", "doc": "

    Points to the best representative MS2 mass spectrum

    \n\n
    Notes
    \n\n

    If there is only one MS2 mass spectrum, it will be returned\nIf there are MS2 similarity results, this will return the MS2 mass spectrum with the highest entropy similarity score.\nIf there are no MS2 similarity results, the best MS2 mass spectrum is determined by the closest scan time to the apex of the mass feature, with higher resolving power. Checks for and disqualifies possible chimeric spectra.

    \n\n
    Returns
    \n\n
      \n
    • MassSpectrum or None: The best MS2 mass spectrum.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak", "kind": "class", "doc": "

    Class representing a peak in a gas chromatography (GC) chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • chromatogram_parent (Chromatogram):\nThe parent chromatogram object.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object associated with the peak.
    • \n
    • indexes (tuple):\nThe indexes of the peak in the chromatogram.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _compounds (list):\nList of compounds associated with the peak.
    • \n
    • _ri (float or None):\nRetention index of the peak.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __len__(). Returns the number of compounds associated with the peak.
    • \n
    • __getitem__(position). Returns the compound at the specified position.
    • \n
    • remove_compound(compounds_obj). Removes the specified compound from the peak.
    • \n
    • clear_compounds(). Removes all compounds from the peak.
    • \n
    • add_compound(compounds_dict, spectral_similarity_scores, ri_score=None, similarity_score=None). Adds a compound to the peak with the specified attributes.
    • \n
    • ri(). Returns the retention index of the peak.
    • \n
    • highest_ss_compound(). Returns the compound with the highest spectral similarity score.
    • \n
    • highest_score_compound(). Returns the compound with the highest similarity score.
    • \n
    • compound_names(). Returns a list of names of compounds associated with the peak.
    • \n
    \n", "bases": "ChromaPeakBase, corems.chroma_peak.calc.ChromaPeakCalc.GCPeakCalculation"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.__init__", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.__init__", "kind": "function", "doc": "

    \n", "signature": "(chromatogram_parent, mass_spectrum_obj, indexes)"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.remove_compound", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.remove_compound", "kind": "function", "doc": "

    \n", "signature": "(self, compounds_obj):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.clear_compounds", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.clear_compounds", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.add_compound", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.add_compound", "kind": "function", "doc": "

    Adds a compound to the peak with the specified attributes.

    \n\n
    Parameters
    \n\n
      \n
    • compounds_dict (dict):\nDictionary containing the compound information.
    • \n
    • spectral_similarity_scores (dict):\nDictionary containing the spectral similarity scores.
    • \n
    • ri_score (float or None, optional):\nThe retention index score of the compound. Default is None.
    • \n
    • similarity_score (float or None, optional):\nThe similarity score of the compound. Default is None.
    • \n
    \n", "signature": "(\tself,\tcompounds_dict,\tspectral_similarity_scores,\tri_score=None,\tsimilarity_score=None):", "funcdef": "def"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.ri", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.ri", "kind": "variable", "doc": "

    Returns the retention index of the peak.

    \n\n
    Returns
    \n\n
      \n
    • float or None: The retention index of the peak.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.highest_ss_compound", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.highest_ss_compound", "kind": "variable", "doc": "

    Returns the compound with the highest spectral similarity score.

    \n\n
    Returns
    \n\n
      \n
    • LowResCompoundRef or None: The compound with the highest spectral similarity score.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.highest_score_compound", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.highest_score_compound", "kind": "variable", "doc": "

    Returns the compound with the highest similarity score.

    \n\n
    Returns
    \n\n
      \n
    • LowResCompoundRef or None: The compound with the highest similarity score.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeak.compound_names", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeak.compound_names", "kind": "variable", "doc": "

    Returns a list of names of compounds associated with the peak.

    \n\n
    Returns
    \n\n
      \n
    • list: List of names of compounds associated with the peak.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved", "kind": "class", "doc": "

    Represents a deconvolved peak in a chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • chromatogram_parent (Chromatogram):\nThe parent chromatogram object.
    • \n
    • mass_spectra (list):\nList of mass spectra associated with the peak.
    • \n
    • apex_index (int):\nIndex of the apex mass spectrum in the mass_spectra list.
    • \n
    • rt_list (list):\nList of retention times.
    • \n
    • tic_list (list):\nList of total ion currents.
    • \n
    \n", "bases": "GCPeak"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved.__init__", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved.__init__", "kind": "function", "doc": "

    \n", "signature": "(chromatogram_parent, mass_spectra, apex_index, rt_list, tic_list)"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved.mass_spectra", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved.mass_spectra", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved.rt_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved.rt_list", "kind": "variable", "doc": "

    Get the list of retention times.

    \n\n
    Returns
    \n\n
      \n
    • list: The list of retention times.
    • \n
    \n"}, {"fullname": "corems.chroma_peak.factory.chroma_peak_classes.GCPeakDeconvolved.tic_list", "modulename": "corems.chroma_peak.factory.chroma_peak_classes", "qualname": "GCPeakDeconvolved.tic_list", "kind": "variable", "doc": "

    Get the list of total ion currents.

    \n\n
    Returns
    \n\n
      \n
    • list: The list of total ion currents.
    • \n
    \n"}, {"fullname": "corems.encapsulation", "modulename": "corems.encapsulation", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.constant", "modulename": "corems.encapsulation.constant", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.constant.Labels", "modulename": "corems.encapsulation.constant", "qualname": "Labels", "kind": "class", "doc": "

    Class for Labels used in CoreMS

    \n\n

    These labels are used to define:

    \n\n
      \n
    • types of columns in plaintext data inputs,
    • \n
    • types of data/mass spectra
    • \n
    • types of assignment for ions
    • \n
    \n"}, {"fullname": "corems.encapsulation.constant.Labels.mz", "modulename": "corems.encapsulation.constant", "qualname": "Labels.mz", "kind": "variable", "doc": "

    \n", "default_value": "'m/z'"}, {"fullname": "corems.encapsulation.constant.Labels.abundance", "modulename": "corems.encapsulation.constant", "qualname": "Labels.abundance", "kind": "variable", "doc": "

    \n", "default_value": "'Peak Height'"}, {"fullname": "corems.encapsulation.constant.Labels.rp", "modulename": "corems.encapsulation.constant", "qualname": "Labels.rp", "kind": "variable", "doc": "

    \n", "default_value": "'Resolving Power'"}, {"fullname": "corems.encapsulation.constant.Labels.s2n", "modulename": "corems.encapsulation.constant", "qualname": "Labels.s2n", "kind": "variable", "doc": "

    \n", "default_value": "'S/N'"}, {"fullname": "corems.encapsulation.constant.Labels.label", "modulename": "corems.encapsulation.constant", "qualname": "Labels.label", "kind": "variable", "doc": "

    \n", "default_value": "'label'"}, {"fullname": "corems.encapsulation.constant.Labels.bruker_profile", "modulename": "corems.encapsulation.constant", "qualname": "Labels.bruker_profile", "kind": "variable", "doc": "

    \n", "default_value": "'Bruker_Profile'"}, {"fullname": "corems.encapsulation.constant.Labels.thermo_profile", "modulename": "corems.encapsulation.constant", "qualname": "Labels.thermo_profile", "kind": "variable", "doc": "

    \n", "default_value": "'Thermo_Profile'"}, {"fullname": "corems.encapsulation.constant.Labels.simulated_profile", "modulename": "corems.encapsulation.constant", "qualname": "Labels.simulated_profile", "kind": "variable", "doc": "

    \n", "default_value": "'Simulated Profile'"}, {"fullname": "corems.encapsulation.constant.Labels.booster_profile", "modulename": "corems.encapsulation.constant", "qualname": "Labels.booster_profile", "kind": "variable", "doc": "

    \n", "default_value": "'Booster Profile'"}, {"fullname": "corems.encapsulation.constant.Labels.bruker_frequency", "modulename": "corems.encapsulation.constant", "qualname": "Labels.bruker_frequency", "kind": "variable", "doc": "

    \n", "default_value": "'Bruker_Frequency'"}, {"fullname": "corems.encapsulation.constant.Labels.midas_frequency", "modulename": "corems.encapsulation.constant", "qualname": "Labels.midas_frequency", "kind": "variable", "doc": "

    \n", "default_value": "'Midas_Frequency'"}, {"fullname": "corems.encapsulation.constant.Labels.thermo_centroid", "modulename": "corems.encapsulation.constant", "qualname": "Labels.thermo_centroid", "kind": "variable", "doc": "

    \n", "default_value": "'Thermo_Centroid'"}, {"fullname": "corems.encapsulation.constant.Labels.corems_centroid", "modulename": "corems.encapsulation.constant", "qualname": "Labels.corems_centroid", "kind": "variable", "doc": "

    \n", "default_value": "'CoreMS_Centroid'"}, {"fullname": "corems.encapsulation.constant.Labels.gcms_centroid", "modulename": "corems.encapsulation.constant", "qualname": "Labels.gcms_centroid", "kind": "variable", "doc": "

    \n", "default_value": "'Thermo_Centroid'"}, {"fullname": "corems.encapsulation.constant.Labels.unassigned", "modulename": "corems.encapsulation.constant", "qualname": "Labels.unassigned", "kind": "variable", "doc": "

    \n", "default_value": "'unassigned'"}, {"fullname": "corems.encapsulation.constant.Labels.radical_ion", "modulename": "corems.encapsulation.constant", "qualname": "Labels.radical_ion", "kind": "variable", "doc": "

    \n", "default_value": "'RADICAL'"}, {"fullname": "corems.encapsulation.constant.Labels.protonated_de_ion", "modulename": "corems.encapsulation.constant", "qualname": "Labels.protonated_de_ion", "kind": "variable", "doc": "

    \n", "default_value": "'DE_OR_PROTONATED'"}, {"fullname": "corems.encapsulation.constant.Labels.protonated", "modulename": "corems.encapsulation.constant", "qualname": "Labels.protonated", "kind": "variable", "doc": "

    \n", "default_value": "'protonated'"}, {"fullname": "corems.encapsulation.constant.Labels.de_protonated", "modulename": "corems.encapsulation.constant", "qualname": "Labels.de_protonated", "kind": "variable", "doc": "

    \n", "default_value": "'de-protonated'"}, {"fullname": "corems.encapsulation.constant.Labels.adduct_ion", "modulename": "corems.encapsulation.constant", "qualname": "Labels.adduct_ion", "kind": "variable", "doc": "

    \n", "default_value": "'ADDUCT'"}, {"fullname": "corems.encapsulation.constant.Labels.neutral", "modulename": "corems.encapsulation.constant", "qualname": "Labels.neutral", "kind": "variable", "doc": "

    \n", "default_value": "'neutral'"}, {"fullname": "corems.encapsulation.constant.Labels.ion_type", "modulename": "corems.encapsulation.constant", "qualname": "Labels.ion_type", "kind": "variable", "doc": "

    \n", "default_value": "'IonType'"}, {"fullname": "corems.encapsulation.constant.Labels.ion_type_translate", "modulename": "corems.encapsulation.constant", "qualname": "Labels.ion_type_translate", "kind": "variable", "doc": "

    \n", "default_value": "{'protonated': 'DE_OR_PROTONATED', 'de-protonated': 'DE_OR_PROTONATED', 'radical': 'RADICAL', 'adduct': 'ADDUCT', 'ADDUCT': 'ADDUCT'}"}, {"fullname": "corems.encapsulation.constant.Atoms", "modulename": "corems.encapsulation.constant", "qualname": "Atoms", "kind": "class", "doc": "

    Class for Atoms in CoreMS

    \n\n

    This class includes key properties of atoms (and the electron) and isotopes, including their exact masses, relative abundances, and covalences.\nIt also associates which isotopes are for the same element, and provides an ordering of elements.

    \n\n

    IUPAC definition of monoisotopic mass is based on the most abundant isotopes of each element present.\nHere, we will use atom symbols with isotope numbers for all isotopes excluding the most abundant one.\nThis list has been corrected up to Iodine.

    \n\n
    References
    \n\n
      \n
    1. NIST - Last Accessed 2019-06-12\nhttps://www.nist.gov/pml/atomic-weights-and-isotopic-compositions-relative-atomic-masses
    2. \n
    \n"}, {"fullname": "corems.encapsulation.constant.Atoms.electron_mass", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.electron_mass", "kind": "variable", "doc": "

    \n", "default_value": "0.000548579909065"}, {"fullname": "corems.encapsulation.constant.Atoms.atomic_masses", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.atomic_masses", "kind": "variable", "doc": "

    \n", "default_value": "{'H': 1.00782503223, 'D': 2.01410177812, 'T': 3.0160492779, '3He': 3.0160293201, 'He': 4.00260325413, '6Li': 6.0151228874, 'Li': 7.0160034366, 'Be': 9.012183065, '10B': 10.01293695, 'B': 11.00930536, 'C': 12.0, '13C': 13.00335483507, '14C': 14.0032419884, 'N': 14.00307400443, '15N': 15.00010889888, 'O': 15.99491461957, '17O': 16.9991317565, '18O': 17.99915961286, 'F': 18.99840316273, 'Ne': 19.9924401762, '21Ne': 20.993846685, '22Ne': 21.991385114, 'Na': 22.989769282, 'Mg': 23.985041697, '25Mg': 24.985836976, '26Mg': 25.982592968, 'Al': 26.98153853, 'Si': 27.97692653465, '29Si': 28.9764946649, '30Si': 29.973770136, 'P': 30.97376199842, 'S': 31.9720711744, '33S': 32.9714589098, '34S': 33.967867004, '36S': 35.96708071, 'Cl': 34.968852682, '37Cl': 36.965902602, '36Ar': 35.967545105, '38Ar': 37.96273211, 'Ar': 39.9623831237, 'K': 38.9637064864, '40K': 39.963998166, '41K': 40.9618252579, 'Ca': 39.962590863, '42Ca': 41.95861783, '43Ca': 42.95876644, '44Ca': 43.95548156, '46Ca': 45.953689, '48Ca': 47.95252276, 'Sc': 44.95590828, '46Ti': 45.95262772, '47Ti': 46.95175879, 'Ti': 47.94794198, '49Ti': 48.94786568, '50Ti': 49.94478689, '50V': 49.94715601, 'V': 50.94395704, '50Cr': 49.94604183, 'Cr': 51.94050623, '53Cr': 52.94064815, '54Cr': 53.93887916, 'Mn': 54.93804391, '54Fe': 53.93960899, 'Fe': 55.93493633, '57Fe': 56.93539284, '58Fe': 57.93327443, 'Co': 58.93319429, 'Ni': 57.93534241, '60Ni': 59.93078588, '61Ni': 60.93105557, '62Ni': 61.92834537, '64Ni': 63.92796682, 'Cu': 62.92959772, '65Cu': 64.9277897, 'Zn': 63.92914201, '66Zn': 65.92603381, '67Zn': 66.92712775, '68Zn': 67.92484455, '70Zn': 69.9253192, 'Ga': 68.9255735, '71Ga': 70.92470258, '70Ge': 69.92424875, '72Ge': 71.922075826, '73Ge': 72.923458956, 'Ge': 73.921177761, '76Ge': 75.921402726, 'As': 74.92159457, '74Se': 73.922475934, '76Se': 75.919213704, '77Se': 76.919914154, '78Se': 77.91730928, 'Se': 79.9165218, '82Se': 81.9166995, 'Br': 78.9183376, '81Br': 80.9162897, '78Kr': 77.92036494, 
'80Kr': 79.91637808, '82Kr': 81.91348273, '83Kr': 82.91412716, 'Kr': 83.9114977282, '86Kr': 85.9106106269, 'Rb': 84.9117897379, '87Rb': 86.909180531, '84Sr': 83.9134191, '86Sr': 85.9092606, '87Sr': 86.9088775, 'Sr': 87.9056125, 'Y': 88.9058403, 'Zr': 89.9046977, '91Zr': 90.9056396, '92Zr': 91.9050347, '94Zr': 93.9063108, '96Zr': 95.9082714, 'Nb': 92.906373, '92Mo': 91.90680796, '94Mo': 93.9050849, '95Mo': 94.90583877, '96Mo': 95.90467612, '97Mo': 96.90601812, 'Mo': 97.90540482, '100Mo': 99.9074718, 'Tc': 98.9062508, '96Ru': 95.90759025, '98Ru': 97.9052868, '99Ru': 98.9059341, '100Ru': 99.9042143, '101Ru': 100.9055769, 'Ru': 101.9043441, '104Ru': 103.9054275, 'Rh': 102.905498, '102Pd': 101.9056022, '104Pd': 103.9040305, '105Pd': 104.9050796, 'Pd': 105.9034804, '108Pd': 107.9038916, '110Pd': 109.9051722, 'Ag': 106.9050916, '109Ag': 108.9047553, '106Cd': 105.9064599, '108Cd': 107.9041834, '110Cd': 109.90300661, '111Cd': 110.90418287, 'Cd': 111.90276287, '113Cd': 112.90440813, '114Cd': 113.90336509, '116Cd': 115.90476315, '113In': 112.90406184, 'In': 114.903878776, '112Sn': 111.90482387, '114Sn': 113.9027827, '115Sn': 114.903344699, '116Sn': 115.9017428, '117Sn': 116.90295398, '118Sn': 117.90160657, '119Sn': 118.90331117, 'Sn': 119.90220163, '122Sn': 121.9034438, '124Sn': 123.9052766, 'Sb': 120.903812, '123Sb': 122.9042132, '120Te': 119.9040593, '122Te': 121.9030435, '123Te': 122.9042698, '124Te': 123.9028171, '125Te': 124.9044299, '126Te': 125.9033109, '128Te': 127.90446128, 'Te': 129.906222748, 'I': 126.9044719, '124Xe': 123.905892, '126Xe': 125.9042983, '128Xe': 127.903531, '129Xe': 128.9047808611, '130Xe': 129.903509349, '131Xe': 130.90508406, 'Xe': 131.9041550856, '134Xe': 133.90539466, '136Xe': 135.907214484, 'Cs': 132.905451961, '130Ba': 129.9063207, '132Ba': 131.9050611, '134Ba': 133.90450818, '135Ba': 134.90568838, '136Ba': 135.90457573, '137Ba': 136.90582714, 'Ba': 137.905247, '138La': 137.9071149, 'La': 138.9063563, '136Ce': 135.90712921, '138Ce': 
137.905991, 'Ce': 139.9054431, '142Ce': 141.9092504, 'Pr': 140.9076576, 'Nd': 141.907729, '143Nd': 142.90982, '144Nd': 143.910093, '145Nd': 144.9125793, '146Nd': 145.9131226, '148Nd': 147.9168993, '150Nd': 149.9209022, '145Pm': 144.9127559, '147Pm': 146.915145, '144Sm': 143.9120065, '147Sm': 146.9149044, '148Sm': 147.9148292, '149Sm': 148.9171921, '150Sm': 149.9172829, 'Sm': 151.9197397, '154Sm': 153.9222169, '151Eu': 150.9198578, 'Eu': 152.921238, '152Gd': 151.9197995, '154Gd': 153.9208741, '155Gd': 154.9226305, '156Gd': 155.9221312, '157Gd': 156.9239686, 'Gd': 157.9241123, '160Gd': 159.9270624, 'Tb': 158.9253547, '156Dy': 155.9242847, '158Dy': 157.9244159, '160Dy': 159.9252046, '161Dy': 160.9269405, '162Dy': 161.9268056, '163Dy': 162.9287383, 'Dy': 163.9291819, 'Ho': 164.9303288, '162Er': 161.9287884, '164Er': 163.9292088, 'Er': 165.9302995, '167Er': 166.9320546, '168Er': 167.9323767, '170Er': 169.9354702, 'Tm': 168.9342179, '168Yb': 167.9338896, '170Yb': 169.9347664, '171Yb': 170.9363302, '172Yb': 171.9363859, '173Yb': 172.9382151, 'Yb': 173.9388664, '176Yb': 175.9425764, 'Lu': 174.9407752, '176Lu': 175.9426897, '174Hf': 173.9400461, '176Hf': 175.9414076, '177Hf': 176.9432277, '178Hf': 177.9437058, '179Hf': 178.9458232, 'Hf': 179.946557, '180Ta': 179.9474648, 'Ta': 180.9479958, '180W': 179.9467108, '182W': 181.94820394, '183W': 182.95022275, 'W': 183.95093092, '186W': 185.9543628, '185Re': 184.9529545, 'Re': 186.9557501, '184Os': 183.9524885, '186Os': 185.953835, '187Os': 186.9557474, '188Os': 187.9558352, '189Os': 188.9581442, '190Os': 189.9584437, '192Os': 191.961477, '191Ir': 190.9605893, 'Ir': 192.9629216, '190Pt': 189.9599297, '192Pt': 191.9610387, '194Pt': 193.9626809, 'Pt': 194.9647917, '196Pt': 195.96495209, '198Pt': 197.9678949, 'Au': 196.96656879, '196Hg': 195.9658326, '198Hg': 197.9667686, '199Hg': 198.96828064, '200Hg': 199.96832659, '201Hg': 200.97030284, 'Hg': 201.9706434, '204Hg': 203.97349398, '203Tl': 202.9723446, 'Tl': 204.9744278, '204Pb': 
203.973044, '206Pb': 205.9744657, '207Pb': 206.9758973, 'Pb': 207.9766525, 'Bi': 208.9803991, '209Po': 208.9824308, '210Po': 209.9828741, '210At': 209.9871479, '211At': 210.9874966, '211Rn': 210.9906011, '220Rn': 220.0113941, '222Rn': 222.0175782, '223Fr': 223.019736, '223Ra': 223.0185023, '224Ra': 224.020212, '226Ra': 226.0254103, '228Ra': 228.0310707, '227Ac': 227.0277523, '230Th': 230.0331341, 'Th': 232.0380558, 'Pa': 231.0358842, '233U': 233.0396355, '234U': 234.0409523, '235U': 235.0439301, '236U': 236.0455682, 'U': 238.0507884, '236Np': 236.04657, '237Np': 237.0481736, '238Pu': 238.0495601, '239Pu': 239.0521636, '240Pu': 240.0538138, '241Pu': 241.0568517, '242Pu': 242.0587428, '244Pu': 244.0642053, '241Am': 241.0568293, '243Am': 243.0613813, '243Cm': 243.0613893, '244Cm': 244.0627528, '245Cm': 245.0654915, '246Cm': 246.0672238, '247Cm': 247.0703541, '248Cm': 248.0723499, '247Bk': 247.0703073, '249Bk': 249.0749877, '249Cf': 249.0748539, '250Cf': 250.0764062, '251Cf': 251.0795886, '252Cf': 252.0816272, '252Es': 252.08298, '257Fm': 257.0951061, '258Md': 258.0984315, '260Md': 260.10365, '259No': 259.10103, '262Lr': 262.10961, '267Rf': 267.12179, '268Db': 268.12567, '271Sg': 271.13393, '272Bh': 272.13826, '270Hs': 270.13429, '276Mt': 276.15159, '281Ds': 281.16451, '280Rg': 280.16514, '285Cn': 285.17712, '284Nh': 284.17873, '289Fl': 289.19042, '288Mc': 288.19274, '293Lv': 293.20449, '292Ts': 292.20746, '294Og': 294.21392}"}, {"fullname": "corems.encapsulation.constant.Atoms.atoms_order", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.atoms_order", "kind": "variable", "doc": "

    \n", "default_value": "['C', 'H', 'O', 'N', 'P', 'S', 'F', 'Cl', 'Br', 'I', 'At', 'Li', 'Na', 'K', 'Rb', 'Cs', 'Fr', 'He', 'Ne', 'Ar', 'Kr', 'Xe', 'Rn', 'Be', 'B', 'Mg', 'Al', 'Si', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'Ba', 'La', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'Ra', 'Ac', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'D', '6Li', '10B', '13C', '15N', '17O', '18O', '22Ne', '25Mg', '26Mg', '29Si', '30Si', '33S', '34S', '36S', '37Cl', '40Ca', '41K', '44Ca', '46Ti', '47Ti', '49Ti', '50Cr', '50Ti', '50V', '53Cr', '54Cr', '54Fe', '57Fe', '58Fe', '60Ni', '61Ni', '62Ni', '65Cu', '66Zn', '67Zn', '68Zn', '70Ge', '71Ga', '72Ge', '73Ge', '76Ge', '76Se', '77Se', '78Se', '81Br', '80Kr', '82Kr', '82Se', '83Kr', '85Rb', '86Kr', '86Sr', '87Rb', '87Sr', '88Sr', '91Zr', '92Mo', '92Zr', '94Mo', '94Zr', '95Mo', '96Mo', '96Ru', '96Zr', '97Mo', '98Ru', '99Ru', '100Mo', '100Ru', '101Ru', '102Pd', '104Pd', '104Ru', '105Pd', '106Cd', '106Pd', '108Cd', '108Pd', '109Ag', '110Cd', '110Pd', '111Cd', '112Cd', '112Sn', '113Cd', '113In', '114Cd', '114Sn', '115In', '115Sn', '116Cd', '116Sn', '117Sn', '118Sn', '119Sn', '120Sn', '120Te', '121Sb', '122Sn', '122Te', '123Sb', '123Te', '124Sn', '124Te', '124Xe', '125Te', '126Te', '126Xe', '128Te', '128Xe', '129Xe', '130Ba', '130Te', '130Xe', '131Xe', '132Ba', '132Xe', '134Ba', '134Xe', '135Ba', '136Ba', '136Xe', '137Ba', '138Ba', '174Hf', '176Hf', '177Hf', '178Hf', '179Hf', '180Hf', '180W', '182W', '183W', '184Os', '184W', '185Re', '186Os', '186W', '187Os', '187Re', '188Os', '189Os', '190Os', '190Pt', '191Ir', '192Ir', '192Os', '192Pt', '194Pt', '195Pt', 
'196Hg', '196Pt', '198Hg', '198Pt', '199Hg', '200Hg', '201Hg', '202Hg', '203Tl', '204Hg', '204Pb', '205Tl', '206Pb', '207Pb', '208Pb']"}, {"fullname": "corems.encapsulation.constant.Atoms.atoms_covalence", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.atoms_covalence", "kind": "variable", "doc": "

    \n", "default_value": "{'C': 4, '13C': 4, 'N': 3, 'O': 2, 'S': 2, 'H': 1, 'F': (1, 0), 'Cl': (1, 0), 'Br': (1, 0), 'I': (1, 0), 'At': 1, 'Li': (1, 0), 'Na': (1, 0), 'K': (1, 0), 'Rb': 1, 'Cs': 1, 'Fr': 1, 'B': (4, 3, 2, 1), 'In': (3, 2, 1), 'Al': (3, 1, 2), 'P': (3, 5, 4, 2, 1), 'Ga': (3, 1, 2), 'Mg': (2, 1), 'Be': (2, 1), 'Ca': (2, 1), 'Sr': (2, 1), 'Ba': 2, 'Ra': 2, 'V': (5, 4, 3, 2, 1), 'Fe': (3, 2, 4, 5, 6), 'Si': (4, 3, 2), 'Sc': (3, 2, 1), 'Ti': (4, 3, 2, 1), 'Cr': (1, 2, 3, 4, 5, 6), 'Mn': (1, 2, 3, 4, 5, 6, 7), 'Co': (1, 2, 3, 4, 5), 'Ni': (1, 2, 3, 4), 'Cu': (2, 1, 3, 4), 'Zn': (2, 1), 'Ge': (4, 3, 2, 1), 'As': (5, 3, 2, 1), 'Se': (6, 4, 2, 1), 'Y': (3, 2, 1), 'Zr': (4, 3, 2, 1), 'Nb': (5, 4, 3, 2, 1), 'Mo': (6, 5, 4, 3, 2, 1), 'Tc': (7, 6, 5, 4, 3, 2, 1), 'Ru': (8, 7, 6, 5, 4, 3, 2, 1), 'Rh': (6, 5, 4, 3, 2, 1), 'Pd': (4, 2, 1), 'Ag': (0, 1, 2, 3, 4), 'Cd': (2, 1), 'Sn': (4, 2), 'Sb': (5, 3), 'Te': (6, 5, 4, 2), 'La': (3, 2), 'Hf': (4, 3, 2), 'Ta': (5, 4, 3, 2), 'W': (6, 5, 4, 3, 2, 1), 'Re': (4, 7, 6, 5, 3, 2, 1), 'Os': (4, 8, 7, 6, 5, 3, 2, 1), 'Ir': (4, 8, 6, 5, 3, 2, 1), 'Pt': (4, 6, 5, 3, 2, 1), 'Au': (3, 5, 2, 1), 'Hg': (1, 2, 4), 'Tl': (3, 1), 'Pb': (4, 2), 'Bi': (3, 1, 5), 'Po': (2, 4, 6), 'Ac': (3, 2)}"}, {"fullname": "corems.encapsulation.constant.Atoms.isotopic_abundance", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.isotopic_abundance", "kind": "variable", "doc": "

    \n", "default_value": "{'H': 0.999885, 'D': 0.000115, 'T': 0, '3He': 1.34e-06, 'He': 0.99999866, '6Li': 0.0759, 'Li': 0.9241, 'Be': 1.0, '10B': 0.199, 'B': 0.801, 'C': 0.9893, '13C': 0.0107, '14C': 0, 'N': 0.99636, '15N': 0.00364, 'O': 0.99757, '17O': 0.00038, '18O': 0.00205, 'F': 1.0, 'Ne': 0.9048, '21Ne': 0.0027, '22Ne': 0.0925, 'Na': 1.0, 'Mg': 0.7899, '25Mg': 0.1, '26Mg': 0.1101, 'Al': 1.0, 'Si': 0.92223, '29Si': 0.04685, '30Si': 0.03092, 'P': 1.0, 'S': 0.9499, '33S': 0.0075, '34S': 0.0425, '36S': 0.0001, 'Cl': 0.7576, '37Cl': 0.2424, '36Ar': 0.003336, '38Ar': 0.000629, 'Ar': 0.996035, 'K': 0.932581, '40K': 0.000117, '41K': 0.067302, 'Ca': 0.96941, '42Ca': 0.00647, '43Ca': 0.00135, '44Ca': 0.02086, '46Ca': 4e-05, '48Ca': 0.001872, 'Sc': 1.0, '46Ti': 0.0825, '47Ti': 0.0744, 'Ti': 0.7372, '49Ti': 0.0541, '50Ti': 0.0518, '50V': 0.0025, 'V': 0.9975, '50Cr': 0.04345, 'Cr': 0.83789, '53Cr': 0.09501, '54Cr': 0.02365, 'Mn': 1.0, '54Fe': 0.05845, 'Fe': 0.91754, '57Fe': 0.02119, '58Fe': 0.00282, 'Co': 1.0, 'Ni': 0.68077, '60Ni': 0.26223, '61Ni': 0.011399, '62Ni': 0.036346, '64Ni': 0.009255, 'Cu': 0.6915, '65Cu': 0.3085, 'Zn': 0.4917, '66Zn': 0.2773, '67Zn': 0.0404, '68Zn': 0.1845, '70Zn': 0.0061, 'Ga': 0.60108, '71Ga': 0.39892, '70Ge': 0.2057, '72Ge': 0.2745, '73Ge': 0.0775, 'Ge': 0.365, '76Ge': 0.0773, 'As': 1.0, '74Se': 0.0089, '76Se': 0.0937, '77Se': 0.0763, '78Se': 0.2377, 'Se': 0.4961, '82Se': 0.0873, 'Br': 0.5069, '81Br': 0.4931, '78Kr': 0.00355, '80Kr': 0.02286, '82Kr': 0.11593, '83Kr': 0.115, 'Kr': 0.56987, '86Kr': 0.17279, 'Rb': 0.7217, '87Rb': 0.2783, '84Sr': 0.0056, '86Sr': 0.0986, '87Sr': 0.07, 'Sr': 0.8258, 'Y': 1.0, 'Zr': 0.5145, '91Zr': 0.1122, '92Zr': 0.1715, '94Zr': 0.1738, '96Zr': 0.028, 'Nb': 1.0, '92Mo': 0.1453, '94Mo': 0.0915, '95Mo': 0.1584, '96Mo': 0.1667, '97Mo': 0.096, 'Mo': 0.2439, '100Mo': 0.0982, '99Tc': 0, '96Ru': 0.0554, '98Ru': 0.0187, '99Ru': 0.1276, '100Ru': 0.126, '101Ru': 0.1706, 'Ru': 0.3155, '104Ru': 0.1862, 'Rh': 1.0, '102Pd': 
0.0102, '104Pd': 0.1114, '105Pd': 0.2233, 'Pd': 0.2733, '108Pd': 0.2646, '110Pd': 0.1172, 'Ag': 0.51839, '109Ag': 0.48161, '106Cd': 0.0125, '108Cd': 0.0089, '110Cd': 0.1249, '111Cd': 0.128, 'Cd': 0.2413, '113Cd': 0.1222, '114Cd': 0.2873, '116Cd': 0.0749, '113In': 0.0429, 'In': 0.9571, '112Sn': 0.0097, '114Sn': 0.0066, '115Sn': 0.0034, '116Sn': 0.1454, '117Sn': 0.0768, '118Sn': 0.2422, '119Sn': 0.0859, 'Sn': 0.3258, '122Sn': 0.0463, '124Sn': 0.0579, 'Sb': 0.5721, '123Sb': 0.4279, '120Te': 0.0009, '122Te': 0.0255, '123Te': 0.0089, '124Te': 0.0474, '125Te': 0.0707, '126Te': 0.1884, '128Te': 0.3174, 'Te': 0.3408, 'I': 1.0, '124Xe': 0.000952, '126Xe': 0.00089, '128Xe': 0.019102, '129Xe': 0.264006, '130Xe': 0.04071, '131Xe': 0.212324, 'Xe': 0.269086, '134Xe': 0.104357, '136Xe': 0.088573, 'Cs': 1.0, '130Ba': 0.00106, '132Ba': 0.00101, '134Ba': 0.02417, '135Ba': 0.06592, '136Ba': 0.07854, '137Ba': 0.11232, 'Ba': 0.71698, '138La': 0.0008881, 'La': 0.9991119, '136Ce': 0.00185, '138Ce': 0.00251, 'Ce': 0.8845, '142Ce': 0.11114, 'Pr': 1.0, 'Nd': 0.27152, '143Nd': 0.12174, '144Nd': 0.23798, '145Nd': 0.08293, '146Nd': 0.17189, '148Nd': 0.05756, '150Nd': 0.05638, '145Pm': 0, '147Pm': 0, '144Sm': 0.0307, '147Sm': 0.1499, '148Sm': 0.1124, '149Sm': 0.1382, '150Sm': 0.0738, 'Sm': 0.2675, '154Sm': 0.2275, '151Eu': 0.4781, 'Eu': 0.5219, '152Gd': 0.002, '154Gd': 0.0218, '155Gd': 0.148, '156Gd': 0.2047, '157Gd': 0.1565, 'Gd': 0.2484, '160Gd': 0.2186, 'Tb': 1.0, '156Dy': 0.00056, '158Dy': 0.00095, '160Dy': 0.02329, '161Dy': 0.18889, '162Dy': 0.25475, '163Dy': 0.24896, 'Dy': 0.2826, 'Ho': 1.0, '162Er': 0.00139, '164Er': 0.01601, 'Er': 0.33503, '167Er': 0.22869, '168Er': 0.26978, '170Er': 0.1491, 'Tm': 1.0, '168Yb': 0.00123, '170Yb': 0.02982, '171Yb': 0.1409, '172Yb': 0.2168, '173Yb': 0.16103, 'Yb': 0.32026, '176Yb': 0.12996, 'Lu': 0.97401, '176Lu': 0.02599, '174Hf': 0.0016, '176Hf': 0.0526, '177Hf': 0.186, '178Hf': 0.2728, '179Hf': 0.1362, 'Hf': 0.3508, '180Ta': 0.0001201, 'Ta': 0.9998799, 
'180W': 0.0012, '182W': 0.265, '183W': 0.1431, 'W': 0.3064, '186W': 0.2843, '185Re': 0.374, 'Re': 0.626, '184Os': 0.0002, '186Os': 0.0159, '187Os': 0.0196, '188Os': 0.1324, '189Os': 0.1615, '190Os': 0.2626, 'Os': 0.4078, '191Ir': 0.373, 'Ir': 0.627, '190Pt': 0.00012, '192Pt': 0.00782, '194Pt': 0.3286, 'Pt': 0.3378, '196Pt': 0.2521, '198Pt': 0.07356, 'Au': 1.0, '196Hg': 0.0015, '198Hg': 0.0997, '199Hg': 0.16872, '200Hg': 0.231, '201Hg': 0.1318, 'Hg': 0.2986, '204Hg': 0.0687, '203Tl': 0.2952, 'Tl': 0.7048, '204Pb': 0.014, '206Pb': 0.241, '207Pb': 0.221, 'Pb': 0.524, 'Bi': 1.0, '209Po': 0, '210Po': 0, '210At': 0, '211At': 0, '211Rn': 0, '220Rn': 0, '222Rn': 0, '223Fr': 0, '223Ra': 0, '224Ra': 0, '226Ra': 0, '228Ra': 0, '227Ac': 0, '230Th': 0, 'Th': 1.0, 'Pa': 1.0, '233U': 0, '234U': 5.4e-05, '235U': 0.007204, '236U': 0, 'U': 0.992742, '236Np': 0, '237Np': 0, '238Pu': 0, '239Pu': 0, '240Pu': 0, '241Pu': 0, '242Pu': 0, '244Pu': 0, '241Am': 0, '243Am': 0, '243Cm': 0, '244Cm': 0, '245Cm': 0, '246Cm': 0, '247Cm': 0, '248Cm': 0, '247Bk': 0, '249Bk': 0, '249Cf': 0, '250Cf': 0, '251Cf': 0, '252Cf': 0, '252Es': 0, '257Fm': 0, '258Md': 0, '260Md': 0, '259No': 0, '262Lr': 0, '267Rf': 0, '268Db': 0, '271Sg': 0, '272Bh': 0, '270Hs': 0, '276Mt': 0, '281Ds': 0, '280Rg': 0, '285Cn': 0, '284Nh': 0, '289Fl': 0, '288Mc': 0, '293Lv': 0, '292Ts': 0, '294Og': 0}"}, {"fullname": "corems.encapsulation.constant.Atoms.isotopes", "modulename": "corems.encapsulation.constant", "qualname": "Atoms.isotopes", "kind": "variable", "doc": "

    \n", "default_value": "{'H': ['Hydrogen', ['D', 'T']], 'He': ['Helium', ['3He']], 'Li': ['Lithium', ['6Li']], 'Be': ['Beryllium', [None]], 'B': ['Boron', ['10B']], 'C': ['Carbon', ['13C']], 'N': ['Nitrogen', ['15N']], 'O': ['Oxygen', ['18O', '17O']], 'F': ['Fluorine', [None]], 'Ne': ['Neon', ['22Ne', '21Ne']], 'Na': ['Sodium', [None]], 'Mg': ['Magnesium', ['26Mg', '25Mg']], 'Al': ['Aluminum', [None]], 'Si': ['Silicon', ['29Si', '30Si']], 'P': ['Phosphorus', [None]], 'S': ['Sulfur', ['34S', '33S', '36S']], 'Cl': ['Chlorine', ['37Cl']], 'Ar': ['Argon', ['36Ar', '38Ar']], 'K': ['Potassium', ['41K', '40K']], 'Ca': ['Calcium', ['44Ca', '48Ca', '43Ca', '42Ca', '46Ca']], 'Sc': ['Scandium', [None]], 'Ti': ['Titanium', ['46Ti', '47Ti', '49Ti', '50Ti']], 'V': ['Vanadium', ['50V']], 'Cr': ['Chromium', ['53Cr', '50Cr', '54Cr']], 'Mn': ['Manganese', [None]], 'Fe': ['Iron', ['54Fe', '57Fe', '58Fe']], 'Co': ['Cobalt', [None]], 'Ni': ['Nickel', ['60Ni', '62Ni', '61Ni', '64Ni']], 'Cu': ['Copper', ['65Cu']], 'Zn': ['Zinc', ['66Zn', '68Zn', '67Zn', '70Zn']], 'Ga': ['Gallium', ['71Ga']], 'Ge': ['Germanium', ['72Ge', '70Ge', '73Ge', '76Ge']], 'As': ['Arsenic', [None]], 'Se': ['Selenium', ['78Se', '76Se', '82Se', '77Se', '74Se']], 'Br': ['Bromine', ['81Br']], 'Kr': ['Krypton', ['86Kr', '82Kr', '83Kr', '80Kr']], 'Rb': ['Rubidium', ['87Rb']], 'Sr': ['Strontium', ['86Sr', '87Sr', '84Sr']], 'Y': ['Yttrium', [None]], 'Zr': ['Zirconium', ['94Zr', '92Zr', '91Zr', '96Zr']], 'Nb': ['Niobium', [None]], 'Mo': ['Molybdenum', ['96Mo', '95Mo', '92Mo', '100Mo', '97Mo', '94Mo']], 'Tc': ['Technetium', [None]], 'Ru': ['Ruthenium', ['104Ru', '101Ru', '99Ru', '100Ru', '96Ru', '98Ru']], 'Rh': ['Rhodium', [None]], 'Pd': ['Palladium', ['108Pd', '105Pd', '110Pd', '104Pd', '102Pd']], 'Ag': ['Silver', ['109Ag']], 'Cd': ['Cadmium', ['114Cd', '111Cd', '110Cd', '113Cd', '116Cd', '106Cd', '108Cd']], 'In': ['Indium', ['113In']], 'Sn': ['Tin', ['118Sn', '116Sn', '119Sn', '117Sn', '124Sn', '122Sn', '112Sn', 
'114Sn', '115Sn']], 'Sb': ['Antimony', ['123Sb']], 'Te': ['Tellurium', ['128Te', '126Te', '125Te', '124Te', '122Te', '123Te', '120Te']], 'I': ['Iodine', [None]], 'Xe': ['Xenon', ['129Xe', '131Xe', '134Xe', '136Xe', '130Xe', '128Xe']], 'Cs': ['Cesium', [None]], 'Ba': ['Barium', ['137Ba', '136Ba', '135Ba', '134Ba']], 'La': ['Lanthanum', ['138La']], 'Hf': ['Hafnium', ['178Hf', '177Hf', '179Hf', '176Hf']], 'Ta': ['Tantalum', ['180Ta']], 'W': ['Tungsten', ['186W', '182W', '183W']], 'Re': ['Rhenium', ['185Re']], 'Os': ['Osmium', ['190Os', '189Os', '188Os', '187Os', '186Os']], 'Ir': ['Iridium', ['191Ir']], 'Pt': ['Platinum', ['194Pt', '196Pt', '198Pt', '192Pt']], 'Au': ['Gold', [None]], 'Hg': ['Mercury', ['200Hg', '199Hg', '201Hg', '198Hg', '204Hg']], 'Tl': ['Thallium', ['203Tl']], 'Pb': ['Lead', ['206Pb', '207Pb', '204Pb']], 'Bi': ['Bismuth', [None]], 'Po': ['Polonium', [None]], 'At': ['Astatine', [None]], 'Rn': ['Radon', [None]], 'Fr': ['Francium', [None]], 'Ra': ['Radium', [None]], 'Ac': ['Actinium', [None]], 'Rf': ['Rutherfordium', [None]], 'Db': ['Dubnium', [None]], 'Sg': ['Seaborgium', [None]], 'Bh': ['Bohrium', [None]], 'Hs': ['Hassium', [None]], 'Mt': ['Meitnerium', [None]], 'Ds': ['Darmstadtium', [None]], 'Rg': ['Roentgenium', [None]], 'Cn': ['Copernicium', [None]], 'Nh': ['Nihonium', [None]], 'Fl': ['Flerovium', [None]], 'Mc': ['Moscovium', [None]], 'Lv': ['Livermorium', [None]], 'Ts': ['Tennessine', [None]], 'Og': ['Oganesson', [None]], 'Ce': ['Cerium', ['142Ce', '138Ce136Ce']], 'Pr': ['Praseodymium', [None]], 'Nd': ['Neodymium', [None]], 'Pm': ['Promethium', [None]], 'Sm': ['Samarium', [None]], 'Eu': ['Europium', [None]], 'Gd': ['Gadolinium', [None]], 'Tb': ['Terbium', [None]], 'Dy': ['Dysprosium', [None]], 'Ho': ['Holmium', [None]], 'Er': ['Erbium', [None]], 'Tm': ['Thulium', [None]], 'Yb': ['Ytterbium', [None]], 'Lu': ['Lutetium', ['176Lu']], 'Th': ['Thorium', [None]], 'Pa': ['Protactinium', [None]], 'U': ['Uranium', ['235U', '234U']], 'Np': ['Neptunium', 
[None]], 'Pu': ['Plutonium', [None]], 'Am': ['Americium', [None]], 'Cm': ['Curium', [None]], 'Bk': ['Berkelium', [None]], 'Cf': ['Californium', [None]], 'Es': ['Einsteinium', [None]], 'Fm': ['Fermium', [None]], 'Md': ['Mendelevium', [None]], 'No': ['Nobelium', [None]], 'Lr': ['Lawrencium', [None]]}"}, {"fullname": "corems.encapsulation.factory", "modulename": "corems.encapsulation.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.parameters", "modulename": "corems.encapsulation.factory.parameters", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.parameters.reset_ms_parameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "reset_ms_parameters", "kind": "function", "doc": "

    Reset the MSParameters class to the default values

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.reset_gcms_parameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "reset_gcms_parameters", "kind": "function", "doc": "

    Reset the GCMSParameters class to the default values

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.reset_lcms_parameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "reset_lcms_parameters", "kind": "function", "doc": "

    Reset the LCMSParameters class to the default values

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters", "kind": "class", "doc": "

    MSParameters class is used to store the parameters used for the processing of the mass spectrum

    \n\n

    Each attribute is a class that contains the parameters for the processing of the mass spectrum, see the corems.encapsulation.factory.processingSetting module for more details.

    \n\n
    Parameters
    \n\n
      \n
    • use_defaults (bool, optional):\nif True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • molecular_search (MolecularFormulaSearchSettings):\nMolecularFormulaSearchSettings object
    • \n
    • transient (TransientSetting):\nTransientSetting object
    • \n
    • mass_spectrum (MassSpectrumSetting):\nMassSpectrumSetting object
    • \n
    • ms_peak (MassSpecPeakSetting):\nMassSpecPeakSetting object
    • \n
    • data_input (DataInputSetting):\nDataInputSetting object
    • \n
    \n\n
    Notes
    \n\n

    One can use the use_defaults parameter to reset the parameters to the default values.\nAlternatively, to use the current values - modify the class's contents before instantiating the class.

    \n"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.__init__", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.__init__", "kind": "function", "doc": "

    \n", "signature": "(use_defaults=False)"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.molecular_search", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.molecular_search", "kind": "variable", "doc": "

    \n", "default_value": "MolecularFormulaSearchSettings(use_isotopologue_filter=False, isotopologue_filter_threshold=33.0, isotopologue_filter_atoms=('Cl', 'Br'), use_runtime_kendrick_filter=False, use_min_peaks_filter=True, min_peaks_per_class=15, url_database='postgresql+psycopg2://coremsappdb:coremsapppnnl@molformdb:5432/coremsapp', db_jobs=3, db_chunk_size=300, ion_charge=-1, min_hc_filter=0.3, max_hc_filter=3.0, min_oc_filter=0.0, max_oc_filter=1.2, min_op_filter=2.0, use_pah_line_rule=False, min_dbe=0.0, max_dbe=40.0, mz_error_score_weight=0.6, isotopologue_score_weight=0.4, adduct_atoms_neg=('Cl', 'Br'), adduct_atoms_pos=('Na', 'K'), score_methods=('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'), score_method='prob_score', output_min_score=0.1, output_score_method='All Candidates', isRadical=False, isProtonated=True, isAdduct=False, usedAtoms={'C': (1, 100), 'H': (1, 200)}, ion_types_excluded=[], ionization_type='ESI', min_ppm_error=-10.0, max_ppm_error=10.0, min_abun_error=-100.0, max_abun_error=100.0, mz_error_range=1.5, error_method='None', mz_error_average=0.0, used_atom_valences={'C': 4, '13C': 4, 'N': 3, 'O': 2, 'S': 2, 'H': 1, 'F': 1, 'Cl': 1, 'Br': 1, 'I': 1, 'At': 1, 'Li': 1, 'Na': 1, 'K': 1, 'Rb': 1, 'Cs': 1, 'Fr': 1, 'B': 4, 'In': 3, 'Al': 3, 'P': 3, 'Ga': 3, 'Mg': 2, 'Be': 2, 'Ca': 2, 'Sr': 2, 'Ba': 2, 'Ra': 2, 'V': 5, 'Fe': 3, 'Si': 4, 'Sc': 3, 'Ti': 4, 'Cr': 1, 'Mn': 1, 'Co': 1, 'Ni': 1, 'Cu': 2, 'Zn': 2, 'Ge': 4, 'As': 5, 'Se': 6, 'Y': 3, 'Zr': 4, 'Nb': 5, 'Mo': 6, 'Tc': 7, 'Ru': 8, 'Rh': 6, 'Pd': 4, 'Ag': 0, 'Cd': 2, 'Sn': 4, 'Sb': 5, 'Te': 6, 'La': 3, 'Hf': 4, 'Ta': 5, 'W': 6, 'Re': 4, 'Os': 4, 'Ir': 4, 'Pt': 4, 'Au': 3, 'Hg': 1, 'Tl': 3, 'Pb': 4, 'Bi': 3, 'Po': 2, 'Ac': 3})"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.transient", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.transient", 
"kind": "variable", "doc": "

    \n", "default_value": "TransientSetting(implemented_apodization_function=('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser'), apodization_method='Hanning', number_of_truncations=0, number_of_zero_fills=1, next_power_of_two=False, kaiser_beta=8.6)"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.mass_spectrum", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.mass_spectrum", "kind": "variable", "doc": "

    \n", "default_value": "MassSpectrumSetting(noise_threshold_method='log', noise_threshold_methods_implemented=('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'), noise_threshold_min_std=6, noise_threshold_min_s2n=4.0, noise_threshold_min_relative_abundance=6.0, noise_threshold_absolute_abundance=1000000.0, noise_threshold_log_nsigma=6, noise_threshold_log_nsigma_corr_factor=0.463, noise_threshold_log_nsigma_bins=500, noise_min_mz=50.0, noise_max_mz=1200.0, min_picking_mz=50.0, max_picking_mz=1200.0, picking_point_extrapolate=3, calib_minimize_method='Powell', calib_pol_order=2, max_calib_ppm_error=1.0, min_calib_ppm_error=-1.0, calib_sn_threshold=2.0, calibration_ref_match_method='legacy', calibration_ref_match_method_implemented=('legacy', 'merged'), calibration_ref_match_tolerance=0.003, calibration_ref_match_std_raw_error_limit=1.5, do_calibration=True, verbose_processing=True)"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.ms_peak", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.ms_peak", "kind": "variable", "doc": "

    \n", "default_value": "MassSpecPeakSetting(kendrick_base={'C': 1, 'H': 2}, kendrick_rounding_method='floor', implemented_kendrick_rounding_methods=('floor', 'ceil', 'round'), peak_derivative_threshold=0.0, peak_min_prominence_percent=0.1, min_peak_datapoints=5.0, peak_max_prominence_percent=0.1, peak_height_max_percent=10.0, legacy_resolving_power=True, legacy_centroid_polyfit=False)"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.data_input", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.data_input", "kind": "variable", "doc": "

    \n", "default_value": "DataInputSetting(header_translate={'m/z': 'm/z', 'mOz': 'm/z', 'Mass': 'm/z', 'Resolving Power': 'Resolving Power', 'Res.': 'Resolving Power', 'resolution': 'Resolving Power', 'Intensity': 'Peak Height', 'Peak Height': 'Peak Height', 'I': 'Peak Height', 'Abundance': 'Peak Height', 'abs_abu': 'Peak Height', 'Signal/Noise': 'S/N', 'S/N': 'S/N', 'sn': 'S/N'})"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.copy", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.copy", "kind": "function", "doc": "

    Create a copy of the MSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.MSParameters.print", "modulename": "corems.encapsulation.factory.parameters", "qualname": "MSParameters.print", "kind": "function", "doc": "

    Print the MSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters", "kind": "class", "doc": "

    GCMSParameters class is used to store the parameters used for the processing of the gas chromatograph mass spectrum

    \n\n

    Each attribute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.

    \n\n
    Parameters
    \n\n
      \n
    • use_defaults (bool, optional):\nif True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • molecular_search (MolecularFormulaSearchSettings):\nMolecularFormulaSearchSettings object
    • \n
    • gc_ms (GasChromatographSetting):\nGasChromatographSetting object
    • \n
    \n\n
    Notes
    \n\n

    One can use the use_defaults parameter to reset the parameters to the default values.\nAlternatively, to use the current values - modify the class's contents before instantiating the class.

    \n"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.__init__", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.__init__", "kind": "function", "doc": "

    \n", "signature": "(use_defaults=False)"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.molecular_search", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.molecular_search", "kind": "variable", "doc": "

    \n", "default_value": "CompoundSearchSettings(url_database='sqlite:///db/pnnl_lowres_gcms_compounds.sqlite', ri_search_range=35.0, rt_search_range=1.0, correlation_threshold=0.5, score_threshold=0.0, ri_spacing=200.0, ri_std=3.0, ri_calibration_compound_names=['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'], exploratory_mode=False, score_methods=('highest_sim_score', 'highest_ss'), output_score_method='All')"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.gc_ms", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.gc_ms", "kind": "variable", "doc": "

    \n", "default_value": "GasChromatographSetting(use_deconvolution=False, implemented_smooth_method=('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), smooth_window=5, smooth_method='savgol', savgol_pol_order=2, peak_derivative_threshold=0.0005, peak_height_max_percent=10.0, peak_max_prominence_percent=1.0, min_peak_datapoints=5.0, max_peak_width=0.1, noise_threshold_method='manual_relative_abundance', noise_threshold_methods_implemented=('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), std_noise_threshold=3, peak_height_min_percent=0.1, peak_min_prominence_percent=0.1, eic_signal_threshold=0.01, max_rt_distance=0.025, verbose_processing=True)"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.copy", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.copy", "kind": "function", "doc": "

    Create a copy of the GCMSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.GCMSParameters.print", "modulename": "corems.encapsulation.factory.parameters", "qualname": "GCMSParameters.print", "kind": "function", "doc": "

    Print the GCMSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters", "kind": "class", "doc": "

    LCMSParameters class is used to store the parameters used for the processing of the liquid chromatograph mass spectrum

    \n\n

    Each attribute is a class that contains the parameters for the processing of the data, see the corems.encapsulation.factory.processingSetting module for more details.

    \n\n
    Parameters
    \n\n
      \n
    • use_defaults (bool, optional):\nif True, the class will be instantiated with the default values, otherwise the current values will be used. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • lc_ms (LiquidChromatographSetting):\nLiquidChromatographSetting object
    • \n
    • mass_spectrum (dict):\ndictionary with the mass spectrum parameters for ms1 and ms2, each value is a MSParameters object
    • \n
    \n\n
    Notes
    \n\n

    One can use the use_defaults parameter to reset the parameters to the default values.\nAlternatively, to use the current values - modify the class's contents before instantiating the class.

    \n"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.__init__", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.__init__", "kind": "function", "doc": "

    \n", "signature": "(use_defaults=False)"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.lc_ms", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.lc_ms", "kind": "variable", "doc": "

    \n", "default_value": "LiquidChromatographSetting(scans=(-1, -1), eic_tolerance_ppm=5.0, smooth_window=5, smooth_method='savgol', implemented_smooth_method=('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'), savgol_pol_order=2, peak_height_max_percent=10.0, peak_max_prominence_percent=1.0, peak_derivative_threshold=0.0005, min_peak_datapoints=5.0, noise_threshold_method='manual_relative_abundance', noise_threshold_methods_implemented=('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'), peak_height_min_percent=0.1, eic_signal_threshold=0.01, peak_picking_method='persistent homology', implemented_peak_picking_methods=('persistent homology',), mass_feature_cluster_mz_tolerance_rel=5e-06, mass_feature_cluster_rt_tolerance=0.3, ms1_scans_to_average=1, ms1_deconvolution_corr_min=0.8, ms2_dda_rt_tolerance=0.15, ms2_dda_mz_tolerance=0.05, ms2_min_fe_score=0.2, search_as_lipids=False, include_fragment_types=False, export_profile_spectra=False, export_eics=True, export_unprocessed_ms1=False, verbose_processing=True)"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.mass_spectrum", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.mass_spectrum", "kind": "variable", "doc": "

    \n", "default_value": "{'ms1': <corems.encapsulation.factory.parameters.MSParameters object>, 'ms2': <corems.encapsulation.factory.parameters.MSParameters object>}"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.copy", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.copy", "kind": "function", "doc": "

    Create a copy of the LCMSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.LCMSParameters.print", "modulename": "corems.encapsulation.factory.parameters", "qualname": "LCMSParameters.print", "kind": "function", "doc": "

    Print the LCMSParameters object

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.parameters.default_parameters", "modulename": "corems.encapsulation.factory.parameters", "qualname": "default_parameters", "kind": "function", "doc": "

    Generate parameters dictionary with the default parameters for data processing\n To gather parameters from instrument data during the data parsing step, a parameters dictionary with the default parameters needs to be generated.\n This dictionary acts as a placeholder and is later used as an argument for all the class constructor methods during instantiation.\n The data gathered from the instrument is added to the class properties.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\npath to the file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • parameters (dict):\ndictionary with the default parameters for data processing
    • \n
    \n", "signature": "(file_location):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting", "modulename": "corems.encapsulation.factory.processingSetting", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting", "kind": "class", "doc": "

    Transient processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • implemented_apodization_function (tuple):\nAvailable apodization functions
    • \n
    • apodization_method (str):\nApodization function to use. Hanning is a good default for Fourier transform magnitude mode. For absorption mode processing, Half-Sine or Half-Kaiser may be more appropriate.
    • \n
    • number_of_truncations (int):\nHow many times to truncate the transient prior to Fourier transform
    • \n
    • number_of_zero_fills (int):\nHow many times to zero fill the transient prior to Fourier transform.
    • \n
    • next_power_of_two (bool):\nIf True, zero fill to the next power of two after the new length of len(transient)+(number_of_zero_fills*len(transient)).
    • \n
    • kaiser_beta (float):\nBeta parameter for Kaiser or Half-Kaiser apodization function. 0 is rectangular, 5 is similar to Hamming,\n6 is similar to Hanning, and 8.6 is similar to Blackman (from numpy docs)
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\timplemented_apodization_function: tuple = ('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser'),\tapodization_method: str = 'Hanning',\tnumber_of_truncations: int = 0,\tnumber_of_zero_fills: int = 1,\tnext_power_of_two: bool = False,\tkaiser_beta: float = 8.6)"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.implemented_apodization_function", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.implemented_apodization_function", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('Hamming', 'Hanning', 'Blackman', 'Full-Sine', 'Half-Sine', 'Kaiser', 'Half-Kaiser')"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.apodization_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.apodization_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'Hanning'"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.number_of_truncations", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.number_of_truncations", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "0"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.number_of_zero_fills", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.number_of_zero_fills", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.next_power_of_two", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.next_power_of_two", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.TransientSetting.kaiser_beta", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "TransientSetting.kaiser_beta", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "8.6"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting", "kind": "class", "doc": "

    Data input settings class

    \n\n
    Attributes
    \n\n
      \n
    • header_translate (dict):\nDictionary with the header labels to be translated to the corems labels. For example, {'m/z':'m/z', 'Resolving Power':'Resolving Power', 'Abundance':'Abundance' , 'S/N':'S/N'}
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(header_translate: dict = <factory>)"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.header_translate", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.header_translate", "kind": "variable", "doc": "

    \n", "annotation": ": dict"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.add_mz_label", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.add_mz_label", "kind": "function", "doc": "

    Add a label to the header_translate dictionary to be translated to the corems label for mz.

    \n", "signature": "(self, label):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.add_peak_height_label", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.add_peak_height_label", "kind": "function", "doc": "

    Add a label to the header_translate dictionary to be translated to the corems label for peak height.

    \n", "signature": "(self, label):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.add_sn_label", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.add_sn_label", "kind": "function", "doc": "

    Add a label to the header_translate dictionary to be translated to the corems label for signal to noise.

    \n", "signature": "(self, label):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting.DataInputSetting.add_resolving_power_label", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "DataInputSetting.add_resolving_power_label", "kind": "function", "doc": "

    Add a label to the header_translate dictionary to be translated to the corems label for resolving power.

    \n", "signature": "(self, label):", "funcdef": "def"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting", "kind": "class", "doc": "

    Liquid chromatograph processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • scans (list or tuple, optional):\nList of selected scans to average or a tuple containing the range to average. Default is (-1, -1).
    • \n
    • eic_tolerance_ppm (float, optional):\nMass tolerance in ppm for extracted ion chromatogram peak detection. Default is 5.
    • \n
    • correct_eic_baseline (bool, optional):\nIf True, correct the baseline of the extracted ion chromatogram. Default is True.
    • \n
    • smooth_window (int, optional):\nWindow size for smoothing the ion chromatogram (extracted or total). Default is 5.
    • \n
    • smooth_method (str, optional):\nSmoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    • \n
    • implemented_smooth_method (tuple, optional):\nSmoothing methods that can be implemented. Values are ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    • \n
    • savgol_pol_order (int, optional):\nPolynomial order for Savitzky-Golay smoothing. Default is 2.
    • \n
    • peak_height_max_percent (float, optional):\n1-100 % used for baseline detection; use 0.1 for second_derivative and 10 for other methods. Default is 10.
    • \n
    • peak_max_prominence_percent (float, optional):\n1-100 % used for baseline detection. Default is 1.
    • \n
    • peak_derivative_threshold (float, optional):\nThreshold for defining derivative crossing. Default is 0.0005.
    • \n
    • min_peak_datapoints (float, optional):\nMinimum number of data points to define a chromatographic peak. Default is 5.
    • \n
    • noise_threshold_method (str, optional):\nMethod for detecting noise threshold. Default is 'manual_relative_abundance'.
    • \n
    • noise_threshold_methods_implemented (tuple, optional):\nMethods for detected noise threshold that can be implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    • \n
    • peak_height_min_percent (float, optional):\n0-100 % used for peak detection. Default is 0.1.
    • \n
    • eic_signal_threshold (float, optional):\n0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    • \n
    • eic_buffer_time (float, optional):\nBuffer time to add to the start and end of the plot of the extracted ion chromatogram, in minutes. Default is 1.5.
    • \n
    • ph_smooth_it (int, optional):\nNumber of iterations to use for smoothing prior to finding mass features.\nCalled within the PHCalculations.find_mass_features_ph() method. Default is 1.
    • \n
    • ph_smooth_radius_mz (int, optional):\nRadius in m/z steps (not daltons) for smoothing prior to finding mass features.\nCalled within the PHCalculations.find_mass_features_ph() method. Default is 0.
    • \n
    • ph_smooth_radius_scan (int, optional):\nRadius in scan steps for smoothing prior to finding mass features.\nCalled within the PHCalculations.find_mass_features_ph() method. Default is 1.
    • \n
    • ph_inten_min_rel (float, optional):\nRelative minimum intensity to use for finding mass features.\nCalculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).\nCalled within the PH_Calculations.find_mass_features() method. Default is 0.001.
    • \n
    • ph_persis_min_rel (float, optional):\nRelative minimum persistence for retaining mass features.\nCalculated as a fraction of the maximum intensity of the unprocessed profile data (mz, scan).\nShould be greater than or equal to ph_inten_min_rel.\nCalled within the PH_Calculations.find_mass_features() method. Default is 0.001.
    • \n
    • mass_feature_cluster_mz_tolerance_rel (float, optional):\nRelative m/z tolerance to use for clustering mass features.\nCalled with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.\nDefault is 5E-6 (5 ppm).
    • \n
    • mass_feature_cluster_rt_tolerance (float, optional):\nRetention time tolerance to use for clustering mass features, in minutes.\nCalled with the PHCalculations.cluster_mass_features() and the LCCalculations.deconvolute_ms1_mass_features() methods.\nDefault is 0.3.
    • \n
    • ms1_scans_to_average (int, optional):\nNumber of MS1 scans to average for mass-feature associated m/zs.\nCalled within the LCMSBase.add_associated_ms1() method. Default is 1.
    • \n
    • ms1_deconvolution_corr_min (float, optional):\nMinimum correlation to use for deconvoluting MS1 mass features.\nCalled within the LCCalculations.deconvolute_ms1_mass_features() method.\nDefault is 0.8.
    • \n
    • ms2_dda_rt_tolerance (float, optional):\nRetention time tolerance to use for associating MS2 spectra to mass features, in minutes. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.15.
    • \n
    • ms2_dda_mz_tolerance (float, optional):\nMass tolerance to use for associating MS2 spectra to mass features. Called within the LCMSBase.add_associated_ms2_dda() method. Default is 0.05.
    • \n
    • ms2_min_fe_score (float, optional):\nMinimum flash entropy for retaining MS2 annotations. Called within the LCMSSpectralSearch.fe_search() method. Default is 0.2.
    • \n
    • search_as_lipids (bool, optional):\nIf True, prepare the database for lipid searching. Called within the LCMSSpectralSearch.fe_prep_search_db() method. Default is False.
    • \n
    • include_fragment_types (bool, optional):\nIf True, include fragment types in the database. Called within the LCMSSpectralSearch.fe_search() and related methods. Default is False.
    • \n
    • verbose_processing (bool, optional):\nIf True, print verbose processing information. Default is True.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tscans: list | tuple = (-1, -1),\teic_tolerance_ppm: float = 5,\tsmooth_window: int = 5,\tsmooth_method: str = 'savgol',\timplemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'),\tsavgol_pol_order: int = 2,\tpeak_height_max_percent: float = 10,\tpeak_max_prominence_percent: float = 1,\tpeak_derivative_threshold: float = 0.0005,\tmin_peak_datapoints: float = 5,\tnoise_threshold_method: str = 'manual_relative_abundance',\tnoise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'),\tpeak_height_min_percent: float = 0.1,\teic_signal_threshold: float = 0.01,\tpeak_picking_method: str = 'persistent homology',\timplemented_peak_picking_methods: tuple = ('persistent homology',),\tmass_feature_cluster_mz_tolerance_rel: float = 5e-06,\tmass_feature_cluster_rt_tolerance: float = 0.3,\tms1_scans_to_average: int = 1,\tms1_deconvolution_corr_min: float = 0.8,\tms2_dda_rt_tolerance: float = 0.15,\tms2_dda_mz_tolerance: float = 0.05,\tms2_min_fe_score: float = 0.2,\tsearch_as_lipids: bool = False,\tinclude_fragment_types: bool = False,\texport_profile_spectra: bool = False,\texport_eics: bool = True,\texport_unprocessed_ms1: bool = False,\tverbose_processing: bool = True)"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.scans", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.scans", "kind": "variable", "doc": "

    \n", "annotation": ": list | tuple", "default_value": "(-1, -1)"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.eic_tolerance_ppm", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.eic_tolerance_ppm", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.correct_eic_baseline", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.correct_eic_baseline", "kind": "variable", "doc": "

    \n", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.smooth_window", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.smooth_window", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.smooth_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.smooth_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'savgol'"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.implemented_smooth_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.implemented_smooth_method", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.savgol_pol_order", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.savgol_pol_order", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "2"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_height_max_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_height_max_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "10"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_max_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_max_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_derivative_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_derivative_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0005"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.min_peak_datapoints", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.min_peak_datapoints", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.noise_threshold_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.noise_threshold_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'manual_relative_abundance'"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.noise_threshold_methods_implemented", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.noise_threshold_methods_implemented", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_height_min_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_height_min_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.eic_signal_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.eic_signal_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.01"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.eic_buffer_time", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.eic_buffer_time", "kind": "variable", "doc": "

    \n", "default_value": "1.5"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.peak_picking_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.peak_picking_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'persistent homology'"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.implemented_peak_picking_methods", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.implemented_peak_picking_methods", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('persistent homology',)"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_smooth_it", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_smooth_it", "kind": "variable", "doc": "

    \n", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_smooth_radius_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_smooth_radius_mz", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_smooth_radius_scan", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_smooth_radius_scan", "kind": "variable", "doc": "

    \n", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_inten_min_rel", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_inten_min_rel", "kind": "variable", "doc": "

    \n", "default_value": "0.001"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ph_persis_min_rel", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ph_persis_min_rel", "kind": "variable", "doc": "

    \n", "default_value": "0.001"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.mass_feature_cluster_mz_tolerance_rel", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.mass_feature_cluster_mz_tolerance_rel", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5e-06"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.mass_feature_cluster_rt_tolerance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.mass_feature_cluster_rt_tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.3"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms1_scans_to_average", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms1_scans_to_average", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms1_deconvolution_corr_min", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms1_deconvolution_corr_min", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.8"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms2_dda_rt_tolerance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms2_dda_rt_tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.15"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms2_dda_mz_tolerance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms2_dda_mz_tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.05"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.ms2_min_fe_score", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.ms2_min_fe_score", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.2"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.search_as_lipids", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.search_as_lipids", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.include_fragment_types", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.include_fragment_types", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.export_profile_spectra", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.export_profile_spectra", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.export_eics", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.export_eics", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.export_unprocessed_ms1", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.export_unprocessed_ms1", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.LiquidChromatographSetting.verbose_processing", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "LiquidChromatographSetting.verbose_processing", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting", "kind": "class", "doc": "

    Mass spectrum processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • noise_threshold_method (str, optional):\nMethod for detecting noise threshold. Default is 'log'.
    • \n
    • noise_threshold_methods_implemented (tuple, optional):\nMethods for detected noise threshold that can be implemented. Default is ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log').
    • \n
    • noise_threshold_min_std (int, optional):\nMinimum value for noise thresholding when using 'minima' noise threshold method. Default is 6.
    • \n
    • noise_threshold_min_s2n (float, optional):\nMinimum value for noise thresholding when using 'signal_noise' noise threshold method. Default is 4.
    • \n
    • noise_threshold_min_relative_abundance (float, optional):\nMinimum value for noise thresholding when using 'relative_abundance' noise threshold method. Note that this is a percentage value. Default is 6 (6%).
    • \n
    • noise_threshold_absolute_abundance (float, optional):\nMinimum value for noise thresholding when using 'absolute_abundance' noise threshold method. Default is 1_000_000.
    • \n
    • noise_threshold_log_nsigma (int, optional):\nNumber of standard deviations to use when using 'log' noise threshold method. Default is 6.
    • \n
    • noise_threshold_log_nsigma_corr_factor (float, optional):\nCorrection factor for log noise threshold method. Default is 0.463.
    • \n
    • noise_threshold_log_nsigma_bins (int, optional):\nNumber of bins to use for histogram when using 'log' noise threshold method. Default is 500.
    • \n
    • noise_min_mz (float, optional):\nMinimum m/z to use for noise thresholding. Default is 50.0.
    • \n
    • noise_max_mz (float, optional):\nMaximum m/z to use for noise thresholding. Default is 1200.0.
    • \n
    • min_picking_mz (float, optional):\nMinimum m/z to use for peak picking. Default is 50.0.
    • \n
    • max_picking_mz (float, optional):\nMaximum m/z to use for peak picking. Default is 1200.0.
    • \n
    • picking_point_extrapolate (int, optional):\nHow many data points (in each direction) to extrapolate the mz axis and 0 pad the abundance axis. Default is 3.\nRecommend 3 for reduced profile data or if peak picking faults
    • \n
    • calib_minimize_method (str, optional):\nMinimization method to use for calibration. Default is 'Powell'.
    • \n
    • calib_pol_order (int, optional):\nPolynomial order to use for calibration. Default is 2.
    • \n
    • max_calib_ppm_error (float, optional):\nMaximum ppm error to use for calibration. Default is 1.0.
    • \n
    • min_calib_ppm_error (float, optional):\nMinimum ppm error to use for calibration. Default is -1.0.
    • \n
    • calib_sn_threshold (float, optional):\nSignal to noise threshold to use for calibration. Default is 2.0.
    • \n
    • calibration_ref_match_method (string, optional):\nMethod for matching reference masses with measured masses for recalibration. Default is 'legacy'.
    • \n
    • calibration_ref_match_tolerance (float, optional):\nIf using the new method for calibration reference mass matching, this tolerance is the initial matching tolerance. Default is 0.003
    • \n
    • do_calibration (bool, optional):\nIf True, perform calibration. Default is True.
    • \n
    • verbose_processing (bool, optional):\nIf True, print verbose processing information. Default is True.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tnoise_threshold_method: str = 'log',\tnoise_threshold_methods_implemented: tuple = ('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log'),\tnoise_threshold_min_std: int = 6,\tnoise_threshold_min_s2n: float = 4,\tnoise_threshold_min_relative_abundance: float = 6,\tnoise_threshold_absolute_abundance: float = 1000000,\tnoise_threshold_log_nsigma: int = 6,\tnoise_threshold_log_nsigma_corr_factor: float = 0.463,\tnoise_threshold_log_nsigma_bins: int = 500,\tnoise_min_mz: float = 50.0,\tnoise_max_mz: float = 1200.0,\tmin_picking_mz: float = 50.0,\tmax_picking_mz: float = 1200.0,\tpicking_point_extrapolate: int = 3,\tcalib_minimize_method: str = 'Powell',\tcalib_pol_order: int = 2,\tmax_calib_ppm_error: float = 1.0,\tmin_calib_ppm_error: float = -1.0,\tcalib_sn_threshold: float = 2.0,\tcalibration_ref_match_method: str = 'legacy',\tcalibration_ref_match_method_implemented: tuple = ('legacy', 'merged'),\tcalibration_ref_match_tolerance: float = 0.003,\tcalibration_ref_match_std_raw_error_limit: float = 1.5,\tdo_calibration: bool = True,\tverbose_processing: bool = True)"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'log'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_methods_implemented", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_methods_implemented", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('minima', 'signal_noise', 'relative_abundance', 'absolute_abundance', 'log')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_min_std", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_min_std", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "6"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_min_s2n", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_min_s2n", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "4"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_min_relative_abundance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_min_relative_abundance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "6"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_absolute_abundance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_absolute_abundance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1000000"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_log_nsigma", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_log_nsigma", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "6"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_log_nsigma_corr_factor", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_log_nsigma_corr_factor", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.463"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_threshold_log_nsigma_bins", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_threshold_log_nsigma_bins", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "500"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_min_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_min_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "50.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.noise_max_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.noise_max_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1200.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.min_picking_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.min_picking_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "50.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.max_picking_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.max_picking_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1200.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.picking_point_extrapolate", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.picking_point_extrapolate", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calib_minimize_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calib_minimize_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'Powell'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calib_pol_order", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calib_pol_order", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "2"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.max_calib_ppm_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.max_calib_ppm_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.min_calib_ppm_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.min_calib_ppm_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "-1.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calib_sn_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calib_sn_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "2.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calibration_ref_match_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calibration_ref_match_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'legacy'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calibration_ref_match_method_implemented", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calibration_ref_match_method_implemented", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('legacy', 'merged')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calibration_ref_match_tolerance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calibration_ref_match_tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.003"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.calibration_ref_match_std_raw_error_limit", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.calibration_ref_match_std_raw_error_limit", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.5"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.do_calibration", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.do_calibration", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpectrumSetting.verbose_processing", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpectrumSetting.verbose_processing", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting", "kind": "class", "doc": "

    Mass spectrum peak processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • kendrick_base (Dict, optional):\nDictionary specifying the elements and their counts in the Kendrick base.\nDefaults to {'C': 1, 'H': 2}.
    • \n
    • kendrick_rounding_method (str, optional):\nMethod for calculating the nominal Kendrick mass. Valid values are 'floor', 'ceil', or 'round'.\nDefaults to 'floor'.
    • \n
    • implemented_kendrick_rounding_methods (tuple):\nTuple of valid rounding methods for calculating the nominal Kendrick mass.\nDefaults to ('floor', 'ceil', 'round').
    • \n
    • peak_derivative_threshold (float, optional):\nThreshold for defining derivative crossing. Should be a value between 0 and 1.\nDefaults to 0.0.
    • \n
    • peak_min_prominence_percent (float, optional):\nMinimum prominence percentage used for peak detection. Should be a value between 1 and 100.\nDefaults to 0.1.
    • \n
    • min_peak_datapoints (float, optional):\nMinimum number of data points used for peak detection. Should be a value between 0 and infinity.\nDefaults to 5.
    • \n
    • peak_max_prominence_percent (float, optional):\nMaximum prominence percentage used for baseline detection. Should be a value between 1 and 100.\nDefaults to 0.1.
    • \n
    • peak_height_max_percent (float, optional):\nMaximum height percentage used for baseline detection. Should be a value between 1 and 100.\nDefaults to 10.
    • \n
    • legacy_resolving_power (bool, optional):\nFlag indicating whether to use the legacy (CoreMS v1) resolving power calculation.\nDefaults to True.
    • \n
    • legacy_centroid_polyfit (bool, optional):\nFlag indicating whether to use the legacy (numpy polyfit) method to fit the centroid.\nDefaults to False.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tkendrick_base: Dict = <factory>,\tkendrick_rounding_method: str = 'floor',\timplemented_kendrick_rounding_methods: tuple = ('floor', 'ceil', 'round'),\tpeak_derivative_threshold: float = 0.0,\tpeak_min_prominence_percent: float = 0.1,\tmin_peak_datapoints: float = 5,\tpeak_max_prominence_percent: float = 0.1,\tpeak_height_max_percent: float = 10,\tlegacy_resolving_power: bool = True,\tlegacy_centroid_polyfit: bool = False)"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.kendrick_base", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.kendrick_base", "kind": "variable", "doc": "

    \n", "annotation": ": Dict"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.kendrick_rounding_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.kendrick_rounding_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'floor'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.implemented_kendrick_rounding_methods", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.implemented_kendrick_rounding_methods", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('floor', 'ceil', 'round')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.peak_derivative_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.peak_derivative_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.peak_min_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.peak_min_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.min_peak_datapoints", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.min_peak_datapoints", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.peak_max_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.peak_max_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.peak_height_max_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.peak_height_max_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "10"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.legacy_resolving_power", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.legacy_resolving_power", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MassSpecPeakSetting.legacy_centroid_polyfit", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MassSpecPeakSetting.legacy_centroid_polyfit", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting", "kind": "class", "doc": "

    Gas chromatograph processing settings class

    \n\n
    Attributes
    \n\n
      \n
    • use_deconvolution (bool, optional):\nIf True, use deconvolution. Default is False.
    • \n
    • implemented_smooth_method (tuple, optional):\nSmoothing methods that can be implemented. Default is ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar').
    • \n
    • smooth_window (int, optional):\nWindow size for smoothing the ion chromatogram. Default is 5.
    • \n
    • smooth_method (str, optional):\nSmoothing method to use. Default is 'savgol'. Other options are 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'.
    • \n
    • savgol_pol_order (int, optional):\nPolynomial order for Savitzky-Golay smoothing. Default is 2.
    • \n
    • peak_derivative_threshold (float, optional):\nThreshold for defining derivative crossing. Should be a value between 0 and 1.\nDefaults to 0.0005.
    • \n
    • peak_height_max_percent (float, optional):\nMaximum height percentage used for baseline detection. Should be a value between 1 and 100.\nDefaults to 10.
    • \n
    • peak_max_prominence_percent (float, optional):\nMaximum prominence percentage used for baseline detection. Should be a value between 1 and 100.\nDefaults to 1.
    • \n
    • min_peak_datapoints (float, optional):\nMinimum number of data points used for peak detection. Should be a value between 0 and infinity.\nDefaults to 5.
    • \n
    • max_peak_width (float, optional):\nMaximum peak width used for peak detection. Should be a value between 0 and infinity.\nDefaults to 0.1.
    • \n
    • noise_threshold_method (str, optional):\nMethod for detecting noise threshold. Default is 'manual_relative_abundance'.
    • \n
    • noise_threshold_methods_implemented (tuple, optional):\nMethods for detecting the noise threshold that are implemented. Default is ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative').
    • \n
    • std_noise_threshold (int, optional):\nDefault is 3.
    • \n
    • peak_height_min_percent (float, optional):\n0-100 % used for peak detection. Default is 0.1.
    • \n
    • peak_min_prominence_percent (float, optional):\n0-100 % used for peak detection. Default is 0.1.
    • \n
    • eic_signal_threshold (float, optional):\n0-100 % used for extracted ion chromatogram peak detection. Default is 0.01.
    • \n
    • max_rt_distance (float, optional):\nMaximum distance allowance for hierarchical cluster, in minutes. Default is 0.025.
    • \n
    • verbose_processing (bool, optional):\nIf True, print verbose processing information. Default is True.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tuse_deconvolution: bool = False,\timplemented_smooth_method: tuple = ('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar'),\tsmooth_window: int = 5,\tsmooth_method: str = 'savgol',\tsavgol_pol_order: int = 2,\tpeak_derivative_threshold: float = 0.0005,\tpeak_height_max_percent: float = 10,\tpeak_max_prominence_percent: float = 1,\tmin_peak_datapoints: float = 5,\tmax_peak_width: float = 0.1,\tnoise_threshold_method: str = 'manual_relative_abundance',\tnoise_threshold_methods_implemented: tuple = ('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative'),\tstd_noise_threshold: int = 3,\tpeak_height_min_percent: float = 0.1,\tpeak_min_prominence_percent: float = 0.1,\teic_signal_threshold: float = 0.01,\tmax_rt_distance: float = 0.025,\tverbose_processing: bool = True)"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.use_deconvolution", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.use_deconvolution", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.implemented_smooth_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.implemented_smooth_method", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('savgol', 'hanning', 'blackman', 'bartlett', 'flat', 'boxcar')"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.smooth_window", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.smooth_window", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.smooth_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.smooth_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'savgol'"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.savgol_pol_order", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.savgol_pol_order", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "2"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_derivative_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_derivative_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0005"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_height_max_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_height_max_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "10"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_max_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_max_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.min_peak_datapoints", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.min_peak_datapoints", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "5"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.max_peak_width", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.max_peak_width", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.noise_threshold_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.noise_threshold_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'manual_relative_abundance'"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.noise_threshold_methods_implemented", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.noise_threshold_methods_implemented", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('auto_relative_abundance', 'manual_relative_abundance', 'second_derivative')"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.std_noise_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.std_noise_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_height_min_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_height_min_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.peak_min_prominence_percent", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.peak_min_prominence_percent", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.eic_signal_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.eic_signal_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.01"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.max_rt_distance", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.max_rt_distance", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.025"}, {"fullname": "corems.encapsulation.factory.processingSetting.GasChromatographSetting.verbose_processing", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "GasChromatographSetting.verbose_processing", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings", "kind": "class", "doc": "

    Settings for compound search

    \n\n
    Attributes
    \n\n
      \n
    • url_database (str, optional):\nURL for the database. Default is 'sqlite:///db/pnnl_lowres_gcms_compounds.sqlite'.
    • \n
    • ri_search_range (float, optional):\nRetention index search range. Default is 35.
    • \n
    • rt_search_range (float, optional):\nRetention time search range, in minutes. Default is 1.0.
    • \n
    • correlation_threshold (float, optional):\nThreshold for correlation for spectral similarity. Default is 0.5.
    • \n
    • score_threshold (float, optional):\nThreshold for composite score. Default is 0.0.
    • \n
    • ri_spacing (float, optional):\nRetention index spacing. Default is 200.
    • \n
    • ri_std (float, optional):\nRetention index standard deviation. Default is 3.
    • \n
    • ri_calibration_compound_names (list, optional):\nList of compound names to use for retention index calibration. Default is ['Methyl Caprylate', 'Methyl Caprate', 'Methyl Pelargonate', 'Methyl Laurate', 'Methyl Myristate', 'Methyl Palmitate', 'Methyl Stearate', 'Methyl Eicosanoate', 'Methyl Docosanoate', 'Methyl Linocerate', 'Methyl Hexacosanoate', 'Methyl Octacosanoate', 'Methyl Triacontanoate'].
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.__init__", "kind": "function", "doc": "

    \n", "signature": "(\turl_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres',\tri_search_range: float = 35,\trt_search_range: float = 1.0,\tcorrelation_threshold: float = 0.5,\tscore_threshold: float = 0.0,\tri_spacing: float = 200,\tri_std: float = 3,\tri_calibration_compound_names: List = <factory>,\texploratory_mode: bool = False,\tscore_methods: tuple = ('highest_sim_score', 'highest_ss'),\toutput_score_method: str = 'All')"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.url_database", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.url_database", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/lowres'"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.ri_search_range", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.ri_search_range", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "35"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.rt_search_range", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.rt_search_range", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.correlation_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.correlation_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.5"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.score_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.score_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.ri_spacing", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.ri_spacing", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "200"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.ri_std", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.ri_std", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.ri_calibration_compound_names", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.ri_calibration_compound_names", "kind": "variable", "doc": "

    \n", "annotation": ": List"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.exploratory_mode", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.exploratory_mode", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.score_methods", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.score_methods", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('highest_sim_score', 'highest_ss')"}, {"fullname": "corems.encapsulation.factory.processingSetting.CompoundSearchSettings.output_score_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "CompoundSearchSettings.output_score_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'All'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings", "kind": "class", "doc": "

    Settings for molecular searching

    \n\n

    These are used to generate the database entries, do not change.

    \n\n
    Attributes
    \n\n
      \n
    • usedAtoms (dict, optional):\nDictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    • \n
    • min_mz (float, optional):\nMinimum m/z to use for searching. Default is 50.0.
    • \n
    • max_mz (float, optional):\nMaximum m/z to use for searching. Default is 1200.0.
    • \n
    • min_dbe (float, optional):\nMinimum double bond equivalent to use for searching. Default is 0.
    • \n
    • max_dbe (float, optional):\nMaximum double bond equivalent to use for searching. Default is 50.
    • \n
    • use_pah_line_rule (bool, optional):\nIf True, use the PAH line rule. Default is False.
    • \n
    • isRadical (bool, optional):\nIf True, search for radical ions. Default is True.
    • \n
    • isProtonated (bool, optional):\nIf True, search for protonated ions. Default is True.
    • \n
    • url_database (str, optional):\nURL for the database. Default is None.
    • \n
    • db_jobs (int, optional):\nNumber of jobs to use for database queries. Default is 1.
    • \n
    • used_atom_valences (dict, optional):\nDictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.usedAtoms", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.usedAtoms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.min_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.min_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.max_mz", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.max_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.min_dbe", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.min_dbe", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.max_dbe", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.max_dbe", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.use_pah_line_rule", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.use_pah_line_rule", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.isRadical", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.isRadical", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.isProtonated", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.isProtonated", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.url_database", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.url_database", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.db_jobs", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.db_jobs", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularLookupDictSettings.used_atom_valences", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularLookupDictSettings.used_atom_valences", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings", "kind": "class", "doc": "

    Settings for molecular searching

    \n\n
    Attributes
    \n\n
      \n
    • use_isotopologue_filter (bool, optional):\nIf True, use isotopologue filter. Default is False.
    • \n
    • isotopologue_filter_threshold (float, optional):\nThreshold for isotopologue filter. Default is 33.
    • \n
    • isotopologue_filter_atoms (tuple, optional):\nTuple of atoms to use for isotopologue filter. Default is ('Cl', 'Br').
    • \n
    • use_runtime_kendrick_filter (bool, optional):\nIf True, use runtime Kendrick filter. Default is False.
    • \n
    • use_min_peaks_filter (bool, optional):\nIf True, use minimum peaks filter. Default is True.
    • \n
    • min_peaks_per_class (int, optional):\nMinimum number of peaks per class. Default is 15.
    • \n
    • url_database (str, optional):\nURL for the database. Default is 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'.
    • \n
    • db_jobs (int, optional):\nNumber of jobs to use for database queries. Default is 3.
    • \n
    • db_chunk_size (int, optional):\nChunk size to use for database queries. Default is 300.
    • \n
    • ion_charge (int, optional):\nIon charge. Default is -1.
    • \n
    • min_hc_filter (float, optional):\nMinimum hydrogen to carbon ratio. Default is 0.3.
    • \n
    • max_hc_filter (float, optional):\nMaximum hydrogen to carbon ratio. Default is 3.
    • \n
    • min_oc_filter (float, optional):\nMinimum oxygen to carbon ratio. Default is 0.0.
    • \n
    • max_oc_filter (float, optional):\nMaximum oxygen to carbon ratio. Default is 1.2.
    • \n
    • min_op_filter (float, optional):\nMinimum oxygen to phosphorus ratio. Default is 2.
    • \n
    • use_pah_line_rule (bool, optional):\nIf True, use the PAH line rule. Default is False.
    • \n
    • min_dbe (float, optional):\nMinimum double bond equivalent to use for searching. Default is 0.
    • \n
    • max_dbe (float, optional):\nMaximum double bond equivalent to use for searching. Default is 40.
    • \n
    • mz_error_score_weight (float, optional):\nWeight for m/z error score to contribute to composite score. Default is 0.6.
    • \n
    • isotopologue_score_weight (float, optional):\nWeight for isotopologue score to contribute to composite score. Default is 0.4.
    • \n
    • adduct_atoms_neg (tuple, optional):\nTuple of atoms to use in negative polarity. Default is ('Cl', 'Br').
    • \n
    • adduct_atoms_pos (tuple, optional):\nTuple of atoms to use in positive polarity. Default is ('Na', 'K').
    • \n
    • score_methods (tuple, optional):\nTuple of score method that can be implemented.\nDefault is ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error').
    • \n
    • score_method (str, optional):\nScore method to use. Default is 'prob_score'. Options are 'S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'.
    • \n
    • output_min_score (float, optional):\nMinimum score for output. Default is 0.1.
    • \n
    • output_score_method (str, optional):\nScore method to use for output. Default is 'All Candidates'.
    • \n
    • isRadical (bool, optional):\nIf True, search for radical ions. Default is False.
    • \n
    • isProtonated (bool, optional):\nIf True, search for protonated ions. Default is True.
    • \n
    • isAdduct (bool, optional):\nIf True, search for adduct ions. Default is False.
    • \n
    • usedAtoms (dict, optional):\nDictionary of atoms and ranges. Default is {'C': (1, 90), 'H': (4, 200), 'O': (0, 12), 'N': (0, 0), 'S': (0, 0), 'P': (0, 0), 'Cl': (0, 0)}.
    • \n
    • ion_types_excluded (list, optional):\nList of ion types to exclude from molecular id search, commonly ['[M+CH3COO]-'] or ['[M+COOH]-'] depending on mobile phase content. Default is [].
    • \n
    • ionization_type (str, optional):\nIonization type. Default is 'ESI'.
    • \n
    • min_ppm_error (float, optional):\nMinimum ppm error. Default is -10.0.
    • \n
    • max_ppm_error (float, optional):\nMaximum ppm error. Default is 10.0.
    • \n
    • min_abun_error (float, optional):\nMinimum abundance error for isotopologue search. Default is -100.0.
    • \n
    • max_abun_error (float, optional):\nMaximum abundance error for isotopologue search. Default is 100.0.
    • \n
    • mz_error_range (float, optional):\nm/z error range. Default is 1.5.
    • \n
    • error_method (str, optional):\nError method. Default is 'None'. Options are 'distance', 'lowest', 'symmetrical', 'average', 'None'.
    • \n
    • mz_error_average (float, optional):\nm/z error average. Default is 0.0.
    • \n
    • used_atom_valences (dict, optional):\nDictionary of atoms and valences. Default is {'C': 4, '13C': 4, 'H': 1, 'O': 2, '18O': 2, 'N': 3, 'S': 2, '34S': 2, 'P': 3, 'Cl': 1, '37Cl': 1, 'Br': 1, 'Na': 1, 'F': 1, 'K': 0}.
    • \n
    \n"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.__init__", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tuse_isotopologue_filter: bool = False,\tisotopologue_filter_threshold: float = 33,\tisotopologue_filter_atoms: tuple = ('Cl', 'Br'),\tuse_runtime_kendrick_filter: bool = False,\tuse_min_peaks_filter: bool = True,\tmin_peaks_per_class: int = 15,\turl_database: str = 'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp',\tdb_jobs: int = 3,\tdb_chunk_size: int = 300,\tion_charge: int = -1,\tmin_hc_filter: float = 0.3,\tmax_hc_filter: float = 3,\tmin_oc_filter: float = 0.0,\tmax_oc_filter: float = 1.2,\tmin_op_filter: float = 2,\tuse_pah_line_rule: bool = False,\tmin_dbe: float = 0,\tmax_dbe: float = 40,\tmz_error_score_weight: float = 0.6,\tisotopologue_score_weight: float = 0.4,\tadduct_atoms_neg: tuple = ('Cl', 'Br'),\tadduct_atoms_pos: tuple = ('Na', 'K'),\tscore_methods: tuple = ('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error'),\tscore_method: str = 'prob_score',\toutput_min_score: float = 0.1,\toutput_score_method: str = 'All Candidates',\tisRadical: bool = False,\tisProtonated: bool = True,\tisAdduct: bool = False,\tusedAtoms: dict = <factory>,\tion_types_excluded: list = <factory>,\tionization_type: str = 'ESI',\tmin_ppm_error: float = -10.0,\tmax_ppm_error: float = 10.0,\tmin_abun_error: float = -100.0,\tmax_abun_error: float = 100.0,\tmz_error_range: float = 1.5,\terror_method: str = 'None',\tmz_error_average: float = 0.0,\tused_atom_valences: dict = <factory>)"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.use_isotopologue_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.use_isotopologue_filter", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isotopologue_filter_threshold", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isotopologue_filter_threshold", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "33"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isotopologue_filter_atoms", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isotopologue_filter_atoms", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('Cl', 'Br')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.use_runtime_kendrick_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.use_runtime_kendrick_filter", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.use_min_peaks_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.use_min_peaks_filter", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_peaks_per_class", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_peaks_per_class", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "15"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.url_database", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.url_database", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'postgresql+psycopg2://coremsappdb:coremsapppnnl@localhost:5432/coremsapp'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.db_jobs", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.db_jobs", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.db_chunk_size", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.db_chunk_size", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "300"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.ion_charge", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.ion_charge", "kind": "variable", "doc": "

    \n", "annotation": ": int", "default_value": "-1"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_hc_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_hc_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.3"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_hc_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_hc_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "3"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_oc_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_oc_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_oc_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_oc_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.2"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_op_filter", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_op_filter", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "2"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.use_pah_line_rule", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.use_pah_line_rule", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_dbe", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_dbe", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_dbe", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_dbe", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "40"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.mz_error_score_weight", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.mz_error_score_weight", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.6"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isotopologue_score_weight", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isotopologue_score_weight", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.4"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.adduct_atoms_neg", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.adduct_atoms_neg", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('Cl', 'Br')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.adduct_atoms_pos", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.adduct_atoms_pos", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('Na', 'K')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.score_methods", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.score_methods", "kind": "variable", "doc": "

    \n", "annotation": ": tuple", "default_value": "('S_P_lowest_error', 'N_S_P_lowest_error', 'lowest_error', 'prob_score', 'air_filter_error', 'water_filter_error', 'earth_filter_error')"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.score_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.score_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'prob_score'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.output_min_score", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.output_min_score", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.1"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.output_score_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.output_score_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'All Candidates'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isRadical", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isRadical", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isProtonated", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isProtonated", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "True"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.isAdduct", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.isAdduct", "kind": "variable", "doc": "

    \n", "annotation": ": bool", "default_value": "False"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.usedAtoms", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.usedAtoms", "kind": "variable", "doc": "

    \n", "annotation": ": dict"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.ion_types_excluded", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.ion_types_excluded", "kind": "variable", "doc": "

    \n", "annotation": ": list"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.ionization_type", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.ionization_type", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'ESI'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_ppm_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_ppm_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "-10.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_ppm_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_ppm_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "10.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.min_abun_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.min_abun_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "-100.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.max_abun_error", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.max_abun_error", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "100.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.mz_error_range", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.mz_error_range", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "1.5"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.error_method", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.error_method", "kind": "variable", "doc": "

    \n", "annotation": ": str", "default_value": "'None'"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.mz_error_average", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.mz_error_average", "kind": "variable", "doc": "

    \n", "annotation": ": float", "default_value": "0.0"}, {"fullname": "corems.encapsulation.factory.processingSetting.MolecularFormulaSearchSettings.used_atom_valences", "modulename": "corems.encapsulation.factory.processingSetting", "qualname": "MolecularFormulaSearchSettings.used_atom_valences", "kind": "variable", "doc": "

    \n", "annotation": ": dict"}, {"fullname": "corems.encapsulation.input", "modulename": "corems.encapsulation.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.input.parameter_from_json", "modulename": "corems.encapsulation.input.parameter_from_json", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_toml_parameters_ms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_toml_parameters_ms", "kind": "function", "doc": "

    Load parameters from a toml file and set the parameters in the mass_spec_obj

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec_obj (MassSpectrum):\ncorems MassSpectrum object
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(mass_spec_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_parameters_ms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_parameters_ms", "kind": "function", "doc": "

    Load parameters from a json file and set the parameters in the mass_spec_obj

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec_obj (MassSpectrum):\ncorems MassSpectrum object
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(mass_spec_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_toml_parameters_gcms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_toml_parameters_gcms", "kind": "function", "doc": "

    Load parameters from a toml file and set the parameters in the GCMS object

    \n\n
    Parameters
    \n\n
      \n
    • gcms_obj (GCMSBase):\ncorems GCMSBase object
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(gcms_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_parameters_gcms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_parameters_gcms", "kind": "function", "doc": "

    Load parameters from a json file and set the parameters in the GCMS object

    \n\n
    Parameters
    \n\n
      \n
    • gcms_obj (GCMSBase):\ncorems GCMSBase object
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(gcms_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_json_parameters_lcms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_json_parameters_lcms", "kind": "function", "doc": "

    Load parameters from a json file and set the parameters in the LCMS object

    \n\n
    Parameters
    \n\n
      \n
    • lcms_obj (LCMSBase):\ncorems LCMSBase object
    • \n
    • parameters_path (str):\npath to the parameters file saved as a .json, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(lcms_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_toml_parameters_lcms", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_toml_parameters_lcms", "kind": "function", "doc": "

    Load parameters from a toml file and set the parameters in the LCMS object

    \n\n
    Parameters
    \n\n
      \n
    • lcms_obj (LCMSBase):\ncorems LCMSBase object
    • \n
    • parameters_path (str):\npath to the parameters file saved as a .toml, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n", "signature": "(lcms_obj, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_toml_parameters_class", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_toml_parameters_class", "kind": "function", "doc": "

    Load parameters from a toml file and set the parameters in the instance_parameters_class

    \n\n
    Parameters
    \n\n
      \n
    • parameter_label (str):\nlabel of the parameters in the toml file
    • \n
    • instance_parameters_class (object):\ninstance of the parameters class
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n\n
    Returns
    \n\n
      \n
    • object: instance of the parameters class
    • \n
    \n", "signature": "(parameter_label, instance_parameters_class, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.input.parameter_from_json.load_and_set_parameters_class", "modulename": "corems.encapsulation.input.parameter_from_json", "qualname": "load_and_set_parameters_class", "kind": "function", "doc": "

    Load parameters from a json file and set the parameters in the instance_parameters_class

    \n\n
    Parameters
    \n\n
      \n
    • parameter_label (str):\nlabel of the parameters in the json file
    • \n
    • instance_parameters_class (object):\ninstance of the parameters class
    • \n
    • parameters_path (str, optional):\npath to the parameters file, by default False
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileNotFoundError: if the file is not found
    • \n
    \n\n
    Returns
    \n\n
      \n
    • object: instance of the parameters class
    • \n
    \n", "signature": "(parameter_label, instance_parameters_class, parameters_path=False):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output", "modulename": "corems.encapsulation.output", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.output.parameter_to_dict", "modulename": "corems.encapsulation.output.parameter_to_dict", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_all_default_data", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_all_default_data", "kind": "function", "doc": "

    Return a dictionary with all default parameters for MS and GCMS

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_data_lcms", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_data_lcms", "kind": "function", "doc": "

    Return a dictionary with all parameters for LCMSBase object

    \n\n
    Parameters
    \n\n
      \n
    • lcms_obj (LCMSBase):\nLCMSBase object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: dictionary with all parameters for LCMSBase object
    • \n
    \n", "signature": "(lcms_obj):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_lcms_default_data", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_lcms_default_data", "kind": "function", "doc": "

    Return a dictionary with all default parameters for LCMS

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_data_ms", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_data_ms", "kind": "function", "doc": "

    Return a dictionary with all parameters for MassSpectrum object

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec (MassSpectrum):\nMassSpectrum object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: dictionary with all parameters for MassSpectrum object
    • \n
    \n", "signature": "(mass_spec):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_ms_default_data", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_ms_default_data", "kind": "function", "doc": "

    Return a dictionary with all default parameters for MS including data input

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_gcms_default_data", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_gcms_default_data", "kind": "function", "doc": "

    Return a dictionary with all default parameters for GCMS

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_dict.get_dict_data_gcms", "modulename": "corems.encapsulation.output.parameter_to_dict", "qualname": "get_dict_data_gcms", "kind": "function", "doc": "

    Return a dictionary with all parameters for GCMS

    \n", "signature": "(gcms):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json", "modulename": "corems.encapsulation.output.parameter_to_json", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_all_settings_json", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_all_settings_json", "kind": "function", "doc": "

    Write JSON file into current directory with all the default settings for the CoreMS package.

    \n\n

    Parameters:

    \n\n

    filename : str, optional\n The name of the JSON file to be created. Default is 'SettingsCoreMS.json'.\nfile_path : str or Path, optional\n The path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.

    \n", "signature": "(filename='SettingsCoreMS.json', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_ms_settings_json", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_ms_settings_json", "kind": "function", "doc": "

    Write JSON file into current directory with all the mass spectrum default settings for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    • \n
    • file_path (str or Path, optional):\nThe path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.json', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_gcms_settings_json", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_gcms_settings_json", "kind": "function", "doc": "

    Write JSON file into current directory containing the default GCMS settings data.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the JSON file to be created. Default is 'SettingsCoreMS.json'.
    • \n
    • file_path (str or Path-like object, optional):\nThe path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.json', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_all_settings_toml", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_all_settings_toml", "kind": "function", "doc": "

    Write TOML file into the specified file path or the current directory with all the default settings for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    • \n
    • file_path (str or Path, optional):\nThe path where the TOML file will be saved. If not provided, the file will be saved in the current directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.toml', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_ms_settings_toml", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_ms_settings_toml", "kind": "function", "doc": "

    Write TOML file into the current directory with all the mass spectrum default settings for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the TOML file to be created. Default is 'SettingsCoreMS.toml'.
    • \n
    • file_path (str or Path, optional):\nThe path where the TOML file should be saved. If not provided, the file will be saved in the current working directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.toml', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_gcms_settings_toml", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_gcms_settings_toml", "kind": "function", "doc": "

    Write TOML file into current directory containing the default GCMS settings data.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    • \n
    • file_path (str or Path, optional):\nThe path where the TOML file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.toml', file_path=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_lcms_settings_json", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_lcms_settings_json", "kind": "function", "doc": "

    Write JSON file into current directory with all the LCMS settings data for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the JSON file. Defaults to 'SettingsCoreMS.json'.
    • \n
    • file_path (str or Path, optional):\nThe path where the JSON file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    • lcms_obj (object, optional):\nThe LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.json', file_path=None, lcms_obj=None):", "funcdef": "def"}, {"fullname": "corems.encapsulation.output.parameter_to_json.dump_lcms_settings_toml", "modulename": "corems.encapsulation.output.parameter_to_json", "qualname": "dump_lcms_settings_toml", "kind": "function", "doc": "

    Write TOML file into current directory with all the LCMS settings data for the CoreMS package.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str, optional):\nThe name of the TOML file. Defaults to 'SettingsCoreMS.toml'.
    • \n
    • file_path (str or Path, optional):\nThe path where the TOML file will be saved. If not provided, the file will be saved in the current working directory.
    • \n
    • lcms_obj (object, optional):\nThe LCMS object containing the settings data. If not provided, the settings data will be retrieved from the default settings.
    • \n
    \n", "signature": "(filename='SettingsCoreMS.toml', file_path=None, lcms_obj=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra", "modulename": "corems.mass_spectra", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc", "modulename": "corems.mass_spectra.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Calc", "modulename": "corems.mass_spectra.calc.GC_Calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations", "kind": "class", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations.calibrate_ri", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations.calibrate_ri", "kind": "function", "doc": "

    \n", "signature": "(self, ref_dict, cal_file_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations.smooth_tic", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations.smooth_tic", "kind": "function", "doc": "

    \n", "signature": "(self, tic):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations.centroid_detector", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations.centroid_detector", "kind": "function", "doc": "

    \n", "signature": "(self, tic, rt):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Calc.GC_Calculations.remove_outliers", "modulename": "corems.mass_spectra.calc.GC_Calc", "qualname": "GC_Calculations.remove_outliers", "kind": "function", "doc": "

    \n", "signature": "(self, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution", "kind": "class", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.run_deconvolution", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.run_deconvolution", "kind": "function", "doc": "

    \n", "signature": "(self, plot_res=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.centroid_detector", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.centroid_detector", "kind": "function", "doc": "

    this function has been replaced with sp.peak_picking_first_derivative\nand is not used

    \n", "signature": "(self, tic, rt):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.ion_extracted_chroma", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.ion_extracted_chroma", "kind": "function", "doc": "

    \n", "signature": "(self, mass_spectra_obj):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.hc", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.hc", "kind": "function", "doc": "

    \n", "signature": "(self, X, Y, max_rt_distance=0.025):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.find_peaks_entity", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.find_peaks_entity", "kind": "function", "doc": "

    combine eic with mathing rt apexes

    \n", "signature": "(self, eic_dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.mass_spec_factory", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.mass_spec_factory", "kind": "function", "doc": "

    \n", "signature": "(self, rt, datadict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.smooth_signal", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.smooth_signal", "kind": "function", "doc": "

    \n", "signature": "(self, signal):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.add_gcpeak", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.add_gcpeak", "kind": "function", "doc": "

    \n", "signature": "(\tself,\tnew_apex_index,\tstart_rt,\tfinal_rt,\tpeak_rt,\tsmoothed_tic,\tdatadict,\tplot_res):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.deconvolution", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.deconvolution", "kind": "function", "doc": "

    \n", "signature": "(self, peaks_entity_data, plot_res):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution.quadratic_interpolation", "modulename": "corems.mass_spectra.calc.GC_Deconvolution", "qualname": "MassDeconvolution.quadratic_interpolation", "kind": "function", "doc": "

    \n", "signature": "(self, rt_list, tic_list, apex_index):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.GC_RI_Calibration", "modulename": "corems.mass_spectra.calc.GC_RI_Calibration", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.GC_RI_Calibration.get_rt_ri_pairs", "modulename": "corems.mass_spectra.calc.GC_RI_Calibration", "qualname": "get_rt_ri_pairs", "kind": "function", "doc": "

    \n", "signature": "(gcms_ref_obj, sql_obj=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch", "modulename": "corems.mass_spectra.calc.MZSearch", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults", "kind": "class", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.__init__", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.__init__", "kind": "function", "doc": "

    \n", "signature": "(calculated_mz: float, exp_mz: float, error: float, tolerance: float)"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.calculated_mz", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.calculated_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.exp_mz", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.exp_mz", "kind": "variable", "doc": "

    \n", "annotation": ": float"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.error", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.error", "kind": "variable", "doc": "

    \n", "annotation": ": float"}, {"fullname": "corems.mass_spectra.calc.MZSearch.SearchResults.tolerance", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "SearchResults.tolerance", "kind": "variable", "doc": "

    \n", "annotation": ": float"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch", "kind": "class", "doc": "

    A class that represents a thread of control.

    \n\n

    This class can be safely subclassed in a limited fashion. There are two ways\nto specify the activity: by passing a callable object to the constructor, or\nby overriding the run() method in a subclass.

    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.__init__", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.__init__", "kind": "function", "doc": "
    Parameters
    \n\n
      \n
    • calculated_mzs ([float] calculated m/z):

    • \n
    • exp_mzs ([float] experimental m/z):

    • \n
    • method (string,):\nppm or ppb

    • \n
    • call run to trigger the m/z search algorithm
    • \n
    • or start if using it as thread
    • \n
    \n", "signature": "(\texp_mzs: List[float],\tcalculated_mzs: List[float],\ttolerance,\tmethod='ppm',\taverage_target_mz=True)"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.method", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.method", "kind": "variable", "doc": "

    method: string,\n ppm or ppb

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.results", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.results", "kind": "variable", "doc": "

    {calculated_mz: [SearchResults]}\ncontains the results of the search

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.averaged_target_mz", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.averaged_target_mz", "kind": "variable", "doc": "

    [float]\ncontains the average target m/z to be searched against

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.calculated_mzs", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.calculated_mzs", "kind": "variable", "doc": "

    [float]\ncontains the mz target to be searched against

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.exp_mzs", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.exp_mzs", "kind": "variable", "doc": "

    [float]\ncontains the exp mz to be searched against

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.tolerance", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.tolerance", "kind": "variable", "doc": "

    method: string,\n ppm or ppb

    \n"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.colapse_calculated", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.colapse_calculated", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.run", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.run", "kind": "function", "doc": "

    Method representing the thread's activity.

    \n\n

    You may override this method in a subclass. The standard run() method\ninvokes the callable object passed to the object's constructor as the\ntarget argument, if any, with sequential and keyword arguments taken\nfrom the args and kwargs arguments, respectively.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.calc_mz_error", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.calc_mz_error", "kind": "function", "doc": "
    Parameters
    \n\n
      \n
    • calculated_mz (float,):

    • \n
    • exp_mz (float):

    • \n
    • method (string,):\nppm or ppb

    • \n
    \n", "signature": "(calculated_mz, exp_mz, method='ppm'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.check_ppm_error", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.check_ppm_error", "kind": "function", "doc": "

    \n", "signature": "(tolerance, error):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.get_nominal_exp", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.get_nominal_exp", "kind": "function", "doc": "

    \n", "signature": "(self, exp_mzs) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.MZSearch.MZSearch.search_mz", "modulename": "corems.mass_spectra.calc.MZSearch", "qualname": "MZSearch.search_mz", "kind": "function", "doc": "

    \n", "signature": "(self, results, dict_nominal_exp_mz, calculated_mz, offset) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing", "modulename": "corems.mass_spectra.calc.SignalProcessing", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.peak_detector", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "peak_detector", "kind": "function", "doc": "

    Find peaks by detecting minima in the first derivative of the data\nUsed in LC/GC data processing

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • max_tic (float):\nmaximum value of the data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: tuple of indexes of the start, apex and final points of the peak
    • \n
    \n", "signature": "(tic, max_tic):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.find_nearest_scan", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "find_nearest_scan", "kind": "function", "doc": "

    Find nearest data point in a list of nodes (derivated data)\nin LC/GC this is 'scan', in MS this is 'm/z' data point

    \n\n
    Parameters
    \n\n
      \n
    • data (float):\ndata point to find the nearest node
    • \n
    • nodes (array):\narray of nodes to search for the nearest node
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: nearest node to the data point
    • \n
    \n", "signature": "(data, nodes):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.check_corrected_abundance", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "check_corrected_abundance", "kind": "function", "doc": "

    Check the corrected abundance of the peak

    \n\n
    Parameters
    \n\n
      \n
    • closest_left (int):\nindex of the closest left node
    • \n
    • closest_right (int):\nindex of the closest right node
    • \n
    • apex_index (int):\nindex of the apex node
    • \n
    • signal (array):\narray of data points to find the peaks
    • \n
    • max_signal (float):\nmaximum value of the data points
    • \n
    • signal_threshold (float):\nthreshold for the signal
    • \n
    • abun_norm (float):\nabundance normalization factor
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: corrected abundance of the peak
    • \n
    \n", "signature": "(\tclosest_left,\tclosest_right,\tapex_index,\tsignal,\tmax_signal,\tsignal_threshold,\tabun_norm):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.peak_picking_first_derivative", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "peak_picking_first_derivative", "kind": "function", "doc": "

    Find peaks by detecting minima in the first derivative of the data\nUsed in LC/GC and MS data processing\nOptional baseline correction, then peak apex detection via 1st derivative.\nFor each apex the peak datapoints surrounding the apex are determined.\nSome basic thresholding is applied (signal, number of datapoints, etc).

    \n\n
    Parameters
    \n\n
      \n
    • domain (array):\narray of data points to find the peaks
    • \n
    • signal (array):\narray of data points to find the peaks
    • \n
    • max_height (float):\nmaximum height of the peak
    • \n
    • max_prominence (float):\nmaximum prominence of the peak
    • \n
    • max_signal (float):\nmaximum signal of the peak
    • \n
    • min_peak_datapoints (int):\nminimum number of data points in the peak
    • \n
    • peak_derivative_threshold (float):\nthreshold for the peak derivative
    • \n
    • signal_threshold (float):\nthreshold for the signal
    • \n
    • correct_baseline (bool):\nflag to correct the baseline
    • \n
    • plot_res (bool):\nflag to plot the results
    • \n
    • abun_norm (float):\nabundance normalization factor
    • \n
    • check_abundance (bool):\nflag to check the abundance
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: tuple of indexes of the start, apex and final points of the peak
    • \n
    \n", "signature": "(\tdomain,\tsignal,\tmax_height,\tmax_prominence,\tmax_signal,\tmin_peak_datapoints,\tpeak_derivative_threshold,\tsignal_threshold=0.1,\tcorrect_baseline=True,\tplot_res=False,\tabun_norm=100,\tcheck_abundance=False,\tapex_indexes=[]):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.find_minima", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "find_minima", "kind": "function", "doc": "

    Find the index of the local minima in the given time-of-flight (TOF) intensity array.

    \n\n

    Parameters:

    \n\n

    index: int\n The starting index to search for the minima.\ntic: list\n TIC data points\nright : bool, optional\n Determines the direction of the search. If True, search to the right of the index. If False, search to the left of the index. Default is True.

    \n\n

    Returns:

    \n\n

    int\n The index of the local minima in the TIC array.

    \n", "signature": "(index, tic, right=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.derivate", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "derivate", "kind": "function", "doc": "

    Calculate derivative of the data points.\nReplaces nan with infinity

    \n\n
    Parameters
    \n\n
      \n
    • data_array (array):\narray of data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • array: array of the derivative of the data points
    • \n
    \n", "signature": "(data_array):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.minima_detector", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "minima_detector", "kind": "function", "doc": "

    Minima detector for the TIC data points.

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • max_tic (float):\nmaximum value of the data points
    • \n
    • peak_height_max_percent (float):\nmaximum height of the peak
    • \n
    • peak_max_prominence_percent (float):\nmaximum prominence of the peak
    • \n
    \n\n
    Returns
    \n\n
      \n
    • generator: generator of the indexes of the minima in the TIC array
    • \n
    \n", "signature": "(tic, max_tic, peak_height_max_percent, peak_max_prominence_percent):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.baseline_detector", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "baseline_detector", "kind": "function", "doc": "

    Baseline detector for the TIC data points.\nFor LC/GC data processing

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • rt (array):\narray of retention time data points
    • \n
    • peak_height_max_percent (float):\nmaximum height of the peak
    • \n
    • peak_max_prominence_percent (float):\nmaximum prominence of the peak
    • \n
    • do_interpolation (bool, optional):\nflag to interpolate the data points. Default is True
    • \n
    \n\n
    Returns
    \n\n
      \n
    • array: array of the baseline corrected data points
    • \n
    \n", "signature": "(\ttic,\trt,\tpeak_height_max_percent,\tpeak_max_prominence_percent,\tdo_interpolation=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.peak_detector_generator", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "peak_detector_generator", "kind": "function", "doc": "

    Peak detector generator for the TIC data points.

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • stds (float):\nstandard deviation
    • \n
    • method (str):\nmethod to detect the peaks\nAvailable methods: 'manual_relative_abundance', 'auto_relative_abundance', 'second_derivative'
    • \n
    • rt (array):\narray of retention time data points
    • \n
    • max_height (float):\nmaximum height of the peak
    • \n
    • min_height (float):\nminimum height of the peak
    • \n
    • max_prominence (float):\nmaximum prominence of the peak
    • \n
    • min_datapoints (int):\nminimum number of data points in the peak
    • \n
    \n\n
    Returns
    \n\n
      \n
    • generator: generator of the indexes of the peaks in the TIC array
    • \n
    \n", "signature": "(\ttic,\tstds,\tmethod,\trt,\tmax_height,\tmin_height,\tmax_prominence,\tmin_datapoints):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.smooth_signal", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "smooth_signal", "kind": "function", "doc": "

    Smooth the data using a window with requested size.

    \n\n

    This method is based on the convolution of a scaled window with the signal.\nThe signal is prepared by introducing reflected copies of the signal\n(with the window size) in both ends so that transient parts are minimized\nin the begining and end part of the output signal.

    \n\n
    Parameters
    \n\n
      \n
    • x (array):\nthe input signal
    • \n
    • window_len (int):\nthe dimension of the smoothing window; should be an odd integer
    • \n
    • window (str):\nthe type of window from 'flat', 'hanning', 'hamming', 'bartlett', 'blackman'
    • \n
    • pol_order (int):\nthe order of the polynomial to fit the data
    • \n
    • implemented_smooth_method (list):\nlist of implemented smoothing methods
    • \n
    \n\n
    Returns
    \n\n
      \n
    • y (array):\nthe smoothed signal
    • \n
    • Notes:
    • \n
    • -----
    • \n
    • See also (numpy.hanning, numpy.hamming, numpy.bartlett, numpy.blackman, numpy.convolve):

    • \n
    • scipy.signal.savgol_filter

    • \n
    \n", "signature": "(x, window_len, window, pol_order, implemented_smooth_method):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.SignalProcessing.second_derivative_threshold", "modulename": "corems.mass_spectra.calc.SignalProcessing", "qualname": "second_derivative_threshold", "kind": "function", "doc": "

    Second derivative threshold for the TIC data points.\nFor LC/GC data processing

    \n\n
    Parameters
    \n\n
      \n
    • tic (array):\narray of data points to find the peaks
    • \n
    • stds (float):\nstandard deviation
    • \n
    • rt (array):\narray of retention time data points
    • \n
    • peak_height_max_percent (float):\nmaximum height of the peak
    • \n
    \n\n
    Returns
    \n\n
      \n
    • array: array of the indexes of the data points to remove
    • \n
    \n", "signature": "(tic, stds, rt, peak_height_max_percent, peak_max_prominence_percent):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc", "modulename": "corems.mass_spectra.calc.lc_calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.calc.lc_calc.find_closest", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "find_closest", "kind": "function", "doc": "

    Find the index of closest value in A to each value in target.

    \n\n
    Parameters
    \n\n
      \n
    • A (~numpy.array):\nThe array to search (blueprint). A must be sorted.
    • \n
    • target (~numpy.array):\nThe array of values to search for. target must be sorted.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ~numpy.array: The indices of the closest values in A to each value in target.
    • \n
    \n", "signature": "(A, target):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations", "kind": "class", "doc": "

    Methods for performing LC calculations on mass spectra data.

    \n\n
    Notes
    \n\n

    This class is intended to be used as a mixin for the LCMSBase class.

    \n\n
    Methods
    \n\n
      \n
    • get_max_eic(eic_data).\nReturns the maximum EIC value from the given EIC data. A static method.
    • \n
    • smooth_tic(tic).\nSmooths the TIC data using the specified smoothing method and settings.
    • \n
    • eic_centroid_detector(rt, eic, max_eic).\nPerforms EIC centroid detection on the given EIC data.
    • \n
    • find_nearest_scan(rt).\nFinds the nearest scan to the given retention time.
    • \n
    • get_average_mass_spectrum(scan_list, apex_scan, spectrum_mode=\"profile\", ms_level=1, auto_process=True, use_parser=False, perform_checks=True, polarity=None).\nReturns an averaged mass spectrum object.
    • \n
    • find_mass_features(ms_level=1).\nFind regions of interest for a given MS level (default is MS1).
    • \n
    • integrate_mass_features(drop_if_fail=False, ms_level=1).\nIntegrate mass features of interest and extracts EICs.
    • \n
    • find_c13_mass_features().\nEvaluate mass features and mark likely C13 isotopes.
    • \n
    • deconvolute_ms1_mass_features().\nDeconvolute mass features' ms1 mass spectra.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.get_max_eic", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.get_max_eic", "kind": "function", "doc": "

    Returns the maximum EIC value from the given EIC data.

    \n\n
    Notes
    \n\n

    This is a static method.

    \n\n
    Parameters
    \n\n
      \n
    • eic_data (dict):\nA dictionary containing EIC data.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The maximum EIC value.
    • \n
    \n", "signature": "(eic_data: dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.smooth_tic", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.smooth_tic", "kind": "function", "doc": "

    Smooths the TIC or EIC data using the specified smoothing method and settings.

    \n\n
    Parameters
    \n\n
      \n
    • tic (numpy.ndarray):\nThe TIC (or EIC) data to be smoothed.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The smoothed TIC data.
    • \n
    \n", "signature": "(self, tic):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.eic_centroid_detector", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.eic_centroid_detector", "kind": "function", "doc": "

    Performs EIC centroid detection on the given EIC data.

    \n\n
    Parameters
    \n\n
      \n
    • rt (numpy.ndarray):\nThe retention time data.
    • \n
    • eic (numpy.ndarray):\nThe EIC data.
    • \n
    • max_eic (float):\nThe maximum EIC value.
    • \n
    • apex_indexes (list, optional):\nThe apexes of the EIC peaks. Defaults to [], which means that the apexes will be calculated by the function.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The indexes of left, apex, and right limits as a generator.
    • \n
    \n", "signature": "(self, rt, eic, max_eic, apex_indexes=[]):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.find_nearest_scan", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.find_nearest_scan", "kind": "function", "doc": "

    Finds the nearest scan to the given retention time.

    \n\n
    Parameters
    \n\n
      \n
    • rt (float):\nThe retention time (in minutes) to find the nearest scan for.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The scan number of the nearest scan.
    • \n
    \n", "signature": "(self, rt):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.add_peak_metrics", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.add_peak_metrics", "kind": "function", "doc": "

    Add peak metrics to the mass features.

    \n\n

    This function calculates the peak metrics for each mass feature and adds them to the mass feature objects.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.get_average_mass_spectrum", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.get_average_mass_spectrum", "kind": "function", "doc": "

    Returns an averaged mass spectrum object

    \n\n
    Parameters
    \n\n
      \n
    • scan_list (list):\nList of scan numbers to average.
    • \n
    • apex_scan (int):\nNumber of the apex scan
    • \n
    • spectrum_mode (str, optional):\nThe spectrum mode to use. Defaults to \"profile\". Not that only \"profile\" mode is supported for averaging.
    • \n
    • ms_level (int, optional):\nThe MS level to use. Defaults to 1.
    • \n
    • auto_process (bool, optional):\nIf True, the averaged mass spectrum will be auto-processed. Defaults to True.
    • \n
    • use_parser (bool, optional):\nIf True, the mass spectra will be obtained from the parser. Defaults to False.
    • \n
    • perform_checks (bool, optional):\nIf True, the function will check if the data are within the ms_unprocessed dictionary and are the correct mode. Defaults to True. Only set to False if you are sure the data are profile, and (if not using the parser) are in the ms_unprocessed dictionary! ms_unprocessed dictionary also must be indexed on scan
    • \n
    • polarity (int, optional):\nThe polarity of the mass spectra (1 or -1). If not set, the polarity will be determined from the dataset. Defaults to None. (fastest if set to -1 or 1)
    • \n
    • ms_params (MSParameters, optional):\nThe mass spectrum parameters to use. If not set (None), the globally set parameters will be used. Defaults to None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpectrumProfile: The averaged mass spectrum object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the spectrum mode is not \"profile\".\nIf the MS level is not found in the unprocessed mass spectra dictionary.\nIf not all scan numbers are found in the unprocessed mass spectra dictionary.
    • \n
    \n", "signature": "(\tself,\tscan_list,\tapex_scan,\tspectrum_mode='profile',\tms_level=1,\tauto_process=True,\tuse_parser=False,\tperform_checks=True,\tpolarity=None,\tms_params=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.find_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.find_mass_features", "kind": "function", "doc": "

    Find mass features within an LCMSBase object

    \n\n

    Note that this is a wrapper function that calls the find_mass_features_ph function, but can be extended to support other peak picking methods in the future.

    \n\n
    Parameters
    \n\n
      \n
    • ms_level (int, optional):\nThe MS level to use for peak picking Default is 1.
    • \n
    • grid (bool, optional):\nIf True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded, used for persistent homology peak picking. Default is True.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no MS level data is found on the object.\nIf persistent homology peak picking is attempted on non-profile mode data.\nIf data is not gridded and grid is False.\nIf peak picking method is not implemented.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but assigns the mass_features and eics attributes to the object.
    • \n
    \n", "signature": "(self, ms_level=1, grid=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.integrate_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.integrate_mass_features", "kind": "function", "doc": "

    Integrate mass features and extract EICs.

    \n\n

    Populates the _eics attribute on the LCMSBase object for each unique mz in the mass_features dataframe and adds data (start_scan, final_scan, area) to the mass_features attribute.

    \n\n
    Parameters
    \n\n
      \n
    • drop_if_fail (bool, optional):\nWhether to drop mass features if the EIC limit calculations fail.\nDefault is True.
    • \n
    • drop_duplicates (bool, optional):\nWhether to mass features that appear to be duplicates\n(i.e., mz is similar to another mass feature and limits of the EIC are similar or encapsulating).\nDefault is True.
    • \n
    • ms_level (int, optional):\nThe MS level to use. Default is 1.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass features are found.\nIf no MS level data is found for the given MS level (either in data or in the scan data)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the eics attribute on the LCMSBase object and adds data (start_scan, final_scan, area) to the mass_features attribute.
    • \n
    \n\n
    Notes
    \n\n

    drop_if_fail is useful for discarding mass features that do not have good shapes, usually due to a detection on a shoulder of a peak or a noisy region (especially if minimal smoothing is used during mass feature detection).

    \n", "signature": "(self, drop_if_fail=True, drop_duplicates=True, ms_level=1):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.find_c13_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.find_c13_mass_features", "kind": "function", "doc": "

    Mark likely C13 isotopes and connect to monoisoitopic mass features.

    \n\n
    Returns
    \n\n
      \n
    • None, but populates the monoisotopic_mf_id and isotopologue_type attributes to the indivual LCMSMassFeatures within the mass_features attribute of the LCMSBase object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass features are found.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.LCCalculations.deconvolute_ms1_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "LCCalculations.deconvolute_ms1_mass_features", "kind": "function", "doc": "

    Deconvolute MS1 mass features

    \n\n

    Deconvolute mass features ms1 spectrum based on the correlation of all masses within a spectrum over the EIC of the mass features

    \n\n
    Parameters
    \n\n
      \n
    • None
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but assigns the _ms_deconvoluted_idx, mass_spectrum_deconvoluted_parent,
    • \n
    • and associated_mass_features_deconvoluted attributes to the mass features in the
    • \n
    • mass_features attribute of the LCMSBase object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass features are found, must run find_mass_features() first.\nIf no EICs are found, did you run integrate_mass_features() first?
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations", "kind": "class", "doc": "

    Methods for performing calculations related to 2D peak picking via persistent homology on LCMS data.

    \n\n
    Notes
    \n\n

    This class is intended to be used as a mixin for the LCMSBase class.

    \n\n
    Methods
    \n\n
      \n
    • sparse_mean_filter(idx, V, radius=[0, 1, 1]).\nSparse implementation of a mean filter.
    • \n
    • embed_unique_indices(a).\nCreates an array of indices, sorted by unique element.
    • \n
    • sparse_upper_star(idx, V).\nSparse implementation of an upper star filtration.
    • \n
    • check_if_grid(data).\nCheck if the data is gridded in mz space.
    • \n
    • grid_data(data).\nGrid the data in the mz dimension.
    • \n
    • find_mass_features_ph(ms_level=1, grid=True).\nFind mass features within an LCMSBase object using persistent homology.
    • \n
    • cluster_mass_features(drop_children=True).\nCluster regions of interest.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.sparse_mean_filter", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.sparse_mean_filter", "kind": "function", "doc": "

    Sparse implementation of a mean filter.

    \n\n
    Parameters
    \n\n
      \n
    • idx (~numpy.array):\nEdge indices for each dimension (MxN).
    • \n
    • V (~numpy.array):\nArray of intensity data (Mx1).
    • \n
    • radius (float or list):\nRadius of the sparse filter in each dimension. Values less than\nzero indicate no connectivity in that dimension.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ~numpy.array: Filtered intensities (Mx1).
    • \n
    \n\n
    Notes
    \n\n

    This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos.\nThis is a static method.

    \n", "signature": "(idx, V, radius=[0, 1, 1]):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.embed_unique_indices", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.embed_unique_indices", "kind": "function", "doc": "

    Creates an array of indices, sorted by unique element.

    \n\n
    Parameters
    \n\n
      \n
    • a (~numpy.array):\nArray of unique elements (Mx1).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ~numpy.array: Array of indices (Mx1).
    • \n
    \n\n
    Notes
    \n\n

    This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos\nThis is a static method.

    \n", "signature": "(a):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.sparse_upper_star", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.sparse_upper_star", "kind": "function", "doc": "

    Sparse implementation of an upper star filtration.

    \n\n
    Parameters
    \n\n
      \n
    • idx (~numpy.array):\nEdge indices for each dimension (MxN).
    • \n
    • V (~numpy.array):\nArray of intensity data (Mx1).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • idx (~numpy.array):\nIndex of filtered points (Mx1).
    • \n
    • persistence (~numpy.array):\nPersistence of each filtered point (Mx1).
    • \n
    \n\n
    Notes
    \n\n

    This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos

    \n", "signature": "(self, idx, V):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.check_if_grid", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.check_if_grid", "kind": "function", "doc": "

    Check if the data are gridded in mz space.

    \n\n
    Parameters
    \n\n
      \n
    • data (DataFrame):\nDataFrame containing the mass spectrometry data. Needs to have mz and scan columns.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • bool: True if the data is gridded in the mz direction, False otherwise.
    • \n
    \n\n
    Notes
    \n\n

    This function is used within the grid_data function and the find_mass_features function and is not intended to be called directly.

    \n", "signature": "(self, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.grid_data", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.grid_data", "kind": "function", "doc": "

    Grid the data in the mz dimension.

    \n\n

    Data must be gridded prior to persistent homology calculations.

    \n\n
    Parameters
    \n\n
      \n
    • data (DataFrame):\nThe input data containing mz, scan, scan_time, and intensity columns.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The gridded data with mz, scan, scan_time, and intensity columns.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If gridding fails.
    • \n
    \n", "signature": "(self, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.find_mass_features_ph", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.find_mass_features_ph", "kind": "function", "doc": "

    Find mass features within an LCMSBase object using persistent homology.

    \n\n

    Assigns the mass_features attribute to the object (a dictionary of LCMSMassFeature objects, keyed by mass feature id)

    \n\n
    Parameters
    \n\n
      \n
    • ms_level (int, optional):\nThe MS level to use. Default is 1.
    • \n
    • grid (bool, optional):\nIf True, will regrid the data before running the persistent homology calculations (after checking if the data is gridded). Default is True.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no MS level data is found on the object.\nIf data is not gridded and grid is False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but assigns the mass_features attribute to the object.
    • \n
    \n\n
    Notes
    \n\n

    This function has been adapted from the original implementation in the Deimos package: https://github.com/pnnl/deimos

    \n", "signature": "(self, ms_level=1, grid=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.calc.lc_calc.PHCalculations.cluster_mass_features", "modulename": "corems.mass_spectra.calc.lc_calc", "qualname": "PHCalculations.cluster_mass_features", "kind": "function", "doc": "

    Cluster mass features

    \n\n

    Based on their proximity in the mz and scan_time dimensions, prioritizes the mass features with the highest persistence.

    \n\n
    Parameters
    \n\n
      \n
    • drop_children (bool, optional):\nWhether to drop the mass features that are not cluster parents. Default is True.
    • \n
    • sort_by (str, optional):\nThe column to sort the mass features by, this will determine which mass features get rolled up into a parent mass feature. Default is \"persistence\".
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass features are found.\nIf too many mass features are found.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None if drop_children is True, otherwise returns a list of mass feature ids that are not cluster parents.
    • \n
    \n", "signature": "(self, drop_children=True, sort_by='persistence'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory", "modulename": "corems.mass_spectra.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class", "modulename": "corems.mass_spectra.factory.GC_Class", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase", "kind": "class", "doc": "

    Base class for GC-MS data processing.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str, pathlib.Path, or s3path.S3Path):\nPath object containing the file location.
    • \n
    • analyzer (str, optional):\nName of the analyzer. Defaults to 'Unknown'.
    • \n
    • instrument_label (str, optional):\nLabel of the instrument. Defaults to 'Unknown'.
    • \n
    • sample_name (str, optional):\nName of the sample. If not provided, it is derived from the file location.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (pathlib.Path):\nPath object containing the file location.
    • \n
    • sample_name (str):\nName of the sample.
    • \n
    • analyzer (str):\nName of the analyzer.
    • \n
    • instrument_label (str):\nLabel of the instrument.
    • \n
    • gcpeaks (list):\nList of GCPeak objects.
    • \n
    • ri_pairs_ref (None):\nReference retention index pairs.
    • \n
    • cal_file_path (None):\nCalibration file path.
    • \n
    • _parameters (GCMSParameters):\nGC-MS parameters.
    • \n
    • _retention_time_list (list):\nList of retention times.
    • \n
    • _scans_number_list (list):\nList of scan numbers.
    • \n
    • _tic_list (list):\nList of total ion chromatogram values.
    • \n
    • _ms (dict):\nDictionary containing all mass spectra.
    • \n
    • _processed_tic (list):\nList of processed total ion chromatogram values.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • process_chromatogram(plot_res=False). Process the chromatogram.
    • \n
    • plot_gc_peaks(ax=None, color='red'). Plot the GC peaks.
    • \n
    \n", "bases": "corems.mass_spectra.calc.GC_Calc.GC_Calculations, corems.mass_spectra.calc.GC_Deconvolution.MassDeconvolution"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.__init__", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None)"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.file_location", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.analyzer", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.instrument_label", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.gcpeaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.gcpeaks", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.ri_pairs_ref", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.ri_pairs_ref", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.cal_file_path", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.cal_file_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.process_chromatogram", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.process_chromatogram", "kind": "function", "doc": "

    Process the chromatogram.

    \n\n

    This method processes the chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • plot_res (bool, optional):\nIf True, plot the results. Defaults to False.
    • \n
    \n", "signature": "(self, plot_res=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.add_mass_spectrum", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.add_mass_spectrum", "kind": "function", "doc": "

    Add a mass spectrum to the GC-MS object.

    \n\n

    This method adds a mass spectrum to the GC-MS object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec (MassSpectrum):\nMass spectrum to be added.
    • \n
    \n", "signature": "(self, mass_spec):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.set_tic_list_from_data", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.set_tic_list_from_data", "kind": "function", "doc": "

    Set the total ion chromatogram list from the mass spectra data within the GC-MS data object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.set_retention_time_from_data", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.set_retention_time_from_data", "kind": "function", "doc": "

    Set the retention time list from the mass spectra data within the GC-MS data object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.set_scans_number_from_data", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.set_scans_number_from_data", "kind": "function", "doc": "

    Set the scan number list from the mass spectra data within the GC-MS data object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.parameter", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.parameter", "kind": "variable", "doc": "

    GCMS Parameters

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.molecular_search_settings", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.molecular_search_settings", "kind": "variable", "doc": "

    Molecular Search Settings

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.chromatogram_settings", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.chromatogram_settings", "kind": "variable", "doc": "

    Chromatogram Settings

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.scans_number", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.scans_number", "kind": "variable", "doc": "

    Scans Number

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.retention_time", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.retention_time", "kind": "variable", "doc": "

    Retention Time

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.processed_tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.processed_tic", "kind": "variable", "doc": "

    Processed Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.tic", "kind": "variable", "doc": "

    Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.max_tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.max_tic", "kind": "variable", "doc": "

    Maximum Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.min_tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.min_tic", "kind": "variable", "doc": "

    Minimum Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.dynamic_range", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.dynamic_range", "kind": "variable", "doc": "

    Dynamic Range of the Total Ion Current

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.matched_peaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.matched_peaks", "kind": "variable", "doc": "

    Matched Peaks

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.sorted_gcpeaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.sorted_gcpeaks", "kind": "variable", "doc": "

    Sorted GC Peaks, by retention time

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.unique_metabolites", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.unique_metabolites", "kind": "variable", "doc": "

    Unique Metabolites

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.metabolites_data", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.metabolites_data", "kind": "variable", "doc": "

    Metabolites Data

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.no_matched_peaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.no_matched_peaks", "kind": "variable", "doc": "

    Peaks with no Matched Metabolites

    \n"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_gc_peaks", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_gc_peaks", "kind": "function", "doc": "

    Plot the GC peaks.

    \n\n

    This method plots the GC peaks.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the GC peaks. Defaults to None.
    • \n
    • color (str, optional):\nColor of the GC peaks. Defaults to 'red'.
    • \n
    \n", "signature": "(self, ax=None, color='red'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_excel", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_excel", "kind": "function", "doc": "

    Export the GC-MS data to an Excel file.

    \n\n

    This method exports the GC-MS data to an Excel file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str, pathlib.Path, or s3path.S3Path):\nPath object containing the file location.
    • \n
    • write_mode (str, optional):\nWrite mode. Defaults to 'ab'.
    • \n
    • write_metadata (bool, optional):\nIf True, write the metadata. Defaults to True.
    • \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(\tself,\tout_file_path,\twrite_mode='ab',\twrite_metadata=True,\tid_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_csv", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_csv", "kind": "function", "doc": "

    Export the GC-MS data to a CSV file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str, pathlib.Path, or s3path.S3Path):\nPath object containing the file location.
    • \n
    • separate_output (bool, optional):\nIf True, separate the output. Defaults to False.
    • \n
    • write_metadata (bool, optional):\nIf True, write the metadata. Defaults to True.
    • \n
    \n", "signature": "(\tself,\tout_file_path,\tseparate_output=False,\twrite_metadata=True,\tid_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_pandas", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_pandas", "kind": "function", "doc": "

    Export the GC-MS data to a Pandas dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str, pathlib.Path, or s3path.S3Path):\nPath object containing the file location.
    • \n
    • write_metadata (bool, optional):\nIf True, write the metadata. Defaults to True.
    • \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(self, out_file_path, write_metadata=True, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_dataframe", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_dataframe", "kind": "function", "doc": "

    Export the GC-MS data to a Pandas dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.processing_stats", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.processing_stats", "kind": "function", "doc": "

    Return the processing statistics.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.parameters_json", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.parameters_json", "kind": "function", "doc": "

    Return the parameters in JSON format.

    \n\n
    Parameters
    \n\n
      \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    • output_path (str, optional):\nPath object containing the file location. Defaults to \" \".
    • \n
    \n", "signature": "(self, id_label='corems:', output_path=' '):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_json", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_json", "kind": "function", "doc": "

    Export the GC-MS data to a JSON file.

    \n\n
    Parameters
    \n\n
      \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.to_hdf", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.to_hdf", "kind": "function", "doc": "

    Export the GC-MS data to a HDF file.

    \n\n
    Parameters
    \n\n
      \n
    • id_label (str, optional):\nLabel of the ID. Defaults to 'corems:'.
    • \n
    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_chromatogram", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_chromatogram", "kind": "function", "doc": "

    Plot the chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the chromatogram. Defaults to None.
    • \n
    • color (str, optional):\nColor of the chromatogram. Defaults to 'blue'.
    • \n
    \n", "signature": "(self, ax=None, color='blue'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_smoothed_chromatogram", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_smoothed_chromatogram", "kind": "function", "doc": "

    Plot the smoothed chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the smoothed chromatogram. Defaults to None.
    • \n
    • color (str, optional):\nColor of the smoothed chromatogram. Defaults to 'green'.
    • \n
    \n", "signature": "(self, ax=None, color='green'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_detected_baseline", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_detected_baseline", "kind": "function", "doc": "

    Plot the detected baseline.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the detected baseline. Defaults to None.
    • \n
    • color (str, optional):\nColor of the detected baseline. Defaults to 'blue'.
    • \n
    \n", "signature": "(self, ax=None, color='blue'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_baseline_subtraction", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_baseline_subtraction", "kind": "function", "doc": "

    Plot the baseline subtraction.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the baseline subtraction. Defaults to None.
    • \n
    • color (str, optional):\nColor of the baseline subtraction. Defaults to 'black'.
    • \n
    \n", "signature": "(self, ax=None, color='black'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.peaks_rt_tic", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.peaks_rt_tic", "kind": "function", "doc": "

    Return the peaks, retention time, and total ion chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • json_string (bool, optional):\nIf True, return the peaks, retention time, and total ion chromatogram in JSON format. Defaults to False.
    • \n
    \n", "signature": "(self, json_string=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.GC_Class.GCMSBase.plot_processed_chromatogram", "modulename": "corems.mass_spectra.factory.GC_Class", "qualname": "GCMSBase.plot_processed_chromatogram", "kind": "function", "doc": "

    Plot the processed chromatogram.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nAxes object to plot the processed chromatogram. Defaults to None.
    • \n
    • color (str, optional):\nColor of the processed chromatogram. Defaults to 'black'.
    • \n
    \n", "signature": "(self, ax=None, color='black'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.chromat_data", "modulename": "corems.mass_spectra.factory.chromat_data", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data", "kind": "class", "doc": "

    A class to represent total ion chromatogram data.

    \n\n

    scans: [int]\n original scan numbers\ntime: [floats]\n list of retention times\ntic: [floats]\n total ion current [chromatogram]\nbpc: [floats]\n base peak [chromatogram]\napexes: [int]\n original thermo apex scan number after peak picking

    \n"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.__init__", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tscans: List[int] = <factory>,\ttime: List[float] = <factory>,\ttic: List[float] = <factory>,\tbpc: List[float] = <factory>,\tapexes: List[int] = <factory>)"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.scans", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.scans", "kind": "variable", "doc": "

    \n", "annotation": ": List[int]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.time", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.time", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.tic", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.tic", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.bpc", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.bpc", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.TIC_Data.apexes", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "TIC_Data.apexes", "kind": "variable", "doc": "

    \n", "annotation": ": List[int]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data", "kind": "class", "doc": "

    A class to represent extracted ion chromatogram data.

    \n\n

    scans: [int]\n original scan numbers\ntime: [floats]\n list of retention times\neic: [floats]\n extracted ion chromatogram\neic_smoothed: [floats]\n extracted ion chromatogram smoothed\napexes: [int]\n original apex scan number after peak picking\nareas: [floats]\n area under the curve for each apex

    \n"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.__init__", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tscans: List[int] = <factory>,\ttime: List[float] = <factory>,\teic: List[float] = <factory>,\teic_smoothed: List[float] = <factory>,\tapexes: List[int] = <factory>,\tareas: List[float] = <factory>)"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.scans", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.scans", "kind": "variable", "doc": "

    \n", "annotation": ": List[int]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.time", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.time", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.eic", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.eic", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.eic_smoothed", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.eic_smoothed", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.apexes", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.apexes", "kind": "variable", "doc": "

    \n", "annotation": ": List[int]"}, {"fullname": "corems.mass_spectra.factory.chromat_data.EIC_Data.areas", "modulename": "corems.mass_spectra.factory.chromat_data", "qualname": "EIC_Data.areas", "kind": "variable", "doc": "

    \n", "annotation": ": List[float]"}, {"fullname": "corems.mass_spectra.factory.lc_class", "modulename": "corems.mass_spectra.factory.lc_class", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase", "kind": "class", "doc": "

    Base class for mass spectra objects.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe location of the file containing the mass spectra data.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
    • \n
    • instrument_label (str, optional):\nThe type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
    • \n
    • sample_name (str, optional):\nThe name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
    • \n
    • spectra_parser (object, optional):\nThe spectra parser object used to create the mass spectra object. Defaults to None.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • spectra_parser_class (class):\nThe class of the spectra parser used to create the mass spectra object.
    • \n
    • file_location (str or Path):\nThe location of the file containing the mass spectra data.
    • \n
    • sample_name (str):\nThe name of the sample; defaults to the file name if not provided to the parser.
    • \n
    • analyzer (str):\nThe type of analyzer used to generate the mass spectra data. Derived from the spectra parser.
    • \n
    • instrument_label (str):\nThe type of instrument used to generate the mass spectra data. Derived from the spectra parser.
    • \n
    • _scan_info (dict):\nA dictionary containing the scan data with columns for scan number, scan time, ms level, precursor m/z,\nscan text, and scan window (lower and upper).\nAssociated with the property scan_df, which returns a pandas DataFrame or can set this attribute from a pandas DataFrame.
    • \n
    • _ms (dict):\nA dictionary containing mass spectra for the dataset, keys of dictionary are scan numbers. Initialized as an empty dictionary.
    • \n
    • _ms_unprocessed (dictionary of pandas.DataFrames or None):\nA dictionary of unprocssed mass spectra data, as an (optional) intermediate data product for peak picking.\nKey is ms_level, and value is dataframe with columns for scan number, m/z, and intensity. Default is None.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • add_mass_spectra(scan_list, spectrum_mode: str = 'profile', use_parser = True, auto_process=True).\nAdd mass spectra (or singlel mass spectrum) to _ms slot, from a list of scans
    • \n
    • get_time_of_scan_id(scan).\nReturns the scan time for the specified scan number.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.__init__", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None,\tspectra_parser=None)"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.file_location", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.analyzer", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.instrument_label", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.add_mass_spectrum", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.add_mass_spectrum", "kind": "function", "doc": "

    Adds a mass spectrum to the dataset.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec (MassSpectrum):\nThe corems MassSpectrum object to be added to the dataset.
    • \n
    \n\n
    Notes
    \n\n

    This is a helper function for the add_mass_spectra() method, and is not intended to be called directly.

    \n", "signature": "(self, mass_spec):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.add_mass_spectra", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.add_mass_spectra", "kind": "function", "doc": "

    Add mass spectra to _ms dictionary, from a list of scans or single scan

    \n\n
    Notes
    \n\n

    The mass spectra will inherit the mass_spectrum, ms_peak, and molecular_search parameters from the LCMSBase object.

    \n\n
    Parameters
    \n\n
      \n
    • scan_list (list of ints):\nList of scans to use to populate _ms slot
    • \n
    • spectrum_mode (str or None):\nThe spectrum mode to use for the mass spectra.\nIf None, method will use the spectrum mode from the spectra parser to ascertain the spectrum mode (this allows for mixed types).\nDefaults to None.
    • \n
    • ms_level (int, optional):\nThe MS level to use for the mass spectra.\nThis is used to pass the molecular_search parameters from the LCMS object to the individual MassSpectrum objects.\nDefaults to 1.
    • \n
    • use_parser (bool):\nWhether to use the mass spectra parser to get the mass spectra. Defaults to True.
    • \n
    • auto_process (bool):\nWhether to auto-process the mass spectra. Defaults to True.
    • \n
    • ms_params (MSParameters or None):\nThe mass spectrum parameters to use for the mass spectra. If None, uses the globally set MSParameters.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • TypeError: If scan_list is not a list of ints
    • \n
    • ValueError: If polarity is not 'positive' or 'negative'\nIf ms_level is not 1 or 2
    • \n
    \n", "signature": "(\tself,\tscan_list,\tspectrum_mode=None,\tms_level=1,\tuse_parser=True,\tauto_process=True,\tms_params=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.get_time_of_scan_id", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.get_time_of_scan_id", "kind": "function", "doc": "

    Returns the scan time for the specified scan number.

    \n\n
    Parameters
    \n\n
      \n
    • scan (int):\nThe scan number of the desired scan time.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The scan time for the specified scan number (in minutes).
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no scan time is found for the specified scan number.
    • \n
    \n", "signature": "(self, scan):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.MassSpectraBase.scan_df", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "MassSpectraBase.scan_df", "kind": "variable", "doc": "

    pandas.DataFrame : A pandas DataFrame containing the scan info data with columns for scan number, scan time, ms level, precursor m/z, scan text, and scan window (lower and upper).

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase", "kind": "class", "doc": "

    A class representing a liquid chromatography-mass spectrometry (LC-MS) data object.

    \n\n

    This class is not intended to be instantiated directly, but rather to be instantiated by an appropriate mass spectra parser using the get_lcms_obj() method.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe location of the file containing the mass spectra data.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used to generate the mass spectra data. Defaults to 'Unknown'.
    • \n
    • instrument_label (str, optional):\nThe type of instrument used to generate the mass spectra data. Defaults to 'Unknown'.
    • \n
    • sample_name (str, optional):\nThe name of the sample; defaults to the file name if not provided to the parser. Defaults to None.
    • \n
    • spectra_parser (object, optional):\nThe spectra parser object used to create the mass spectra object. Defaults to None.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • polarity (str):\nThe polarity of the ionization mode used for the dataset.
    • \n
    • _parameters (LCMSParameters):\nThe parameters used for all methods called on the LCMSBase object. Set upon instantiation from LCMSParameters.
    • \n
    • _retention_time_list (numpy.ndarray):\nAn array of retention times for the dataset.
    • \n
    • _scans_number_list (list):\nA list of scan numbers for the dataset.
    • \n
    • _tic_list (numpy.ndarray):\nAn array of total ion current (TIC) values for the dataset.
    • \n
    • eics (dict):\nA dictionary containing extracted ion chromatograms (EICs) for the dataset.\nKey is the mz of the EIC. Initialized as an empty dictionary.
    • \n
    • mass_features (dictionary of LCMSMassFeature objects):\nA dictionary containing mass features for the dataset.\nKey is mass feature ID. Initialized as an empty dictionary.
    • \n
    • spectral_search_results (dictionary of MS2SearchResults objects):\nA dictionary containing spectral search results for the dataset.\nKey is scan number : precursor mz. Initialized as an empty dictionary.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_parameters_json().\nReturns the parameters used for the LC-MS analysis in JSON format.
    • \n
    • add_associated_ms2_dda(auto_process=True, use_parser=True, spectrum_mode=None, ms_params_key='ms2', scan_filter=None)\nAdds which MS2 scans are associated with each mass feature to the\nmass_features dictionary and optionally adds the MS2 spectra to the _ms dictionary.
    • \n
    • add_associated_ms1(auto_process=True, use_parser=True, spectrum_mode=None)\nAdds the MS1 spectra associated with each mass feature to the\nmass_features dictionary and adds the MS1 spectra to the _ms dictionary.
    • \n
    • mass_features_to_df()\nReturns a pandas dataframe summarizing the mass features in the dataset.
    • \n
    • set_tic_list_from_data(overwrite=False)\nSets the TIC list from the mass spectrum objects within the _ms dictionary.
    • \n
    • set_retention_time_from_data(overwrite=False)\nSets the retention time list from the data in the _ms dictionary.
    • \n
    • set_scans_number_from_data(overwrite=False)\nSets the scan number list from the data in the _ms dictionary.
    • \n
    \n", "bases": "MassSpectraBase, corems.mass_spectra.calc.lc_calc.LCCalculations, corems.mass_spectra.calc.lc_calc.PHCalculations, corems.molecular_id.search.lcms_spectral_search.LCMSSpectralSearch"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.__init__", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None,\tspectra_parser=None)"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.polarity", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.polarity", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.eics", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.eics", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.mass_features", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.mass_features", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.spectral_search_results", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.spectral_search_results", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.get_parameters_json", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.get_parameters_json", "kind": "function", "doc": "

    Returns the parameters stored for the LC-MS object in JSON format.

    \n\n
    Returns
    \n\n
      \n
    • str: The parameters used for the LC-MS analysis in JSON format.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.remove_unprocessed_data", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.remove_unprocessed_data", "kind": "function", "doc": "

    Removes the unprocessed data from the LCMSBase object.

    \n\n
    Parameters
    \n\n
      \n
    • ms_level (int, optional):\nThe MS level to remove the unprocessed data for. If None, removes unprocessed data for all MS levels.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If ms_level is not 1 or 2.
    • \n
    \n\n
    Notes
    \n\n

    This method is useful for freeing up memory after the data has been processed.

    \n", "signature": "(self, ms_level=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.add_associated_ms2_dda", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.add_associated_ms2_dda", "kind": "function", "doc": "

    Add MS2 spectra associated with mass features to the dataset.

    \n\n

    Populates the mass_features ms2_scan_numbers attribute (on mass_features dictionary on LCMSObject)

    \n\n
    Parameters
    \n\n
      \n
    • auto_process (bool, optional):\nIf True, auto-processes the MS2 spectra before adding it to the object's _ms dictionary. Default is True.
    • \n
    • use_parser (bool, optional):\nIf True, invoke the spectra parser to get the MS2 spectra. Default is True.
    • \n
    • spectrum_mode (str or None, optional):\nThe spectrum mode to use for the mass spectra. If None, method will use the spectrum mode\nfrom the spectra parser to ascertain the spectrum mode (this allows for mixed types).\nDefaults to None. (faster if defined, otherwise will check each scan)
    • \n
    • ms_params_key (string, optional):\nThe key of the mass spectrum parameters to use for the mass spectra, accessed from the LCMSObject.parameters.mass_spectrum attribute.\nDefaults to 'ms2'.
    • \n
    • scan_filter (str):\nA string to filter the scans to add to the _ms dictionary. If None, all scans are added. Defaults to None.\n\"hcd\" will pull out only HCD scans.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If mass_features is not set, must run find_mass_features() first.\nIf no MS2 scans are found in the dataset.\nIf no precursor m/z values are found in MS2 scans, not a DDA dataset.
    • \n
    \n", "signature": "(\tself,\tauto_process=True,\tuse_parser=True,\tspectrum_mode=None,\tms_params_key='ms2',\tscan_filter=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.add_associated_ms1", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.add_associated_ms1", "kind": "function", "doc": "

    Add MS1 spectra associated with mass features to the dataset.

    \n\n
    Parameters
    \n\n
      \n
    • auto_process (bool, optional):\nIf True, auto-processes the MS1 spectra before adding it to the object's _ms dictionary. Default is True.
    • \n
    • use_parser (bool, optional):\nIf True, invoke the spectra parser to get the MS1 spectra. Default is True.
    • \n
    • spectrum_mode (str or None, optional):\nThe spectrum mode to use for the mass spectra. If None, method will use the spectrum mode\nfrom the spectra parser to ascertain the spectrum mode (this allows for mixed types).\nDefaults to None. (faster if defined, otherwise will check each scan)
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If mass_features is not set, must run find_mass_features() first.\nIf apex scans are not profile mode, all apex scans must be profile mode for averaging.\nIf number of scans to average is not 1 or an integer with an integer median (i.e. 3, 5, 7, 9).\nIf deconvolute is True and no EICs are found, did you run integrate_mass_features() first?
    • \n
    \n", "signature": "(self, auto_process=True, use_parser=True, spectrum_mode=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.mass_features_to_df", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.mass_features_to_df", "kind": "function", "doc": "

    Returns a pandas dataframe summarizing the mass features.

    \n\n

    The dataframe contains the following columns: mf_id, mz, apex_scan, scan_time, intensity,\npersistence, area, monoisotopic_mf_id, and isotopologue_type. The index is set to mf_id (mass feature ID).

    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A pandas dataframe of mass features with the following columns:\nmf_id, mz, apex_scan, scan_time, intensity, persistence, area, monoisotopic_mf_id, and isotopologue_type.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.mass_features_ms1_annot_to_df", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.mass_features_ms1_annot_to_df", "kind": "function", "doc": "

    Returns a pandas dataframe summarizing the MS1 annotations for the mass features in the dataset.

    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A pandas dataframe of MS1 annotations for the mass features in the dataset.\nThe index is set to mf_id (mass feature ID)
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Warning: If no MS1 annotations were found for the mass features in the dataset.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.mass_features_ms2_annot_to_df", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.mass_features_ms2_annot_to_df", "kind": "function", "doc": "

    Returns a pandas dataframe summarizing the MS2 annotations for the mass features in the dataset.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_metadata (dict of MolecularMetadata objects):\nA dictionary of MolecularMetadata objects, keyed by metabref_mol_id. Defaults to None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A pandas dataframe of MS2 annotations for the mass features in the dataset,\nand optionally molecular metadata. The index is set to mf_id (mass feature ID)
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Warning: If no MS2 annotations were found for the mass features in the dataset.
    • \n
    \n", "signature": "(self, molecular_metadata=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.set_tic_list_from_data", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.set_tic_list_from_data", "kind": "function", "doc": "

    Sets the TIC list from the mass spectrum objects within the _ms dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • overwrite (bool, optional):\nIf True, overwrites the TIC list if it is already set. Defaults to False.
    • \n
    \n\n
    Notes
    \n\n

    If the _ms dictionary is incomplete, sets the TIC list to an empty list.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass spectra are found in the dataset.\nIf the TIC list is already set and overwrite is False.
    • \n
    \n", "signature": "(self, overwrite=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.set_retention_time_from_data", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.set_retention_time_from_data", "kind": "function", "doc": "

    Sets the retention time list from the data in the _ms dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • overwrite (bool, optional):\nIf True, overwrites the retention time list if it is already set. Defaults to False.
    • \n
    \n\n
    Notes
    \n\n

    If the _ms dictionary is empty or incomplete, sets the retention time list to an empty list.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass spectra are found in the dataset.\nIf the retention time list is already set and overwrite is False.
    • \n
    \n", "signature": "(self, overwrite=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.set_scans_number_from_data", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.set_scans_number_from_data", "kind": "function", "doc": "

    Sets the scan number list from the data in the _ms dictionary.

    \n\n
    Notes
    \n\n

    If the _ms dictionary is empty or incomplete, sets the scan number list to an empty list.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If no mass spectra are found in the dataset.\nIf the scan number list is already set and overwrite is False.
    • \n
    \n", "signature": "(self, overwrite=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.ms1_scans", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.ms1_scans", "kind": "variable", "doc": "

    list : A list of MS1 scan numbers for the dataset.

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.parameters", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.parameters", "kind": "variable", "doc": "

    LCMSParameters : The parameters used for the LC-MS analysis.

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.scans_number", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.scans_number", "kind": "variable", "doc": "

    list : A list of scan numbers for the dataset.

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.retention_time", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.retention_time", "kind": "variable", "doc": "

    numpy.ndarray : An array of retention times for the dataset.

    \n"}, {"fullname": "corems.mass_spectra.factory.lc_class.LCMSBase.tic", "modulename": "corems.mass_spectra.factory.lc_class", "qualname": "LCMSBase.tic", "kind": "variable", "doc": "

    numpy.ndarray : An array of TIC values for the dataset.

    \n"}, {"fullname": "corems.mass_spectra.input", "modulename": "corems.mass_spectra.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF", "modulename": "corems.mass_spectra.input.andiNetCDF", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF", "kind": "class", "doc": "

    A class for reading AndiNetCDF files and extracting mass spectra data.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe location of the AndiNetCDF file.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used (default is 'Quadruple').
    • \n
    • instrument_label (str, optional):\nThe label of the instrument (default is 'GCMS-Agilent').
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the data (default is True).
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (Path):\nThe path to the AndiNetCDF file.
    • \n
    • net_cdf_obj (Dataset):\nThe NetCDF dataset object.
    • \n
    • ionization_type (str):\nThe ionization type used in the experiment.
    • \n
    • experiment_type (str):\nThe type of experiment.
    • \n
    • list_scans (range):\nThe range of scan numbers in the dataset.
    • \n
    • initial_scan_number (int):\nThe number of the initial scan.
    • \n
    • final_scan_number (int):\nThe number of the final scan.
    • \n
    • analyzer (str):\nThe type of analyzer used.
    • \n
    • instrument_label (str):\nThe label of the instrument.
    • \n
    • gcms (GCMSBase):\nThe GCMSBase object for storing mass spectra data.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • polarity().\nGet the polarity of the ionization.
    • \n
    • get_mass_spectrum(mz, abun, rp, d_params).\nAdd a mass spectrum to the GCMSBase object.
    • \n
    • run().\nPopulate the GCMSBase object with mass spectra data.
    • \n
    • import_mass_spectra(d_params).\nImport mass spectra data from the AndiNetCDF file.
    • \n
    • get_gcms_obj().\nGet the GCMSBase object.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.__init__", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(\tfile_location: str | pathlib.Path,\tanalyzer='Quadruple',\tinstrument_label='GCMS-Agilent',\tauto_process=True)"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.ionization_type", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.ionization_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.experiment_type", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.experiment_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.list_scans", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.list_scans", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.initial_scan_number", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.initial_scan_number", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.final_scan_number", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.final_scan_number", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.analyzer", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.instrument_label", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.gcms", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.gcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.polarity", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.polarity", "kind": "variable", "doc": "

    Get the polarity of the ionization.

    \n"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.get_mass_spectrum", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.get_mass_spectrum", "kind": "function", "doc": "

    Add a mass spectrum to the GCMSBase object.

    \n\n
    Parameters
    \n\n
      \n
    • mz (array-like):\nThe m/z values of the mass spectrum.
    • \n
    • abun (array-like):\nThe abundance values of the mass spectrum.
    • \n
    • rp (array-like):\nThe resolution values of the mass spectrum.
    • \n
    • d_params (dict):\nAdditional parameters for the mass spectrum.
    • \n
    \n", "signature": "(self, mz, abun, rp, d_params):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.run", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.run", "kind": "function", "doc": "

    Populate the GCMSBase object with mass spectra data.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.import_mass_spectra", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.import_mass_spectra", "kind": "function", "doc": "

    Import mass spectra data from the AndiNetCDF file.

    \n\n
    Parameters
    \n\n
      \n
    • d_params (dict):\nAdditional parameters for the mass spectra.
    • \n
    \n", "signature": "(self, d_params):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.andiNetCDF.ReadAndiNetCDF.get_gcms_obj", "modulename": "corems.mass_spectra.input.andiNetCDF", "qualname": "ReadAndiNetCDF.get_gcms_obj", "kind": "function", "doc": "

    Get the GCMSBase object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5", "modulename": "corems.mass_spectra.input.boosterHDF5", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra", "kind": "class", "doc": "

    Class for reading HDF5 files containing booster mass spectra.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (Path or S3Path):\nThe full path to the HDF5 file.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used for the mass spectra. Defaults to \"ICR\".
    • \n
    • instrument_label (str, optional):\nThe label of the instrument. Defaults to \"21T\".
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectra. Defaults to True.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.__init__", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.__init__", "kind": "function", "doc": "

    Initialize the ReadHDF_BoosterMassSpectra class.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (Path or S3Path):\nThe full path to the HDF5 file.
    • \n
    • analyzer (str, optional):\nThe type of analyzer used for the mass spectra. Defaults to \"ICR\".
    • \n
    • instrument_label (str, optional):\nThe label of the instrument. Defaults to \"21T\".
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectra. Defaults to True.
    • \n
    \n", "signature": "(\tfile_location: pathlib.Path | s3path.S3Path,\tanalyzer='ICR',\tinstrument_label='21T',\tauto_process=True)"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.lcms", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.lcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.hdf_obj", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.hdf_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.list_scans", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.list_scans", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.initial_scan_number", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.initial_scan_number", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.final_scan_number", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.final_scan_number", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.file_location", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.auto_process", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.auto_process", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.analyzer", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.instrument_label", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.get_polarity", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.get_polarity", "kind": "function", "doc": "

    Get the polarity of a scan.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (Path or S3Path):\nThe full path to the HDF5 file.
    • \n
    • scan (int):\nThe scan number.
    • \n
    \n", "signature": "(self, file_location: pathlib.Path | s3path.S3Path, scan: int):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.get_attr_data", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.get_attr_data", "kind": "function", "doc": "

    Get the attribute data of a scan.

    \n\n
    Parameters
    \n\n
      \n
    • scan (int):\nThe scan number.
    • \n
    • attr_srt (str):\nThe attribute name.
    • \n
    \n", "signature": "(self, scan, attr_srt):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.import_mass_spectra", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.import_mass_spectra", "kind": "function", "doc": "

    Import the mass spectra from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • d_params (dict):\nThe parameters for importing the mass spectra.
    • \n
    \n", "signature": "(self, d_params: dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.get_mass_spectrum", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.get_mass_spectrum", "kind": "function", "doc": "

    Get the mass spectrum for a scan.

    \n\n
    Parameters
    \n\n
      \n
    • scan (int):\nThe scan number.
    • \n
    • d_params (dict):\nThe parameters for creating the mass spectrum.
    • \n
    \n", "signature": "(self, scan: int, d_params: dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.run", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.run", "kind": "function", "doc": "

    Run the thread to create the LCMS object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.boosterHDF5.ReadHDF_BoosterMassSpectra.get_lcms_obj", "modulename": "corems.mass_spectra.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectra.get_lcms_obj", "kind": "function", "doc": "

    Get the LCMS object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix", "modulename": "corems.mass_spectra.input.brukerSolarix", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra", "kind": "class", "doc": "

    Class for reading Bruker Solarix Transient Mass Spectra.

    \n\n
    Parameters
    \n\n
      \n
    • d_directory_location (str, pathlib.Path, or s3path.S3Path):\nPath object from pathlib containing the file location.
    • \n
    • analyzer (str, optional):\nType of analyzer used in the mass spectrometer. Defaults to \"ICR\".
    • \n
    • instrument_label (str, optional):\nLabel for the instrument. Defaults to \"15T\".
    • \n
    • auto_process (bool, optional):\nFlag indicating whether to automatically process the mass spectra. Defaults to True.
    • \n
    • keep_profile (bool, optional):\nFlag indicating whether to keep the profile data in the mass spectra. Defaults to False.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.__init__", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(\td_directory_location: str | pathlib.Path | s3path.S3Path,\tanalyzer='ICR',\tinstrument_label='15T',\tauto_process=True,\tkeep_profile=False)"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.scan_attr", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.scan_attr", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.lcms", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.lcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.auto_process", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.auto_process", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.keep_profile", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.keep_profile", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.get_scan_attr", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.get_scan_attr", "kind": "function", "doc": "

    Get the scan attributes from the scan.xml file.

    \n\n
    Returns
    \n\n
      \n
    • dict: Dictionary containing the scan number as key and a tuple of retention time and TIC as value.
    • \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.import_mass_spectra", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.import_mass_spectra", "kind": "function", "doc": "

    Import the mass spectra from the scan.xml file.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.get_mass_spectrum", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.get_mass_spectrum", "kind": "function", "doc": "

    Get the mass spectrum for a given scan number.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int):\nScan number.
    • \n
    \n", "signature": "(self, scan_number: int):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.run", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.run", "kind": "function", "doc": "

    Run the import_mass_spectra method.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.brukerSolarix.ReadBruker_SolarixTransientMassSpectra.get_lcms_obj", "modulename": "corems.mass_spectra.input.brukerSolarix", "qualname": "ReadBruker_SolarixTransientMassSpectra.get_lcms_obj", "kind": "function", "doc": "

    Get the LCMSBase object.

    \n\n
    Raises
    \n\n
      \n
    • Exception: If the LCMSBase object is empty.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5", "modulename": "corems.mass_spectra.input.corems_hdf5", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra", "kind": "class", "doc": "

    Class to read CoreMS HDF5 files and populate a LCMS or MassSpectraBase object.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\nThe location of the HDF5 file to read, including the suffix.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (str):\nThe location of the HDF5 file to read.
    • \n
    • h5pydata (h5py.File):\nThe HDF5 file object.
    • \n
    • scans (list):\nA list of the location of individual mass spectra within the HDF5 file.
    • \n
    • scan_number_list (list):\nA list of the scan numbers of the mass spectra within the HDF5 file.
    • \n
    • parameters_location (str):\nThe location of the parameters file (json or toml).
    • \n
    \n\n
    Methods
    \n\n
      \n
    • import_mass_spectra(mass_spectra).\nImports all mass spectra from the HDF5 file onto the LCMS or MassSpectraBase object.
    • \n
    • get_mass_spectrum_from_scan(scan_number).\nReturn mass spectrum data object from scan number.
    • \n
    • load().\nPlaceholder method to meet the requirements of the SpectraParserInterface.
    • \n
    • run(mass_spectra).\nRuns the importer functions to populate a LCMS or MassSpectraBase object.
    • \n
    • import_scan_info(mass_spectra).\nImports the scan info from the HDF5 file to populate the _scan_info attribute\non the LCMS or MassSpectraBase object
    • \n
    • import_ms_unprocessed(mass_spectra).\nImports the unprocessed mass spectra from the HDF5 file to populate the\n_ms_unprocessed attribute on the LCMS or MassSpectraBase object
    • \n
    • import_parameters(mass_spectra).\nImports the parameters from the HDF5 file to populate the parameters\nattribute on the LCMS or MassSpectraBase object
    • \n
    • import_mass_features(mass_spectra).\nImports the mass features from the HDF5 file to populate the mass_features\nattribute on the LCMS or MassSpectraBase object
    • \n
    • import_eics(mass_spectra).\nImports the extracted ion chromatograms from the HDF5 file to populate the\neics attribute on the LCMS or MassSpectraBase object
    • \n
    • import_spectral_search_results(mass_spectra).\nImports the spectral search results from the HDF5 file to populate the\nspectral_search_results attribute on the LCMS or MassSpectraBase object
    • \n
    • get_mass_spectra_obj().\nReturn mass spectra data object, populating the _ms list on the LCMS or\nMassSpectraBase object from the HDF5 file
    • \n
    • get_lcms_obj().\nReturn LCMSBase object, populating the majority of the attributes on the\nLCMS object from the HDF5 file
    • \n
    \n", "bases": "corems.mass_spectra.input.parserbase.SpectraParserInterface, corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum, threading.Thread"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.__init__", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(file_location: str)"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.scans", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.scans", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.scan_number_list", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.scan_number_list", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_mass_spectrum_from_scan", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_mass_spectrum_from_scan", "kind": "function", "doc": "

    Return mass spectrum data object from scan number.

    \n", "signature": "(self, scan_number):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.load", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.load", "kind": "function", "doc": "

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_ms_raw", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_ms_raw", "kind": "function", "doc": "

    \n", "signature": "(self, spectra=None, scan_df=None) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_scan_df", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_scan_df", "kind": "function", "doc": "

    Return scan data as a pandas DataFrame.

    \n", "signature": "(self) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.run", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.run", "kind": "function", "doc": "

    Runs the importer functions to populate a LCMS or MassSpectraBase object.

    \n\n
    Notes
    \n\n

    The following functions are run in order, if the HDF5 file contains the necessary data:

    \n\n
      \n
    1. import_parameters(), which populates the parameters attribute on the LCMS or MassSpectraBase object.
    2. \n
    3. import_mass_spectra(), which populates the _ms list on the LCMS or MassSpectraBase object.
    4. \n
    5. import_scan_info(), which populates the _scan_info on the LCMS or MassSpectraBase object.
    6. \n
    7. import_ms_unprocessed(), which populates the _ms_unprocessed attribute on the LCMS or MassSpectraBase object.
    8. \n
    9. import_mass_features(), which populates the mass_features attribute on the LCMS or MassSpectraBase object.
    10. \n
    11. import_eics(), which populates the eics attribute on the LCMS or MassSpectraBase object.
    12. \n
    13. import_spectral_search_results(), which populates the spectral_search_results attribute on the LCMS or MassSpectraBase object.
    14. \n
    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase or MassSpectraBase):\nThe LCMS or MassSpectraBase object to populate with mass spectra, generally instantiated with only the file_location, analyzer, and instrument_label attributes.
    • \n
    • load_raw (bool):\nIf True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates several attributes on the LCMS or MassSpectraBase object.
    • \n
    \n", "signature": "(self, mass_spectra, load_raw=True) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_mass_spectra", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_mass_spectra", "kind": "function", "doc": "

    Imports all mass spectra from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with mass spectra.
    • \n
    • load_raw (bool):\nIf True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the '_ms' list on the LCMSBase or MassSpectraBase
    • \n
    • object with mass spectra from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra, load_raw=True) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_scan_info", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_scan_info", "kind": "function", "doc": "

    Imports the scan info from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with scan info.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'scan_df' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a pandas DataFrame of the 'scan_info' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_ms_unprocessed", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_ms_unprocessed", "kind": "function", "doc": "

    Imports the unprocessed mass spectra from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with unprocessed mass spectra.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the '_ms_unprocessed' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a dictionary of the 'ms_unprocessed' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_parameters", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_parameters", "kind": "function", "doc": "

    Imports the parameters from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with parameters.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'parameters' attribute on the LCMS or MassSpectraBase
    • \n
    • object with a dictionary of the 'parameters' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_mass_features", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_mass_features", "kind": "function", "doc": "

    Imports the mass features from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with mass features.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'mass_features' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a dictionary of the 'mass_features' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_eics", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_eics", "kind": "function", "doc": "

    Imports the extracted ion chromatograms from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with extracted ion chromatograms.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'eics' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a dictionary of the 'eics' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.import_spectral_search_results", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.import_spectral_search_results", "kind": "function", "doc": "

    Imports the spectral search results from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (LCMSBase | MassSpectraBase):\nThe MassSpectraBase or LCMSBase object to populate with spectral search results.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but populates the 'spectral_search_results' attribute on the LCMSBase or MassSpectraBase
    • \n
    • object with a dictionary of the 'spectral_search_results' from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_mass_spectra_obj", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_mass_spectra_obj", "kind": "function", "doc": "

    Return mass spectra data object, populating the _ms list on MassSpectraBase object from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • load_raw (bool):\nIf True, load raw data (unprocessed) from HDF5 files for overall spectra object and individual mass spectra. Default is True.
    • \n
    \n", "signature": "(\tself,\tload_raw=True) -> corems.mass_spectra.factory.lc_class.MassSpectraBase:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.get_lcms_obj", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.get_lcms_obj", "kind": "function", "doc": "

    Return LCMSBase object, populating attributes on the LCMSBase object from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • load_raw (bool):\nIf True, load raw data (unprocessed) from HDF5 files for overall lcms object and individual mass spectra. Default is True.
    • \n
    • use_original_parser (bool):\nIf True, use the original parser to populate the LCMS object. Default is True.
    • \n
    • raw_file_path (str):\nThe location of the raw file to parse if attempting to use original parser.\nDefault is None, which attempts to get the raw file path from the HDF5 file.\nIf the original file path has moved, this parameter can be used to specify the new location.
    • \n
    \n", "signature": "(\tself,\tload_raw=True,\tuse_original_parser=True,\traw_file_path=None) -> corems.mass_spectra.factory.lc_class.LCMSBase:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.corems_hdf5.ReadCoreMSHDFMassSpectra.add_original_parser", "modulename": "corems.mass_spectra.input.corems_hdf5", "qualname": "ReadCoreMSHDFMassSpectra.add_original_parser", "kind": "function", "doc": "

    Add the original parser to the mass spectra object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectra (MassSpectraBase | LCMSBase):\nThe MassSpectraBase or LCMSBase object to add the original parser to.
    • \n
    • raw_file_path (str):\nThe location of the raw file to parse. Default is None, which attempts to get the raw file path from the HDF5 file.
    • \n
    \n", "signature": "(self, mass_spectra, raw_file_path=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList", "modulename": "corems.mass_spectra.input.massList", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText", "kind": "class", "doc": "

    Class for reading CoreMS mass spectra from a text file.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str, pathlib.Path, or s3path.S3Path):\nPath object from pathlib containing the file location
    • \n
    • analyzer (str, optional):\nName of the analyzer, by default 'Unknown'
    • \n
    • instrument_label (str, optional):\nLabel of the instrument, by default 'Unknown'
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • lcms (LCMSBase):\nLCMSBase object for storing the mass spectra data.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_scans_filename(). Get the filenames of all the scan files associated with the CoreMS file.
    • \n
    • set_filepath_datatype_and_delimiter(file_path_obj). Set the file path, data type, and delimiter based on the file path object.
    • \n
    • import_mass_spectra(). Import the mass spectra from the scan files and add them to the LCMSBase object.
    • \n
    • run(). Run the import_mass_spectra method to create the LCMSBase object.
    • \n
    • get_lcms_obj(). Get the LCMSBase object.
    • \n
    \n", "bases": "corems.mass_spectrum.input.massList.ReadCoremsMasslist, threading.Thread"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.__init__", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(file_location, analyzer='Unknown', instrument_label='Unknown')"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.lcms", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.lcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.get_scans_filename", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.get_scans_filename", "kind": "function", "doc": "

    \n", "signature": "(self) -> list:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.set_filepath_datatype_and_delimiter", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.set_filepath_datatype_and_delimiter", "kind": "function", "doc": "

    \n", "signature": "(self, file_path_obj) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.import_mass_spectra", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.import_mass_spectra", "kind": "function", "doc": "

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.run", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.run", "kind": "function", "doc": "

    Creates the LCMS object and imports mass spectra.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.massList.ReadCoremsMassSpectraText.get_lcms_obj", "modulename": "corems.mass_spectra.input.massList", "qualname": "ReadCoremsMassSpectraText.get_lcms_obj", "kind": "function", "doc": "

    Returns the LCMSBase object associated with the massList.

    \n\n

    If the LCMSBase object is already initialized, it is returned.\nOtherwise, an exception is raised.

    \n\n

    Raises:\n Exception: If the LCMSBase object is not initialized.

    \n", "signature": "(self) -> corems.mass_spectra.factory.lc_class.LCMSBase:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml", "modulename": "corems.mass_spectra.input.mzml", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser", "kind": "class", "doc": "

    A class for parsing mzml spectrometry data files into MassSpectraBase or LCMSBase objects

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe path to the mzML file to be parsed.
    • \n
    • analyzer (str, optional):\nThe type of mass analyzer used in the instrument. Default is \"Unknown\".
    • \n
    • instrument_label (str, optional):\nThe name of the instrument used to acquire the data. Default is \"Unknown\".
    • \n
    • sample_name (str, optional):\nThe name of the sample being analyzed. If not provided, the stem of the file_location path will be used.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (Path):\nThe path to the mzML file being parsed.
    • \n
    • analyzer (str):\nThe type of mass analyzer used in the instrument.
    • \n
    • instrument_label (str):\nThe name of the instrument used to acquire the data.
    • \n
    • sample_name (str):\nThe name of the sample being analyzed.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • load().\nLoad mzML file using pymzml.run.Reader and return the data as a numpy array.
    • \n
    • run(spectra=True).\nParses the mzml file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe.
    • \n
    • get_mass_spectrum_from_scan(scan_number, spectrum_mode, auto_process=True).\nParses the mzml file and returns a MassSpecBase object from a single scan.
    • \n
    • get_mass_spectra_obj().\nParses the mzml file and instantiates a MassSpectraBase object.
    • \n
    • get_lcms_obj().\nParses the mzml file and instantiates an LCMSBase object.
    • \n
    \n\n

    Inherits from SpectraParserInterface.

    \n", "bases": "corems.mass_spectra.input.parserbase.SpectraParserInterface"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.__init__", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None)"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.file_location", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.analyzer", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.instrument_label", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.load", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.load", "kind": "function", "doc": "

    Load mzML file using pymzml.run.Reader and return the data as a numpy array.

    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The mass spectra data as a numpy array.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_scan_df", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_scan_df", "kind": "function", "doc": "

    Return scan data as a pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • data (pymzml.run.Reader):\nThe mass spectra data.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
    • \n
    \n", "signature": "(self, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_ms_raw", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_ms_raw", "kind": "function", "doc": "

    Return a dictionary of mass spectra data as a pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • spectra (str):\nWhich mass spectra data to include in the output.\nOptions: None, \"ms1\", \"ms2\", \"all\".
    • \n
    • scan_df (pandas.DataFrame):\nScan dataframe. Output from get_scan_df().
    • \n
    • data (pymzml.run.Reader):\nThe mass spectra data.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary containing the mass spectra data as pandas DataFrames, with keys corresponding to the MS level.
    • \n
    \n", "signature": "(self, spectra, scan_df, data):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.run", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.run", "kind": "function", "doc": "

    Parse the mzML file and return a dictionary of spectra dataframes and a scan metadata dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • spectra (str, optional):\nWhich mass spectra data to include in the output. Default is \"all\".\nOther options: None, \"ms1\", \"ms2\".
    • \n
    • scan_df (pandas.DataFrame, optional):\nScan dataframe. If not provided, the scan dataframe is created from the mzML file.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing two elements:\n
        \n
      • A dictionary containing the mass spectra data as numpy arrays, with keys corresponding to the MS level.
      • \n
      • A pandas DataFrame containing metadata for each scan, including scan number, MS level, polarity, and scan time.
      • \n
    • \n
    \n", "signature": "(self, spectra='all', scan_df=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_mass_spectrum_from_scan", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_mass_spectrum_from_scan", "kind": "function", "doc": "

    Instantiate a mass spectrum object from the mzML file.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int):\nThe scan number to be parsed.
    • \n
    • spectrum_mode (str):\nThe type of spectrum to instantiate. Must be 'profile' or 'centroid'.
    • \n
    • polarity (int):\nThe polarity of the scan. Must be -1 or 1.
    • \n
    • auto_process (bool, optional):\nIf True, process the mass spectrum. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile | MassSpecCentroid: The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    • \n
    \n", "signature": "(self, scan_number, spectrum_mode, auto_process=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_mass_spectra_obj", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_mass_spectra_obj", "kind": "function", "doc": "

    Instantiate a MassSpectraBase object from the mzML file.

    \n\n
    Returns
    \n\n
      \n
    • MassSpectraBase: The MassSpectra object containing the parsed mass spectra.\nThe object is instantiated with the mzML file, analyzer, instrument, sample name, and scan dataframe.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.mzml.MZMLSpectraParser.get_lcms_obj", "modulename": "corems.mass_spectra.input.mzml", "qualname": "MZMLSpectraParser.get_lcms_obj", "kind": "function", "doc": "

    Instantiates an LCMSBase object from the mzML file.

    \n\n
    Parameters
    \n\n
      \n
    • spectra (str, optional):\nWhich mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • LCMSBase: LCMS object containing mass spectra data.\nThe object is instantiated with the mzML file, analyzer, instrument, sample name, scan dataframe,\nand mz dataframe(s), as well as lists of scan numbers, retention times, and TICs.
    • \n
    \n", "signature": "(self, spectra='all'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase", "modulename": "corems.mass_spectra.input.parserbase", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface", "kind": "class", "doc": "

    Interface for parsing mass spectra data into MassSpectraBase objects.

    \n\n
    Methods
    \n\n
      \n
    • load().\nLoad mass spectra data.
    • \n
    • run().\nParse mass spectra data.
    • \n
    • get_mass_spectra_obj().\nReturn MassSpectraBase object with several attributes populated
    • \n
    • get_mass_spectrum_from_scan(scan_number).\nReturn MassSpecBase data object from scan number.
    • \n
    \n\n
    Notes
    \n\n

    This is an abstract class and should not be instantiated directly.

    \n", "bases": "abc.ABC"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.load", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.load", "kind": "function", "doc": "

    Load mass spectra data.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.run", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.run", "kind": "function", "doc": "

    Parse mass spectra data.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.get_scan_df", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.get_scan_df", "kind": "function", "doc": "

    Return scan data as a pandas DataFrame.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.get_ms_raw", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.get_ms_raw", "kind": "function", "doc": "

    Return a dictionary of mass spectra data as a pandas DataFrame.

    \n", "signature": "(self, spectra, scan_df):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.get_mass_spectra_obj", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.get_mass_spectra_obj", "kind": "function", "doc": "

    Return mass spectra data object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.parserbase.SpectraParserInterface.get_mass_spectrum_from_scan", "modulename": "corems.mass_spectra.input.parserbase", "qualname": "SpectraParserInterface.get_mass_spectrum_from_scan", "kind": "function", "doc": "

    Return mass spectrum data object from scan number.

    \n", "signature": "(self, scan_number, spectrum_mode, auto_process=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader", "modulename": "corems.mass_spectra.input.rawFileReader", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass", "kind": "class", "doc": "

    Class for parsing Thermo Raw files and extracting information from them.

    \n\n

    Parameters:

    \n\n

    file_location : str or pathlib.Path or s3path.S3Path\n Thermo Raw file path or S3 path.

    \n\n

    Attributes:

    \n\n

    file_path : str or pathlib.Path or s3path.S3Path\n The file path of the Thermo Raw file.\nparameters : LCMSParameters\n The LCMS parameters for the Thermo Raw file.\nchromatogram_settings : LiquidChromatographSetting\n The chromatogram settings for the Thermo Raw file.\nscans : list or tuple\n The selected scans for the Thermo Raw file.\nstart_scan : int\n The starting scan number for the Thermo Raw file.\nend_scan : int\n The ending scan number for the Thermo Raw file.

    \n\n

    Methods:

    \n\n
      \n
    • set_msordertype(scanFilter, mstype: str = 'ms1') -> scanFilter\nConvert the user-passed MS Type string to a Thermo MSOrderType object.
    • \n
    • get_creation_time() -> datetime.datetime\nExtract the creation date stamp from the .RAW file and return it as a formatted datetime object.
    • \n
    • remove_temp_file()\nRemove the temporary file if the path is from S3Path.
    • \n
    • get_polarity_mode(scan_number: int) -> int\nGet the polarity mode for the given scan number.
    • \n
    • get_filter_for_scan_num(scan_number: int) -> List[str]\nGet the filter for the given scan number.
    • \n
    • check_full_scan(scan_number: int) -> bool\nCheck if the given scan number is a full scan.
    • \n
    • get_all_filters() -> Tuple[Dict[int, str], List[str]]\nGet all scan filters for the Thermo Raw file.
    • \n
    • get_scan_header(scan: int) -> Dict[str, Any]\nGet the full dictionary of scan header metadata for the given scan number.
    • \n
    • get_rt_time_from_trace(trace) -> Tuple[List[float], List[float], List[int]]\nGet the retention time, intensity, and scan number from the given trace.
    • \n
    • get_eics(target_mzs: List[float], tic_data: Dict[str, Any], ms_type: str = 'MS !d',\n peak_detection: bool = True, smooth: bool = True, plot: bool = False,\n ax: Optional[matplotlib.axes.Axes] = None, legend: bool = False) -> Tuple[Dict[float, EIC_Data], matplotlib.axes.Axes]\nGet the extracted ion chromatograms (EICs) for the target m/z values.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.__init__", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.__init__", "kind": "function", "doc": "

    file_location: str, pathlib.Path or s3path.S3Path\nThermo Raw file path

    \n", "signature": "(file_location)"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.iRawDataPlus", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.iRawDataPlus", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.res", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.res", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.file_path", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.file_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.iFileHeader", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.iFileHeader", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.parameters", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.parameters", "kind": "variable", "doc": "

    Get or set the LCMSParameters object.

    \n", "annotation": ": corems.encapsulation.factory.parameters.LCMSParameters"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.chromatogram_settings", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.chromatogram_settings", "kind": "variable", "doc": "

    Get or set the LiquidChromatographSetting object.

    \n", "annotation": ": corems.encapsulation.factory.processingSetting.LiquidChromatographSetting"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.scans", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.scans", "kind": "variable", "doc": "

    scans : list or tuple\nIf a list, uses Thermo AverageScans for the selected scans; otherwise (a tuple) uses Thermo AverageScansInScanRange for a scan range

    \n", "annotation": ": list | tuple"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.start_scan", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.start_scan", "kind": "variable", "doc": "

    Get the starting scan number for the Thermo Raw file.

    \n", "annotation": ": int"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.end_scan", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.end_scan", "kind": "variable", "doc": "

    Get the ending scan number for the Thermo Raw file.

    \n", "annotation": ": int"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.set_msordertype", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.set_msordertype", "kind": "function", "doc": "

    Function to convert user passed string MS Type to Thermo MSOrderType object\nLimited to MS1 through MS10.

    \n\n

    Parameters:

    \n\n

    scanFilter : Thermo.ScanFilter\n The scan filter object.\nmstype : str, optional\n The MS Type string, by default 'ms1'

    \n", "signature": "(self, scanFilter, mstype: str = 'ms1'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_creation_time", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_creation_time", "kind": "function", "doc": "

    Extract the creation date stamp from the .RAW file\nReturn formatted creation date stamp.

    \n", "signature": "(self) -> datetime.datetime:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.remove_temp_file", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.remove_temp_file", "kind": "function", "doc": "

    If the path is an S3Path, the data cannot be serialized to io.ByteStream and\na temporary copy is stored in the temp dir.\nUse this function only at the end of your execution script;\nsome LCMS class methods depend on this file.

    \n", "signature": "(self) -> None:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_polarity_mode", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_polarity_mode", "kind": "function", "doc": "

    Get the polarity mode for the given scan number.

    \n\n

    Parameters:

    \n\n

    scan_number : int\n The scan number.

    \n\n

    Raises:

    \n\n

    Exception\n If the polarity mode is unknown.

    \n", "signature": "(self, scan_number: int) -> int:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_filter_for_scan_num", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_filter_for_scan_num", "kind": "function", "doc": "

    Returns the closest matching run time that corresponds to scan_number for the current\ncontroller. This function is only supported for MS device controllers.\ne.g. ['FTMS', '-', 'p', 'NSI', 'Full', 'ms', '[200.00-1000.00]']

    \n\n

    Parameters:

    \n\n

    scan_number : int\n The scan number.

    \n", "signature": "(self, scan_number: int) -> System.Collections.Generic.List[String]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.check_full_scan", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.check_full_scan", "kind": "function", "doc": "

    \n", "signature": "(self, scan_number: int) -> bool:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_all_filters", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_all_filters", "kind": "function", "doc": "

    Get all scan filters.\nThis function is only supported for MS device controllers.\ne.g. ['FTMS', '-', 'p', 'NSI', 'Full', 'ms', '[200.00-1000.00]']

    \n", "signature": "(self) -> Tuple[Dict[int, str], System.Collections.Generic.List[String]]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_scan_header", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_scan_header", "kind": "function", "doc": "

    Get full dictionary of scan header meta data, i.e. AGC status, ion injection time, etc.

    \n\n

    Parameters:

    \n\n

    scan : int\n The scan number.

    \n", "signature": "(self, scan: int) -> Dict[str, Any]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_rt_time_from_trace", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_rt_time_from_trace", "kind": "function", "doc": "

    trace: ThermoFisher.CommonCore.Data.Business.ChromatogramSignal

    \n", "signature": "(\ttrace) -> Tuple[System.Collections.Generic.List[Double], System.Collections.Generic.List[Double], System.Collections.Generic.List[Int32]]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_eics", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_eics", "kind": "function", "doc": "

    ms_type: str ('MS', 'MS2')\nstart_scan: int default -1 will select the lowest available\nend_scan: int default -1 will select the highest available

    \n\n

    returns:

    \n\n
    chroma: dict{target_mz: EIC_Data(\n                            Scans: [int]\n                                original thermo scan numbers\n                            Time: [floats]\n                                list of retention times\n                            TIC: [floats]\n                                total ion chromatogram\n                            Apexes: [int]\n                                original thermo apex scan number after peak picking\n                            )\n
    \n", "signature": "(\tself,\ttarget_mzs: System.Collections.Generic.List[Double],\ttic_data: Dict[str, Any],\tms_type='MS !d',\tpeak_detection=True,\tsmooth=True,\tplot=False,\tax: Optional[matplotlib.axes._axes.Axes] = None,\tlegend=False) -> Tuple[Dict[float, corems.mass_spectra.factory.chromat_data.EIC_Data], matplotlib.axes._axes.Axes]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_tic", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_tic", "kind": "function", "doc": "

    ms_type: str ('MS !d', 'MS2', None)\n if you use None you get all scans.\npeak_detection: bool\nsmooth: bool\nplot: bool\nax: matplotlib axis object\ntrace_type: str ('TIC','BPC')

    \n\n

    returns:\n chroma: dict\n {\n Scan: [int]\n original thermo scan numbers\n Time: [floats]\n list of retention times\n TIC: [floats]\n total ion chromatogram\n Apexes: [int]\n original thermo apex scan number after peak picking\n }

    \n", "signature": "(\tself,\tms_type='MS !d',\tpeak_detection=True,\tsmooth=True,\tplot=False,\tax=None,\ttrace_type='TIC') -> Tuple[corems.mass_spectra.factory.chromat_data.TIC_Data, matplotlib.axes._axes.Axes]:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_average_mass_spectrum", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_average_mass_spectrum", "kind": "function", "doc": "

    Averages mass spectra over a scan range using Thermo's AverageScansInScanRange method\nor a scan list using Thermo's AverageScans method\nspectrum_mode: str\n centroid or profile mass spectrum\nauto_process: bool\n If true performs peak picking, and noise threshold calculation after creation of mass spectrum object\nms_type: str\n String of form 'ms1' or 'ms2' or 'MS3' etc. Valid up to MS10.\n Internal function converts to Thermo MSOrderType class.

    \n", "signature": "(\tself,\tspectrum_mode: str = 'profile',\tauto_process: bool = True,\tppm_tolerance: float = 5.0,\tms_type: str = 'MS1') -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile | corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.set_metadata", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.set_metadata", "kind": "function", "doc": "

    Collect metadata to be ingested in the mass spectrum object

    \n\n

    scans_list: list[int] or false\nlastScanNumber: int\nfirstScanNumber: int

    \n", "signature": "(\tself,\tfirstScanNumber=0,\tlastScanNumber=0,\tscans_list=False,\tlabel='Thermo_Profile'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_centroid_msms_data", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_centroid_msms_data", "kind": "function", "doc": "

    Deprecated since version 2.0:\nThis function will be removed in CoreMS 2.0. Please use get_average_mass_spectrum() instead for similar functionality.

    \n", "signature": "(self, scan):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ThermoBaseClass.get_average_mass_spectrum_by_scanlist", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ThermoBaseClass.get_average_mass_spectrum_by_scanlist", "kind": "function", "doc": "

    Averages selected scans mass spectra using Thermo's AverageScans method\nscans_list: list[int]\nauto_process: bool\n If true performs peak picking, and noise threshold calculation after creation of mass spectrum object\nReturns:\n MassSpecProfile

    \n\n

    Deprecated since version 2.0.

    \n\n

    This function will be removed in CoreMS 2.0. Please use get_average_mass_spectrum() instead for similar functionality.

    \n", "signature": "(\tself,\tscans_list: System.Collections.Generic.List[Int32],\tauto_process: bool = True,\tppm_tolerance: float = 5.0) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile:", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader", "kind": "class", "doc": "

    A class for parsing Thermo RAW mass spectrometry data files and instantiating MassSpectraBase or LCMSBase objects

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or Path):\nThe path to the RAW file to be parsed.
    • \n
    • analyzer (str, optional):\nThe type of mass analyzer used in the instrument. Default is \"Unknown\".
    • \n
    • instrument_label (str, optional):\nThe name of the instrument used to acquire the data. Default is \"Unknown\".
    • \n
    • sample_name (str, optional):\nThe name of the sample being analyzed. If not provided, the stem of the file_location path will be used.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_location (Path):\nThe path to the RAW file being parsed.
    • \n
    • analyzer (str):\nThe type of mass analyzer used in the instrument.
    • \n
    • instrument_label (str):\nThe name of the instrument used to acquire the data.
    • \n
    • sample_name (str):\nThe name of the sample being analyzed.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run(spectra=True).\nParses the RAW file and returns a dictionary of mass spectra dataframes and a scan metadata dataframe.
    • \n
    • get_mass_spectrum_from_scan(scan_number, polarity, auto_process=True)\nParses the RAW file and returns a MassSpecBase object from a single scan.
    • \n
    • get_mass_spectra_obj().\nParses the RAW file and instantiates a MassSpectraBase object.
    • \n
    • get_lcms_obj().\nParses the RAW file and instantiates an LCMSBase object.
    • \n
    • get_icr_transient_times().\nReturn a list for transient time targets for all scans, or selected scans range
    • \n
    \n\n

    Inherits from ThermoBaseClass and SpectraParserInterface

    \n", "bases": "ThermoBaseClass, corems.mass_spectra.input.parserbase.SpectraParserInterface"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.__init__", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.__init__", "kind": "function", "doc": "

    file_location: str, pathlib.Path or s3path.S3Path\nThermo Raw file path

    \n", "signature": "(\tfile_location,\tanalyzer='Unknown',\tinstrument_label='Unknown',\tsample_name=None)"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.file_location", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.analyzer", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.instrument_label", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.load", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.load", "kind": "function", "doc": "

    Load mass spectra data.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_scan_df", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_scan_df", "kind": "function", "doc": "

    Return scan data as a pandas DataFrame.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_ms_raw", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_ms_raw", "kind": "function", "doc": "

    Return a dictionary of mass spectra data as a pandas DataFrame.

    \n", "signature": "(self, spectra, scan_df):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.run", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.run", "kind": "function", "doc": "

    Extracts mass spectra data from a raw file.

    \n\n
    Parameters
    \n\n
      \n
    • spectra (str, optional):\nWhich mass spectra data to include in the output. Default is all. Other options: none, ms1, ms2.
    • \n
    • scan_df (pandas.DataFrame, optional):\nScan dataframe. If not provided, the scan dataframe is created from the RAW file.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing two elements:\n
        \n
      • A dictionary containing mass spectra data, separated by MS level.
      • \n
      • A pandas DataFrame containing scan information, including scan number, scan time, TIC, MS level,\nscan text, scan window lower and upper bounds, polarity, and precursor m/z (if applicable).
      • \n
    • \n
    \n", "signature": "(self, spectra='all', scan_df=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_mass_spectrum_from_scan", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_mass_spectrum_from_scan", "kind": "function", "doc": "

    Instantiate a MassSpecBase object from a single scan number from the binary file, currently only supports profile mode.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int):\nThe scan number to extract the mass spectrum from.
    • \n
    • polarity (int):\nThe polarity of the scan. 1 for positive mode, -1 for negative mode.
    • \n
    • spectrum_mode (str):\nThe type of mass spectrum to extract. Must be 'profile' or 'centroid'.
    • \n
    • auto_process (bool, optional):\nIf True, perform peak picking and noise threshold calculation after creating the mass spectrum object. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile | MassSpecCentroid: The MassSpecProfile or MassSpecCentroid object containing the parsed mass spectrum.
    • \n
    \n", "signature": "(self, scan_number, spectrum_mode, auto_process=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_mass_spectra_obj", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_mass_spectra_obj", "kind": "function", "doc": "

    Instantiate a MassSpectraBase object from the binary data file.

    \n\n
    Returns
    \n\n
      \n
    • MassSpectraBase: The MassSpectra object containing the parsed mass spectra. The object is instantiated with the RAW file, analyzer, instrument, sample name, and scan dataframe.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_lcms_obj", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_lcms_obj", "kind": "function", "doc": "

    Instantiates an LCMSBase object from the RAW file.

    \n\n
    Parameters
    \n\n
      \n
    • verbose (bool, optional):\nIf True, print progress messages. Default is True.
    • \n
    • spectra (str, optional):\nWhich mass spectra data to include in the output. Default is \"all\". Other options: \"none\", \"ms1\", \"ms2\".
    • \n
    \n\n
    Returns
    \n\n
      \n
    • LCMSBase: LCMS object containing mass spectra data. The object is instantiated with the file location, analyzer, instrument, sample name, scan info, mz dataframe (as specified), polarity, as well as the attributes holding the scans, retention times, and tics.
    • \n
    \n", "signature": "(self, spectra='all'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.input.rawFileReader.ImportMassSpectraThermoMSFileReader.get_icr_transient_times", "modulename": "corems.mass_spectra.input.rawFileReader", "qualname": "ImportMassSpectraThermoMSFileReader.get_icr_transient_times", "kind": "function", "doc": "

    Return a list for transient time targets for all scans, or selected scans range

    \n\n
    Notes
    \n\n

    Resolving Power and Transient time targets based on 7T FT-ICR MS system

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output", "modulename": "corems.mass_spectra.output", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export", "modulename": "corems.mass_spectra.output.export", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.ion_type_dict", "modulename": "corems.mass_spectra.output.export", "qualname": "ion_type_dict", "kind": "variable", "doc": "

    \n", "default_value": "{'M+': [{}, {}], 'protonated': [{'H': 1}, {}], '[M+H]+': [{'H': 1}, {}], '[M+NH4]+': [{'N': 1, 'H': 4}, {}], '[M+Na]+': [{'Na': 1}, {}], '[M+K]+': [{'K': 1}, {}], '[M+2Na+Cl]+': [{'Na': 2, 'Cl': 1}, {}], '[M+2Na-H]+': [{'Na': 2}, {'H': 1}], '[M+C2H3Na2O2]+': [{'C': 2, 'H': 3, 'Na': 2, 'O': 2}, {}], '[M+C4H10N3]+': [{'C': 4, 'H': 10, 'N': 3}, {}], '[M+NH4+ACN]+': [{'C': 2, 'H': 7, 'N': 2}, {}], '[M+H-H2O]+': [{}, {'H': 1, 'O': 1}], 'de-protonated': [{}, {'H': 1}], '[M-H]-': [{}, {'H': 1}], '[M+Cl]-': [{'Cl': 1}, {}], '[M+HCOO]-': [{'C': 1, 'H': 1, 'O': 2}, {}], '[M+CH3COO]-': [{'C': 2, 'H': 3, 'O': 2}, {}], '[M+2NaAc+Cl]-': [{'Na': 2, 'C': 2, 'H': 3, 'O': 2, 'Cl': 1}, {}], '[M+K-2H]-': [{'K': 1}, {'H': 2}], '[M+Na-2H]-': [{'Na': 1}, {'H': 2}]}"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport", "kind": "class", "doc": "

    A class to export low resolution GC-MS data.

    \n\n

    This class provides methods to export low resolution GC-MS data to various formats such as Excel, CSV, HDF5, and Pandas DataFrame.

    \n\n

    Parameters:

    \n\n

    out_file_path : str\n The output file path.\ngcms : object\n The low resolution GCMS object.

    \n\n

    Attributes:

    \n\n

    output_file : Path\n The output file path as a Path object.\ngcms : object\n The low resolution GCMS object.

    \n\n

    Methods:

    \n\n
      \n
    • get_pandas_df(id_label=\"corems:\"). Get the exported data as a Pandas DataFrame.
    • \n
    • get_json(nan=False, id_label=\"corems:\"). Get the exported data as a JSON string.
    • \n
    • to_pandas(write_metadata=True, id_label=\"corems:\"). Export the data to a Pandas DataFrame and save it as a pickle file.
    • \n
    • to_excel(write_mode='a', write_metadata=True, id_label=\"corems:\"),\nExport the data to an Excel file.
    • \n
    • to_csv(separate_output=False, write_mode=\"w\", write_metadata=True, id_label=\"corems:\").\nExport the data to a CSV file.
    • \n
    • to_hdf(id_label=\"corems:\").\nExport the data to an HDF5 file.
    • \n
    • get_data_stats(gcms).\nGet statistics about the GCMS data.
    • \n
    \n"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.__init__", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.__init__", "kind": "function", "doc": "

    \n", "signature": "(out_file_path, gcms)"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.output_file", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.output_file", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.gcms", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.gcms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_pandas_df", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_pandas_df", "kind": "function", "doc": "

    Get the exported data as a Pandas DataFrame.

    \n\n

    Parameters:

    \n\n

    id_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n\n

    Returns:

    \n\n

    DataFrame\n The exported data as a Pandas DataFrame.

    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_json", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_json", "kind": "function", "doc": "

    Get the exported data as a JSON string.

    \n\n

    Parameters:

    \n\n

    nan : bool, optional\n Whether to include NaN values in the JSON string. Default is False.\nid_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n", "signature": "(self, nan=False, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.to_pandas", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.to_pandas", "kind": "function", "doc": "

    Export the data to a Pandas DataFrame and save it as a pickle file.

    \n\n

    Parameters:

    \n\n

    write_metadata : bool, optional\n Whether to write metadata to the output file.\nid_label : str, optional\n The ID label for the data.

    \n", "signature": "(self, write_metadata=True, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.to_excel", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.to_excel", "kind": "function", "doc": "

    Export the data to an Excel file.

    \n\n

    Parameters:

    \n\n

    write_mode : str, optional\n The write mode for the Excel file. Default is 'a' (append).\nwrite_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.\nid_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n", "signature": "(self, write_mode='a', write_metadata=True, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.to_csv", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.to_csv", "kind": "function", "doc": "

    Export the data to a CSV file.

    \n\n

    Parameters:

    \n\n

    separate_output : bool, optional\n Whether to separate the output into multiple files. Default is False.\nwrite_mode : str, optional\n The write mode for the CSV file. Default is 'w' (write).\nwrite_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.\nid_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n", "signature": "(\tself,\tseparate_output=False,\twrite_mode='w',\twrite_metadata=True,\tid_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.to_hdf", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.to_hdf", "kind": "function", "doc": "

    Export the data to an HDF5 file.

    \n\n

    Parameters:

    \n\n

    id_label : str, optional\n The ID label for the data. Default is \"corems:\".

    \n", "signature": "(self, id_label='corems:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_data_stats", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_data_stats", "kind": "function", "doc": "

    Get statistics about the GCMS data.

    \n\n

    Parameters:

    \n\n

    gcms : object\n The low resolution GCMS object.

    \n\n

    Returns:

    \n\n

    dict\n A dictionary containing the data statistics.

    \n", "signature": "(self, gcms):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_calibration_stats", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_calibration_stats", "kind": "function", "doc": "

    Get statistics about the GC-MS calibration.

    \n\n

    Parameters:

    \n", "signature": "(self, gcms, id_label):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_blank_stats", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_blank_stats", "kind": "function", "doc": "

    Get statistics about the GC-MS blank.

    \n", "signature": "(self, gcms):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_instrument_metadata", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_instrument_metadata", "kind": "function", "doc": "

    Get metadata about the GC-MS instrument.

    \n", "signature": "(self, gcms):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_data_metadata", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_data_metadata", "kind": "function", "doc": "

    Get metadata about the GC-MS data.

    \n\n

    Parameters:

    \n\n

    gcms : object\n The low resolution GCMS object.\nid_label : str\n The ID label for the data.\noutput_path : str\n The output file path.

    \n\n

    Returns:

    \n\n

    dict\n A dictionary containing the data metadata.

    \n", "signature": "(self, gcms, id_label, output_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_parameters_json", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_parameters_json", "kind": "function", "doc": "

    Get the parameters as a JSON string.

    \n\n

    Parameters:

    \n\n

    gcms : GCMS object\n The low resolution GCMS object.\nid_label : str\n The ID label for the data.\noutput_path : str\n The output file path.

    \n\n

    Returns:

    \n\n

    str\n The parameters as a JSON string.

    \n", "signature": "(self, gcms, id_label, output_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.write_settings", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.write_settings", "kind": "function", "doc": "

    Write the settings to a JSON file.

    \n\n

    Parameters:

    \n\n

    output_path : str\n The output file path.\ngcms : GCMS object\n The low resolution GCMS object.\nid_label : str\n The ID label for the data. Default is \"emsl:\".

    \n", "signature": "(self, output_path, gcms, id_label='emsl:'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LowResGCMSExport.get_list_dict_data", "modulename": "corems.mass_spectra.output.export", "qualname": "LowResGCMSExport.get_list_dict_data", "kind": "function", "doc": "

    Get the exported data as a list of dictionaries.

    \n\n

    Parameters:

    \n\n

    gcms : object\n The low resolution GCMS object.\ninclude_no_match : bool, optional\n Whether to include no match data. Default is True.\nno_match_inline : bool, optional\n Whether to include no match data inline. Default is False.

    \n\n

    Returns:

    \n\n

    list\n The exported data as a list of dictionaries.

    \n", "signature": "(self, gcms, include_no_match=True, no_match_inline=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport", "kind": "class", "doc": "

    A class to export high resolution mass spectra data.

    \n\n

    This class provides methods to export high resolution mass spectra data to various formats\nsuch as Excel, CSV, HDF5, and Pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str | Path):\nThe output file path.
    • \n
    • mass_spectra (object):\nThe high resolution mass spectra object.
    • \n
    • output_type (str, optional):\nThe output type. Default is 'excel'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • output_file (Path):\nThe output file path without suffix
    • \n
    • dir_loc (Path):\nThe directory location for the output file,\nby default this will be the output_file + \".corems\" and all output files will be\nwritten into this location
    • \n
    • mass_spectra (MassSpectraBase):\nThe high resolution mass spectra object.
    • \n
    \n", "bases": "corems.mass_spectrum.output.export.HighResMassSpecExport"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.__init__", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(out_file_path, mass_spectra, output_type='excel')"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.dir_loc", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.dir_loc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.output_file", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.output_file", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.mass_spectra", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.mass_spectra", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.atoms_order_list", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.atoms_order_list", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.get_pandas_df", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.get_pandas_df", "kind": "function", "doc": "

    Get the mass spectra as a list of Pandas DataFrames.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.to_pandas", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.to_pandas", "kind": "function", "doc": "

    Export the data to a Pandas DataFrame and save it as a pickle file.

    \n\n

    Parameters:

    \n\n

    write_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.

    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.to_excel", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.to_excel", "kind": "function", "doc": "

    Export the data to an Excel file.

    \n\n

    Parameters:

    \n\n

    write_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.

    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.to_csv", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.to_csv", "kind": "function", "doc": "

    Export the data to a CSV file.

    \n\n

    Parameters:

    \n\n

    write_metadata : bool, optional\n Whether to write metadata to the output file. Default is True.

    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.get_mass_spectra_attrs", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.get_mass_spectra_attrs", "kind": "function", "doc": "

    Get the mass spectra attributes as a JSON string.

    \n\n

    Parameters:

    \n\n

    mass_spectra : object\n The high resolution mass spectra object.

    \n\n

    Returns:

    \n\n

    str\n The mass spectra attributes as a JSON string.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.HighResMassSpectraExport.to_hdf", "modulename": "corems.mass_spectra.output.export", "qualname": "HighResMassSpectraExport.to_hdf", "kind": "function", "doc": "

    Export the data to an HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • overwrite (bool, optional):\nWhether to overwrite the output file. Default is False.
    • \n
    • export_raw (bool, optional):\nWhether to export the raw mass spectra data. Default is True.
    • \n
    \n", "signature": "(self, overwrite=False, export_raw=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LCMSExport", "modulename": "corems.mass_spectra.output.export", "qualname": "LCMSExport", "kind": "class", "doc": "

    A class to export high resolution LC-MS data.

    \n\n

    This class provides methods to export high resolution LC-MS data to HDF5.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str | Path):\nThe output file path, do not include the file extension.
    • \n
    • lcms_object (LCMSBase):\nThe high resolution lc-ms object.
    • \n
    \n", "bases": "HighResMassSpectraExport"}, {"fullname": "corems.mass_spectra.output.export.LCMSExport.__init__", "modulename": "corems.mass_spectra.output.export", "qualname": "LCMSExport.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(out_file_path, mass_spectra)"}, {"fullname": "corems.mass_spectra.output.export.LCMSExport.to_hdf", "modulename": "corems.mass_spectra.output.export", "qualname": "LCMSExport.to_hdf", "kind": "function", "doc": "

    Export the data to an HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • overwrite (bool, optional):\nWhether to overwrite the output file. Default is False.
    • \n
    • save_parameters (bool, optional):\nWhether to save the parameters as a separate json or toml file. Default is True.
    • \n
    • parameter_format (str, optional):\nThe format to save the parameters in. Default is 'toml'.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If parameter_format is not 'json' or 'toml'.
    • \n
    \n", "signature": "(self, overwrite=False, save_parameters=True, parameter_format='toml'):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport", "kind": "class", "doc": "

    A class to export lipidomics data.

    \n\n

    This class provides methods to export lipidomics data to various formats and summarize the lipid report.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str | Path):\nThe output file path, do not include the file extension.
    • \n
    • mass_spectra (object):\nThe high resolution mass spectra object.
    • \n
    \n", "bases": "LCMSExport"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.__init__", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(out_file_path, mass_spectra)"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.ion_type_dict", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.ion_type_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.get_ion_formula", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.get_ion_formula", "kind": "function", "doc": "

    From a neutral formula and an ion type, return the formula of the ion.

    \n\n
    Notes
    \n\n

    This is a static method.\nIf the neutral_formula is not a string, this method will return None.

    \n\n
    Parameters
    \n\n
      \n
    • neutral_formula (str):\nThe neutral formula, this should be a string form from the MolecularFormula class\n(e.g. 'C2 H4 O2', isotopes OK), or simple string (e.g. 'C2H4O2', no isotope handling in this case).\nIn the case of a simple string, the atoms are parsed based on the presence of capital letters,\ne.g. MgCl2 is parsed as 'Mg Cl2'.
    • \n
    • ion_type (str):\nThe ion type, e.g. 'protonated', '[M+H]+', '[M+Na]+', etc.\nSee the self.ion_type_dict for the available ion types.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: The formula of the ion as a string (like 'C2 H4 O2'); or None if the neutral_formula is not a string.
    • \n
    \n", "signature": "(neutral_formula, ion_type):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.get_isotope_type", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.get_isotope_type", "kind": "function", "doc": "

    From an ion formula, return the 13C isotope type of the ion.

    \n\n
    Notes
    \n\n

    This is a static method.\nIf the ion_formula is not a string, this method will return None.\nThis is currently only functional for 13C isotopes.

    \n\n
    Parameters
    \n\n
      \n
    • ion_formula (str):\nThe formula of the ion, expected to be a string like 'C2 H4 O2'.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: The isotope type of the ion, e.g. '13C1', '13C2', etc; or None if the ion_formula does not contain a 13C isotope.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the ion_formula is not a string.
    • \n
    \n", "signature": "(ion_formula):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.clean_ms1_report", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.clean_ms1_report", "kind": "function", "doc": "

    Clean the MS1 report.

    \n\n
    Parameters
    \n\n
      \n
    • ms1_summary_full (DataFrame):\nThe full MS1 summary DataFrame.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The cleaned MS1 summary DataFrame.
    • \n
    \n", "signature": "(self, ms1_summary_full):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.summarize_lipid_report", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.summarize_lipid_report", "kind": "function", "doc": "

    Summarize the lipid report.

    \n\n
    Parameters
    \n\n
      \n
    • ms2_annot (DataFrame):\nThe MS2 annotation DataFrame with all annotations.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The summarized lipid report.
    • \n
    \n", "signature": "(self, ms2_annot):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.clean_ms2_report", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.clean_ms2_report", "kind": "function", "doc": "

    Clean the MS2 report.

    \n\n
    Parameters
    \n\n
      \n
    • lipid_summary (DataFrame):\nThe full lipid summary DataFrame.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The cleaned lipid summary DataFrame.
    • \n
    \n", "signature": "(self, lipid_summary):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.to_report", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.to_report", "kind": "function", "doc": "

    Create a report of the mass features and their annotations.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_metadata (dict, optional):\nThe molecular metadata. Default is None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The report of the mass features and their annotations.
    • \n
    \n\n
    Notes
    \n\n

    The report will contain the mass features and their annotations from MS1 and MS2 (if available).

    \n", "signature": "(self, molecular_metadata=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectra.output.export.LipidomicsExport.report_to_csv", "modulename": "corems.mass_spectra.output.export", "qualname": "LipidomicsExport.report_to_csv", "kind": "function", "doc": "

    Create a report of the mass features and their annotations and save it as a CSV file.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_metadata (dict, optional):\nThe molecular metadata. Default is None.
    • \n
    \n", "signature": "(self, molecular_metadata=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum", "modulename": "corems.mass_spectrum", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc", "modulename": "corems.mass_spectrum.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "kind": "module", "doc": "

    Created on March 23 2023

    \n\n

    @author: Will Kew

    \n\n

    Modules for automatic mass internal recalibration

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration", "kind": "class", "doc": "

    This class is designed for high resolution (FTICR, Orbitrap) data of complex mixtures, e.g. organic matter.

    \n\n

    The tool first does a broad mass range search for the most commonly expected ion type (i.e. CHO, deprotonated - for negative ESI).\nThen the mass error distribution of the assigned data is searched, with a Gaussian fit to the most prominent range.\nThis tool works when the data are of sufficient quality, and not outside the typical expected range of the mass analyzer.\nIt presumes the mean error is out by zero to several ppm, but that the spread of error values is modest (<2 ppm).

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nCoreMS mass spectrum object
    • \n
    • plot (bool, optional):\nWhether to plot the error distribution. The default is False.
    • \n
    • docker (bool, optional):\nWhether to use the docker database. The default is True. If not, it uses a dynamically generated sqlite database.
    • \n
    • ppmFWHMprior (float, optional):\nThe FWHM of the prior distribution (ppm). The default is 3.
    • \n
    • ppmRangeprior (float, optional):\nThe range of the prior distribution (ppm). The default is 15.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • determine_error_boundaries(). Determine the error boundaries for recalibration space.
    • \n
    \n\n
    Notes
    \n\n

    This initialisation function creates a copy of the MassSpectrum object to avoid over-writing assignments.\nPossible future task is to make the base class copyable.

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.__init__", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmass_spectrum,\tplot: bool = False,\tdocker: bool = True,\tppmFWHMprior: float = 3,\tppmRangeprior: float = 15)"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.mass_spectrum", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.plot", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.plot", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.docker", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.docker", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.ppmFWHMprior", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.ppmFWHMprior", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.ppmRangeprior", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.ppmRangeprior", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.set_uncal_settings", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.set_uncal_settings", "kind": "function", "doc": "

    Set uncalibrated formula search settings

    \n\n

    This function serves the uncalibrated data (hence the broad error tolerance).\nIt only allows CHO formulas with the deprotonated ion type, as is most common for SRFA ESI negative mode.

    \n\n

    This will not work for positive mode data, or for other ion types, or other expected elemental searches.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.positive_search_settings", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.positive_search_settings", "kind": "function", "doc": "

    Set the positive mode elemental search settings

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.get_error_range", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.get_error_range", "kind": "function", "doc": "

    Get the error range from the error distribution

    \n\n

    Using lmfit and seaborn kdeplot to extract the error range from the error distribution of assigned species.

    \n\n
    Parameters
    \n\n
      \n
    • errors (list):\nlist of the errors of the assigned species (ppm)
    • \n
    • ppmFWHMprior (float, optional):\nThe FWHM of the prior distribution (ppm). The default is 3.
    • \n
    • plot_logic (bool, optional):\nWhether to plot the error distribution. The default is False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mean_error (float):\nmean mass error of the Gaussian distribution (ppm)
    • \n
    • fwhm_error (float):\nfull width half max of the gaussian error distribution (ppm)
    • \n
    • ppm_thresh (list):\nrecommended thresholds for the recalibration parameters (ppm)\nConsists of [mean_error-fwhm_error,mean_error+fwhm_error]
    • \n
    \n", "signature": "(errors: list, ppmFWHMprior: float = 3, plot_logic: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.AutoRecalibration.HighResRecalibration.determine_error_boundaries", "modulename": "corems.mass_spectrum.calc.AutoRecalibration", "qualname": "HighResRecalibration.determine_error_boundaries", "kind": "function", "doc": "

    Determine the error boundaries for recalibration space

    \n\n

    This is the main function in this class\nSets the Molecular Formulas search settings, performs the initial formula search\nConverts the data to a dataframe, and gets the error range\nReturns the error thresholds.

    \n\n
    Returns
    \n\n
      \n
    • mean_error (float):\nmean mass error of the Gaussian distribution (ppm)
    • \n
    • fwhm_error (float):\nfull width half max of the gaussian error distribution (ppm)
    • \n
    • ppm_thresh (list):\nrecommended thresholds for the recalibration parameters (ppm)\nConsists of [mean_error-fwhm_error,mean_error+fwhm_error]
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration", "modulename": "corems.mass_spectrum.calc.Calibration", "kind": "module", "doc": "

    Created on Wed May 13 02:16:09 2020

    \n\n

    @author: Will Kew

    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration", "kind": "class", "doc": "

    MzDomainCalibration class for recalibrating mass spectra

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (CoreMS MassSpectrum Object):\nThe mass spectrum to be calibrated.
    • \n
    • ref_masslist (str):\nThe path to a reference mass list.
    • \n
    • mzsegment (tuple of floats, optional):\nThe mz range to recalibrate, or None. Used for calibration of specific parts of the mz domain at a time.\nFuture work - allow multiple mzsegments to be passed.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum (CoreMS MassSpectrum Object):\nThe mass spectrum to be calibrated.
    • \n
    • mzsegment (tuple of floats or None):\nThe mz range to recalibrate, or None.
    • \n
    • ref_mass_list_path (str or Path):\nThe path to the reference mass list.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run().\nMain function to run this class.
    • \n
    • load_ref_mass_list().\nLoad reference mass list (Bruker format).
    • \n
    • gen_ref_mass_list_from_assigned(min_conf=0.7).\nGenerate reference mass list from assigned masses.
    • \n
    • find_calibration_points(df_ref, calib_ppm_error_threshold=(-1, 1), calib_snr_threshold=5).\nFind calibration points in the mass spectrum based on the reference mass list.
    • \n
    • robust_calib(param, cal_peaks_mz, cal_refs_mz, order=1).\nRecalibration function.
    • \n
    • recalibrate_mass_spectrum(cal_peaks_mz, cal_refs_mz, order=1, diagnostic=False).\nMain recalibration function which uses a robust linear regression.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.__init__", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.__init__", "kind": "function", "doc": "

    \n", "signature": "(mass_spectrum, ref_masslist, mzsegment=None)"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.mass_spectrum", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.mzsegment", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.mzsegment", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.ref_mass_list_path", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.ref_mass_list_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.load_ref_mass_list", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.load_ref_mass_list", "kind": "function", "doc": "

    Load reference mass list (Bruker format)

    \n\n

    Loads in a reference mass list from a .ref file\nNote that some versions of Bruker's software produce .ref files with a different format.\nAs such, users may need to manually edit the .ref file in a text editor to ensure it is in the correct format.\nCoreMS includes an example .ref file with the correct format for reference.

    \n\n
    Returns
    \n\n
      \n
    • df_ref (Pandas DataFrame):\nreference mass list object.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.gen_ref_mass_list_from_assigned", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.gen_ref_mass_list_from_assigned", "kind": "function", "doc": "

    Generate reference mass list from assigned masses

    \n\n

    This function will generate a ref mass dataframe object from an assigned corems mass spec obj\nusing assigned masses above a certain minimum confidence threshold.

    \n\n

    This function needs to be retested, and its coverage in the unit tests should be checked.

    \n\n
    Parameters
    \n\n
      \n
    • min_conf (float, optional):\nminimum confidence score. The default is 0.7.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • df_ref (Pandas DataFrame):\nreference mass list - based on calculated masses.
    • \n
    \n", "signature": "(self, min_conf: float = 0.7):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.find_calibration_points", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.find_calibration_points", "kind": "function", "doc": "

    Function to find calibration points in the mass spectrum

    \n\n

    Based on the reference mass list.

    \n\n
    Parameters
    \n\n
      \n
    • df_ref (Pandas DataFrame):\nreference mass list for recalibration.
    • \n
    • calib_ppm_error_threshold (tuple of floats, optional):\nppm error for finding calibration masses in the spectrum. The default is -1,1.\nNote: This is based on the calculation of ppm = ((mz_measure - mz_theoretical)/mz_theoretical)*1e6.\n Some software does this the other way around and value signs must be inverted for that to work.
    • \n
    • calib_snr_threshold (float, optional):\nsnr threshold for finding calibration masses in the spectrum. The default is 5.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • cal_peaks_mz (list of floats):\nmasses of measured ions to use in calibration routine
    • \n
    • cal_refs_mz (list of floats):\nreference mz values of found calibration points.
    • \n
    \n", "signature": "(\tself,\tdf_ref,\tcalib_ppm_error_threshold: tuple[float, float] = (-1, 1),\tcalib_snr_threshold: float = 5,\tcalibration_ref_match_method: str = 'legacy',\tcalibration_ref_match_tolerance: float = 0.003,\tcalibration_ref_match_std_raw_error_limit: float = 1.5):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.robust_calib", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.robust_calib", "kind": "function", "doc": "

    Recalibration function

    \n\n

    Computes the rms of m/z errors to minimize when calibrating.\nThis is adapted from SPIKE.

    \n\n
    Parameters
    \n\n
      \n
    • param (list of floats):\ngenerated by minimize function from scipy optimize.
    • \n
    • cal_peaks_mz (list of floats):\nmasses of measured peaks to use in mass calibration.
    • \n
    • cal_refs_mz (list of floats):\nreference mz values of found calibration points.
    • \n
    • order (int, optional):\norder of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • rmserror (float):\nroot mean square mass error for calibration points.
    • \n
    \n", "signature": "(\tself,\tparam: list[float],\tcal_peaks_mz: list[float],\tcal_refs_mz: list[float],\torder: int = 1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.recalibrate_mass_spectrum", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.recalibrate_mass_spectrum", "kind": "function", "doc": "

    Main recalibration function which uses a robust linear regression

    \n\n

    This function performs the recalibration of the mass spectrum object.\nIt iteratively applies

    \n\n
    Parameters
    \n\n
      \n
    • cal_peaks_mz (list of float):\nmasses of measured peaks to use in mass calibration.
    • \n
    • cal_refs_mz (list of float):\nreference mz values of found calibration points.
    • \n
    • order (int, optional):\norder of the recalibration function. 1 = linear, 2 = quadratic. The default is 1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mass_spectrum (CoreMS mass spectrum object):\nCalibrated mass spectrum object
    • \n
    \n\n
    Notes
    \n\n

    This function is adapted, in part, from the SPIKE project [1,2] and is based on the robust linear regression method.

    \n\n
    References
    \n\n
      \n
    1. Chiron L., Coutouly M-A., Starck J-P., Rolando C., Delsuc M-A.\nSPIKE a Processing Software dedicated to Fourier Spectroscopies\nhttps://arxiv.org/abs/1608.06777 (2016)
    2. \n
    3. SPIKE - https://github.com/spike-project/spike
    4. \n
    \n", "signature": "(\tself,\tcal_peaks_mz: list[float],\tcal_refs_mz: list[float],\torder: int = 1,\tdiagnostic: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.Calibration.MzDomainCalibration.run", "modulename": "corems.mass_spectrum.calc.Calibration", "qualname": "MzDomainCalibration.run", "kind": "function", "doc": "

    Run the calibration routine

    \n\n

    This function runs the calibration routine.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration", "kind": "class", "doc": "

    Frequency Domain Calibration class for mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • selected_mass_peaks (list):\nList of selected mass peaks.
    • \n
    • include_isotopologue (bool, optional):\nFlag to include isotopologues, by default False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mz_exp (ndarray):\nArray of experimental m/z values.
    • \n
    • mz_calc (ndarray):\nArray of calculated m/z values.
    • \n
    • freq_exp (ndarray):\nArray of experimental frequencies.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • freq_exp_ms (ndarray):\nArray of experimental frequencies for mass spectrum.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • recal_mass_spec(mz_domain, Aterm, Bterm, Cterm).\nRecalibrate the mass spectrum with the given parameters.
    • \n
    • linear().\nPerform linear calibration.
    • \n
    • quadratic(iteration=False).\nPerform quadratic calibration.
    • \n
    • ledford_calibration(iteration=False).\nPerform Ledford calibration.
    • \n
    • step_fit(steps=4).\nPerform step fit calibration.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.__init__", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.__init__", "kind": "function", "doc": "

    \n", "signature": "(mass_spectrum, selected_mass_peaks, include_isotopologue=False)"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.selected_mspeaks", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.selected_mspeaks", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.mz_exp", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.mz_exp", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.mz_calc", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.mz_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.freq_exp", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.freq_exp", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.mass_spectrum", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.freq_exp_ms", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.freq_exp_ms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.recal_mass_spec", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.recal_mass_spec", "kind": "function", "doc": "

    Recalibrate the mass spectrum with the given parameters.

    \n\n
    Parameters
    \n\n
      \n
    • mz_domain (ndarray):\nArray of m/z values for recalibration.
    • \n
    • Aterm (float):\nAterm parameter for recalibration.
    • \n
    • Bterm (float):\nBterm parameter for recalibration.
    • \n
    • Cterm (float):\nCterm parameter for recalibration.
    • \n
    \n", "signature": "(self, mz_domain, Aterm, Bterm, Cterm):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.linear", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.linear", "kind": "function", "doc": "

    Perform linear calibration.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.quadratic", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.quadratic", "kind": "function", "doc": "

    Perform quadratic calibration.

    \n\n
    Parameters
    \n\n
      \n
    • iteration (bool, optional):\nFlag to perform iterative calibration, by default False.
    • \n
    \n", "signature": "(self, iteration: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.ledford_calibration", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.ledford_calibration", "kind": "function", "doc": "

    Perform Ledford calibration.

    \n\n
    Parameters
    \n\n
      \n
    • iteration (bool, optional):\nFlag to perform iterative calibration, by default False.
    • \n
    \n", "signature": "(self, iteration: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.CalibrationCalc.FreqDomain_Calibration.step_fit", "modulename": "corems.mass_spectrum.calc.CalibrationCalc", "qualname": "FreqDomain_Calibration.step_fit", "kind": "function", "doc": "

    Perform step fit calibration.

    \n\n
    Parameters
    \n\n
      \n
    • steps (int, optional):\nNumber of steps for step fit calibration, by default 4.
    • \n
    \n", "signature": "(self, steps: int = 4):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping", "kind": "class", "doc": "

    Class for Kendrick grouping of mass spectra.

    \n\n
    Methods
    \n\n
      \n
    • mz_odd_even_index_lists().\nGet odd and even indexes lists.
    • \n
    • calc_error(current, test).\nCalculate the error between two values.
    • \n
    • populate_kendrick_index_dict_error(list_indexes, sort=True).\nPopulate the Kendrick index dictionary based on error.
    • \n
    • populate_kendrick_index_dict_rounding(list_indexes, sort=True).\nPopulate the Kendrick index dictionary based on rounding.
    • \n
    • sort_abundance_kendrick_dict(even_kendrick_group_index, odd_kendrick_group_index).\nSort the Kendrick index dictionary based on abundance.
    • \n
    • kendrick_groups_indexes(sort=True).\nGet the Kendrick groups indexes dictionary.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.mz_odd_even_index_lists", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.mz_odd_even_index_lists", "kind": "function", "doc": "

    Get odd and even indexes lists.

    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the lists of even and odd indexes.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.calc_error", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.calc_error", "kind": "function", "doc": "

    Calculate the error between two values.

    \n\n
    Parameters
    \n\n
      \n
    • current (float):\nThe current value.
    • \n
    • test (float):\nThe test value.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The calculated error.
    • \n
    \n", "signature": "(self, current: float, test: float):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.populate_kendrick_index_dict_error", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.populate_kendrick_index_dict_error", "kind": "function", "doc": "

    Populate the Kendrick index dictionary based on error.

    \n\n
    Parameters
    \n\n
      \n
    • list_indexes (list):\nThe list of indexes.
    • \n
    • sort (bool, optional):\nWhether to sort the dictionary by abundance (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The Kendrick index dictionary.
    • \n
    \n", "signature": "(self, list_indexes: list, sort: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.populate_kendrick_index_dict_rounding", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.populate_kendrick_index_dict_rounding", "kind": "function", "doc": "

    Populate the Kendrick index dictionary based on rounding.

    \n\n
    Parameters
    \n\n
      \n
    • list_indexes (list):\nThe list of indexes.
    • \n
    • sort (bool, optional):\nWhether to sort the dictionary by abundance (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The Kendrick index dictionary.
    • \n
    \n", "signature": "(self, list_indexes: list, sort: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.sort_abundance_kendrick_dict", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.sort_abundance_kendrick_dict", "kind": "function", "doc": "

    Sort the Kendrick index dictionary based on abundance.

    \n\n
    Parameters
    \n\n
      \n
    • even_kendrick_group_index (dict):\nThe Kendrick index dictionary for even indexes.
    • \n
    • odd_kendrick_group_index (dict):\nThe Kendrick index dictionary for odd indexes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The sorted Kendrick index dictionary.
    • \n
    \n", "signature": "(\tself,\teven_kendrick_group_index: dict,\todd_kendrick_group_index: dict):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping.kendrick_groups_indexes", "modulename": "corems.mass_spectrum.calc.KendrickGroup", "qualname": "KendrickGrouping.kendrick_groups_indexes", "kind": "function", "doc": "

    Get the Kendrick groups indexes dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • sort (bool, optional):\nWhether to sort the dictionary by abundance (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The Kendrick groups indexes dictionary.
    • \n
    \n", "signature": "(self, sort: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction", "kind": "class", "doc": "

    Class for mass error prediction.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (list):\nList of mass spectrum objects.
    • \n
    • mz_overlay (int, optional):\nThe mz overlay value for peak simulation. Default is 10.
    • \n
    • rp_increments (int, optional):\nThe resolving power increments for peak simulation. Default is 10000.
    • \n
    • base_line_target (float, optional):\nThe target value for the baseline resolution. Default is 0.01.
    • \n
    • max_interation (int, optional):\nThe maximum number of iterations for peak simulation. Default is 1000.
    • \n
    • interpolation (str, optional):\nThe interpolation method for missing data. Default is 'linear'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum_obj (list):\nList of mass spectrum objects.
    • \n
    • mz_overlay (int):\nThe mz overlay value for peak simulation.
    • \n
    • rp_increments (int):\nThe resolving power increments for peak simulation.
    • \n
    • base_line_target (float):\nThe target value for the baseline resolution.
    • \n
    • max_interation (int):\nThe maximum number of iterations for peak simulation.
    • \n
    • df (DataFrame or None):\nThe calculated error distribution dataframe.
    • \n
    • interpolation (str):\nThe interpolation method for missing data.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run().\nRuns the mass error prediction calculation.
    • \n
    • get_results().\nReturns the calculated error distribution dataframe.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.__init__", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(\tmass_spectrum,\tmz_overlay=10,\trp_increments=10000,\tbase_line_target: float = 0.01,\tmax_interation=1000,\tinterpolation='linear')"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.mass_spectrum_obj", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.mass_spectrum_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.mz_overlay", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.mz_overlay", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.rp_increments", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.rp_increments", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.base_line_target", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.base_line_target", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.max_interation", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.max_interation", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.df", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.df", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.interpolation", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.interpolation", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.run", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.run", "kind": "function", "doc": "

    Runs the mass error prediction calculation.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.get_results", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.get_results", "kind": "function", "doc": "

    Returns the calculated error distribution dataframe.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.calc_error_dist", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.calc_error_dist", "kind": "function", "doc": "

    Calculate the error distribution.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.sum_data", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.sum_data", "kind": "function", "doc": "

    Sum the abundances of the simulated peaks.

    \n\n
    Parameters
    \n\n
      \n
    • tuple_mz_abun_list (tuple):\nA tuple containing the mz and abundance lists.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the summed mz and abundance lists.
    • \n
    \n", "signature": "(self, tuple_mz_abun_list: tuple):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.calc_error", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.calc_error", "kind": "function", "doc": "

    Calculate the error between two values.

    \n\n
    Parameters
    \n\n
      \n
    • mass_ref (float):\nThe reference value.
    • \n
    • mass_sim (float):\nThe simulated value.
    • \n
    • factor (float):\nThe factor to multiply the error by.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The calculated error.
    • \n
    \n", "signature": "(self, mass_ref, mass_sim, factor):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.find_peak_apex", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.find_peak_apex", "kind": "function", "doc": "

    Find the peak apex.

    \n\n
    Parameters
    \n\n
      \n
    • mz (array):\nThe mz array.
    • \n
    • abund (array):\nThe abundance array.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the peak apex mass and abundance.
    • \n
    \n", "signature": "(self, mz, abund):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassErrorPrediction.MassErrorPrediction.find_peak_valley", "modulename": "corems.mass_spectrum.calc.MassErrorPrediction", "qualname": "MassErrorPrediction.find_peak_valley", "kind": "function", "doc": "

    Find the peak valley.

    \n\n
    Parameters
    \n\n
      \n
    • mz (array):\nThe mz array.
    • \n
    • abund (array):\nThe abundance array.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the peak valley mz and abundance.
    • \n
    \n", "signature": "(self, mz, abund):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc", "kind": "class", "doc": "

    Class for Mass Spectrum Calculations

    \n\n

    Class including numerical calculations related to the mass spectrum class.\nInherits from PeakPicking and NoiseThresholdCalc, ensuring their methods are\navailable to the instantiated mass spectrum class object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nCoreMS mass spectrum object
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • All attributes are derived from the MassSpecBase class
    • \n
    \n\n
    Methods
    \n\n
      \n
    • check_mspeaks().\nCheck if the mspeaks attribute is populated
    • \n
    • sort_by_abundance().\nSort the mspeaks by abundance
    • \n
    • percentile_assigned(report_error=False).\nCalculate the percentage of assigned peaks
    • \n
    • resolving_power_calc(B, T).\nCalculate the resolving power
    • \n
    • number_average_molecular_weight(profile=False).\nCalculate the number average molecular weight
    • \n
    • weight_average_molecular_weight(profile=False).\nCalculate the weight average molecular weight
    • \n
    \n", "bases": "corems.mass_spectrum.calc.PeakPicking.PeakPicking, corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc.percentile_assigned", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc.percentile_assigned", "kind": "function", "doc": "

    Percentage of peaks which are assigned

    \n\n
    Parameters
    \n\n
      \n
    • report_error (bool, optional):\nReport the error of the assigned peaks. Default is False.
    • \n
    \n", "signature": "(self, report_error: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc.resolving_power_calc", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc.resolving_power_calc", "kind": "function", "doc": "

    Calculate the theoretical resolving power

    \n\n

    Calls on the MSPeak object function to calculate the resolving power of a peak, this calcs for all peaks in a spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • T (float):\ntransient time
    • \n
    • B (float):\nMagnetic Field Strength (Tesla)
    • \n
    \n\n
    References
    \n\n
      \n
    1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)\nDOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    2. \n
    \n", "signature": "(self, B: float, T: float):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc.number_average_molecular_weight", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc.number_average_molecular_weight", "kind": "function", "doc": "

    Average molecular weight calculation

    \n\n
    Parameters
    \n\n
      \n
    • profile (bool, optional):\nWhether the data is in profile or centroid mode. The default is False (i.e. centroid data)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The average molecular weight.
    • \n
    \n", "signature": "(self, profile: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc.weight_average_molecular_weight", "modulename": "corems.mass_spectrum.calc.MassSpectrumCalc", "qualname": "MassSpecCalc.weight_average_molecular_weight", "kind": "function", "doc": "

    Weighted Average molecular weight calculation

    \n\n
    Returns
    \n\n
      \n
    • float: The weight average molecular weight.
    • \n
    \n", "signature": "(self, profile: bool = False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "kind": "module", "doc": "

    Created on June 2nd 2023

    \n\n

    @author: Will Kew

    \n\n

    Module for mean resolving power filtration\nBased upon the work in:

    \n\n

    Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P.\nFourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry.\nRapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940

    \n\n

    Calculates a m/z normalised resolving power, fits a gaussian distribution to this, and then filters out peaks which are outside of the user defined number of standard deviations

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter", "kind": "class", "doc": "

    Class for mean resolving power filtration.

    \n\n

    This module implements a mean resolving power filter based on the work described in [1].

    \n\n

    The MeanResolvingPowerFilter class provides methods to calculate the m/z normalized resolving power, fit a Gaussian distribution to it, and filter out peaks that are outside of the user-defined number of standard deviations.

    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum (object) (The mass spectrum object.):

    • \n
    • ndeviations (int) (The number of standard deviations used for filtering.):

    • \n
    • plot (bool) (Flag indicating whether to plot the results.):

    • \n
    • guess_pars (bool) (Flag indicating whether to guess the parameters for the Gaussian model.):

    • \n
    \n\n
    Methods
    \n\n
      \n
    • extract_peaks(): Extracts the peaks from the mass spectrum.
    • \n
    • normalise_rps(tmpdf_ms): Normalizes the resolving powers to be independent of m/z.
    • \n
    • calculate_distribution(tmpdf_ms): Calculates the distribution of the resolving powers.
    • \n
    • create_index_list_to_remove(tmpdf_ms, rps_thresh): Creates an index list of peaks to remove based on the calculated thresholds.
    • \n
    • main(): Executes the main filtering process and returns the index list of peaks to remove.
    • \n
    \n\n
    References
    \n\n
      \n
    1. Kanawati, B, Bader, TM, Wanczek, K-P, Li, Y, Schmitt-Kopplin, P.\nFourier transform (FT)-artifacts and power-function resolution filter in Fourier transform mass spectrometry.\nRapid Commun Mass Spectrom. 2017; 31: 1607- 1615. https://doi.org/10.1002/rcm.7940
    2. \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.__init__", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmass_spectrum,\tndeviations: float = 3,\tplot: bool = False,\tguess_pars: bool = False)"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.mass_spectrum", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.plot", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.plot", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.ndeviations", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.ndeviations", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.guess_pars", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.guess_pars", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.extract_peaks", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.extract_peaks", "kind": "function", "doc": "

    Extracts the peaks from the mass spectrum.

    \n\n
    Returns
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame containing the extracted peaks.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.normalise_rps", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.normalise_rps", "kind": "function", "doc": "

    Normalizes the resolving powers to be independent of m/z.

    \n\n
    Parameters
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame containing the extracted peaks.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame with the resolving powers normalized.
    • \n
    \n", "signature": "(self, tmpdf_ms):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.calculate_distribution", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.calculate_distribution", "kind": "function", "doc": "

    Calculates the distribution of the resolving powers.

    \n\n
    Parameters
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame containing the extracted peaks with normalized resolving powers.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • rps_thresh (list):\nA list of the calculated thresholds for filtering.
    • \n
    \n", "signature": "(self, tmpdf_ms):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.create_index_list_to_remove", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.create_index_list_to_remove", "kind": "function", "doc": "

    Creates an index list of peaks to remove based on the calculated thresholds.

    \n\n
    Parameters
    \n\n
      \n
    • tmpdf_ms (Pandas DataFrame):\nA DataFrame containing the extracted peaks with normalized resolving powers.
    • \n
    • rps_thresh (list):\nA list of the calculated thresholds for filtering.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • index_to_keep (list):\nA list of indices of peaks to keep.
    • \n
    \n", "signature": "(self, tmpdf_ms, rps_thresh: list):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.MeanResolvingPowerFilter.MeanResolvingPowerFilter.main", "modulename": "corems.mass_spectrum.calc.MeanResolvingPowerFilter", "qualname": "MeanResolvingPowerFilter.main", "kind": "function", "doc": "

    Executes the main filtering process and returns the index list of peaks to remove.

    \n\n
    Returns
    \n\n
      \n
    • index_to_remove (list):\nA list of indices of peaks to remove.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc", "kind": "class", "doc": "

    Class for noise threshold calculation.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • settings (MSParameters):\nThe mass spectrum parameters object.
    • \n
    • is_centroid (bool):\nFlag indicating whether the mass spectrum is centroid or profile.
    • \n
    • baseline_noise (float):\nThe baseline noise.
    • \n
    • baseline_noise_std (float):\nThe baseline noise standard deviation.
    • \n
    • max_signal_to_noise (float):\nThe maximum signal to noise.
    • \n
    • max_abundance (float):\nThe maximum abundance.
    • \n
    • abundance (np.array):\nThe abundance array.
    • \n
    • abundance_profile (np.array):\nThe abundance profile array.
    • \n
    • mz_exp (np.array):\nThe experimental m/z array.
    • \n
    • mz_exp_profile (np.array):\nThe experimental m/z profile array.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • None
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_noise_threshold(). Get the noise threshold.
    • \n
    • cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    • \n
    • get_noise_average(ymincentroid).\nGet the average noise and standard deviation.
    • \n
    • get_abundance_minima_centroid(abun_cut)\nGet the abundance minima for centroid data.
    • \n
    • run_log_noise_threshold_calc().\nRun the log noise threshold calculation.
    • \n
    • run_noise_threshold_calc().\nRun the noise threshold calculation.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.get_noise_threshold", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.get_noise_threshold", "kind": "function", "doc": "

    Get the noise threshold.

    \n\n
    Returns
    \n\n
      \n
    • Tuple[Tuple[float, float], Tuple[float, float]]: A tuple containing the m/z and abundance noise thresholds.\n(min_mz, max_mz), (noise_threshold, noise_threshold)
    • \n
    \n", "signature": "(self) -> Tuple[Tuple[float, float], Tuple[float, float]]:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.cut_mz_domain_noise", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.cut_mz_domain_noise", "kind": "function", "doc": "

    Cut the m/z domain to the noise threshold regions.

    \n\n
    Returns
    \n\n
      \n
    • Tuple[np.array, np.array]: A tuple containing the m/z and abundance arrays of the truncated spectrum region.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.get_noise_average", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.get_noise_average", "kind": "function", "doc": "

    Get the average noise and standard deviation.

    \n\n
    Parameters
    \n\n
      \n
    • ymincentroid (np.array):\nThe ymincentroid array.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Tuple[float, float]: A tuple containing the average noise and standard deviation.
    • \n
    \n", "signature": "(self, ymincentroid):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.get_abundance_minima_centroid", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.get_abundance_minima_centroid", "kind": "function", "doc": "

    Get the abundance minima for centroid data.

    \n\n
    Parameters
    \n\n
      \n
    • abun_cut (np.array):\nThe abundance cut array.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • np.array: The abundance minima array.
    • \n
    \n", "signature": "(self, abun_cut):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.run_log_noise_threshold_calc", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.run_log_noise_threshold_calc", "kind": "function", "doc": "

    Run the log noise threshold calculation.

    \n\n
    Returns
    \n\n
      \n
    • Tuple[float, float]: A tuple containing the average noise and standard deviation.
    • \n
    \n\n
    Notes
    \n\n

    Method for estimating the noise based on the decimal log of all the data points.

    \n\n

    The idea is that you calculate a histogram of the log10(abundance) values.\nThe maximum of the histogram == the standard deviation of the noise.

    \n\n

    For aFT data it is a gaussian distribution of noise - not implemented here!\nFor mFT data it is a Rayleigh distribution, and the value is actually 10^(abu_max)*0.463.

    \n\n

    See the publication cited above for the derivation of this.

    \n\n
    References
    \n\n
      \n
    1. dx.doi.org/10.1021/ac403278t | Anal. Chem. 2014, 86, 3308\u22123316
    2. \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc.run_noise_threshold_calc", "modulename": "corems.mass_spectrum.calc.NoiseCalc", "qualname": "NoiseThresholdCalc.run_noise_threshold_calc", "kind": "function", "doc": "

    Runs noise threshold calculation (not log based method)

    \n\n
    Returns
    \n\n
      \n
    • Tuple[float, float]: A tuple containing the average noise and standard deviation.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "kind": "module", "doc": "

    This code is for Bayesian estimation of the noise levels.\nIt is not implemented or used in the current code base.\nThe packages it uses are not part of the requirements.\nIf you want to use it, you will need to install them manually.

    \n"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes.BayesNoiseCalc", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "qualname": "BayesNoiseCalc", "kind": "class", "doc": "

    Class for noise threshold calculation.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • settings (MSParameters):\nThe mass spectrum parameters object.
    • \n
    • is_centroid (bool):\nFlag indicating whether the mass spectrum is centroid or profile.
    • \n
    • baseline_noise (float):\nThe baseline noise.
    • \n
    • baseline_noise_std (float):\nThe baseline noise standard deviation.
    • \n
    • max_signal_to_noise (float):\nThe maximum signal to noise.
    • \n
    • max_abundance (float):\nThe maximum abundance.
    • \n
    • abundance (np.array):\nThe abundance array.
    • \n
    • abundance_profile (np.array):\nThe abundance profile array.
    • \n
    • mz_exp (np.array):\nThe experimental m/z array.
    • \n
    • mz_exp_profile (np.array):\nThe experimental m/z profile array.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • None
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_noise_threshold(). Get the noise threshold.
    • \n
    • cut_mz_domain_noise(). Cut the m/z domain to the noise threshold regions.
    • \n
    • get_noise_average(ymincentroid).\nGet the average noise and standard deviation.
    • \n
    • get_abundance_minima_centroid(abun_cut)\nGet the abundance minima for centroid data.
    • \n
    • run_log_noise_threshold_calc().\nRun the log noise threshold calculation.
    • \n
    • run_noise_threshold_calc().\nRun the noise threshold calculation.
    • \n
    \n", "bases": "corems.mass_spectrum.calc.NoiseCalc.NoiseThresholdCalc"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes.BayesNoiseCalc.from_posterior", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "qualname": "BayesNoiseCalc.from_posterior", "kind": "function", "doc": "

    Legacy code for Bayesian efforts - not used.

    \n\n

    pymc3 is not installed by default;\n if you plan to use this method, you must manually install the pymc3\n package first.

    \n", "signature": "(self, param, samples):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes.BayesNoiseCalc.error_model_from_trace", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "qualname": "BayesNoiseCalc.error_model_from_trace", "kind": "function", "doc": "

    Legacy code for Bayesian efforts - not used.

    \n\n

    pymc3 is not installed by default;\n if you plan to use this method, you must manually install the pymc3\n package first.

    \n", "signature": "(self, trace, ymincentroid):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.NoiseCalc_Bayes.BayesNoiseCalc.simple_model_error_dist", "modulename": "corems.mass_spectrum.calc.NoiseCalc_Bayes", "qualname": "BayesNoiseCalc.simple_model_error_dist", "kind": "function", "doc": "

    Legacy code for Bayesian efforts - not used.

    \n\n

    pymc3 is not installed by default;\n if you plan to use this method, you must manually install the pymc3\n package first.

    \n", "signature": "(self, ymincentroid):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "kind": "module", "doc": "

    @author: Yuri E. Corilo\n@date: Jun 27, 2019

    \n"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking", "kind": "class", "doc": "

    Class for peak picking.

    \n\n
    Parameters
    \n\n
      \n
    • None
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • None
    • \n
    \n\n
    Methods
    \n\n
      \n
    • prepare_peak_picking_data().\nPrepare the mz, abundance, and frequency data for peak picking.
    • \n
    • cut_mz_domain_peak_picking().\nCut the m/z domain for peak picking.
    • \n
    • extrapolate_axes_for_pp(mz=None, abund=None, freq=None).\nExtrapolate the m/z axis and fill the abundance axis with 0s.
    • \n
    • do_peak_picking().\nPerform peak picking.
    • \n
    • find_minima(apex_index, abundance, len_abundance, right=True).\nFind the minima of a peak.
    • \n
    • linear_fit_calc(intes, massa, index_term, index_sign).\nAlgebraic solution to a linear fit.
    • \n
    • calculate_resolving_power(intes, massa, current_index).\nCalculate the resolving power of a peak.
    • \n
    • cal_minima(mass, abun).\nCalculate the minima of a peak.
    • \n
    • calc_centroid(mass, abund, freq).\nCalculate the centroid of a peak.
    • \n
    • get_threshold(intes).\nGet the intensity threshold for peak picking.
    • \n
    • algebraic_quadratic(list_mass, list_y).\nFind the apex of a peak - algebraically.
    • \n
    • find_apex_fit_quadratic(mass, abund, freq, current_index).\nFind the apex of a peak.
    • \n
    • check_prominence(abun, current_index, len_abundance, peak_height_diff).\nCheck the prominence of a peak.
    • \n
    • use_the_max(mass, abund, current_index, len_abundance, peak_height_diff).\nUse the max peak height as the centroid.
    • \n
    • calc_centroid_legacy(mass, abund, freq).\nLegacy centroid calculation. Deprecated - for deletion.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.prepare_peak_picking_data", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.prepare_peak_picking_data", "kind": "function", "doc": "

    Prepare the data for peak picking.

    \n\n

    This function will prepare the m/z, abundance, and frequency data for peak picking according to the settings.

    \n\n
    Returns
    \n\n
      \n
    • mz (ndarray):\nThe m/z axis.
    • \n
    • abundance (ndarray):\nThe abundance axis.
    • \n
    • freq (ndarray or None):\nThe frequency axis, if available.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.cut_mz_domain_peak_picking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.cut_mz_domain_peak_picking", "kind": "function", "doc": "

    Cut the m/z domain for peak picking.

    \n\n

    Simplified function

    \n\n
    Returns
    \n\n
      \n
    • mz_domain_X_low_cutoff (ndarray):\nThe m/z values within the specified range.
    • \n
    • mz_domain_low_Y_cutoff (ndarray):\nThe abundance values within the specified range.
    • \n
    • freq_domain_low_Y_cutoff (ndarray or None):\nThe frequency values within the specified range, if available.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.legacy_cut_mz_domain_peak_picking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.legacy_cut_mz_domain_peak_picking", "kind": "function", "doc": "

    Cut the m/z domain for peak picking.\nDEPRECATED

    \n\n
    Returns
    \n\n
      \n
    • mz_domain_X_low_cutoff (ndarray):\nThe m/z values within the specified range.
    • \n
    • mz_domain_low_Y_cutoff (ndarray):\nThe abundance values within the specified range.
    • \n
    • freq_domain_low_Y_cutoff (ndarray or None):\nThe frequency values within the specified range, if available.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.extrapolate_axis", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.extrapolate_axis", "kind": "function", "doc": "

    This function will extrapolate an input array in both directions by N pts.

    \n\n
    Parameters
    \n\n
      \n
    • initial_array (ndarray):\nThe input array.
    • \n
    • pts (int):\nThe number of points to extrapolate.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ndarray: The extrapolated array.
    • \n
    \n\n
    Notes
    \n\n

    This is a static method.

    \n", "signature": "(initial_array, pts):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.extrapolate_axes_for_pp", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.extrapolate_axes_for_pp", "kind": "function", "doc": "

    Extrapolate the m/z axis and fill the abundance axis with 0s.

    \n\n
    Parameters
    \n\n
      \n
    • mz (ndarray or None):\nThe m/z axis, if available. If None, the experimental m/z axis is used.
    • \n
    • abund (ndarray or None):\nThe abundance axis, if available. If None, the experimental abundance axis is used.
    • \n
    • freq (ndarray or None):\nThe frequency axis, if available. If None, the experimental frequency axis is used.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz (ndarray):\nThe extrapolated m/z axis.
    • \n
    • abund (ndarray):\nThe abundance axis with 0s filled.
    • \n
    • freq (ndarray or None):\nThe extrapolated frequency axis, if available.
    • \n
    \n\n
    Notes
    \n\n

    This function will extrapolate the mz axis by the number of datapoints specified in the settings,\nand fill the abundance axis with 0s.\nThis should prevent peak picking issues at the spectrum edge.

    \n", "signature": "(self, mz=None, abund=None, freq=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.do_peak_picking", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.do_peak_picking", "kind": "function", "doc": "

    Perform peak picking.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.find_minima", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.find_minima", "kind": "function", "doc": "

    Find the minima of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • apex_index (int):\nThe index of the peak apex.
    • \n
    • abundance (ndarray):\nThe abundance values.
    • \n
    • len_abundance (int):\nThe length of the abundance array.
    • \n
    • right (bool, optional):\nFlag indicating whether to search for minima to the right of the apex (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The index of the minima.
    • \n
    \n", "signature": "(self, apex_index, abundance, len_abundance, right=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.linear_fit_calc", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.linear_fit_calc", "kind": "function", "doc": "

    Algebraic solution to a linear fit - roughly 25-50x faster than numpy polyfit when passing only two vals and doing a 1st order fit

    \n\n
    Parameters
    \n\n
      \n
    • intes (ndarray):\nThe intensity values.
    • \n
    • massa (ndarray):\nThe mass values.
    • \n
    • index_term (int):\nThe index of the current term.
    • \n
    • index_sign (str):\nThe index sign
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ndarray: The coefficients of the linear fit.
    • \n
    \n\n
    Notes
    \n\n

    This is a static method.

    \n", "signature": "(intes, massa, index_term, index_sign):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.calculate_resolving_power", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.calculate_resolving_power", "kind": "function", "doc": "

    Calculate the resolving power of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • intes (ndarray):\nThe intensity values.
    • \n
    • massa (ndarray):\nThe mass values.
    • \n
    • current_index (int):\nThe index of the current peak.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The resolving power of the peak.
    • \n
    \n\n
    Notes
    \n\n

    This is a conservative calculation of resolving power:\nthe peak needs to be resolved at least at the half-maximum magnitude;\notherwise, the combined full width at half maximum is used to calculate resolving power.

    \n", "signature": "(self, intes, massa, current_index):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.cal_minima", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.cal_minima", "kind": "function", "doc": "

    Calculate the minima of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • mass (ndarray):\nThe mass values.
    • \n
    • abun (ndarray):\nThe abundance values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ndarray or None: The mass values at the minima, if found.
    • \n
    \n", "signature": "(self, mass, abun):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.calc_centroid", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.calc_centroid", "kind": "function", "doc": "

    Calculate the centroid of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • mass (ndarray):\nThe mass values.
    • \n
    • abund (ndarray):\nThe abundance values.
    • \n
    • freq (ndarray or None):\nThe frequency values, if available.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None
    • \n
    \n", "signature": "(self, mass, abund, freq):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.get_threshold", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.get_threshold", "kind": "function", "doc": "

    Get the intensity threshold for peak picking.

    \n\n
    Parameters
    \n\n
      \n
    • intes (ndarray):\nThe intensity values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The intensity threshold.
    • \n
    • float: The factor to multiply the intensity threshold by.
    • \n
    \n", "signature": "(self, intes):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.algebraic_quadratic", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.algebraic_quadratic", "kind": "function", "doc": "

    Find the apex of a peak - algebraically.\nFaster than using numpy polyfit by ~28x per fit.

    \n\n
    Parameters
    \n\n
      \n
    • list_mass (ndarray):\nlist of m/z values (3 points)
    • \n
    • list_y (ndarray):\nlist of abundance values (3 points)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • a, b, c (float):\ncoefficients of the quadratic equation.
    • \n
    \n\n
    Notes
    \n\n

    This is a static method.

    \n", "signature": "(list_mass, list_y):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.find_apex_fit_quadratic", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.find_apex_fit_quadratic", "kind": "function", "doc": "

    Find the apex of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • mass (ndarray):\nThe mass values.
    • \n
    • abund (ndarray):\nThe abundance values.
    • \n
    • freq (ndarray or None):\nThe frequency values, if available.
    • \n
    • current_index (int):\nThe index of the current peak.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The m/z value of the peak apex.
    • \n
    • float: The frequency value of the peak apex, if available.
    • \n
    • float: The abundance value of the peak apex.
    • \n
    \n", "signature": "(self, mass, abund, freq, current_index):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.check_prominence", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.check_prominence", "kind": "function", "doc": "

    Check the prominence of a peak.

    \n\n
    Parameters
    \n\n
      \n
    • abun (ndarray):\nThe abundance values.
    • \n
    • current_index (int):\nThe index of the current peak.
    • \n
    • len_abundance (int):\nThe length of the abundance array.
    • \n
    • peak_height_diff (function):\nThe function to calculate the peak height difference.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple or False: A tuple containing the indexes of the peak, if the prominence is above the threshold.\nOtherwise, False.
    • \n
    \n", "signature": "(self, abun, current_index, len_abundance, peak_height_diff) -> tuple:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.use_the_max", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.use_the_max", "kind": "function", "doc": "

    Use the max peak height as the centroid

    \n\n
    Parameters
    \n\n
      \n
    • mass (ndarray):\nThe mass values.
    • \n
    • abund (ndarray):\nThe abundance values.
    • \n
    • current_index (int):\nThe index of the current peak.
    • \n
    • len_abundance (int):\nThe length of the abundance array.
    • \n
    • peak_height_diff (function):\nThe function to calculate the peak height difference.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The m/z value of the peak apex.
    • \n
    • float: The abundance value of the peak apex.
    • \n
    • tuple or None: A tuple containing the indexes of the peak, if the prominence is above the threshold.\nOtherwise, None.
    • \n
    \n", "signature": "(self, mass, abund, current_index, len_abundance, peak_height_diff):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.calc.PeakPicking.PeakPicking.calc_centroid_legacy", "modulename": "corems.mass_spectrum.calc.PeakPicking", "qualname": "PeakPicking.calc_centroid_legacy", "kind": "function", "doc": "

    Legacy centroid calculation\nDeprecated - for deletion.

    \n", "signature": "(self, mass, abund, freq):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory", "modulename": "corems.mass_spectrum.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.overrides", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "overrides", "kind": "function", "doc": "

    Checks if the method overrides a method from an interface class.

    \n", "signature": "(interface_class):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase", "kind": "class", "doc": "

    A mass spectrum base class, stores the profile data and instrument settings.

    \n\n

    Iteration over a list of MSPeaks classes stored at the _mspeaks attributes.\n_mspeaks is populated under the hood by calling process_mass_spec method.\nIteration is null if _mspeaks is empty.

    \n\n
    Parameters
    \n\n
      \n
    • mz_exp (array_like):\nThe m/z values of the mass spectrum.
    • \n
    • abundance (array_like):\nThe abundance values of the mass spectrum.
    • \n
    • d_params (dict):\nA dictionary of parameters for the mass spectrum.
    • \n
    • **kwargs: Additional keyword arguments.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mspeaks (list):\nA list of mass peaks.
    • \n
    • is_calibrated (bool):\nWhether the mass spectrum is calibrated.
    • \n
    • is_centroid (bool):\nWhether the mass spectrum is centroided.
    • \n
    • has_frequency (bool):\nWhether the mass spectrum has a frequency domain.
    • \n
    • calibration_order (None or int):\nThe order of the mass spectrum's calibration.
    • \n
    • calibration_points (None or ndarray):\nThe calibration points of the mass spectrum.
    • \n
    • calibration_ref_mzs (None or ndarray):\nThe reference m/z values of the mass spectrum's calibration.
    • \n
    • calibration_meas_mzs (None or ndarray):\nThe measured m/z values of the mass spectrum's calibration.
    • \n
    • calibration_RMS (None or float):\nThe root mean square of the mass spectrum's calibration.
    • \n
    • calibration_segment (None or CalibrationSegment):\nThe calibration segment of the mass spectrum.
    • \n
    • _abundance (ndarray):\nThe abundance values of the mass spectrum.
    • \n
    • _mz_exp (ndarray):\nThe m/z values of the mass spectrum.
    • \n
    • _mspeaks (list):\nA list of mass peaks.
    • \n
    • _dict_nominal_masses_indexes (dict):\nA dictionary of nominal masses and their indexes.
    • \n
    • _baseline_noise (float):\nThe baseline noise of the mass spectrum.
    • \n
    • _baseline_noise_std (float):\nThe standard deviation of the baseline noise of the mass spectrum.
    • \n
    • _dynamic_range (float or None):\nThe dynamic range of the mass spectrum.
    • \n
    • _transient_settings (None or TransientSettings):\nThe transient settings of the mass spectrum.
    • \n
    • _frequency_domain (None or FrequencyDomain):\nThe frequency domain of the mass spectrum.
    • \n
    • _mz_cal_profile (None or MzCalibrationProfile):\nThe m/z calibration profile of the mass spectrum.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • process_mass_spec(). Main function to process the mass spectrum,\nincluding calculating the noise threshold, peak picking, and resetting the MSpeak indexes.
    • \n
    \n\n

    See also: MassSpecCentroid(), MassSpecfromFreq(), MassSpecProfile()

    \n", "bases": "corems.mass_spectrum.calc.MassSpectrumCalc.MassSpecCalc, corems.mass_spectrum.calc.KendrickGroup.KendrickGrouping"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(mz_exp, abundance, d_params, **kwargs)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mspeaks", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mspeaks", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.is_calibrated", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.is_calibrated", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.is_centroid", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.is_centroid", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.has_frequency", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.has_frequency", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_order", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_order", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_points", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_points", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_ref_mzs", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_ref_mzs", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_meas_mzs", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_meas_mzs", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_RMS", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_RMS", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_segment", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_segment", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_raw_error_median", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_raw_error_median", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.calibration_raw_error_stdev", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.calibration_raw_error_stdev", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.set_indexes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.set_indexes", "kind": "function", "doc": "

    Set the mass spectrum to iterate over only the selected MSpeaks indexes.

    \n\n
    Parameters
    \n\n
      \n
    • list_indexes (list of int):\nA list of integers representing the indexes of the MSpeaks to iterate over.
    • \n
    \n", "signature": "(self, list_indexes):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.reset_indexes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.reset_indexes", "kind": "function", "doc": "

    Reset the mass spectrum to iterate over all MSpeaks objects.

    \n\n

    This method resets the mass spectrum to its original state, allowing iteration over all MSpeaks objects.\nIt also sets the index of each MSpeak object to its corresponding position in the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.add_mspeak", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.add_mspeak", "kind": "function", "doc": "

    Add a new MSPeak object to the MassSpectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • ion_charge (int):\nThe ion charge of the MSPeak.
    • \n
    • mz_exp (float):\nThe experimental m/z value of the MSPeak.
    • \n
    • abundance (float):\nThe abundance of the MSPeak.
    • \n
    • resolving_power (float):\nThe resolving power of the MSPeak.
    • \n
    • signal_to_noise (float):\nThe signal-to-noise ratio of the MSPeak.
    • \n
    • massspec_indexes (list):\nA list of indexes of the MSPeak in the MassSpectrum object.
    • \n
    • exp_freq (float, optional):\nThe experimental frequency of the MSPeak. Defaults to None.
    • \n
    • ms_parent (MSParent, optional):\nThe MSParent object associated with the MSPeak. Defaults to None.
    • \n
    \n", "signature": "(\tself,\tion_charge,\tmz_exp,\tabundance,\tresolving_power,\tsignal_to_noise,\tmassspec_indexes,\texp_freq=None,\tms_parent=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.reset_cal_therms", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.reset_cal_therms", "kind": "function", "doc": "

    Reset calibration terms and recalculate the mass-to-charge ratio and abundance.

    \n\n
    Parameters
    \n\n
      \n
    • Aterm (float):\nThe A-term calibration coefficient.
    • \n
    • Bterm (float):\nThe B-term calibration coefficient.
    • \n
    • C (float):\nThe C-term calibration coefficient.
    • \n
    • fas (float, optional):\nThe frequency amplitude scaling factor. Default is 0.
    • \n
    \n", "signature": "(self, Aterm, Bterm, C, fas=0):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.clear_molecular_formulas", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.clear_molecular_formulas", "kind": "function", "doc": "

    Clear the molecular formulas for all mspeaks in the MassSpectrum.

    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: An array of the cleared molecular formulas for each mspeak in the MassSpectrum.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.process_mass_spec", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.process_mass_spec", "kind": "function", "doc": "

    Process the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • keep_profile (bool, optional):\nWhether to keep the profile data after processing. Defaults to True.
    • \n
    \n\n
    Notes
    \n\n

    This method does the following:

    \n\n
      \n
    • calculates the noise threshold
    • \n
    • does peak picking (creates mspeak_objs)
    • \n
    • resets the mspeak_obj indexes
    • \n
    \n", "signature": "(self, keep_profile=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.cal_noise_threshold", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.cal_noise_threshold", "kind": "function", "doc": "

    Calculate the noise threshold of the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.parameters", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.parameters", "kind": "variable", "doc": "

    Return the parameters of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.set_parameter_from_json", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.set_parameter_from_json", "kind": "function", "doc": "

    Set the parameters of the mass spectrum from a JSON file.

    \n\n
    Parameters
    \n\n
      \n
    • parameters_path (str):\nThe path to the JSON file containing the parameters.
    • \n
    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.set_parameter_from_toml", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.set_parameter_from_toml", "kind": "function", "doc": "

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mspeaks_settings", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mspeaks_settings", "kind": "variable", "doc": "

    Return the MS peak settings of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.settings", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.settings", "kind": "variable", "doc": "

    Return the settings of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.molecular_search_settings", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.molecular_search_settings", "kind": "variable", "doc": "

    Return the molecular search settings of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_cal_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_cal_profile", "kind": "variable", "doc": "

    Return the calibrated m/z profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_cal", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_cal", "kind": "variable", "doc": "

    Return the calibrated m/z values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_exp", "kind": "variable", "doc": "

    Return the experimental m/z values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.freq_exp_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.freq_exp_profile", "kind": "variable", "doc": "

    Return the experimental frequency profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.freq_exp_pp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.freq_exp_pp", "kind": "variable", "doc": "

    Return the experimental frequency values of the mass spectrum that are used for peak picking.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_exp_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_exp_profile", "kind": "variable", "doc": "

    Return the experimental m/z profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.mz_exp_pp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.mz_exp_pp", "kind": "variable", "doc": "

    Return the experimental m/z values of the mass spectrum that are used for peak picking.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.abundance_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.abundance_profile", "kind": "variable", "doc": "

    Return the abundance profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.abundance_profile_pp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.abundance_profile_pp", "kind": "variable", "doc": "

    Return the abundance profile of the mass spectrum that is used for peak picking.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.abundance", "kind": "variable", "doc": "

    Return the abundance values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.freq_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.freq_exp", "kind": "function", "doc": "

    Return the experimental frequency values of the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.resolving_power", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.resolving_power", "kind": "variable", "doc": "

    Return the resolving power values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.signal_to_noise", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.signal_to_noise", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.nominal_mz", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.nominal_mz", "kind": "variable", "doc": "

    Return the nominal m/z values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.get_mz_and_abundance_peaks_tuples", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.get_mz_and_abundance_peaks_tuples", "kind": "function", "doc": "

    Return a list of tuples containing the m/z and abundance values of the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.kmd", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.kmd", "kind": "variable", "doc": "

    Return the Kendrick mass defect values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.kendrick_mass", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.kendrick_mass", "kind": "variable", "doc": "

    Return the Kendrick mass values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.max_mz_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.max_mz_exp", "kind": "variable", "doc": "

    Return the maximum experimental m/z value of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.min_mz_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.min_mz_exp", "kind": "variable", "doc": "

    Return the minimum experimental m/z value of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.max_abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.max_abundance", "kind": "variable", "doc": "

    Return the maximum abundance value of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.max_signal_to_noise", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.max_signal_to_noise", "kind": "variable", "doc": "

    Return the maximum signal-to-noise ratio of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.most_abundant_mspeak", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.most_abundant_mspeak", "kind": "variable", "doc": "

    Return the most abundant MSpeak object of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.min_abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.min_abundance", "kind": "variable", "doc": "

    Return the minimum abundance value of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.dynamic_range", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.dynamic_range", "kind": "variable", "doc": "

    Return the dynamic range of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.baseline_noise", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.baseline_noise", "kind": "variable", "doc": "

    Return the baseline noise of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.baseline_noise_std", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.baseline_noise_std", "kind": "variable", "doc": "

    Return the standard deviation of the baseline noise of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.Aterm", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.Aterm", "kind": "variable", "doc": "

    Return the A-term calibration coefficient of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.Bterm", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.Bterm", "kind": "variable", "doc": "

    Return the B-term calibration coefficient of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.Cterm", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.Cterm", "kind": "variable", "doc": "

    Return the C-term calibration coefficient of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filename", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filename", "kind": "variable", "doc": "

    Return the filename of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.dir_location", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.dir_location", "kind": "variable", "doc": "

    Return the directory location of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.sort_by_mz", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.sort_by_mz", "kind": "function", "doc": "

    Sort the mass spectrum by m/z values.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.sort_by_abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.sort_by_abundance", "kind": "function", "doc": "

    Sort the mass spectrum by abundance values.

    \n", "signature": "(self, reverse=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.tic", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.tic", "kind": "variable", "doc": "

    Return the total ion current of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.check_mspeaks_warning", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.check_mspeaks_warning", "kind": "function", "doc": "

    Check if the mass spectrum has MSpeaks objects.

    \n\n
    Raises
    \n\n
      \n
    • Warning: If the mass spectrum has no MSpeaks objects.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.check_mspeaks", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.check_mspeaks", "kind": "function", "doc": "

    Check if the mass spectrum has MSpeaks objects.

    \n\n
    Raises
    \n\n
      \n
    • Exception: If the mass spectrum has no MSpeaks objects.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.remove_assignment_by_index", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.remove_assignment_by_index", "kind": "function", "doc": "

    Remove the molecular formula assignment of the MSpeaks objects at the specified indexes.

    \n\n
    Parameters
    \n\n
      \n
    • indexes (list of int):\nA list of indexes of the MSpeaks objects to remove the molecular formula assignment from.
    • \n
    \n", "signature": "(self, indexes):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_index", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_index", "kind": "function", "doc": "

    Filter the mass spectrum by the specified indexes.

    \n\n
    Parameters
    \n\n
      \n
    • list_indexes (list of int):\nA list of indexes of the MSpeaks objects to drop.
    • \n
    \n", "signature": "(self, list_indexes):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_mz", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_mz", "kind": "function", "doc": "

    Filter the mass spectrum by the specified m/z range.

    \n\n
    Parameters
    \n\n
      \n
    • min_mz (float):\nThe minimum m/z value to keep.
    • \n
    • max_mz (float):\nThe maximum m/z value to keep.
    • \n
    \n", "signature": "(self, min_mz, max_mz):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_s2n", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_s2n", "kind": "function", "doc": "

    Filter the mass spectrum by the specified signal-to-noise ratio range.

    \n\n
    Parameters
    \n\n
      \n
    • min_s2n (float):\nThe minimum signal-to-noise ratio to keep.
    • \n
    • max_s2n (float, optional):\nThe maximum signal-to-noise ratio to keep. Defaults to False (no maximum).
    • \n
    \n", "signature": "(self, min_s2n, max_s2n=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_abundance", "kind": "function", "doc": "

    Filter the mass spectrum by the specified abundance range.

    \n\n
    Parameters
    \n\n
      \n
    • min_abund (float):\nThe minimum abundance to keep.
    • \n
    • max_abund (float, optional):\nThe maximum abundance to keep. Defaults to False (no maximum).
    • \n
    \n", "signature": "(self, min_abund, max_abund=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_max_resolving_power", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_max_resolving_power", "kind": "function", "doc": "

    Filter the mass spectrum by the specified maximum resolving power.

    \n\n
    Parameters
    \n\n
      \n
    • B (float):

    • \n
    • T (float):

    • \n
    \n", "signature": "(self, B, T):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_mean_resolving_power", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_mean_resolving_power", "kind": "function", "doc": "

    Filter the mass spectrum by the specified mean resolving power.

    \n\n
    Parameters
    \n\n
      \n
    • ndeviations (float, optional):\nThe number of standard deviations to use for filtering. Defaults to 3.
    • \n
    • plot (bool, optional):\nWhether to plot the resolving power distribution. Defaults to False.
    • \n
    • guess_pars (bool, optional):\nWhether to guess the parameters for the Gaussian model. Defaults to False.
    • \n
    \n", "signature": "(self, ndeviations=3, plot=False, guess_pars=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_min_resolving_power", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_min_resolving_power", "kind": "function", "doc": "

    Filter the mass spectrum by the specified minimum resolving power.

    \n\n
    Parameters
    \n\n
      \n
    • B (float):

    • \n
    • T (float):

    • \n
    \n", "signature": "(self, B, T):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.filter_by_noise_threshold", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.filter_by_noise_threshold", "kind": "function", "doc": "

    Filter the mass spectrum by the noise threshold.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.find_peaks", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.find_peaks", "kind": "function", "doc": "

    Find the peaks of the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.change_kendrick_base_all_mspeaks", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.change_kendrick_base_all_mspeaks", "kind": "function", "doc": "

    Change the Kendrick base of all MSpeaks objects.

    \n\n
    Parameters
    \n\n
      \n
    • kendrick_dict_base (dict):\nA dictionary of the Kendrick base to change to.
    • \n
    \n\n
    Notes
    \n\n

    Example of kendrick_dict_base parameter: kendrick_dict_base = {\"C\": 1, \"H\": 2} or {\"C\": 1, \"H\": 1, \"O\":1} etc

    \n", "signature": "(self, kendrick_dict_base):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.get_nominal_mz_first_last_indexes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.get_nominal_mz_first_last_indexes", "kind": "function", "doc": "

    Return the first and last indexes of the MSpeaks objects with the specified nominal mass.

    \n\n
    Parameters
    \n\n
      \n
    • nominal_mass (int):\nThe nominal mass to get the indexes for.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the first and last indexes of the MSpeaks objects with the specified nominal mass.
    • \n
    \n", "signature": "(self, nominal_mass):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.get_masses_count_by_nominal_mass", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.get_masses_count_by_nominal_mass", "kind": "function", "doc": "

    Return a dictionary of the nominal masses and their counts.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.datapoints_count_by_nominal_mz", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.datapoints_count_by_nominal_mz", "kind": "function", "doc": "

    Return a dictionary of the nominal masses and their counts.

    \n\n
    Parameters
    \n\n
      \n
    • mz_overlay (float, optional):\nThe m/z overlay to use for counting. Defaults to 0.1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary of the nominal masses and their counts.
    • \n
    \n", "signature": "(self, mz_overlay=0.1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.get_nominal_mass_indexes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.get_nominal_mass_indexes", "kind": "function", "doc": "

    Return the indexes of the MSpeaks objects with the specified nominal mass.

    \n\n
    Parameters
    \n\n
      \n
    • nominal_mass (int):\nThe nominal mass to get the indexes for.
    • \n
    • overlay (float, optional):\nThe m/z overlay to use for counting. Defaults to 0.1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • generator: A generator of the indexes of the MSpeaks objects with the specified nominal mass.
    • \n
    \n", "signature": "(self, nominal_mass, overlay=0.1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.plot_centroid", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.plot_centroid", "kind": "function", "doc": "

    Plot the centroid data of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nThe matplotlib axes to plot on. Defaults to None.
    • \n
    • c (str, optional):\nThe color to use for the plot. Defaults to 'g' (green).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes.Axes: The matplotlib axes containing the plot.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If no centroid data is found.
    • \n
    \n", "signature": "(self, ax=None, c='g'):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.plot_profile_and_noise_threshold", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.plot_profile_and_noise_threshold", "kind": "function", "doc": "

    Plot the profile data and noise threshold of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes.Axes, optional):\nThe matplotlib axes to plot on. Defaults to None.
    • \n
    • legend (bool, optional):\nWhether to show the legend. Defaults to False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes.Axes: The matplotlib axes containing the plot.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If no noise threshold is found.
    • \n
    \n", "signature": "(self, ax=None, legend=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.plot_mz_domain_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.plot_mz_domain_profile", "kind": "function", "doc": "

    Plot the m/z domain profile of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • color (str, optional):\nThe color to use for the plot. Defaults to 'green'.
    • \n
    • ax (matplotlib.axes.Axes, optional):\nThe matplotlib axes to plot on. Defaults to None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes.Axes: The matplotlib axes containing the plot.
    • \n
    \n", "signature": "(self, color='green', ax=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_excel", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_excel", "kind": "function", "doc": "

    Export the mass spectrum to an Excel file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe path to the Excel file to export to.
    • \n
    • write_metadata (bool, optional):\nWhether to write the metadata to the Excel file. Defaults to True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None
    • \n
    \n", "signature": "(self, out_file_path, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_hdf", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_hdf", "kind": "function", "doc": "

    Export the mass spectrum to an HDF file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe path to the HDF file to export to.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None
    • \n
    \n", "signature": "(self, out_file_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_csv", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_csv", "kind": "function", "doc": "

    Export the mass spectrum to a CSV file.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe path to the CSV file to export to.
    • \n
    • write_metadata (bool, optional):\nWhether to write the metadata to the CSV file. Defaults to True.
    • \n
    \n", "signature": "(self, out_file_path, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_pandas", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_pandas", "kind": "function", "doc": "

    Export the mass spectrum to a Pandas dataframe with pkl extension.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe path to the pickle (.pkl) file to export to.
    • \n
    • write_metadata (bool, optional):\nWhether to also write the metadata. Defaults to True.
    • \n
    \n", "signature": "(self, out_file_path, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_dataframe", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_dataframe", "kind": "function", "doc": "

    Return the mass spectrum as a Pandas dataframe.

    \n\n
    Parameters
    \n\n
      \n
    • additional_columns (list, optional):\nA list of additional columns to include in the dataframe. Defaults to None.\nSuitable columns are: \"Aromaticity Index\", \"Aromaticity Index (modified)\", and \"NOSC\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: The mass spectrum as a Pandas dataframe.
    • \n
    \n", "signature": "(self, additional_columns=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.to_json", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.to_json", "kind": "function", "doc": "

    Return the mass spectrum as a JSON file.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.parameters_json", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.parameters_json", "kind": "function", "doc": "

    Return the parameters of the mass spectrum as a JSON string.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecBase.parameters_toml", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecBase.parameters_toml", "kind": "function", "doc": "

    Return the parameters of the mass spectrum as a TOML string.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecProfile", "kind": "class", "doc": "

    A mass spectrum class when the entry point is on profile format

    \n\n
    Notes
    \n\n

    Stores the profile data and instrument settings.\nIteration over a list of MSPeaks classes stored at the _mspeaks attributes.\n_mspeaks is populated under the hood by calling process_mass_spec method.\nIteration is null if _mspeaks is empty. Many more attributes and methods inherited from MassSpecBase().

    \n\n
    Parameters
    \n\n
      \n
    • data_dict (dict):\nA dictionary containing the profile data.
    • \n
    • d_params (dict{str: float, int or str}):\ncontains the instrument settings and processing settings
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Defaults to True.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _abundance (ndarray):\nThe abundance values of the mass spectrum.
    • \n
    • _mz_exp (ndarray):\nThe m/z values of the mass spectrum.
    • \n
    • _mspeaks (list):\nA list of mass peaks.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • process_mass_spec(). Process the mass spectrum.
    • \n
    \n\n

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecCentroid()

    \n", "bases": "MassSpecBase"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecProfile.__init__", "kind": "function", "doc": "

    \n", "signature": "(data_dict, d_params, auto_process=True)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq", "kind": "class", "doc": "

    A mass spectrum class when data entry is on frequency domain

    \n\n
    Notes
    \n\n
      \n
    • Transform to m/z based on the settings stored at d_params
    • \n
    • Stores the profile data and instrument settings
    • \n
    • Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    • \n
    • _mspeaks is populated under the hood by calling process_mass_spec method
    • \n
    • iteration is null if _mspeaks is empty
    • \n
    \n\n
    Parameters
    \n\n
      \n
    • frequency_domain (list(float)):\nall datapoints in frequency domain in Hz
    • \n
    • magnitude (list(float)):\nthe magnitude of each datapoint in the frequency domain
    • \n
    • d_params (dict{str: float, int or str}):\ncontains the instrument settings and processing settings
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Defaults to True.
    • \n
    • keep_profile (bool, optional):\nWhether to keep the profile data. Defaults to True.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • has_frequency (bool):\nWhether the mass spectrum has frequency data.
    • \n
    • _frequency_domain (list(float)):\nFrequency domain in Hz
    • \n
    • label (str):\nstore label (Bruker, Midas Transient, see Labels class ). It across distinct processing points
    • \n
    • _abundance (ndarray):\nThe abundance values of the mass spectrum.
    • \n
    • _mz_exp (ndarray):\nThe m/z values of the mass spectrum.
    • \n
    • _mspeaks (list):\nA list of mass peaks.
    • \n
    • See Also (all the attributes of MassSpecBase class):
    • \n
    \n\n
    Methods
    \n\n
      \n
    • _set_mz_domain().\ncalculates the m_z based on the setting of d_params
    • \n
    • process_mass_spec(). Process the mass spectrum.
    • \n
    \n\n

    see also: MassSpecBase(), MassSpecProfile(), MassSpecCentroid()

    \n", "bases": "MassSpecBase"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfrequency_domain,\tmagnitude,\td_params,\tauto_process=True,\tkeep_profile=True)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.has_frequency", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.has_frequency", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.magnetron_frequency", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.magnetron_frequency", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.magnetron_frequency_sigma", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.magnetron_frequency_sigma", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.transient_settings", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.transient_settings", "kind": "variable", "doc": "

    Return the transient settings of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq.calc_magnetron_freq", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecfromFreq.calc_magnetron_freq", "kind": "function", "doc": "

    Calculates the magnetron frequency of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • max_magnetron_freq (float, optional):\nThe maximum magnetron frequency. Defaults to 50.
    • \n
    • magnetron_freq_bins (int, optional):\nThe number of bins to use for the histogram. Defaults to 300.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None
    • \n
    \n\n
    Notes
    \n\n

    Calculates the magnetron frequency by examining all the picked peaks and the distances between them in the frequency domain.\nA histogram of those values below the threshold 'max_magnetron_freq' with the 'magnetron_freq_bins' number of bins is calculated.\nA Gaussian model is fit to this histogram; its center value is (statistically, probably) the magnetron frequency.\nThis appears to work well for nOmega datasets, but may not work well for 1x datasets or those with very low magnetron peaks.

    \n", "signature": "(self, max_magnetron_freq=50, magnetron_freq_bins=300):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid", "kind": "class", "doc": "

    A mass spectrum class when the entry point is on centroid format

    \n\n
    Notes
    \n\n
      \n
    • Stores the centroid data and instrument settings
    • \n
    • Simulate profile data based on Gaussian or Lorentzian peak shape
    • \n
    • Iteration over a list of MSPeaks classes stored at the _mspeaks attributes
    • \n
    • _mspeaks is populated under the hood by calling process_mass_spec method
    • \n
    • iteration is null if _mspeaks is empty
    • \n
    \n\n
    Parameters
    \n\n
      \n
    • data_dict (dict{str: numpy array float64}):\ncontains keys [m/z, Abundance, Resolving Power, S/N]
    • \n
    • d_params (dict{str: float, int or str}):\ncontains the instrument settings and processing settings
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Defaults to True.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • label (str):\nstore label (Bruker, Midas Transient, see Labels class)
    • \n
    • _baseline_noise (float):\nstore baseline noise
    • \n
    • _baseline_noise_std (float):\nstore baseline noise std
    • \n
    • _abundance (ndarray):\nThe abundance values of the mass spectrum.
    • \n
    • _mz_exp (ndarray):\nThe m/z values of the mass spectrum.
    • \n
    • _mspeaks (list):\nA list of mass peaks.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • process_mass_spec().\nProcess the mass spectrum. Overridden from MassSpecBase. Populates the _mspeaks list with MSpeaks class using the centroid data.
    • \n
    • __simulate_profile__data__().\nSimulate profile data based on Gaussian or Lorentzian peak shape. Needs theoretical resolving power calculation and define peak shape, intended for plotting and inspection purposes only.
    • \n
    \n\n

    see also: MassSpecBase(), MassSpecfromFreq(), MassSpecProfile()

    \n", "bases": "MassSpecBase"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.__init__", "kind": "function", "doc": "

    \n", "signature": "(data_dict, d_params, auto_process=True)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.is_centroid", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.is_centroid", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.data_dict", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.data_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.mz_exp_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.mz_exp_profile", "kind": "variable", "doc": "

    Return the m/z profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.abundance_profile", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.abundance_profile", "kind": "variable", "doc": "

    Return the abundance profile of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.tic", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.tic", "kind": "variable", "doc": "

    Return the total ion current of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid.process_mass_spec", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroid.process_mass_spec", "kind": "function", "doc": "

    Process the mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes", "kind": "class", "doc": "

    A mass spectrum class when the entry point is on low resolution centroid format

    \n\n
    Notes
    \n\n

    Does not store MSPeak Objs, will iterate over mz, abundance pairs instead

    \n\n
    Parameters
    \n\n
      \n
    • data_dict (dict{str: numpy array float64}):\ncontains keys [m/z, Abundance, Resolving Power, S/N]
    • \n
    • d_params (dict{str: float, int or str}):\ncontains the instrument settings and processing settings
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _processed_tic (float):\nstore processed total ion current
    • \n
    • _abundance (ndarray):\nThe abundance values of the mass spectrum.
    • \n
    • _mz_exp (ndarray):\nThe m/z values of the mass spectrum.
    • \n
    \n", "bases": "MassSpecCentroid"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.__init__", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.__init__", "kind": "function", "doc": "

    \n", "signature": "(data_dict, d_params)"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.mz_exp", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.mz_exp", "kind": "variable", "doc": "

    Return the m/z values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.abundance", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.abundance", "kind": "variable", "doc": "

    Return the abundance values of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.processed_tic", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.processed_tic", "kind": "variable", "doc": "

    Return the processed total ion current of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.tic", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.tic", "kind": "variable", "doc": "

    Return the total ion current of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.mz_abun_tuples", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.mz_abun_tuples", "kind": "variable", "doc": "

    Return the m/z and abundance values of the mass spectrum as a list of tuples.

    \n"}, {"fullname": "corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroidLowRes.mz_abun_dict", "modulename": "corems.mass_spectrum.factory.MassSpectrumClasses", "qualname": "MassSpecCentroidLowRes.mz_abun_dict", "kind": "variable", "doc": "

    Return the m/z and abundance values of the mass spectrum as a dictionary.

    \n"}, {"fullname": "corems.mass_spectrum.input", "modulename": "corems.mass_spectrum.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass", "modulename": "corems.mass_spectrum.input.baseClass", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass", "kind": "class", "doc": "

    The MassListBaseClass object reads mass list data types and returns the mass spectrum obj

    \n\n
    Parameters
    \n\n
      \n
    • file_location (Path or S3Path):\nFull data path.
    • \n
    • isCentroid (bool, optional):\nDetermines the mass spectrum data structure. If set to True, it assumes centroid mode. If set to False, it assumes profile mode and attempts to peak pick. Default is True.
    • \n
    • analyzer (str, optional):\nThe analyzer used for the mass spectrum. Default is 'Unknown'.
    • \n
    • instrument_label (str, optional):\nThe label of the instrument used for the mass spectrum. Default is 'Unknown'.
    • \n
    • sample_name (str, optional):\nThe name of the sample. Default is None.
    • \n
    • header_lines (int, optional):\nThe number of lines to skip in the file, including the column labels line. Default is 0.
    • \n
    • isThermoProfile (bool, optional):\nDetermines the number of expected columns in the file. If set to True, only m/z and intensity columns are expected. Signal-to-noise ratio (S/N) and resolving power (RP) will be calculated based on the data. Default is False.
    • \n
    • headerless (bool, optional):\nIf True, assumes that there are no headers present in the file (e.g., a .xy file from Bruker) and assumes two columns: m/z and intensity. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • parameters (DataInputSetting):\nThe data input settings for the mass spectrum.
    • \n
    • data_type (str):\nThe type of data in the file.
    • \n
    • delimiter (str):\nThe delimiter used to read text-based files.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • set_parameter_from_toml(parameters_path). Sets the data input settings from a TOML file.
    • \n
    • set_parameter_from_json(parameters_path). Sets the data input settings from a JSON file.
    • \n
    • get_dataframe(). Reads the file and returns the data as a pandas DataFrame.
    • \n
    • load_settings(mass_spec_obj, output_parameters). Loads the settings for the mass spectrum.
    • \n
    • get_output_parameters(polarity, scan_index=0). Returns the output parameters for the mass spectrum.
    • \n
    • clean_data_frame(dataframe). Cleans the data frame by removing columns that are not in the expected columns set.
    • \n
    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.__init__", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tfile_location: pathlib.Path | s3path.S3Path,\tisCentroid: bool = True,\tanalyzer: str = 'Unknown',\tinstrument_label: str = 'Unknown',\tsample_name: str = None,\theader_lines: int = 0,\tisThermoProfile: bool = False,\theaderless: bool = False)"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.file_location", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.header_lines", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.header_lines", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.isCentroid", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.isCentroid", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.isThermoProfile", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.isThermoProfile", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.headerless", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.headerless", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.analyzer", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.analyzer", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.instrument_label", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.instrument_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.sample_name", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.sample_name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.parameters", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.parameters", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.set_parameter_from_toml", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.set_parameter_from_toml", "kind": "function", "doc": "

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.set_parameter_from_json", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.set_parameter_from_json", "kind": "function", "doc": "

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.data_type", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.data_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.delimiter", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.delimiter", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.encoding_detector", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.encoding_detector", "kind": "function", "doc": "

    Detects the encoding of a file.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\nThe location of the file to be analyzed.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: The detected encoding of the file.
    • \n
    \n", "signature": "(self, file_location) -> str:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.set_data_type", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.set_data_type", "kind": "function", "doc": "

    Set the data type and delimiter based on the file extension.

    \n\n
    Raises
    \n\n
      \n
    • TypeError: If the data type could not be automatically recognized.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.get_dataframe", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.get_dataframe", "kind": "function", "doc": "

    Get the data as a pandas DataFrame.

    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: The data as a pandas DataFrame.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • TypeError: If the data type is not supported.
    • \n
    \n", "signature": "(self) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.load_settings", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.load_settings", "kind": "function", "doc": "

    TODO loading output parameters from json file is not functional

    \n\n

    Load settings from a JSON file and apply them to the given mass_spec_obj.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec_obj (MassSpec):\nThe mass spectrum object to apply the settings to.
    • \n
    \n", "signature": "(self, mass_spec_obj, output_parameters):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.get_output_parameters", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.get_output_parameters", "kind": "function", "doc": "

    Get the output parameters for the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (int):\nThe polarity of the mass spectrum, +1 or -1.
    • \n
    • scan_index (int, optional):\nThe index of the scan. Default is 0.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary containing the output parameters.
    • \n
    \n", "signature": "(self, polarity: int, scan_index: int = 0) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.clean_data_frame", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.clean_data_frame", "kind": "function", "doc": "

    Clean the input dataframe by removing columns that are not expected.

    \n\n
    Parameters
    \n\n
      \n
    • dataframe (pandas.DataFrame):\nThe input dataframe to be cleaned.
    • \n
    \n", "signature": "(self, dataframe):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.check_columns", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.check_columns", "kind": "function", "doc": "

    Check if the given header labels match the expected columns.

    \n\n
    Parameters
    \n\n
      \n
    • header_labels (list):\nThe header labels to be checked.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If any expected column is not found in the header labels.
    • \n
    \n", "signature": "(self, header_labels: list[str]):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.read_xml_peaks", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.read_xml_peaks", "kind": "function", "doc": "

    Read peaks from a Bruker .xml file and return a pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • data (str):\nThe path to the .xml file.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A DataFrame containing the peak data with columns: 'm/z', 'I', 'Resolving Power', 'Area', 'S/N', 'fwhm'.
    • \n
    \n", "signature": "(self, data: str) -> pandas.core.frame.DataFrame:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.baseClass.MassListBaseClass.get_xml_polarity", "modulename": "corems.mass_spectrum.input.baseClass", "qualname": "MassListBaseClass.get_xml_polarity", "kind": "function", "doc": "

    Get the polarity from an XML peaklist.

    \n\n
    Returns
    \n\n
      \n
    • int: The polarity of the XML peaklist. Returns -1 for negative polarity, +1 for positive polarity.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If the data type is not XML peaklist in Bruker format or if the polarity is unhandled.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5", "modulename": "corems.mass_spectrum.input.boosterHDF5", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum", "kind": "class", "doc": "

    The ReadHDF_BoosterMassSpectrum class parses the mass spectrum data from an HDF file and generates a mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\nThe path to the HDF file.
    • \n
    • isCentroid (bool, optional):\nSpecifies whether the mass spectrum is centroided or not. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • polarity (int):\nThe polarity of the mass spectrum.
    • \n
    • h5pydata (h5py.File):\nThe HDF file object.
    • \n
    • scans (list):\nThe list of scan names in the HDF file.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_data_profile(mz, abundance, auto_process). Returns a MassSpecProfile object from the given m/z and abundance arrays.
    • \n
    • get_attr_data(scan, attr_srt). Returns the attribute value for the given scan and attribute name.
    • \n
    • get_polarity(file_location). Returns the polarity of the mass spectrum.
    • \n
    • get_mass_spectrum(auto_process). Returns the mass spectrum as a MassSpecProfile object.
    • \n
    • get_output_parameters(). Returns the default output parameters for the mass spectrum.
    • \n
    \n", "bases": "corems.mass_spectrum.input.baseClass.MassListBaseClass"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.__init__", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.__init__", "kind": "function", "doc": "

    \n", "signature": "(file_location, isCentroid=False)"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.polarity", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.polarity", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_data_profile", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_data_profile", "kind": "function", "doc": "

    Returns a MassSpecProfile object from the given m/z and abundance arrays.

    \n\n
    Parameters
    \n\n
      \n
    • mz (array_like):\nThe m/z values.
    • \n
    • abundance (array_like):\nThe abundance values.
    • \n
    • auto_process (bool):\nSpecifies whether to automatically process the mass spectrum.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile: The MassSpecProfile object.
    • \n
    \n", "signature": "(\tself,\tmz,\tabundance,\tauto_process) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_attr_data", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_attr_data", "kind": "function", "doc": "

    Returns the attribute value for the given scan and attribute name.

    \n\n
    Parameters
    \n\n
      \n
    • scan (int):\nThe scan index.
    • \n
    • attr_srt (str):\nThe attribute name.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • object: The attribute value.
    • \n
    \n", "signature": "(self, scan, attr_srt):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_polarity", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_polarity", "kind": "function", "doc": "

    Returns the polarity of the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • file_location (str):\nThe path to the HDF file.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The polarity of the mass spectrum.
    • \n
    \n", "signature": "(self, file_location: str | s3path.S3Path) -> int:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_mass_spectrum", "kind": "function", "doc": "

    Returns the mass spectrum as a MassSpecProfile object.

    \n\n
    Parameters
    \n\n
      \n
    • auto_process (bool, optional):\nSpecifies whether to automatically process the mass spectrum. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile: The MassSpecProfile object.
    • \n
    \n", "signature": "(\tself,\tauto_process=True) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecProfile:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.boosterHDF5.ReadHDF_BoosterMassSpectrum.get_output_parameters", "modulename": "corems.mass_spectrum.input.boosterHDF5", "qualname": "ReadHDF_BoosterMassSpectrum.get_output_parameters", "kind": "function", "doc": "

    Returns the default output parameters for the mass spectrum.

    \n\n
    Returns
    \n\n
      \n
    • dict: The default output parameters.
    • \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5", "modulename": "corems.mass_spectrum.input.coremsHDF5", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum", "kind": "class", "doc": "

    Class for reading mass spectrum data from a CoreMS HDF5 file.

    \n\n
    Attributes
    \n\n
      \n
    • h5pydata (h5py.File):\nThe HDF5 file object.
    • \n
    • scans (list):\nList of scan labels in the HDF5 file.
    • \n
    \n\n
    Parameters
    \n\n
      \n
    • file_location (str or S3Path):\nThe path to the CoreMS HDF5 file.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • load_raw_data(mass_spectrum, scan_index=0). Load raw data into the mass spectrum object.
    • \n
    • get_mass_spectrum(scan_number=0, time_index=-1, auto_process=True, load_settings=True, load_raw=True, load_molecular_formula=True). Get a mass spectrum object.
    • \n
    • load_settings(mass_spectrum, scan_index=0, time_index=-1). Load settings into the mass spectrum object.
    • \n
    • get_dataframe(scan_index=0, time_index=-1). Get a pandas DataFrame representing the mass spectrum.
    • \n
    • get_time_index_to_pull(scan_label, time_index). Get the time index to pull from the HDF5 file.
    • \n
    • get_high_level_attr_data(attr_str). Get high-level attribute data from the HDF5 file.
    • \n
    • get_scan_group_attr_data(scan_index, time_index, attr_group, attr_srt=None). Get scan group attribute data from the HDF5 file.
    • \n
    • get_raw_data_attr_data(scan_index, attr_group, attr_str). Get raw data attribute data from the HDF5 file.
    • \n
    • get_output_parameters(polarity, scan_index=0). Get the output parameters for the mass spectrum.
    • \n
    \n", "bases": "corems.mass_spectrum.input.massList.ReadCoremsMasslist"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.__init__", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.__init__", "kind": "function", "doc": "

    \n", "signature": "(file_location)"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.h5pydata", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.h5pydata", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.scans", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.scans", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.load_raw_data", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.load_raw_data", "kind": "function", "doc": "

    Load raw data into the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpecCentroid):\nThe mass spectrum object to load the raw data into.
    • \n
    • scan_index (int, optional):\nThe index of the scan to load the raw data from. Default is 0.
    • \n
    \n", "signature": "(self, mass_spectrum, scan_index=0):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_mass_spectrum", "kind": "function", "doc": "

    Instantiate a mass spectrum object from the CoreMS HDF5 file.\nNote that this always returns a centroid mass spectrum object; functionality for profile and\nfrequency mass spectra is not yet implemented.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int, optional):\nThe index of the scan to retrieve the mass spectrum from. Default is 0.
    • \n
    • time_index (int, optional):\nThe index of the time point to retrieve the mass spectrum from. Default is -1.
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Default is True.
    • \n
    • load_settings (bool, optional):\nWhether to load the settings into the mass spectrum object. Default is True.
    • \n
    • load_raw (bool, optional):\nWhether to load the raw data into the mass spectrum object. Default is True.
    • \n
    • load_molecular_formula (bool, optional):\nWhether to load the molecular formula into the mass spectrum object.\nDefault is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecCentroid: The mass spectrum object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the CoreMS file is not valid.\nIf the mass spectrum has not been processed and load_molecular_formula is True.
    • \n
    \n", "signature": "(\tself,\tscan_number=0,\ttime_index=-1,\tauto_process=True,\tload_settings=True,\tload_raw=True,\tload_molecular_formula=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.load_settings", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.load_settings", "kind": "function", "doc": "

    Load settings into the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpecCentroid):\nThe mass spectrum object to load the settings into.
    • \n
    • scan_index (int, optional):\nThe index of the scan to load the settings from. Default is 0.
    • \n
    • time_index (int, optional):\nThe index of the time point to load the settings from. Default is -1.
    • \n
    \n", "signature": "(self, mass_spectrum, scan_index=0, time_index=-1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_dataframe", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_dataframe", "kind": "function", "doc": "

    Get a pandas DataFrame representing the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • scan_index (int, optional):\nThe index of the scan to retrieve the DataFrame from. Default is 0.
    • \n
    • time_index (int, optional):\nThe index of the time point to retrieve the DataFrame from. Default is -1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The pandas DataFrame representing the mass spectrum.
    • \n
    \n", "signature": "(self, scan_index=0, time_index=-1):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_time_index_to_pull", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_time_index_to_pull", "kind": "function", "doc": "

    Get the time index to pull from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • scan_label (str):\nThe label of the scan.
    • \n
    • time_index (int):\nThe index of the time point.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: The time index to pull.
    • \n
    \n", "signature": "(self, scan_label, time_index):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_high_level_attr_data", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_high_level_attr_data", "kind": "function", "doc": "

    Get high-level attribute data from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • attr_str (str):\nThe attribute string.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The attribute data.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • KeyError: If the attribute string is not found in the HDF5 file.
    • \n
    \n", "signature": "(self, attr_str):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_scan_group_attr_data", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_scan_group_attr_data", "kind": "function", "doc": "

    Get scan group attribute data from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • scan_index (int):\nThe index of the scan.
    • \n
    • time_index (int):\nThe index of the time point.
    • \n
    • attr_group (str):\nThe attribute group.
    • \n
    • attr_srt (str, optional):\nThe attribute string. Default is None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The attribute data.
    • \n
    \n\n
    Notes
    \n\n

    This method retrieves attribute data from the HDF5 file for a specific scan and time point.\nThe attribute data is stored in the specified attribute group.\nIf an attribute string is provided, only the corresponding attribute value is returned.\nIf no attribute string is provided, all attribute data in the group is returned as a dictionary.

    \n", "signature": "(self, scan_index, time_index, attr_group, attr_srt=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_raw_data_attr_data", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_raw_data_attr_data", "kind": "function", "doc": "

    Get raw data attribute data from the HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • scan_index (int):\nThe index of the scan.
    • \n
    • attr_group (str):\nThe attribute group.
    • \n
    • attr_str (str):\nThe attribute string.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The attribute data.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • KeyError: If the attribute string is not found in the attribute group.
    • \n
    \n\n
    Notes
    \n\n

    This method retrieves the attribute data associated with a specific scan, attribute group, and attribute string\nfrom the HDF5 file. It returns the attribute data as a dictionary.

    \n\n

    Example usage:

    \n\n
    \n
    >>> data = get_raw_data_attr_data(0, "group1", "attribute1")\n>>> print(data)\n{'key1': 'value1', 'key2': 'value2'}\n
    \n
    \n", "signature": "(self, scan_index, attr_group, attr_str):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.coremsHDF5.ReadCoreMSHDF_MassSpectrum.get_output_parameters", "modulename": "corems.mass_spectrum.input.coremsHDF5", "qualname": "ReadCoreMSHDF_MassSpectrum.get_output_parameters", "kind": "function", "doc": "

    Get the output parameters for the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (str):\nThe polarity of the mass spectrum.
    • \n
    • scan_index (int, optional):\nThe index of the scan. Default is 0.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The output parameters.
    • \n
    \n", "signature": "(self, polarity, scan_index=0):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.massList", "modulename": "corems.mass_spectrum.input.massList", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.massList.ReadCoremsMasslist", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadCoremsMasslist", "kind": "class", "doc": "

    The ReadCoremsMasslist object reads processed mass list data types\nand returns the mass spectrum object with the molecular formula object.

    \n\n

    Only available for the centroid mass spectrum type; the isCentroid parameter is ignored.\nPlease see MassListBaseClass for more details.

    \n", "bases": "corems.mass_spectrum.input.baseClass.MassListBaseClass"}, {"fullname": "corems.mass_spectrum.input.massList.ReadCoremsMasslist.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadCoremsMasslist.get_mass_spectrum", "kind": "function", "doc": "

    Get the mass spectrum object from the processed mass list data.

    \n\n
    Parameters
    \n\n
      \n
    • loadSettings (bool, optional):\nWhether to load the settings for the mass spectrum. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecCentroid: The mass spectrum object.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the input file is not a valid CoreMS file.
    • \n
    \n", "signature": "(\tself,\tloadSettings: bool = True) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecCentroid:", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.massList.ReadCoremsMasslist.add_molecular_formula", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadCoremsMasslist.add_molecular_formula", "kind": "function", "doc": "

    Add molecular formula information to the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spec_obj (MassSpecCentroid):\nThe mass spectrum object to add the molecular formula to.
    • \n
    • dataframe (pandas.DataFrame):\nThe processed mass list data.
    • \n
    \n", "signature": "(self, mass_spec_obj, dataframe):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.massList.ReadMassList", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadMassList", "kind": "class", "doc": "

    The ReadMassList object reads unprocessed mass list data types\nand returns the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • MassListBaseClass (class):\nThe base class for reading mass list data types.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_mass_spectrum(polarity, scan=0, auto_process=True, loadSettings=True). Reads mass list data types and returns the mass spectrum object.
    • \n
    \n", "bases": "corems.mass_spectrum.input.baseClass.MassListBaseClass"}, {"fullname": "corems.mass_spectrum.input.massList.ReadMassList.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadMassList.get_mass_spectrum", "kind": "function", "doc": "

    Reads mass list data types and returns the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (int):\nThe polarity of the mass spectrum (+1 or -1).
    • \n
    • scan (int, optional):\nThe scan number of the mass spectrum (default is 0).
    • \n
    • auto_process (bool, optional):\nFlag indicating whether to automatically process the mass spectrum (default is True).
    • \n
    • loadSettings (bool, optional):\nFlag indicating whether to load settings for the mass spectrum (default is True).
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mass_spec (MassSpecCentroid or MassSpecProfile):\nThe mass spectrum object.
    • \n
    \n", "signature": "(\tself,\tpolarity: int,\tscan: int = 0,\tauto_process: bool = True,\tloadSettings: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.massList.ReadBrukerXMLList", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadBrukerXMLList", "kind": "class", "doc": "

    The ReadBrukerXMLList object reads Bruker XML objects\nand returns the mass spectrum object.\nSee MassListBaseClass for details.

    \n\n
    Parameters
    \n\n
      \n
    • MassListBaseClass (class):\nThe base class for reading mass list data types and returning the mass spectrum object.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_mass_spectrum(polarity: bool = None, scan: int = 0, auto_process: bool = True, loadSettings: bool = True). Reads mass list data types and returns the mass spectrum object.
    • \n
    \n", "bases": "corems.mass_spectrum.input.baseClass.MassListBaseClass"}, {"fullname": "corems.mass_spectrum.input.massList.ReadBrukerXMLList.get_mass_spectrum", "modulename": "corems.mass_spectrum.input.massList", "qualname": "ReadBrukerXMLList.get_mass_spectrum", "kind": "function", "doc": "

    Reads mass list data types and returns the mass spectrum object.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (bool, optional):\nThe polarity of the mass spectrum. Can be +1 or -1. If not provided, it will be determined from the XML file.
    • \n
    • scan (int, optional):\nThe scan number of the mass spectrum. Default is 0.
    • \n
    • auto_process (bool, optional):\nWhether to automatically process the mass spectrum. Default is True.
    • \n
    • loadSettings (bool, optional):\nWhether to load the settings for the mass spectrum. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mass_spec (MassSpecCentroid):\nThe mass spectrum object representing the centroided mass spectrum.
    • \n
    \n", "signature": "(\tself,\tpolarity: bool = None,\tscan: int = 0,\tauto_process: bool = True,\tloadSettings: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.numpyArray", "modulename": "corems.mass_spectrum.input.numpyArray", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.input.numpyArray.ms_from_array_profile", "modulename": "corems.mass_spectrum.input.numpyArray", "qualname": "ms_from_array_profile", "kind": "function", "doc": "

    Create a MassSpecProfile object from an array of m/z values and abundance values.

    \n\n
    Parameters
    \n\n
      \n
    • mz (numpy.ndarray):\nArray of m/z values.
    • \n
    • abundance (numpy.ndarray):\nArray of abundance values.
    • \n
    • dataname (str):\nName of the data.
    • \n
    • polarity (int, optional):\nPolarity of the data. The default is -1.
    • \n
    • auto_process (bool, optional):\nFlag to automatically process the data. The default is True.
    • \n
    • data_type (str, optional):\nType of the data. The default is Labels.simulated_profile.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecProfile: The created MassSpecProfile object.
    • \n
    \n", "signature": "(\tmz,\tabundance,\tdataname: str,\tpolarity: int = -1,\tauto_process: bool = True,\tdata_type: str = 'Simulated Profile'):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.numpyArray.ms_from_array_centroid", "modulename": "corems.mass_spectrum.input.numpyArray", "qualname": "ms_from_array_centroid", "kind": "function", "doc": "

    Create a MassSpecCentroid object from an array of m/z values, abundance values, resolving power, and signal-to-noise ratio.

    \n\n
    Parameters
    \n\n
      \n
    • mz (numpy.ndarray):\nArray of m/z values.
    • \n
    • abundance (numpy.ndarray):\nArray of abundance values.
    • \n
    • rp (list(float)):\nList of resolving power values.
    • \n
    • s2n (list(float)):\nList of signal-to-noise ratio values.
    • \n
    • dataname (str):\nName of the data.
    • \n
    • polarity (int, optional):\nPolarity of the data. The default is -1.
    • \n
    • auto_process (bool, optional):\nFlag to automatically process the data. The default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecCentroid: The created MassSpecCentroid object.
    • \n
    \n", "signature": "(\tmz,\tabundance,\trp: list[float],\ts2n: list[float],\tdataname: str,\tpolarity: int = -1,\tauto_process: bool = True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.input.numpyArray.get_output_parameters", "modulename": "corems.mass_spectrum.input.numpyArray", "qualname": "get_output_parameters", "kind": "function", "doc": "

    Generate the output parameters for creating a MassSpecProfile or MassSpecCentroid object.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (int):\nPolarity of the data.
    • \n
    • file_location (str):\nFile location.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: Output parameters.
    • \n
    \n", "signature": "(polarity: int, file_location: str):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output", "modulename": "corems.mass_spectrum.output", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export", "modulename": "corems.mass_spectrum.output.export", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport", "kind": "class", "doc": "

    A class for exporting high-resolution mass spectra.

    \n\n
    Parameters
    \n\n
      \n
    • out_file_path (str):\nThe output file path.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    • output_type (str, optional):\nThe type of output file. Defaults to 'excel'. Can be 'excel', 'csv', 'pandas' or 'hdf5'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • output_file (Path):\nThe output file path.
    • \n
    • output_type (str):\nThe type of output file.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    • atoms_order_list (list):\nThe list of assigned atoms in the order specified by Atoms.atoms_order list.
    • \n
    • columns_label (list):\nThe column labels in order.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • save().\nSave the mass spectrum data to the output file.
    • \n
    • run().\nRun the export process.
    • \n
    • get_pandas_df().\nReturns the mass spectrum data as a pandas DataFrame.
    • \n
    • write_settings(output_path, mass_spectrum).\nWrites the settings of the mass spectrum to a JSON file.
    • \n
    • to_pandas(write_metadata=True).\nExports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.
    • \n
    • to_excel(write_metadata=True).\nExports the mass spectrum data to an Excel file.
    • \n
    • to_csv(write_metadata=True).\nExports the mass spectrum data to a CSV file.
    • \n
    • to_json().\nExports the mass spectrum data to a JSON string.
    • \n
    • to_hdf().\nExports the mass spectrum data to an HDF5 file.
    • \n
    • parameters_to_toml().\nConverts the mass spectrum parameters to a TOML string.
    • \n
    • parameters_to_json().\nConverts the mass spectrum parameters to a JSON string.
    • \n
    • get_mass_spec_attrs(mass_spectrum).\nReturns the mass spectrum attributes as a dictionary.
    • \n
    • get_all_used_atoms_in_order(mass_spectrum).\nReturns the list of assigned atoms in the order specified by Atoms.atoms_order list.
    • \n
    • list_dict_to_list(mass_spectrum, is_hdf5=False).\nReturns the mass spectrum data as a list of dictionaries.
    • \n
    • get_list_dict_data(mass_spectrum, include_no_match=True, include_isotopologues=True, isotopologue_inline=True, no_match_inline=False, is_hdf5=False).\nReturns the mass spectrum data as a list of dictionaries.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.__init__", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(out_file_path, mass_spectrum, output_type='excel')"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.output_file", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.output_file", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.output_type", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.output_type", "kind": "variable", "doc": "

    Returns the output type of the mass spectrum.

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.mass_spectrum", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.mass_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.atoms_order_list", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.atoms_order_list", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.save", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.save", "kind": "function", "doc": "

    Save the mass spectrum data to the output file.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If the output type is not supported.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.run", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.run", "kind": "function", "doc": "

    Run the export process.

    \n\n

    This method is called when the thread starts.\nIt calls the save method to perform the export.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.get_pandas_df", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.get_pandas_df", "kind": "function", "doc": "

    Returns the mass spectrum data as a pandas DataFrame.

    \n\n
    Parameters
    \n\n
      \n
    • additional_columns (list, optional):\nAdditional columns to include in the DataFrame. Defaults to None.\nSuitable additional columns are: 'Aromaticity Index', 'NOSC', 'Aromaticity Index (modified)'.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: The mass spectrum data as a pandas DataFrame.
    • \n
    \n", "signature": "(self, additional_columns=None):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.write_settings", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.write_settings", "kind": "function", "doc": "

    Writes the settings of the mass spectrum to a JSON file.

    \n\n
    Parameters
    \n\n
      \n
    • output_path (str):\nThe output file path.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    \n", "signature": "(self, output_path, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_pandas", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_pandas", "kind": "function", "doc": "

    Exports the mass spectrum data to a pandas DataFrame and saves it as a pickle file.

    \n\n
    Parameters
    \n\n
      \n
    • write_metadata (bool, optional):\nWhether to write the metadata to a JSON file. Defaults to True.
    • \n
    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_excel", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_excel", "kind": "function", "doc": "

    Exports the mass spectrum data to an Excel file.

    \n\n
    Parameters
    \n\n
      \n
    • write_metadata (bool, optional):\nWhether to write the metadata to a JSON file. Defaults to True.
    • \n
    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_csv", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_csv", "kind": "function", "doc": "

    Exports the mass spectrum data to a CSV file.

    \n\n
    Parameters
    \n\n
      \n
    • write_metadata (bool, optional):\nWhether to write the metadata to a JSON file. Defaults to True.
    • \n
    \n", "signature": "(self, write_metadata=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_json", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_json", "kind": "function", "doc": "

    Exports the mass spectrum data to a JSON string.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.add_mass_spectrum_to_hdf5", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.add_mass_spectrum_to_hdf5", "kind": "function", "doc": "

    Adds the mass spectrum data to an HDF5 file.

    \n\n
    Parameters
    \n\n
      \n
    • hdf_handle (h5py.File):\nThe HDF5 file handle.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to add to the HDF5 file.
    • \n
    • group_key (str):\nThe group key (where to add the mass spectrum data within the HDF5 file).
    • \n
    • mass_spectra_group (h5py.Group, optional):\nThe mass spectra group. Defaults to None (no group, mass spectrum is added to the root).
    • \n
    • export_raw (bool, optional):\nWhether to export the raw data. Defaults to True.\nIf False, only the processed data (peaks) is exported (essentially centroided data).
    • \n
    \n", "signature": "(\tself,\thdf_handle,\tmass_spectrum,\tgroup_key,\tmass_spectra_group=None,\texport_raw=True):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.to_hdf", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.to_hdf", "kind": "function", "doc": "

    Exports the mass spectrum data to an HDF5 file.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.parameters_to_toml", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.parameters_to_toml", "kind": "function", "doc": "

    Converts the mass spectrum parameters to a TOML string.

    \n\n
    Returns
    \n\n
      \n
    • str: The TOML string of the mass spectrum parameters.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.parameters_to_json", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.parameters_to_json", "kind": "function", "doc": "

    Converts the mass spectrum parameters to a JSON string.

    \n\n
    Returns
    \n\n
      \n
    • str: The JSON string of the mass spectrum parameters.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.get_mass_spec_attrs", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.get_mass_spec_attrs", "kind": "function", "doc": "

    Returns the mass spectrum attributes as a dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The mass spectrum attributes.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.get_all_used_atoms_in_order", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.get_all_used_atoms_in_order", "kind": "function", "doc": "

    Returns the list of assigned atoms in the order specified by Atoms.atoms_order list.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: The list of assigned atoms in the order specified by Atoms.atoms_order list.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.list_dict_to_list", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.list_dict_to_list", "kind": "function", "doc": "

    Returns the mass spectrum data as a list of dictionaries.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    • is_hdf5 (bool, optional):\nWhether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: The mass spectrum data as a list of dictionaries.
    • \n
    \n", "signature": "(self, mass_spectrum, is_hdf5=False):", "funcdef": "def"}, {"fullname": "corems.mass_spectrum.output.export.HighResMassSpecExport.get_list_dict_data", "modulename": "corems.mass_spectrum.output.export", "qualname": "HighResMassSpecExport.get_list_dict_data", "kind": "function", "doc": "

    Returns the mass spectrum data as a list of dictionaries.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum to export.
    • \n
    • include_no_match (bool, optional):\nWhether to include unassigned (no match) data. Defaults to True.
    • \n
    • include_isotopologues (bool, optional):\nWhether to include isotopologues. Defaults to True.
    • \n
    • isotopologue_inline (bool, optional):\nWhether to include isotopologues inline. Defaults to True.
    • \n
    • no_match_inline (bool, optional):\nWhether to include unassigned (no match) data inline. Defaults to False.
    • \n
    • is_hdf5 (bool, optional):\nWhether the mass spectrum is being exported to an HDF5 file. Defaults to False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: The mass spectrum data as a list of dictionaries.
    • \n
    \n", "signature": "(\tself,\tmass_spectrum,\tinclude_no_match=True,\tinclude_isotopologues=True,\tisotopologue_inline=True,\tno_match_inline=False,\tis_hdf5=False,\tadditional_columns=None):", "funcdef": "def"}, {"fullname": "corems.molecular_formula", "modulename": "corems.molecular_formula", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.calc", "modulename": "corems.molecular_formula.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.isospec_version", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "isospec_version", "kind": "variable", "doc": "

    \n", "default_value": "'2.2.2'"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "MolecularFormulaCalc", "kind": "class", "doc": "

    Class of calculations related to molecular formula

    \n\n

    This class is not intended to be used directly, but rather to be inherited by other classes in the molecular_formula/factory module like MolecularFormula, MolecularFormulaIsotopologue, and LCMSLibRefMolecularFormula

    \n\n
    Attributes
    \n\n
      \n
    • mz_calc (float):\nThe m/z value of the molecular formula.
    • \n
    • neutral_mass (float):\nThe neutral mass of the molecular formula.
    • \n
    • ion_charge (int):\nThe ion charge of the molecular formula.
    • \n
    • _external_mz (float):\nThe externally provided m/z value of the molecular formula.
    • \n
    • _d_molecular_formula (dict):\nThe dictionary representation of the molecular formula.
    • \n
    • _mspeak_parent (object):\nThe parent MS peak object associated with the molecular formula.
    • \n
    • _assignment_mass_error (float):\nThe mass error of the molecular formula.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • _calc_resolving_power_low_pressure(B, T)\nCalculate the resolving power at low pressure.
    • \n
    • _calc_resolving_power_high_pressure(B, T)\nCalculate the resolving power at high pressure.
    • \n
    • _adduct_mz(adduct_atom, ion_charge)\nGet the m/z value of an adducted ion version of the molecular formula.
    • \n
    • _protonated_mz(ion_charge)\nGet the m/z value of a protonated or deprotonated ion version of the molecular formula.
    • \n
    • _radical_mz(ion_charge)\nGet the m/z value of a radical ion version of the molecular formula.
    • \n
    • _neutral_mass()\nGet the neutral mass of the molecular formula.
    • \n
    • _calc_mz()\nGet the m/z value of the molecular formula.
    • \n
    • _calc_assignment_mass_error(method='ppm')\nCalculate the mass error of the molecular formula.
    • \n
    • _calc_mz_confidence(mean=0)\nCalculate the m/z confidence of the molecular formula.
    • \n
    • _calc_isotopologue_confidence()\nCalculate the isotopologue confidence of the molecular formula.
    • \n
    • normalize_distance(dist, dist_range)\nNormalize the distance value.
    • \n
    • subtract_formula(formula_obj, formated=True)\nSubtract a formula from the current formula object.
    • \n
    • _calc_average_mz_score()\nCalculate the average m/z error score of the molecular formula identification, including the isotopologues.
    • \n
    \n"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc.normalize_distance", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "MolecularFormulaCalc.normalize_distance", "kind": "function", "doc": "

    Normalize the distance value.

    \n\n
    Parameters
    \n\n
      \n
    • dist (float):\nThe distance value to be normalized.
    • \n
    • dist_range (list):\nThe range of the distance value.
    • \n
    \n", "signature": "(self, dist, dist_range):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc.subtract_formula", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "MolecularFormulaCalc.subtract_formula", "kind": "function", "doc": "

    Subtract a formula from the current formula object

    \n\n
    Parameters
    \n\n
      \n
    • formula_obj (MolecularFormula):\nMolecularFormula object to be subtracted from the current formula object
    • \n
    • formated (bool, optional):\nIf True, returns the formula in string format, by default True
    • \n
    \n", "signature": "(self, formula_obj, formated=True):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc.dbe_ai", "modulename": "corems.molecular_formula.calc.MolecularFormulaCalc", "qualname": "MolecularFormulaCalc.dbe_ai", "kind": "variable", "doc": "

    Calculate the double bond equivalent (DBE) of the molecular formula, based on the number of carbons, hydrogens, and oxygens.

    \n"}, {"fullname": "corems.molecular_formula.factory", "modulename": "corems.molecular_formula.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase", "kind": "class", "doc": "

    Base class for representing a molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_formula (dict, list, str):\nThe molecular formula.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • ion_type (str, optional):\nThe ion type. Defaults to None.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom. Defaults to None.
    • \n
    • mspeak_parent (_MSPeak, optional):\nThe parent mass spectrum peak object instance. Defaults to None.
    • \n
    • external_mz (float, optional):\nThe external m/z value. Defaults to None.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • TypeError: If the ion type is not 'DE_OR_PROTONATED', 'RADICAL' or 'ADDUCT'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • isotopologue_count_percentile (float):\nThe isotopologue count percentile.
    • \n
    • O_C (float):\nThe O/C ratio.
    • \n
    • H_C (float):\nThe H/C ratio.
    • \n
    • dbe (float):\nThe double bond equivalent.
    • \n
    • mz_nominal_calc (int):\nThe nominal m/z value.
    • \n
    • mz_error (float):\nThe m/z error.
    • \n
    • mz_calc (float):\nThe m/z value.
    • \n
    • protonated_mz (float):\nThe protonated or deprotonated m/z value.
    • \n
    • radical_mz (float):\nThe radical m/z value.
    • \n
    • neutral_mass (float):\nThe neutral mass.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • atoms (list):\nThe atoms in the molecular formula.
    • \n
    • confidence_score (float):\nThe confidence score of the molecular formula identification.
    • \n
    • isotopologue_similarity (float):\nThe isotopologue similarity score of the molecular formula identification.
    • \n
    • average_mz_error_score (float):\nThe average m/z error score of the molecular formula identification, including the isotopologues.
    • \n
    • mz_error_score (float):\nThe m/z error score of the molecular formula identification.
    • \n
    • kmd (float):\nThe Kendrick mass defect (KMD).
    • \n
    • kendrick_mass (float):\nThe Kendrick mass.
    • \n
    • knm (float):\nThe nominal Kendrick mass.
    • \n
    • string (str):\nThe molecular formula string.
    • \n
    • string_formated (str):\nThe molecular formula string formatted with subscripts and superscripts.
    • \n
    • class_label (str):\nThe class label.
    • \n
    • class_dict (dict):\nThe class dictionary.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • change_kendrick_base(kendrick_dict_base).\nChange the Kendrick base.
    • \n
    • isotopologues(min_abundance, current_mono_abundance, dynamic_range).\nCalculate the isotopologues.
    • \n
    • atoms_qnt(atom).\nGet the atom quantity.
    • \n
    • atoms_symbol(atom).\nGet the atom symbol without the mass number.
    • \n
    • to_dict().\nGet the molecular formula as a dictionary.
    • \n
    • to_list().\nGet the molecular formula as a list.
    • \n
    \n", "bases": "corems.molecular_formula.calc.MolecularFormulaCalc.MolecularFormulaCalc"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.__init__", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmolecular_formula,\tion_charge,\tion_type=None,\tadduct_atom=None,\tmspeak_parent=None,\texternal_mz=None)"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.is_isotopologue", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.is_isotopologue", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.expected_isotopologues", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.expected_isotopologues", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mspeak_mf_isotopologues_indexes", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mspeak_mf_isotopologues_indexes", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.get", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.get", "kind": "function", "doc": "

    Get the atom quantity of a specific atom.

    \n\n
    Parameters
    \n\n
      \n
    • atom (str):\nThe atom symbol.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The atom quantity.
    • \n
    \n", "signature": "(self, atom):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.split", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.split", "kind": "function", "doc": "

    Splits the molecular formula string.

    \n\n
    Parameters
    \n\n
      \n
    • delimiters (list):\nThe list of delimiters.
    • \n
    • string (str):\nThe molecular formula string.
    • \n
    • maxsplit (int, optional):\nThe maximum number of splits. Defaults to 0.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: The molecular formula list.
    • \n
    \n\n
    Notes
    \n\n

    Does not work when the formula has atoms with the same characters in a row that belong to different atoms, e.g. C10H21NNa.

    \n", "signature": "(self, delimiters, string, maxsplit=0):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.isotopologue_count_percentile", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.isotopologue_count_percentile", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.O_C", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.O_C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.H_C", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.H_C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.A_I", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.A_I", "kind": "variable", "doc": "

    Aromaticity index

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.A_I_mod", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.A_I_mod", "kind": "variable", "doc": "

    Modified aromaticity index

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.nosc", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.nosc", "kind": "variable", "doc": "

    Nominal oxidation state of carbon

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.dbe", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.dbe", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mz_nominal_calc", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mz_nominal_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mz_error", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mz_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mz_calc", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mz_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.protonated_mz", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.protonated_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.radical_mz", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.radical_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.neutral_mass", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.neutral_mass", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.adduct_mz", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.adduct_mz", "kind": "function", "doc": "

    Get m/z of an adducted ion version of the molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • adduct_atom (str):\nThe adduct atom.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The m/z value of the adducted ion version of the molecular formula.
    • \n
    \n", "signature": "(self, adduct_atom):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.ion_type", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.ion_type", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.ion_charge", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.ion_charge", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.atoms", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.atoms", "kind": "variable", "doc": "

    Get the atoms in the molecular formula.

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.confidence_score", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.confidence_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.isotopologue_similarity", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.isotopologue_similarity", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.average_mz_error_score", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.average_mz_error_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.mz_error_score", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.mz_error_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.kmd", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.kmd", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.kendrick_mass", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.kendrick_mass", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.knm", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.knm", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.change_kendrick_base", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.change_kendrick_base", "kind": "function", "doc": "

    Change the Kendrick base.

    \n\n
    Parameters
    \n\n
      \n
    • kendrick_dict_base (dict):\nThe Kendrick base dictionary. Ex: {\"C\": 1, \"H\": 2}
    • \n
    \n", "signature": "(self, kendrick_dict_base):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.isotopologues", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.isotopologues", "kind": "function", "doc": "

    Calculate the isotopologues for a given molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • min_abundance (float):\nThe minimum abundance.
    • \n
    • current_mono_abundance (float):\nThe current monoisotopic abundance.
    • \n
    • dynamic_range (float):\nThe dynamic range.
    • \n
    \n\n
    Yields
    \n\n
      \n
    • MolecularFormulaIsotopologue: The molecular formula isotopologue.
    • \n
    \n\n
    Notes
    \n\n

    This calculation ignores the hydrogen isotopes.

    \n", "signature": "(self, min_abundance, current_mono_abundance, dynamic_range):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.atoms_qnt", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.atoms_qnt", "kind": "function", "doc": "

    Get the atom quantity of a specific atom in the molecular formula.

    \n", "signature": "(self, atom):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.atoms_symbol", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.atoms_symbol", "kind": "function", "doc": "

    Get the atom symbol without the mass number.

    \n", "signature": "(self, atom):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.string", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.string", "kind": "variable", "doc": "

    Returns the molecular formula as a string.

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.string_formated", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.string_formated", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.to_dict", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.to_dict", "kind": "function", "doc": "

    Returns the molecular formula as a dictionary.

    \n\n
    Returns
    \n\n
      \n
    • dict: The molecular formula as a dictionary.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.to_list", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.to_list", "kind": "function", "doc": "

    Returns the molecular formula as a list.

    \n\n
    Returns
    \n\n
      \n
    • list: The molecular formula as a list.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If the molecular formula identification was not performed yet.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.class_label", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.class_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaBase.class_dict", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaBase.class_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue", "kind": "class", "doc": "

    Class for representing a molecular formula isotopologue.

    \n\n
    Parameters
    \n\n
      \n
    • _d_molecular_formula (dict):\nThe molecular formula as a dictionary.
    • \n
    • prob_ratio (float):\nThe probability ratio.
    • \n
    • mono_abundance (float):\nThe monoisotopic abundance.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • mspeak_parent (object, optional):\nThe parent mass spectrum peak object instance. Defaults to None.
    • \n
    • ion_type (str, optional):\nThe ion type. Defaults to None.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom. Defaults to None.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • prob_ratio (float):\nThe probability ratio.
    • \n
    • abundance_calc (float):\nThe calculated abundance.
    • \n
    • area_error (float):\nThe area error.
    • \n
    • abundance_error (float):\nThe abundance error.
    • \n
    • is_isotopologue (bool):\nThe isotopologue flag. Defaults to True.
    • \n
    • mspeak_index_mono_isotopic (int):\nThe index of the monoisotopic peak in the mass spectrum peak list. Defaults to None.
    • \n
    • mono_isotopic_formula_index (int):\nThe index of the monoisotopic formula in the molecular formula list. Defaults to None.
    • \n
    \n", "bases": "MolecularFormulaBase"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.__init__", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.__init__", "kind": "function", "doc": "

    \n", "signature": "(\t_d_molecular_formula,\tprob_ratio,\tmono_abundance,\tion_charge,\tmspeak_parent=None,\tion_type=None,\tadduct_atom=None)"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.prob_ratio", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.prob_ratio", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.abundance_calc", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.abundance_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.is_isotopologue", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.is_isotopologue", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.mspeak_index_mono_isotopic", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.mspeak_index_mono_isotopic", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.mono_isotopic_formula_index", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.mono_isotopic_formula_index", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.area_error", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.area_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormulaIsotopologue.abundance_error", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormulaIsotopologue.abundance_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula", "kind": "class", "doc": "

    Class for representing a molecular formula associated with a molecule in a LCMS library reference.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_formula (dict, list, str):\nThe molecular formula.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • ion_type (str, optional):\nThe ion type. Defaults to None.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom. Defaults to None.
    • \n
    • mspeak_parent (object, optional):\nThe parent mass spectrum peak object instance. Defaults to None.
    • \n
    • name (str, optional):\nThe name of the reference molecule. Defaults to None.
    • \n
    • kegg_id (str, optional):\nThe KEGG ID of the reference molecule. Defaults to None.
    • \n
    • cas (str, optional):\nThe CAS number of the reference molecule. Defaults to None.
    • \n
    \n", "bases": "MolecularFormulaBase"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula.__init__", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmolecular_formula,\tion_charge,\tion_type=None,\tadduct_atom=None,\tmspeak_parent=None,\tname=None,\tkegg_id=None,\tcas=None)"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula.name", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula.kegg_id", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula.kegg_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula.cas", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "LCMSLibRefMolecularFormula.cas", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormula", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormula", "kind": "class", "doc": "

    General class for representing a molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_formula (dict, list, str):\nThe molecular formula.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • ion_type (str, optional):\nThe ion type. Defaults to None.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom. Defaults to None.
    • \n
    • mspeak_parent (object, optional):\nThe parent mass spectrum peak object instance. Defaults to None.
    • \n
    • external_mz (float, optional):\nThe external m/z value. Defaults to False.
    • \n
    \n", "bases": "MolecularFormulaBase"}, {"fullname": "corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormula.__init__", "modulename": "corems.molecular_formula.factory.MolecularFormulaFactory", "qualname": "MolecularFormula.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmolecular_formula,\tion_charge,\tion_type=None,\tadduct_atom=None,\tmspeak_parent=None,\texternal_mz=False)"}, {"fullname": "corems.molecular_formula.input", "modulename": "corems.molecular_formula.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref", "modulename": "corems.molecular_formula.input.masslist_ref", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy", "kind": "class", "doc": "

    Proxy class for MolecularFormulaLink to be used in the molecular formula ref file import

    \n\n
    Parameters
    \n\n
      \n
    • molecular_formula (MolecularFormula | LCMSLibRefMolecularFormula):\ncorems MolecularFormula or LCMSLibRefMolecularFormula object
    • \n
    • mz (float):\ntarget m/z
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • C (int):\nnumber of carbon atoms
    • \n
    • H (int):\nnumber of hydrogen atoms
    • \n
    • H_C (float):\nratio of hydrogen to carbon atoms
    • \n
    • class_label (str):\nmolecular formula class label
    • \n
    • mz_calc (float):\ncalculated m/z
    • \n
    • dbe (int):\ndouble bond equivalent
    • \n
    • formula_dict (dict):\nmolecular formula dictionary
    • \n
    \n\n
    Methods
    \n\n
      \n
    • to_dict().\nreturn molecular formula dictionary
    • \n
    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.__init__", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.__init__", "kind": "function", "doc": "

    \n", "signature": "(molecular_formula, mz)"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.C", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.H", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.H", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.H_C", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.H_C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.class_label", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.class_label", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.mz_calc", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.mz_calc", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.dbe", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.dbe", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.formula_dict", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.formula_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.MolecularFormulaLinkProxy.to_dict", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "MolecularFormulaLinkProxy.to_dict", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef", "kind": "class", "doc": "

    Import Mass List from Reference File

    \n\n
    Parameters
    \n\n
      \n
    • ref_file_location (str):\npath to the reference file
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • ref_file_location (str):\npath to the reference file
    • \n
    \n\n
    Methods
    \n\n
      \n
    • molecular_formula_ref(mz, molecular_formula).\nReturn MolecularFormulaLinkProxy object
    • \n
    • from_lcms_lib_file(ion_charge, ion_types).\nReturn Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file
    • \n
    • from_bruker_ref_file().\nReturn List[MolecularFormula] from Bruker reference file
    • \n
    • from_corems_ref_file(delimiter).\nReturn List[MolecularFormula] from CoreMS reference file
    • \n
    • split(delimiters, string, maxsplit).\nSplits a string using a list of delimiters.
    • \n
    • mformula_s_to_dict(s_mformulatring, iontype).\nConverts a molecular formula string to a dict
    • \n
    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.__init__", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.__init__", "kind": "function", "doc": "

    \n", "signature": "(ref_file_location)"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.ref_file_location", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.ref_file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.molecular_formula_ref", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.molecular_formula_ref", "kind": "function", "doc": "

    Instantiate a MolecularFormulaLinkProxy object

    \n\n
    Parameters
    \n\n
      \n
    • mz (float):\ntarget m/z
    • \n
    • molecular_formula (MolecularFormula | LCMSLibRefMolecularFormula):\ncorems MolecularFormula or LCMSLibRefMolecularFormula object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MolecularFormulaLinkProxy: MolecularFormulaLinkProxy object
    • \n
    \n", "signature": "(self, mz, molecular_formula):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.from_lcms_lib_file", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.from_lcms_lib_file", "kind": "function", "doc": "

    Create a dictionary of LCMSLibRefMolecularFormula objects from LCMS library reference file

    \n\n
    Parameters
    \n\n
      \n
    • ion_charge (float):\nion charge
    • \n
    • ion_types (List[str]):\nlist of ion types
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Dict: Dict[standard_name, Dict[m/z, List[MolecularFormula]]] from LCMS library reference file. m/z is the target m/z; standard_name is the name of the molecular standard mix; MolecularFormula is the corems molecular formula class
    • \n
    \n", "signature": "(\tself,\tion_charge: float,\tion_types: List[str]) -> Dict[str, Dict[float, List[corems.molecular_formula.factory.MolecularFormulaFactory.LCMSLibRefMolecularFormula]]]:", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.from_bruker_ref_file", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.from_bruker_ref_file", "kind": "function", "doc": "

    Create a list of MolecularFormula objects from Bruker reference file

    \n\n
    Returns
    \n\n
      \n
    • List[MolecularFormula]: List of MolecularFormula objects from Bruker reference file
    • \n
    \n", "signature": "(\tself) -> List[corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormula]:", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.from_corems_ref_file", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.from_corems_ref_file", "kind": "function", "doc": "

    Create a list of MolecularFormula objects from CoreMS reference file

    \n\n

    Not being used

    \n\n
    Parameters
    \n\n
      \n
    • delimiter (str):\ndelimiter used in the reference file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • List[MolecularFormula]: List of MolecularFormula objects from CoreMS reference file
    • \n
    \n", "signature": "(self, delimiter='\\t'):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.split", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.split", "kind": "function", "doc": "

    Splits a string using a list of delimiters.

    \n\n

    Does not work when the formula contains atoms whose symbols share characters, e.g. C10H21NNa (N and Na)

    \n\n
    Parameters
    \n\n
      \n
    • delimiters (list):\nlist of delimiters
    • \n
    • string (str):\nstring to be split
    • \n
    • maxsplit (int, optional):\nmaximum number of splits. Default is 0
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: list of strings obtained after splitting the string
    • \n
    • list: list of counts obtained after splitting the string
    • \n
    \n", "signature": "(self, delimiters, string, maxsplit=0):", "funcdef": "def"}, {"fullname": "corems.molecular_formula.input.masslist_ref.ImportMassListRef.mformula_s_to_dict", "modulename": "corems.molecular_formula.input.masslist_ref", "qualname": "ImportMassListRef.mformula_s_to_dict", "kind": "function", "doc": "

    Converts a molecular formula string to a dict

    \n\n
    Parameters
    \n\n
      \n
    • s_mformulatring (str):\nmolecular formula string, i.e. 'C10H21NNa'
    • \n
    • iontype (str, optional):\nion type. Default is 'unknown'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: molecular formula dictionary
    • \n
    \n\n
    Notes
    \n\n

    Does not work if the atomic mass number is included, e.g. 37Cl or 81Br. By convention the light isotope is written without its mass number: 35Cl is Cl, 12C is C, etc.\nIf you need to use heavy isotopes, please use another reference file format that separates the formula string with blank spaces and parse it using the function from_corems_ref_file

    \n\n
    Raises
    \n\n
      \n
    • TypeError: Atom does not exist in Atoms.atoms_order list
    • \n
    • Exception: Empty molecular formula
    • \n
    \n", "signature": "(self, s_mformulatring, iontype='unknown'):", "funcdef": "def"}, {"fullname": "corems.molecular_id", "modulename": "corems.molecular_id", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc", "modulename": "corems.molecular_id.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.ClusterFilter", "modulename": "corems.molecular_id.calc.ClusterFilter", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter", "kind": "class", "doc": "

    Class for filtering and clustering mass spectra data using various algorithms.

    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nMass spectrum object.
    • \n
    • ms_peaks (list):\nList of mass peaks.
    • \n
    • ms_peak_indexes (list):\nList of peak indexes.
    • \n
    • min_samples (int):\nMinimum number of samples in a cluster.
    • \n
    • eps (float):\nThe maximum distance between two samples for one to be considered as in the neighborhood of the other.
    • \n
    • bandwidth (float):\nBandwidth used in MeanShift algorithm.
    • \n
    • quantile (float):\nQuantile used in estimate_bandwidth function.
    • \n
    • n_samples (int):\nNumber of samples used in estimate_bandwidth function.
    • \n
    • bin_seeding (bool):\nIf true, initial kernel locations are not locations of all points, but rather the location of the discretized version of points, where points are binned onto a grid whose coarseness corresponds to the bandwidth. Setting this option to True will speed up the algorithm because fewer seeds will be initialized.
    • \n
    • min_peaks_per_class (int):\nMinimum number of peaks per class.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_mass_error_matrix_data(ms_peaks).\nGet the mass error matrix data from a list of mass peaks.
    • \n
    • get_kendrick_matrix_data(mass_spectrum).\nGet the Kendrick matrix data from a mass spectrum.
    • \n
    • filter_kendrick(mass_spectrum).\nFilter the mass spectrum data using the Kendrick algorithm.
    • \n
    • filter_kendrick_by_index(ms_peak_indexes, mass_spectrum_obj).\nFilter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.
    • \n
    • remove_assignment_by_mass_error(mass_spectrum).\nRemove assignments from the mass spectrum based on mass error.
    • \n
    \n"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.get_mass_error_matrix_data", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.get_mass_error_matrix_data", "kind": "function", "doc": "

    Get the mass error matrix data from a list of mass peaks.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peaks (list):\nList of mass peaks.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matrix_data (ndarray):\nMatrix data containing mass and error values.
    • \n
    • list_indexes_mass_spec (list):\nList of indexes of mass peaks in the original mass spectrum.
    • \n
    \n", "signature": "(self, ms_peaks):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.get_kendrick_matrix_data", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.get_kendrick_matrix_data", "kind": "function", "doc": "

    Get the Kendrick matrix data from a mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nMass spectrum object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matrix_data (ndarray):\nMatrix data containing Kendrick mass and Kendrick mass defect values.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.filter_kendrick", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.filter_kendrick", "kind": "function", "doc": "

    Filter the mass spectrum data using the Kendrick algorithm.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nMass spectrum object.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.filter_kendrick_by_index", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.filter_kendrick_by_index", "kind": "function", "doc": "

    Filter the mass spectrum data using the Kendrick algorithm based on a list of peak indexes.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peak_indexes (list):\nList of peak indexes.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nMass spectrum object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • noise_idx (list):\nList of indexes of noise points in the mass spectrum.
    • \n
    \n", "signature": "(self, ms_peak_indexes, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.ClusterFilter.ClusteringFilter.remove_assignment_by_mass_error", "modulename": "corems.molecular_id.calc.ClusterFilter", "qualname": "ClusteringFilter.remove_assignment_by_mass_error", "kind": "function", "doc": "

    Remove assignments from the mass spectrum based on mass error.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nMass spectrum object.
    • \n
    \n", "signature": "(self, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.MolecularFilter", "modulename": "corems.molecular_id.calc.MolecularFilter", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.MolecularFilter.MolecularFormulaSearchFilters", "modulename": "corems.molecular_id.calc.MolecularFilter", "qualname": "MolecularFormulaSearchFilters", "kind": "class", "doc": "

    Class containing static methods for filtering molecular formulas in a mass spectrum.

    \n\n
    Methods
    \n\n
      \n
    • filter_kendrick(ms_peak_indexes, mass_spectrum_obj).\nApply Kendrick filter to the mass spectrum.
    • \n
    • check_min_peaks(ms_peak_indexes, mass_spectrum).\nCheck if the number of peaks per class meets the minimum requirement.
    • \n
    • filter_isotopologue(ms_peak_indexes, mass_spectrum).\nApply isotopologue filter to the mass spectrum.
    • \n
    \n"}, {"fullname": "corems.molecular_id.calc.MolecularFilter.MolecularFormulaSearchFilters.filter_kendrick", "modulename": "corems.molecular_id.calc.MolecularFilter", "qualname": "MolecularFormulaSearchFilters.filter_kendrick", "kind": "function", "doc": "

    Apply Kendrick filter to the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • filtered_ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects after applying the Kendrick filter.
    • \n
    \n", "signature": "(ms_peak_indexes, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.MolecularFilter.MolecularFormulaSearchFilters.check_min_peaks", "modulename": "corems.molecular_id.calc.MolecularFilter", "qualname": "MolecularFormulaSearchFilters.check_min_peaks", "kind": "function", "doc": "

    Check if the number of peaks per class meets the minimum requirement.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n", "signature": "(ms_peak_indexes, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.MolecularFilter.MolecularFormulaSearchFilters.filter_isotopologue", "modulename": "corems.molecular_id.calc.MolecularFilter", "qualname": "MolecularFormulaSearchFilters.filter_isotopologue", "kind": "function", "doc": "

    Apply isotopologue filter to the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects.
    • \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • filtered_ms_peak_indexes (list):\nList of peak indexes and their associated molecular formula objects after applying the isotopologue filter.
    • \n
    \n", "signature": "(ms_peak_indexes, mass_spectrum):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.methods_name", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "methods_name", "kind": "variable", "doc": "

    \n", "default_value": "{'chebyshev_distance': 'Chebyshev Distance', 'squared_euclidean_distance': 'Squared Euclidean Distance', 'fidelity_similarity': 'Fidelity Similarity', 'matusita_distance': 'Matusita Distance', 'squared_chord_distance': 'Squared-chord Distance', 'harmonic_mean_similarity': 'Harmonic mean Distance', 'Pearson_chi_squared_distance': 'Pearson Chi Squared Distance', 'Neyman_chi_squared_distance': 'Neyman Chi Squared Distance', 'probabilistic_symmetric_chi_squared_distance': 'Probabilistic symmetric X2 Distance', 'topsoe_distance': 'Topsoe Distance', 'chernoff_distance': 'Chernoff Distance', 'ruzicka_distance': 'Ruzicka Distance', 'roberts_distance': 'Roberts Distance', 'motyka_distance': 'Motyka Distance', 'canberra_distance': 'Canberra Distance', 'canberra_metric': 'Canberra Metric', 'kulczynski_1_distance': 'Kulczynski 1 Distance', 'lorentzian_distance': 'Lorentzian Distance', 'clark_distance': 'Clark Distance', 'hellinger_distance': 'Hellinger Distance', 'whittaker_index_of_association_distance': 'Whittaker index of association Distance', 'spectral_contrast_angle_distance': 'Spectral Contrast Angle', 'wave_hedges_distance': 'Wave Hedges Distance', 'dice_similarity': 'Dice Similarity', 'inner_product_distance': 'Inner Product Distance', 'divergence_distance': 'Divergence Distance', 'jensen_difference_distance': 'Jensen Differences Distance', 'kumar_johnson_distance': 'Kumar Johnson Distance', 'avg_l_distance': 'Avg (L1, L8) Distance', 'vicis_wave_hadges_distance': 'Vicis Wave Hadges Distance', 'vicis_symmetric_chi_squared_1_distance': 'Vicis-Symmetric X2 1 Distance', 'vicis_symmetric_chi_squared_2_distance': 'Vicis-Symmetric X2 2 Distance', 'vicis_symmetric_chi_squared_3_distance': 'Vicis-Symmetric X2 3 Distance', 'max_symmetric_chi_squared_distance': 'Max Symmetric Chi Squared Distance', 'min_symmetric_chi_squared_distance': 'Min Symmetric Chi Squared Distance', 'additive_sym_chi_sq': 'Additive Symmetric Chi Squared', 
'bhattacharya_distance': 'Battacharya Distance', 'generalized_ochiai_index': 'Generalized Ochiai Index', 'gower_distance': 'Gower Distance', 'impr_sqrt_cosine_sim': 'Improved Square Root Cosine Similarity', 'intersection_sim': 'Intersection Similarity', 'j_divergence': 'J Divergence', 'jensen_shannon_index': 'Jensen Shannon Index', 'k_divergence': 'K Divergence', 'VW6': 'VW6', 'VW5': 'VW5', 'VW4': 'VW4', 'VW3': 'VW3', 'VW2': 'VW2', 'VW1': 'VW1', 'taneja_divergence': 'Taneja Divergence', 'symmetric_chi_squared_distance': 'Symmetric Chi Squared Distance', 'squared_chi_squared_distance': 'Squared Chi Squared Distance', 'square_root_cosine_correlation': 'Square Root Cosine Correlation', 'sorensen_distance': 'Sorensen Distance', 'Minokowski_3': 'Minokowski 3 Distance', 'Minokowski_4': 'Minokowski 4 Distance', 'kumarjohnson_divergence': 'Kumar Johnson Divergence', 'kumarhassebrook_similarity': 'Kumar Hassebrook Similarity', 'kullbackleibler_divergence': 'Kullback Leibler Divergence', 'soergel_distance': 'Soergel Distance'}"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.methods_scale", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "methods_scale", "kind": "variable", "doc": "

    \n", "default_value": "{'entropy': [0, 1.3862943611198906], 'weighted_entropy': [0, 1.3862943611198906], 'absolute_value': [0, 2], 'avg_l': [0, 1.5], 'bhattacharya_1': [0, 2.4674011002723395], 'bhattacharya_2': [0, inf], 'canberra': [0, inf], 'clark': [0, inf], 'divergence': [0, inf], 'euclidean': [0, 1.4142135623730951], 'hellinger': [0, inf], 'improved_similarity': [0, inf], 'lorentzian': [0, inf], 'manhattan': [0, 2], 'matusita': [0, 1.4142135623730951], 'mean_character': [0, 2], 'motyka': [-0.5, 0], 'ms_for_id': [-inf, 0], 'ms_for_id_v1': [0, inf], 'pearson_correlation': [-1, 1], 'penrose_shape': [0, 1.4142135623730951], 'penrose_size': [0, inf], 'probabilistic_symmetric_chi_squared': [0, 1], 'similarity_index': [0, inf], 'squared_chord': [0, 2], 'squared_euclidean': [0, 2], 'symmetric_chi_squared': [0, 0.7071067811865476], 'topsoe': [0, 1.4142135623730951], 'vicis_symmetric_chi_squared_3': [0, 2], 'wave_hedges': [0, inf], 'whittaker_index_of_association': [0, inf]}"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity", "kind": "class", "doc": "

    Class containing methods for calculating spectral similarity between two mass spectra.

    \n\n
    Parameters
    \n\n
      \n
    • ms_mz_abun_dict (dict):\nDictionary of mass to abundance values for the experimental mass spectrum.
    • \n
    • ref_obj (dict):\nDictionary of mass to abundance values for the reference mass spectrum.
    • \n
    • norm_func (function):\nFunction to normalize the abundance values.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • normalize_func (function):\nFunction to normalize the abundance values.
    • \n
    • ms_mz_abun_dict (dict):\nDictionary of mass to abundance values for the experimental mass spectrum.
    • \n
    • ref_obj (dict):\nDictionary of mass to abundance values for the reference mass spectrum.
    • \n
    • exp_abun (list):\nList of abundance values for the experimental mass spectrum.
    • \n
    • exp_mz (list):\nList of mass values for the experimental mass spectrum.
    • \n
    • ref_mz (list):\nList of mass values for the reference mass spectrum.
    • \n
    • ref_abun (list):\nList of abundance values for the reference mass spectrum.
    • \n
    • ref_mz_abun_dict (dict):\nDictionary of mass to abundance values for the reference mass spectrum.
    • \n
    • df (DataFrame):\nDataFrame containing the experimental and reference mass spectrum data.
    • \n
    • zero_filled_u_l (tuple):\nTuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    • \n
    • common_mz_values (list):\nList of common mass values between the experimental and reference mass spectra.
    • \n
    • n_x_y (int):\nNumber of common mass values between the experimental and reference mass spectra.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • nan_fill(df, fill_with=0).\nFill missing mass values with a given value.
    • \n
    • normalize(x, y, norm_func=sum).\nNormalize the abundance values.
    • \n
    • weighted_cosine_correlation(a=0.5, b=1.3, nanfill=1e-10).\nCalculate the weighted cosine correlation between the experimental and reference mass spectra.
    • \n
    • cosine_correlation().\nCalculate the cosine correlation between the experimental and reference mass spectra.
    • \n
    • stein_scott().\nCalculate the Stein-Scott similarity between the experimental and reference mass spectra.
    • \n
    • pearson_correlation().\nCalculate the Pearson correlation between the experimental and reference mass spectra.
    • \n
    • spearman_correlation().\nCalculate the Spearman correlation between the experimental and reference mass spectra.
    • \n
    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.__init__", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.__init__", "kind": "function", "doc": "

    \n", "signature": "(ms_mz_abun_dict, ref_obj, norm_func=<built-in function sum>)"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.normalize_func", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.normalize_func", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ms_mz_abun_dict", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ms_mz_abun_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ref_obj", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ref_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.exp_abun", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.exp_abun", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.exp_mz", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.exp_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ref_mz", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ref_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ref_abun", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ref_abun", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.ref_mz_abun_dict", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.ref_mz_abun_dict", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.df", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.df", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.zero_filled_u_l", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.zero_filled_u_l", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.common_mz_values", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.common_mz_values", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.n_x_y", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.n_x_y", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.nan_fill", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.nan_fill", "kind": "function", "doc": "

    Fill missing mass values with a given value.

    \n\n
    Parameters
    \n\n
      \n
    • df (DataFrame):\nDataFrame containing the experimental and reference mass spectrum data.
    • \n
    • fill_with (float):\nValue to fill missing mass values with.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • x (list):\nList of abundance values for the experimental mass spectrum.
    • \n
    • y (list):\nList of abundance values for the reference mass spectrum.
    • \n
    \n", "signature": "(self, df, fill_with=0):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.normalize", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.normalize", "kind": "function", "doc": "

    Normalize the abundance values.

    \n\n
    Parameters
    \n\n
      \n
    • x (list):\nList of abundance values for the experimental mass spectrum.
    • \n
    • y (list):\nList of abundance values for the reference mass spectrum.
    • \n
    • norm_func (function):\nFunction to normalize the abundance values.\nDefault is sum
    • \n
    \n\n
    Returns
    \n\n
      \n
    • u_l (tuple):\nTuple containing the experimental and reference mass spectrum data after zero filling and normalization.
    • \n
    \n", "signature": "(self, x, y, norm_func=<built-in function sum>):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.weighted_cosine_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.weighted_cosine_correlation", "kind": "function", "doc": "

    Calculate the weighted cosine correlation between the experimental and reference mass spectra.

    \n\n
    Parameters
    \n\n
      \n
    • a (float):\nWeighting factor for the abundance values.\nDefault is 0.5
    • \n
    • b (float):\nWeighting factor for the mass values.\nDefault is 1.3
    • \n
    • nanfill (float):\nValue to fill missing mass values with.\nDefault is 1e-10
    • \n
    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nWeighted cosine correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self, a=0.5, b=1.3, nanfill=1e-10):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.cosine_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.cosine_correlation", "kind": "function", "doc": "

    Calculate the cosine correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nCosine correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.stein_scott", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.stein_scott", "kind": "function", "doc": "

    Calculate the Stein-Scott similarity between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • s_ss_x_y (float):\nStein-Scott similarity between the experimental and reference mass spectra.
    • \n
    • s_ss_x_y_nist (float):\nStein-Scott similarity between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.pearson_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.pearson_correlation", "kind": "function", "doc": "

    Calculate the Pearson correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nPearson correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.spearman_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.spearman_correlation", "kind": "function", "doc": "

    Calculate the Spearman correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nSpearman correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.kendall_tau", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.kendall_tau", "kind": "function", "doc": "

    Calculate Kendall's tau correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nKendall's tau correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.dft_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.dft_correlation", "kind": "function", "doc": "

    Calculate the DFT correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nDFT correlation between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.dwt_correlation", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.dwt_correlation", "kind": "function", "doc": "

    Calculate the DWT correlation between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nDWT correlation between the experimental and reference mass spectra.
    • \n
    \n\n
    Notes
    \n\n

    This function requires the PyWavelets library to be installed.\n This is not a default requirement as this function is not widely used.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.euclidean_distance", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.euclidean_distance", "kind": "function", "doc": "

    Calculate the Euclidean distance between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nEuclidean distance between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.manhattan_distance", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.manhattan_distance", "kind": "function", "doc": "

    Calculate the Manhattan distance between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nManhattan distance between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.jaccard_distance", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.jaccard_distance", "kind": "function", "doc": "

    Calculate the Jaccard distance between the experimental and reference mass spectra.

    \n\n
    Returns
    \n\n
      \n
    • correlation (float):\nJaccard distance between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.SpectralSimilarity.SpectralSimilarity.extra_distances", "modulename": "corems.molecular_id.calc.SpectralSimilarity", "qualname": "SpectralSimilarity.extra_distances", "kind": "function", "doc": "

    Function to calculate distances using additional metrics defined in math_distance.py

    \n\n

    Currently, calculates all distances.

    \n\n
    Returns
    \n\n
      \n
    • dict_res (dict):\nDictionary containing the distances between the experimental and reference mass spectra.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance", "modulename": "corems.molecular_id.calc.math_distance", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.calc.math_distance.entropy_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "entropy_distance", "kind": "function", "doc": "

    Calculate entropy distance between two vectors

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Entropy distance between v and y
    • \n
    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.weighted_entropy_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "weighted_entropy_distance", "kind": "function", "doc": "

    Calculate weighted entropy distance between two vectors

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Weighted entropy distance between v and y
    • \n
    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.chebyshev_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "chebyshev_distance", "kind": "function", "doc": "

    Chebyshev distance

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Chebyshev distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\underset{i}{\\max}{(|v_{i}\\ -\\ y_{i}|)}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.squared_euclidean_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "squared_euclidean_distance", "kind": "function", "doc": "

    Squared Euclidean distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Squared Euclidean distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum(v_{i}-y_{i})^2$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.fidelity_similarity", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "fidelity_similarity", "kind": "function", "doc": "

    Fidelity similarity:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Fidelity similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\sqrt{v_{i}y_{i}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.matusita_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "matusita_distance", "kind": "function", "doc": "

    Matusita distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Matusita distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt{\\sum(\\sqrt{v_{i}}-\\sqrt{y_{i}})^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.squared_chord_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "squared_chord_distance", "kind": "function", "doc": "

    Squared-chord distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Squared-chord distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum(\\sqrt{v_{i}}-\\sqrt{y_{i}})^2$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.bhattacharya_1_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "bhattacharya_1_distance", "kind": "function", "doc": "

    Bhattacharya 1 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Bhattacharya 1 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$(\\arccos{(\\sum\\sqrt{v_{i}y_{i}})})^2$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.bhattacharya_2_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "bhattacharya_2_distance", "kind": "function", "doc": "

    Bhattacharya 2 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Bhattacharya 2 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$-\\ln{(\\sum\\sqrt{v_{i}y_{i}})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.harmonic_mean_similarity", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "harmonic_mean_similarity", "kind": "function", "doc": "

    Harmonic mean similarity:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Harmonic mean similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$#1-2\\sum(\\frac{v_{i}y_{i}}{v_{i}+y_{i}})\n2\\sum(\\frac{v_{i}y_{i}}{v_{i}+y_{i}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.chernoff_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "chernoff_distance", "kind": "function", "doc": "

    Chernoff distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Chernoff distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\max{(-\\ln\\sum(v_{i}^ty_{i}^{1-t})^{1-t})},\\ t=0.1,\\ 0\\le\\ t<1$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.ruzicka_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "ruzicka_distance", "kind": "function", "doc": "

    Ruzicka distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Ruzicka distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{\\sum{|v_{i}-y_{i}|}}{\\sum{\\max(v_{i},y_{i})}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.roberts_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "roberts_distance", "kind": "function", "doc": "

    Roberts distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Roberts distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1-\\sum\\frac{(v_{i}+y_{i})\\frac{\\min{(v_{i},y_{i})}}{\\max{(v_{i},y_{i})}}}{\\sum(v_{i}+y_{i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.intersection_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "intersection_distance", "kind": "function", "doc": "

    Intersection distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Intersection distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1-\\frac{\\sum\\min{(v_{i},y_{i})}}{\\min{(\\sum{v_{i}},\\sum{y_{i}})}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.motyka_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "motyka_distance", "kind": "function", "doc": "

    Motyka distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Motyka distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$-\\frac{\\sum\\min{(y_{i},v_{i})}}{\\sum(y_{i}+v_{i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.canberra_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "canberra_distance", "kind": "function", "doc": "

    Canberra distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Canberra distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$#\\sum\\frac{|v_{i}-y_{i}|}{|v_{i}|+|y_{i}|}\n\\sum_{i}\\frac{|y_{i} - v_{i}|}{y_{i} + v_{i}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.canberra_metric", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "canberra_metric", "kind": "function", "doc": "

    Canberra Metric

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Canberra metric between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{\\sum_{i}I(v_{i}\\neq 0)}\\sum_{i}\\frac{|y_{i}-v_{i}|}{(y_{i}+v_{i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kulczynski_1_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kulczynski_1_distance", "kind": "function", "doc": "

    Kulczynski 1 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Kulczynski 1 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{\\sum{|v_i-y_i|}}{\\sum\\min{(v_i,y_i)}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.baroni_urbani_buser_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "baroni_urbani_buser_distance", "kind": "function", "doc": "

    Baroni-Urbani-Buser distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Baroni-Urbani-Buser distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1-\\frac{\\sum\\min{(v_i,y_i)}+\\sqrt{\\sum\\min{(v_i,y_i)}\\sum(\\max{(v)}-\\max{(v_i,y_i)})}}{\\sum{\\max{(v_i,y_i)}+\\sqrt{\\sum{\\min{(v_i,y_i)}\\sum(\\max{(v)}-\\max{(v_i,y_i)})}}}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.penrose_size_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "penrose_size_distance", "kind": "function", "doc": "

    Penrose size distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Penrose size distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt N\\sum{|y_i-v_i|}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.mean_character_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "mean_character_distance", "kind": "function", "doc": "

    Mean character distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Mean character distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{N}\\sum{|y_i-v_i|}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.lorentzian_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "lorentzian_distance", "kind": "function", "doc": "

    Lorentzian distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Lorentzian distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum{\\ln(1+|v_i-y_i|)}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.penrose_shape_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "penrose_shape_distance", "kind": "function", "doc": "

    Penrose shape distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Penrose shape distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt{\\sum((v_i-\\bar{v})-(y_i-\\bar{y}))^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.clark_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "clark_distance", "kind": "function", "doc": "

    Clark distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Clark distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$#(\\frac{1}{N}\\sum(\\frac{v_i-y_i}{|v_i|+|y_i|})^2)^\\frac{1}{2}\n\\sqrt{\\sum(\\frac{|v_i-y_i|}{v_i+y_i})^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.hellinger_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "hellinger_distance", "kind": "function", "doc": "

    Hellinger distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Hellinger distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$#\\sqrt{2\\sum(\\sqrt{\\frac{v_i}{\\bar{v}}}-\\sqrt{\\frac{y_i}{\\bar{y}}})^2}\n\\sqrt{2\\sum(\\sqrt{v_i}-\\sqrt{y_i})^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.whittaker_index_of_association_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "whittaker_index_of_association_distance", "kind": "function", "doc": "

    Whittaker index of association distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Whittaker index of association distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{2}\\sum|\\frac{v_i}{\\bar{v}}-\\frac{y_i}{\\bar{y}}|$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.similarity_index_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "similarity_index_distance", "kind": "function", "doc": "

    Similarity Index Distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Similarity Index Distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt{\\frac{\\sum{\\frac{v_i-y_i}{y_i}}^2}{N}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.improved_similarity_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "improved_similarity_distance", "kind": "function", "doc": "

    Improved Similarity Index:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Improved Similarity Index between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sqrt{\\frac{1}{N}\\sum{\\frac{y_i-v_i}{y_i+v_i}}^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.absolute_value_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "absolute_value_distance", "kind": "function", "doc": "

    Absolute Value Distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Absolute Value Distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac { \\sum(|y_i-v_i|)}{\\sum v_i}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.spectral_contrast_angle_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "spectral_contrast_angle_distance", "kind": "function", "doc": "

    Spectral Contrast Angle:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Spectral Contrast Angle between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1 - \\frac{\\sum{y_iv_i}}{\\sqrt{\\sum y_i^2\\sum v_i^2}}\n\\arccos(\\frac{\\sum_{P}y_{p}^* v_{p}^*}{\\sqrt{\\sum_{P}y_{p}^{*2} \\sum_{P}v_{p}^{*2}}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.wave_hedges_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "wave_hedges_distance", "kind": "function", "doc": "

    Wave Hedges distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Wave Hedges distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{|v_i-y_i|}{\\max{(v_i,y_i)}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.dice_similarity", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "dice_similarity", "kind": "function", "doc": "

    Dice similarity:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Dice similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{\\sum(v_i-y_i)^2}{\\sum v_i^2+\\sum y_i^2}\n\\frac{2 * \\sum_{i}v_{i}y_{i}}{\\sum_{i}y_{i}^2 + \\sum_{i}v_{i}^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.inner_product_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "inner_product_distance", "kind": "function", "doc": "

    Inner Product distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Inner product distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1-\\sum{v_iy_i}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.divergence_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "divergence_distance", "kind": "function", "doc": "

    Divergence distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Divergence distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$2\\sum\\frac{(v_i-y_i)^2}{(v_i+y_i)^2}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.jensen_difference_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "jensen_difference_distance", "kind": "function", "doc": "

    Jensen difference:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Jensen difference distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum[\\frac{1}{2}(v_i\\ln{v_i}+y_i\\ln{y_i})-(\\frac{v_i+y_i}{2})\\ln{(\\frac{v_i+y_i}{2})}]$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kumar_johnson_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kumar_johnson_distance", "kind": "function", "doc": "

    Kumar-Johnson distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Kumar Johnson distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{(v_i^2-y_i^2)^2}{2(v_iy_i)^\\frac{3}{2}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.avg_l_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "avg_l_distance", "kind": "function", "doc": "

    Avg (L1, L\u221e) distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Average L distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{2}(\\sum|v_i-y_i|+\\underset{i}{\\max}{|v_i-y_i|})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.vicis_wave_hadges_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "vicis_wave_hadges_distance", "kind": "function", "doc": "

    Vicis-Wave Hadges distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Vicis Wave Hadges distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{|v_i-y_i|}{\\min{(v_i,\\ y_i)}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.vicis_symmetric_chi_squared_1_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "vicis_symmetric_chi_squared_1_distance", "kind": "function", "doc": "

    Vicis-Symmetric \u03c72 1 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Vicis Symmetric \u03c72 1 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{(v_i-y_i)^2}{\\min{(v_i,y_i)^2}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.vicis_symmetric_chi_squared_2_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "vicis_symmetric_chi_squared_2_distance", "kind": "function", "doc": "

    Vicis-Symmetric \u03c72 2 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Vicis Symmetric \u03c72 2 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{(v_i-y_i)^2}{\\min{(v_i,y_i)}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.vicis_symmetric_chi_squared_3_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "vicis_symmetric_chi_squared_3_distance", "kind": "function", "doc": "

    Vicis-Symmetric \u03c72 3 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Vicis Symmetric \u03c72 3 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum\\frac{(v_i-y_i)^2}{\\max{(v_i,y_i)}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.max_symmetric_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "max_symmetric_chi_squared_distance", "kind": "function", "doc": "

    Max-Symmetric \u03c72 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Max-Symmetric \u03c72 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\max{(\\sum\\frac{(v_i-y_i)^2}{v_i},\\sum\\frac{(v_i-y_i)^2}{y_i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.min_symmetric_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "min_symmetric_chi_squared_distance", "kind": "function", "doc": "

    Min-Symmetric \u03c72 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Min-Symmetric \u03c72 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\min{(\\sum\\frac{(v_i-y_i)^2}{v_i},\\sum\\frac{(v_i-y_i)^2}{y_i})}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.additive_sym_chi_sq", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "additive_sym_chi_sq", "kind": "function", "doc": "

    Additive Symmetric \u03c72 distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Additive Symmetric \u03c72 distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum_{i}\\frac{(y_{i} - v_{i})^2(y_{i}+v_{i})}{y_{i}v_{i}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.bhattacharya_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "bhattacharya_distance", "kind": "function", "doc": "

    Bhattacharya Distance:

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Bhattacharya distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$-\\ln(\\sum_{i}\\sqrt{y_{i}v_{i}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.generalized_ochiai_index", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "generalized_ochiai_index", "kind": "function", "doc": "

    Generalized Ochiai Index

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Generalized Ochiai Index between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$1 - \\frac{\\sum_{i}\\min(y_{i}, v_{i})}{\\sqrt{\\sum_{i}y_{i} \\sum_{i}v_{i}}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.gower_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "gower_distance", "kind": "function", "doc": "

    Gower Distance

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Gower distance between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{N}\\sum_{i}|y_{i} - v_{i}|$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.impr_sqrt_cosine_sim", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "impr_sqrt_cosine_sim", "kind": "function", "doc": "

    Improved Square Root Cosine Similarity

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Improved Square Root Cosine Similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{\\sum_{i}\\sqrt{y_{i}v_{i}}}{\\sum_{i}\\sqrt{y_{i}}\\sum_{i}\\sqrt{v_{i}}}$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.intersection_sim", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "intersection_sim", "kind": "function", "doc": "

    Intersection Similarity

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Intersection Similarity between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum_{i}\\min(y_{i}, v_{i})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.j_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "j_divergence", "kind": "function", "doc": "

    J Divergence

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: J Divergence between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum_{i}(y_{i} - v_{i}) \\ln(\\frac{y_{i}}{v_{i}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.jensen_shannon_index", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "jensen_shannon_index", "kind": "function", "doc": "

    Jensen-Shannon Index

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Jensen Shannon Index between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\frac{1}{2}[\\sum_{i}y_{i}\\ln(\\frac{2y_{i}}{y_{i} + v_{i}}) + \\sum_{i}v_{i}\\ln(\\frac{2v_{i}}{y_{i}+v_{i}})]$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.k_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "k_divergence", "kind": "function", "doc": "

    K-Divergence

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: K-Divergence between v and y
    • \n
    \n\n
    Notes
    \n\n

    $$\\sum_{i}y_{i}\\ln(\\frac{2y_{i}}{y_{i} + v_{i}})$$

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.topsoe_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "topsoe_distance", "kind": "function", "doc": "

    Topsoe distance

    \n\n
    Parameters
    \n\n
      \n
    • v (array_like):\nVector 1
    • \n
    • y (array_like):\nVector 2
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Topsoe distance between v and y
    • \n
    • Notes
    • \n
    • -----
    • \n
    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.probabilistic_symmetric_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "probabilistic_symmetric_chi_squared_distance", "kind": "function", "doc": "

    Fixed\n\"I commented out the previous one; please review\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW6", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW6", "kind": "function", "doc": "

    \"appears to be the same as max_symmetric_chi_squared_distance\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW5", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW5", "kind": "function", "doc": "

    \"appears to be the same as max_symmetric_chi_squared_distance\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW4", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW4", "kind": "function", "doc": "

    \"Technically the Symmetric chi2 eq63\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW3", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW3", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW2", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW2", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.VW1", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "VW1", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.taneja_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "taneja_divergence", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.symmetric_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "symmetric_chi_squared_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.squared_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "squared_chi_squared_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.square_root_cosine_correlation", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "square_root_cosine_correlation", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.sorensen_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "sorensen_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.Pearson_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "Pearson_chi_squared_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.Neyman_chi_squared_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "Neyman_chi_squared_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.Minokowski_3", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "Minokowski_3", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.Minokowski_4", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "Minokowski_4", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kumarjohnson_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kumarjohnson_divergence", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kumarhassebrook_similarity", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kumarhassebrook_similarity", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.kullbackleibler_divergence", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "kullbackleibler_divergence", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.calc.math_distance.soergel_distance", "modulename": "corems.molecular_id.calc.math_distance", "qualname": "soergel_distance", "kind": "function", "doc": "

    \"New\"

    \n", "signature": "(v, y):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory", "modulename": "corems.molecular_id.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL", "modulename": "corems.molecular_id.factory.EI_SQL", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Base", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Base", "kind": "class", "doc": "

    The base class of the class hierarchy.

    \n\n

    When called, it accepts no arguments and returns a new featureless\ninstance that has no instance attributes and cannot be given any.

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Base.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Base.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Base.registry", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Base.registry", "kind": "variable", "doc": "

    \n", "default_value": "<sqlalchemy.orm.decl_api.registry object>"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Base.metadata", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Base.metadata", "kind": "variable", "doc": "

    \n", "default_value": "MetaData()"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar", "kind": "class", "doc": "

    This class is used to store the metadata of the compounds in the database

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe id of the compound.
    • \n
    • cas (str):\nThe CAS number of the compound.
    • \n
    • inchikey (str):\nThe InChiKey of the compound.
    • \n
    • inchi (str):\nThe InChi of the compound.
    • \n
    • chebi (str):\nThe ChEBI ID of the compound.
    • \n
    • smiles (str):\nThe SMILES of the compound.
    • \n
    • kegg (str):\nThe KEGG ID of the compound.
    • \n
    • iupac_name (str):\nThe IUPAC name of the compound.
    • \n
    • traditional_name (str):\nThe traditional name of the compound.
    • \n
    • common_name (str):\nThe common name of the compound.
    • \n
    • data_id (int):\nThe id of the compound in the molecularData table.
    • \n
    • data (LowResolutionEICompound):\nThe compound object.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.cas", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.cas", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.inchikey", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.inchikey", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.inchi", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.inchi", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.chebi", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.chebi", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.smiles", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.smiles", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.kegg", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.kegg", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.iupac_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.iupac_name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.traditional_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.traditional_name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.common_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.common_name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.data_id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.data_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.Metadatar.data", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "Metadatar.data", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound", "kind": "class", "doc": "

    This class is used to store the molecular and spectral data of the compounds in the low res EI database

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe id of the compound.
    • \n
    • name (str):\nThe name of the compound.
    • \n
    • classify (str):\nThe classification of the compound.
    • \n
    • formula (str):\nThe formula of the compound.
    • \n
    • ri (float):\nThe retention index of the compound.
    • \n
    • retention_time (float):\nThe retention time of the compound.
    • \n
    • source (str):\nThe source of the compound.
    • \n
    • casno (str):\nThe CAS number of the compound.
    • \n
    • comment (str):\nThe comment of the compound.
    • \n
    • source_temp_c (float):\nThe source temperature of the spectra.
    • \n
    • ev (float):\nThe electron volts of the spectra.
    • \n
    • peaks_count (int):\nThe number of peaks in the spectra.
    • \n
    • mz (numpy.ndarray):\nThe m/z values of the spectra.
    • \n
    • abundance (numpy.ndarray):\nThe abundance values of the spectra.
    • \n
    • metadatar (Metadatar):\nThe metadata object.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.__init__", "kind": "function", "doc": "

    \n", "signature": "(**dict_data)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.classify", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.classify", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.formula", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.formula", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.ri", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.ri", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.retention_time", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.retention_time", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.source", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.source", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.casno", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.casno", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.comment", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.comment", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.derivativenum", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.derivativenum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.derivatization", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.derivatization", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.source_temp_c", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.source_temp_c", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.ev", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.ev", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.peaks_count", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.peaks_count", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.mz", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.abundance", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.abundance", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResolutionEICompound.metadatar", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResolutionEICompound.metadatar", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata", "kind": "class", "doc": "

    Dataclass for the Metabolite Metadata

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe id of the compound.
    • \n
    • cas (str):\nThe CAS number of the compound.
    • \n
    • inchikey (str):\nThe InChiKey of the compound.
    • \n
    • inchi (str):\nThe InChi of the compound.
    • \n
    • chebi (str):\nThe ChEBI ID of the compound.
    • \n
    • smiles (str):\nThe SMILES of the compound.
    • \n
    • kegg (str):\nThe KEGG ID of the compound.
    • \n
    • iupac_name (str):\nThe IUPAC name of the compound.
    • \n
    • traditional_name (str):\nThe traditional name of the compound.
    • \n
    • common_name (str):\nThe common name of the compound.
    • \n
    • data_id (int):\nThe id of the compound in the molecularData table.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tid: int,\tcas: str,\tinchikey: str,\tinchi: str,\tchebi: str,\tsmiles: str,\tkegg: str,\tdata_id: int,\tiupac_name: str,\ttraditional_name: str,\tcommon_name: str)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.id", "kind": "variable", "doc": "

    \n", "annotation": ": int"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.cas", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.cas", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.inchikey", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.inchikey", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.inchi", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.inchi", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.chebi", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.chebi", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.smiles", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.smiles", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.kegg", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.kegg", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.data_id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.data_id", "kind": "variable", "doc": "

    \n", "annotation": ": int"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.iupac_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.iupac_name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.traditional_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.traditional_name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata.common_name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "MetaboliteMetadata.common_name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef", "kind": "class", "doc": "

    Dataclass for the Low Resolution Compound Reference

    \n\n

    This class is used to store the molecular and spectral data of the compounds in the low res EI database

    \n\n
    Parameters
    \n\n
      \n
    • compounds_dict (dict):\nA dictionary representing the compound.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe id of the compound.
    • \n
    • name (str):\nThe name of the compound.
    • \n
    • ri (str):\nThe retention index of the compound.
    • \n
    • retention_time (str):\nThe retention time of the compound.
    • \n
    • casno (str):\nThe CAS number of the compound.
    • \n
    • comment (str):\nThe comment of the compound.
    • \n
    • peaks_count (int):\nThe number of peaks in the spectra.
    • \n
    • classify (str):\nThe classification of the compound.
    • \n
    • derivativenum (str):\nThe derivative number of the compound.
    • \n
    • derivatization (str):\nThe derivatization applied to the compound.
    • \n
    • mz (numpy.ndarray):\nThe m/z values of the spectra.
    • \n
    • abundance (numpy.ndarray):\nThe abundance values of the spectra.
    • \n
    • source_temp_c (float):\nThe source temperature of the spectra.
    • \n
    • ev (float):\nThe electron volts of the spectra.
    • \n
    • formula (str):\nThe formula of the compound.
    • \n
    • source (str):\nThe source of the spectra data.
    • \n
    • classify (str):\nThe classification of the compound.
    • \n
    • metadata (MetaboliteMetadata):\nThe metadata object.
    • \n
    • similarity_score (float):\nThe similarity score of the compound.
    • \n
    • ri_score (float):\nThe RI score of the compound.
    • \n
    • spectral_similarity_score (float):\nThe spectral similarity score of the compound.
    • \n
    • spectral_similarity_scores (dict):\nThe spectral similarity scores of the compound.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.__init__", "kind": "function", "doc": "

    \n", "signature": "(compounds_dict)"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.id", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.name", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.ri", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.ri", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.retention_time", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.retention_time", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.casno", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.casno", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.comment", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.comment", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.peaks_count", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.peaks_count", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.classify", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.classify", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.derivativenum", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.derivativenum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.derivatization", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.derivatization", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.mz", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.abundance", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.abundance", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.source_temp_c", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.source_temp_c", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.ev", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.ev", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.formula", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.formula", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.source", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.source", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.similarity_score", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.similarity_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.ri_score", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.ri_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.spectral_similarity_score", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.spectral_similarity_score", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.LowResCompoundRef.spectral_similarity_scores", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "LowResCompoundRef.spectral_similarity_scores", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite", "kind": "class", "doc": "

    A class for interacting with a SQLite database for low-resolution EI compounds.

    \n\n
    Parameters
    \n\n
      \n
    • url (str, optional):\nThe URL of the SQLite database. Default is 'sqlite://'.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • engine (sqlalchemy.engine.Engine):\nThe SQLAlchemy engine for connecting to the database.
    • \n
    • session (sqlalchemy.orm.Session):\nThe SQLAlchemy session for executing database operations.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __init__(self, url='sqlite://').\nInitializes the EI_LowRes_SQLite object.
    • \n
    • __exit__(self, exc_type, exc_val, exc_tb).\nCloses the database connection.
    • \n
    • init_engine(self, url).\nInitializes the SQLAlchemy engine.
    • \n
    • __enter__(self).\nReturns the EI_LowRes_SQLite object.
    • \n
    • add_compound_list(self, data_dict_list).\nAdds a list of compounds to the database.
    • \n
    • add_compound(self, data_dict).\nAdds a single compound to the database.
    • \n
    • commit(self).\nCommits the changes to the database.
    • \n
    • row_to_dict(self, row).\nConverts a database row to a dictionary.
    • \n
    • get_all(self).\nRetrieves all compounds from the database.
    • \n
    • query_min_max_rt(self, min_max_rt).\nQueries compounds based on retention time range.
    • \n
    • query_min_max_ri(self, min_max_ri).\nQueries compounds based on RI range.
    • \n
    • query_names_and_rt(self, min_max_rt, compound_names).\nQueries compounds based on compound names and retention time range.
    • \n
    • query_min_max_ri_and_rt(self, min_max_ri, min_max_rt).\nQueries compounds based on RI range and retention time range.
    • \n
    • delete_compound(self, compound).\nDeletes a compound from the database.
    • \n
    • purge(self).\nDeletes all compounds from the database table.
    • \n
    • clear_data(self).\nClears all tables in the database.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.__init__", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.__init__", "kind": "function", "doc": "

    \n", "signature": "(url='sqlite://')"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.engine", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.engine", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.session", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.session", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.init_engine", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.init_engine", "kind": "function", "doc": "

    Initializes the SQLAlchemy engine.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nThe URL of the SQLite database.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • sqlalchemy.engine.Engine: The SQLAlchemy engine for connecting to the database.
    • \n
    \n", "signature": "(self, url):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.add_compound_list", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.add_compound_list", "kind": "function", "doc": "

    Adds a list of compounds to the database.

    \n\n
    Parameters
    \n\n
      \n
    • data_dict_list (list of dict):\nA list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self, data_dict_list):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.add_compound", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.add_compound", "kind": "function", "doc": "

    Adds a single compound to the database.

    \n\n
    Parameters
    \n\n
      \n
    • data_dict (dict):\nA dictionary representing the compound.
    • \n
    \n", "signature": "(self, data_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.commit", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.commit", "kind": "function", "doc": "

    Commits the changes to the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.row_to_dict", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.row_to_dict", "kind": "function", "doc": "

    Converts a database row to a dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • row (sqlalchemy.engine.row.Row):\nA row from the database.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary representing the compound.
    • \n
    \n", "signature": "(self, row):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.get_all", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.get_all", "kind": "function", "doc": "

    Retrieves all compounds from the database.

    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.query_min_max_rt", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.query_min_max_rt", "kind": "function", "doc": "

    Queries compounds based on retention time range.

    \n\n
    Parameters
    \n\n
      \n
    • min_max_rt (tuple):\nA tuple containing the minimum and maximum retention time values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self, min_max_rt):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.query_min_max_ri", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.query_min_max_ri", "kind": "function", "doc": "

    Queries compounds based on RI range.

    \n\n
    Parameters
    \n\n
      \n
    • min_max_ri (tuple):\nA tuple containing the minimum and maximum RI values.
    • \n
    \n", "signature": "(self, min_max_ri):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.query_names_and_rt", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.query_names_and_rt", "kind": "function", "doc": "

    Queries compounds based on compound names and retention time range.

    \n\n
    Parameters
    \n\n
      \n
    • min_max_rt (tuple):\nA tuple containing the minimum and maximum retention time values.
    • \n
    • compound_names (list):\nA list of compound names.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self, min_max_rt, compound_names):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.query_min_max_ri_and_rt", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.query_min_max_ri_and_rt", "kind": "function", "doc": "

    Queries compounds based on RI range and retention time range.

    \n\n
    Parameters
    \n\n
      \n
    • min_max_ri (tuple):\nA tuple containing the minimum and maximum RI values.
    • \n
    • min_max_rt (tuple):\nA tuple containing the minimum and maximum retention time values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries representing the compounds.
    • \n
    \n", "signature": "(self, min_max_ri, min_max_rt):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.delete_compound", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.delete_compound", "kind": "function", "doc": "

    Deletes a compound from the database.

    \n\n
    Parameters
    \n\n
      \n
    • compound (LowResolutionEICompound):\nA compound object.
    • \n
    \n", "signature": "(self, compound):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.purge", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.purge", "kind": "function", "doc": "

    Deletes all compounds from the database table.

    \n\n
    Notes
    \n\n

    Careful, this will delete the entire database table.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.EI_SQL.EI_LowRes_SQLite.clear_data", "modulename": "corems.molecular_id.factory.EI_SQL", "qualname": "EI_LowRes_SQLite.clear_data", "kind": "function", "doc": "

    Clears all tables in the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.profiled", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "profiled", "kind": "function", "doc": "

    A context manager for profiling.

    \n", "signature": "():", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.insert_database_worker", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "insert_database_worker", "kind": "function", "doc": "

    Inserts data into the database.

    \n", "signature": "(args):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations", "kind": "class", "doc": "

    A class for generating molecular formula combinations.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_search_settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • sql_db (MolForm_SQL):\nThe SQLite database object.
    • \n
    • len_existing_classes (int):\nThe number of existing classes in the SQLite database.
    • \n
    • odd_ch_id (list):\nA list of odd carbon and hydrogen atom IDs.
    • \n
    • odd_ch_dict (list):\nA list of odd carbon and hydrogen atom dictionaries.
    • \n
    • odd_ch_mass (list):\nA list of odd carbon and hydrogen atom masses.
    • \n
    • odd_ch_dbe (list):\nA list of odd carbon and hydrogen atom double bond equivalents.
    • \n
    • even_ch_id (list):\nA list of even carbon and hydrogen atom IDs.
    • \n
    • even_ch_dict (list):\nA list of even carbon and hydrogen atom dictionaries.
    • \n
    • even_ch_mass (list):\nA list of even carbon and hydrogen atom masses.
    • \n
    • even_ch_dbe (list):\nA list of even carbon and hydrogen atom double bond equivalents.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • cProfile_worker(args)\nA cProfile worker for the get_mol_formulas function.
    • \n
    • check_database_get_class_list(molecular_search_settings)\nChecks if the database has all the classes, if not create the missing classes.
    • \n
    • get_carbonsHydrogens(settings, odd_even)\nRetrieves carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.
    • \n
    • add_carbonsHydrogens(settings, existing_classes_objs)\nAdds carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.
    • \n
    • runworker(molecular_search_settings)\nRuns the molecular formula lookup table worker.
    • \n
    • get_classes_in_order(molecular_search_settings)\nGets the classes in order.
    • \n
    • sort_classes(atoms_in_order, combination_dict)\nSorts the classes in order.
    • \n
    • get_fixed_initial_number_of_hydrogen(min_h, odd_even)\nGets the fixed initial number of hydrogen atoms.
    • \n
    • calc_mz(datadict, class_mass=0)\nCalculates the mass-to-charge ratio (m/z) of a molecular formula.
    • \n
    • calc_dbe_class(datadict)\nCalculates the double bond equivalent (DBE) of a molecular formula.
    • \n
    • populate_combinations(classe_tuple, settings)\nPopulates the combinations.
    • \n
    • get_or_add(SomeClass, kw)\nGets or adds a class.
    • \n
    • get_mol_formulas(odd_even_tag, classe_tuple, settings)\nGets the molecular formulas.
    • \n
    • get_h_odd_or_even(class_dict)\nGets the hydrogen odd or even.
    • \n
    • get_total_halogen_atoms(class_dict)\nGets the total number of halogen atoms.
    • \n
    • get_total_hetero_valence(class_dict)\nGets the total valence of heteroatoms other than N, F, Cl, and Br.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.__init__", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.__init__", "kind": "function", "doc": "

    \n", "signature": "(sql_db=None)"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.cProfile_worker", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.cProfile_worker", "kind": "function", "doc": "

    cProfile worker for the get_mol_formulas function

    \n", "signature": "(self, args):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.check_database_get_class_list", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.check_database_get_class_list", "kind": "function", "doc": "

    Check if the database has all the classes; if not, create the missing classes.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_search_settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of tuples with the class name and the class dictionary.
    • \n
    \n", "signature": "(self, molecular_search_settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_carbonsHydrogens", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_carbonsHydrogens", "kind": "function", "doc": "

    Retrieve carbon and hydrogen atoms from the molecular lookup table based on user-defined settings.

    \n\n
    Parameters
    \n\n
      \n
    • settings (object):\nAn object containing user-defined settings.
    • \n
    • odd_even (str):\nA string indicating whether to retrieve even or odd hydrogen atoms.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of CarbonHydrogen objects that satisfy the specified conditions.
    • \n
    \n", "signature": "(self, settings, odd_even):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.add_carbonsHydrogens", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.add_carbonsHydrogens", "kind": "function", "doc": "

    Add carbon and hydrogen atoms to the molecular lookup table based on user-defined settings.

    \n\n
    Parameters
    \n\n
      \n
    • settings (object):\nAn object containing user-defined settings.
    • \n
    • existing_classes_objs (list):\nA list of HeteroAtoms objects.
    • \n
    \n", "signature": "(self, settings, existing_classes_objs):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.runworker", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.runworker", "kind": "function", "doc": "

    Run the molecular formula lookup table worker.

    \n\n
    Parameters
    \n\n
      \n
    • molecular_search_settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of tuples with the class name and the class dictionary.
    • \n
    \n", "signature": "(*args, **kw):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_classes_in_order", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_classes_in_order", "kind": "function", "doc": "

    Get the classes in order

    \n\n
    Parameters
    \n\n
      \n
    • molecular_search_settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary of classes in order.\nThe structure is ('HC', {'HC': 1}).
    • \n
    \n", "signature": "(self, molecular_search_settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.sort_classes", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.sort_classes", "kind": "function", "doc": "

    Sort the classes in order

    \n\n
    Parameters
    \n\n
      \n
    • atoms_in_order (list):\nA list of atoms in order.
    • \n
    • combination_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary of classes in order.
    • \n
    \n", "signature": "(atoms_in_order, combination_dict) -> Dict[str, Dict[str, int]]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_fixed_initial_number_of_hydrogen", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_fixed_initial_number_of_hydrogen", "kind": "function", "doc": "

    Get the fixed initial number of hydrogen atoms

    \n\n
    Parameters
    \n\n
      \n
    • min_h (int):\nThe minimum number of hydrogen atoms.
    • \n
    • odd_even (str):\nA string indicating whether to retrieve even or odd hydrogen atoms.
    • \n
    \n", "signature": "(min_h, odd_even):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.calc_mz", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.calc_mz", "kind": "function", "doc": "

    Calculate the mass-to-charge ratio (m/z) of a molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • datadict (dict):\nA dictionary of classes.
    • \n
    • class_mass (int):\nThe mass of the class.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The mass-to-charge ratio (m/z) of a molecular formula.
    • \n
    \n", "signature": "(self, datadict, class_mass=0):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.calc_dbe_class", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.calc_dbe_class", "kind": "function", "doc": "

    Calculate the double bond equivalent (DBE) of a molecular formula.

    \n\n
    Parameters
    \n\n
      \n
    • datadict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The double bond equivalent (DBE) of a molecular formula.
    • \n
    \n", "signature": "(self, datadict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.populate_combinations", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.populate_combinations", "kind": "function", "doc": "

    Populate the combinations

    \n\n
    Parameters
    \n\n
      \n
    • classe_tuple (tuple):\nA tuple containing the class name, the class dictionary, and the class ID.
    • \n
    • settings (object):\nAn object containing user-defined settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of molecular formula data dictionaries.
    • \n
    \n", "signature": "(self, classe_tuple, settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_or_add", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_or_add", "kind": "function", "doc": "

    Get or add a class

    \n\n
    Parameters
    \n\n
      \n
    • SomeClass (object):\nA class object.
    • \n
    • kw (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • object: A class object.
    • \n
    \n", "signature": "(self, SomeClass, kw):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_mol_formulas", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_mol_formulas", "kind": "function", "doc": "

    Get the molecular formulas

    \n\n
    Parameters
    \n\n
      \n
    • odd_even_tag (str):\nA string indicating whether to retrieve even or odd hydrogen atoms.
    • \n
    • classe_tuple (tuple):\nA tuple containing the class name, the class dictionary, and the class ID.

    • \n
    • settings (object):\nAn object containing user-defined settings.

    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of molecular formula data dictionaries.
    • \n
    \n", "signature": "(self, odd_even_tag, classe_tuple, settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_h_odd_or_even", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_h_odd_or_even", "kind": "function", "doc": "

    Get whether the hydrogen count is odd or even

    \n\n
    Parameters
    \n\n
      \n
    • class_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: A string indicating whether to retrieve even or odd hydrogen atoms.
    • \n
    \n", "signature": "(self, class_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_total_heteroatoms", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_total_heteroatoms", "kind": "function", "doc": "

    Get the total number of heteroatoms other than N, F, Cl, Br

    \n\n
    Parameters
    \n\n
      \n
    • class_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The total number of heteroatoms.
    • \n
    \n", "signature": "(class_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_total_hetero_valence", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_total_hetero_valence", "kind": "function", "doc": "

    Get the total valence of heteroatoms other than N, F, Cl, Br

    \n\n
    Parameters
    \n\n
      \n
    • class_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The total heteroatom valence.
    • \n
    \n", "signature": "(class_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.MolecularLookupTable.MolecularCombinations.get_total_halogen_atoms", "modulename": "corems.molecular_id.factory.MolecularLookupTable", "qualname": "MolecularCombinations.get_total_halogen_atoms", "kind": "function", "doc": "

    Get the total number of halogen atoms

    \n\n
    Parameters
    \n\n
      \n
    • class_dict (dict):\nA dictionary of classes.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • int: The total number of halogen atoms.
    • \n
    \n", "signature": "(class_dict):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification", "modulename": "corems.molecular_id.factory.classification", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.flatten_list", "modulename": "corems.molecular_id.factory.classification", "qualname": "flatten_list", "kind": "function", "doc": "

    \n", "signature": "(l):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification", "kind": "class", "doc": "

    Class for grouping mass spectrum data by heteroatom classes (Nn, Oo, Ss, NnOo, NnSs, etc.)

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum (MassSpectrum):\nThe mass spectrum object.
    • \n
    • choose_molecular_formula (bool, optional):\nIf True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered. Default is True.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If no molecular formula is associated with any mspeak objects.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _ms_grouped_class (dict):\nA dictionary of classes and a list of ms_peak objects.
    • \n
    • choose_mf (bool):\nIf True, the molecular formula with the highest abundance is chosen. If False, all molecular formulas are considered.
    • \n
    • total_peaks (int):\nThe total number of peaks.
    • \n
    • sum_abundance (float):\nThe sum of the abundance of all peaks.
    • \n
    • min_max_mz (tuple):\nThe minimum and maximum mz values.
    • \n
    • min_max_abundance (tuple):\nThe minimum and maximum abundance values.
    • \n
    • min_ppm_error (float):\nThe minimum ppm error.
    • \n
    • max_ppm_error (float):\nThe maximum ppm error.
    • \n
    • all_identified_atoms (list):\nA list of all identified atoms.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __len__().\nReturns the number of classes.
    • \n
    • __getitem__(classe)\nReturns the molecular formula list for specified class.
    • \n
    • __iter__().\nReturns an iterator over the keys of the dictionary.
    • \n
    • get_classes(threshold_perc=1, isotopologue=True).\nReturns a list of classes with abundance percentile above threshold.
    • \n
    • molecular_formula_string(classe).\nReturns a list of molecular formula string for specified class.
    • \n
    • molecular_formula(classe).\nReturns a list of molecular formula for specified class.
    • \n
    • carbon_number(classe).\nReturns a list of carbon number for specified class.
    • \n
    • atom_count(atom, classe).\nReturns a list of atom count for specified class.
    • \n
    • dbe(classe).\nReturns a list of DBE for specified class.
    • \n
    • atoms_ratio(classe, numerator, denominator).\nReturns a list of atoms ratio for specified class.
    • \n
    • mz_exp(classe).\nReturns a list of experimental mz for specified class.
    • \n
    • abundance(classe).\nReturns a list of abundance for specified class.
    • \n
    • mz_error(classe).\nReturns a list of mz error for specified class.
    • \n
    • mz_calc(classe).\nReturns a list of calculated mz for specified class.
    • \n
    • peaks_count_percentile(classe).\nReturns the peaks count percentile of a specific class.
    • \n
    • abundance_count_percentile(classe).\nReturns the abundance percentile of a specific class.
    • \n
    • mz_exp_assigned().\nReturns a list of experimental mz for all assigned classes.
    • \n
    • abundance_assigned().\nReturns a list of abundance for all classes.
    • \n
    • mz_exp_all().\nReturns a list of mz for all classes.
    • \n
    \n", "bases": "collections.abc.Mapping"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.__init__", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.__init__", "kind": "function", "doc": "

    \n", "signature": "(mass_spectrum, choose_molecular_formula=True)"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.choose_mf", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.choose_mf", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.total_peaks", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.total_peaks", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.sum_abundance", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.sum_abundance", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.min_max_mz", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.min_max_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.min_max_abundance", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.min_max_abundance", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.min_ppm_error", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.min_ppm_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.max_ppm_error", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.max_ppm_error", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.all_identified_atoms", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.all_identified_atoms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.get_classes", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.get_classes", "kind": "function", "doc": "

    Return a list of classes with abundance percentile above threshold

    \n", "signature": "(self, threshold_perc=1, isotopologue=True):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.molecular_formula_string", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.molecular_formula_string", "kind": "function", "doc": "

    Return a list of molecular formula string for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.molecular_formula", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.molecular_formula", "kind": "function", "doc": "

    Return a list of molecular formula for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.carbon_number", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.carbon_number", "kind": "function", "doc": "

    Return a list of carbon number for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.atom_count", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.atom_count", "kind": "function", "doc": "

    Return a list of atom count for specified class

    \n", "signature": "(self, atom, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.dbe", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.dbe", "kind": "function", "doc": "

    Return a list of DBE for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.atoms_ratio", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.atoms_ratio", "kind": "function", "doc": "

    Return a list of atoms ratio for specified class

    \n", "signature": "(self, classe, numerator, denominator):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_exp", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_exp", "kind": "function", "doc": "

    Return a list of experimental mz for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.abundance", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.abundance", "kind": "function", "doc": "

    Return a list of abundance for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_error", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_error", "kind": "function", "doc": "

    Return a list of mz error for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_calc", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_calc", "kind": "function", "doc": "

    Return a list of calculated mz for specified class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.peaks_count_percentile", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.peaks_count_percentile", "kind": "function", "doc": "

    Return the peaks count percentile of a specific class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.abundance_count_percentile", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.abundance_count_percentile", "kind": "function", "doc": "

    Return the abundance percentile of a specific class

    \n", "signature": "(self, classe):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_exp_assigned", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_exp_assigned", "kind": "function", "doc": "

    Return a list of experimental mz for all assigned classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.abundance_assigned", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.abundance_assigned", "kind": "function", "doc": "

    Return a list of abundance for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_exp_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_exp_all", "kind": "function", "doc": "

    Return a list of mz for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.mz_error_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.mz_error_all", "kind": "function", "doc": "

    Return a list of mz error for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.carbon_number_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.carbon_number_all", "kind": "function", "doc": "

    Return a list of carbon number for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.dbe_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.dbe_all", "kind": "function", "doc": "

    Return a list of DBE for all classes

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.atoms_ratio_all", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.atoms_ratio_all", "kind": "function", "doc": "

    Return a list of atoms ratio for all classes

    \n", "signature": "(self, numerator, denominator):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.to_dataframe", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.to_dataframe", "kind": "function", "doc": "

    Return a pandas dataframe with all the data from the class

    \n\n
    Parameters
    \n\n
      \n
    • include_isotopologue (bool, optional):\nInclude isotopologues, by default False
    • \n
    • abundance_perc_threshold (int, optional):\nAbundance percentile threshold, by default 5
    • \n
    • include_unassigned (bool, optional):\nInclude unassigned peaks, by default False
    • \n
    \n\n
    Returns
    \n\n
      \n
    • DataFrame: Pandas dataframe with all the data from the class
    • \n
    \n", "signature": "(\tself,\tinclude_isotopologue=False,\tabundance_perc_threshold=5,\tinclude_unassigned=False):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_ms_assigned_unassigned", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_ms_assigned_unassigned", "kind": "function", "doc": "

    Plot stick mass spectrum of all classes

    \n\n
    Parameters
    \n\n
      \n
    • assigned_color (str, optional):\nMatplotlib color for the assigned peaks, by default \"b\"
    • \n
    • unassigned_color (str, optional):\nMatplotlib color for the unassigned peaks, by default \"r\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    \n", "signature": "(self, assigned_color='b', unassigned_color='r'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_mz_error", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_mz_error", "kind": "function", "doc": "

    Plot mz error scatter plot of all classes

    \n\n
    Parameters
    \n\n
      \n
    • color (str, optional):\nMatplotlib color, by default \"g\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    \n", "signature": "(self, color='g'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_mz_error_class", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_mz_error_class", "kind": "function", "doc": "

    Plot mz error scatter plot of a specific class

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nClass name
    • \n
    • color (str, optional):\nMatplotlib color, by default \"g\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    \n", "signature": "(self, classe, color='g'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_ms_class", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_ms_class", "kind": "function", "doc": "

    Plot stick mass spectrum of a specific class

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nClass name
    • \n
    • color (str, optional):\nMatplotlib color, by default \"g\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    \n", "signature": "(self, classe, color='g'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_van_krevelen", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_van_krevelen", "kind": "function", "doc": "

    Plot Van Krevelen Diagram

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nClass name
    • \n
    • max_hc (float, optional):\nMax H/C ratio, by default 2.5
    • \n
    • max_oc (float, optional):\nMax O/C ratio, by default 2
    • \n
    • ticks_number (int, optional):\nNumber of ticks, by default 5
    • \n
    • color (str, optional):\nMatplotlib color, by default \"viridis\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    • abun_perc (float):\nClass percentile of the relative abundance
    • \n
    \n", "signature": "(self, classe, max_hc=2.5, max_oc=2, ticks_number=5, color='viridis'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.classification.HeteroatomsClassification.plot_dbe_vs_carbon_number", "modulename": "corems.molecular_id.factory.classification", "qualname": "HeteroatomsClassification.plot_dbe_vs_carbon_number", "kind": "function", "doc": "

    Plot DBE vs Carbon Number

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nClass name
    • \n
    • max_c (int, optional):\nMax Carbon Number, by default 50
    • \n
    • max_dbe (int, optional):\nMax DBE, by default 40
    • \n
    • dbe_incr (int, optional):\nDBE increment, by default 5
    • \n
    • c_incr (int, optional):\nCarbon Number increment, by default 10
    • \n
    • color (str, optional):\nMatplotlib color, by default \"viridis\"
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ax (matplotlib.axes):\nMatplotlib axes object
    • \n
    • abun_perc (float):\nClass percentile of the relative abundance
    • \n
    \n", "signature": "(\tself,\tclasse,\tmax_c=50,\tmax_dbe=40,\tdbe_incr=5,\tc_incr=10,\tcolor='viridis'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata", "kind": "class", "doc": "

    Dataclass for the Lipid Metadata

    \n\n
    Parameters
    \n\n
      \n
    • name (str):\nThe name of the lipid, using the LIPID MAPS nomenclature
    • \n
    • casno (str):\nThe CAS number of the lipid
    • \n
    • formula (str):\nThe molecular formula of the lipid
    • \n
    • pubchem_id (str):\nThe PubChem ID of the lipid
    • \n
    • structure_level (str):\nThe structure level of the lipid, following the LIPID MAPS classification
    • \n
    • lipid_summed_name (str):\nThe summed name of the lipid, aka lipid species,\nfollowing the LIPID MAPS classification
    • \n
    • lipid_subclass (str):\nThe subclass of the lipid, following the LIPID MAPS classification
    • \n
    • lipid_class (str):\nThe class of the lipid, following the LIPID MAPS classification
    • \n
    • lipid_category (str):\nThe category of the lipid, following the LIPID MAPS classification
    • \n
    \n", "bases": "corems.molecular_id.factory.EI_SQL.MetaboliteMetadata"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.__init__", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tid: int,\tcas: str,\tinchikey: str,\tinchi: str,\tchebi: str,\tsmiles: str,\tkegg: str,\tdata_id: int,\tiupac_name: str,\ttraditional_name: str,\tcommon_name: str,\tname: str,\tcasno: str,\tformula: str,\tpubchem_id: str,\tstructure_level: str,\tlipid_summed_name: str,\tlipid_subclass: str,\tlipid_class: str,\tlipid_category: str)"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.name", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.casno", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.casno", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.formula", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.formula", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.pubchem_id", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.pubchem_id", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.structure_level", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.structure_level", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.lipid_summed_name", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.lipid_summed_name", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.lipid_subclass", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.lipid_subclass", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.lipid_class", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.lipid_class", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.lipid_molecular_metadata.LipidMetadata.lipid_category", "modulename": "corems.molecular_id.factory.lipid_molecular_metadata", "qualname": "LipidMetadata.lipid_category", "kind": "variable", "doc": "

    \n", "annotation": ": str"}, {"fullname": "corems.molecular_id.factory.molecularSQL", "modulename": "corems.molecular_id.factory.molecularSQL", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.Base", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "Base", "kind": "class", "doc": "

    The base class of the class hierarchy.

    \n\n

    When called, it accepts no arguments and returns a new featureless\ninstance that has no instance attributes and cannot be given any.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.Base.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "Base.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.Base.registry", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "Base.registry", "kind": "variable", "doc": "

    \n", "default_value": "<sqlalchemy.orm.decl_api.registry object>"}, {"fullname": "corems.molecular_id.factory.molecularSQL.Base.metadata", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "Base.metadata", "kind": "variable", "doc": "

    \n", "default_value": "MetaData()"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms", "kind": "class", "doc": "

    HeteroAtoms class for the heteroAtoms table in the SQLite database.

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe primary key for the table.
    • \n
    • name (str):\nThe name of the heteroAtoms class.
    • \n
    • halogensCount (int):\nThe number of halogens in the heteroAtoms class.
    • \n
    • carbonHydrogen (relationship):\nThe relationship to the carbonHydrogen table.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __repr__()\nReturns the string representation of the object.
    • \n
    • to_dict()\nReturns the heteroAtoms class as a dictionary.
    • \n
    • halogens_count()\nReturns the number of halogens as a float.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.id", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.name", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.name", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.halogensCount", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.halogensCount", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.carbonHydrogen", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.carbonHydrogen", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.halogens_count", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.halogens_count", "kind": "variable", "doc": "

    Returns the number of halogens as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.HeteroAtoms.to_dict", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "HeteroAtoms.to_dict", "kind": "function", "doc": "

    Returns the heteroAtoms class as a dictionary.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen", "kind": "class", "doc": "

    CarbonHydrogen class for the carbonHydrogen table in the SQLite database.

    \n\n
    Attributes
    \n\n
      \n
    • id (int):\nThe primary key for the table.
    • \n
    • C (int):\nThe number of carbon atoms.
    • \n
    • H (int):\nThe number of hydrogen atoms.
    • \n
    • heteroAtoms (relationship):\nThe relationship to the heteroAtoms table.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __repr__()\nReturns the string representation of the object.
    • \n
    • mass()\nReturns the mass of the carbonHydrogen class as a float.
    • \n
    • c()\nReturns the number of carbon atoms as a float.
    • \n
    • h()\nReturns the number of hydrogen atoms as a float.
    • \n
    • dbe()\nReturns the double bond equivalent as a float.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.id", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.C", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.C", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.H", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.H", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.heteroAtoms", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.heteroAtoms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.mass", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.mass", "kind": "variable", "doc": "

    Returns the mass of the carbonHydrogen class as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.c", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.c", "kind": "variable", "doc": "

    Returns the number of carbon atoms as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.h", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.h", "kind": "variable", "doc": "

    Returns the number of hydrogen atoms as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.CarbonHydrogen.dbe", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "CarbonHydrogen.dbe", "kind": "variable", "doc": "

    Returns the double bond equivalent as a float.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink", "kind": "class", "doc": "

    MolecularFormulaLink class for the molecularformula table in the SQLite database.

    \n\n
    Attributes
    \n\n
      \n
    • heteroAtoms_id (int):\nThe foreign key for the heteroAtoms table.
    • \n
    • carbonHydrogen_id (int):\nThe foreign key for the carbonHydrogen table.
    • \n
    • mass (float):\nThe mass of the molecular formula.
    • \n
    • DBE (float):\nThe double bond equivalent of the molecular formula.
    • \n
    • carbonHydrogen (relationship):\nThe relationship to the carbonHydrogen table.
    • \n
    • heteroAtoms (relationship):\nThe relationship to the heteroAtoms table.
    • \n
    • C (association_proxy):\nThe association proxy for the carbonHydrogen table.
    • \n
    • H (association_proxy):\nThe association proxy for the carbonHydrogen table.
    • \n
    • classe (association_proxy):\nThe association proxy for the heteroAtoms table.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __repr__()\nReturns the string representation of the object.
    • \n
    • to_dict()\nReturns the molecular formula as a dictionary.
    • \n
    • formula_string()\nReturns the molecular formula as a string.
    • \n
    • classe_string()\nReturns the heteroAtoms class as a string.
    • \n
    • _adduct_mz(ion_charge, adduct_atom)\nReturns the m/z of the adduct ion as a float.
    • \n
    • _protonated_mz(ion_charge)\nReturns the m/z of the protonated ion as a float.
    • \n
    • _radical_mz(ion_charge)\nReturns the m/z of the radical ion as a float.
    • \n
    \n", "bases": "sqlalchemy.orm.decl_api.Base"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.__init__", "kind": "function", "doc": "

    A simple constructor that allows initialization from kwargs.

    \n\n

    Sets attributes on the constructed instance using the names and\nvalues in kwargs.

    \n\n

    Only keys that are present as\nattributes of the instance's class are allowed. These could be,\nfor example, any mapped columns or relationships.

    \n", "signature": "(**kwargs)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.heteroAtoms_id", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.heteroAtoms_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.carbonHydrogen_id", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.carbonHydrogen_id", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.mass", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.mass", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.DBE", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.DBE", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.carbonHydrogen", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.carbonHydrogen", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.heteroAtoms", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.heteroAtoms", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.C", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.C", "kind": "variable", "doc": "

    A descriptor that presents a read/write view of an object attribute.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.H", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.H", "kind": "variable", "doc": "

    A descriptor that presents a read/write view of an object attribute.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.classe", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.classe", "kind": "variable", "doc": "

    A descriptor that presents a read/write view of an object attribute.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.to_dict", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.to_dict", "kind": "function", "doc": "

    Returns the molecular formula as a dictionary.

    \n\n
    Returns
    \n\n
      \n
    • dict: The molecular formula as a dictionary.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.formula_string", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.formula_string", "kind": "variable", "doc": "

    Returns the molecular formula as a string.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolecularFormulaLink.classe_string", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolecularFormulaLink.classe_string", "kind": "variable", "doc": "

    Returns the heteroAtoms class as a string.

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL", "kind": "class", "doc": "

    MolForm_SQL class for the SQLite database.

    \n\n
    Attributes
    \n\n
      \n
    • engine (sqlalchemy.engine.base.Engine):\nThe SQLAlchemy engine.
    • \n
    • session (sqlalchemy.orm.session.Session):\nThe SQLAlchemy session.
    • \n
    • type (str):\nThe type of database.
    • \n
    • chunks_count (int):\nThe number of chunks to use when querying the database.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • __init__(url=None, echo=False)\nInitializes the database.
    • \n
    • __exit__(exc_type, exc_val, exc_tb)\nCloses the database.
    • \n
    • initiate_database(url, database_name)\nCreates the database.
    • \n
    • commit()\nCommits the session.
    • \n
    • init_engine(url)\nInitializes the SQLAlchemy engine.
    • \n
    • __enter__()

    • \n
    • get_dict_by_classes(classes, ion_type, nominal_mzs, ion_charge, molecular_search_settings, adducts=None)\nReturns a dictionary of molecular formulas.

    • \n
    • check_entry(classe, ion_type, molecular_search_settings)\nChecks if a molecular formula is in the database.
    • \n
    • get_all_classes()\nReturns a list of all classes in the database.
    • \n
    • get_all()\nReturns a list of all molecular formulas in the database.
    • \n
    • delete_entry(row)\nDeletes a molecular formula from the database.
    • \n
    • purge(cls)\nDeletes all molecular formulas from the database.
    • \n
    • clear_data()\nClears the database.
    • \n
    • close(commit=True)\nCloses the database.
    • \n
    • add_engine_pidguard(engine)\nAdds multiprocessing guards.
    • \n
    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.__init__", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.__init__", "kind": "function", "doc": "

    \n", "signature": "(url=None, echo=False)"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.engine", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.engine", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.session", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.session", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.initiate_database", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.initiate_database", "kind": "function", "doc": "

    Creates the database.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nThe URL for the database.
    • \n
    • database_name (str):\nThe name of the database.
    • \n
    \n", "signature": "(self, url, database_name):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.commit", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.commit", "kind": "function", "doc": "

    Commits the session.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.init_engine", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.init_engine", "kind": "function", "doc": "

    Initializes the SQLAlchemy engine.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nThe URL for the database.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • sqlalchemy.engine.base.Engine: The SQLAlchemy engine.
    • \n
    \n", "signature": "(self, url):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.get_dict_by_classes", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.get_dict_by_classes", "kind": "function", "doc": "

    Returns a dictionary of molecular formulas.

    \n\n
    Parameters
    \n\n
      \n
    • classes (list):\nThe list of classes.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • nominal_mzs (list):\nThe list of nominal m/z values.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • molecular_search_settings (MolecularFormulaSearchSettings):\nThe molecular formula search settings.
    • \n
    • adducts (list, optional):\nThe list of adducts. Default is None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The dictionary of molecular formulas.
    • \n
    \n\n
    Notes
    \n\n

    Known issue, when using SQLite:\nif the number of classes and nominal_m/zs are higher than 999 the query will fail\nSolution: use postgres or split query

    \n", "signature": "(\tself,\tclasses,\tion_type,\tnominal_mzs,\tion_charge,\tmolecular_search_settings,\tadducts=None):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.check_entry", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.check_entry", "kind": "function", "doc": "

    Checks if a molecular formula is in the database.

    \n\n
    Parameters
    \n\n
      \n
    • classe (str):\nThe class of the molecular formula.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • molecular_search_settings (MolecularFormulaSearchSettings):\nThe molecular formula search settings.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • sqlalchemy.orm.query.Query: The query.
    • \n
    \n", "signature": "(self, classe, ion_type, molecular_search_settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.get_all_classes", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.get_all_classes", "kind": "function", "doc": "

    Returns a list of all classes in the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.get_all", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.get_all", "kind": "function", "doc": "

    Returns a list of all molecular formulas in the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.delete_entry", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.delete_entry", "kind": "function", "doc": "

    Deletes a molecular formula from the database.

    \n", "signature": "(self, row):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.purge", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.purge", "kind": "function", "doc": "

    Deletes all molecular formulas from the database.

    \n\n
    Notes
    \n\n

    Careful, this will delete the entire database table

    \n", "signature": "(self, cls):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.clear_data", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.clear_data", "kind": "function", "doc": "

    Clears the database.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.close", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.close", "kind": "function", "doc": "

    Closes the database.

    \n\n
    Parameters
    \n\n
      \n
    • commit (bool, optional):\nWhether to commit the session. Default is True.
    • \n
    \n", "signature": "(self, commit=True):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.molecularSQL.MolForm_SQL.add_engine_pidguard", "modulename": "corems.molecular_id.factory.molecularSQL", "qualname": "MolForm_SQL.add_engine_pidguard", "kind": "function", "doc": "

    Adds multiprocessing guards.

    \n\n

    Forces a connection to be reconnected if it is detected\nas having been shared to a sub-process.

    \n\n
    Parameters
    \n\n
      \n
    • engine (sqlalchemy.engine.base.Engine):\nThe SQLAlchemy engine.
    • \n
    \n", "signature": "(self, engine):", "funcdef": "def"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results", "modulename": "corems.molecular_id.factory.spectrum_search_results", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults", "kind": "class", "doc": "

    Class for storing Search Results for a single Spectrum Query

    \n\n
    Parameters
    \n\n
      \n
    • query_spectrum (MassSpectrum):\nThe queried mass spectrum
    • \n
    • precursor_mz (float, optional):\nThe queried precursor_mz. None is interpreted as an open query, i.e. no precursor_mz
    • \n
    • spectral_similarity_search_results (dict):\nThe search results for the queried spectrum, which will be unpacked into attributes
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • query_spectrum (MassSpectrum):\nThe queried mass spectrum
    • \n
    • query_spectrum_id (int):\nThe id of the queried spectrum (the scan number within a MassSpectra object)
    • \n
    • precursor_mz (float):\nThe precursor m/z of the queried spectrum
    • \n
    \n\n
    Other Possible Attributes
    \n\n

    ref_mol_id : str\n The id of the molecule associated with the query spectrum in reference database\nref_ms_id : str\n The id of the query spectrum in reference database\nref_precursor_mz : float\n The precursor mass of the query spectrum\nprecursor_mz_error_ppm : float\n The ppm error between the query spectrum and the reference spectrum\nentropy_similarity : float\n The entropy similarity between the query spectrum and the reference spectrum\nref_ion_type : str\n The ion type of the reference spectrum, i.e. [M+H]+, [M+Na]+, etc.\nquery_mz_in_ref_n : list\n The number of query m/z peaks that are in the reference spectrum\nquery_mz_in_ref_fract : float\n The fraction of query m/z peaks that are in the reference spectrum\nquery_frag_types : list\n The fragment types of the query spectrum that are in the reference spectrum,\n i.e. LSF (lipid species fragments) or MSF (molecular species fragments),\n generally used only for lipidomics\nref_mz_in_query_n : list\n The number of reference m/z peaks that are in the query spectrum\nref_mz_in_query_fract : float\n The fraction of reference m/z peaks that are in the query spectrum\nref_frag_types : list\n The fragment types of the reference spectrum,\n i.e. LSF (lipid species fragments) or MSF (molecular species fragments),\n generally used only for lipidomics

    \n\n
    Methods
    \n\n

    *to_dataframe().\n Convert the SpectrumSearchResults to a pandas DataFrame

    \n"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults.__init__", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults.__init__", "kind": "function", "doc": "

    \n", "signature": "(query_spectrum, precursor_mz, spectral_similarity_search_results)"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults.query_spectrum", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults.query_spectrum", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults.precursor_mz", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults.precursor_mz", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.factory.spectrum_search_results.SpectrumSearchResults.to_dataframe", "modulename": "corems.molecular_id.factory.spectrum_search_results", "qualname": "SpectrumSearchResults.to_dataframe", "kind": "function", "doc": "

    Convert the SpectrumSearchResults to a pandas DataFrame

    \n\n
    Parameters
    \n\n
      \n
    • cols_to_drop (list, optional):\nA list of columns to drop from the DataFrame. Default is None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • pandas.DataFrame: A DataFrame with the SpectrumSearchResults attributes as columns
    • \n
    \n", "signature": "(self, cols_to_drop=None):", "funcdef": "def"}, {"fullname": "corems.molecular_id.input", "modulename": "corems.molecular_id.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.input.nistMSI", "modulename": "corems.molecular_id.input.nistMSI", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI", "kind": "class", "doc": "

    A class for reading NIST MSI files and storing the data in a SQLite database.

    \n\n
    Parameters
    \n\n
      \n
    • file_path (str):\nThe path to the NIST MSI file.
    • \n
    • url (str, optional):\nThe URL for the SQLite database. Default is 'sqlite://'.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • FileExistsError: If the specified file does not exist.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • file_path (str):\nThe path to the NIST MSI file.
    • \n
    • url (str):\nThe URL for the SQLite database.
    • \n
    • sqlLite_obj (EI_LowRes_SQLite):\nThe SQLite object for storing the compound data.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run().\nRuns the thread and initializes the SQLite object.
    • \n
    • get_sqlLite_obj().\nReturns the SQLite object.
    • \n
    • get_compound_data_dict_list().\nParses the NIST MSI file and returns a list of compound data dictionaries.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.__init__", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(file_path, url='sqlite://')"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.file_path", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.file_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.url", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.url", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.run", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.run", "kind": "function", "doc": "

    Runs the thread and initializes the SQLite object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.get_sqlLite_obj", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.get_sqlLite_obj", "kind": "function", "doc": "

    Returns the SQLite object.

    \n\n
    Returns
    \n\n
      \n
    • EI_LowRes_SQLite: The SQLite object for storing the compound data.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.input.nistMSI.ReadNistMSI.get_compound_data_dict_list", "modulename": "corems.molecular_id.input.nistMSI", "qualname": "ReadNistMSI.get_compound_data_dict_list", "kind": "function", "doc": "

    Parses the NIST MSI file and returns a list of compound data dictionaries.

    \n\n
    Returns
    \n\n
      \n
    • list: A list of compound data dictionaries.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search", "modulename": "corems.molecular_id.search", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.compoundSearch", "modulename": "corems.molecular_id.search.compoundSearch", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch", "kind": "class", "doc": "

    A class representing a low-resolution mass spectral match.

    \n\n
    Parameters
    \n\n
      \n
    • gcms_obj (object):\nThe GC-MS object.
    • \n
    • sql_obj (object, optional):\nThe SQL object for database operations. Default is None.
    • \n
    • calibration (bool, optional):\nFlag indicating if the match is for calibration. Default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • gcms_obj (object):\nThe GC-MS object.
    • \n
    • sql_obj (object):\nThe SQL object for database operations.
    • \n
    • calibration (bool):\nFlag indicating if the match is for calibration.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • metabolite_detector_score(gc_peak, ref_obj, spectral_simi).\nCalculates the spectral similarity scores and the similarity score for a given GC peak and reference object.
    • \n
    • run().\nRuns the low-resolution mass spectral match.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.__init__", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(gcms_obj, sql_obj=None, calibration=False)"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.gcms_obj", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.gcms_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.calibration", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.calibration", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.metabolite_detector_score", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.metabolite_detector_score", "kind": "function", "doc": "

    Calculates the spectral similarity scores and the similarity score for a given GC peak and reference object.

    \n\n
    Parameters
    \n\n
      \n
    • gc_peak (object):\nThe GC peak object.
    • \n
    • ref_obj (object):\nThe reference object.
    • \n
    • spectral_simi (object):\nThe spectral similarity object.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: A tuple containing the spectral similarity scores, RI score, and similarity score.
    • \n
    \n", "signature": "(self, gc_peak, ref_obj, spectral_simi):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.compoundSearch.LowResMassSpectralMatch.run", "modulename": "corems.molecular_id.search.compoundSearch", "qualname": "LowResMassSpectralMatch.run", "kind": "function", "doc": "

    Runs the low-resolution mass spectral match.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces", "modulename": "corems.molecular_id.search.database_interfaces", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface", "kind": "class", "doc": "

    Base class that facilitates connection to spectral reference databases,\nsuch as EMSL's Metabolomics Reference Database (MetabRef).

    \n", "bases": "abc.ABC"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.__init__", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.__init__", "kind": "function", "doc": "

    Initialize instance.

    \n\n
    Parameters
    \n\n
      \n
    • key (str):\nToken key.
    • \n
    \n", "signature": "(key=None)"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.key", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.key", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.set_token", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.set_token", "kind": "function", "doc": "

    Set environment variable for MetabRef database token.

    \n\n
    Parameters
    \n\n
      \n
    • path (str):\nPath to token.
    • \n
    \n", "signature": "(self, path):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.get_token", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.get_token", "kind": "function", "doc": "

    Get environment variable for database token.

    \n\n
    Returns
    \n\n
      \n
    • str: Token string.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.get_header", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.get_header", "kind": "function", "doc": "

    Access stored database token and prepare as header.

    \n\n
    Returns
    \n\n
      \n
    • str: Header string.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.get_query", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.get_query", "kind": "function", "doc": "

    Request payload from URL according to get protocol.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nURL for request.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: Response as JSON.
    • \n
    \n", "signature": "(self, url):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.SpectralDatabaseInterface.post_query", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "SpectralDatabaseInterface.post_query", "kind": "function", "doc": "

    Request payload from URL according to post protocol.

    \n\n
    Parameters
    \n\n
      \n
    • url (str):\nURL for request.
    • \n
    • variable (str):\nVariable to query.
    • \n
    • values (str):\nSpecific values of variable to query.
    • \n
    • tolerance (str):\nQuery tolerance relative to values.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: Response as JSON.
    • \n
    \n", "signature": "(self, url, variable, values, tolerance):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefInterface", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefInterface", "kind": "class", "doc": "

    Interface to the Metabolomics Reference Database.

    \n", "bases": "SpectralDatabaseInterface"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefInterface.__init__", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefInterface.__init__", "kind": "function", "doc": "

    Initialize instance.

    \n", "signature": "()"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefInterface.spectrum_to_array", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefInterface.spectrum_to_array", "kind": "function", "doc": "

    Convert MetabRef-formatted spectrum to array.

    \n\n
    Parameters
    \n\n
      \n
    • spectrum (str):\nMetabRef spectrum, i.e. list of (m/z,abundance) pairs.
    • \n
    • normalize (bool):\nNormalize the spectrum by its magnitude.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • ~numpy.array: Array of shape (N, 2), with m/z in the first column and abundance in\nthe second.
    • \n
    \n", "signature": "(self, spectrum, normalize=True):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface", "kind": "class", "doc": "

    Interface to the Metabolomics Reference Database.

    \n", "bases": "MetabRefInterface"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.__init__", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.__init__", "kind": "function", "doc": "

    Initialize instance.

    \n", "signature": "()"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.GCMS_LIBRARY_URL", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.GCMS_LIBRARY_URL", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.FAMES_URL", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.FAMES_URL", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.available_formats", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.available_formats", "kind": "function", "doc": "

    View list of available formats.

    \n\n
    Returns
    \n\n
      \n
    • list: Format map keys.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.get_library", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.get_library", "kind": "function", "doc": "

    Request MetabRef GC/MS library.

    \n\n
    Parameters
    \n\n
      \n
    • format (str):\nFormat of requested library, i.e. \"json\", \"sql\", \"flashentropy\".\nSee available_formats method for aliases.
    • \n
    • normalize (bool):\nNormalize the spectrum by its magnitude.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Library in requested format.
    • \n
    \n", "signature": "(self, format='json', normalize=False):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefGCInterface.get_fames", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefGCInterface.get_fames", "kind": "function", "doc": "

    Request MetabRef GC/MS FAMEs library.

    \n\n
    Parameters
    \n\n
      \n
    • format (str):\nFormat of requested library, i.e. \"json\", \"sql\", \"flashentropy\".\nSee available_formats method for aliases.
    • \n
    • normalize (bool):\nNormalize the spectrum by its magnitude.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Library in requested format.
    • \n
    \n", "signature": "(self, format='json', normalize=False):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface", "kind": "class", "doc": "

    Interface to the Metabolomics Reference Database for LC-MS data.

    \n", "bases": "MetabRefInterface"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.__init__", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.__init__", "kind": "function", "doc": "

    Initialize instance.

    \n", "signature": "()"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.PRECURSOR_MZ_URL", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.PRECURSOR_MZ_URL", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.PRECURSOR_MZ_ALL_URL", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.PRECURSOR_MZ_ALL_URL", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.query_by_precursor", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.query_by_precursor", "kind": "function", "doc": "

    Query MetabRef by precursor m/z values.

    \n\n
    Parameters
    \n\n
      \n
    • mz_list (list):\nList of precursor m/z values.
    • \n
    • polarity (str):\nIonization polarity, either \"positive\" or \"negative\".
    • \n
    • mz_tol_ppm (float):\nTolerance in ppm for each precursor m/z value.\nUsed for retrieving a potential match from the database.
    • \n
    • mz_tol_da_api (float, optional):\nMaximum tolerance between precursor m/z values for API search, in daltons.\nUsed to group similar mzs into a single API query for speed. Default is 0.2.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: List of library entries in original JSON format.
    • \n
    \n", "signature": "(self, mz_list, polarity, mz_tol_ppm, mz_tol_da_api=0.2):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.request_all_precursors", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.request_all_precursors", "kind": "function", "doc": "

    Request all precursor m/z values from MetabRef.

    \n\n
    Parameters
    \n\n
      \n
    • polarity (str):\nIonization polarity, either \"positive\" or \"negative\".
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: List of all precursor m/z values.
    • \n
    \n", "signature": "(self, polarity):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.database_interfaces.MetabRefLCInterface.get_lipid_library", "modulename": "corems.molecular_id.search.database_interfaces", "qualname": "MetabRefLCInterface.get_lipid_library", "kind": "function", "doc": "

    Request MetabRef lipid library.

    \n\n
    Parameters
    \n\n
      \n
    • mz_list (list):\nList of precursor m/z values.
    • \n
    • polarity (str):\nIonization polarity, either \"positive\" or \"negative\".
    • \n
    • mz_tol_ppm (float):\nTolerance in ppm for each precursor m/z value.\nUsed for retrieving from a potential match from database.
    • \n
    • mz_tol_da_api (float, optional):\nMaximum tolerance between precursor m/z values for API search, in daltons.\nUsed to group similar mzs into a single API query for speed. Default is 0.2.
    • \n
    • format (str, optional):\nFormat of requested library, i.e. \"json\", \"sql\", \"flashentropy\".\nSee available_formats method for aliases. Default is \"json\".
    • \n
    • normalize (bool, optional):\nNormalize the spectrum by its magnitude. Default is True.
    • \n
    • fe_kwargs (dict, optional):\nKeyword arguments for FlashEntropy search. Default is {}.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: Library in requested format and lipid metadata as a LipidMetadata dataclass.
    • \n
    \n", "signature": "(\tself,\tmz_list,\tpolarity,\tmz_tol_ppm,\tmz_tol_da_api=0.2,\tformat='json',\tnormalize=True,\tfe_kwargs={}):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks", "kind": "class", "doc": "

    Class to find Oxygen peaks in a mass spectrum for formula assignment search

    \n\n

    Class to walk 14Da units over oxygen space for negative ion mass spectrum of natural organic matter\nReturns a list of MSPeak class containing the possible Molecular Formula class objects.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nThis is where we store MassSpec class obj,
    • \n
    • lookupTableSettings (MolecularLookupTableSettings class):\nThis is where we store MolecularLookupTableSettings class obj
    • \n
    • min_O , max_O (int):\nminimum and maximum of Oxygen to allow the software to look for\nit will override the settings at lookupTableSettings.usedAtoms\ndefault min = 1, max = 22
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nThis is where we store MassSpec class obj,
    • \n
    • lookupTableSettings (MolecularLookupTableSettings class):\nThis is where we store MolecularLookupTableSettings class obj
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run().\nwill be called when the instantiated class method start is called
    • \n
    • get_list_found_peaks().\nreturns a list of MSpeaks classes containing all the MolecularFormula candidates inside the MSPeak\nfor more details of the structure see MSPeak class and MolecularFormula class
    • \n
    • set_mass_spec_indexes_by_found_peaks().\nset the mass spectrum to iterate over only the selected indexes
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.__init__", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(\tmass_spectrum_obj,\tsql_db: bool = False,\tmin_O: int = 1,\tmax_O: int = 22)"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.mass_spectrum_obj", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.mass_spectrum_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.min_0", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.min_0", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.max_O", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.max_O", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.run", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.run", "kind": "function", "doc": "

    Run the thread

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.find_most_abundant_formula", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.find_most_abundant_formula", "kind": "function", "doc": "

    Find the most abundant formula in the mass spectrum

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nMass spectrum object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MolecularFormula class obj: most abundant MolecularFormula with the lowest mass error
    • \n
    \n", "signature": "(self, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.find_most_abundant_formula_test", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.find_most_abundant_formula_test", "kind": "function", "doc": "

    [Test function] Find the most abundant formula in the mass spectrum

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nMass spectrum object
    • \n
    • settings (MolecularSearchSettings class):\nMolecular search settings object
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MolecularFormula class obj: most abundant MolecularFormula with the lowest mass error
    • \n
    \n", "signature": "(self, mass_spectrum_obj, settings):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.find_series_mspeaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.find_series_mspeaks", "kind": "function", "doc": "

    Find a series of abundant peaks in the mass spectrum for a given molecular formula

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpec class):\nMass spectrum object
    • \n
    • molecular_formula_obj_reference (MolecularFormula class):\nMolecular formula object
    • \n
    • deltamz (float):\ndelta m/z to look for peaks
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: list of MSpeak class objects
    • \n
    \n", "signature": "(self, mass_spectrum_obj, molecular_formula_obj_reference, deltamz=14):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.get_list_found_peaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.get_list_found_peaks", "kind": "function", "doc": "

    Get the list of found peaks

    \n\n
    Returns
    \n\n
      \n
    • list: list of MSpeak class objects
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.findOxygenPeaks.FindOxygenPeaks.set_mass_spec_indexes_by_found_peaks", "modulename": "corems.molecular_id.search.findOxygenPeaks", "qualname": "FindOxygenPeaks.set_mass_spec_indexes_by_found_peaks", "kind": "function", "doc": "

    Set the mass spectrum to iterate over only the selected indexes.

    \n\n
    Notes
    \n\n

    Warning!!!!\nset the mass spectrum to iterate over only the selected indexes\ndo not forget to call mass_spectrum_obj.reset_indexes after the job is done

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.lcms_spectral_search", "modulename": "corems.molecular_id.search.lcms_spectral_search", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.lcms_spectral_search.LCMSSpectralSearch", "modulename": "corems.molecular_id.search.lcms_spectral_search", "qualname": "LCMSSpectralSearch", "kind": "class", "doc": "

    Methods for searching LCMS spectra.

    \n\n

    This class is designed to be a mixin class for the ~corems.mass_spectra.factory.lc_class.LCMSBase class.

    \n"}, {"fullname": "corems.molecular_id.search.lcms_spectral_search.LCMSSpectralSearch.get_more_match_quals", "modulename": "corems.molecular_id.search.lcms_spectral_search", "qualname": "LCMSSpectralSearch.get_more_match_quals", "kind": "function", "doc": "

    Return additional match qualities between query and library entry.

    \n\n
    Parameters
    \n\n
      \n
    • query_mz_arr (np.array):\nArray of query spectrum. Shape (N, 2), with m/z in the first column\nand abundance in the second.
    • \n
    • lib_entry (dict):\nLibrary spectrum entry, with 'mz' key containing the spectrum in\nthe format (mz, abundance),(mz, abundance), i.e. from MetabRef.
    • \n
    • mz_tol_da (float, optional):\nTolerance in Da for matching peaks (in MS2). Default is 0.1.
    • \n
    • include_fragment_types (bool, optional):\nIf True, include fragment type comparisons in output.\nDefaults to False.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • tuple: Tuple of (query_in_lib, query_in_lib_fract, lib_in_query, lib_in_query_fract, query_frags, lib_frags, lib_precursor_mz).
    • \n
    \n\n
    Notes
    \n\n

    query_in_lib : int\n Number of peaks in query that are present in the library entry (within mz_tol_da).\nquery_in_lib_fract : float\n Fraction of peaks in query that are present in the library entry (within mz_tol_da).\nlib_in_query : int\n Number of peaks in the library entry that are present in the query (within mz_tol_da).\nlib_in_query_fract : float\n Fraction of peaks in the library entry that are present in the query (within mz_tol_da).\nquery_frags : list\n List of unique fragment types present in the query, generally 'MLF' or 'LSF' or both.\nlib_frags : list\n List of unique fragment types present in the library entry, generally 'MLF' or 'LSF' or both.

    \n\n
    Raises
    \n\n
      \n
    • ValueError: If library entry does not have 'fragment_types' key and include_fragment_types is True.
    • \n
    \n", "signature": "(query_mz_arr, lib_entry, mz_tol_da=0.1, include_fragment_types=False):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.lcms_spectral_search.LCMSSpectralSearch.fe_search", "modulename": "corems.molecular_id.search.lcms_spectral_search", "qualname": "LCMSSpectralSearch.fe_search", "kind": "function", "doc": "

    Search LCMS spectra using a FlashEntropy approach.

    \n\n
    Parameters
    \n\n
      \n
    • scan_list (list):\nList of scan numbers to search.
    • \n
    • fe_lib (~ms_entropy.FlashEntropySearch):\nFlashEntropy Search instance.
    • \n
    • precursor_mz_list (list, optional):\nList of precursor m/z values to search, by default [], which implies\nmatched with mass features; to enable this use_mass_features must be True.
    • \n
    • use_mass_features (bool, optional):\nIf True, use mass features to get precursor m/z values, by default True.\nIf True, will add search results to mass features' ms2_similarity_results attribute.
    • \n
    • peak_sep_da (float, optional):\nMinimum separation between m/z peaks spectra in Da. This needs to match the\napproximate resolution of the search spectra and the FlashEntropySearch\ninstance, by default 0.01.
    • \n
    • get_additional_metrics (bool, optional):\nIf True, get additional metrics from FlashEntropy search, by default True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • None, but adds results to self.spectral_search_results and associates these
    • \n
    • spectral_search_results with mass_features within the self.mass_features dictionary.
    • \n
    \n", "signature": "(\tself,\tscan_list,\tfe_lib,\tprecursor_mz_list=[],\tuse_mass_features=True,\tpeak_sep_da=0.01,\tget_additional_metrics=True):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.last_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "last_error", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.last_dif", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "last_dif", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.closest_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "closest_error", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.error_average", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "error_average", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.nbValues", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "nbValues", "kind": "variable", "doc": "

    \n", "default_value": "0"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas", "kind": "class", "doc": "

    Class for searching molecular formulas in a mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • sql_db (MolForm_SQL, optional):\nThe SQL database object, by default None.
    • \n
    • first_hit (bool, optional):\nFlag to indicate whether to skip peaks that already have a molecular formula assigned, by default False.
    • \n
    • find_isotopologues (bool, optional):\nFlag to indicate whether to find isotopologues, by default True.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • sql_db (MolForm_SQL):\nThe SQL database object.
    • \n
    • first_hit (bool):\nFlag to indicate whether to skip peaks that already have a molecular formula assigned.
    • \n
    • find_isotopologues (bool):\nFlag to indicate whether to find isotopologues.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run_search().\nRun the molecular formula search.
    • \n
    • run_worker_mass_spectrum().\nRun the molecular formula search on the mass spectrum object.
    • \n
    • run_worker_ms_peaks().\nRun the molecular formula search on the given list of mass spectrum peaks.
    • \n
    • database_to_dict().\nConvert the database results to a dictionary.
    • \n
    • run_molecular_formula().\nRun the molecular formula search on the given list of mass spectrum peaks.
    • \n
    • search_mol_formulas().\nSearch for molecular formulas in the mass spectrum.
    • \n
    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.__init__", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.__init__", "kind": "function", "doc": "

    \n", "signature": "(\tmass_spectrum_obj,\tsql_db=None,\tfirst_hit: bool = False,\tfind_isotopologues: bool = True)"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.first_hit", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.first_hit", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.find_isotopologues", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.find_isotopologues", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.mass_spectrum_obj", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.mass_spectrum_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.run_search", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.run_search", "kind": "function", "doc": "

    Run the molecular formula search.

    \n\n
    Parameters
    \n\n
      \n
    • mspeaks (list of MSPeak):\nThe list of mass spectrum peaks.
    • \n
    • query (dict):\nThe query dictionary containing the possible molecular formulas.
    • \n
    • min_abundance (float):\nThe minimum abundance threshold.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom, by default None.
    • \n
    \n", "signature": "(\tself,\tmspeaks: list,\tquery: dict,\tmin_abundance: float,\tion_type: str,\tion_charge: int,\tadduct_atom=None):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.run_worker_mass_spectrum", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.run_worker_mass_spectrum", "kind": "function", "doc": "

    Run the molecular formula search on the mass spectrum object.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.run_worker_ms_peaks", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.run_worker_ms_peaks", "kind": "function", "doc": "

    Run the molecular formula search on the given list of mass spectrum peaks.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peaks (list of MSPeak):\nThe list of mass spectrum peaks.
    • \n
    \n", "signature": "(self, ms_peaks):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.database_to_dict", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.database_to_dict", "kind": "function", "doc": "

    Convert the database results to a dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • classe_str_list (list):\nThe list of class strings.
    • \n
    • nominal_mzs (list):\nThe list of nominal m/z values.
    • \n
    • mf_search_settings (MolecularFormulaSearchSettings):\nThe molecular formula search settings.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: The dictionary containing the database results.
    • \n
    \n", "signature": "(classe_str_list, nominal_mzs, mf_search_settings, ion_charge):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.run_molecular_formula", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.run_molecular_formula", "kind": "function", "doc": "

    Run the molecular formula search on the given list of mass spectrum peaks.

    \n\n
    Parameters
    \n\n
      \n
    • ms_peaks (list of MSPeak):\nThe list of mass spectrum peaks.
    • \n
    \n", "signature": "(*args, **kw):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulas.search_mol_formulas", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulas.search_mol_formulas", "kind": "function", "doc": "

    Search for molecular formulas in the mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • possible_formulas_list (list of MolecularFormula):\nThe list of possible molecular formulas.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • neutral_molform (bool, optional):\nFlag to indicate whether the molecular formulas are neutral, by default True.
    • \n
    • find_isotopologues (bool, optional):\nFlag to indicate whether to find isotopologues, by default True.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom, by default None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list of MSPeak: The list of mass spectrum peaks with assigned molecular formulas.
    • \n
    \n", "signature": "(\tself,\tpossible_formulas_list: List[corems.molecular_formula.factory.MolecularFormulaFactory.MolecularFormula],\tion_type: str,\tneutral_molform=True,\tfind_isotopologues=True,\tadduct_atom=None) -> List[corems.ms_peak.factory.MSPeakClasses._MSPeak]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker", "kind": "class", "doc": "

    Class for searching molecular formulas in a mass spectrum.

    \n\n
    Parameters
    \n\n
      \n
    • find_isotopologues (bool, optional):\nFlag to indicate whether to find isotopologues, by default True.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • find_isotopologues (bool):\nFlag to indicate whether to find isotopologues.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • reset_error().\nReset the error variables.
    • \n
    • set_last_error().\nSet the last error.
    • \n
    • find_formulas().\nFind the formulas.
    • \n
    • calc_error().\nCalculate the error.
    • \n
    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.__init__", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.__init__", "kind": "function", "doc": "

    \n", "signature": "(find_isotopologues=True)"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.find_isotopologues", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.find_isotopologues", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.reset_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.reset_error", "kind": "function", "doc": "

    Reset the error variables.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n", "signature": "(self, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.set_last_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.set_last_error", "kind": "function", "doc": "

    Set the last error.

    \n\n
    Parameters
    \n\n
      \n
    • error (float):\nThe error.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    \n", "signature": "(self, error, mass_spectrum_obj):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.calc_error", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.calc_error", "kind": "function", "doc": "

    Calculate the error.

    \n\n
    Parameters
    \n\n
      \n
    • mz_exp (float):\nThe experimental m/z value.
    • \n
    • mz_calc (float):\nThe calculated m/z value.
    • \n
    • method (str, optional):\nThe method, by default 'ppm'.
    • \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If the method is not ppm or ppb.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: The error.
    • \n
    \n", "signature": "(mz_exp, mz_calc, method='ppm'):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulaWorker.find_formulas", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulaWorker.find_formulas", "kind": "function", "doc": "

    Find the formulas.

    \n\n
    Parameters
    \n\n
      \n
    • formulas (list of MolecularFormula):\nThe list of molecular formulas.
    • \n
    • min_abundance (float):\nThe minimum abundance threshold.
    • \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • ms_peak (MSPeak):\nThe mass spectrum peak.
    • \n
    • ion_type (str):\nThe ion type.
    • \n
    • ion_charge (int):\nThe ion charge.
    • \n
    • adduct_atom (str, optional):\nThe adduct atom, by default None.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list of MSPeak: The list of mass spectrum peaks with assigned molecular formulas.
    • \n
    \n\n
    Notes
    \n\n

    Uses the closest error for the next search (this is not ideal, it needs to use confidence\nmetric to choose the right candidate then propagate the error using the error from the best candidate).\nIt needs to add s/n to the equation.\nIt needs optimization to define the mz_error_range within a m/z unit since it is directly proportional\nwith the mass, and inversely proportional to the rp. It's not linear, i.e., sigma mass.\nThe idea is to correlate sigma to resolving power, signal to noise and sample complexity per mz unit.\nMethod='distance'

    \n", "signature": "(\tself,\tformulas,\tmin_abundance,\tmass_spectrum_obj,\tms_peak,\tion_type,\tion_charge,\tadduct_atom=None):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC", "kind": "class", "doc": "

    Class for searching molecular formulas in a LC object.

    \n\n
    Parameters
    \n\n
      \n
    • lcms_obj (LC):\nThe LC object.
    • \n
    • sql_db (MolForm_SQL, optional):\nThe SQL database object, by default None.
    • \n
    • first_hit (bool, optional):\nFlag to indicate whether to skip peaks that already have a molecular formula assigned, by default False.
    • \n
    • find_isotopologues (bool, optional):\nFlag to indicate whether to find isotopologues, by default True.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run_untargeted_worker_ms1().\nRun untargeted molecular formula search on the ms1 mass spectrum.
    • \n
    • run_target_worker_ms1().\nRun targeted molecular formula search on the ms1 mass spectrum.
    • \n
    \n", "bases": "SearchMolecularFormulas"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.__init__", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.__init__", "kind": "function", "doc": "

    \n", "signature": "(lcms_obj, sql_db=None, first_hit=False, find_isotopologues=True)"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.first_hit", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.first_hit", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.find_isotopologues", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.find_isotopologues", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.lcms_obj", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.lcms_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.run_untargeted_worker_ms1", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.run_untargeted_worker_ms1", "kind": "function", "doc": "

    Run untargeted molecular formula search on the ms1 mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.molecularFormulaSearch.SearchMolecularFormulasLC.run_target_worker_ms1", "modulename": "corems.molecular_id.search.molecularFormulaSearch", "qualname": "SearchMolecularFormulasLC.run_target_worker_ms1", "kind": "function", "doc": "

    Run targeted molecular formula search on the ms1 mass spectrum.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment", "modulename": "corems.molecular_id.search.priorityAssignment", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment", "kind": "class", "doc": "

    A class for assigning priority to oxygen classes in a molecular search.

    \n\n
    Parameters
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • sql_db (bool, optional):\nWhether to use an SQL database. The default is False.
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • mass_spectrum_obj (MassSpectrum):\nThe mass spectrum object.
    • \n
    • sql_db (MolForm_SQL):\nThe SQL database object.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • run().\nRun the priority assignment process.
    • \n
    • create_data_base().\nCreate the molecular database for the specified heteroatomic classes.
    • \n
    • run_worker_mass_spectrum(assign_classes_order_tuples).\nRun the molecular formula search for each class in the specified order.
    • \n
    • get_dict_molecular_database(classe_str_list).\nGet the molecular database as a dictionary.
    • \n
    • ox_classes_and_peaks_in_order_().\nGet the oxygen classes and associated peaks in order.
    • \n
    • get_classes_in_order(dict_ox_class_and_ms_peak)\nGet the classes in order.
    • \n
    \n", "bases": "threading.Thread"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.__init__", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.__init__", "kind": "function", "doc": "

    This constructor should always be called with keyword arguments. Arguments are:

    \n\n

    group should be None; reserved for future extension when a ThreadGroup\nclass is implemented.

    \n\n

    target is the callable object to be invoked by the run()\nmethod. Defaults to None, meaning nothing is called.

    \n\n

    name is the thread name. By default, a unique name is constructed of\nthe form \"Thread-N\" where N is a small decimal number.

    \n\n

    args is the argument tuple for the target invocation. Defaults to ().

    \n\n

    kwargs is a dictionary of keyword arguments for the target\ninvocation. Defaults to {}.

    \n\n

    If a subclass overrides the constructor, it must make sure to invoke\nthe base class constructor (Thread.__init__()) before doing anything\nelse to the thread.

    \n", "signature": "(mass_spectrum_obj, sql_db=False)"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.mass_spectrum_obj", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.mass_spectrum_obj", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.run", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.run", "kind": "function", "doc": "

    Run the priority assignment process.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.create_data_base", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.create_data_base", "kind": "function", "doc": "

    Create the molecular database for the specified heteroatomic classes.

    \n\n
    Returns
    \n\n
      \n
    • assign_classes_order_str_dict_tuple_ (list):\nA list of tuples containing the class names and dictionaries of class attributes.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.run_worker_mass_spectrum", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.run_worker_mass_spectrum", "kind": "function", "doc": "

    Run the molecular formula search for each class in the specified order.

    \n\n
    Parameters
    \n\n
      \n
    • assign_classes_order_tuples (list):\nA list of tuples containing the class names and dictionaries of class attributes.
    • \n
    \n", "signature": "(self, assign_classes_order_tuples):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.get_dict_molecular_database", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.get_dict_molecular_database", "kind": "function", "doc": "

    Get the molecular database as a dictionary.

    \n\n
    Parameters
    \n\n
      \n
    • classe_str_list (list):\nA list of class names.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary containing the molecular database.
    • \n
    \n", "signature": "(self, classe_str_list):", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.ox_classes_and_peaks_in_order_", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.ox_classes_and_peaks_in_order_", "kind": "function", "doc": "

    Get the oxygen classes and associated peaks in order.

    \n\n
    Returns
    \n\n
      \n
    • dict: A dictionary containing the oxygen classes and associated peaks.
    • \n
    \n", "signature": "(self) -> dict:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.get_classes_in_order", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.get_classes_in_order", "kind": "function", "doc": "

    Get the classes in order.

    \n\n
    Parameters
    \n\n
      \n
    • dict_ox_class_and_ms_peak (dict):\nA dictionary containing the oxygen classes and associated peaks.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of tuples containing the class names and dictionaries of class attributes.
    • \n
    \n\n
    Notes
    \n\n

    structure is\n ('HC', {'HC': 1})

    \n", "signature": "(self, dict_ox_class_and_ms_peak) -> [(<class 'str'>, <class 'dict'>)]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.get_class_strings_dict", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.get_class_strings_dict", "kind": "function", "doc": "

    Get the class strings and dictionaries.

    \n\n
    Parameters
    \n\n
      \n
    • all_atoms_tuples (tuple):\nA tuple containing the atoms.
    • \n
    • atoms_in_order (list):\nA list of atoms in order.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of tuples containing the class strings and dictionaries.
    • \n
    \n", "signature": "(all_atoms_tuples, atoms_in_order) -> [(<class 'str'>, <class 'dict'>)]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.combine_ox_class_with_other", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.combine_ox_class_with_other", "kind": "function", "doc": "

    Combine the oxygen classes with other classes.

    \n\n
    Parameters
    \n\n
      \n
    • atoms_in_order (list):\nA list of atoms in order.
    • \n
    • classes_strings_dict_tuples (list):

    • \n
    • dict_ox_class_and_ms_peak (dict):\nA dictionary containing the oxygen classes and associated peaks.

    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of dictionaries.
    • \n
    \n", "signature": "(\tatoms_in_order,\tclasses_strings_dict_tuples,\tdict_ox_class_and_ms_peak) -> [<class 'dict'>]:", "funcdef": "def"}, {"fullname": "corems.molecular_id.search.priorityAssignment.OxygenPriorityAssignment.sort_classes", "modulename": "corems.molecular_id.search.priorityAssignment", "qualname": "OxygenPriorityAssignment.sort_classes", "kind": "function", "doc": "

    Sort the classes.

    \n\n
    Parameters
    \n\n
      \n
    • atoms_in_order (list):\nA list of atoms in order.
    • \n
    • combination_tuples (list):
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: A list of tuples containing the class strings and dictionaries.
    • \n
    \n", "signature": "(atoms_in_order, combination_tuples) -> [(<class 'str'>, <class 'dict'>)]:", "funcdef": "def"}, {"fullname": "corems.ms_peak", "modulename": "corems.ms_peak", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.calc", "modulename": "corems.ms_peak.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc", "modulename": "corems.ms_peak.calc.MSPeakCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation", "kind": "class", "doc": "

    Class to perform calculations on MSPeak objects.

    \n\n

    This class provides methods to perform various calculations on MSPeak objects, such as calculating Kendrick Mass Defect (KMD) and Kendrick Mass (KM), calculating peak area, and fitting peak lineshape using different models.

    \n\n
    Parameters
    \n\n
      \n
    • None
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • _ms_parent (MSParent):\nThe parent MSParent object associated with the MSPeakCalculation object.
    • \n
    • mz_exp (float):\nThe experimental m/z value of the peak.
    • \n
    • peak_left_index (int):\nThe start scan index of the peak.
    • \n
    • peak_right_index (int):\nThe final scan index of the peak.
    • \n
    • resolving_power (float):\nThe resolving power of the peak.
    • \n
    \n\n
    Methods
    \n\n
      \n
    • _calc_kmd(dict_base).\nCalculate the Kendrick Mass Defect (KMD) and Kendrick Mass (KM) for a given base formula.
    • \n
    • calc_area().\nCalculate the peak area using numpy's trapezoidal fit.
    • \n
    • fit_peak(mz_extend=6, delta_rp=0, model='Gaussian').\nPerform lineshape analysis on a peak using lmfit module.
    • \n
    • voigt_pso(w, r, yoff, width, loc, a).\nCalculate the Voigt function for particle swarm optimization (PSO) fitting.
    • \n
    • objective_pso(x, w, u).\nCalculate the objective function for PSO fitting.
    • \n
    • minimize_pso(lower, upper, w, u).\nMinimize the objective function using the particle swarm optimization algorithm.
    • \n
    • fit_peak_pso(mz_extend=6, upsample_multiplier=5).\nPerform lineshape analysis on a peak using particle swarm optimization (PSO) fitting.
    • \n
    • voigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1).\n[Legacy] Perform voigt lineshape analysis on a peak.
    • \n
    • pseudovoigt(oversample_multiplier=1, delta_rp=0, mz_overlay=1, fraction=0.5).\n[Legacy] Perform pseudovoigt lineshape analysis on a peak.
    • \n
    • lorentz(oversample_multiplier=1, delta_rp=0, mz_overlay=1).\n[Legacy] Perform lorentz lineshape analysis on a peak.
    • \n
    • gaussian(oversample_multiplier=1, delta_rp=0, mz_overlay=1).\n[Legacy] Perform gaussian lineshape analysis on a peak.
    • \n
    • get_mz_domain(oversample_multiplier, mz_overlay).\n[Legacy] Resample/interpolate datapoints for lineshape analysis.
    • \n
    • number_possible_assignments().\nReturn the number of possible molecular formula assignments for the peak.
    • \n
    • molecular_formula_lowest_error().\nReturn the molecular formula with the smallest absolute mz error.
    • \n
    • molecular_formula_highest_prob_score().\nReturn the molecular formula with the highest confidence score.
    • \n
    • molecular_formula_earth_filter(lowest_error=True).\nFilter molecular formula using the 'Earth' filter.
    • \n
    • molecular_formula_water_filter(lowest_error=True).\nFilter molecular formula using the 'Water' filter.
    • \n
    • molecular_formula_air_filter(lowest_error=True).\nFilter molecular formula using the 'Air' filter.
    • \n
    • cia_score_S_P_error().\nCompound Identification Algorithm SP Error - Assignment Filter.
    • \n
    • cia_score_N_S_P_error().\nCompound Identification Algorithm NSP Error - Assignment Filter.
    • \n
    \n"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.calc_area", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.calc_area", "kind": "function", "doc": "

    Calculate the peak area using numpy's trapezoidal fit

    \n\n

    uses provided mz_domain to accurately integrate areas independent of digital resolution

    \n\n
    Returns
    \n\n
      \n
    • float: peak area
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.fit_peak", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.fit_peak", "kind": "function", "doc": "

    Lineshape analysis on a peak using lmfit module.

    \n\n

    Model and fit peak lineshape by defined function - using lmfit module\nDoes not oversample/resample/interpolate data points\nBetter to go back to time domain and perform more zero filling - if possible.

    \n\n
    Parameters
    \n\n
      \n
    • mz_extend (int):\nextra points left and right of peak definition to include in fitting
    • \n
    • delta_rp (float):\ndelta resolving power to add to resolving power
    • \n
    • model (str):\nType of lineshape model to use.\nModels allowed: Gaussian, Lorentz, Voigt
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    • fit_peak (lmfit object):\nfit results object from lmfit module
    • \n
    \n\n
    Notes
    \n\n

    Returns the calculated mz domain, initial defined abundance profile, and the fit peak results object from lmfit module\nmz_extend here extends the x-axis domain so that we have sufficient points either side of the apex to fit.\nTakes about 10ms per peak

    \n", "signature": "(self, mz_extend=6, delta_rp=0, model='Gaussian'):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.voigt_pso", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.voigt_pso", "kind": "function", "doc": "

    Voigt function for particle swarm optimisation (PSO) fitting

    \n\n

    From https://github.com/pnnl/nmrfit/blob/master/nmrfit/equations.py.\nCalculates a Voigt function over w based on the relevant properties of the distribution.

    \n\n
    Parameters
    \n\n
      \n
    • w (ndarray):\nArray over which the Voigt function will be evaluated.
    • \n
    • r (float):\nRatio between the Gaussian and Lorentzian functions.
    • \n
    • yoff (float):\nY-offset of the Voigt function.
    • \n
    • width (float):\nThe width of the Voigt function.
    • \n
    • loc (float):\nCenter of the Voigt function.
    • \n
    • a (float):\nArea of the Voigt function.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • V (ndarray):\nArray defining the Voigt function over w.
    • \n
    \n\n
    References
    \n\n
      \n
    1. https://github.com/pnnl/nmrfit
    2. \n
    \n\n
    Notes
    \n\n

    Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.

    \n", "signature": "(self, w, r, yoff, width, loc, a):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.objective_pso", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.objective_pso", "kind": "function", "doc": "

    Objective function for particle swarm optimisation (PSO) fitting

    \n\n

    The objective function used to fit supplied data. Evaluates sum of squared differences between the fit and the data.

    \n\n
    Parameters
    \n\n
      \n
    • x (list of floats):\nParameter vector.
    • \n
    • w (ndarray):\nArray of frequency data.
    • \n
    • u (ndarray):\nArray of data to be fit.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • rmse (float):\nRoot mean square error between the data and fit.
    • \n
    \n\n
    References
    \n\n
      \n
    1. https://github.com/pnnl/nmrfit
    2. \n
    \n", "signature": "(self, x, w, u):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.minimize_pso", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.minimize_pso", "kind": "function", "doc": "

    Minimization function for particle swarm optimisation (PSO) fitting

    \n\n

    Minimizes the objective function using the particle swarm optimization algorithm.\nMinimization function based on defined parameters

    \n\n
    Parameters
    \n\n
      \n
    • lower (list of floats):\nLower bounds for the parameters.
    • \n
    • upper (list of floats):\nUpper bounds for the parameters.
    • \n
    • w (ndarray):\nArray of frequency data.
    • \n
    • u (ndarray):\nArray of data to be fit.
    • \n
    \n\n
    Notes
    \n\n

    Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.\nCurrent parameters take ~2 seconds per peak.

    \n\n
    References
    \n\n
      \n
    1. https://github.com/pnnl/nmrfit
    2. \n
    \n", "signature": "(self, lower, upper, w, u):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.fit_peak_pso", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.fit_peak_pso", "kind": "function", "doc": "

    Lineshape analysis on a peak using particle swarm optimisation (PSO) fitting

    \n\n

    Function to fit a Voigt peakshape using particle swarm optimisation (PSO).\nShould return better results than lmfit, but much more computationally expensive

    \n\n
    Parameters
    \n\n
      \n
    • mz_extend (int, optional):\nextra points left and right of peak definition to include in fitting. Defaults to 6.
    • \n
    • upsample_multiplier (int, optional):\nfactor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 5.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • xopt (array):\nvariables describing the voigt function.\nG/L ratio, width (fwhm), apex (x-axis), area.\ny-axis offset is fixed at 0
    • \n
    • fopt (float):\nobjective score (rmse)
    • \n
    • psfit (array):\nrecalculated y values based on function and optimised fit
    • \n
    • psfit_hdp (tuple of arrays):\n0 - linspace x-axis upsampled grid\n1 - recalculated y values based on function and upsampled x-axis grid\nDoes not change results, but aids in visualisation of the 'true' voigt lineshape
    • \n
    \n\n
    Notes
    \n\n

    Particle swarm optimisation (PSO) fitting function can be significantly more computationally expensive than lmfit, with more parameters to optimise.

    \n", "signature": "(self, mz_extend: int = 6, upsample_multiplier: int = 5):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.voigt", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.voigt", "kind": "function", "doc": "

    [Legacy] Voigt lineshape analysis function\nLegacy function for voigt lineshape analysis

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int):\nfactor to increase x-axis points by for simulation of fitted lineshape function
    • \n
    • delta_rp (float):\ndelta resolving power to add to resolving power
    • \n
    • mz_overlay (int):\nextra points left and right of peak definition to include in fitting
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    • calc_abundance (ndarray):\ncalculated abundance profile based on voigt function
    • \n
    \n", "signature": "(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.pseudovoigt", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.pseudovoigt", "kind": "function", "doc": "

    [Legacy] pseudovoigt lineshape function

    \n\n

    Legacy function for pseudovoigt lineshape analysis.\nNote - Code may not be functional currently.

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int, optional):\nfactor to increase x-axis points by for simulation of fitted lineshape function. Defaults to 1.
    • \n
    • delta_rp (float, optional):\ndelta resolving power to add to resolving power. Defaults to 0.
    • \n
    • mz_overlay (int, optional):\nextra points left and right of peak definition to include in fitting. Defaults to 1.
    • \n
    • fraction (float, optional):\nfraction of gaussian component in pseudovoigt function. Defaults to 0.5.
    • \n
    \n", "signature": "(\tself,\toversample_multiplier=1,\tdelta_rp=0,\tmz_overlay=1,\tfraction=0.5):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.lorentz", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.lorentz", "kind": "function", "doc": "

    [Legacy] Lorentz lineshape analysis function

    \n\n

    Legacy function for lorentz lineshape analysis

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int):\nfactor to increase x-axis points by for simulation of fitted lineshape function
    • \n
    • delta_rp (float):\ndelta resolving power to add to resolving power
    • \n
    • mz_overlay (int):\nextra points left and right of peak definition to include in fitting
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    • calc_abundance (ndarray):\ncalculated abundance profile based on lorentz function
    • \n
    \n", "signature": "(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.gaussian", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.gaussian", "kind": "function", "doc": "

    [Legacy] Gaussian lineshape analysis function\nLegacy gaussian lineshape analysis function

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int):\nfactor to increase x-axis points by for simulation of fitted lineshape function
    • \n
    • delta_rp (float):\ndelta resolving power to add to resolving power
    • \n
    • mz_overlay (int):\nextra points left and right of peak definition to include in fitting
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    • calc_abundance (ndarray):\ncalculated abundance profile based on gaussian function
    • \n
    \n", "signature": "(self, oversample_multiplier=1, delta_rp=0, mz_overlay=1):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.get_mz_domain", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.get_mz_domain", "kind": "function", "doc": "

    [Legacy] function to resample/interpolate datapoints for lineshape analysis

    \n\n

    This code is used for the legacy line fitting functions and not recommended.\nLegacy function to support expanding mz domain for legacy lineshape functions

    \n\n
    Parameters
    \n\n
      \n
    • oversample_multiplier (int):\nfactor to increase x-axis points by for simulation of fitted lineshape function
    • \n
    • mz_overlay (int):\nextra points left and right of peak definition to include in fitting
    • \n
    \n\n
    Returns
    \n\n
      \n
    • mz_domain (ndarray):\nx-axis domain for fit
    • \n
    \n", "signature": "(self, oversample_multiplier, mz_overlay):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.number_possible_assignments", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.number_possible_assignments", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_lowest_error", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_lowest_error", "kind": "function", "doc": "

    Return the molecular formula with the smallest absolute mz error

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_highest_prob_score", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_highest_prob_score", "kind": "function", "doc": "

    Return the molecular formula with the highest confidence score

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_earth_filter", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_earth_filter", "kind": "function", "doc": "

    Filter molecular formula using the 'Earth' filter

    \n\n

    This function applies the Formularity-esque 'Earth' filter to possible molecular formula assignments.\nEarth Filter:\n O > 0 AND N <= 3 AND P <= 2 AND 3P <= O

    \n\n

    If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Earth filter.\nOtherwise, it will return all Earth-filter compliant formulas.

    \n\n
    Parameters
    \n\n
      \n
    • lowest_error (bool, optional):\nReturn only the lowest error formula which also fits the Earth filter.\nIf False, return all Earth-filter compliant formulas. Default is True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: List of molecular formula objects which fit the Earth filter
    • \n
    \n\n
    References
    \n\n
      \n
    1. Nikola Tolic et al., \"Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra\"\nAnal. Chem. 2017, 89, 23, 12659\u201312665\ndoi: 10.1021/acs.analchem.7b03318
    2. \n
    \n", "signature": "(self, lowest_error=True):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_water_filter", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_water_filter", "kind": "function", "doc": "

    Filter molecular formula using the 'Water' filter

    \n\n

    This function applies the Formularity-esque 'Water' filter to possible molecular formula assignments.\nWater Filter:\n O > 0 AND N <= 3 AND S <= 2 AND P <= 2

    \n\n

    If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Water filter.\nOtherwise, it will return all Water-filter compliant formulas.

    \n\n
    Parameters
    \n\n
      \n
    • lowest_error (bool, optional):\nReturn only the lowest error formula which also fits the Water filter.\nIf False, return all Water-filter compliant formulas. Defaults to True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: List of molecular formula objects which fit the Water filter
    • \n
    \n\n
    References
    \n\n
      \n
    1. Nikola Tolic et al., \"Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra\"\nAnal. Chem. 2017, 89, 23, 12659\u201312665\ndoi: 10.1021/acs.analchem.7b03318
    2. \n
    \n", "signature": "(self, lowest_error=True):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.molecular_formula_air_filter", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.molecular_formula_air_filter", "kind": "function", "doc": "

    Filter molecular formula using the 'Air' filter

    \n\n

    This function applies the Formularity-esque 'Air' filter to possible molecular formula assignments.\nAir Filter:\n O > 0 AND N <= 3 AND S <= 1 AND P = 0 AND 3(S+N) <= O

    \n\n

    If the lowest_error method is also used, it will return the single formula annotation with the smallest absolute error which also fits the Air filter.\nOtherwise, it will return all Air-filter compliant formulas.

    \n\n
    Parameters
    \n\n
      \n
    • lowest_error (bool, optional):\nReturn only the lowest error formula which also fits the Air filter.\nIf False, return all Air-filter compliant formulas. Defaults to True.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • list: List of molecular formula objects which fit the Air filter
    • \n
    \n\n
    References
    \n\n
      \n
    1. Nikola Tolic et al., \"Formularity: Software for Automated Formula Assignment of Natural and Other Organic Matter from Ultrahigh-Resolution Mass Spectra\"\nAnal. Chem. 2017, 89, 23, 12659\u201312665\ndoi: 10.1021/acs.analchem.7b03318
    2. \n
    \n", "signature": "(self, lowest_error=True):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.cia_score_S_P_error", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.cia_score_S_P_error", "kind": "function", "doc": "

    Compound Identification Algorithm SP Error - Assignment Filter

    \n\n

    This function applies the Compound Identification Algorithm (CIA) SP Error filter to possible molecular formula assignments.

    \n\n

    It takes the molecular formula with the lowest S+P count, and returns the formula with the lowest absolute error from this subset.

    \n\n
    Returns
    \n\n
      \n
    • MolecularFormula: A single molecular formula which fits the rules of the CIA SP Error filter
    • \n
    \n\n
    References
    \n\n
      \n
    1. Elizabeth B. Kujawinski and Mark D. Behn, \"Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter\"\nAnal. Chem. 2006, 78, 13, 4363\u20134373\ndoi: 10.1021/ac0600306
    2. \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.calc.MSPeakCalc.MSPeakCalculation.cia_score_N_S_P_error", "modulename": "corems.ms_peak.calc.MSPeakCalc", "qualname": "MSPeakCalculation.cia_score_N_S_P_error", "kind": "function", "doc": "

    Compound Identification Algorithm NSP Error - Assignment Filter

    \n\n

    This function applies the Compound Identification Algorithm (CIA) NSP Error filter to possible molecular formula assignments.

    \n\n

    It takes the molecular formula with the lowest N+S+P count, and returns the formula with the lowest absolute error from this subset.

    \n\n
    Returns
    \n\n
      \n
    • MolecularFormula: A single molecular formula which fits the rules of the CIA NSP Error filter
    • \n
    \n\n
    References
    \n\n
      \n
    1. Elizabeth B. Kujawinski and Mark D. Behn, \"Automated Analysis of Electrospray Ionization Fourier Transform Ion Cyclotron Resonance Mass Spectra of Natural Organic Matter\"\nAnal. Chem. 2006, 78, 13, 4363\u20134373\ndoi: 10.1021/ac0600306
    2. \n
    \n\n
    Raises
    \n\n
      \n
    • Exception: If no molecular formula are associated with mass spectrum peak.
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.factory", "modulename": "corems.ms_peak.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses", "modulename": "corems.ms_peak.factory.MSPeakClasses", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.ICRMassPeak", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "ICRMassPeak", "kind": "class", "doc": "

    A class representing a peak in an ICR mass spectrum.

    \n", "bases": "_MSPeak"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.ICRMassPeak.__init__", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "ICRMassPeak.__init__", "kind": "function", "doc": "

    \n", "signature": "(*args, ms_parent=None, exp_freq=None)"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.ICRMassPeak.resolving_power_calc", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "ICRMassPeak.resolving_power_calc", "kind": "function", "doc": "

    Calculate the theoretical resolving power of the peak.

    \n\n
    Parameters
    \n\n
      \n
    • T (float):\ntransient time
    • \n
    • B (float):\nMagnetic Field Strength (Tesla)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • float: Theoretical resolving power of the peak.
    • \n
    \n\n
    References
    \n\n
      \n
    1. Marshall et al. (Mass Spectrom Rev. 1998 Jan-Feb;17(1):1-35.)\nDOI: 10.1002/(SICI)1098-2787(1998)17:1<1::AID-MAS1>3.0.CO;2-K
    2. \n
    \n", "signature": "(self, B, T):", "funcdef": "def"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.ICRMassPeak.set_calc_resolving_power", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "ICRMassPeak.set_calc_resolving_power", "kind": "function", "doc": "

    Set the resolving power of the peak to the calculated one.

    \n", "signature": "(self, B: float, T: float):", "funcdef": "def"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.TOFMassPeak", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "TOFMassPeak", "kind": "class", "doc": "

    A class representing a peak in a TOF mass spectrum.

    \n", "bases": "_MSPeak"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.TOFMassPeak.__init__", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "TOFMassPeak.__init__", "kind": "function", "doc": "

    \n", "signature": "(*args, exp_freq=None)"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.TOFMassPeak.set_calc_resolving_power", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "TOFMassPeak.set_calc_resolving_power", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.OrbiMassPeak", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "OrbiMassPeak", "kind": "class", "doc": "

    A class representing a peak in an Orbitrap mass spectrum.

    \n", "bases": "_MSPeak"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.OrbiMassPeak.__init__", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "OrbiMassPeak.__init__", "kind": "function", "doc": "

    \n", "signature": "(*args, exp_freq=None)"}, {"fullname": "corems.ms_peak.factory.MSPeakClasses.OrbiMassPeak.set_calc_resolving_power", "modulename": "corems.ms_peak.factory.MSPeakClasses", "qualname": "OrbiMassPeak.set_calc_resolving_power", "kind": "function", "doc": "

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient", "modulename": "corems.transient", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.calc", "modulename": "corems.transient.calc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.calc.TransientCalc", "modulename": "corems.transient.calc.TransientCalc", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations", "kind": "class", "doc": "

    Transient Calculations

    \n\n
    Parameters
    \n\n
      \n
    • parameters (corems.transient.parameters.TransientParameters):\nThe transient parameters
    • \n
    • bandwidth (float):\nThe bandwidth of the transient (Hz)
    • \n
    • number_data_points (int):\nThe number of data points of the transient
    • \n
    • exc_low_freq (float):\nThe low frequency of the excitation (Hz)
    • \n
    • exc_high_freq (float):\nThe high frequency of the excitation (Hz)
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • parameters (corems.transient.parameters.TransientParameters):\nThe transient parameters
    • \n
    • bandwidth (float):\nThe bandwidth of the transient (Hz)
    • \n
    • number_data_points (int):\nThe number of data points of the transient
    • \n
    • exc_low_freq (float):\nThe low frequency of the excitation (Hz)
    • \n
    • exc_high_freq (float):\nThe high frequency of the excitation (Hz)
    • \n
    \n\n
    Methods
    \n\n
      \n
    • cal_transient_time().\nCalculate the time domain length of the transient
    • \n
    • zero_fill(transient).\nZero fill the transient
    • \n
    • truncation(transient).\nTruncate the transient
    • \n
    • apodization(transient).\nApodization of the transient
    • \n
    • calculate_frequency_domain(number_data_points).\nCalculate the frequency domain (axis) of the transient
    • \n
    • cut_freq_domain(freqdomain_X, freqdomain_Y).\nCut the frequency domain of the transient
    • \n
    • phase_and_absorption_mode_ft().\n[Not Functional] Produce a phased absorption mode FT spectrum
    • \n
    • perform_magniture_mode_ft(transient).\nPerform magnitude mode FT of the transient
    • \n
    • correct_dc_offset().\n[Not Yet Implemented] Correct the DC offset of the transient
    • \n
    \n"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.cal_transient_time", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.cal_transient_time", "kind": "function", "doc": "

    Calculate the time domain length of the transient

    \n\n
    Returns
    \n\n
      \n
    • float: The time domain length of the transient (s)
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.zero_fill", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.zero_fill", "kind": "function", "doc": "

    Zero fill the transient

    \n\n
    Parameters
    \n\n
      \n
    • transient (numpy.ndarray):\nThe transient data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The transient data points zerofilled
    • \n
    \n\n
    Notes
    \n\n

    The number of zero fills is defined by the transient parameter number_of_zero_fills.\nThe function first calculates the next power of two of the transient length and zero fills to that length, to take advantage of the FFT algorithm.\n If the parameter next_power_of_two is set to False, the function will zero fill to the length of the original transient times the number of zero fills.

    \n", "signature": "(self, transient):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.truncation", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.truncation", "kind": "function", "doc": "

    Truncate the transient

    \n\n
    Parameters
    \n\n
      \n
    • transient (numpy.ndarray):\nThe transient data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The truncated transient data points
    • \n
    \n\n
    Notes
    \n\n

    The number of truncations is defined by the transient parameter number_of_truncations

    \n", "signature": "(self, transient):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.apodization", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.apodization", "kind": "function", "doc": "

    Apodization of the transient

    \n\n
    Parameters
    \n\n
      \n
    • transient (numpy.ndarray):\nThe transient data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The apodized transient data points
    • \n
    \n\n
    Notes
    \n\n

    The apodization method is defined by the transient parameter apodization_method.\nThe following apodization methods are available:\n Hamming,\n Hanning,\n Blackman,\n Full-Sine,\n Half-Sine,\n Kaiser,\n Half-Kaiser.

    \n\n

    For Kaiser and Half-Kaiser, an additional parameter 'beta' is required, set by the transient parameter kaiser_beta.

    \n", "signature": "(self, transient):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.calculate_frequency_domain", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.calculate_frequency_domain", "kind": "function", "doc": "

    Calculate the frequency domain (axis) of the transient

    \n\n
    Parameters
    \n\n
      \n
    • number_data_points (int):\nThe number of data points of the transient
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The frequency domain of the transient (Hz)
    • \n
    \n", "signature": "(self, number_data_points):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.cut_freq_domain", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.cut_freq_domain", "kind": "function", "doc": "

    Cut the frequency domain of the transient

    \n\n
    Parameters
    \n\n
      \n
    • freqdomain_X (numpy.ndarray):\nThe frequency domain of the transient (Hz)
    • \n
    • freqdomain_Y (numpy.ndarray):\nThe frequency domain of the transient (Hz)
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The frequency domain of the transient (Hz)
    • \n
    • numpy.ndarray: The frequency domain of the transient (Hz)
    • \n
    \n", "signature": "(self, freqdomain_X, freqdomain_Y):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.phase_and_absorption_mode_ft", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.phase_and_absorption_mode_ft", "kind": "function", "doc": "

    [Not Functional] Produce a phased absorption mode FT spectrum

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.perform_magniture_mode_ft", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.perform_magniture_mode_ft", "kind": "function", "doc": "

    Perform magnitude mode FT of the transient

    \n\n
    Parameters
    \n\n
      \n
    • transient (numpy.ndarray):\nThe transient data points
    • \n
    \n\n
    Returns
    \n\n
      \n
    • numpy.ndarray: The frequency domain of the transient (Hz)
    • \n
    • numpy.ndarray: The magnitude of the transient (a.u.)
    • \n
    \n", "signature": "(self, transient):", "funcdef": "def"}, {"fullname": "corems.transient.calc.TransientCalc.TransientCalculations.correct_dc_offset", "modulename": "corems.transient.calc.TransientCalc", "qualname": "TransientCalculations.correct_dc_offset", "kind": "function", "doc": "

    [Not Yet Implemented] Correct the DC offset of the transient

    \n\n

    A simple baseline correction to compensate for a DC offset in the recorded transient.\nNot implemented.

    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.factory", "modulename": "corems.transient.factory", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses", "modulename": "corems.transient.factory.TransientClasses", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient", "kind": "class", "doc": "

    The Transient object contains the transient data and the parameters used to process it

    \n\n
    Parameters
    \n\n
      \n
    • data (numpy.ndarray):\nArray with the transient data
    • \n
    • d_params (dict):\nDictionary with the parameters to be set
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • calibration_terms (tuple):\nTuple with the calibration terms (A, B, C)
    • \n
    • bandwidth (float):\nThe bandwidth of the transient (Hz)
    • \n
    • number_data_points (int):\nThe number of data points of the transient
    • \n
    • polarity (int):\nThe polarity of the transient
    • \n
    • transient_time (float):\nThe time domain length of the transient
    • \n
    • d_params (dict):\nDictionary with the parameters to be set
    • \n
    • frequency_domain (numpy.ndarray):\nArray with the frequency domain
    • \n
    • magnitude (numpy.ndarray):\nArray with the magnitude
    • \n
    • _full_filename_path (str):\nThe full path of the transient file
    • \n
    • _exc_high_freq (float):\nThe high frequency of the excitation (Hz)
    • \n
    • _exc_low_freq (float):\nThe low frequency of the excitation (Hz)
    • \n
    • _parameters (corems.transient.parameters.TransientParameters):\nThe transient parameters
    • \n
    • _transient_data (numpy.ndarray):\nArray with the transient data
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_frequency_domain(plot_result=True).\nGet the frequency domain and magnitude from the transient data
    • \n
    • get_mass_spectrum(auto_process=True, plot_result=True, keep_profile=True).\nGet the mass spectrum from the transient data
    • \n
    • set_processing_parameter(apodization_method, number_of_truncations, number_of_zero_fills).\nSet the processing parameters
    • \n
    • scale_plot_size(factor=1.5).\nScale the plot size by a factor
    • \n
    • plot_transient(ax=None, c='k').\nPlot the transient data
    • \n
    • plot_zerofilled_transient(ax=None, c='k').\nPlot the transient data with zero fill
    • \n
    • plot_apodized_transient(ax=None, c='k').\nPlot the transient data with apodization
    • \n
    • plot_frequency_domain(ax=None, c='k').\nPlot the frequency domain and magnitude
    • \n
    • set_parameter_from_toml(parameters_path).\nSet the processing parameters from a toml file
    • \n
    • set_parameter_from_json(parameters_path).\nSet the processing parameters from a json file
    • \n
    \n", "bases": "corems.transient.calc.TransientCalc.TransientCalculations"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.__init__", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.__init__", "kind": "function", "doc": "

    \n", "signature": "(data, d_params)"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.d_params", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.d_params", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.frequency_domain", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.frequency_domain", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.magnitude", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.magnitude", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.scale_plot_size", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.scale_plot_size", "kind": "function", "doc": "

    Scale the plot size by a factor

    \n\n
    Parameters
    \n\n
      \n
    • factor (float, optional):\nThe factor to scale the plot size, by default 1.5
    • \n
    \n", "signature": "(self, factor=1.5):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.set_processing_parameter", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.set_processing_parameter", "kind": "function", "doc": "

    Set the processing parameters

    \n\n
    Parameters
    \n\n
      \n
    • apodization_method (str):\nApodization method to be used
    • \n
    • number_of_truncations (int):\nNumber of truncations to be used
    • \n
    • number_of_zero_fills (int):\nNumber of zero fills to be used
    • \n
    \n", "signature": "(\tself,\tapodization_method: str,\tnumber_of_truncations: int,\tnumber_of_zero_fills: int):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.parameters", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.parameters", "kind": "variable", "doc": "

    The transient parameters

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.set_parameter_from_toml", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.set_parameter_from_toml", "kind": "function", "doc": "

    Set the processing parameters from a toml file

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.set_parameter_from_json", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.set_parameter_from_json", "kind": "function", "doc": "

    Set the processing parameters from a json file

    \n", "signature": "(self, parameters_path):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.get_frequency_domain", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.get_frequency_domain", "kind": "function", "doc": "

    Get the frequency domain and magnitude from the transient data

    \n\n
    Parameters
    \n\n
      \n
    • plot_result (bool, optional):\nPlot the frequency domain and magnitude, by default True
    • \n
    \n\n
    Returns
    \n\n
      \n
    • frequency_domain (numpy.ndarray):\nArray with the frequency domain
    • \n
    • magnitude (numpy.ndarray):\nArray with the magnitude
    • \n
    \n", "signature": "(self, plot_result=True):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.get_mass_spectrum", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.get_mass_spectrum", "kind": "function", "doc": "

    Get the mass spectrum from the transient data

    \n\n
    Parameters
    \n\n
      \n
    • auto_process (bool, optional):\nProcess the transient data, by default True
    • \n
    • plot_result (bool, optional):\nPlot the frequency domain and magnitude, by default True
    • \n
    • keep_profile (bool, optional):\nKeep the profile data, by default True
    • \n
    \n\n
    Returns
    \n\n
      \n
    • MassSpecfromFreq: Mass spectrum object
    • \n
    \n", "signature": "(\tself,\tauto_process: bool = True,\tplot_result: bool = True,\tkeep_profile: bool = True) -> corems.mass_spectrum.factory.MassSpectrumClasses.MassSpecfromFreq:", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.filename", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.filename", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.dir_location", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.dir_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.A_therm", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.A_therm", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.B_therm", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.B_therm", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.C_therm", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.C_therm", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.plot_transient", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.plot_transient", "kind": "function", "doc": "

    Plot the transient data

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes, optional):\nMatplotlib axes object, by default None
    • \n
    • c (str, optional):\nColor, by default 'k'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes: Matplotlib axes object
    • \n
    \n", "signature": "(self, ax=None, c='k'):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.plot_zerofilled_transient", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.plot_zerofilled_transient", "kind": "function", "doc": "

    Plot the transient data with zero fill

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes, optional):\nMatplotlib axes object, by default None
    • \n
    • c (str, optional):\nColor, by default 'k'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes: Matplotlib axes object
    • \n
    \n", "signature": "(self, ax=None, c='k'):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.plot_apodized_transient", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.plot_apodized_transient", "kind": "function", "doc": "

    Plot the transient data with apodization

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes, optional):\nMatplotlib axes object, by default None
    • \n
    • c (str, optional):\nColor, by default 'k'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes: Matplotlib axes object
    • \n
    \n", "signature": "(self, ax=None, c='k'):", "funcdef": "def"}, {"fullname": "corems.transient.factory.TransientClasses.Transient.plot_frequency_domain", "modulename": "corems.transient.factory.TransientClasses", "qualname": "Transient.plot_frequency_domain", "kind": "function", "doc": "

    Plot the frequency domain and magnitude

    \n\n
    Parameters
    \n\n
      \n
    • ax (matplotlib.axes, optional):\nMatplotlib axes object, by default None
    • \n
    • c (str, optional):\nColor, by default 'k'
    • \n
    \n\n
    Returns
    \n\n
      \n
    • matplotlib.axes: Matplotlib axes object
    • \n
    \n", "signature": "(self, ax=None, c='k'):", "funcdef": "def"}, {"fullname": "corems.transient.input", "modulename": "corems.transient.input", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.input.brukerSolarix", "modulename": "corems.transient.input.brukerSolarix", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix", "kind": "class", "doc": "

    A class used to read a single transient from Bruker's FT-MS acquisition station (fid or ser)

    \n\n
    Parameters
    \n\n
      \n
    • d_directory_location (str):\nthe full path of the .d folder
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • d_directory_location (str):\nthe full path of the .d folder
    • \n
    • file_location (str):\nthe full path of the .d folder
    • \n
    • parameter_filename_location (str):\nthe full path of the apexAcquisition.method file
    • \n
    • transient_data_path (str):\nthe full path of the fid or ser file
    • \n
    • scan_attr (str):\nthe full path of the scan.xml file
    • \n
    \n\n
    Methods
    \n\n
      \n
    • get_transient().\nRead the data and settings returning a Transient class
    • \n
    • get_scan_attr().\nRead the scan retention times, TIC values and scan indices.
    • \n
    • locate_file(folder, type_file_name).\nFind the full path of a specific file within the acquisition .d folder or subfolders
    • \n
    • parse_parameters(parameters_filename).\nOpen the given file and retrieve all parameters from apexAcquisition.method
    • \n
    • fix_freq_limits(d_parameters).\nRead and set the correct frequency limits for the spectrum
    • \n
    • get_excite_sweep_range(filename).\nDetermine excitation sweep range from ExciteSweep file
    • \n
    \n"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.__init__", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.__init__", "kind": "function", "doc": "

    \n", "signature": "(d_directory_location)"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.d_directory_location", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.d_directory_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.file_location", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.file_location", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.get_scan_attr", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.get_scan_attr", "kind": "function", "doc": "

    Function to get the scan retention times, TIC values and scan indices.

    \n\n

    Gets information from the scan.xml file in the Bruker .d folder.\nNote that this file is only present in some .d formats; e.g. for imaging mode data, it is not present.

    \n\n
    Returns
    \n\n
      \n
    • dict_scan_rt_tic (dict):\na dictionary with scan number as key and rt and tic as values
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.get_transient", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.get_transient", "kind": "function", "doc": "

    Function to get the transient data and parameters from a Bruker Solarix .d folder.

    \n\n
    Parameters
    \n\n
      \n
    • scan_number (int):\nthe scan number to be read. Default is 1.
    • \n
    \n\n
    Returns
    \n\n
      \n
    • Transient: a transient object
    • \n
    \n", "signature": "(self, scan_number=1):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.fix_freq_limits", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.fix_freq_limits", "kind": "function", "doc": "

    Function to read and set the correct frequency limits for the spectrum

    \n\n
    Notes
    \n\n

    This is using the excitation limits from the apexAcquisition.method file,\nwhich may not match the intended detection limits in edge cases.\nIn default acquisitions, excitation and detection are the same.\nBut, they may not be in some cases with selective excitation, custom excite waveforms, or in 2DMS applications.

    \n\n
    Parameters
    \n\n
      \n
    • d_parameters (dict):\na dictionary with the parameters from the apexAcquisition.method file
    • \n
    \n", "signature": "(self, d_parameters):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.get_excite_sweep_range", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.get_excite_sweep_range", "kind": "function", "doc": "

    Function to determine excitation sweep range from ExciteSweep file

    \n\n

    This looks at the first and last rows of the ExciteSweep file to determine the excitation frequency range.\nNote that this assumes the excitation sweep was linear and the first and last rows are the lowest and highest frequencies.\nThis is presumably always true, but again may be incorrect for edge cases with custom excitation waveforms.

    \n\n
    Parameters
    \n\n
      \n
    • filename (str):\nthe full path to the ExciteSweep file
    • \n
    \n", "signature": "(filename):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.locate_file", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.locate_file", "kind": "function", "doc": "

    Function to locate a file in a folder

    \n\n

    Find the full path of a specific file within the acquisition .d folder or subfolders

    \n\n
    Parameters
    \n\n
      \n
    • folder (str):\nthe full path to the folder
    • \n
    • type_file_name (str):\nthe name of the file to be located\nExpected options: ExciteSweep or apexAcquisition.method
    • \n
    \n\n
    Returns
    \n\n
      \n
    • str: the full path to the file
    • \n
    \n\n
    Notes
    \n\n

    Adapted from code from the SPIKE library, https://github.com/spike-project/spike

    \n", "signature": "(folder, type_file_name='apexAcquisition.method'):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.parse_parameters", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.parse_parameters", "kind": "function", "doc": "

    Function to parse the parameters from apexAcquisition.method file

    \n\n

    Open the given file and retrieve all parameters from apexAcquisition.method.\n None is written when no value is found for a parameter.

    \n\n
    structure : <param name = \"AMS_ActiveExclusion\"><value>0</value></param>\n
    \n\n
    Parameters
    \n\n
      \n
    • parameters_filename (str):\nthe full path to the apexAcquisition.method file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • dict: a dictionary with the parameters and values
    • \n
    \n\n
    Notes
    \n\n

    Adapted from code from SPIKE library, https://github.com/spike-project/spike.\nCode may not handle all possible parameters, but should be sufficient for most common use cases

    \n", "signature": "(parameters_filename):", "funcdef": "def"}, {"fullname": "corems.transient.input.brukerSolarix.ReadBrukerSolarix.parse_sqlite", "modulename": "corems.transient.input.brukerSolarix", "qualname": "ReadBrukerSolarix.parse_sqlite", "kind": "function", "doc": "

    \n", "signature": "(self, sqlite_filename='chromatography-data.sqlite'):", "funcdef": "def"}, {"fullname": "corems.transient.input.midasDatFile", "modulename": "corems.transient.input.midasDatFile", "kind": "module", "doc": "

    \n"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile", "kind": "class", "doc": "

    [Not Implemented] Reads MIDAS .dat files (binary transient data)

    \n\n

    This class will read .dat binary format transient data, e.g. MIDAS format from Predator or Thermo datastations.\nThis code is not yet implemented and is not fully functional.

    \n\n
    Parameters
    \n\n
      \n
    • filename_path (str):\nThe path to the .dat file
    • \n
    \n\n
    Attributes
    \n\n
      \n
    • filename_path (str):\nThe path to the .dat file
    • \n
    • d_params (dict):\nA dictionary with the parameters of the .dat file
    • \n
    • transient_data (numpy.ndarray):\nThe transient data
    • \n
    \n\n
    Methods
    \n\n
      \n
    • read_file().\nReads the .dat file and returns the transient data and the parameters
    • \n
    • get_transient_data(data_file, d_params).\nReads the transient data from the .dat file
    • \n
    • parse_parameter(f).\nParses the parameters from the .dat file
    • \n
    \n\n
    Raises
    \n\n
      \n
    • NotImplementedError: This class is not yet implemented.
    • \n
    \n"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.__init__", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.__init__", "kind": "function", "doc": "

    \n", "signature": "(filename_path)"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.filename_path", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.filename_path", "kind": "variable", "doc": "

    \n"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.read_file", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.read_file", "kind": "function", "doc": "

    Reads the .dat file and returns the transient data and the parameters

    \n\n
    Returns
    \n\n
      \n
    • transient_data (numpy.ndarray):\nThe transient data
    • \n
    • d_params (dict):\nA dictionary with the parameters of the .dat file
    • \n
    \n", "signature": "(self):", "funcdef": "def"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.get_transient_data", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.get_transient_data", "kind": "function", "doc": "

    Reads the transient data from the .dat file

    \n\n
    Parameters
    \n\n
      \n
    • data_file (file):\nThe .dat file
    • \n
    • d_params (dict):\nA dictionary with the parameters of the .dat file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • myarray (numpy.ndarray):\nThe transient data
    • \n
    \n", "signature": "(self, data_file, d_params):", "funcdef": "def"}, {"fullname": "corems.transient.input.midasDatFile.ReadMidasDatFile.parse_parameter", "modulename": "corems.transient.input.midasDatFile", "qualname": "ReadMidasDatFile.parse_parameter", "kind": "function", "doc": "

    Parses the parameters from the .dat file

    \n\n
    Parameters
    \n\n
      \n
    • f (file):\nThe .dat file
    • \n
    \n\n
    Returns
    \n\n
      \n
    • output_parameters (dict):\nA dictionary with the parameters of the .dat file
    • \n
    \n", "signature": "(self, f):", "funcdef": "def"}]; // mirrored in build-search-index.js (part 1) // Also split on html tags. this is a cheap heuristic, but good enough.