diff --git a/SettingsCoreMS.json b/SettingsCoreMS.json deleted file mode 100644 index 6b2f3dde8..000000000 --- a/SettingsCoreMS.json +++ /dev/null @@ -1,446 +0,0 @@ -{ - "MolecularFormulaSearch": { - "use_isotopologue_filter": false, - "isotopologue_filter_threshold": 33.0, - "isotopologue_filter_atoms": [ - "Cl", "Br" - ], - "use_runtime_kendrick_filter": false, - "use_min_peaks_filter": true, - "min_peaks_per_class": 10, - "url_database": null, - "db_jobs": 3, - "db_chunk_size": 300, - "ion_charge": -1, - "min_hc_filter": 0.3, - "max_hc_filter": 3.0, - "min_oc_filter": 0.0, - "max_oc_filter": 1.2, - "min_op_filter": 2.0, - "use_pah_line_rule": true, - "min_dbe": 0, - "max_dbe": 50, - "mz_error_score_weight": 0.6, - "isotopologue_score_weight": 0.4, - "adduct_atoms_neg": [ - "Cl", "Br" - ], - "adduct_atoms_pos": [ - "Na", "K" - ], - "score_methods": [ - "S_P_lowest_error", "N_S_P_lowest_error", "lowest_error", "prob_score", "air_filter_error", "water_filter_error", "earth_filter_error" - ], - "score_method": "prob_score", "output_min_score": 0.1, - "output_score_method": "prob_score", "isRadical": true, - "isProtonated": true, - "isAdduct": false, - "usedAtoms": { - "C": [ - 1, - 100 - ], - "H": [ - 4, - 200 - ], - "O": [ - 2, - 18 - ] - }, - "ionization_type": "ESI", "min_ppm_error": -3, - "max_ppm_error": 5, - "min_abun_error": -30, - "max_abun_error": 70, - "mz_error_range": 1, - "error_method": "None", "mz_error_average": 0, - "used_atom_valences": { - "C": 4, - "13C": 4, - "N": 3, - "O": 2, - "S": 2, - "H": 1, - "F": 0, - "Cl": 0, - "Br": [ - 1, - 0 - ], - "I": [ - 1, - 0 - ], - "At": 1, - "Li": [ - 1, - 0 - ], - "Na": [ - 1, - 0 - ], - "K": [ - 1, - 0 - ], - "Rb": 1, - "Cs": 1, - "Fr": 1, - "B": [ - 4, - 3, - 2, - 1 - ], - "In": [ - 3, - 2, - 1 - ], - "Al": [ - 3, - 1, - 2 - ], - "P": 0, - "Ga": [ - 3, - 1, - 2 - ], - "Mg": [ - 2, - 1 - ], - "Be": [ - 2, - 1 - ], - "Ca": [ - 2, - 1 - ], - "Sr": [ - 2, - 1 - ], - "Ba": 2, - "Ra": 2, - "V": [ - 5, - 4, - 3, - 
2, - 1 - ], - "Fe": [ - 3, - 2, - 4, - 5, - 6 - ], - "Si": [ - 4, - 3, - 2 - ], - "Sc": [ - 3, - 2, - 1 - ], - "Ti": [ - 4, - 3, - 2, - 1 - ], - "Cr": [ - 1, - 2, - 3, - 4, - 5, - 6 - ], - "Mn": [ - 1, - 2, - 3, - 4, - 5, - 6, - 7 - ], - "Co": [ - 1, - 2, - 3, - 4, - 5 - ], - "Ni": [ - 1, - 2, - 3, - 4 - ], - "Cu": [ - 2, - 1, - 3, - 4 - ], - "Zn": [ - 2, - 1 - ], - "Ge": [ - 4, - 3, - 2, - 1 - ], - "As": [ - 5, - 3, - 2, - 1 - ], - "Se": [ - 6, - 4, - 2, - 1 - ], - "Y": [ - 3, - 2, - 1 - ], - "Zr": [ - 4, - 3, - 2, - 1 - ], - "Nb": [ - 5, - 4, - 3, - 2, - 1 - ], - "Mo": [ - 6, - 5, - 4, - 3, - 2, - 1 - ], - "Tc": [ - 7, - 6, - 5, - 4, - 3, - 2, - 1 - ], - "Ru": [ - 8, - 7, - 6, - 5, - 4, - 3, - 2, - 1 - ], - "Rh": [ - 6, - 5, - 4, - 3, - 2, - 1 - ], - "Pd": [ - 4, - 2, - 1 - ], - "Ag": [ - 0, - 1, - 2, - 3, - 4 - ], - "Cd": [ - 2, - 1 - ], - "Sn": [ - 4, - 2 - ], - "Sb": [ - 5, - 3 - ], - "Te": [ - 6, - 5, - 4, - 2 - ], - "La": [ - 3, - 2 - ], - "Hf": [ - 4, - 3, - 2 - ], - "Ta": [ - 5, - 4, - 3, - 2 - ], - "W": [ - 6, - 5, - 4, - 3, - 2, - 1 - ], - "Re": [ - 4, - 7, - 6, - 5, - 3, - 2, - 1 - ], - "Os": [ - 4, - 8, - 7, - 6, - 5, - 3, - 2, - 1 - ], - "Ir": [ - 4, - 8, - 6, - 5, - 3, - 2, - 1 - ], - "Pt": [ - 4, - 6, - 5, - 3, - 2, - 1 - ], - "Au": [ - 3, - 5, - 2, - 1 - ], - "Hg": [ - 1, - 2, - 4 - ], - "Tl": [ - 3, - 1 - ], - "Pb": [ - 4, - 2 - ], - "Bi": [ - 3, - 1, - 5 - ], - "Po": [ - 2, - 4, - 6 - ], - "Ac": [ - 3, - 2 - ] - } - }, - "Transient": { - "implemented_apodization_function": [ - "Hamming", "Hanning", "Blackman", "Full-Sine", "Half-Sine", "Kaiser", "Half-Kaiser" - ], - "apodization_method": "Hanning", "number_of_truncations": 0, - "number_of_zero_fills": 1, - "next_power_of_two": false, - "kaiser_beta": 8.6 - }, - "MassSpectrum": { - "noise_threshold_method": "relative_abundance", "noise_threshold_methods_implemented": [ - "minima", "signal_noise", "relative_abundance", "absolute_abundance", "log" - ], - "noise_threshold_min_std": 3, - 
"s2n_threshold": 4.0, - "noise_threshold_min_relative_abundance": 1, - "noise_thresould_absolute_abundance": 1000000.0, - "noise_thresould_log_nsigma": 10, - "noise_thresould_log_nsigma_corr_factor": 0.463, - "noise_thresould_log_nsigma_bins": 500, - "noise_min_mz": 50.0, - "noise_max_mz": 1200.0, - "min_picking_mz": 50.0, - "max_picking_mz": 1200.0, - "calib_minimize_method": "Powell", "calib_pol_order": 2, - "max_calib_ppm_error": 10, - "min_calib_ppm_error": -10, - "calib_sn_threshold": 2.0, - "do_calibration": true - }, - "MassSpecPeak": { - "kendrick_base": { - "C": 1, - "H": 2 - }, - "kendrick_rounding_method": "floor", "implemented_kendrick_rounding_methods": [ - "floor", "ceil", "round" - ], - "peak_derivative_threshold": 0.0, - "peak_min_prominence_percent": 1, - "min_peak_datapoints": 5.0, - "peak_max_prominence_percent": 0.1, - "peak_height_max_percent": 10.0, - "legacy_resolving_power": true - }, - "DataInput": { - "header_translate": { - "m/z": "m/z", "mOz": "m/z", "Mass": "m/z", "Resolving Power": "Resolving Power", "Res.": "Resolving Power", "Intensity": "Peak Height", "I": "Peak Height", "Abundance": "Peak Height", "Signal/Noise": "S/N", "S/N": "S/N", "abs_abu": "Peak Height", "sn": "S/N", "resolution": "Resolving Power" - } - } -} \ No newline at end of file diff --git a/SettingsCoreMS.toml b/SettingsCoreMS.toml deleted file mode 100644 index 6db38adad..000000000 --- a/SettingsCoreMS.toml +++ /dev/null @@ -1,172 +0,0 @@ -[MolecularFormulaSearch] -use_isotopologue_filter = false -isotopologue_filter_threshold = 33.0 -isotopologue_filter_atoms = [ "Cl", "Br",] -use_runtime_kendrick_filter = false -use_min_peaks_filter = true -min_peaks_per_class = 10 -db_jobs = 3 -db_chunk_size = 300 -ion_charge = -1 -min_hc_filter = 0.3 -max_hc_filter = 3.0 -min_oc_filter = 0.0 -max_oc_filter = 1.2 -min_op_filter = 2.0 -use_pah_line_rule = true -min_dbe = 0 -max_dbe = 50 -mz_error_score_weight = 0.6 -isotopologue_score_weight = 0.4 -adduct_atoms_neg = [ "Cl", "Br",] 
-adduct_atoms_pos = [ "Na", "K",] -score_methods = [ "S_P_lowest_error", "N_S_P_lowest_error", "lowest_error", "prob_score", "air_filter_error", "water_filter_error", "earth_filter_error",] -score_method = "prob_score" -output_min_score = 0.1 -output_score_method = "prob_score" -isRadical = true -isProtonated = true -isAdduct = false -ionization_type = "ESI" -min_ppm_error = -3 -max_ppm_error = 5 -min_abun_error = -30 -max_abun_error = 70 -mz_error_range = 1 -error_method = "None" -mz_error_average = 0 - -[Transient] -implemented_apodization_function = [ "Hamming", "Hanning", "Blackman", "Full-Sine", "Half-Sine", "Kaiser", "Half-Kaiser",] -apodization_method = "Hanning" -number_of_truncations = 0 -number_of_zero_fills = 1 -next_power_of_two = false -kaiser_beta = 8.6 - -[MassSpectrum] -noise_threshold_method = "relative_abundance" -noise_threshold_methods_implemented = [ "minima", "signal_noise", "relative_abundance", "absolute_abundance", "log",] -noise_threshold_min_std = 3 -s2n_threshold = 4.0 -noise_threshold_min_relative_abundance = 1 -noise_thresould_absolute_abundance = 1000000.0 -noise_thresould_log_nsigma = 10 -noise_thresould_log_nsigma_corr_factor = 0.463 -noise_thresould_log_nsigma_bins = 500 -noise_min_mz = 50.0 -noise_max_mz = 1200.0 -min_picking_mz = 50.0 -max_picking_mz = 1200.0 -calib_minimize_method = "Powell" -calib_pol_order = 2 -max_calib_ppm_error = 10 -min_calib_ppm_error = -10 -calib_sn_threshold = 2.0 -do_calibration = true - -[MassSpecPeak] -kendrick_rounding_method = "floor" -implemented_kendrick_rounding_methods = [ "floor", "ceil", "round",] -peak_derivative_threshold = 0.0 -peak_min_prominence_percent = 1 -min_peak_datapoints = 5.0 -peak_max_prominence_percent = 0.1 -peak_height_max_percent = 10.0 -legacy_resolving_power = true - -[MolecularFormulaSearch.usedAtoms] -C = [ 1, 100,] -H = [ 4, 200,] -O = [ 2, 18,] - -[MolecularFormulaSearch.used_atom_valences] -C = 4 -13C = 4 -N = 3 -O = 2 -S = 2 -H = 1 -F = 0 -Cl = 0 -Br = [ 1, 0,] -I = 
[ 1, 0,] -At = 1 -Li = [ 1, 0,] -Na = [ 1, 0,] -K = [ 1, 0,] -Rb = 1 -Cs = 1 -Fr = 1 -B = [ 4, 3, 2, 1,] -In = [ 3, 2, 1,] -Al = [ 3, 1, 2,] -P = 0 -Ga = [ 3, 1, 2,] -Mg = [ 2, 1,] -Be = [ 2, 1,] -Ca = [ 2, 1,] -Sr = [ 2, 1,] -Ba = 2 -Ra = 2 -V = [ 5, 4, 3, 2, 1,] -Fe = [ 3, 2, 4, 5, 6,] -Si = [ 4, 3, 2,] -Sc = [ 3, 2, 1,] -Ti = [ 4, 3, 2, 1,] -Cr = [ 1, 2, 3, 4, 5, 6,] -Mn = [ 1, 2, 3, 4, 5, 6, 7,] -Co = [ 1, 2, 3, 4, 5,] -Ni = [ 1, 2, 3, 4,] -Cu = [ 2, 1, 3, 4,] -Zn = [ 2, 1,] -Ge = [ 4, 3, 2, 1,] -As = [ 5, 3, 2, 1,] -Se = [ 6, 4, 2, 1,] -Y = [ 3, 2, 1,] -Zr = [ 4, 3, 2, 1,] -Nb = [ 5, 4, 3, 2, 1,] -Mo = [ 6, 5, 4, 3, 2, 1,] -Tc = [ 7, 6, 5, 4, 3, 2, 1,] -Ru = [ 8, 7, 6, 5, 4, 3, 2, 1,] -Rh = [ 6, 5, 4, 3, 2, 1,] -Pd = [ 4, 2, 1,] -Ag = [ 0, 1, 2, 3, 4,] -Cd = [ 2, 1,] -Sn = [ 4, 2,] -Sb = [ 5, 3,] -Te = [ 6, 5, 4, 2,] -La = [ 3, 2,] -Hf = [ 4, 3, 2,] -Ta = [ 5, 4, 3, 2,] -W = [ 6, 5, 4, 3, 2, 1,] -Re = [ 4, 7, 6, 5, 3, 2, 1,] -Os = [ 4, 8, 7, 6, 5, 3, 2, 1,] -Ir = [ 4, 8, 6, 5, 3, 2, 1,] -Pt = [ 4, 6, 5, 3, 2, 1,] -Au = [ 3, 5, 2, 1,] -Hg = [ 1, 2, 4,] -Tl = [ 3, 1,] -Pb = [ 4, 2,] -Bi = [ 3, 1, 5,] -Po = [ 2, 4, 6,] -Ac = [ 3, 2,] - -[MassSpecPeak.kendrick_base] -C = 1 -H = 2 - -[DataInput.header_translate] -"m/z" = "m/z" -mOz = "m/z" -Mass = "m/z" -"Resolving Power" = "Resolving Power" -"Res." 
= "Resolving Power" -Intensity = "Peak Height" -I = "Peak Height" -Abundance = "Peak Height" -"Signal/Noise" = "S/N" -"S/N" = "S/N" -abs_abu = "Peak Height" -sn = "S/N" -resolution = "Resolving Power" diff --git a/conftest.py b/conftest.py new file mode 100644 index 000000000..570b2f105 --- /dev/null +++ b/conftest.py @@ -0,0 +1,36 @@ +import pytest +from pathlib import Path + +from corems.transient.input.brukerSolarix import ReadBrukerSolarix +from corems.encapsulation.factory.parameters import MSParameters + +@pytest.fixture +def mass_spectrum_ftms(bruker_transient): + """Creates a mass spectrum object to be used in the tests""" + # Instantiate the mass spectrum object + mass_spectrum = bruker_transient.get_mass_spectrum( + plot_result=False, auto_process=False, keep_profile=True + ) + mass_spectrum.parameters = MSParameters(use_defaults=True) + # Process the mass spectrum + mass_spectrum.process_mass_spec() + + return mass_spectrum + +@pytest.fixture +def ref_file_location(): + """Returns the location of the reference file for calibration for the tests""" + return Path.cwd() / "tests/tests_data/ftms/SRFA.ref" + +@pytest.fixture +def ftms_file_location(): + """Returns the location of the FTMS file for the tests""" + return Path.cwd() / "tests/tests_data/ftms/ESI_NEG_SRFA.d/" + +@pytest.fixture +def bruker_transient(ftms_file_location): + """Returns the transient object for the FTMS file""" + bruker_reader = ReadBrukerSolarix(ftms_file_location) + bruker_transient = bruker_reader.get_transient() + + return bruker_transient \ No newline at end of file diff --git a/pytest.ini b/pytest.ini index 1dc99d85c..6f2b921da 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] +# Coverage is already enabled by the --cov options in addopts below; remove those options to run the tests without coverage addopts = -ra -v --cov=corems --cov-config=pytest.ini --cov-report html --cov-report term -p no:warnings norecursedirs = win_only testpaths = tests diff --git a/requirements-dev.txt b/requirements-dev.txt index 83eea4201..d69a7ac19
100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,6 +1,6 @@ pylint -pytest~=7.2.1 -pytest-cov +pytest~=8.3.2 +pytest-cov~=5.0.0 pyprof2calltree memory_profiler twine diff --git a/tests/s3_test.py b/tests/archive_tests/s3_test.py similarity index 100% rename from tests/s3_test.py rename to tests/archive_tests/s3_test.py diff --git a/tests/win_only/test_ImportMassSpectraCompassXtract.py b/tests/archive_tests/test_ImportMassSpectraCompassXtract.py similarity index 99% rename from tests/win_only/test_ImportMassSpectraCompassXtract.py rename to tests/archive_tests/test_ImportMassSpectraCompassXtract.py index db9b49e19..b6bf7d59b 100644 --- a/tests/win_only/test_ImportMassSpectraCompassXtract.py +++ b/tests/archive_tests/test_ImportMassSpectraCompassXtract.py @@ -1,3 +1,4 @@ +''' import sys sys.path.append(".") @@ -46,3 +47,4 @@ mass_spec = lcms.get_mass_spec_by_scan_number(1) mass_spec.plot_mz_domain_profile() mass_spec.plot_profile_and_noise_threshold() +''' diff --git a/tests/chemstation.py b/tests/chemstation.py deleted file mode 100644 index 6636f794b..000000000 --- a/tests/chemstation.py +++ /dev/null @@ -1,20 +0,0 @@ -import sys -sys.path.append("./ext_lib") -sys.path.append(".") -from corems.encapsulation.factory.parameters import default_parameters -from corems.mass_spectra.factory.LC_Class import LCMSBase -from corems.mass_spectrum.factory.MassSpectrumClasses import MassSpecProfile, MassSpecCentroid -from corems.encapsulation.constant import Labels - -import clr -clr.AddReference("ChemstationMSFileReader") -import ChemstationMSFileReader - -file_loc = '/tests/tests_data/DATA.MS' -clsChemstation = ChemstationMSFileReader.clsChemstationDataMSFileReader(file_loc) - -header = clsChemstation.ReadHeaders(file_loc) - -clsChemstation.GetSpectrum() - -#System.Int32,ChemstationMSFileReader.clsSpectralRecord@,System.Int32@) \ No newline at end of file diff --git a/tests/test.py b/tests/test.py deleted file mode 100644 index ed63db8d8..000000000 --- 
a/tests/test.py +++ /dev/null @@ -1,65 +0,0 @@ -from corems.transient.input.brukerSolarix import ReadBrukerSolarix -from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas -from corems.mass_spectrum.output.export import HighResMassSpecExport -from matplotlib import pyplot - -file_path= 'tests/tests_data/ftms/ESI_NEG_SRFA.d' - -#Bruker Solarix class reader -bruker_reader = ReadBrukerSolarix(file_path) - -#access the transient object -bruker_transient_obj = bruker_reader.get_transient() - -#calculates the transient duration time -T = bruker_transient_obj.transient_time - -#access the mass spectrum object -mass_spectrum_obj = bruker_transient_obj.get_mass_spectrum(plot_result=False, auto_process=True) - -# - search monoisotopic molecular formulas for all mass spectral peaks -# - calculate fine isotopic structure based on monoisotopic molecular formulas found and current dynamic range -# - search molecular formulas of correspondent calculated isotopologues, -# - settings are stored at SearchConfig.json and can be changed directly on the file or inside the framework class - -SearchMolecularFormulas(mass_spectrum_obj, first_hit=False).run_worker_mass_spectrum() - -# iterate over mass spectral peaks objs -for mspeak in mass_spectrum_obj.sort_by_abundance(): - - # returns true if there is at least one molecular formula associated - # with the mass spectral peak - # same as mspeak.is_assigned -- > bool - if mspeak: - - # get the molecular formula with the highest mass accuracy - molecular_formula = mspeak.molecular_formula_lowest_error - - # plot mz and peak height, use mass_spectrum_obj.mz_exp to access all mz - # and mass_spectrum_obj.mz_exp_profile to access mz with all available datapoints - pyplot.plot(mspeak.mz_exp, mspeak.abundance, 'o', c='g') - - # iterate over all molecular formulae associated with the ms peaks obj - for molecular_formula in mspeak: - - #check if the molecular formula is a isotopologue - if 
molecular_formula.is_isotopologue: - - #access the molecular formula text representation - print (molecular_formula.string) - - #get 13C atoms count - print (molecular_formula['13C']) - else: - #get mz and peak height - print(mspeak.mz_exp,mspeak.abundance) - - -#exporting data -mass_spectrum_obj.to_csv("filename") - -mass_spectrum_obj.to_hdf("filename") -# save pandas Datarame to pickle -mass_spectrum_obj.to_pandas("filename") -# get pandas Dataframe -df = mass_spectrum_obj.to_dataframe() \ No newline at end of file diff --git a/tests/test_aaaaa_thermo.py-disable b/tests/test_aaaaa_thermo.py-disable deleted file mode 100644 index accbf3842..000000000 --- a/tests/test_aaaaa_thermo.py-disable +++ /dev/null @@ -1,50 +0,0 @@ -from pathlib import Path -from corems.encapsulation.factory.parameters import MSParameters -from corems.mass_spectra.input import rawFileReader -import os - - -def test_AAAA_import_thermo(): - print(Path.cwd()) - file_location = Path.cwd() / "tests/tests_data/ftms/" / "NEG_ESI_LIGNIN.raw" - get_stats = os.stat(file_location) - print(get_stats) - - # Open the binary file - with open(file_location, "rb") as f: - # Read the first 256 bytes of data - data = f.read(256) - print(data[:16].hex()) - first_block = bytes.fromhex("01a1460069006e006e006900670061006e0000000000000000000000000000000000080042000000c0b1e2a225d0d401530059005300540045004d0000000000") - # "01a1460069006e006e00690067006100" - # Check if the data matches the expected value - assert data[:64] == first_block - # Print the data - - - # change parameters here - MSParameters.mass_spectrum.noise_threshold_method = "relative_abundance" - MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 1 - - # creates the parser obj - parser = rawFileReader.ImportMassSpectraThermoMSFileReader(file_location) - - # sums all the mass spectra - - parser.chromatogram_settings.scans = (-1, -1) - mass_spectrum = parser.get_average_mass_spectrum(spectrum_mode="profile") - - # sums scans in 
selected range - parser.chromatogram_settings.scans = (1, 1) - mass_spectrum = parser.get_average_mass_spectrum(spectrum_mode="profile") - - parser.chromatogram_settings.scans = [1] - - # sums scans in selected range - mass_spectrum = parser.get_average_mass_spectrum(spectrum_mode="profile") - - mass_spectrum.plot_mz_domain_profile() - mass_spectrum.plot_profile_and_noise_threshold() - - # print("polarity", mass_spectrum.polarity) - # plt.savefig("test.png") diff --git a/tests/test_calibration.py b/tests/test_calibration.py index 61680b765..4e9f63210 100644 --- a/tests/test_calibration.py +++ b/tests/test_calibration.py @@ -1,220 +1,185 @@ -__author__ = "Yuri E. Corilo" -__date__ = "Aug 26, 2019" - - -import sys, time, pytest, matplotlib +import sys from pathlib import Path -sys.path.append(".") +import pytest -import numpy as np -from matplotlib import pyplot +sys.path.append(".") -from corems.encapsulation.factory.parameters import MSParameters -from corems.mass_spectrum.calc.CalibrationCalc import FreqDomain_Calibration +from corems.mass_spectrum.calc.AutoRecalibration import HighResRecalibration from corems.mass_spectrum.calc.Calibration import MzDomainCalibration -from corems.molecular_id.search.findOxygenPeaks import FindOxygenPeaks -from corems.transient.input.brukerSolarix import ReadBrukerSolarix -from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas +from corems.mass_spectrum.calc.CalibrationCalc import FreqDomain_Calibration +from corems.mass_spectrum.input.massList import ReadCoremsMasslist from corems.molecular_id.calc.ClusterFilter import ClusteringFilter -from corems.mass_spectrum.input.massList import ReadCoremsMasslist, ReadMassList -from corems.mass_spectrum.calc.AutoRecalibration import HighResRecalibration +from corems.molecular_id.search.findOxygenPeaks import FindOxygenPeaks +from corems.encapsulation.factory.parameters import MSParameters, reset_ms_parameters -def create_mass_spectrum(): - # Creates a profile 
mode mass spectrum object - '''parse transient data from Bruker into a mass spectrum class object - - Parameters - ---------- - file_location: str - The full path of the *.d data folder - - Returns - ------- - MassSpecfromFreq() class - (See MassSpecfromFreq class for more details) - ''' - - file_location = Path.cwd() / "tests/tests_data/ftms/ESI_NEG_SRFA.d/" - bruker_reader = ReadBrukerSolarix(file_location) - bruker_transient = bruker_reader.get_transient() - - MSParameters.mass_spectrum.noise_threshold_method = 'log' - MSParameters.mass_spectrum.noise_threshold_log_nsigma = 12 - MSParameters.ms_peak.peak_min_prominence_percent = 0.01 - - mass_spectrum = bruker_transient.get_mass_spectrum(plot_result=False, - auto_process=True, - keep_profile=True) - - - return mass_spectrum -def create_centroid_mass_spectrum(): - # Creates a centroid mass spectrum object +@pytest.fixture +def mass_spectrum_centroid(): + file_location = Path.cwd() / "tests/tests_data/ftms/ESI_NEG_SRFA_UnCal_Unassign.csv" - - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' + MSParameters.mass_spectrum.noise_threshold_method = "relative_abundance" MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 0.1 - - #load any type of mass list file, change the delimeter to read another type of file, i.e : "," for csv, "\t" for tabulated mass list, etc - mass_list_reader = ReadCoremsMasslist(file_location, analyzer='ICR', instrument_label='12T') + + # load any type of mass list file; change the delimiter to read another type of file, e.g. "," for csv, "\t" for tab-delimited mass lists, etc. + mass_list_reader = ReadCoremsMasslist( + file_location, analyzer="ICR", instrument_label="12T" + ) mass_spectrum = mass_list_reader.get_mass_spectrum(loadSettings=False) + # Return the MSParameters to the default values + reset_ms_parameters() + return mass_spectrum - -def test_mz_domain_calibration(): +def test_mz_domain_calibration(mass_spectrum_ftms, ref_file_location): +
print("test_mz_domain_calibration") + mass_spectrum_ftms.settings.min_calib_ppm_error = -10 + mass_spectrum_ftms.settings.max_calib_ppm_error = 10 + mass_spectrum_ftms.filter_by_noise_threshold() - MSParameters.mass_spectrum.min_calib_ppm_error = -10 - MSParameters.mass_spectrum.max_calib_ppm_error = 10 + # Check that mass_spectrum_ftms has not been calibrated + assert set(mass_spectrum_ftms.mz_cal) == {None} - ref_file_location = Path.cwd() / "tests/tests_data/ftms/SRFA.ref" + MzDomainCalibration(mass_spectrum_ftms, ref_file_location).run() - mass_spectrum = create_mass_spectrum() + # Check that the calibration was successful + assert mass_spectrum_ftms.calibration_RMS < 2 - mass_spectrum.filter_by_noise_threshold() - MzDomainCalibration(mass_spectrum, ref_file_location).run() +def test_autorecalibration(mass_spectrum_ftms, ref_file_location): + mass_spectrum_ftms.filter_by_noise_threshold() -def test_mz_domain_calibration_centroid(): + # Check that mass_spectrum_ftms has not been calibrated + assert set(mass_spectrum_ftms.mz_cal) == {None} - MSParameters.mass_spectrum.min_calib_ppm_error = -10 - MSParameters.mass_spectrum.max_calib_ppm_error = 10 - MSParameters.mass_spectrum.calib_pol_order = 1 + auto_error_bounds = HighResRecalibration( + mass_spectrum_ftms, plot=False, docker=False + ).determine_error_boundaries() - ref_file_location = Path.cwd() / "tests/tests_data/ftms/SRFA.ref" + mass_spectrum_ftms.settings.min_calib_ppm_error = auto_error_bounds[-1][0] + mass_spectrum_ftms.settings.max_calib_ppm_error = auto_error_bounds[-1][1] - mass_spectrum = create_centroid_mass_spectrum() + MzDomainCalibration(mass_spectrum_ftms, ref_file_location).run() - mass_spectrum.filter_by_noise_threshold() + # Check that the calibration was successful + assert mass_spectrum_ftms.calibration_RMS < 2 - MzDomainCalibration(mass_spectrum, ref_file_location).run() - # check there is an output - assert mass_spectrum.calibration_order == 1 - assert(mass_spectrum.calibration_points == 
25) - assert(round(mass_spectrum.calibration_RMS, 4) == round(0.8690388563830891, 4)) +def test_segmentedmzcalibration(mass_spectrum_ftms, ref_file_location): + # Tests profile mode recalibration + mass_spectrum_ftms.filter_by_noise_threshold() + mass_spectrum_ftms.parameters.mass_spectrum.min_calib_ppm_error = -5 + mass_spectrum_ftms.parameters.mass_spectrum.max_calib_ppm_error = 5 -def test_autorecalibration(): + # Check that mass_spectrum_ftms has not been calibrated + assert set(mass_spectrum_ftms.mz_cal) == {None} - mass_spectrum = create_mass_spectrum() + MzDomainCalibration(mass_spectrum_ftms, ref_file_location, mzsegment=(0, 300)).run() - mass_spectrum.filter_by_noise_threshold() + # Check that the calibration was successful + assert mass_spectrum_ftms.calibration_RMS < 2 - auto_error_bounds = HighResRecalibration(mass_spectrum,plot=False,docker=False).determine_error_boundaries() - MSParameters.mass_spectrum.min_calib_ppm_error = auto_error_bounds[-1][0] - MSParameters.mass_spectrum.max_calib_ppm_error = auto_error_bounds[-1][1] +def test_old_calibration(mass_spectrum_ftms): + usedatoms = {"C": (1, 100), "H": (4, 200), "O": (1, 10)} - ref_file_location = Path.cwd() / "tests/tests_data/ftms/SRFA.ref" + mass_spectrum_ftms.molecular_search_settings.url_database = "" + mass_spectrum_ftms.molecular_search_settings.error_method = "None" + mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -5 + mass_spectrum_ftms.molecular_search_settings.max_ppm_error = 5 + mass_spectrum_ftms.molecular_search_settings.mz_error_range = 1 + mass_spectrum_ftms.molecular_search_settings.isProtonated = True + mass_spectrum_ftms.molecular_search_settings.isRadical = True + mass_spectrum_ftms.molecular_search_settings.usedAtoms = usedatoms - MzDomainCalibration(mass_spectrum, ref_file_location).run() + # Check that mass_spectrum_ftms has not been calibrated by checking that mz_cal are all None + assert set(mass_spectrum_ftms.mz_cal) == {None} + find_formula_thread = 
FindOxygenPeaks(mass_spectrum_ftms) + find_formula_thread.run() -def test_autorecalibration_centroid(): + mspeaks_results = find_formula_thread.get_list_found_peaks() - mass_spectrum = create_centroid_mass_spectrum() + calibrate = FreqDomain_Calibration(mass_spectrum_ftms, mspeaks_results) + calibrate.linear() + calibrate.step_fit() + calibrate.quadratic(iteration=True) + calibrate.ledford_calibration() - mass_spectrum.filter_by_noise_threshold() + # Check that the calibration was successful + assert set(mass_spectrum_ftms.mz_cal) != {None} - HighResRecalibration(mass_spectrum,plot=False,docker=False).determine_error_boundaries() + mass_spectrum_ftms.molecular_search_settings.error_method = "symmetrical" + mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -3 + mass_spectrum_ftms.molecular_search_settings.max_ppm_error = 3 + mass_spectrum_ftms.molecular_search_settings.mz_error_range = 1 + mass_spectrum_ftms.molecular_search_settings.mz_error_average = 0 + mass_spectrum_ftms.molecular_search_settings.min_abun_error = -30 # percentage + mass_spectrum_ftms.molecular_search_settings.max_abun_error = 70 # percentage + mass_spectrum_ftms.molecular_search_settings.isProtonated = True + mass_spectrum_ftms.molecular_search_settings.isRadical = True + mass_spectrum_ftms.molecular_search_settings.usedAtoms = { + "C": (1, 100), + "H": (4, 200), + "O": (0, 20), + "N": (0, 1), + "S": (0, 0), + "P": (0, 0), + } -def test_segmentedmzcalibration(): - # Tests profile mode recalibration - mass_spectrum = create_mass_spectrum() + ClusteringFilter().filter_kendrick(mass_spectrum_ftms) - mass_spectrum.filter_by_noise_threshold() - ref_file_location = Path.cwd() / "tests/tests_data/ftms/SRFA.ref" +def test_mz_domain_calibration_centroid(mass_spectrum_centroid, ref_file_location): + mass_spectrum_centroid.settings.min_calib_ppm_error = -10 + mass_spectrum_centroid.settings.max_calib_ppm_error = 10 + mass_spectrum_centroid.calib_pol_order = 1 - MzDomainCalibration(mass_spectrum, 
ref_file_location, mzsegment=(0,300)).run() + mass_spectrum_centroid.filter_by_noise_threshold() + # Check that mass_spectrum_centroid has not been calibrated + assert set(mass_spectrum_centroid.mz_cal) == {None} -def test_segmentedmzcalibration_centroid(): - # Tests centroided mode recalibration - mass_spectrum = create_centroid_mass_spectrum() + MzDomainCalibration(mass_spectrum_centroid, ref_file_location).run() - mass_spectrum.filter_by_noise_threshold() + # check there is an output + assert mass_spectrum_centroid.calibration_points == 25 + assert round(mass_spectrum_centroid.calibration_RMS, 2) == round(0.591, 2) - ref_file_location = Path.cwd() / "tests/tests_data/ftms/SRFA.ref" - MzDomainCalibration(mass_spectrum, ref_file_location, mzsegment=(0,300)).run() +def test_auto_calibration_centroid(mass_spectrum_centroid, ref_file_location): + mass_spectrum_centroid.filter_by_noise_threshold() + # Check that mass_spectrum_centroid has not been calibrated + assert set(mass_spectrum_centroid.mz_cal) == {None} -def test_old_calibration(): - - ''' Mass calibration test module: - - creates a mass spectrum object - - find oxygen most abundant peaks separated by 14Da - - calibrate on frequency domain using ledford equation - - filter data based on kendrick mass with CH2O base - - search for all molecular formula candidates + auto_error_bounds = HighResRecalibration( + mass_spectrum_centroid, plot=False, docker=False + ).determine_error_boundaries() - Returns - ------- - Nothing - - Store the results inside the mass spectrum class - (See Docs for the structural details) - ''' - usedatoms = {'C': (1,100) , 'H': (4,200), 'O': (1,10)} + mass_spectrum_centroid.settings.min_calib_ppm_error = auto_error_bounds[-1][0] + mass_spectrum_centroid.settings.max_calib_ppm_error = auto_error_bounds[-1][1] - MSParameters.molecular_search.error_method = 'None' - MSParameters.molecular_search.min_ppm_error = -5 - MSParameters.molecular_search.max_ppm_error = 5 - 
MSParameters.molecular_search.mz_error_range = 1 - MSParameters.molecular_search.isProtonated = True - MSParameters.molecular_search.isRadical= True - MSParameters.molecular_search.usedAtoms = usedatoms - mass_spectrum = create_mass_spectrum() - mass_spectrum.parameters.molecular_search.url_database = '' + MzDomainCalibration(mass_spectrum_centroid, ref_file_location).run() - find_formula_thread = FindOxygenPeaks(mass_spectrum) - find_formula_thread.run() - #find_formula_thread.join() - - mspeaks_results = find_formula_thread.get_list_found_peaks() - - calibrate = FreqDomain_Calibration(mass_spectrum, mspeaks_results) - calibrate.linear() - calibrate.step_fit() - calibrate.quadratic(iteration=True) - calibrate.ledford_calibration() - - mass_spectrum.parameters.molecular_search.error_method = 'symmetrical' - mass_spectrum.parameters.molecular_search.min_ppm_error = -3 - mass_spectrum.parameters.molecular_search.max_ppm_error = 3 - mass_spectrum.parameters.molecular_search.mz_error_range = 1 - mass_spectrum.parameters.molecular_search.mz_error_average = 0 - mass_spectrum.parameters.molecular_search.min_abun_error = -30 # percentage - mass_spectrum.parameters.molecular_search.max_abun_error = 70 # percentage - mass_spectrum.parameters.molecular_search.isProtonated = True - mass_spectrum.parameters.molecular_search.isRadical= True - - mass_spectrum.parameters.molecular_search.usedAtoms = {'C': (1, 100), - 'H': (4, 200), - 'O': (0, 20), - 'N': (0, 1), - 'S': (0, 0), - 'P': (0, 0), - } - - #print(len(mass_spectrum)) - ClusteringFilter().filter_kendrick(mass_spectrum) - #print(len(mass_spectrum)) - - SearchMolecularFormulas(mass_spectrum).run_worker_mass_spectrum() - ClusteringFilter().remove_assignment_by_mass_error(mass_spectrum) - -def test_import_ref_list(): - pass - -if __name__ == "__main__": - - test_old_calibration() - #test_mz_domain_calibration() - #test_autorecalibration() - \ No newline at end of file + # Check that the calibration was successful + assert 
mass_spectrum_centroid.calibration_RMS < 0.6 + + +def test_segmentedmzcalibration_centroid(mass_spectrum_centroid, ref_file_location): + mass_spectrum_centroid.filter_by_noise_threshold() + mass_spectrum_centroid.settings.min_calib_ppm_error = -10 + mass_spectrum_centroid.settings.max_calib_ppm_error = 10 + + # Check that mass_spectrum_centroid has not been calibrated + assert set(mass_spectrum_centroid.mz_cal) == {None} + + MzDomainCalibration( + mass_spectrum_centroid, ref_file_location, mzsegment=(0, 300) + ).run() + + # Check that the calibration was successful + assert mass_spectrum_centroid.calibration_RMS < 0.6 diff --git a/tests/test_classification.py b/tests/test_classification.py index d56d9aaa2..d96178774 100644 --- a/tests/test_classification.py +++ b/tests/test_classification.py @@ -1,56 +1,40 @@ import sys sys.path.append('.') -from pathlib import Path -import pytest - -from corems.encapsulation.factory.parameters import MSParameters from corems.molecular_id.factory.classification import HeteroatomsClassification, Labels from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas -from test_molecularFormulaSearch import create_mass_spectrum - -def test_heteroatoms_classification(): - MSParameters.molecular_search.error_method = 'None' - MSParameters.molecular_search.min_ppm_error = -10 - MSParameters.molecular_search.max_ppm_error = 10 - MSParameters.molecular_search.mz_error_range = 1 - MSParameters.molecular_search.isProtonated = True - MSParameters.molecular_search.isRadical= False - MSParameters.molecular_search.isAdduct= False - - MSParameters.molecular_search.usedAtoms['C'] = (1, 100) - MSParameters.molecular_search.usedAtoms['H'] = (4, 200) - MSParameters.molecular_search.usedAtoms['O'] = (1, 18) - #MSParameters.molecular_search.usedAtoms = usedatoms +def test_heteroatoms_classification(mass_spectrum_ftms): + mass_spectrum_ftms.molecular_search_settings.url_database = '' + 
mass_spectrum_ftms.molecular_search_settings.error_method = 'None' + mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -10 + mass_spectrum_ftms.molecular_search_settings.max_ppm_error = 10 + mass_spectrum_ftms.molecular_search_settings.mz_error_range = 1 + mass_spectrum_ftms.molecular_search_settings.isProtonated = True + mass_spectrum_ftms.molecular_search_settings.isRadical = False + mass_spectrum_ftms.molecular_search_settings.isAdduct = False + usedAtoms = {'C': (1, 100), 'H': (4, 200), 'O': (1, 18)} + mass_spectrum_ftms.molecular_search_settings.usedAtoms = usedAtoms + + # Check that there are not assigned peaks + assert mass_spectrum_ftms.percentile_assigned()[2] == 0 - mass_spec_obj = create_mass_spectrum() - mass_spec_obj.parameters.molecular_search.url_database = '' + SearchMolecularFormulas(mass_spectrum_ftms).run_worker_mass_spectrum() - assignOx = SearchMolecularFormulas(mass_spec_obj).run_worker_mass_spectrum() - - #test classification - mass_spec_obj.percentile_assigned() + # Check if search was successful + assert mass_spectrum_ftms.percentile_assigned()[2] > 0 - mass_spectrum_by_classes = HeteroatomsClassification(mass_spec_obj) + mass_spectrum_by_classes = HeteroatomsClassification(mass_spectrum_ftms) + # Check that the plot is created mass_spectrum_by_classes.plot_ms_assigned_unassigned() - - mass_spectrum_by_classes.atoms_ratio_all("H", "C") - - mass_spectrum_by_classes.dbe_all() - mass_spectrum_by_classes.carbon_number_all() - - mass_spectrum_by_classes.abundance_assigned() - - mass_spectrum_by_classes.mz_exp_assigned() - - mass_spectrum_by_classes.abundance_count_percentile(Labels.unassigned) - - mass_spectrum_by_classes.peaks_count_percentile(Labels.unassigned) - -if __name__ == "__main__": - - test_heteroatoms_classification() \ No newline at end of file + # Check that ratios, DBE, carbon number, abundance and mz_exp are calculated + + assert len(mass_spectrum_by_classes.atoms_ratio_all("H", "C")) > 0 + assert 
len(mass_spectrum_by_classes.dbe_all()) > 0 + assert len(mass_spectrum_by_classes.abundance_assigned()) > 0 + assert len(mass_spectrum_by_classes.mz_exp_assigned()) > 0 + assert mass_spectrum_by_classes.abundance_count_percentile(Labels.unassigned) > 0 + assert mass_spectrum_by_classes.peaks_count_percentile(Labels.unassigned) > 0 \ No newline at end of file diff --git a/tests/test_input.py b/tests/test_input.py index b87b7574e..61221ed02 100644 --- a/tests/test_input.py +++ b/tests/test_input.py @@ -7,83 +7,77 @@ sys.path.append(".") from pathlib import Path - -import pytest -from matplotlib import pyplot - - -from corems.mass_spectra.input.boosterHDF5 import ReadHDF_BoosterMassSpectra +from corems.encapsulation.constant import Labels +from corems.encapsulation.factory.parameters import MSParameters, reset_ms_parameters +from corems.mass_spectra.input import rawFileReader from corems.mass_spectra.input.andiNetCDF import ReadAndiNetCDF -from corems.mass_spectra.input.brukerSolarix import ReadBruker_SolarixTransientMassSpectra -from corems.mass_spectra.input.coremsHDF5 import ReadCoreMSHDF_MassSpectra +from corems.mass_spectra.input.boosterHDF5 import ReadHDF_BoosterMassSpectra +from corems.mass_spectra.input.brukerSolarix import ( + ReadBruker_SolarixTransientMassSpectra, +) from corems.mass_spectra.input.massList import ReadCoremsMassSpectraText from corems.mass_spectrum.input.boosterHDF5 import ReadHDF_BoosterMassSpectrum from corems.mass_spectrum.input.coremsHDF5 import ReadCoreMSHDF_MassSpectrum from corems.mass_spectrum.input.massList import ReadCoremsMasslist, ReadMassList -from corems.transient.input.brukerSolarix import ReadBrukerSolarix -from corems.encapsulation.factory.parameters import MSParameters -from corems.mass_spectra.input import rawFileReader - +from corems.mass_spectrum.input.numpyArray import ms_from_array_profile def test_andi_netcdf_gcms(): - - file_path = Path.cwd() / "tests/tests_data/gcms/" / "GCMS_FAMES_01_GCMS-01_20191023.cdf" + 
file_path = ( + Path.cwd() / "tests/tests_data/gcms/" / "GCMS_FAMES_01_GCMS-01_20191023.cdf" + ) reader_gcms = ReadAndiNetCDF(file_path) - + reader_gcms.run() - + + gcms = reader_gcms.get_gcms_obj() + + assert len(gcms.tic) > 0 + + def test_import_booster_mass_spectrum_hdf(): + file_path = ( + Path.cwd() + / "tests/tests_data/ftms/" + / "ESFA_100k_9767-13548_chB.A_re_pc_CoAddAll_mFT.h5" + ) + + booster_reader = ReadHDF_BoosterMassSpectrum(file_path, isCentroid=False) + + mass_spectrum = booster_reader.get_mass_spectrum(auto_process=False) + mass_spectrum.parameters = MSParameters(use_defaults=True) + mass_spectrum.process_mass_spec() + + assert len(mass_spectrum) > 0 + assert mass_spectrum.number_average_molecular_weight() > 0 + assert mass_spectrum.weight_average_molecular_weight() > 0 + assert round(mass_spectrum[0].mz_exp, 3) == 220.147 - file_path = Path.cwd() / "tests/tests_data/ftms/" / "ESFA_100k_9767-13548_chB.A_re_pc_CoAddAll_mFT.h5" - - if file_path.exists(): - - #polarity need to be set or read from the file - - booster_reader = ReadHDF_BoosterMassSpectrum(file_path, isCentroid=False) - - mass_spectrum = booster_reader.get_mass_spectrum(auto_process=True) - - #mass_spectrum.plot_mz_domain_profile() - - print( - "number_average_molecular_weight", - mass_spectrum.number_average_molecular_weight(), - ) - print( - "weight_average_molecular_weight", - mass_spectrum.weight_average_molecular_weight(), - ) - - assert round(mass_spectrum[0].mz_exp,3) == 220.147 - - else: - - FileNotFoundError(file_path) def test_import_booster_mass_spectra_hdf(): + file_path = ( + Path.cwd() + / "tests/tests_data/ftms/" + / "ESFA_100k_9767-13548_chB.A_re_pc_CoAddAll_mFT.h5" + ) - file_path = Path.cwd() / "tests/tests_data/ftms/" / "ESFA_100k_9767-13548_chB.A_re_pc_CoAddAll_mFT.h5" - - if file_path.exists(): - #polarity need to be set or read from the file - polarity = -1 + polarity = -1 - booster_reader = ReadHDF_BoosterMassSpectra(file_path, polarity) + booster_reader = 
ReadHDF_BoosterMassSpectra(file_path, polarity) - booster_reader.start() - booster_reader.join() - #lcms = booster_reader.get_lcms_obj() - -def test_import_lcms_from_transient(): + booster_reader.start() + booster_reader.join() + mass_spectra = booster_reader.get_lcms_obj() + assert len(mass_spectra) == 1 - file_location = Path.cwd() / "tests/tests_data/ftms/" / "NEG_ESI_SRFA_Auto.d"#"SOM_LC_PeatMix_2p8_0p6_2_30AUG19_GIMLI_ZORBAX-1186_1_01_259.d" - MSParameters.mass_spectrum.noise_threshold_method = 'log' +def test_import_lcms_from_transient(): + file_location = Path.cwd() / "tests/tests_data/ftms/" / "NEG_ESI_SRFA_Auto.d" + + MSParameters.mass_spectrum.noise_threshold_method = "log" MSParameters.mass_spectrum.noise_threshold_log_nsigma = 20 MSParameters.ms_peak.peak_min_prominence_percent = 1 - + read_lcms = ReadBruker_SolarixTransientMassSpectra(file_location) read_lcms.start() @@ -94,72 +88,20 @@ def test_import_lcms_from_transient(): lcms.scans_number lcms.set_retention_time_from_data() lcms.set_tic_list_from_data() - lcms.retention_time - lcms.tic - lcms[0] - - for ms in lcms: - #assign mf - - for mspeak in ms: - #mspeak.mz_exp,mspeak.mz_abund - for mf in mspeak: - mf.string, mf.mz_calc, mf.is_isotopologue - pass - -def test_import_transient(): - - # from corems.structure.input.MidasDatFile import ReadMidasDatFile - # file_location = Path.cwd() / "tests/tests_data/ftms/SRFAII_20ppm_14Jul2020_IATp08_After_WebEx_1_01_54136.d/" - file_location = Path.cwd() / "tests/tests_data/ftms/ESI_NEG_SRFA.d" - - MSParameters.transient.apodization_method = "Hanning" - MSParameters.transient.number_of_truncations = 0 - MSParameters.transient.number_of_zero_fills = 1 + assert lcms.retention_time[0] > 0 + assert len(lcms.tic) > 0 + assert len(lcms) > 0 - with ReadBrukerSolarix(file_location) as bruker_transient: - - #MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' - #MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 1 + # Return the 
MSParameters to the default values + reset_ms_parameters() - #MSParameters.mass_spectrum.noise_threshold_method = 'signal_noise' - #MSParameters.mass_spectrum.noise_threshold_min_s2n = 50 +def test_import_transient(mass_spectrum_ftms): + # This test is using the fixture mass_spectrum_ftms + mass_spectrum_ftms.plot_profile_and_noise_threshold() + assert len(mass_spectrum_ftms) > 0 - MSParameters.mass_spectrum.noise_threshold_method = 'log' - MSParameters.mass_spectrum.noise_threshold_log_nsigma = 20 - MSParameters.ms_peak.peak_min_prominence_percent = 1 - - mass_spectrum_obj = bruker_transient.get_mass_spectrum(plot_result=False, auto_process=True) - #from corems.encapsulation.constant import Labels - #from corems.mass_spectrum.input import numpyArray - - #mass_spectrum_test = numpyArray.ms_from_array_profile(mz=mass_spectrum_obj.mz_exp_profile, - # abundance=mass_spectrum_obj.abundance_profile, - # dataname='test', - # polarity=-1, - # data_type=Labels.booster_profile, - # ) - - #mass_spectrum_test.plot_mz_domain_profile() - - mass_spectrum_obj.plot_profile_and_noise_threshold() - - #pyplot.show() - - #mass_spectrum_test.plot_profile_and_noise_threshold() - - #mass_spectrum_obj.filter_by_noise_threshold() - - #print(mass_spectrum_obj.get_noise_threshold()) - - # pyplot.show() - - #print(len(mass_spectrum_obj)) - - #print(mass_spectrum_obj.mspeaks[0].mz_exp, mass_spectrum_obj.mspeaks[-1].mz_exp) def test_import_corems_hdf5(): - file_location = Path.cwd() / "tests/tests_data/ftms/" / "NEG_ESI_SRFA_CoreMS.hdf5" #load any type of mass list file, change the delimeter to read another type of file, i.e : "," for csv, "\t" for tabulated mass list, etc @@ -175,110 +117,109 @@ def test_import_corems_hdf5(): assert len(mass_spectrum) == 20 # Import unprocessed mass spectrum, check that the mass spectrum is loaded correctly - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' mass_spectrum2 = mass_list_reader.get_mass_spectrum( load_settings=False, 
auto_process=False, load_molecular_formula=False ) + mass_spectrum2.parameters.mass_spectrum.noise_threshold_method = 'relative_abundance' + assert mass_spectrum2.settings.noise_threshold_method == 'relative_abundance' assert len(mass_spectrum2) == 0 - - read_lc_ms = ReadCoreMSHDF_MassSpectra(file_location) - - read_lc_ms.start() - read_lc_ms.join() - - mass_spectra = read_lc_ms.get_lcms_obj() - - for mspeak in mass_spectra[0]: - - if mspeak: - - for mf in mspeak: - - print('mass_spectra', mf.string) def test_import_corems_mass_list(): + file_location = ( + Path.cwd() / "tests/tests_data/ftms/ESI_NEG_SRFA_COREMS_withdupes.csv" + ) - file_location = Path.cwd() / "tests/tests_data/ftms/ESI_NEG_SRFA_COREMS_withdupes.csv" - - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' + MSParameters.mass_spectrum.noise_threshold_method = "relative_abundance" MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 0.1 - - #load any type of mass list file, change the delimeter to read another type of file, i.e : "," for csv, "\t" for tabulated mass list, etc - mass_list_reader = ReadCoremsMasslist(file_location, analyzer='ICR', instrument_label='12T') - mass_spectrum = mass_list_reader.get_mass_spectrum(loadSettings=False) + # load any type of mass list file, change the delimeter to read another type of file, i.e : "," for csv, "\t" for tabulated mass list, etc + mass_list_reader = ReadCoremsMasslist( + file_location, analyzer="ICR", instrument_label="12T" + ) - for mspeak in mass_spectrum: - - if mspeak: - - for mf in mspeak: - print(mf.string) + mass_spectrum = mass_list_reader.get_mass_spectrum(loadSettings=False) + assert mass_spectrum.to_dataframe().shape[1] == 26 + assert mass_spectrum.to_dataframe().shape[0] > 0 + assert round(mass_spectrum[0].mz_exp, 0) == 576 + assert mass_spectrum[0][0].string == "C25 H20 O16" - file_location = Path.cwd() / "tests/tests_data/ftms/" / "NEG_ESI_SRFA_CoreMS.corems" + file_location = Path.cwd() / 
"tests/tests_data/ftms/" / "NEG_ESI_SRFA_CoreMS.corems" read_lc_ms = ReadCoremsMassSpectraText(file_location) read_lc_ms.start() read_lc_ms.join() - - + mass_spectra = read_lc_ms.get_lcms_obj() + assert len(mass_spectra) > 0 + assert mass_spectra[0].to_dataframe().shape[0] > 0 + assert round(mass_spectra[0][0].mz_exp, 0) == 227 + + # Return the MSParameters to the default values + reset_ms_parameters() - for mspeak in mass_spectra[0]: - - if mspeak: - - for mf in mspeak: - - print('mass_spectra', mf.string) def test_import_thermo_profile_mass_list(): + file_location = ( + Path.cwd() / "tests/tests_data/ftms/" / "Thermo_Profile_MassList.txt" + ) - file_location = Path.cwd() / "tests/tests_data/ftms/" / "Thermo_Profile_MassList.txt" - - mass_list_reader = ReadMassList(file_location, header_lines=7, isCentroid=False, isThermoProfile=True) + mass_list_reader = ReadMassList( + file_location, header_lines=7, isCentroid=False, isThermoProfile=True + ) polarity = +1 - mass_spectrum = mass_list_reader.get_mass_spectrum(polarity, auto_process=True, loadSettings=False) - - #mass_spectrum.plot_profile_and_noise_threshold() + mass_spectrum = mass_list_reader.get_mass_spectrum( + polarity, auto_process=False, loadSettings=False + ) + mass_spectrum.parameters = MSParameters(use_defaults=True) + mass_spectrum.process_mass_spec() + + assert mass_spectrum.to_dataframe().shape[0] > 0 + assert round(mass_spectrum[0].mz_exp, 0) == 59 + + +def test_import_numpy_array_profile(mass_spectrum_ftms): + mass_spectrum_new = ms_from_array_profile( + mz=mass_spectrum_ftms.mz_exp_profile, + abundance=mass_spectrum_ftms.abundance_profile, + dataname="test", + polarity=-1, + data_type=Labels.booster_profile, + auto_process=False + ) + mass_spectrum_new.parameters = mass_spectrum_ftms.parameters + mass_spectrum_new.process_mass_spec() - from corems.encapsulation.constant import Labels - from corems.mass_spectrum.input import numpyArray - mass_spectrum_test = 
numpyArray.ms_from_array_profile(mz=mass_spectrum.mz_exp_profile, - abundance=mass_spectrum.abundance_profile, - dataname='test', - polarity=-1, - data_type=Labels.booster_profile) + assert mass_spectrum_new.to_dataframe().shape == mass_spectrum_ftms.to_dataframe().shape + assert round(mass_spectrum_new[0].mz_exp, 0) == round(mass_spectrum_ftms[0].mz_exp, 0) + assert not mass_spectrum_new.is_centroid - mass_spectrum_test.plot_mz_domain_profile() + mass_spectrum_new.plot_mz_domain_profile() - # pyplot.show() def test_import_maglab_pks(): - file_location = Path.cwd() / "tests/tests_data/ftms/" / "SRFA.pks" - - ref_file_location = Path.cwd() / "tests/tests_data/ftms/SRFA.ref" mass_list_reader = ReadMassList(file_location) polarity = -1 - #MSParameters.mass_spectrum.min_calib_ppm_error = 3 - #MSParameters.mass_spectrum.max_calib_ppm_error = 4 - - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' + MSParameters.mass_spectrum.noise_threshold_method = "relative_abundance" MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 0.1 mass_spectrum = mass_list_reader.get_mass_spectrum(polarity) - #MzDomainCalibration(mass_spectrum, ref_file_location).run() + assert mass_spectrum.to_dataframe().shape[0] > 0 + assert round(mass_spectrum[0].mz_exp, 0) == 131 + + # Return the MSParameters to the default values + reset_ms_parameters() + def test_import_xml_mass_list(): @@ -295,101 +236,96 @@ def test_import_xml_mass_list(): assert len(mass_spectrum)>30_000 # check the 100th peak is as expected assert round(mass_spectrum.mz_exp[100],3) == 118.049 + + # Return the MSParameters to the default values + reset_ms_parameters() -def test_import_mass_list(): +def test_import_xml_mass_list(): + + file_location = Path.cwd() / "tests/tests_data/ftms/" / "srfa_neg_xml_example.xml" + + mass_list_reader = ReadMassList(file_location, isCentroid=True, isThermoProfile=False) + polarity = -1 + + MSParameters.mass_spectrum.noise_threshold_method = 
'absolute_abundance' + MSParameters.mass_spectrum.noise_threshold_absolute_abundance = 1000 + mass_spectrum = mass_list_reader.get_mass_spectrum(polarity, auto_process=True, loadSettings=False) + # check there are lots of peaks (should be ~36k) + assert len(mass_spectrum)>30_000 + # check the 100th peak is as expected + assert round(mass_spectrum.mz_exp[100],3) == 118.049 + + # Return the MSParameters to the default values + reset_ms_parameters() + + +def test_import_mass_list(): file_location = Path.cwd() / "tests/tests_data/ftms/" / "NEG_ESI_SRFA_CoreMS.xlsx" - + mass_list_reader = ReadMassList(file_location) file_location = Path.cwd() / "tests/tests_data/ftms/" / "ESI_NEG_ESFA.ascii" - + mass_list_reader = ReadMassList(file_location) - #polarity need to be set or read from the file + # polarity need to be set or read from the file polarity = -1 - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' - MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 1 - - # MSParameters.mass_spectrum.noise_threshold_method = 'signal_noise' - # MSParameters.mass_spectrum.noise_threshold_min_s2n = 100 - - #MSParameters.mass_spectrum.noise_threshold_method = 'log' - #MSParameters.mass_spectrum.noise_threshold_min_std = 32 - - #load any type of mass list file, change the delimeter to read another type of file, i.e : "," for csv, "\t" for tabulated mass list, etc mass_list_reader = ReadMassList(file_location) - mass_spectrum = mass_list_reader.get_mass_spectrum(polarity, auto_process=True) - - print(mass_spectrum.baseline_noise, mass_spectrum.baseline_noise_std) + mass_spectrum = mass_list_reader.get_mass_spectrum(polarity, auto_process=False) + mass_spectrum.parameters.mass_spectrum.noise_threshold_method = "relative_abundance" + mass_spectrum.parameters.mass_spectrum.noise_threshold_min_relative_abundance = 1 + mass_spectrum.process_mass_spec() + + assert mass_spectrum.baseline_noise > 10000 + assert mass_spectrum.baseline_noise_std > 10000 
mass_spectrum.filter_by_noise_threshold() - print(len(mass_spectrum)) - #mass_spectrum.plot_mz_domain_profile() + assert mass_spectrum.to_dataframe().shape[0] > 0 + assert len(mass_spectrum) > 0 + assert round(mass_spectrum.number_average_molecular_weight()) > 200 + assert round(mass_spectrum.weight_average_molecular_weight()) > 200 + mass_spectrum.plot_profile_and_noise_threshold() - # pyplot.show() - print( - "number_average_molecular_weight", - mass_spectrum.number_average_molecular_weight(), - ) - print( - "weight_average_molecular_weight", - mass_spectrum.weight_average_molecular_weight(), - ) + og_len = len(mass_spectrum) mass_spectrum.filter_by_s2n(100) - - # mass_list_reader = ReadMassList(file_location, isCentroid=False,) + assert len(mass_spectrum) < og_len - # mass_spectrum = mass_list_reader.get_mass_spectrum(polarity,auto_process=True) def test_import_thermo_average(): - file_location = Path.cwd() / "tests/tests_data/ftms/" / "SRFA_NEG_ESI_ORB.raw" - # change parameters here - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' - MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 1 - # creates the parser obj parser = rawFileReader.ImportMassSpectraThermoMSFileReader(file_location) # sums all the mass spectra - parser.chromatogram_settings.scans = (-1, -1) - mass_spectrum = parser.get_average_mass_spectrum(spectrum_mode='profile') + mass_spectrum = parser.get_average_mass_spectrum(spectrum_mode="profile", auto_process=False) + mass_spectrum.parameters = MSParameters(use_defaults=True) + mass_spectrum.parameters.mass_spectrum.noise_threshold_method = "relative_abundance" + mass_spectrum.parameters.mass_spectrum.noise_threshold_min_relative_abundance = 1 + mass_spectrum.process_mass_spec() + assert len(mass_spectrum) == 762 # sums scans in selected range parser.chromatogram_settings.scans = (1, 1) - mass_spectrum = parser.get_average_mass_spectrum(spectrum_mode='profile') + mass_spectrum = 
parser.get_average_mass_spectrum(spectrum_mode="profile") + mass_spectrum.parameters = MSParameters(use_defaults=True) + mass_spectrum.parameters.mass_spectrum.noise_threshold_method = "relative_abundance" + mass_spectrum.parameters.mass_spectrum.noise_threshold_min_relative_abundance = 1 + mass_spectrum.process_mass_spec() + assert len(mass_spectrum) == 953 parser.chromatogram_settings.scans = [1] # sums scans in selected range - mass_spectrum = parser.get_average_mass_spectrum(spectrum_mode='profile') + mass_spectrum = parser.get_average_mass_spectrum(spectrum_mode="profile") mass_spectrum.plot_mz_domain_profile() mass_spectrum.plot_profile_and_noise_threshold() - #print("polarity", mass_spectrum.polarity) - #pyplot.show() - - -if __name__ == '__main__': - - pass - # test_import_booster_mass_spectrum_hdf() - # test_import_booster_mass_spectra_hdf() - #test_import_lcms_from_transient() - #test_import_thermo_profile_mass_list() - # test_import_transient() - test_import_corems_hdf5() - #test_import_corems_mass_list() - #test_import_mass_list() - #test_import_maglab_pks() - #test_andi_netcdf_gcms() - #test_import_corems_mass_list() - #test_import_thermo_average() - + assert mass_spectrum.to_dataframe().shape[0] == 1518 + assert round(mass_spectrum[0].mz_exp, 0) == 100 diff --git a/tests/test_massErrorPrediction.py b/tests/test_massErrorPrediction.py index 596a97acc..3849c8e27 100644 --- a/tests/test_massErrorPrediction.py +++ b/tests/test_massErrorPrediction.py @@ -5,12 +5,11 @@ from corems.transient.input.brukerSolarix import ReadBrukerSolarix from corems.mass_spectrum.calc.MassErrorPrediction import MassErrorPrediction from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas -from test_molecularFormulaSearch import create_mass_spectrum -def x_test_error_prediction(): +def x_test_error_prediction(mass_spectrum_ftms): 'This function will be removed in CoreMS 2.0. 
adding x to skip test' - mass_spectrum = create_mass_spectrum() + mass_spectrum = mass_spectrum_ftms mass_error_prediction = MassErrorPrediction(mass_spectrum) diff --git a/tests/test_mass_spectrum.py b/tests/test_mass_spectrum.py index b948b53d7..0778fd00f 100644 --- a/tests/test_mass_spectrum.py +++ b/tests/test_mass_spectrum.py @@ -1,103 +1,106 @@ -__author__ = "Yuri E. Corilo" -__date__ = "Jul 25, 2019" - -import os - -import sys -from pathlib import Path -sys.path.append('.') - - -from corems.transient.input.brukerSolarix import ReadBrukerSolarix -from corems.encapsulation.factory.processingSetting import TransientSetting -from corems.encapsulation.factory.parameters import MSParameters -from corems.mass_spectrum.output.export import HighResMassSpecExport -from corems.mass_spectrum.input.coremsHDF5 import ReadCoreMSHDF_MassSpectrum - -def test_create_mass_spectrum(): - - file_location = Path.cwd() / "tests/tests_data/ftms/ESI_NEG_SRFA.d/" - - bruker_reader = ReadBrukerSolarix(file_location) - - TransientSetting.apodization_method = 'Hamming' - bruker_transient = bruker_reader.get_transient() - - TransientSetting.apodization_method = 'Blackman' - TransientSetting.number_of_truncations = 1 - bruker_transient = bruker_reader.get_transient() - - MSParameters.mass_spectrum.noise_threshold_method = 'signal_noise' - MSParameters.mass_spectrum.noise_threshold_min_s2n = 4 - mass_spectrum_obj = bruker_transient.get_mass_spectrum( plot_result=False, auto_process=True) - assert mass_spectrum_obj.settings.noise_threshold_method == 'signal_noise' - - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' - MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 20 - mass_spectrum_obj = bruker_transient.get_mass_spectrum( plot_result=False, auto_process=True) - assert mass_spectrum_obj.settings.noise_threshold_method == 'relative_abundance' - - MSParameters.mass_spectrum.noise_threshold_method = 'log' - 
MSParameters.mass_spectrum.noise_threshold_log_nsigma = 20 - mass_spectrum_obj = bruker_transient.get_mass_spectrum( plot_result=False, auto_process=True) - assert mass_spectrum_obj.settings.noise_threshold_method == 'log' - - mass_spectrum_obj.freq_exp - mass_spectrum_obj.dir_location - mass_spectrum_obj.resolving_power - mass_spectrum_obj.signal_to_noise - mass_spectrum_obj.max_abundance - mass_spectrum_obj.filter_by_mz(200, 1000) - mass_spectrum_obj.reset_indexes() - - og_length = len(mass_spectrum_obj.mz_exp_pp) - assert len(mass_spectrum_obj.mz_exp_pp) == len(mass_spectrum_obj.abundance_profile_pp) - mass_spectrum_obj.settings.picking_point_extrapolate = 5 - assert len(mass_spectrum_obj.mz_exp_pp) == len(mass_spectrum_obj.abundance_profile_pp) - assert len(mass_spectrum_obj.mz_exp_pp) > og_length - - mass_spectrum_obj.filter_by_abundance(0, 1000) - mass_spectrum_obj.reset_indexes() - mass_spectrum_obj.filter_by_max_resolving_power(12, 3) - mass_spectrum_obj.reset_indexes() - mass_spectrum_obj.filter_by_min_resolving_power(15, 3) - mass_spectrum_obj.reset_indexes() - mass_spectrum_obj.filter_by_noise_threshold() - mass_spectrum_obj.reset_indexes() - - mass_spectrum_obj.get_mz_and_abundance_peaks_tuples() - mass_spectrum_obj.get_masses_count_by_nominal_mass() - mass_spectrum_obj.resolving_power_calc(12, 1) - mass_spectrum_obj._f_to_mz() - mass_spectrum_obj.number_average_molecular_weight(profile=True) - - mass_spectrum_obj.reset_cal_therms(mass_spectrum_obj.Aterm,mass_spectrum_obj.Bterm,mass_spectrum_obj.Cterm) - mass_spectrum_obj.reset_indexes() - mass_spectrum_obj.plot_profile_and_noise_threshold() - - return mass_spectrum_obj - -def test_export_import_profile(): - if os.path.exists("my_mass_spec.hdf5"): - os.remove("my_mass_spec.hdf5") - mass_spectrum_obj = test_create_mass_spectrum() - assert not mass_spectrum_obj.is_centroid - ms_peaksn = mass_spectrum_obj.to_dataframe().shape[0] - - exportMS = HighResMassSpecExport("my_mass_spec", mass_spectrum_obj) - 
exportMS._output_type = "hdf5" - exportMS.save() - - parser = ReadCoreMSHDF_MassSpectrum("my_mass_spec.hdf5") - mass_spectrum_obj2 = parser.get_mass_spectrum(auto_process=True, load_settings=True) - ms_peaksn2 = mass_spectrum_obj2.to_dataframe().shape[0] - assert ms_peaksn == ms_peaksn2 - - os.remove("my_mass_spec.hdf5") - - -if __name__ == "__main__": - # mass_spectrum_obj, kendrick_group_index = test_create_mass_spectrum() - # mass_spectrum_obj.plot_profile_and_noise_threshold() - #test_create_mass_spectrum() - test_export_import_profile() +# Tests for adpodization methods +def test_hamming(bruker_transient): + """Test the creation of a mass spectrum object with the Hamming apodization method""" + bruker_transient.set_processing_parameter(apodization_method='Hamming', + number_of_truncations=0, + number_of_zero_fills=1) + assert bruker_transient.parameters.apodization_method == "Hamming" + mass_spectrum_obj = bruker_transient.get_mass_spectrum(plot_result=False, auto_process=False) + mass_spectrum_obj.plot_mz_domain_profile() + +def test_blackman(bruker_transient): + """Test the creation of a mass spectrum object with the Blackman apodization method""" + bruker_transient.set_processing_parameter(apodization_method='Blackman', + number_of_truncations=0, + number_of_zero_fills=1) + assert bruker_transient.parameters.apodization_method == "Blackman" + mass_spectrum_obj = bruker_transient.get_mass_spectrum(plot_result=False, auto_process=False) + mass_spectrum_obj.plot_mz_domain_profile() + +def xtest_fullsine(bruker_transient): + """Test the creation of a mass spectrum object with the Full-Sine apodization method""" + # This test is disabled because the Full-Sine apodization method is behaving strangely, see issue #163 + bruker_transient.set_processing_parameter(apodization_method='Full-Sine', + number_of_truncations=0, + number_of_zero_fills=1) + assert bruker_transient.parameters.apodization_method == "Full-Sine" + mass_spectrum_obj = 
bruker_transient.get_mass_spectrum(plot_result=False, auto_process=False) + mass_spectrum_obj.plot_mz_domain_profile() + +def test_kaiser(bruker_transient): + """Test the creation of a mass spectrum object with the Kaiser apodization method""" + bruker_transient.set_processing_parameter(apodization_method='Kaiser', + number_of_truncations=0, + number_of_zero_fills=1) + assert bruker_transient.parameters.apodization_method == "Kaiser" + mass_spectrum_obj = bruker_transient.get_mass_spectrum(plot_result=False, auto_process=False) + mass_spectrum_obj.plot_mz_domain_profile() + +# Tests for noise threshold methods (note that mass_spectrum_ftms is processed by log in the fixture, no need to test it) +def test_relative_abundance(bruker_transient, mass_spectrum_ftms): + """Test the creation of a mass spectrum object with the relative abundance noise threshold method""" + mass_spectrum_obj = bruker_transient.get_mass_spectrum(plot_result=False, auto_process=False) + mass_spectrum_obj.settings.noise_threshold_method = "relative_abundance" + mass_spectrum_obj.settings.noise_threshold_relative_abundance = 0.01 + mass_spectrum_obj.process_mass_spec() + assert mass_spectrum_obj.settings.noise_threshold_method == "relative_abundance" + assert len(mass_spectrum_obj) != len(mass_spectrum_ftms) + +def test_absolute_abundance(bruker_transient, mass_spectrum_ftms): + """Test the creation of a mass spectrum object with the absolute abundance noise threshold method""" + mass_spectrum_obj = bruker_transient.get_mass_spectrum(plot_result=False, auto_process=False) + mass_spectrum_obj.settings.noise_threshold_method = "absolute_abundance" + mass_spectrum_obj.settings.noise_threshold_absolute_abundance = 20000000 + mass_spectrum_obj.process_mass_spec() + assert mass_spectrum_obj.settings.noise_threshold_method == "absolute_abundance" + assert len(mass_spectrum_obj) != len(mass_spectrum_ftms) + +def test_signal_to_noise(bruker_transient, mass_spectrum_ftms): + """Test the creation of a mass 
spectrum object with the signal to noise noise threshold method""" + mass_spectrum_obj = bruker_transient.get_mass_spectrum(plot_result=False, auto_process=False) + mass_spectrum_obj.settings.noise_threshold_method = "signal_noise" + mass_spectrum_obj.settings.noise_threshold_min_s2n = 4 + mass_spectrum_obj.process_mass_spec() + assert mass_spectrum_obj.settings.noise_threshold_method == "signal_noise" + assert len(mass_spectrum_obj) != len(mass_spectrum_ftms) + +def test_minima(bruker_transient, mass_spectrum_ftms): + """Test the creation of a mass spectrum object with the minima noise threshold method""" + mass_spectrum_obj = bruker_transient.get_mass_spectrum(plot_result=False, auto_process=False) + mass_spectrum_obj.settings.noise_threshold_method = "minima" + mass_spectrum_obj.settings.noise_threshold_min_std = 10 + mass_spectrum_obj.process_mass_spec() + assert mass_spectrum_obj.settings.noise_threshold_method == "minima" + assert len(mass_spectrum_obj) != len(mass_spectrum_ftms) + +# Tests for peak filtering methods +def test_mass_spectrum_filtering(mass_spectrum_ftms): + """Test the filtering methods of the mass spectrum object""" + og_peaks = len(mass_spectrum_ftms) + + # Test the filtering methods and check that the number of peaks has decreased each time + mass_spectrum_ftms.filter_by_mz(200, 1000) + assert len(mass_spectrum_ftms) < og_peaks + mass_spectrum_ftms.reset_indexes() + assert len(mass_spectrum_ftms) == og_peaks + mass_spectrum_ftms.filter_by_abundance(0, 1000) + assert len(mass_spectrum_ftms) < og_peaks + mass_spectrum_ftms.reset_indexes() + mass_spectrum_ftms.filter_by_max_resolving_power(2, 3) + assert len(mass_spectrum_ftms) < og_peaks + mass_spectrum_ftms.reset_indexes() + mass_spectrum_ftms.filter_by_min_resolving_power(15, 3) + assert len(mass_spectrum_ftms) < og_peaks + mass_spectrum_ftms.reset_indexes() + mass_spectrum_ftms.settings.noise_threshold_method = "absolute_abundance" + 
mass_spectrum_ftms.settings.noise_threshold_absolute_abundance = 10000000 + mass_spectrum_ftms.filter_by_noise_threshold() + assert len(mass_spectrum_ftms) < og_peaks + +# Tests for mass spectrum properties +def test_mass_spectrum_properties(mass_spectrum_ftms): + """Test the properties of the mass spectrum object""" + res = mass_spectrum_ftms.get_mz_and_abundance_peaks_tuples() + assert len(res) == len(mass_spectrum_ftms) + mass_spectrum_ftms.get_masses_count_by_nominal_mass() + mass_spectrum_ftms.resolving_power_calc(12, 1) \ No newline at end of file diff --git a/tests/test_mass_spectra_export_import.py b/tests/test_mass_spectrum_export_import.py similarity index 50% rename from tests/test_mass_spectra_export_import.py rename to tests/test_mass_spectrum_export_import.py index ebf4ca4e5..dc9e035ef 100644 --- a/tests/test_mass_spectra_export_import.py +++ b/tests/test_mass_spectrum_export_import.py @@ -1,61 +1,64 @@ import os +import pytest from corems.mass_spectrum.input.numpyArray import ms_from_array_centroid from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas -from corems.encapsulation.factory.parameters import MSParameters from corems.mass_spectrum.output.export import HighResMassSpecExport from corems.mass_spectrum.input.coremsHDF5 import ReadCoreMSHDF_MassSpectrum -def prep_mass_spec_obj(): +@pytest.fixture +def mass_spectrum_silico(): # Test for generating accurate molecular formula from a single mass using the local sql database # Now also tests that it is handling isotopes correctly (for non-adducts) mz = [760.58156938877, 761.58548] abundance = [1000, 400] rp, s2n = [[1, 1], [10, 10]] - MSParameters.mass_spectrum.noise_threshold_method = "relative_abundance" - MSParameters.mass_spectrum.noise_threshold_absolute_abundance = 0 - - MSParameters.molecular_search.url_database = "" - MSParameters.molecular_search.error_method = "None" - MSParameters.molecular_search.min_ppm_error = -5 - 
MSParameters.molecular_search.max_ppm_error = 5 - MSParameters.molecular_search.mz_error_range = 1 - MSParameters.molecular_search.isProtonated = True - MSParameters.molecular_search.isRadical = False - MSParameters.molecular_search.isAdduct = False - - usedatoms = {"C": (1, 57), "H": (4, 200), "N": (0, 1)} - MSParameters.molecular_search.usedAtoms = usedatoms mass_spectrum_obj = ms_from_array_centroid( - mz, abundance, rp, s2n, "single mf search", polarity=1, auto_process=True + mz, abundance, rp, s2n, "single mf search", polarity=1, auto_process=False ) - return mass_spectrum_obj - -def run_molecular_formula_search(mass_spectrum_obj): + # Set the settings for the molecular search on the mass spectrum object + mass_spectrum_obj.settings.noise_threshold_method = "relative_abundance" + mass_spectrum_obj.settings.noise_threshold_absolute_abundance = 0 + + mass_spectrum_obj.molecular_search_settings.url_database = "" + mass_spectrum_obj.molecular_search_settings.error_method = "None" + mass_spectrum_obj.molecular_search_settings.min_ppm_error = -5 + mass_spectrum_obj.molecular_search_settings.max_ppm_error = 5 + mass_spectrum_obj.molecular_search_settings.mz_error_range = 1 + mass_spectrum_obj.molecular_search_settings.isProtonated = True + mass_spectrum_obj.molecular_search_settings.isRadical = False + mass_spectrum_obj.molecular_search_settings.isAdduct = False mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False mass_spectrum_obj.molecular_search_settings.use_isotopologue_filter = False - SearchMolecularFormulas( - mass_spectrum_obj, find_isotopologues=True - ).run_worker_ms_peaks([mass_spectrum_obj[0]]) + + usedatoms = {"C": (1, 57), "H": (4, 200), "N": (0, 1)} + mass_spectrum_obj.molecular_search_settings.usedAtoms = usedatoms + + mass_spectrum_obj.process_mass_spec() + return mass_spectrum_obj -def test_mass_spec_export_import_with_annote(): - if os.path.exists("my_mass_spec.hdf5"): - os.remove("my_mass_spec.hdf5") - mass_spectrum_obj = 
prep_mass_spec_obj() - mass_spectrum_obj.parameters.molecular_search.url_database = "" - mass_spectrum_obj = run_molecular_formula_search(mass_spectrum_obj) - ms_df1 = mass_spectrum_obj.to_dataframe() - assert mass_spectrum_obj[0][0].string == "C56 H73 N1" +def test_molecular_formula_search(mass_spectrum_silico): + SearchMolecularFormulas( + mass_spectrum_silico, find_isotopologues=True + ).run_worker_ms_peaks([mass_spectrum_silico[0]]) + + ms_df1 = mass_spectrum_silico.to_dataframe() + assert mass_spectrum_silico[0][0].string == "C56 H73 N1" assert ms_df1.shape == (2, 26) - assert mass_spectrum_obj[1][0].string == "C55 H73 N1 13C1" - assert mass_spectrum_obj._mz_exp[0] == 760.58156938877 + assert mass_spectrum_silico[1][0].string == "C55 H73 N1 13C1" + - exportMS = HighResMassSpecExport("my_mass_spec", mass_spectrum_obj) +def test_mass_spec_export_import_with_annote(mass_spectrum_silico): + SearchMolecularFormulas( + mass_spectrum_silico, find_isotopologues=True + ).run_worker_ms_peaks([mass_spectrum_silico[0]]) + + exportMS = HighResMassSpecExport("my_mass_spec", mass_spectrum_silico) exportMS._output_type = "hdf5" exportMS.save() @@ -69,8 +72,4 @@ def test_mass_spec_export_import_with_annote(): assert mass_spectrum_obj2._mz_exp[0] == 760.58156938877 # Remove the file - os.remove("my_mass_spec.hdf5") - - -if __name__ == "__main__": - test_mass_spec_export_import_with_annote() + os.remove("my_mass_spec.hdf5") \ No newline at end of file diff --git a/tests/test_molecularFormula.py b/tests/test_molecularFormula.py deleted file mode 100644 index 65d5eb375..000000000 --- a/tests/test_molecularFormula.py +++ /dev/null @@ -1,71 +0,0 @@ -__author__ = "Yuri E. 
Corilo" -__date__ = "Jul 22, 2019" - -import sys -sys.path.append(".") - -import pytest -from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula -from corems.encapsulation.constant import Labels -from copy import deepcopy - -def test_molecular_formula(): - - '''test the MolecularFormula class and the calculation of isotopologues''' - - formula_dict = {'C':10, 'H':0, 'O':10,'Cl':2, Labels.ion_type: 'radical'} - - ion_charge = 1 - formula_obj = MolecularFormula(formula_dict, ion_charge, ion_type='radical', adduct_atom=None) - print("ion_type", formula_obj.ion_type) - assert round(formula_obj.mz_calc,2) == round(349.886303060457,2) - - min_abundance, current_abundance = 1,1 - #print(min_abundance, current_abundance) - isotopologues = list(formula_obj.isotopologues(0.01, current_abundance, 500)) - - assert round(isotopologues[0].mz_calc,2) == round(351.883352980637,2) - assert round(isotopologues[0].prob_ratio,2) == round(0.6399334750069298,2) - assert isotopologues[0].string == 'C10 O10 Cl1 37Cl1' - - formula_obj.ion_type = 'RADICAL' - formula_obj.kmd - formula_obj.kendrick_mass - formula_obj.knm - formula_obj.atoms_qnt('C') - formula_obj.class_label - formula_obj.atoms_symbol('13C') - - formula_str = 'C10 H21 N1' - formula_obj = MolecularFormula(formula_str, ion_charge) - ''' - for isotopologue_obj in formula_obj.isotopologues(0.01, current_abundance): - - print("formula:", isotopologue_obj.string, - "mz_calc:", isotopologue_obj.mz_calc, - "prob_ratio:", isotopologue_obj.prob_ratio) - ''' -def test_molecular_formula_adducts(): - - '''test the MolecularFormula class and the calculation of isotopologues with adducts''' - - mol_form = MolecularFormula( - {'C': 6, 'H': 10, 'O': 6}, - ion_charge = -1, - ion_type = 'ADDUCT', - adduct_atom = 'Cl') - - isotopologues = list(mol_form.isotopologues(0.05, 1, dynamic_range=1000)) - - assert round(mol_form.mz_calc,2) == round(213.01713930162907,2) - assert round(isotopologues[0].mz_calc,2) == 
round(215.01418922162907,2) - assert round(isotopologues[0].prob_ratio,2) == round(0.3199577613516368,2) - assert isotopologues[0].string == 'C6 H10 O6' - assert isotopologues[0].adduct_atom == '37Cl' - - - -if __name__ == "__main__": - test_molecular_formula_adducts() - - \ No newline at end of file diff --git a/tests/test_molecularFormulaDBFactory.py b/tests/test_molecularFormulaDBFactory.py deleted file mode 100644 index 07fd63bfe..000000000 --- a/tests/test_molecularFormulaDBFactory.py +++ /dev/null @@ -1,66 +0,0 @@ -__author__ = "Yuri E. Corilo" -__date__ = "Jul 22, 2019" - - -import pickle - -from pathlib import Path -import time, sys, os, pytest -sys.path.append(".") - -from corems.encapsulation.constant import Labels -from corems.molecular_id.factory.MolecularLookupTable import MolecularCombinations -from corems.molecular_id.factory.molecularSQL import MolForm_SQL -from corems.molecular_id.input.nistMSI import ReadNistMSI -from corems.encapsulation.factory.processingSetting import MolecularFormulaSearchSettings - -def test_nist_to_sql(): - - file_location = Path.cwd() / "tests/tests_data/gcms/" / "PNNLMetV20191015.MSL" - - sqlLite_obj = ReadNistMSI(file_location).get_sqlLite_obj() - - sqlLite_obj.query_min_max_ri((1637.30, 1638.30)) - sqlLite_obj.query_min_max_rt((17.111, 18.111)) - sqlLite_obj.query_min_max_ri_and_rt((1637.30, 1638.30),(17.111, 18.111)) - -def test_query_sql(): - - with MolForm_SQL() as sqldb: - #sqldb.clear_data() - - ion_type = Labels.protonated_de_ion - print('ion_type', ion_type) - classe = ['{"O": 2}'] - nominal_mz = [301] - results = sqldb.get_dict_by_classes(classe, ion_type, nominal_mz, +1, MolecularFormulaSearchSettings()) - - #print('total mol formulas found: ', len(list( results.get(classe[0]).get(301)))) - -def generate_database(): - - '''corems_parameters_file: Path for CoreMS JSON Parameters file - --jobs: Number of processes to run - ''' - - #url = "postgresql://postgres:labthomson0102@172.22.113.27:5432/" - #url = 
"postgresql://doadmin:rn9fenbsdbwqis9v@db-postgresql-corems-do-user-7454084-0.a.db.ondigitalocean.com:25060/defaultdb?sslmode=require" - #url = "postgresql://postgres:qqmica@34.71.74.212/defaultdb?sslmode=require" - #molecular_search_settings.url_database = url - #molecular_search_settings.db_jobs = jobs - - molecular_search_settings = MolecularFormulaSearchSettings() - #molecular_search_settings.usedAtoms['C'] = (1,5) - #molecular_search_settings.usedAtoms['O'] = (1,20) - MolecularCombinations().runworker(molecular_search_settings) - -if __name__ == '__main__': - - - test_query_sql() - #settings_parsers.load_search_setting_yaml() - #settings_parsers.load_search_setting_json() - #test_nist_to_sql() - #generate_database() - - \ No newline at end of file diff --git a/tests/test_molecularFormulaSearch.py b/tests/test_molecularFormulaSearch.py deleted file mode 100644 index cf4eb21da..000000000 --- a/tests/test_molecularFormulaSearch.py +++ /dev/null @@ -1,213 +0,0 @@ -__author__ = "Yuri E. 
Corilo" -__date__ = "Jul 25, 2019" - - -import sys -sys.path.append('.') - -import time -from pathlib import Path - -import pytest - -from corems.molecular_id.factory.classification import HeteroatomsClassification -from corems.mass_spectrum.input.numpyArray import ms_from_array_centroid -from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas -from corems.molecular_id.search.priorityAssignment import OxygenPriorityAssignment -from corems.transient.input.brukerSolarix import ReadBrukerSolarix -from corems.encapsulation.factory.parameters import MSParameters - -def create_mass_spectrum(): - - file_location = Path.cwd() / "tests/tests_data/ftms/ESI_NEG_SRFA.d/" - - bruker_reader = ReadBrukerSolarix(file_location) - MSParameters.molecular_search.url_database = '' - MSParameters.mass_spectrum.noise_threshold_method = 'log' - MSParameters.mass_spectrum.noise_threshold_log_nsigma = 10 - MSParameters.ms_peak.peak_min_prominence_percent = 1 - - MSParameters.molecular_search.min_ppm_error = -5 - MSParameters.molecular_search.max_ppm_error = 5 - MSParameters.molecular_search.mz_error_range = 1 - MSParameters.molecular_search.isProtonated = True - MSParameters.molecular_search.isRadical= False - MSParameters.molecular_search.isAdduct= False - - usedatoms = {'C': (1,100) , 'H': (4,200), 'O': (0,10), 'N': (0,1), 'P': (0,1)} - MSParameters.molecular_search.usedAtoms = usedatoms - MSParameters.molecular_search.usedAtoms = usedatoms - - bruker_transient = bruker_reader.get_transient() - - mass_spectrum_obj = bruker_transient.get_mass_spectrum( - plot_result=False, auto_process=True) - - - # polariy need to be set if reading a text file - #polariy = -1 - # load any type of mass list file, change the delimeter to read another type of file, i.e : "," for csv, "\t" for tabulated mass list, etc - #mass_list_reader = Read_MassList(file_location, ) - #mass_spectrum_obj = mass_list_reader.get_mass_spectrum(polarity, auto_process=True) - - return 
mass_spectrum_obj - -def test_run_molecular_formula_search(): - # Test for generating accurate molecular formula from a single mass using the local sql database - # Now also tests that it is handling isotopes correctly (for non-adducts) - mz = [760.58156938877, 761.58548] - abundance = [1, 0.4] - rp, s2n = [[1, 1],[1, 1]] - - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' - MSParameters.mass_spectrum.noise_threshold_absolute_abundance = 0 - - MSParameters.molecular_search.url_database = '' - MSParameters.molecular_search.error_method = 'None' - MSParameters.molecular_search.min_ppm_error = -5 - MSParameters.molecular_search.max_ppm_error = 5 - MSParameters.molecular_search.mz_error_range = 1 - MSParameters.molecular_search.isProtonated = True - MSParameters.molecular_search.isRadical= False - MSParameters.molecular_search.isAdduct= False - - usedatoms = {'C': (1,57) , 'H': (4,200), 'N': (0,1)} - MSParameters.molecular_search.usedAtoms = usedatoms - MSParameters.molecular_search.usedAtoms = usedatoms - mass_spectrum_obj = ms_from_array_centroid(mz, abundance, rp, s2n, 'single mf search', polarity=1, auto_process=True) - mass_spectrum_obj.settings.noise_threshold_method = 'relative threshold' - mass_spectrum_obj.parameters.molecular_search.url_database = '' - mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False - mass_spectrum_obj.molecular_search_settings.use_isotopologue_filter = False - SearchMolecularFormulas(mass_spectrum_obj, find_isotopologues=True).run_worker_ms_peaks([mass_spectrum_obj[0]]) - mass_spectrum_obj.to_dataframe() - - assert mass_spectrum_obj[0][0].string == 'C56 H73 N1' - assert mass_spectrum_obj[1][0].string == 'C55 H73 N1 13C1' - -def test_run_molecular_formula_search_adduct(): - # Test for generating accurate molecular formula from a single mass using the local sql database - # Now also tests that it is handling isotopes correctly (for non-adducts) - mz = [782.563522, 783.566877] #Na+ adduct of 
C56H73N1 and its M+1 - abundance = [1, 0.4] - rp, s2n = [[1, 1],[1, 1]] - - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' - MSParameters.mass_spectrum.noise_threshold_absolute_abundance = 0 - - MSParameters.molecular_search.url_database = '' - MSParameters.molecular_search.error_method = 'None' - MSParameters.molecular_search.min_ppm_error = -5 - MSParameters.molecular_search.max_ppm_error = 5 - MSParameters.molecular_search.mz_error_range = 1 - MSParameters.molecular_search.isProtonated = True - MSParameters.molecular_search.isRadical= False - MSParameters.molecular_search.isAdduct= True - - usedatoms = {'C': (1,57) , 'H': (4,200), 'N': (0,1)} - MSParameters.molecular_search.usedAtoms = usedatoms - mass_spectrum_obj = ms_from_array_centroid(mz, abundance, rp, s2n, 'single mf search', polarity=1, auto_process=True) - mass_spectrum_obj.settings.noise_threshold_method = 'relative threshold' - mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False - mass_spectrum_obj.molecular_search_settings.use_isotopologue_filter = False - mass_spectrum_obj.molecular_search_settings.url_database = '' - SearchMolecularFormulas(mass_spectrum_obj, find_isotopologues=True).run_worker_ms_peaks([mass_spectrum_obj[0]]) - mass_spectrum_obj.to_dataframe() - - assert mass_spectrum_obj[0][0].string == 'C56 H73 N1' - assert mass_spectrum_obj[0][0].H_C == 73/56 - assert mass_spectrum_obj[1][0].string == 'C55 H73 N1 13C1' - assert mass_spectrum_obj[1][0].H_C == 73/56 - - - -def test_mspeak_search(): - - mass_spec_obj = create_mass_spectrum() - mass_spec_obj.parameters.molecular_search.url_database = '' - mspeak_obj = mass_spec_obj.most_abundant_mspeak - SearchMolecularFormulas(mass_spec_obj).run_worker_ms_peaks([mspeak_obj]) - if mspeak_obj.is_assigned: - if len(mspeak_obj.molecular_formula_earth_filter()) > 0: - print(mspeak_obj.molecular_formula_earth_filter().string) - if len(mspeak_obj.molecular_formula_water_filter()) > 0: - 
print(mspeak_obj.molecular_formula_water_filter().string) - if len(mspeak_obj.molecular_formula_air_filter()) > 0: - print(mspeak_obj.molecular_formula_air_filter().string) - print(mspeak_obj.cia_score_S_P_error().string) - print(mspeak_obj.cia_score_N_S_P_error().string) - print(mspeak_obj.best_molecular_formula_candidate.string) - print(mspeak_obj[0].mz_error, mspeak_obj[0].string_formated) - -def test_molecular_formula_search_db(): - - MSParameters.molecular_search.isAdduct = False - MSParameters.molecular_search.isRadical = False - - mass_spec_obj = create_mass_spectrum() - mass_spec_obj.parameters.molecular_search.url_database = '' - - time1 = time.time() - - SearchMolecularFormulas(mass_spec_obj, first_hit=True).run_worker_mass_spectrum() - - print('searching molecular formulas took %.3f seconds' % (time.time() - time1)) - - i = 0 - j = 0 - error = list() - mass = list() - abundance = list() - - for mspeak in mass_spec_obj.sort_by_abundance(): - - if mspeak.is_assigned: - i += 1 - for mformula in mspeak: - mass.append(mspeak.mz_exp) - error.append(mformula.mz_error) - abundance.append(mspeak.abundance) - else: - j += 1 - pass - - print('%i peaks assigned and %i peaks not assigned' % (i, j)) - -def test_priorityAssignment(): - - MSParameters.molecular_search.error_method = 'None' - MSParameters.molecular_search.min_ppm_error = -3 - MSParameters.molecular_search.max_ppm_error = 5 - MSParameters.molecular_search.mz_error_range = 1 - MSParameters.molecular_search.isProtonated = True - MSParameters.molecular_search.isRadical= True - MSParameters.molecular_search.isAdduct= False - usedatoms = {'C': (1,100) , 'H': (4,200), 'O': (1,10)} - MSParameters.molecular_search.usedAtoms = usedatoms - - mass_spec_obj = create_mass_spectrum() - mass_spec_obj.process_mass_spec() - mass_spec_obj.parameters.molecular_search.url_database = '' - - assignOx = OxygenPriorityAssignment(mass_spec_obj) - - assignOx.run() - - #test classification - mass_spec_obj.percentile_assigned() - - 
mass_spectrum_by_classes = HeteroatomsClassification(mass_spec_obj) - - mass_spectrum_by_classes.plot_ms_assigned_unassigned() - - mass_spectrum_by_classes.atoms_ratio_all("H", "C") - - mass_spectrum_by_classes.atoms_ratio_all("H", "C") - -if __name__ == "__main__": - - #test_priorityAssignment() - #() - test_run_molecular_formula_search() - test_run_molecular_formula_search_adduct() - #test_mspeak_search() diff --git a/tests/test_molecular_formula.py b/tests/test_molecular_formula.py new file mode 100644 index 000000000..3b8793a02 --- /dev/null +++ b/tests/test_molecular_formula.py @@ -0,0 +1,59 @@ +__author__ = "Yuri E. Corilo" +__date__ = "Jul 22, 2019" + +import sys + +sys.path.append(".") + +from corems.molecular_formula.factory.MolecularFormulaFactory import MolecularFormula +from corems.encapsulation.constant import Labels + + +def test_molecular_formula_from_dict(): + """Test the parsing of a molecular formula string and the calculation of isotopologues""" + formula_dict = {"C": 10, "H": 0, "O": 10, "Cl": 2, Labels.ion_type: "RADICAL"} + ion_charge = 1 + formula_obj = MolecularFormula( + molecular_formula=formula_dict, ion_charge=ion_charge + ) + assert formula_obj.ion_type == "RADICAL" + assert round(formula_obj.mz_calc, 2) == round(349.886303060457, 2) + assert formula_obj.kmd == -50 + assert round(formula_obj.kendrick_mass, 2) == round(349.4956152007638, 2) + assert formula_obj.knm == 349 + assert formula_obj.class_label == "O10 Cl2 -R" + assert formula_obj.atoms_qnt("C") == 10 + assert formula_obj.atoms_symbol("13C") == "C" + assert formula_obj.string == "C10 O10 Cl2" + + # Create isotopologues of the formula_obj + isotopologues = list(formula_obj.isotopologues(0.01, 1, 500)) + assert round(isotopologues[0].mz_calc, 2) == round(351.883352980637, 2) + assert round(isotopologues[0].prob_ratio, 2) == round(0.6399334750069298, 2) + assert isotopologues[0].string == "C10 O10 Cl1 37Cl1" + + +def test_molecular_formula_from_string(): + """Test the parsing of a 
molecular formula string and the dealing with neutral mass""" + ion_charge = 1 + formula_str = "C10 H21 N1" + formula_obj = MolecularFormula(formula_str, ion_charge) + assert formula_obj.string == "C10 H21 N1" + assert formula_obj.ion_type is None + # This returns a neutral mass since the ion type is not set and therefore the ion_type is interpreted as None - is that expected? + assert round(formula_obj.mz_calc, 2) == round(formula_obj.neutral_mass, 2) + + +def test_molecular_formula_adducts(): + """Test the parsing of a molecular formula string with adducts and the calculation of isotopologues""" + formula_obj = MolecularFormula( + {"C": 6, "H": 10, "O": 6}, ion_charge=-1, ion_type="ADDUCT", adduct_atom="Cl" + ) + + isotopologues = list(formula_obj.isotopologues(0.05, 1, dynamic_range=1000)) + + assert round(formula_obj.mz_calc, 2) == round(213.01713930162907, 2) + assert round(isotopologues[0].mz_calc, 2) == round(215.01418922162907, 2) + assert round(isotopologues[0].prob_ratio, 2) == round(0.3199577613516368, 2) + assert isotopologues[0].string == "C6 H10 O6" + assert isotopologues[0].adduct_atom == "37Cl" diff --git a/tests/test_molecular_formula_db_factory.py b/tests/test_molecular_formula_db_factory.py new file mode 100644 index 000000000..ddbef2b37 --- /dev/null +++ b/tests/test_molecular_formula_db_factory.py @@ -0,0 +1,32 @@ +from pathlib import Path + +from corems.encapsulation.constant import Labels +from corems.molecular_id.factory.molecularSQL import MolForm_SQL +from corems.molecular_id.input.nistMSI import ReadNistMSI +from corems.encapsulation.factory.processingSetting import MolecularFormulaSearchSettings + +def test_nist_to_sql(): + + file_location = Path.cwd() / "tests/tests_data/gcms/" / "PNNLMetV20191015.MSL" + + sqlLite_obj = ReadNistMSI(file_location).get_sqlLite_obj() + + response = sqlLite_obj.query_min_max_ri((1637.30, 1638.30)) + assert len(response) == 6 + + response = sqlLite_obj.query_min_max_rt((17.111, 18.111)) + assert 
len(response) == 137 + + response = sqlLite_obj.query_min_max_ri_and_rt((1637.30, 1638.30),(17.111, 18.111)) + assert len(response) == 6 + +def test_query_sql(): + + sqldb = MolForm_SQL() + + ion_type = Labels.protonated_de_ion + classe = ['{"O": 2}'] + nominal_mz = [301] + results = sqldb.get_dict_by_classes(classe, ion_type, nominal_mz, +1, MolecularFormulaSearchSettings()) + assert len(results.get(classe[0]).get(301)) == 3 + \ No newline at end of file diff --git a/tests/test_molecular_formula_search.py b/tests/test_molecular_formula_search.py new file mode 100644 index 000000000..f8146f19a --- /dev/null +++ b/tests/test_molecular_formula_search.py @@ -0,0 +1,173 @@ +import sys + +sys.path.append(".") + +from corems.molecular_id.factory.classification import HeteroatomsClassification +from corems.mass_spectrum.input.numpyArray import ms_from_array_centroid +from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas +from corems.molecular_id.search.priorityAssignment import OxygenPriorityAssignment + + +def test_run_molecular_formula_search(): + """Test for generating accurate molecular formula from mass and isotope using the local sql database""" + # Generate a mass spectrum object from a list of mz and abundance + mz = [760.58156938877, 761.58548] + abundance = [1, 0.4] + rp, s2n = [[1, 1], [1, 1]] + mass_spectrum_obj = ms_from_array_centroid( + mz, abundance, rp, s2n, "single mf search", polarity=1, auto_process=False + ) + mass_spectrum_obj.settings.noise_threshold_method = "absolute_abundance" + mass_spectrum_obj.settings.noise_threshold_absolute_abundance = 0 + + # Set the settings for the molecular search on the mass spectrum object + mass_spectrum_obj.molecular_search_settings.url_database = "" + mass_spectrum_obj.molecular_search_settings.error_method = "None" + mass_spectrum_obj.molecular_search_settings.min_ppm_error = -5 + mass_spectrum_obj.molecular_search_settings.max_ppm_error = 5 + 
mass_spectrum_obj.molecular_search_settings.mz_error_range = 1 + mass_spectrum_obj.molecular_search_settings.isProtonated = True + mass_spectrum_obj.molecular_search_settings.isRadical = False + mass_spectrum_obj.molecular_search_settings.isAdduct = False + mass_spectrum_obj.molecular_search_settings.usedAtoms = { + "C": (1, 57), + "H": (4, 200), + "N": (0, 1), + } + + # Process the mass spectrum object + mass_spectrum_obj.process_mass_spec() + + # Run the molecular formula search on the mass spectrum object + SearchMolecularFormulas( + mass_spectrum_obj, find_isotopologues=True + ).run_worker_ms_peaks([mass_spectrum_obj[0]]) + assert mass_spectrum_obj.to_dataframe().shape[0] > 1 + assert mass_spectrum_obj[0][0].string == "C56 H73 N1" + assert mass_spectrum_obj[1][0].string == "C55 H73 N1 13C1" + + +def test_run_molecular_formula_search_adduct(): + """Test for generating accurate molecular formula from mass and isotope, for an adduct, using the local sql database""" + # Generate a mass spectrum object from a list of mz and abundance + mz = [782.563522, 783.566877] # Na+ adduct of C56H73N1 and its M+1 + abundance = [1, 0.4] + rp, s2n = [[1, 1], [1, 1]] + mass_spectrum_obj = ms_from_array_centroid( + mz, abundance, rp, s2n, "single mf search", polarity=1, auto_process=False + ) + mass_spectrum_obj.settings.noise_threshold_method = "absolute_abundance" + mass_spectrum_obj.settings.noise_threshold_absolute_abundance = 0 + + # Set the settings for the molecular search on the mass spectrum object + mass_spectrum_obj.molecular_search_settings.url_database = "" + mass_spectrum_obj.molecular_search_settings.error_method = "None" + mass_spectrum_obj.molecular_search_settings.min_ppm_error = -5 + mass_spectrum_obj.molecular_search_settings.max_ppm_error = 5 + mass_spectrum_obj.molecular_search_settings.mz_error_range = 1 + mass_spectrum_obj.molecular_search_settings.isProtonated = True + mass_spectrum_obj.molecular_search_settings.isRadical = False + 
mass_spectrum_obj.molecular_search_settings.isAdduct = True + mass_spectrum_obj.molecular_search_settings.usedAtoms = { + "C": (1, 57), + "H": (4, 200), + "N": (0, 1), + } + mass_spectrum_obj.molecular_search_settings.use_min_peaks_filter = False + mass_spectrum_obj.molecular_search_settings.use_isotopologue_filter = False + + # Process the mass spectrum object + mass_spectrum_obj.process_mass_spec() + + # Run the molecular formula search on the mass spectrum object + SearchMolecularFormulas( + mass_spectrum_obj, find_isotopologues=True + ).run_worker_ms_peaks([mass_spectrum_obj[0]]) + assert mass_spectrum_obj.to_dataframe().shape[0] > 1 + assert mass_spectrum_obj[0][0].string == "C56 H73 N1" + assert mass_spectrum_obj[0][0].H_C == 73 / 56 + assert mass_spectrum_obj[1][0].string == "C55 H73 N1 13C1" + assert mass_spectrum_obj[1][0].H_C == 73 / 56 + + +def test_mspeak_search(mass_spectrum_ftms): + mass_spec_obj = mass_spectrum_ftms + mass_spec_obj.molecular_search_settings.url_database = "" + mass_spec_obj.molecular_search_settings.usedAtoms = { + "C": (1, 100), + "H": (4, 200), + "O": (0, 10), + "N": (0, 1), + "P": (0, 1), + } + mass_spec_obj.molecular_search_settings.isAdduct = False + mass_spec_obj.molecular_search_settings.isRadical = False + mspeak_obj = mass_spec_obj.most_abundant_mspeak + SearchMolecularFormulas(mass_spec_obj).run_worker_ms_peaks([mspeak_obj]) + assert mspeak_obj.is_assigned + # Try each of the possible filters + mspeak_obj.molecular_formula_earth_filter() + mspeak_obj.molecular_formula_water_filter() + mspeak_obj.molecular_formula_air_filter() + mspeak_obj.cia_score_S_P_error() + mspeak_obj.cia_score_N_S_P_error() + assert mspeak_obj.best_molecular_formula_candidate.string == "C29 H11 O2 P1" + mspeak_obj[0].string_formated + mspeak_obj[0].mz_error + + +def test_molecular_formula_search_db(mass_spectrum_ftms): + mass_spec_obj = mass_spectrum_ftms + mass_spec_obj.molecular_search_settings.url_database = "" + 
mass_spec_obj.molecular_search_settings.usedAtoms = { + "C": (1, 100), + "H": (4, 200), + "O": (0, 10), + "N": (0, 1), + "P": (0, 1), + } + + SearchMolecularFormulas(mass_spec_obj, first_hit=True).run_worker_mass_spectrum() + + i = 0 + j = 0 + error = list() + mass = list() + abundance = list() + + for mspeak in mass_spec_obj.sort_by_abundance(): + if mspeak.is_assigned: + i += 1 + for mformula in mspeak: + mass.append(mspeak.mz_exp) + error.append(mformula.mz_error) + abundance.append(mspeak.abundance) + else: + j += 1 + pass + fraction_assigned = i / (i + j) + assert fraction_assigned > 0.7 + + +def test_priorityAssignment(mass_spectrum_ftms): + mass_spectrum_ftms.molecular_search_settings.url_database = "" + mass_spectrum_ftms.molecular_search_settings.error_method = "None" + mass_spectrum_ftms.molecular_search_settings.min_ppm_error = -3 + mass_spectrum_ftms.molecular_search_settings.max_ppm_error = 5 + mass_spectrum_ftms.molecular_search_settings.mz_error_range = 1 + mass_spectrum_ftms.molecular_search_settings.isProtonated = True + mass_spectrum_ftms.molecular_search_settings.isRadical = True + mass_spectrum_ftms.molecular_search_settings.isAdduct = False + usedatoms = {"C": (1, 100), "H": (4, 200), "O": (1, 10)} + mass_spectrum_ftms.molecular_search_settings.usedAtoms = usedatoms + mass_spectrum_ftms.process_mass_spec() + + # Run the molecular formula search on the mass spectrum object and check the percentage of assigned peaks + assignOx = OxygenPriorityAssignment(mass_spectrum_ftms) + assignOx.run() + assert mass_spectrum_ftms.percentile_assigned()[0] > 15 + + # Test the HeteroatomsClassification class + mass_spectrum_by_classes = HeteroatomsClassification(mass_spectrum_ftms) + mass_spectrum_by_classes.plot_ms_assigned_unassigned() + assert mass_spectrum_by_classes.atoms_ratio_all("H", "C")[0] > 0.5 diff --git a/tests/test_mspeak.py b/tests/test_mspeak.py index 1520e6c74..ea34a4377 100644 --- a/tests/test_mspeak.py +++ b/tests/test_mspeak.py @@ -1,37 +1,17 
@@ - -import pytest import sys -from pathlib import Path -sys.path.append(".") +sys.path.append(".") from corems.ms_peak.factory.MSPeakClasses import ICRMassPeak -from corems.transient.input.brukerSolarix import ReadBrukerSolarix - -__author__ = "Yuri E. Corilo" -__date__ = "Jul 02, 2019" - -def test_mspeaks_fit(): - - from matplotlib import pyplot - file_location = Path.cwd() / "tests/tests_data/ftms/ESI_NEG_SRFA.d/" - bruker_reader = ReadBrukerSolarix(file_location) - bruker_transient = bruker_reader.get_transient() +def test_mspeaks_fit(mass_spectrum_ftms): + mass_spectrum_ftms[3].plot_simulation() + mass_spectrum_ftms[3].plot_simulation(sim_type="gaussian", oversample_multiplier=10) + mass_spectrum_ftms[3].plot_simulation() - mass_spectrum_obj = bruker_transient.get_mass_spectrum(plot_result=False, auto_process=True) - i = 0 - - mass_spectrum_obj[3].plot_simulation() - mass_spectrum_obj[3].plot_simulation(sim_type="gaussian", oversample_multiplier=10) - mass_spectrum_obj[3].plot_simulation() - - # pyplot.show() def test_mspeak_calculations(): - - kendrick_base = {'C': 1, 'H': 2} - + kendrick_base = {"C": 1, "H": 2} ion_charge = +1 mz_exp = 212.1234 abundance = 200 @@ -39,8 +19,16 @@ def test_mspeak_calculations(): signal_to_noise = 200 massspec_index = (300, 300, 300) index = 1 - mspeak = ICRMassPeak(ion_charge, mz_exp, abundance, - resolving_power, signal_to_noise, massspec_index, index) + mspeak = ICRMassPeak( + ion_charge, + mz_exp, + abundance, + resolving_power, + signal_to_noise, + massspec_index, + index, + ) + assert mspeak.resolving_power == 1000000 mspeak.change_kendrick_base(kendrick_base) @@ -50,12 +38,8 @@ def test_mspeak_calculations(): mspeak.plot() assert round(mspeak.kendrick_mass, 3) == 211.887 - print(round(mspeak.kmd * 100, 0)) == -89 + assert round(mspeak.kmd * 100, 0) == -89 assert mspeak.knm == 211 mspeak.set_calc_resolving_power(50, 3) - -if __name__ == '__main__': - - # test_mspeaks_fit() - test_mspeak_calculations() + assert 
round(mspeak.resolving_power, 0) == 9008907 diff --git a/tests/test_output.py b/tests/test_output.py index 7ce7fba08..760e19662 100644 --- a/tests/test_output.py +++ b/tests/test_output.py @@ -1,91 +1,59 @@ - import sys -from pathlib import Path sys.path.append(".") +import os -import pytest - -from corems.mass_spectra.output.export import HighResMassSpectraExport from corems.mass_spectrum.output.export import HighResMassSpecExport -from corems.mass_spectrum.input.massList import ReadCoremsMasslist -from corems.mass_spectra.input.boosterHDF5 import ReadHDF_BoosterMassSpectra -from corems.encapsulation.factory.parameters import MSParameters - -def import_corems_mass_list(): - - file_location = Path.cwd() / "tests/tests_data/ftms/" / "ESI_NEG_SRFA_COREMS.csv" - - # polarity need to be set or read from the file - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' - MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 1 - - # load any type of mass list file, change the delimeter to read another type of file, i.e : "," for csv, "\t" for tabulated mass list, etc - mass_list_reader = ReadCoremsMasslist(file_location) - - mass_spectrum = mass_list_reader.get_mass_spectrum() - - return mass_spectrum - -def import_booster_mass_spectra_hdf(): - - file_path = Path.cwd() / "tests/tests_data/ftms/" / "ESFA_100k_9767-13548_chB.A_re_pc_CoAddAll_mFT.h5" - - if file_path.exists(): - # polarity need to be set or read from the file - booster_reader = ReadHDF_BoosterMassSpectra(file_path) - - booster_reader.start() - booster_reader.join() - - return booster_reader.get_lcms_obj() - - -def test_export_mass_spectra(): - mass_spectra = import_booster_mass_spectra_hdf() +def test_export_mass_spectrum(mass_spectrum_ftms): - exportMS = HighResMassSpectraExport('NEG_ESI_SRFA_CoreMS', mass_spectra) - - exportMS.get_mass_spectra_attrs(mass_spectra) - exportMS.get_pandas_df() - exportMS.to_pandas() - exportMS.to_excel() - exportMS.to_csv() - exportMS.to_hdf() 
- - -def test_export_mass_spectrum(): - - mass_spectrum = import_corems_mass_list() - - exportMS = HighResMassSpecExport('NEG_ESI_SRFA_CoreMS', mass_spectrum) - - # exportMS.to_pandas() - # exportMS.to_excel() - # exportMS.to_csv() - # exportMS.to_hdf() + exportMS = HighResMassSpecExport('NEG_ESI_SRFA_CoreMS', mass_spectrum_ftms) exportMS._output_type = 'excel' exportMS.save() + # Check that the file was created and remove it + assert os.path.exists('NEG_ESI_SRFA_CoreMS.xlsx') + os.remove('NEG_ESI_SRFA_CoreMS.xlsx') + os.remove('NEG_ESI_SRFA_CoreMS.json') + exportMS._output_type = 'csv' exportMS.save() + assert os.path.exists('NEG_ESI_SRFA_CoreMS.csv') + os.remove('NEG_ESI_SRFA_CoreMS.csv') + os.remove('NEG_ESI_SRFA_CoreMS.json') + exportMS._output_type = 'pandas' exportMS.save() + assert os.path.exists('NEG_ESI_SRFA_CoreMS.pkl') + os.remove('NEG_ESI_SRFA_CoreMS.pkl') + os.remove('NEG_ESI_SRFA_CoreMS.json') + exportMS._output_type = 'hdf5' exportMS.save() - exportMS.get_pandas_df() - exportMS.to_json() - - mass_spectrum.to_excel('NEG_ESI_SRFA_CoreMS') - mass_spectrum.to_dataframe() - - mass_spectrum.molecular_search_settings.output_score_method = "prob_score" - mass_spectrum.to_csv('NEG_ESI_SRFA_CoreMS_prob_score') - - mass_spectrum.to_json() - mass_spectrum.to_pandas('NEG_ESI_SRFA_CoreMS') - -if __name__ == "__main__": - - test_export_mass_spectra() - #test_export_mass_spectrum() + assert os.path.exists('NEG_ESI_SRFA_CoreMS.hdf5') + os.remove('NEG_ESI_SRFA_CoreMS.hdf5') + + df = exportMS.get_pandas_df() + assert df.shape[0] > 10 + json_dump1 = exportMS.to_json() + + mass_spectrum_ftms.to_excel('NEG_ESI_SRFA_CoreMS') + assert os.path.exists('NEG_ESI_SRFA_CoreMS.xlsx') + os.remove('NEG_ESI_SRFA_CoreMS.xlsx') + os.remove('NEG_ESI_SRFA_CoreMS.json') + df = mass_spectrum_ftms.to_dataframe() + assert df.shape[0] > 10 + + json_dump = mass_spectrum_ftms.to_json() + assert len(json_dump) > 10 + assert json_dump1 == json_dump + + 
mass_spectrum_ftms.molecular_search_settings.output_score_method = "prob_score" + mass_spectrum_ftms.to_csv('NEG_ESI_SRFA_CoreMS_prob_score') + assert os.path.exists('NEG_ESI_SRFA_CoreMS_prob_score.csv') + os.remove('NEG_ESI_SRFA_CoreMS_prob_score.csv') + os.remove('NEG_ESI_SRFA_CoreMS_prob_score.json') + + mass_spectrum_ftms.to_pandas('NEG_ESI_SRFA_CoreMS') + assert os.path.exists('NEG_ESI_SRFA_CoreMS.pkl') + os.remove('NEG_ESI_SRFA_CoreMS.pkl') + os.remove('NEG_ESI_SRFA_CoreMS.json') \ No newline at end of file diff --git a/tests/test_searchMassListRef.py b/tests/test_searchMassListRef.py deleted file mode 100644 index 32766a834..000000000 --- a/tests/test_searchMassListRef.py +++ /dev/null @@ -1,50 +0,0 @@ - -import os, sys -from pathlib import Path -sys.path.append(".") - -from corems.molecular_formula.input.masslist_ref import ImportMassListRef -from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas -from corems.encapsulation.constant import Labels -from corems.encapsulation.factory.parameters import MSParameters - -import pytest - -__author__ = "Yuri E. 
Corilo" -__date__ = "Jul 02, 2019" - -def get_mass_spectrum(): - - from corems.mass_spectrum.input.massList import ReadMassList - - file_location = Path.cwd() / "tests/tests_data/ftms/" / "ESI_NEG_ESFA.ascii" - - #polarity needs to be set or read from the file - - polarity = -1 - - return ReadMassList(file_location).get_mass_spectrum(polarity, auto_process=True) - -def test_search_imported_ref_files(): - MSParameters.mass_spectrum.noise_threshold_method = 'relative_abundance' - MSParameters.mass_spectrum.noise_threshold_min_relative_abundance = 1 - mass_spectrum_obj = get_mass_spectrum() - mass_spectrum_obj.parameters.molecular_search.url_database = '' - - ref_file_location = os.path.join(os.getcwd(), os.path.normcase("tests/tests_data/ftms/")) + "SRFA.ref" - - mf_references_list = ImportMassListRef(ref_file_location).from_bruker_ref_file() - - for mf in mf_references_list: - - print(mf.mz_calc, mf.class_label) - - ion_type = 'unknown' - - ms_peaks_assigned = SearchMolecularFormulas(mass_spectrum_obj).search_mol_formulas( mf_references_list, ion_type, neutral_molform=False, find_isotopologues=False) - - assert (len(ms_peaks_assigned)) > 0 - -if __name__ == '__main__': - - test_search_imported_ref_files() \ No newline at end of file diff --git a/tests/test_search_mass_list.py b/tests/test_search_mass_list.py new file mode 100644 index 000000000..9573f25f5 --- /dev/null +++ b/tests/test_search_mass_list.py @@ -0,0 +1,22 @@ +import sys + +sys.path.append(".") + +from corems.molecular_formula.input.masslist_ref import ImportMassListRef +from corems.molecular_id.search.molecularFormulaSearch import SearchMolecularFormulas + +def test_search_imported_ref_files(mass_spectrum_ftms, ref_file_location): + mass_spectrum_obj = mass_spectrum_ftms + mass_spectrum_obj.molecular_search_settings.url_database = "" + mf_references_list = ImportMassListRef(ref_file_location).from_bruker_ref_file() + assert len(mf_references_list) == 60 + assert round(mf_references_list[0].mz_calc, 2) 
== 149.06 + assert mf_references_list[0].class_label == "O2" + + ion_type = "unknown" + + ms_peaks_assigned = SearchMolecularFormulas(mass_spectrum_obj).search_mol_formulas( + mf_references_list, ion_type, neutral_molform=False, find_isotopologues=False + ) + + assert (len(ms_peaks_assigned)) > 10 diff --git a/tests/test_setting_settings.py b/tests/test_setting_settings.py index e75fbef4d..5b51328a9 100644 --- a/tests/test_setting_settings.py +++ b/tests/test_setting_settings.py @@ -1,36 +1,50 @@ import sys +import os sys.path.append(".") -from corems.encapsulation.input import parameter_from_json from corems.encapsulation.output import parameter_to_json, parameter_to_dict from corems.encapsulation.factory.processingSetting import MolecularLookupDictSettings def test_toml(): parameter_to_json.dump_all_settings_toml() + assert os.path.exists('SettingsCoreMS.toml') + os.remove('SettingsCoreMS.toml') + parameter_to_json.dump_gcms_settings_toml() + assert os.path.exists('SettingsCoreMS.toml') + os.remove('SettingsCoreMS.toml') + parameter_to_json.dump_ms_settings_toml() + assert os.path.exists('SettingsCoreMS.toml') + os.remove('SettingsCoreMS.toml') def test_json(): parameter_to_json.dump_all_settings_json() + assert os.path.exists('SettingsCoreMS.json') + os.remove('SettingsCoreMS.json') + parameter_to_json.dump_gcms_settings_json() + assert os.path.exists('SettingsCoreMS.json') + os.remove('SettingsCoreMS.json') + parameter_to_json.dump_ms_settings_json() + assert os.path.exists('SettingsCoreMS.json') + os.remove('SettingsCoreMS.json') def test_data(): - parameter_to_dict.get_dict_ms_default_data() - parameter_to_dict.get_dict_gcms_default_data() + param_dict = parameter_to_dict.get_dict_ms_default_data() + assert len(param_dict) > 4 + param_dict = parameter_to_dict.get_dict_gcms_default_data() + assert len(param_dict) > 1 def test_settings_search(): - test = MolecularLookupDictSettings() - test.usedAtoms['C'] = (0,0) - test.url_database = 'test' + test = 
MolecularLookupDictSettings().__dict__ + assert len(test) > 14 + assert "usedAtoms" in test + assert "url_database" in test -if __name__ == "__main__": - - test_json() - test_data() - test_settings_search() \ No newline at end of file diff --git a/tests/win_only/__init__.py b/tests/win_only/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/win_only/test_rawFileReader.py b/tests/win_only/test_rawFileReader.py deleted file mode 100644 index 8cba286a4..000000000 --- a/tests/win_only/test_rawFileReader.py +++ /dev/null @@ -1,308 +0,0 @@ -import sys -import clr - -sys.path.append("ext_lib") - -clr.AddReference("ThermoFisher.CommonCore.RawFileReader") -clr.AddReference("ThermoFisher.CommonCore.Data") -clr.AddReference("ThermoFisher.CommonCore.MassPrecisionEstimator") - -from matplotlib import pyplot - -from ThermoFisher.CommonCore.RawFileReader import RawFileReaderAdapter -from ThermoFisher.CommonCore.Data import ToleranceUnits, Extensions - -from ThermoFisher.CommonCore.Data.Business import Scan -from ThermoFisher.CommonCore.Data.Business import Device -from ThermoFisher.CommonCore.Data.Business import MassOptions -from ThermoFisher.CommonCore.MassPrecisionEstimator import PrecisionEstimate -from System.Collections.Generic import List -from System import String - -def getRawFile(filename): - - rawFile = RawFileReaderAdapter.FileFactory(filename) - - print("The RAW file has data from {0} instruments".format(rawFile.InstrumentCount)) - - rawFile.SelectInstrument(Device.MS, 1) - - return rawFile - -def readAllSpectra( rawFile, firstScanNumber:int, lastScanNumber:int, outputData:bool): - - for scanNumber in range(firstScanNumber,lastScanNumber+1): - - scanFilter = rawFile.GetFilterForScanNumber(firstScanNumber) - - print( scanFilter.ToString() ) - - # Get the scan from the RAW file. 
This method uses the Scan.FromFile method which returns a - # Scan object that contains both the segmented and centroid (label) data from an FTMS scan - # or just the segmented data in non-FTMS scans. The GetSpectrum method demonstrates an - # alternative method for reading scans. - - scan = Scan.FromFile(rawFile, scanNumber) - - # If that scan contains FTMS data then Centroid stream will be populated so check to see if it is present. - labelSize = 0 - - if scan.HasCentroidStream: - - labelSize = scan.CentroidScan.Length - - - # for non-FTMS data, the preferred data will be populated - dataSize = scan.PreferredMasses.Length - - if outputData: - - print("Spectrum {0} - {1}: normal {2}, label {3} points".format(scanNumber, scanFilter.ToString(), dataSize, labelSize) ) - -def AnalyzeAllScans(rawFile, firstScanNumber:int, lastScanNumber:int): - - # Test the preferred (normal) data and centroid (high resolution/label) data - failedCentroid = 0 - failedPreferred = 0 - - for scanNumber in range(firstScanNumber,lastScanNumber+1): - # Get each scan from the RAW file - scan = Scan.FromFile(rawFile, scanNumber) - - # Check to see if the RAW file contains label (high-res) data and if it is present - # then look for any data that is out of order - if scan.HasCentroidStream: - - if scan.CentroidScan.Length > 0: - - currentMass = scan.CentroidScan.Masses[0] - - for index in range(1, scan.CentroidScan.Length): - - if scan.CentroidScan.Masses[index] > currentMass: - - currentMass = scan.CentroidScan.Masses[index] - - else: - - if failedCentroid == 0: - - print("First failure: Failed in scan data at: Scan: " + scanNumber + " Mass: " - + currentMass.ToString("F4")) - - - failedCentroid += 1 - - - # Check the normal (non-label) data in the RAW file for any out-of-order data - if scan.PreferredMasses.Length > 0: - - currentMass = scan.PreferredMasses[0] - - # print(scan.PreferredMasses.Length, scan.CentroidScan.Length, scan.SegmentedScan.Positions.Length) - - for index in range(1, 
scan.PreferredMasses.Length): - - if scan.PreferredMasses[index] > currentMass: - - currentMass = scan.PreferredMasses[index] - - else: - - if (failedPreferred == 0): - - print("First failure: Failed in scan data at: Scan: " + str(scanNumber) + " Mass: " - + currentMass.ToString("F2")) - - failedPreferred += 1 - - # Display a message indicating if any of the scans had data that was "out of order" - if failedPreferred == 0 and failedCentroid == 0: - - print("Analysis completed: No out of order data found") - - else: - - print("Analysis completed: Preferred data failed: " + str(failedPreferred) + " Centroid data failed: " + str(failedCentroid) ) - - -def CalculateMassPrecision(rawFile, scanNumber:int): - - # Get the scan from the RAW file - scan = Scan.FromFile(rawFile, scanNumber) - - # Get the scan event and from the scan event get the analyzer type for this scan - scanEvent = rawFile.GetScanEventForScanNumber(scanNumber) - - scanFilter = rawFile.GetFilterForScanNumber(scanNumber) - - print(scanFilter.MassAnalyzer) - print(scanEvent) - - # Get the trailer extra data to get the ion time for this file - logEntry = rawFile.GetTrailerExtraInformation(scanNumber) - - print(logEntry.Labels) - - trailerHeadings = List[String]() - trailerValues = List[String]() - for i in range(logEntry.Length): - - trailerHeadings.Add(String(logEntry.Labels[i])) - trailerValues.Add(String(logEntry.Values[i])) - - # create the mass precision estimate object - precisionEstimate = PrecisionEstimate() - - # Get the ion time from the trailer extra data values - ionTime = precisionEstimate.GetIonTime(scanFilter.MassAnalyzer, scan, trailerHeadings, trailerValues) - - # Calculate the mass precision for the scan - listResults = precisionEstimate.GetMassPrecisionEstimate(scan, scanFilter.MassAnalyzer, ionTime, rawFile.RunHeader.MassResolution) - - # Output the mass precision results - if len(listResults) > 0: - - print("Mass Precision Results:") - - for result in listResults: - - print("Mass {}, 
mmu = {}, ppm = {}".format(result.Mass, result.MassAccuracyInMmu, result.MassAccuracyInPpm) ) - - -def GetAverageSpectrum(rawFile, firstScanNumber:int, lastScanNumber:int, outputData:bool): - - # Create the mass options object that will be used when averaging the scans - options = MassOptions() - - options.ToleranceUnits = ToleranceUnits.ppm - options.Tolerance = 5.0 - - # Get the scan filter for the first scan. This scan filter will be used to located - # scans within the given scan range of the same type - scanFilter = rawFile.GetFilterForScanNumber(firstScanNumber) - - print(scanFilter.ScanMode) - - # Get the average mass spectrum for the provided scan range. In addition to getting the - # average scan using a scan range, the library also provides a similar method that takes - # a time range. - averageScan = Extensions.AverageScansInScanRange(rawFile, firstScanNumber, lastScanNumber, scanFilter, options) - - #average= ScanAveragerFactory.GetScanAverager(rawFile) - - #averageScan = rawFile.AverageScansInScanRange(firstScanNumber, lastScanNumber, scanFilter, options) - if averageScan.HasCentroidStream: - - print("Average spectrum ({0} points)".format( averageScan.CentroidScan.Length) ) - - # Print the spectral data (mass, intensity values) - #if outputData: - - # for i in range(averageScan.CentroidScan.Length): - - # print(" {}\t{}".format(averageScan.CentroidScan.Masses[i], averageScan.CentroidScan.Intensities[i])) - - # This example uses a different method to get the same average spectrum that was calculated in the - # previous portion of this method. Instead of passing the start and end scan, a list of scans will - # be passed to the GetAveragedMassSpectrum function. 
- scans = List[int]() - for scan in (1, 6, 7, 9, 11, 12, 14): - scans.Add(scan) - - averageScan = Extensions.AverageScans(rawFile,scans, options) - - len_data = averageScan.SegmentedScan.Positions.Length - - mz_list = list(averageScan.SegmentedScan.Positions) - abund_list = list(averageScan.SegmentedScan.Intensities) - - #for i in range(len_data): - - # mz_list.append(averageScan.SegmentedScan.Positions[i]) - - # abund_list.append(averageScan.SegmentedScan.Intensities[i]) - - pyplot.plot(mz_list, abund_list) - #pyplot.show() - - centroid_mz_list = [] - abundance_mz_list = [] - if averageScan.HasCentroidStream: - - print("Average spectrum ({0} points)".format(averageScan.CentroidScan.Length)) - - # Print the spectral data (mass, intensity values) - if outputData: - - for i in range(averageScan.CentroidScan.Length): - centroid_mz_list.append(averageScan.CentroidScan.Masses[i]) - averageScan.CentroidScan.Resolutions - abundance_mz_list.append(averageScan.CentroidScan.Intensities[i]) - #print(" {}\t{}".format(averageScan.CentroidScan.Masses[i], averageScan.CentroidScan.Intensities[i]) ) - - pyplot.plot(centroid_mz_list, abundance_mz_list, linewidth=0, marker='o' ) - pyplot.show() - - print() - - -def get_metadata(rawFile): - - firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum - lastScanNumber = rawFile.RunHeaderEx.LastSpectrum - - startTime = rawFile.RunHeaderEx.StartTime - endTime = rawFile.RunHeaderEx.EndTime - - print() - print("General File Information:") - print(" RAW file: " + str( rawFile.FileName) ) - print(" RAW file version: " + str( rawFile.FileHeader.Revision)) - print(" Creation date: " + str( rawFile.FileHeader.CreationDate)) - print(" Operator: " + str( rawFile.FileHeader.WhoCreatedId)) - print(" Number of instruments: " + str( rawFile.InstrumentCount)) - print(" Description: " + str( rawFile.FileHeader.FileDescription)) - print(" Instrument model: " + str( rawFile.GetInstrumentData().Model)) - print(" Instrument name: " + str( 
rawFile.GetInstrumentData().Name)) - print(" Serial number: " + str( rawFile.GetInstrumentData().SerialNumber)) - print(" Software version: " + str( rawFile.GetInstrumentData().SoftwareVersion)) - print(" Firmware version: " + str( rawFile.GetInstrumentData().HardwareVersion)) - print(" Units: " + str( rawFile.GetInstrumentData().Units)) - print(" Mass resolution: {} ".format(rawFile.RunHeaderEx.MassResolution) ) - print(" Number of scans: {}".format( rawFile.RunHeaderEx.SpectraCount) ) - print(" Scan range: {} - {}".format( firstScanNumber, lastScanNumber) ) - print(" Time range: {} - {}".format( startTime, endTime) ) - print(" Mass range: {} - {}".format( rawFile.RunHeaderEx.LowMass, rawFile.RunHeaderEx.HighMass) ) - print() - - #Get information related to the sample that was processed - print("Sample Information:") - print(" Sample name: " + str( rawFile.SampleInformation.SampleName)) - print(" Sample id: " + str( rawFile.SampleInformation.SampleId)) - print(" Sample type: " + str( rawFile.SampleInformation.SampleType)) - print(" Sample comment: " + str( rawFile.SampleInformation.Comment)) - print(" Sample vial: " + str( rawFile.SampleInformation.Vial)) - print(" Sample volume: " + str( rawFile.SampleInformation.SampleVolume)) - print(" Sample injection volume: " + str( rawFile.SampleInformation.InjectionVolume)) - print(" Sample row number: " + str( rawFile.SampleInformation.RowNumber)) - print(" Sample dilution factor: " + str( rawFile.SampleInformation.DilutionFactor)) - -if __name__ == "__main__": - - filename = "tests/tests_data/ftms/Exploris_SRFA_Example.raw" - - rawFile = getRawFile(filename) - - get_metadata(rawFile) - - firstScanNumber = rawFile.RunHeaderEx.FirstSpectrum - lastScanNumber = rawFile.RunHeaderEx.LastSpectrum - - # readAllSpectra(rawFile, firstScanNumber, lastScanNumber, True) - - #AnalyzeAllScans(rawFile, firstScanNumber, lastScanNumber) - - # CalculateMassPrecision(rawFile, firstScanNumber) - - GetAverageSpectrum(rawFile, firstScanNumber, 
lastScanNumber, True) \ No newline at end of file