Skip to content

Commit

Permalink
Merge branch 'centroid_adduct_fix' into 'master'
Browse files Browse the repository at this point in the history
Centroid handling and adduct reporting fix

Closes #123 and #125

See merge request mass-spectrometry/corems!82
  • Loading branch information
corilo committed Mar 20, 2024
2 parents 6136b42 + c107426 commit e02f8b3
Show file tree
Hide file tree
Showing 26 changed files with 11,442 additions and 10,196 deletions.
21 changes: 21 additions & 0 deletions NEG_ESI_SRFA_CoreMS_prob_score.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
Index,m/z,Calibrated m/z,Calculated m/z,Peak Height,Peak Area,Resolving Power,S/N,Ion Charge,m/z Error (ppm),m/z Error Score,Isotopologue Similarity,Confidence Score,DBE,H/C,O/C,Heteroatom Class,Ion Type,Adduct,Is Isotopologue,Mono Isotopic Index,Molecular Formula,C,H,O,N
0,576.0751562737335,576.0751562737335,576.0756831376291,2019730.8188135864,nan,159720.69595125286,8.604138304064543,-1,-0.9145740933897943,0.8591841639497192,0.0,0.5155104983698315,16.0,0.8,0.64,O16 -R,radical,,0,,C25 H20 O16,25,20,16,
1,585.0773168347159,585.0773168347159,584.069022064519,1746260.2046435073,nan,275209.4679144153,7.43914197658413,-1,1726.3281086760753,0.0,0.0,0.0,35.5,0.3170731707317073,0.12195121951219512,O5,de-protonated,,0,,C41 H13 O5,41,13,5,
2,593.0820452356501,593.0820452356501,592.074107444949,6469034.8640819015,nan,180997.3119366648,27.55836082011744,-1,1702.3845123893364,0.0,0.0,0.0,37.5,0.3023255813953488,0.09302325581395349,O4,de-protonated,,0,,C43 H13 O4,43,13,4,
3,597.0754291924612,597.0754291924612,597.076017490259,2068238.426915153,nan,134839.10894778575,8.810782756393296,-1,-0.9852979864257353,0.8384900332430503,0.0,0.5030940199458301,19.0,0.7037037037037037,0.5555555555555556,N1 O15 -R,radical,,0,,C27 H19 O15 N1,27,19,15,1
4,605.0809297183079,605.0809297183079,605.081102870689,4934544.835880899,nan,152063.12451789412,21.02136870915638,-1,-0.2861639214521796,0.9852510523460122,0.0,0.5911506314076073,21.0,0.6551724137931034,0.4827586206896552,N1 O14 -R,radical,,0,,C29 H19 O14 N1,29,19,14,1
5,567.0647395512881,567.0647395512881,,3868906.6064003,nan,162257.55310330895,16.481704995981246,-1,,,,,,,,unassigned,,,,,,,,,
6,567.0663087811229,567.0663087811229,,5463881.346367143,nan,162258.1245318804,23.276364525029468,-1,,,,,,,,unassigned,,,,,,,,,
7,575.0713087215903,575.0713087215903,,9739433.203502472,nan,186665.9786183644,41.49039540593753,-1,,,,,,,,unassigned,,,,,,,,,
8,580.0696581879541,580.0696581879541,,1759661.6215911638,nan,185057.4786056693,7.496232576882939,-1,,,,,,,,unassigned,,,,,,,,,
9,583.0745286807011,583.0745286807011,,3341444.3943076613,nan,157802.2673899728,14.234693770159195,-1,,,,,,,,unassigned,,,,,,,,,
10,583.0764232748777,583.0764232748777,,4624182.129382659,nan,157802.6959610134,19.69921051547032,-1,,,,,,,,unassigned,,,,,,,,,
11,587.0717089529447,587.0717089529447,,9364333.081322271,nan,156728.69594017448,39.89245309647384,-1,,,,,,,,unassigned,,,,,,,,,
12,588.0748249834461,588.0748249834461,,2102297.732396686,nan,273806.7179139468,8.955876831392676,-1,,,,,,,,unassigned,,,,,,,,,
13,595.0748319653359,595.0748319653359,,2248638.4360681637,nan,135292.60896146248,9.57929438890816,-1,,,,,,,,unassigned,,,,,,,,,
14,595.0769607848,595.0769607848,,2614726.533038756,nan,135293.10896192447,11.138845091638458,-1,,,,,,,,unassigned,,,,,,,,,
15,597.0775112334587,597.0775112334587,,2624788.5554853342,nan,154102.55308819498,11.18170973079855,-1,,,,,,,,unassigned,,,,,,,,,
16,601.0851016849045,601.0851016849045,,3264511.328060631,nan,133939.60894898468,13.906955669626551,-1,,,,,,,,unassigned,,,,,,,,,
17,605.0824960183015,605.0824960183015,,5173077.227051935,nan,152063.55308892002,22.03752673601681,-1,,,,,,,,unassigned,,,,,,,,,
18,613.0848231619533,613.0848231619533,,2643269.346562413,nan,116727.207961776,11.26043867869326,-1,,,,,,,,unassigned,,,,,,,,,
19,613.0872526006148,613.0872526006148,,4497043.9691487,nan,350181.62381927815,19.157596601285515,-1,,,,,,,,unassigned,,,,,,,,,
201 changes: 201 additions & 0 deletions NEG_ESI_SRFA_CoreMS_prob_score.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
{
"MassSpecAttrs": {
"Aterm": null,
"Bterm": null,
"Cterm": null,
"baseline_noise": 1746260.2046435073,
"baseline_noise_std": 0.0,
"mobility_rt": 0,
"mobility_scan": 0,
"polarity": -1,
"rt": 0,
"tic": 80568004.66102132
},
"MassSpecPeak": {
"implemented_kendrick_rounding_methods": [
"floor",
"ceil",
"round"
],
"kendrick_base": {
"C": 1,
"H": 2
},
"kendrick_rounding_method": "floor",
"legacy_resolving_power": true,
"min_peak_datapoints": 5.0,
"peak_derivative_threshold": 0.0,
"peak_height_max_percent": 10.0,
"peak_max_prominence_percent": 0.1,
"peak_min_prominence_percent": 0.1
},
"MassSpectrum": {
"calib_minimize_method": "Powell",
"calib_pol_order": 2,
"calib_sn_threshold": 2.0,
"do_calibration": true,
"max_calib_ppm_error": 1.0,
"max_picking_mz": 1200.0,
"min_calib_ppm_error": -1.0,
"min_picking_mz": 50.0,
"noise_max_mz": 1200.0,
"noise_min_mz": 50.0,
"noise_threshold_absolute_abundance": 1000000.0,
"noise_threshold_log_nsigma": 6,
"noise_threshold_log_nsigma_bins": 500,
"noise_threshold_log_nsigma_corr_factor": 0.463,
"noise_threshold_method": "relative_abundance",
"noise_threshold_methods_implemented": [
"minima",
"signal_noise",
"relative_abundance",
"absolute_abundance",
"log"
],
"noise_threshold_min_relative_abundance": 1,
"noise_threshold_min_s2n": 4.0,
"noise_threshold_min_std": 6,
"picking_point_extrapolate": 3
},
"MolecularFormulaSearch": {
"adduct_atoms_neg": [
"Cl",
"Br"
],
"adduct_atoms_pos": [
"Na",
"K"
],
"db_chunk_size": 300,
"db_jobs": 3,
"error_method": "None",
"ion_charge": -1,
"ionization_type": "ESI",
"isAdduct": false,
"isProtonated": true,
"isRadical": false,
"isotopologue_filter_atoms": [
"Cl",
"Br"
],
"isotopologue_filter_threshold": 33.0,
"isotopologue_score_weight": 0.4,
"max_abun_error": 100.0,
"max_dbe": 40.0,
"max_hc_filter": 3.0,
"max_oc_filter": 1.2,
"max_ppm_error": 10.0,
"min_abun_error": -100.0,
"min_dbe": 0.0,
"min_hc_filter": 0.3,
"min_oc_filter": 0.0,
"min_op_filter": 2.0,
"min_peaks_per_class": 15,
"min_ppm_error": -10.0,
"mz_error_average": 0.0,
"mz_error_range": 1.5,
"mz_error_score_weight": 0.6,
"output_min_score": 0.1,
"output_score_method": "prob_score",
"score_method": "prob_score",
"score_methods": [
"S_P_lowest_error",
"N_S_P_lowest_error",
"lowest_error",
"prob_score",
"air_filter_error",
"water_filter_error",
"earth_filter_error"
],
"url_database": "postgresql+psycopg2://coremsappdb:coremsapppnnl@molformdb:5432/coremsapp",
"use_isotopologue_filter": false,
"use_min_peaks_filter": true,
"use_pah_line_rule": false,
"use_runtime_kendrick_filter": false,
"usedAtoms": {
"C": [
1,
100
],
"H": [
1,
200
]
},
"used_atom_valences": {
"13C": 4,
"Ac": 3,
"Ag": 0,
"Al": 3,
"As": 5,
"At": 1,
"Au": 3,
"B": 4,
"Ba": 2,
"Be": 2,
"Bi": 3,
"Br": 1,
"C": 4,
"Ca": 2,
"Cd": 2,
"Cl": 1,
"Co": 1,
"Cr": 1,
"Cs": 1,
"Cu": 2,
"F": 1,
"Fe": 3,
"Fr": 1,
"Ga": 3,
"Ge": 4,
"H": 1,
"Hf": 4,
"Hg": 1,
"I": 1,
"In": 3,
"Ir": 4,
"K": 1,
"La": 3,
"Li": 1,
"Mg": 2,
"Mn": 1,
"Mo": 6,
"N": 3,
"Na": 1,
"Nb": 5,
"Ni": 1,
"O": 2,
"Os": 4,
"P": 3,
"Pb": 4,
"Pd": 4,
"Po": 2,
"Pt": 4,
"Ra": 2,
"Rb": 1,
"Re": 4,
"Rh": 6,
"Ru": 8,
"S": 2,
"Sb": 5,
"Sc": 3,
"Se": 6,
"Si": 4,
"Sn": 4,
"Sr": 2,
"Ta": 5,
"Tc": 7,
"Te": 6,
"Ti": 4,
"Tl": 3,
"V": 5,
"W": 6,
"Y": 3,
"Zn": 2,
"Zr": 4
}
},
"analyzer": "Unknown",
"instrument_label": "Unknown",
"sample_name": "ESI_NEG_SRFA_COREMS"
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
![CoreMS Logo](https://github.com/EMSL-Computing/CoreMS/blob/v2.0.1/docs/CoreMS.COLOR_small.png?raw=true)
![CoreMS Logo](https://github.com/EMSL-Computing/CoreMS/tree/master/docs/CoreMS.COLOR_small.png?raw=true)

<div align="left">

Expand Down
2 changes: 2 additions & 0 deletions corems/mass_spectrum/factory/MassSpectrumClasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -1376,6 +1376,8 @@ def __init__(self, data_dict, d_params, auto_process=True):

self.is_centroid = True
self.data_dict = data_dict
self._mz_exp = data_dict[Labels.mz]
self._abundance = data_dict[Labels.abundance]

if auto_process:
self.process_mass_spec()
Expand Down
10 changes: 9 additions & 1 deletion corems/mass_spectrum/input/massList.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ def add_molecular_formula(self, mass_spec_obj, dataframe):
ion_type_df = dataframe["Ion Type"]
ion_charge_df = dataframe["Ion Charge"]
is_isotopologue_df = dataframe['Is Isotopologue']
if 'Adduct' in dataframe:
adduct_df = dataframe['Adduct']
else:
adduct_df = None

mass_spec_mz_exp_list = mass_spec_obj.mz_exp

Expand All @@ -102,7 +106,11 @@ def add_molecular_formula(self, mass_spec_obj, dataframe):
if sum(counts) > 0:

ion_type = str(Labels.ion_type_translate.get(ion_type_df[df_index]))
mfobj = MolecularFormula(formula_list, int(ion_charge_df[df_index]), mspeak_parent=mass_spec_obj[ms_peak_index] , ion_type=ion_type)
if adduct_df is not None:
adduct_atom = str(adduct_df[df_index])
else:
adduct_atom = None
mfobj = MolecularFormula(formula_list, int(ion_charge_df[df_index]), mspeak_parent=mass_spec_obj[ms_peak_index] , ion_type=ion_type, adduct_atom=adduct_atom)
mfobj.is_isotopologue = bool(is_isotopologue_df[df_index])
mass_spec_obj[ms_peak_index].add_molecular_formula(mfobj)

Expand Down
85 changes: 53 additions & 32 deletions corems/mass_spectrum/input/numpyArray.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,27 @@
from corems.encapsulation.constant import Labels

def ms_from_array_profile(mz, abundance, dataname:str, polarity:int = -1, auto_process:bool = True, data_type:str = Labels.simulated_profile):
"""
Create a MassSpecProfile object from an array of m/z values and abundance values.
"""Create a MassSpecProfile object from an array of m/z values and abundance values.
Parameters:
mz (numpy.ndarray): Array of m/z values.
abundance (numpy.ndarray): Array of abundance values.
dataname (str): Name of the data.
polarity (int, optional): Polarity of the data. Defaults to -1.
auto_process (bool, optional): Flag to automatically process the data. Defaults to True.
data_type (str, optional): Type of the data. Defaults to Labels.simulated_profile.
Parameters
----------
mz : numpy.ndarray
Array of m/z values.
abundance : numpy.ndarray
Array of abundance values.
dataname : str
Name of the data.
polarity : int, optional
Polarity of the data. The default is -1.
auto_process : bool, optional
Flag to automatically process the data. The default is True.
data_type : str, optional
Type of the data. The default is Labels.simulated_profile.
Returns:
MassSpecProfile: The created MassSpecProfile object.
Returns
-------
MassSpecProfile
The created MassSpecProfile object.
"""
data_dict = {Labels.mz: mz, Labels.abundance: abundance}

Expand All @@ -32,37 +40,50 @@ def ms_from_array_profile(mz, abundance, dataname:str, polarity:int = -1, auto_p
return ms

def ms_from_array_centroid(mz, abundance, rp:list[float], s2n:list[float], dataname:str, polarity:int=-1, auto_process:bool=True):
"""
Create a MassSpecCentroid object from an array of m/z values, abundance values, resolution power, and signal-to-noise ratio.
"""Create a MassSpecCentroid object from an array of m/z values, abundance values, resolution power, and signal-to-noise ratio.
Parameters:
mz (numpy.ndarray): Array of m/z values.
abundance (numpy.ndarray): Array of abundance values.
rp list(float): Resolution power.
s2n list(float): Signal-to-noise ratio.
dataname (str): Name of the data.
polarity (int, optional): Polarity of the data. Defaults to -1.
auto_process (bool, optional): Flag to automatically process the data. Defaults to True.
Parameters
----------
mz : numpy.ndarray
Array of m/z values.
abundance : numpy.ndarray
Array of abundance values.
rp : list[float]
List of resolving power values.
s2n : list[float]
List of signal-to-noise ratio values.
dataname : str
Name of the data.
polarity : int, optional
Polarity of the data. The default is -1.
auto_process : bool, optional
Flag to automatically process the data. The default is True.
Returns:
MassSpecCentroid: The created MassSpecCentroid object.
Returns
-------
MassSpecCentroid
The created MassSpecCentroid object.
"""
data_dict = {Labels.mz: mz, Labels.abundance: abundance, Labels.s2n : s2n, Labels.rp: rp}

output_parameters = get_output_parameters(polarity, dataname)

return MassSpecCentroid(data_dict, output_parameters)
output_parameters[Labels.label] = Labels.corems_centroid

return MassSpecCentroid(data_dict, output_parameters, auto_process)

def get_output_parameters(polarity:int, file_location:str):
"""
Get the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
"""Generate the output parameters for creating a MassSpecProfile or MassSpecCentroid object.
Parameters:
polarity (int): Polarity of the data.
file_location (str): File location.
Parameters
----------
polarity : int
Polarity of the data.
file_location : str
File location.
Returns:
dict: Output parameters.
Returns
-------
dict
Output parameters.
"""
d_params = default_parameters(file_location)

Expand Down
Loading

0 comments on commit e02f8b3

Please sign in to comment.