Skip to content

Commit

Permalink
Merge pull request #34 from computational-ms/feature/break_tests
Browse files (browse the repository at this point in the history)
Feature/break tests
  • Loading branch information
ArtiVlasov authored Mar 7, 2023
2 parents (29af69b + db4c03b), commit 30e7ebc
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 12 deletions.
23 changes: 17 additions & 6 deletions pyiohat/parsers/ident/comet_2020_01_4_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -96,10 +96,21 @@ def map_mod_mass(self):
if entry_tag.endswith("cvParam"):
mod_name = entry.attrib["name"]
elif entry_tag.endswith("SearchModification"):
if mod_name == "unknown modification":
potential_mod = self.mod_mapper.mass_to_names(
float(entry.attrib["massDelta"]), decimals=4
)
if len(potential_mod) == 0:
logger.error(
f"Cannot map modification with mass {entry.attrib['massDelta']}."
)
raise ValueError
else:
mod_name = potential_mod[0]
mod_mass_map[entry.attrib["massDelta"]] = mod_name
if entry.attrib["fixedMod"] == "true":
_key = entry.attrib["residues"]
fixed_mods[_key] = mod_name
residue = entry.attrib["residues"]
fixed_mods[residue] = mod_name
elif entry_tag.endswith("ModificationParams"):
break
entry.clear()
Expand All @@ -120,9 +131,9 @@ def get_peptide_lookup(self):
for event, entry in etree.iterparse(self.input_file):
entry_tag = entry.tag

if entry_tag.endswith("DBSequence"):
if entry_tag.endswith("PeptideSequence"):
peptide_information = True
elif peptide_information is True:
if peptide_information is True:
if entry_tag.endswith("PeptideSequence"):
sequence = entry.text
if len(self.fixed_mods) > 0:
Expand Down Expand Up @@ -160,9 +171,9 @@ def get_spec_records(self):
for event, entry in etree.iterparse(self.input_file):
entry_tag = entry.tag

if entry_tag.endswith("Inputs"):
if entry_tag.endswith("PeptideEvidenceRef"):
spec_information = True
elif spec_information is True:
if spec_information is True:
if entry_tag.endswith("cvParam"):
if entry.attrib["name"] in self.mapping_dict:
_key = self.mapping_dict[entry.attrib["name"]]
Expand Down
4 changes: 2 additions & 2 deletions pyiohat/parsers/ident/msgfplus_2021_03_22_parser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -85,9 +85,9 @@ def get_peptide_lookup(self):
for event, entry in etree.iterparse(self.input_file):
entry_tag = entry.tag

if entry_tag.endswith("DBSequence"):
if entry_tag.endswith("PeptideSequence"):
peptide_information = True
elif peptide_information is True:
if peptide_information is True:
if entry_tag.endswith("PeptideSequence"):
sequence = {"sequence": entry.text}
elif entry_tag.endswith("cvParam"):
Expand Down
16 changes: 14 additions & 2 deletions pyiohat/parsers/misc.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -38,9 +38,21 @@ def get_atom_counts(sequences, modifications, compositions):
np.char.count(sequences, aa_or_mod), ordered_element_multiplier
)
else:
atom_counts += np.outer(
np.char.count(modifications, aa_or_mod), ordered_element_multiplier
mod_counts = []
escaped_mod_name = re.escape(aa_or_mod)
search_pattern = re.compile(
rf"(^{escaped_mod_name}:\d+)(?=;)|(?<=;)({escaped_mod_name}:\d+)(?=;)|(?<=;)({escaped_mod_name}:\d+$)|^({escaped_mod_name}:\d+)$"
)
for mod in modifications:
mod_counts.append(
len(
re.findall(
search_pattern,
mod,
)
)
)
atom_counts += np.outer(mod_counts, ordered_element_multiplier)
# Remove water (peptide bonds)
water = np.zeros(shape=(1, len(elements)), dtype=int)
water[0, elements.index("H")] = 2
Expand Down
1 change: 1 addition & 0 deletions tests/data/BSA1_comet_2020_01_4.mzid
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,6 +13,7 @@
</AnalysisSoftwareList>
<SequenceCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.2">
<DBSequence id="sp|P02769|ALBU_BOVIN" accession="sp|P02769|ALBU_BOVIN" searchDatabase_ref="DB0" />
<DBSequence id="sp|P02760|ALBU_BOVIN2" accession="sp|P02769|ALBU_BOVIN2" searchDatabase_ref="DB1" />
<Peptide id="AEFVEVTK;">
<PeptideSequence>AEFVEVTK</PeptideSequence>
</Peptide>
Expand Down
3 changes: 3 additions & 0 deletions tests/data/BSA1_msgfplus_2021_03_22.mzid
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,9 @@
<DBSequence length="607" searchDatabase_ref="SearchDB_1" accession="sp|P02769|ALBU_BOVIN" id="DBSeq1">
<cvParam cvRef="PSI-MS" accession="MS:1001088" name="protein description" value="sp|P02769|ALBU_BOVIN Serum albumin OS=Bos taurus GN=ALB PE=1 SV=4"/>
</DBSequence>
<DBSequence length="608" searchDatabase_ref="SearchDB_1" accession="sp|P02769|ALBU2_BOVIN" id="DBSeq1">
<cvParam cvRef="PSI-MS" accession="MS:1001089" name="protein description" value="sp|P02769|ALBU2_BOVIN Serum albumin OS=Bos taurus GN=ALB PE=1 SV=4"/>
</DBSequence>
<Peptide id="Pep_YICDNQDTISSK">
<PeptideSequence>YICDNQDTISSK</PeptideSequence>
<Modification location="3" monoisotopicMassDelta="57.021464">
Expand Down
4 changes: 2 additions & 2 deletions tests/parsers/misc/test_get_atom_counts.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -12,7 +12,7 @@ def test_simple():
[
"",
"",
"Magic",
"Magic:1",
],
dtype=str,
)
Expand Down Expand Up @@ -43,7 +43,7 @@ def test_negative():
}
modifications = np.array(
[
"Magic",
"Magic:1",
],
dtype=str,
)
Expand Down

0 comments on commit 30e7ebc

Please sign in to comment.