Skip to content

Commit

Permalink
Merge pull request #107 from compomics/feature/optional-pyopenms
Browse files Browse the repository at this point in the history
make pyOpenMS optional
  • Loading branch information
RalfG authored Nov 18, 2024
2 parents 133559a + e014823 commit 414688e
Show file tree
Hide file tree
Showing 6 changed files with 32 additions and 32 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
run: ruff check --output-format=github .

- name: Install package and its dependencies
run: pip install --editable .[dev]
run: pip install --editable .[dev,idxml]

- name: Test with pytest and codecov
run: |
Expand Down Expand Up @@ -58,7 +58,7 @@ jobs:
- name: Install package and its dependencies
run: |
python -m pip install --upgrade pip
pip install .[dev]
pip install .[dev,idxml]
- name: Test imports
run: python -c "import psm_utils"
Expand Down
10 changes: 5 additions & 5 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,15 @@ Goals and non-goals
Supported file formats
**********************

===================================================================================================================== ======================== =============== ===============
File format psm_utils tag Read support Write support
===================================================================================================================== ======================== =============== ===============
===================================================================================================================== ======================== =============== =============== ==========
File format psm_utils tag Read support Write support Comments
===================================================================================================================== ======================== =============== =============== ==========
`AlphaDIA precursors TSV <https://alphadia.readthedocs.io/en/latest/quickstart.html#output-files>`_ ``alphadia`` ✅ ❌
`DIA-NN TSV <https://github.com/vdemichev/DiaNN#output>`_ ``diann`` ✅ ❌
`FlashLFQ generic TSV <https://github.com/smith-chem-wisc/FlashLFQ/wiki/Identification-Input-Formats>`_ ``flashlfq`` ✅ ✅
`FragPipe PSM TSV <https://fragpipe.nesvilab.org/docs/tutorial_fragpipe_outputs.html#psmtsv/>`_ ``fragpipe`` ✅ ❌
`ionbot CSV <https://ionbot.cloud/>`_ ``ionbot`` ✅ ❌
`OpenMS idXML <https://www.openms.de/>`_ ``idxml`` ✅ ✅
`OpenMS idXML <https://www.openms.de/>`_ ``idxml`` ✅ ✅ Requires the optional ``pyopenms`` dependency (``pip install psm-utils[idxml]``)
`MaxQuant msms.txt <https://www.maxquant.org/>`_ ``msms`` ✅ ❌
`MS Amanda CSV <https://ms.imp.ac.at/?goto=msamanda>`_ ``msamanda`` ✅ ❌
`mzIdentML <https://psidev.info/mzidentml>`_ ``mzid`` ✅ ✅
Expand All @@ -108,7 +108,7 @@ Supported file formats
`ProteoScape Parquet <#>`_ ``proteoscape`` ✅ ❌
`TSV <https://psm-utils.readthedocs.io/en/stable/api/psm_utils.io/#module-psm_utils.io.tsv>`_ ``tsv`` ✅ ✅
`X!Tandem XML <https://www.thegpm.org/tandem/>`_ ``xtandem`` ✅ ❌
===================================================================================================================== ======================== =============== ===============
===================================================================================================================== ======================== =============== =============== ==========

Legend: ✅ Supported, ❌ Unsupported

Expand Down
11 changes: 10 additions & 1 deletion psm_utils/io/idxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,12 @@
module="pyopenms",
)

import pyopenms as oms #noqa: E402
try:
import pyopenms as oms #noqa: E402
except ImportError:
_has_openms = False
else:
_has_openms = True

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -99,6 +104,8 @@ def __init__(self, filename: Union[Path, str], *args, **kwargs) -> None:
>>> psm_list = [psm for psm in reader]
"""
super().__init__(filename, *args, **kwargs)
if not _has_openms:
raise ImportError("pyOpenMS is required to read idXML files")
self.protein_ids, self.peptide_ids = self._parse_idxml()
self.user_params_metadata = self._get_userparams_metadata(self.peptide_ids[0].getHits()[0])
self.rescoring_features = self._get_rescoring_features(self.peptide_ids[0].getHits()[0])
Expand Down Expand Up @@ -326,6 +333,8 @@ def __init__(
"""
super().__init__(filename, *args, **kwargs)
if not _has_openms:
raise ImportError("pyOpenMS is required to write idXML files")
self.protein_ids = protein_ids
self.peptide_ids = peptide_ids
self._writer = None
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ dependencies = [
"psims",
"pyarrow",
"pydantic >= 2",
"pyopenms",
"pyteomics >= 4",
"rich",
"sqlalchemy",
Expand All @@ -46,6 +45,7 @@ docs = [
"toml",
]
online = ["streamlit", "plotly"]
idxml = ["pyopenms"]

[project.urls]
GitHub = "https://github.com/compomics/psm_utils"
Expand Down
Binary file added tests/test_data/test.pq
Binary file not shown.
37 changes: 14 additions & 23 deletions tests/test_io/test_parquet.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Tests for psm_utils.io.parquet."""

import hashlib
import os

from psm_utils.io.parquet import ParquetReader, ParquetWriter
Expand Down Expand Up @@ -32,40 +31,32 @@
]


def compute_checksum(filename):
hash_func = hashlib.sha256()
with open(filename, "rb") as f:
for chunk in iter(lambda: f.read(4096), b""):
hash_func.update(chunk)
return hash_func.hexdigest()


class TestParquetWriter:
expected_checksum = "1e5ee7afc5d4131bce8f1d0908136b8c559303abb7bbd7d052afa111d5e64f0c"

def test_write_psm(self):
with ParquetWriter("test.pq") as writer:
for test_case in test_cases:
writer.write_psm(PSM(**test_case))
actual_checksum = compute_checksum("test.pq")
assert actual_checksum == self.expected_checksum, "Checksums do not match"

with ParquetReader("test.pq") as reader:
for i, psm in enumerate(reader):
assert psm == PSM(**test_cases[i])

os.remove("test.pq")

def test_write_file(self):
with ParquetWriter("test.pq") as writer:
writer.write_file(PSMList(psm_list=[PSM(**t) for t in test_cases]))
actual_checksum = compute_checksum("test.pq")
assert actual_checksum == self.expected_checksum, "Checksums do not match"
# os.remove("test.pq")

with ParquetReader("test.pq") as reader:
for i, psm in enumerate(reader):
assert psm == PSM(**test_cases[i])

os.remove("test.pq")


class TestParquetReader:
def test_iter(self):
# Write test cases to file
ParquetWriter("test.pq").write_file(PSMList(psm_list=[PSM(**t) for t in test_cases]))

# Read test cases from file
for i, psm in enumerate(ParquetReader("test.pq")):
assert psm == PSM(**test_cases[i])

os.remove("test.pq")
with ParquetReader("tests/test_data/test.pq") as reader:
for i, psm in enumerate(reader):
assert psm == PSM(**test_cases[i])

0 comments on commit 414688e

Please sign in to comment.