Merge pull request #6 from compomics/minor-changes

Minor changes
compomics · Dec 3, 2024 · f435e91
2 parents 1f9c335 + 6efcfbb
commit f435e91
Show file tree

Hide file tree

Showing 8 changed files with 721 additions and 520 deletions.
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -0,0 +1,51 @@
+name: Test and Lint Workflow
+
+on:
+  push:
+    branches:
+      - '*'  
+  pull_request:
+    branches:
+      - main  
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.10', '3.11', '3.12'] 
+
+    steps:
+      # Checkout the code from the repository
+      - name: Checkout code
+        uses: actions/checkout@v3
+
+      # Set up Python environment for each version in the matrix
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      # Install dependencies
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[dev]  # Install both runtime and dev dependencies
+
+      # Run linting with ruff (this will catch print statements)
+      - name: Lint with ruff
+        run: |
+          pip install ruff  # Ensure ruff is available if not installed through dev dependencies
+          ruff check mumble tests  # Lint your package and test directories
+
+      # Run formatting checks with black
+      - name: Check formatting with black
+        run: |
+          pip install black  # Ensure black is available if not installed through dev dependencies
+          black --check .  # Check if code is correctly formatted
+
+      # Run tests with pytest
+      - name: Run tests with pytest
+        run: |
+          pip install pytest
+          pytest --maxfail=1 --disable-warnings -q
diff --git a/mumble/__init__.py b/mumble/__init__.py
@@ -1,3 +1,4 @@
-__version__ = "0.1.2"
+__version__ = "0.2.0"
+__all__ = ["PSMHandler"]
 
 from mumble.mumble import PSMHandler
diff --git a/mumble/__main__.py b/mumble/__main__.py
@@ -1,8 +1,19 @@
 import click
+import logging
+
+from rich.logging import RichHandler
 
 from mumble import PSMHandler
 
 
+# setup logging
+logging.basicConfig(
+    level=logging.INFO,  # Set the logging level
+    format="%(message)s",  # Simple format for logging
+    datefmt="[%X]",  # Time format
+    handlers=[RichHandler(rich_tracebacks=True, show_path=False)],
+)
+
 # Define CLI options as a dictionary
 CLI_OPTIONS = {
     "psm_list": {

diff --git a/mumble/file_handler.py b/mumble/file_handler.py
@@ -30,7 +30,7 @@ class _SpectrumFileHandler:
         This class uses the pyteomics library for parsing MGF and mzML files.
         Parsed spectra are stored as `rustyms.RawSpectrum` objects.
     """
-    
+
     def __init__(self, spectrum_file: str):
         self.spectrum_file = spectrum_file
         self.spectra = {}  # Initialize an empty dictionary to hold the spectra
@@ -46,7 +46,6 @@ def __init__(self, spectrum_file: str):
         else:
             raise ValueError("Unsupported file format. Only MGF and mzML are supported.")
 
-
     def _parse_mgf(self):
         """
         Parse an MGF (Mascot Generic Format) file and store each spectrum as a RawSpectrum object.
@@ -61,31 +60,37 @@ def _parse_mgf(self):
         try:
             with mgf.MGF(self.spectrum_file) as spectra:
                 for spectrum in spectra:
-                    spectrum_id = spectrum['params'].get('title', 'Unknown')  # Extract spectrum ID from the MGF params
-                    precursor_mass = spectrum['params'].get('pepmass', [None])[0]  # Extract precursor mass
-
+                    spectrum_id = spectrum["params"].get(
+                        "title", "Unknown"
+                    )  # Extract spectrum ID from the MGF params
+                    precursor_mass = spectrum["params"].get("pepmass", [None])[
+                        0
+                    ]  # Extract precursor mass
+
                     # Extract retention time
                     rt = 0.0
-                    if 'rtinseconds' in spectrum['params']:
-                        rt = float(spectrum['params']['rtinseconds'])
-                    elif 'retention time' in spectrum['params']:
-                        rt = float(spectrum['params']['retention time'])
+                    if "rtinseconds" in spectrum["params"]:
+                        rt = float(spectrum["params"]["rtinseconds"])
+                    elif "retention time" in spectrum["params"]:
+                        rt = float(spectrum["params"]["retention time"])
 
                     # Extract precursor charge
                     precursor_charge = 0
-                    if 'charge' in spectrum['params']:
-                        charge_str = spectrum['params']['charge']
-                        precursor_charge = int(charge_str.strip('+'))  # Remove '+' and convert to int
+                    if "charge" in spectrum["params"]:
+                        charge_str = spectrum["params"]["charge"]
+                        precursor_charge = int(
+                            charge_str.strip("+")
+                        )  # Remove '+' and convert to int
 
                     # Create a RawSpectrum object using required fields and additional attributes
                     self.spectra[spectrum_id] = RawSpectrum(
-                        title=spectrum_id, 
-                        num_scans=len(spectrum['m/z array']),
+                        title=spectrum_id,
+                        num_scans=len(spectrum["m/z array"]),
                         rt=rt,
                         precursor_charge=precursor_charge,
-                        mz_array=np.array(spectrum['m/z array']),
-                        intensity_array=np.array(spectrum['intensity array']),
-                        precursor_mass=precursor_mass 
+                        mz_array=np.array(spectrum["m/z array"]),
+                        intensity_array=np.array(spectrum["intensity array"]),
+                        precursor_mass=precursor_mass,
                     )
             logging.info(f"Parsed {len(self.spectra)} spectra from {self.spectrum_file}")
         except Exception as e:
@@ -105,36 +110,40 @@ def _parse_mzml(self):
         try:
             with mzml.MzML(self.spectrum_file) as spectra:
                 for spectrum in spectra:
-                    spectrum_id = spectrum.get('id', None)  # Get the spectrum ID from the mzML spectrum
+                    spectrum_id = spectrum.get(
+                        "id", None
+                    )  # Get the spectrum ID from the mzML spectrum
                     precursor_mass = 0.0
                     precursor_charge = 0
                     rt = 0.0
 
                     # Extract precursor mass and charge if available
-                    if 'precursorList' in spectrum and spectrum['precursorList']:
-                        precursor = spectrum['precursorList']['precursor'][0]
-                        if 'selectedIonList' in precursor:
-                            selected_ion = precursor['selectedIonList']['selectedIon'][0]
-                            precursor_mass = selected_ion.get('selected ion m/z', 0.0)
-                            precursor_charge = int(selected_ion.get('charge state', 0))
+                    if "precursorList" in spectrum and spectrum["precursorList"]:
+                        precursor = spectrum["precursorList"]["precursor"][0]
+                        if "selectedIonList" in precursor:
+                            selected_ion = precursor["selectedIonList"]["selectedIon"][0]
+                            precursor_mass = selected_ion.get("selected ion m/z", 0.0)
+                            precursor_charge = int(selected_ion.get("charge state", 0))
 
                     # Extract retention time
-                    if 'scanList' in spectrum and spectrum['scanList']:
-                        scan = spectrum['scanList']['scan'][0]
-                        for cv_param in scan.get('cvParam', []):
-                            if cv_param.get('accession') == 'MS:1000016':  # accession for scan start time
-                                rt = float(cv_param.get('value', 0.0))
+                    if "scanList" in spectrum and spectrum["scanList"]:
+                        scan = spectrum["scanList"]["scan"][0]
+                        for cv_param in scan.get("cvParam", []):
+                            if (
+                                cv_param.get("accession") == "MS:1000016"
+                            ):  # accession for scan start time
+                                rt = float(cv_param.get("value", 0.0))
                                 break
 
                     # Create a RawSpectrum object using required fields and additional attributes
                     self.spectra[spectrum_id] = RawSpectrum(
                         title=spectrum_id,
-                        num_scans=len(spectrum['m/z array']),
+                        num_scans=len(spectrum["m/z array"]),
                         rt=rt,
                         precursor_charge=precursor_charge,
-                        mz_array=np.array(spectrum['m/z array']),
-                        intensity_array=np.array(spectrum['intensity array']),
-                        precursor_mass=precursor_mass
+                        mz_array=np.array(spectrum["m/z array"]),
+                        intensity_array=np.array(spectrum["intensity array"]),
+                        precursor_mass=precursor_mass,
                     )
             logging.info(f"Parsed {len(self.spectra)} spectra from {self.spectrum_file}")
         except Exception as e:
@@ -143,10 +152,10 @@ def _parse_mzml(self):
     def get_spectrum_from_psm(self, psm: PSM):
         """
         Retrieve a RawSpectrum for a PSM by its ID.
-        
+
         Args:
             psm (PSM): psm object
-        
+
         Returns:
             RawSpectrum: The retrieved spectrum or None if not found.
         """
@@ -155,10 +164,10 @@ def get_spectrum_from_psm(self, psm: PSM):
     def get_spectra_from_psm_list(self, psmList: PSMList):
         """
         Retrieve all spectra for a PSMList.
-        
+
         Args:
             psmList (PSMList): A list of PSM objects.
-        
+
         Returns:
             list: A list of RawSpectrum objects corresponding to the PSMs.
                 None is included for any spectra not found.
@@ -168,7 +177,7 @@ def get_spectra_from_psm_list(self, psmList: PSMList):
     def get_all_spectra(self):
         """
         Retrieve all parsed spectra.
-        
+
         Returns:
             dict: A dictionary of all parsed spectra, where keys are spectrum IDs
                 and values are RawSpectrum objects.
@@ -180,12 +189,12 @@ class _MetadataParser:
     """
     Class to parse metadata files (CSV/TSV) containing PSM information.
     """
-    
+
     @staticmethod
     def parse_csv_file(file_name: str, delimiter: str = "\t") -> list:
         """
         Parse a CSV or TSV file containing PSM information and create PSM objects.
-        
+
         Args:
             file_name (str): Path to the CSV or TSV file.
             delimiter (str, optional): Delimiter used in the file. Defaults to "\t".
@@ -200,11 +209,11 @@ def parse_csv_file(file_name: str, delimiter: str = "\t") -> list:
             pd.errors.ParserError: If there's an error parsing the file.
 
         Notes:
-            The file must contain at least the following columns: 
+            The file must contain at least the following columns:
             'peptidoform', 'spectrum_id', and 'precursor_mz'.
             If any of these columns are missing, an error is logged and an empty list is returned.
         """
-        
+
         try:
             df = pd.read_csv(file_name, delimiter=delimiter)
         except FileNotFoundError as e:
@@ -228,7 +237,11 @@ def parse_csv_file(file_name: str, delimiter: str = "\t") -> list:
 
         # Create a list of PSM objects from the DataFrame rows
         peptidoforms = [
-            PSM(peptidoform=row["peptidoform"], spectrum_id=row["spectrum_id"], precursor_mz=row["precursor_mz"])
+            PSM(
+                peptidoform=row["peptidoform"],
+                spectrum_id=row["spectrum_id"],
+                precursor_mz=row["precursor_mz"],
+            )
             for _, row in df.iterrows()
         ]