use ruff for linter testing (#106)
* update dependencies
* update action
* run all ruff tests and report any failure right after
* fix nafc ruff check issues
* fix seabird imports
JessyBarrette authored Aug 13, 2024
1 parent 916edb2 commit fcbbc0a
Showing 16 changed files with 1,369 additions and 1,328 deletions.
39 changes: 28 additions & 11 deletions .github/workflows/test-package.yaml
@@ -26,25 +26,40 @@ jobs:
   testing:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v4
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
         with:
           python-version: '3.10'
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
           pip install poetry
           poetry install --with dev,geo
-      - name: Lint with flake8
-        run: |
-          # stop the build if there are Python syntax errors or undefined names
-          poetry run flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-          poetry run flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-      - name: Run ruff
+      - name: ruff check
+        id: ruff_check
+        run: poetry run ruff check --output-format=github .
+        continue-on-error: true
+      - name: ruff import sorting check
+        id: ruff_imports
+        run: poetry run ruff check --select I --output-format=github .
+        continue-on-error: true
+      - name: ruff format
+        id: ruff_format
+        run: poetry run ruff format --check .
+        continue-on-error: true
+      - name: Check for ruff issues
+        if: steps.ruff_check.outcome == 'failure' || steps.ruff_imports.outcome == 'failure' || steps.ruff_format.outcome == 'failure'
+        run: |
+          echo "Some ruff steps failed. Please check the logs for more information."
+          echo "Some of the issues can be fixed by running the following commands:"
+          echo "  poetry run ruff check --fix . # for fixing all default issues"
+          echo "  poetry run ruff check --fix --select I . # for fixing import sorting issues"
+          echo "  poetry run ruff format . # for formatting the code"
+          exit 1
       - name: Review if metadata is updated
         uses: dorny/paths-filter@v3
+        continue-on-error: true
         id: changes
         with:
           filters: |
@@ -62,7 +77,9 @@ jobs:
         run: poetry run pytest -W error::UserWarning -k "not test_metadata" -n auto
       - name: Run benchmark
         run: poetry run pytest tests/run_benchmark.py --benchmark-json output.json
-      - name: Update CHANGELOG
+      - name: CHANGELOG updated check
         if: steps.changes.outputs.changelog == 'false'
-        run: exit 1
+        run: |
+          echo "CHANGELOG.md update is required."
+          exit 1
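The three ruff steps use `continue-on-error: true` so that every check runs before the job fails, and a final gate step fails the build if any outcome was `failure`. A minimal local equivalent of that "run everything, report at the end" pattern, as a sketch (it assumes `ruff` is on `PATH`; in CI the commands run through `poetry run`, and the `checks` mapping here is my own naming):

```python
# Sketch: run every ruff check, collect failures, then exit non-zero once.
import subprocess

checks = {
    "ruff check": ["ruff", "check", "."],
    "import sorting": ["ruff", "check", "--select", "I", "."],
    "format": ["ruff", "format", "--check", "."],
}
failed = [name for name, cmd in checks.items()
          if subprocess.run(cmd).returncode != 0]
if failed:
    print(f"ruff steps failed: {', '.join(failed)}")
    raise SystemExit(1)
```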
6 changes: 6 additions & 0 deletions CHANGELOG.md
@@ -12,6 +12,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 - Add odpy convert `input_table` input through config file, which gives the
   ability to list multiple file glob expressions and associated metadata.
 - Add Onset.csv timestamp format: "\d+\/\d+\/\d\d\d\d\s+\d+\:\d+\:\d+" = "%m/%d/%Y %H:%M:%S"
+- Rely on ruff for format and linter testing
+
+### Fixed
+
+- seabird parsers module import sorting
+- nafc ruff check issues
 
 ## `0.5.2` - 2024-06-22
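For reference, the new Onset.csv timestamp entry pairs a detection regex with a `strptime` format. A quick sketch with a hypothetical sample value (the constant names are mine, not the library's):

```python
import re
from datetime import datetime

ONSET_REGEX = r"\d+\/\d+\/\d\d\d\d\s+\d+\:\d+\:\d+"  # detection pattern
ONSET_FORMAT = "%m/%d/%Y %H:%M:%S"                    # parsing format

sample = "8/13/2024 14:05:00"  # hypothetical Onset.csv timestamp
assert re.match(ONSET_REGEX, sample)
print(datetime.strptime(sample, ONSET_FORMAT))  # 2024-08-13 14:05:00
```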
4 changes: 1 addition & 3 deletions ocean_data_parser/batch/utils.py
@@ -98,9 +98,7 @@ def generate_output_path(
 
     if "." in file_name and not output_format:
         file_name, output_format = file_name.rsplit(".", 1)
-    assert (
-        output_format
-    ), "Unknown output file format extension: define the format through the path or output_format inputs"
+    assert output_format, "Unknown output file format extension: define the format through the path or output_format inputs"
 
     # Generate path
     return Path(output_path) / (
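The collapsed `assert` guards the same condition as before. As a sketch of the extension handling it protects (the file name below is hypothetical):

```python
# rsplit(".", 1) splits on the last dot only, so multi-dot names survive.
file_name, output_format = "cast_001.btl.nc".rsplit(".", 1)
assert (file_name, output_format) == ("cast_001.btl", "nc")
```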
6 changes: 3 additions & 3 deletions ocean_data_parser/parsers/amundsen.py
@@ -118,9 +118,9 @@ def int_format(
     elif line == "% In situ density TEOS10 ((s, t, p) - 1000) [kg/m^3]":
         metadata["D_CT"] = "In situ density TEOS10 ((s, t, p) - 1000) [kg/m^3]"
     elif line == "% Potential density TEOS10 ((s, t, 0) - 1000) [kg/m^3]":
-        metadata[
-            "D0CT"
-        ] = "Potential density TEOS10 ((s, t, 0) - 1000) [kg/m^3]"
+        metadata["D0CT"] = (
+            "Potential density TEOS10 ((s, t, 0) - 1000) [kg/m^3]"
+        )
     elif line == "% Potential density TEOS10 (s, t, 0) [kg/m^3]":
         metadata["D0CT"] = "Potential density TEOS10 (s, t, 0) [kg/m^3]"
     elif re.match(r"% .* \[.+\]", line):
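This hunk shows the formatting pattern repeated throughout the commit: as the diffs suggest, ruff format keeps a subscripted assignment target on one line and parenthesizes the long right-hand side, rather than splitting the subscript itself across lines. A minimal before/after sketch:

```python
metadata = {}

# before: the subscript was split across lines
# metadata[
#     "D0CT"
# ] = "Potential density TEOS10 ((s, t, 0) - 1000) [kg/m^3]"

# after: target stays on one line, value wrapped in parentheses
metadata["D0CT"] = (
    "Potential density TEOS10 ((s, t, 0) - 1000) [kg/m^3]"
)
```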
6 changes: 3 additions & 3 deletions ocean_data_parser/parsers/dfo/ios_source/IosObsFile.py
@@ -458,9 +458,9 @@ def add_to_history(self, input):
             self.history = {}
         if "ios_transform_history" not in self.history:
             self.history["ios_transform_history"] = "IOS Transform History:\n"
-        self.history[
-            "ios_transform_history"
-        ] += f"{datetime.now().isoformat()} - {input}\n"
+        self.history["ios_transform_history"] += (
+            f"{datetime.now().isoformat()} - {input}\n"
+        )
 
     def get_data(self, formatline=None):
         # reads data using the information in FORMAT
10 changes: 5 additions & 5 deletions ocean_data_parser/parsers/dfo/nafc.py
@@ -266,7 +266,7 @@ def _get_range(attrs: dict) -> tuple:
     dtype = _get_dtype(attrs["name"])
 
     # Use int(float(x)) method because the integers have decimals
-    func = (lambda x: int(float(x))) if dtype == int else float
+    func = (lambda x: int(float(x))) if dtype is int else float
     return tuple(
         map(
             func,
@@ -463,7 +463,7 @@ def _parse_ship_trip_stn():
         logger.error("No data found in file")
 
     # Review datatypes
-    if any([dtype == object for _, dtype in ds.dtypes.items()]):
+    if any([dtype is object for _, dtype in ds.dtypes.items()]):
         logger.warning(
             "Some columns dtype=object suggest the file data wasn't correctly parsed."
         )
@@ -561,9 +561,9 @@ def _parse_ship_trip_stn():
             if apply_func not in (None, np.nan)
             else var
         )
-        ds.attrs[
-            "history"
-        ] += f"\n{pd.Timestamp.now()} - Generated variable {name} = {apply_func}"
+        ds.attrs["history"] += (
+            f"\n{pd.Timestamp.now()} - Generated variable {name} = {apply_func}"
+        )
         attrs["source"] = f"Generated variable {name} = {apply_func}"
         ds[name] = (var.dims, new_data.data, {**var.attrs, **attrs})
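Two details in this file's changes are worth noting: `int(float(x))` handles integer fields that arrive with decimals, and `dtype is int` replaces `dtype == int`, the kind of exact type comparison ruff's default checks flag (likely under the pycodestyle type-comparison rule, E721). A small sketch:

```python
# int("23.0") raises ValueError; going through float first works.
assert int(float("23.0")) == 23

# Identity check for an exact type comparison, as ruff prefers.
dtype = int
assert dtype is int
```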
12 changes: 6 additions & 6 deletions ocean_data_parser/parsers/dfo/odf_source/flags.py
@@ -73,9 +73,9 @@ def add_flag_attributes(dataset):
     """
 
     def _add_ancillary(ancillary, variable):
-        dataset[variable].attrs[
-            "ancillary_variables"
-        ] = f"{dataset[variable].attrs.get('ancillary_variables','')} {ancillary}".strip()
+        dataset[variable].attrs["ancillary_variables"] = (
+            f"{dataset[variable].attrs.get('ancillary_variables','')} {ancillary}".strip()
+        )
         return dataset[variable]
 
     # Add ancillary_variable attribute
@@ -88,9 +88,9 @@ def _add_ancillary(ancillary, variable):
             _add_ancillary(variable, var)
         elif variable.startswith("Q") and variable[1:] in dataset:
             dataset[variable[1:]] = _add_ancillary(variable, variable[1:])
-            dataset[variable].attrs[
-                "long_name"
-            ] = f"Quality Flag for Parameter: {dataset[variable[1:]].attrs['long_name']}"
+            dataset[variable].attrs["long_name"] = (
+                f"Quality Flag for Parameter: {dataset[variable[1:]].attrs['long_name']}"
+            )
         else:
             # ignore normal variables
             continue
6 changes: 3 additions & 3 deletions ocean_data_parser/parsers/dfo/odf_source/process.py
@@ -86,9 +86,9 @@ def parse_odf(
         "source": odf_path,
     }
     dataset = attributes.global_attributes_from_header(dataset, metadata)
-    dataset.attrs[
-        "history"
-    ] += f"# Convert ODF to NetCDF with ocean_data_parser V {__version__}\n"
+    dataset.attrs["history"] += (
+        f"# Convert ODF to NetCDF with ocean_data_parser V {__version__}\n"
+    )
 
     # Handle ODF flag variables
     dataset = flags.rename_qqqq_flags(dataset)
6 changes: 3 additions & 3 deletions ocean_data_parser/parsers/pme.py
@@ -160,9 +160,9 @@ def _append_to_history(msg):
 
     if rename_variables:
         ds = ds.rename_vars(VARIABLE_RENAMING_MAPPING)
-        ds.attrs[
-            "history"
-        ] += f"\n{pd.Timestamp.now().isoformat()} Rename variables: {VARIABLE_RENAMING_MAPPING}"
+        ds.attrs["history"] += (
+            f"\n{pd.Timestamp.now().isoformat()} Rename variables: {VARIABLE_RENAMING_MAPPING}"
+        )
 
     ds = standardize_dataset(ds)
     return ds
14 changes: 7 additions & 7 deletions ocean_data_parser/parsers/seabird.py
@@ -9,11 +9,11 @@
 import logging
 import re
 from datetime import datetime
+from pyexpat import ExpatError
 
 import pandas as pd
 import xarray
 import xmltodict
-from pyexpat import ExpatError
 
 from ocean_data_parser.parsers.utils import convert_datetime_str, standardize_dataset
 from ocean_data_parser.vocabularies.load import seabird_vocabulary
@@ -217,14 +217,14 @@ def btl(
     # Add cell_method attribute
     for var in ds:
         if var.endswith("_sdev") and var[:-5] in ds:
-            ds[var].attrs[
-                "cell_method"
-            ] = f"scan: standard_deviation (previous {n_scan_per_bottle} scans)"
+            ds[var].attrs["cell_method"] = (
+                f"scan: standard_deviation (previous {n_scan_per_bottle} scans)"
+            )
             # TODO confirm that seabird uses the previous records from this timestamp
         elif var not in ["time", "bottle"]:
-            ds[var].attrs[
-                "cell_method"
-            ] = f"scan: mean (previous {n_scan_per_bottle} scans)"
+            ds[var].attrs["cell_method"] = (
+                f"scan: mean (previous {n_scan_per_bottle} scans)"
+            )
 
     if not save_orginal_header:
         ds.attrs.pop("seabird_header")
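The seabird import fix is pure sorting: `pyexpat` is a standard-library module, so ruff's isort-compatible `I` rules move `from pyexpat import ExpatError` up into the stdlib group instead of leaving it after the third-party imports. Sketch of the resulting grouping (imports shown only to illustrate the three groups):

```python
import logging                  # stdlib group first
from pyexpat import ExpatError  # pyexpat is stdlib, so it belongs here

import pandas as pd             # third-party group second

from ocean_data_parser.parsers.utils import standardize_dataset  # first-party last
```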
