Major changes following best practices for packaging, publishing, testing similar to sep005-io-fast package.
arsmlnkv committed Apr 25, 2024
1 parent 2fb5066 commit 4e42d4d
Showing 11 changed files with 257 additions and 94 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/ci.yml
@@ -0,0 +1,33 @@
name: CI for Python package

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
build:
if: ${{ ! contains(github.event.head_commit.message, 'Bumpversion') }}
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e .[dev]
- name: Lint with flake8
run: |
flake8 ./sep005_io_ashes ./tests --count --max-line-length=127 --show-source --statistics
- name: Test with pytest
run: |
pytest ./tests
pytest --cov=sep005_io_ashes ./tests
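For context, a minimal sketch of the kind of test this CI job (and the nox `tests` session below) would collect from `./tests`; the file name, sample data path, and assertions are hypothetical and not part of this commit.
```
# tests/test_read.py -- hypothetical example, not part of this commit
import os

import pytest

from sep005_io_ashes import read_ashes_file

SAMPLE = "tests/data/Sensor Rotor.txt"  # hypothetical sample Ashes export


@pytest.mark.skipif(not os.path.exists(SAMPLE), reason="sample export not available")
def test_read_ashes_file_returns_sep005_signals():
    signals = read_ashes_file({"Sensor Rotor": SAMPLE})
    assert isinstance(signals, list)
    for signal in signals:
        # keys written by read_ashes_file (see sep005_io_ashes/ashes.py below)
        assert {"name", "data", "fs", "start_timestamp", "unit_str"} <= signal.keys()
```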
40 changes: 40 additions & 0 deletions .github/workflows/publish.yml
@@ -0,0 +1,40 @@
name: Publish release

on:
push:
tags:
- 'v*'

permissions: write-all

jobs:
publish:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set up Python "3.12"
uses: actions/setup-python@v4
with:
python-version: "3.12"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install build
- name: Build distribution
run: |
python -m build
- name: Publish distribution
uses: pypa/gh-action-pypi-publish@release/v1.8
with:
user: __token__
password: ${{ secrets.PYPI_TOKEN }}

release:
needs: publish
runs-on: ubuntu-latest
steps:
- uses: marvinpinto/action-automatic-releases@latest
name: Release
with:
repo_token: "${{ secrets.GITHUB_TOKEN }}"
prerelease: false
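The publish job builds the sdist and wheel by shelling out to `python -m build`. Purely as an illustration of that step, a rough local equivalent using the `build` package's Python API might look as follows; this is a sketch for local experimentation, not something the workflow itself does, and it assumes the build requirements (setuptools) are already installed, since no build isolation is used here.
```
# Local sketch of the packaging step -- the workflow just runs `python -m build`.
from build import ProjectBuilder

builder = ProjectBuilder(".")              # project root containing pyproject.toml
sdist_path = builder.build("sdist", "dist/")
wheel_path = builder.build("wheel", "dist/")
print(sdist_path, wheel_path)              # built artifacts end up in dist/
```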
15 changes: 14 additions & 1 deletion README.md
@@ -4,13 +4,26 @@ SEP005 <- Ashes io
Basic package to import data from .txt files generated by Ashes, compliant with the
SDyPy format for timeseries as proposed in SEP005.

Installation
------------
Regular install:
```
pip install sep005-io-ashes
```
For developers:

After cloning the repository into your folder, run:
```
pip install -e .[dev]
```

Using the package
------------------

```
from sep005_io_ashes import read_ashes_file
filepaths = {# Define file paths or path}
filepaths = # Define file paths or path
signals = read_ashes_file(filepaths)
```
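As a worked version of the placeholder above, a hypothetical `filepaths` mapping and call could look like this; the dictionary keys follow the sensor names handled in `ashes.py` below, while the paths themselves are invented for illustration.
```
from sep005_io_ashes import read_ashes_file

# Hypothetical export paths -- replace with your own Ashes .txt files.
filepaths = {
    "Sensor Rotor": "exports/Sensor Rotor.txt",
    "Sensor Node [Node Hub Hub]": "exports/Sensor Node [Node Hub Hub].txt",
    "Sensor Beam element [Element 1 Tubular tower]": "exports/Sensor Beam element.txt",
}
signals = read_ashes_file(filepaths)
print(signals[0]["name"], signals[0]["fs"], signals[0]["unit_str"])
```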
22 changes: 22 additions & 0 deletions noxfile.py
@@ -0,0 +1,22 @@
# noxfile.py
import nox


@nox.session(python="3.12")
def tests(session):
session.run("pip", "install", "-e", "./[dev]")
session.run("pytest", "./tests")
session.run("pytest", "--cov=./tests")


@nox.session(python="3.12")
def lint(session):
session.install("flake8")
session.run("flake8", "./sep005_io_ashes", "./tests", "--max-line-length=127")


@nox.session(python="3.12")
def format(session):
session.install("isort", "black")
session.run("isort", "./sep005_io_ashes", "./tests")
session.run("black", "./sep005_io_ashes", "./tests")
51 changes: 51 additions & 0 deletions pyproject.toml
@@ -0,0 +1,51 @@
[build-system]
requires = ["setuptools"]
build-backend = "setuptools.build_meta"

[project]
name = "sep005-io-ashes"
authors = [
{name = "Abdulelah Al-Ghuwaidi", email = "[email protected]"}
]
maintainers = [
{name = "Abdulelah Al-Ghuwaidi", email = "[email protected]"},
{name = "Arsen Melnikov", email = "[email protected]"}
]
description = "Transform Ashes .txt files with data into the data compliant with SDyPy SEP005"
requires-python = ">=3.9"
keywords = ["io", "Ashes", "SEP005"]
license = {text = "MIT license"}
classifiers = [
"Development Status :: 4 - Beta",
"Intended Audience :: Developers",
"Programming Language :: Python :: 3",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12"
]
dynamic = [
"dependencies",
"version",
"readme"
]

[project.optional-dependencies]
dev = [
"black",
"flake8",
"isort",
"nox",
"pytest",
"pytest-cov",
"pytest-mock",
"pytest-xdist"
]

[project.urls]
Repository = "https://github.com/OWI-Lab/sep005-io-ashes"

[tool.setuptools.dynamic]
dependencies = {file = ["requirements.txt"]}
version = {attr = "sep005_io_ashes.__version__"}
readme = {file = "README.md", content-type = "text/markdown"}
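Because `dependencies`, `version`, and `readme` are declared dynamic, the installed distribution's metadata is resolved from `requirements.txt`, the package's `__version__`, and `README.md` at build time. A small sketch of checking that wiring after `pip install -e .[dev]` (assuming the package is installed in the current environment):
```
# Sketch: confirm the dynamic metadata resolved as declared in pyproject.toml.
from importlib.metadata import metadata, requires, version

import sep005_io_ashes

# version = {attr = "sep005_io_ashes.__version__"}
assert version("sep005-io-ashes") == sep005_io_ashes.__version__

# dependencies = {file = ["requirements.txt"]}
print(requires("sep005-io-ashes"))

# readme = {file = "README.md", content-type = "text/markdown"}
print(metadata("sep005-io-ashes")["Description-Content-Type"])
```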
12 changes: 12 additions & 0 deletions release.sh
@@ -0,0 +1,12 @@
#!/bin/bash
version_old=$(git describe --tags --abbrev=0)
if [ "$version_old" == "v$1" ]; then
echo "Error: you did not change the version! Please check the __version__ in your __init__.py"
exit 1
fi
echo "Creating new release: $version_old -> v$1"
git add .
git commit -m "Bumpversion $version_old -> v$1"
git push
git tag -a "v$1" -m "Release v$1"
git push origin "v$1"
2 changes: 1 addition & 1 deletion sep005_io_ashes/__init__.py
@@ -1,3 +1,3 @@
__version__ = "0.0.1"

from .ashes import read_ashes_file
from .ashes import read_ashes_file # noqa: F401
94 changes: 60 additions & 34 deletions sep005_io_ashes/ashes.py
@@ -5,39 +5,48 @@
@author: AA000139
"""

import pandas as pd
from datetime import datetime
import numpy as np
# import numpy as np
import os
from datetime import datetime

import pandas as pd


def read_ashes_file(filepaths: dict[str, str]) -> list[dict]:
"""Convert an Ashes file to a list of signals in SEP005 format.
Args:
filepaths: Dictionary with paths to the corresponding Ashes files.
Returns:
List of signals in SEP005 format.
"""

def read_ashes_file(filepaths: str) -> list[dict]:

# Initialize an empty list to store DataFrames
dfs = []
units = []

# Loop through each file and read data into DataFrame with appropriate skiprows
for key, filepath in filepaths.items():
if os.path.exists(filepath):
if key in ['Sensor Mooring line']:
if key in ["Sensor Mooring line"]:
# For Mooring file, skip 12 rows and drop 6 rows
df = pd.read_table(filepath, skiprows=12, dtype=str)
df.drop(range(6), inplace=True)
df.reset_index(drop=True, inplace=True)
dfs.append(df)
elif key in ['Sensor Blade [Time] [Blade 1]']:
elif key in ["Sensor Blade [Time] [Blade 1]"]:
# For Blade file, skip 12 rows and drop 6 rows
df = pd.read_table(filepath, skiprows=12, dtype=str)
df.drop(range(6), inplace=True)
df.reset_index(drop=True, inplace=True)
dfs.append(df)
elif key in ['Sensor Node [Node Hub Hub]']:
elif key in ["Sensor Node [Node Hub Hub]"]:
# For Node file, skip 11 rows and drop 17 rows
df = pd.read_table(filepath, skiprows=11, dtype=str)
df.drop(range(17), inplace=True)
df.reset_index(drop=True, inplace=True)
dfs.append(df)
elif key in ['Sensor Beam element [Element 1 Tubular tower]']:
elif key in ["Sensor Beam element [Element 1 Tubular tower]"]:
# For Beam element file, skip 11 rows and drop 12 rows
df = pd.read_table(filepath, skiprows=11, dtype=str)
df.drop(range(12), inplace=True)
@@ -51,60 +60,77 @@ def read_ashes_file(filepaths: str) -> list[dict]:
dfs.append(df)
# Rename columns if it's Sensor Rotor or Sensor Beam element to
# avoid duplicated columns with other sensors
if key == 'Sensor Rotor':
df.columns = [f"{col}_rotor" if col != 'Time [s]' else col for col in df.columns]
elif key == 'Sensor Beam element [Element 1 Tubular tower]':
df.columns = [f"{col}_beam" if col != 'Time [s]' else col for col in df.columns]

if key == "Sensor Rotor":
df.columns = [
f"{col}_rotor" if col != "Time [s]" else col for col in df.columns
]
elif key == "Sensor Beam element [Element 1 Tubular tower]":
df.columns = [
f"{col}_beam" if col != "Time [s]" else col for col in df.columns
]

# Combine all DataFrames in one Dataframe along the columns axis
combined_df = pd.concat(dfs, axis=1)

# Extract units from column headers
for column in combined_df.columns:
unit = column.split('[')[-1].split(']')[0]
unit = column.split("[")[-1].split("]")[0]
units.append(unit)
units_df = pd.DataFrame([units], columns=combined_df.columns)

# Extract the first encounter of Time [s] column and set it as the index
time_column_index = next((i for i, col in enumerate(combined_df.columns) if col == 'Time [s]'), None)
time_column_index = next(
(i for i, col in enumerate(combined_df.columns) if col == "Time [s]"), None
)
if time_column_index is not None:
combined_df.index = pd.to_datetime(combined_df.iloc[:, time_column_index], unit='s')
combined_df = combined_df.drop(columns=[col for col in combined_df.columns if 'Time [s]' in col])
units_df = units_df.drop(columns=[col for col in units_df.columns if 'Time [s]' in col])
combined_df.index = pd.to_datetime(
combined_df.iloc[:, time_column_index], unit="s"
)
combined_df = combined_df.drop(
columns=[col for col in combined_df.columns if "Time [s]" in col]
)
units_df = units_df.drop(
columns=[col for col in units_df.columns if "Time [s]" in col]
)

# Define a fictive measurement start datetime for Ashes data
fictive_measurement_start_ashes = datetime(2022, 1, 1)
combined_df.index += (fictive_measurement_start_ashes - combined_df.index[0])
combined_df.index += fictive_measurement_start_ashes - combined_df.index[0]

# Initialize an empty list to store the time vector
start_timestamp = combined_df.index[0]
time_seconds = []
# Extract time vector from the datetime index
for timestamp in combined_df.index:
time_seconds.append((timestamp - start_timestamp).total_seconds())
time = np.array(time_seconds, dtype=float)
# time = np.array(time_seconds, dtype=float)

# Clean dataframe's columns
combined_df.columns = combined_df.columns.str.replace(r'\[.*?\]', '', regex=True).str.strip()
units_df.columns = units_df.columns.str.replace(r'\[.*?\]', '', regex=True).str.strip()
# Clean dataframe's columns
combined_df.columns = combined_df.columns.str.replace(
r"\[.*?\]", "", regex=True
).str.strip()
units_df.columns = units_df.columns.str.replace(
r"\[.*?\]", "", regex=True
).str.strip()

# Write in Sep005 format
fs = 1 / (combined_df.index[1] - combined_df.index[0]).total_seconds() # Sampling frequency in Hz
duration = (len(combined_df) / fs)
fs = (
1 / (combined_df.index[1] - combined_df.index[0]).total_seconds()
) # Sampling frequency in Hz
duration = len(combined_df) / fs
signals = []

for channel in combined_df.columns:
combined_df[channel] = pd.to_numeric(combined_df[channel], errors='coerce')
combined_df[channel] = pd.to_numeric(combined_df[channel], errors="coerce")
data = combined_df[channel].to_numpy()
fs_signal = len(data) / duration
signal = {
'name': channel,
'data': data,
'start_timestamp': str(start_timestamp),
'fs': fs_signal,
'unit_str': str(units_df[channel].iloc[0]),
"name": channel,
"data": data,
"start_timestamp": str(start_timestamp),
"fs": fs_signal,
"unit_str": str(units_df[channel].iloc[0]),
}
signals.append(signal)


return signals
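For reference, a minimal sketch of consuming the returned SEP005 signals; the export path is hypothetical, as in the README example above.
```
import numpy as np

from sep005_io_ashes import read_ashes_file

filepaths = {"Sensor Rotor": "exports/Sensor Rotor.txt"}  # hypothetical path
signals = read_ashes_file(filepaths)

for signal in signals:
    # Each signal is a plain dict in SEP005 form: name, data, fs,
    # start_timestamp and unit_str, as written above.
    print(
        f"{signal['name']}: fs={signal['fs']:.2f} Hz, "
        f"unit={signal['unit_str']}, mean={np.nanmean(signal['data']):.3f}"
    )
```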
