Skip to content

Commit

Permalink
fix: partially address Alastair's comments
Browse files Browse the repository at this point in the history
  • Loading branch information
mfacchinelli committed Aug 30, 2024
1 parent 1bcb16a commit 55e0838
Show file tree
Hide file tree
Showing 12 changed files with 82 additions and 81 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ on:
# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:

# write to checks/pull-request extra permission needed by 5monkeys/cobertura-action to post coverage stats
# write packages needed by docker image step
permissions:
id-token: write
contents: write
Expand Down
13 changes: 1 addition & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ requests = "^2.32.3"
pandas = "^2.2.2"
imap-data-access = "^0.9.0"
cdflib = "^1.3.1"
single-version = "^1.6.0"
psycopg = {extras = ["binary"], version = "^3.2.1"}

[tool.poetry.group.dev.dependencies]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
"""

from datetime import datetime

import sqlalchemy as sa
from alembic import op

Expand All @@ -18,11 +20,23 @@

def upgrade() -> None:
# ### commands auto generated by Alembic - please adjust! ###
op.add_column("files", sa.Column("version", sa.Integer(), nullable=False))
op.add_column("files", sa.Column("hash", sa.String(length=64), nullable=False))
op.add_column("files", sa.Column("date", sa.DateTime(), nullable=False))
op.add_column(
"files", sa.Column("software_version", sa.String(length=16), nullable=False)
"files", sa.Column("version", sa.Integer(), nullable=False, default=0)
)
op.add_column(
"files", sa.Column("hash", sa.String(length=64), nullable=False, default="")
)
op.add_column(
"files",
sa.Column(
"date", sa.DateTime(), nullable=False, default=datetime.fromtimestamp(0)
),
)
op.add_column(
"files",
sa.Column(
"software_version", sa.String(length=16), nullable=False, default="0.0.0"
),
)
op.create_unique_constraint(None, "files", ["path"])
# ### end Alembic commands ###
Expand Down
6 changes: 3 additions & 3 deletions src/imap_mag/DB.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sqlalchemy.orm import sessionmaker

from imap_mag import __version__
from imap_mag.outputManager import IMetadataProvider, IOutputManager
from imap_mag.outputManager import IFileMetadataProvider, IOutputManager


class IDatabase(abc.ABC):
Expand Down Expand Up @@ -89,8 +89,8 @@ def __init__(
self.__database = database

def add_file(
self, original_file: Path, metadata_provider: IMetadataProvider
) -> tuple[Path, IMetadataProvider]:
self, original_file: Path, metadata_provider: IFileMetadataProvider
) -> tuple[Path, IFileMetadataProvider]:
(destination_file, metadata_provider) = self.__output_manager.add_file(
original_file, metadata_provider
)
Expand Down
12 changes: 9 additions & 3 deletions src/imap_mag/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
"""The main module for project."""

from pathlib import Path
from importlib.metadata import PackageNotFoundError, version

from single_version import get_version

__version__ = get_version("imap-mag", Path(__file__).parent.parent)
def get_version() -> str:
try:
return version("imap-mag")
except PackageNotFoundError:
print("IMAP MAG CLI Version unknown, not installed via pip.")


__version__ = get_version()
11 changes: 8 additions & 3 deletions src/imap_mag/appUtils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import hashlib
import logging
from pathlib import Path
from typing import Optional
Expand All @@ -8,7 +9,7 @@

from .appConfig import Destination
from .DB import DatabaseOutputManager
from .outputManager import IMetadataProvider, IOutputManager, OutputManager
from .outputManager import IFileMetadataProvider, IOutputManager, OutputManager

IMAP_EPOCH = np.datetime64("2010-01-01T00:00:00", "ns")
J2000_EPOCH = np.datetime64("2000-01-01T11:58:55.816", "ns")
Expand Down Expand Up @@ -56,6 +57,10 @@ def convertToDatetime(string: str) -> np.datetime64:
raise typer.Abort()


def generate_hash(file: Path) -> str:
return hashlib.md5(file.read_bytes()).hexdigest()


def getOutputManager(destination: Destination) -> IOutputManager:
"""Retrieve output manager based on destination."""

Expand All @@ -71,10 +76,10 @@ def copyFileToDestination(
file_path: Path,
destination: Destination,
output_manager: Optional[OutputManager] = None,
) -> tuple[Path, IMetadataProvider]:
) -> tuple[Path, IFileMetadataProvider]:
"""Copy file to destination folder."""

class SimpleMetadataProvider(IMetadataProvider):
class SimpleMetadataProvider(IFileMetadataProvider):
"""Simple metadata provider for compatibility."""

def __init__(self, filename: str) -> None:
Expand Down
4 changes: 3 additions & 1 deletion src/imap_mag/cli/fetchBinary.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class FetchBinaryOptions(typing.TypedDict):
class FetchBinary:
"""Manage WebPODA data."""

__MAG_PREFIX: str = "mag_"

__web_poda: WebPODA
__output_manager: IOutputManager | None

Expand Down Expand Up @@ -68,7 +70,7 @@ def download_binaries(
file,
descriptor=options["packet"]
.lower()
.strip("mag_")
.strip(self.__MAG_PREFIX)
.replace("_", "-"),
date=dates[d],
extension="pkts",
Expand Down
36 changes: 18 additions & 18 deletions src/imap_mag/outputManager.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import abc
import hashlib
import logging
import shutil
import typing
Expand All @@ -8,8 +7,10 @@

import typer

from .appUtils import generate_hash

class IMetadataProvider(abc.ABC):

class IFileMetadataProvider(abc.ABC):
"""Interface for metadata providers."""

version: int = 0
Expand All @@ -23,7 +24,7 @@ def get_file_name(self) -> str:
"""Retireve file name."""


class DefaultMetadataProvider(IMetadataProvider):
class DatastoreScienceFilepathGenerator(IFileMetadataProvider):
"""Metadata for output files."""

prefix: str | None = "imap_mag"
Expand Down Expand Up @@ -66,14 +67,16 @@ class IOutputManager(abc.ABC):

@abc.abstractmethod
def add_file(
self, original_file: Path, metadata_provider: IMetadataProvider
) -> tuple[Path, IMetadataProvider]:
self, original_file: Path, metadata_provider: IFileMetadataProvider
) -> tuple[Path, IFileMetadataProvider]:
"""Add file to output location."""

def add_default_file(
self, original_file: Path, **metadata: typing.Any
) -> tuple[Path, IMetadataProvider]:
return self.add_file(original_file, DefaultMetadataProvider(**metadata))
) -> tuple[Path, IFileMetadataProvider]:
return self.add_file(
original_file, DatastoreScienceFilepathGenerator(**metadata)
)


class OutputManager(IOutputManager):
Expand All @@ -85,8 +88,8 @@ def __init__(self, location: Path) -> None:
self.location = location

def add_file(
self, original_file: Path, metadata_provider: IMetadataProvider
) -> tuple[Path, IMetadataProvider]:
self, original_file: Path, metadata_provider: IFileMetadataProvider
) -> tuple[Path, IFileMetadataProvider]:
"""Add file to output location."""

if not self.location.exists():
Expand All @@ -102,14 +105,11 @@ def add_file(
destination_file.parent.mkdir(parents=True, exist_ok=True)

if destination_file.exists():
if (
hashlib.md5(destination_file.read_bytes()).hexdigest()
== hashlib.md5(original_file.read_bytes()).hexdigest()
):
if generate_hash(destination_file) == generate_hash(original_file):
logging.info(f"File {destination_file} already exists and is the same.")
return (destination_file, metadata_provider)

metadata_provider.version = self.__find_viable_version(
metadata_provider.version = self.__get_next_available_version(
destination_file, metadata_provider
)
destination_file = self.__assemble_full_path(metadata_provider)
Expand All @@ -120,7 +120,7 @@ def add_file(

return (destination_file, metadata_provider)

def __assemble_full_path(self, metadata_provider: IMetadataProvider) -> Path:
def __assemble_full_path(self, metadata_provider: IFileMetadataProvider) -> Path:
"""Assemble full path from metadata."""

return (
Expand All @@ -129,13 +129,13 @@ def __assemble_full_path(self, metadata_provider: IMetadataProvider) -> Path:
/ metadata_provider.get_file_name()
)

def __find_viable_version(
self, destination_file: Path, metadata_provider: IMetadataProvider
def __get_next_available_version(
self, destination_file: Path, metadata_provider: IFileMetadataProvider
) -> int:
"""Find a viable version for a file."""

while destination_file.exists():
logging.info(
logging.debug(
f"File {destination_file} already exists and is different. Increasing version to {metadata_provider.version}."
)
metadata_provider.version += 1
Expand Down
12 changes: 6 additions & 6 deletions tests/test_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from imap_db.model import File
from imap_mag import __version__
from imap_mag.DB import DatabaseOutputManager, IDatabase
from imap_mag.outputManager import DefaultMetadataProvider, IOutputManager
from imap_mag.outputManager import DatastoreScienceFilepathGenerator, IOutputManager

from .testUtils import create_test_file, enableLogging, tidyDataFolders # noqa: F401

Expand All @@ -37,7 +37,7 @@ def test_database_output_manager_writes_to_database(
original_file = create_test_file(
Path(tempfile.gettempdir()) / "some_file", "some content"
)
metadata_provider = DefaultMetadataProvider(
metadata_provider = DatastoreScienceFilepathGenerator(
version=1, descriptor="hsk-pw", date=datetime(2025, 5, 2), extension="txt"
)

Expand Down Expand Up @@ -72,7 +72,7 @@ def check_inserted_file(file: File):
assert actual_metadata_provider == metadata_provider


def test_database_output_manager_errors_destination_file_not_found(
def test_database_output_manager_errors_when_destination_file_is_not_found(
mock_output_manager: mock.Mock, mock_database: mock.Mock
) -> None:
# Set up.
Expand All @@ -81,7 +81,7 @@ def test_database_output_manager_errors_destination_file_not_found(
original_file = create_test_file(
Path(tempfile.gettempdir()) / "some_file", "some content"
)
metadata_provider = DefaultMetadataProvider(
metadata_provider = DatastoreScienceFilepathGenerator(
version=1, descriptor="hsk-pw", date=datetime(2025, 5, 2), extension="txt"
)

Expand All @@ -107,7 +107,7 @@ def test_database_output_manager_errors_destination_file_different_hash(
original_file = create_test_file(
Path(tempfile.gettempdir()) / "some_file", "some content"
)
metadata_provider = DefaultMetadataProvider(
metadata_provider = DatastoreScienceFilepathGenerator(
version=1, descriptor="hsk-pw", date=datetime(2025, 5, 2), extension="txt"
)

Expand All @@ -131,7 +131,7 @@ def test_database_output_manager_errors_database_error(
original_file = create_test_file(
Path(tempfile.gettempdir()) / "some_file", "some content"
)
metadata_provider = DefaultMetadataProvider(
metadata_provider = DatastoreScienceFilepathGenerator(
version=1, descriptor="hsk-pw", date=datetime(2025, 5, 2), extension="txt"
)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_fetchScience.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def test_fetch_science_no_matching_files(
mock_soc, mock_output_manager, modes=[MAGMode.Normal], sensors=[MAGSensor.OBS]
)

mock_soc.get_filename.side_effect = lambda **_: {}
mock_soc.get_filename.side_effect = lambda **_: {} # return empty dictionary

# Exercise.
actual_downloaded: list[Path] = fetchScience.download_latest_science(
Expand Down
Loading

0 comments on commit 55e0838

Please sign in to comment.