Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement DataSchema-5.1 #42

Merged
merged 29 commits into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@

Currently implemented schemas are:
- `Recipe` validator for **dgpost**, versions `{2.1, 1.0}`
- `DataSchema` validator for **yadg**, versions `{5.0, 4.2, 4.1, 4.0}`
- `DataSchema` validator for **yadg**, versions `{5.1, 5.0, 4.2, 4.1, 4.0}`
- `Payload` validator for **tomato**, versions `{0.2, 0.1}`

6 changes: 5 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ style = pep440-pre
versionfile_source = src/dgbowl_schemas/_version.py
versionfile_build = dgbowl_schemas/_version.py
tag_prefix =
parentdir_prefix = dgbowl_schemas-
parentdir_prefix = dgbowl_schemas-

[flake8]
max-line-length = 88
extend-ignore = E203
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
python_requires=">=3.8",
install_requires=[
"pydantic~=2.0",
"babel >= 2.15",
"pyyaml>=5.0",
"tzlocal",
],
Expand Down
10 changes: 8 additions & 2 deletions src/dgbowl_schemas/yadg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from . import dataschema
from pydantic import ValidationError
from pydantic.v1 import ValidationError as ValidationError_v1
from .dataschema_5_1 import DataSchema as DataSchema_5_1
from .dataschema_5_0 import DataSchema as DataSchema_5_0, Metadata as Metadata_5_0
from .dataschema_4_2 import DataSchema as DataSchema_4_2, Metadata as Metadata_4_2
from .dataschema_4_1 import DataSchema as DataSchema_4_1, Metadata as Metadata_4_1
Expand All @@ -10,6 +11,7 @@
logger = logging.getLogger(__name__)

models = {
"5.1": (DataSchema_5_1, None),
"5.0": (DataSchema_5_0, Metadata_5_0),
"4.2": (DataSchema_4_2, Metadata_4_2),
"4.1": (DataSchema_4_1, Metadata_4_1),
Expand All @@ -23,8 +25,12 @@ def to_dataschema(**kwargs):
for ver, tup in models.items():
Model, Metadata = tup
try:
Metadata(**kwargs["metadata"])
break
if Metadata is None:
schema = Model(**kwargs)
return schema
else:
Metadata(**kwargs["metadata"])
break
except (ValidationError, ValidationError_v1) as e:
errors.append(
f"Could not parse 'kwargs['metadata']' using Metadata v{ver}:"
Expand Down
6 changes: 3 additions & 3 deletions src/dgbowl_schemas/yadg/dataschema/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from ..dataschema_5_0 import (
from ..dataschema_5_1 import (
DataSchema,
Metadata,
StepDefaults,
FileType,
FileTypes,
ExtractorFactory,
)

__all__ = ["DataSchema", "Metadata", "StepDefaults", "FileType", "ExtractorFactory"]
__all__ = ["DataSchema", "StepDefaults", "FileType", "FileTypes", "ExtractorFactory"]
4 changes: 3 additions & 1 deletion src/dgbowl_schemas/yadg/dataschema_4_1/step.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ class Params(BaseModel, extra=Extra.forbid):
class XRDTrace(BaseModel, extra=Extra.forbid):
class Params(BaseModel, extra=Extra.forbid):
filetype: Literal[
"panalytical.xy", "panalytical.csv", "panalytical.xrdml"
"panalytical.xy",
"panalytical.csv",
"panalytical.xrdml",
] = "panalytical.csv"

parser: Literal["xrdtrace"]
Expand Down
1 change: 0 additions & 1 deletion src/dgbowl_schemas/yadg/dataschema_4_2/step.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ class ElectroChem(BaseModel, extra=Extra.forbid):
"""Parser for electrochemistry files."""

class Params(BaseModel, extra=Extra.forbid):

filetype: Literal["eclab.mpt", "eclab.mpr", "tomato.json"] = "eclab.mpr"

transpose: bool = True
Expand Down
44 changes: 44 additions & 0 deletions src/dgbowl_schemas/yadg/dataschema_5_0/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@
from pydantic import BaseModel, Field
from typing import Sequence
import logging
from .metadata import Metadata
from .step import Steps
from .stepdefaults import StepDefaults
from .filetype import ( # noqa: F401
ExtractorFactory as ExtractorFactory,
FileType as FileType,
)
from ..dataschema_5_1 import DataSchema as NewDataSchema

logger = logging.getLogger(__name__)


class DataSchema(BaseModel, extra="forbid"):
Expand All @@ -23,3 +27,43 @@ class DataSchema(BaseModel, extra="forbid"):

steps: Sequence[Steps]
"""Input commands for :mod:`yadg`'s parsers, organised as a sequence of steps."""

def update(self) -> "NewDataSchema":
    """Migrate this ``DataSchema-5.0`` object to ``DataSchema-5.1``.

    Rebuilds the schema as a plain dict (metadata, step defaults, steps)
    and validates it through the 5.1 model. Parser names retired in 5.1
    (``dummy``, ``basiccsv``, ``meascsv``) are mapped onto their 5.1
    extractor filetypes.

    Returns:
        A validated ``dataschema_5_1.DataSchema`` instance.
    """
    logger.info("Updating from DataSchema-5.0 to DataSchema-5.1")

    # Skeleton of the 5.1 schema document, filled in below.
    nsch = {"version": "5.1", "metadata": {}, "step_defaults": {}, "steps": []}
    # 5.1 flattens the 5.0 provenance fields directly into "metadata";
    # the old "version" key is superseded by the top-level "version".
    for k, v in self.metadata.provenance.model_dump(exclude_none=True).items():
        if k == "version":
            continue
        else:
            nsch["metadata"][k] = v

    # Carry over only explicitly-set, non-default step defaults.
    nsch["step_defaults"] = self.step_defaults.model_dump(
        exclude_none=True, exclude_defaults=True
    )

    for step in self.steps:
        nstep = {
            "tag": step.tag,
            "input": step.input.model_dump(exclude_none=True),
        }
        extractor = step.extractor.model_dump(exclude_none=True)
        # Map retired 5.0 parser names onto 5.1 extractor filetypes.
        if step.parser == "dummy":
            if step.extractor.filetype == "tomato.json":
                extractor["filetype"] = "tomato.json"
            else:
                # Any other "dummy" filetype becomes the 5.1 "example" type.
                extractor["filetype"] = "example"
        elif step.parser == "basiccsv":
            extractor["filetype"] = "basic.csv"
        elif step.parser == "meascsv":
            extractor["filetype"] = "fhimcpt.csv"
        # NOTE(review): placed after the parser chain above, this remap can
        # only fire for parsers not handled there (the "meascsv" branch has
        # already overwritten the filetype) — confirm intended nesting.
        # NOTE(review): assumes "filetype" is always present in the dumped
        # extractor; a None filetype would be dropped by exclude_none and
        # raise KeyError here — verify against the 5.0 Extractor model.
        if extractor["filetype"] == "labview.csv":
            extractor["filetype"] = "fhimcpt.vna"
        if step.parameters is not None:
            extractor["parameters"] = step.parameters.model_dump(exclude_none=True)
        if step.externaldate is not None:
            nstep["externaldate"] = step.externaldate.model_dump(exclude_none=True)
        nstep["extractor"] = extractor
        nsch["steps"].append(nstep)

    return NewDataSchema(**nsch)
28 changes: 28 additions & 0 deletions src/dgbowl_schemas/yadg/dataschema_5_1/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from pydantic import BaseModel, Field
from typing import Sequence, Optional, Mapping, Any, Literal
from .step import Step
from .stepdefaults import StepDefaults
from .filetype import ( # noqa: F401
ExtractorFactory as ExtractorFactory,
FileType as FileType,
FileTypes as FileTypes,
)


class DataSchema(BaseModel, extra="forbid"):
    """
    A :class:`pydantic.BaseModel` implementing ``DataSchema-5.1`` model
    introduced in ``yadg-5.1``.

    Extra keys are forbidden, so unknown top-level entries fail validation.
    """

    # Version discriminator: only documents declaring "5.1" validate here.
    version: Literal["5.1"]

    metadata: Optional[Mapping[str, Any]]
    """Input metadata for :mod:`yadg`."""
    # NOTE(review): no default is supplied, so under pydantic v2 this field
    # is required even though it may be explicitly null — confirm intended.

    step_defaults: StepDefaults = Field(StepDefaults())
    """Default values for configuration of each :class:`Step`."""

    steps: Sequence[Step]
    """Input commands for :mod:`yadg`'s extractors, organised as a :class:`Sequence`
    of :class:`Step`."""
59 changes: 59 additions & 0 deletions src/dgbowl_schemas/yadg/dataschema_5_1/externaldate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from pydantic import BaseModel
from typing import Literal, Optional, Union


class ExternalDateFile(BaseModel, extra="forbid"):
    """Read external date information from file."""

    class Content(BaseModel, extra="forbid"):
        # Specification of the file that holds the date information.
        path: str
        """Path to the external date information file."""

        type: str
        """Type of the external date information file."""

        match: Optional[str] = None
        """String to be matched within the file."""

    # The file specification; key name "file" selects this variant when
    # used inside :class:`ExternalDate.using`.
    file: Content


class ExternalDateFilename(BaseModel, extra="forbid"):
    """Read external date information from the file name."""

    class Content(BaseModel, extra="forbid"):
        # How to extract a timestamp from the processed file's name.
        format: str
        """``strptime``-like format string for processing the date."""

        len: int
        """Number of characters from the start of the filename to parse."""

    # The filename-parsing specification; key name "filename" selects this
    # variant when used inside :class:`ExternalDate.using`.
    filename: Content


class ExternalDateISOString(BaseModel, extra="forbid"):
    """Read a constant external date using an ISO-formatted string."""

    # The constant timestamp as an ISO 8601 string; key name "isostring"
    # selects this variant when used inside :class:`ExternalDate.using`.
    isostring: str


class ExternalDateUTSOffset(BaseModel, extra="forbid"):
    """Read a constant external date using a Unix timestamp offset."""

    # The constant offset in Unix-timestamp seconds; key name "utsoffset"
    # selects this variant when used inside :class:`ExternalDate.using`.
    utsoffset: float


class ExternalDate(BaseModel, extra="forbid"):
    """Supply timestamping information that are external to the processed file."""

    # The variant is distinguished by each sub-model's unique outer key
    # ("file", "filename", "isostring", "utsoffset"), since extra keys are
    # forbidden on all of them.
    using: Union[
        ExternalDateFile,
        ExternalDateFilename,
        ExternalDateISOString,
        ExternalDateUTSOffset,
    ]
    """Specification of the external date format."""

    mode: Literal["add", "replace"] = "add"
    """Whether the external timestamps should be added to or should replace the
    parsed data."""
Loading
Loading