Skip to content

Commit

Permalink
0.1 release
Browse files Browse the repository at this point in the history
  • Loading branch information
cmutel committed Oct 7, 2024
1 parent b64bd79 commit d372618
Show file tree
Hide file tree
Showing 11 changed files with 3,441 additions and 17 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ jobs:
ubuntu-22.04,
windows-2019,
macos-13]
py-version: ["3.9", "3.10", "3.11", "3.12"]
py-version: ["3.11", "3.12"]

steps:
- uses: actions/checkout@v4
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,11 @@ classifiers = [
]
requires-python = ">=3.11"
dependencies = [
"openpyxl",
"pandas",
"peewee",
"platformdirs",
"pydantic",
"rdflib",
"SPARQLWrapper",
"structlog",
Expand Down
17 changes: 16 additions & 1 deletion sentier_data_tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,26 @@

__all__ = (
"__version__",
"Datapackage",
"ProductIRI",
"Record",
"UnitIRI",
"example_data_dir",
"reset_local_database",
)

__version__ = "0.0.1"
__version__ = "0.1"

from pathlib import Path

from sentier_data_tools.iri import ProductIRI, UnitIRI
from sentier_data_tools.local_data_store import (
Datapackage,
Record,
db,
initialize_local_database,
reset_local_database,
)

# Import-time side effect: hand the `db` object imported from
# `sentier_data_tools.local_data_store` to that module's initializer.
# NOTE(review): presumably this creates/opens the local database so the
# exported models are usable right after import - confirm in local_data_store.
initialize_local_database(db)
# Absolute path of the "example_data" directory that sits next to this file.
example_data_dir = Path(__file__).parent.resolve() / "example_data"
76 changes: 76 additions & 0 deletions sentier_data_tools/datapackage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import json
import os
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Optional

import pandas as pd

from sentier_data_tools.datapackage_validation import Contributors, DatapackageMetadata
from sentier_data_tools.licenses import LICENSES


class Datapackage:
    """Metadata container for a data package that can be written to JSON.

    The constructor validates its arguments with the pydantic models
    ``Contributors`` and ``DatapackageMetadata``; the validated model
    instances are discarded and the raw values are kept on the instance.
    """

    def __init__(
        self,
        *,
        name: str,
        description: str,
        contributors: list,
        homepage: Optional[str] = None,
        created: Optional[datetime] = None,
        version: str = "1",
        licenses: Optional[list] = None,
    ):
        """Store and validate the package metadata.

        Args:
            name: Name of the data package.
            description: Description of the data package.
            contributors: Iterable of dicts; each one is unpacked into
                ``Contributors(**contributor)`` for validation.
            homepage: Optional homepage; omitted from ``metadata()`` when falsy.
            created: Creation timestamp. When omitted, the current UTC time
                is stored as an ISO 8601 *string*.
            version: Version label, ``"1"`` by default.
            licenses: List of license descriptions; defaults to
                ``[LICENSES["CC-BY-4.0"]]`` when omitted or empty.

        Raises:
            Whatever ``Contributors`` / ``DatapackageMetadata`` raise for
            invalid input (a pydantic validation error, presumably).
        """
        self.name = name
        self.description = description
        self.contributors = contributors
        self.homepage = homepage
        # NOTE: the default is stored as an ISO string while a caller-supplied
        # value is kept as given; `metadata()` copes with both forms.
        self.created = created or datetime.now(timezone.utc).isoformat()
        self.licenses = licenses or [LICENSES["CC-BY-4.0"]]
        self.version = version
        self.resources = {}

        # The models are instantiated purely for their validation side effect.
        for contributor in contributors:
            Contributors(**contributor)
        DatapackageMetadata(
            name=self.name,
            description=self.description,
            homepage=self.homepage,
            created=self.created,
            version=self.version,
            licenses=self.licenses,
        )

    def metadata(self) -> dict:
        """Return the package metadata as a JSON-serializable dict.

        ``created`` is always emitted as a string: ``datetime`` values are
        converted with ``isoformat()``, anything else is passed through.
        ``homepage`` is included only when it is truthy.
        """
        if isinstance(self.created, datetime):
            created = self.created.isoformat()
        else:
            created = self.created

        data = {
            "name": self.name,
            "description": self.description,
            "contributors": self.contributors,
            "created": created,
            "version": self.version,
            "licenses": self.licenses,
        }
        if self.homepage:
            data["homepage"] = self.homepage
        return data

    def data(self) -> dict:
        """Return the data section that ``to_json`` merges with the metadata.

        ``to_json`` always called ``self.data()``, but the method was never
        defined, so every export raised ``AttributeError``. The resources
        mapping is exposed as stored.
        """
        # NOTE(review): `add_resource` is still a stub, so the final shape of
        # a resource entry is not fixed yet - revisit once it is implemented.
        return {"resources": self.resources}

    def add_resource(
        self, dataframe: pd.DataFrame, units: List[str], logs: Optional[list] = None
    ) -> None:
        """Placeholder: not implemented yet, the arguments are ignored."""
        pass

    def to_json(self, filepath: Path) -> Path:
        """Write ``metadata()`` merged with ``data()`` to a JSON file.

        Args:
            filepath: Target path (``Path`` or anything ``Path()`` accepts).
                If its suffix is not ``.json`` (case-insensitive), ``.json``
                is appended to the file name.

        Returns:
            The path that was actually written.

        Raises:
            OSError: If the parent directory is not writable.
        """
        if not isinstance(filepath, Path):
            filepath = Path(filepath)
        if filepath.suffix.lower() != ".json":
            filepath = filepath.parent / f"{filepath.name}.json"
        if not os.access(filepath.parent, os.W_OK):
            raise OSError(f"Can't write to directory {filepath.parent}")

        payload = self.metadata() | self.data()
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2, ensure_ascii=False)

        return filepath
23 changes: 23 additions & 0 deletions sentier_data_tools/datapackage_validation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from datetime import datetime, timezone
from typing import Dict, List, Optional

from pydantic import BaseModel, Field


class Contributors(BaseModel):
    """Pydantic model for one contributor entry.

    All three fields are required strings; instantiating the model with a
    contributor dict is what performs the validation.
    """

    # Contributor's display title.
    # NOTE(review): presumably a person or organisation name - confirm.
    title: str
    # Role of the contributor; the set of accepted values is not restricted here.
    role: str
    # NOTE(review): presumably a URL or path identifying the contributor - confirm.
    path: str


class DatapackageMetadata(BaseModel):
    """Validate given Datapackage metadata.

    Pydantic gives us nice error messages for free.

    ``name``, ``description``, ``version`` and ``licenses`` are required;
    ``homepage`` defaults to ``None`` and ``created`` defaults to the
    current UTC time at instantiation.
    """

    name: str
    description: str
    homepage: Optional[str] = None
    # A plain `= datetime.now()` default is evaluated once, when the class is
    # defined, so every instance would share the import-time timestamp.
    # `default_factory` takes a fresh, timezone-aware timestamp per instance.
    created: Optional[datetime] = Field(
        default_factory=lambda: datetime.now(timezone.utc)
    )
    version: str
    # Each license is a flat mapping of string keys to string values.
    licenses: List[Dict[str, str]]
Binary file not shown.
36 changes: 22 additions & 14 deletions sentier_data_tools/iri.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List, Optional, Union
from rdflib import URIRef, Literal, Graph

from rdflib import Graph, Literal, URIRef
from SPARQLWrapper import JSON, SPARQLWrapper

from sentier_data_tools.logs import stdout_feedback_logger as logger
Expand All @@ -9,10 +9,12 @@


def convert_json_object(obj: dict) -> Union[URIRef, Literal]:
    """Build the `rdflib` term described by one JSON result object.

    Objects whose ``"type"`` is ``"literal"`` become a ``Literal`` carrying
    the optional ``"xml:lang"`` and ``"datatype"`` entries; every other type
    becomes a ``URIRef`` of ``"value"``.
    """
    if obj["type"] != "literal":
        return URIRef(obj["value"])

    value = obj["value"]
    language = obj.get("xml:lang")
    datatype = obj.get("datatype")
    return Literal(value, lang=language, datatype=datatype)


class VocabIRI(URIRef):
Expand Down Expand Up @@ -46,17 +48,23 @@ def triples(self, subject: bool = True, limit: Optional[int] = 25) -> List[tuple
logger.info(f"Retrieved {len(results)} triples from {VOCAB_FUSEKI}")

if subject:
return [(
URIRef(str(self)),
convert_json_object(line['p']),
convert_json_object(line['o']),
) for line in results]
return [
(
URIRef(str(self)),
convert_json_object(line["p"]),
convert_json_object(line["o"]),
)
for line in results
]
else:
return [(
convert_json_object(line['s']),
convert_json_object(line['p']),
URIRef(str(self)),
) for line in results]
return [
(
convert_json_object(line["s"]),
convert_json_object(line["p"]),
URIRef(str(self)),
)
for line in results
]

def graph(self, subject: bool = True) -> Graph:
"""Return an `rdflib` graph of the data from the sentier.dev vocabulary for this IRI"""
Expand Down
Loading

0 comments on commit d372618

Please sign in to comment.