Skip to content

Commit

Permalink
add export endpoint to datacontract api (datacontract#221)
Browse files: browse the repository at this point in the history.
* add export endpoint to datacontract api

* ran ruff format

---------

Co-authored-by: jochenchrist <[email protected]>
  • Loading branch information
torbenkeller and jochenchrist authored May 29, 2024
1 parent e727468 commit cf41b9e
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 35 deletions.
24 changes: 1 addition & 23 deletions datacontract/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from typing_extensions import Annotated

from datacontract.catalog.catalog import create_index_html, create_data_contract_html
from datacontract.data_contract import DataContract
from datacontract.data_contract import DataContract, ExportFormat
from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
from datacontract.publish.publish import publish_to_datamesh_manager

Expand Down Expand Up @@ -141,28 +141,6 @@ def test(
_handle_result(run)


class ExportFormat(str, Enum):
    """String-valued enumeration of the supported export formats.

    Every member is also a ``str``. The value is the external spelling of the
    format, which uses hyphens where the Python attribute name uses
    underscores (for example ``dbt_sources`` -> ``"dbt-sources"``).
    """

    # Declaration order is kept as-is because it is the iteration order.
    jsonschema = "jsonschema"
    pydantic_model = "pydantic-model"
    sodacl = "sodacl"
    dbt = "dbt"
    dbt_sources = "dbt-sources"
    dbt_staging_sql = "dbt-staging-sql"
    odcs = "odcs"
    rdf = "rdf"
    avro = "avro"
    protobuf = "protobuf"
    great_expectations = "great-expectations"
    terraform = "terraform"
    avro_idl = "avro-idl"
    sql = "sql"
    sql_query = "sql-query"
    html = "html"
    go = "go"
    bigquery = "bigquery"
    dbml = "dbml"


@app.command()
def export(
format: Annotated[ExportFormat, typer.Option(help="The export format.")],
Expand Down
27 changes: 26 additions & 1 deletion datacontract/data_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
import tempfile
import typing
from enum import Enum

import yaml
from pyspark.sql import SparkSession
Expand Down Expand Up @@ -49,6 +50,28 @@
from datacontract.model.run import Run, Check


class ExportFormat(str, Enum):
    """Enumeration of export format identifiers, usable wherever a ``str`` is.

    Look a member up by its value (``ExportFormat("avro-idl")``) or by its
    attribute name (``ExportFormat.avro_idl``). Values use hyphens where the
    attribute names use underscores.
    """

    # Member order is significant: it defines iteration order over the enum.
    jsonschema = "jsonschema"
    pydantic_model = "pydantic-model"
    sodacl = "sodacl"
    dbt = "dbt"
    dbt_sources = "dbt-sources"
    dbt_staging_sql = "dbt-staging-sql"
    odcs = "odcs"
    rdf = "rdf"
    avro = "avro"
    protobuf = "protobuf"
    great_expectations = "great-expectations"
    terraform = "terraform"
    avro_idl = "avro-idl"
    sql = "sql"
    sql_query = "sql-query"
    html = "html"
    go = "go"
    bigquery = "bigquery"
    dbml = "dbml"


class DataContract:
def __init__(
self,
Expand Down Expand Up @@ -275,7 +298,9 @@ def get_data_contract_specification(self) -> DataContractSpecification:
inline_quality=self._inline_quality,
)

def export(self, export_format, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto") -> str:
def export(
self, export_format: ExportFormat, model: str = "all", rdf_base: str = None, sql_server_type: str = "auto"
) -> str:
data_contract = resolve.resolve_data_contract(
self._data_contract_file,
self._data_contract_str,
Expand Down
41 changes: 39 additions & 2 deletions datacontract/web.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import Annotated, Union
from typing import Annotated, Union, Optional

import typer
from fastapi import FastAPI, File

from datacontract.data_contract import DataContract
from datacontract.data_contract import DataContract, ExportFormat
from fastapi.responses import PlainTextResponse

app = FastAPI()

Expand All @@ -12,3 +14,38 @@ def lint(file: Annotated[bytes, File()], linters: Union[str, set[str]] = "all"):
data_contract = DataContract(data_contract_str=str(file, encoding="utf-8"))
lint_result = data_contract.lint(enabled_linters=linters)
return {"result": lint_result.result, "checks": lint_result.checks}


@app.post("/export", response_class=PlainTextResponse)
def export(
    file: Annotated[bytes, File()],
    export_format: Annotated[ExportFormat, typer.Option(help="The export format.")],
    # NOTE(review): the typer.Option(...) metadata below was carried over from
    # the CLI signature; FastAPI presumably ignores it, so the help texts likely
    # do not reach the generated API docs. Consider fastapi.Query(description=...)
    # instead - TODO confirm.
    server: Annotated[Optional[str], typer.Option(help="The server name to export.")] = None,
    model: Annotated[
        str,
        typer.Option(
            help="Use the key of the model in the data contract yaml file "
            "to refer to a model, e.g., `orders`, or `all` for all "
            "models (default)."
        ),
    ] = "all",
    rdf_base: Annotated[
        Optional[str],
        typer.Option(help="[rdf] The base URI used to generate the RDF graph.", rich_help_panel="RDF Options"),
    ] = None,
    sql_server_type: Annotated[
        Optional[str],
        typer.Option(
            help="[sql] The server type to determine the sql dialect. By default, it uses 'auto' to automatically detect the sql dialect via the specified servers in the data contract.",
            rich_help_panel="SQL Options",
        ),
    ] = "auto",
):
    """Export an uploaded data contract in the requested format.

    The uploaded file is decoded as UTF-8 and passed to ``DataContract``, and
    the output of ``DataContract.export`` is returned as the plain-text body.

    Args:
        file: Raw bytes of the uploaded data contract.
        export_format: Target format, one of the ``ExportFormat`` values.
        server: Optional server name forwarded to ``DataContract``.
        model: Model key to export, or ``all`` (default).
        rdf_base: Optional base URI, forwarded to the export call.
        sql_server_type: SQL server type, forwarded to the export call;
            defaults to ``auto``.
    """
    # The upload arrives as bytes; DataContract takes the contract as text.
    data_contract = DataContract(data_contract_str=str(file, encoding="utf-8"), server=server)
    return data_contract.export(
        export_format=export_format,
        model=model,
        rdf_base=rdf_base,
        sql_server_type=sql_server_type,
    )
15 changes: 9 additions & 6 deletions tests/fixtures/local-json/datacontract.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"description": "Model representing the Consumer Price Index for Germany",
"type": "object",
"properties": {
"Statistik_Code": {
Expand Down Expand Up @@ -69,7 +68,8 @@
"description": "Consumer price index value"
},
"PREIS1__Verbraucherpreisindex__q": {
"type": "string"
"type": "string",
"description": "e\nendg\u00fcltiger Wert\n0\nweniger als die H\u00e4lfte von 1 in der letzten besetzten Stelle, jedoch mehr als nichts\n-\nnichts vorhanden\n...\nAngabe f\u00e4llt sp\u00e4ter an\n/\nkeine Angaben, da Zahlenwert nicht sicher genug\n.\nZahlenwert unbekannt oder geheimzuhalten\nx\nTabellenfach gesperrt, weil Aussage nicht sinnvoll\n()\nAussagewert eingeschr\u00e4nkt, da der Zahlenwert statistisch relativ unsicher ist\np\nvorl\u00e4ufige Zahl\nr\nberichtigte Zahl\ns\ngesch\u00e4tzte Zahl\n"
},
"Verbraucherpreisindex__CH0004": {
"type": [
Expand All @@ -79,7 +79,8 @@
"description": "Ver\u00e4nderung zum Vorjahresmonat"
},
"Verbraucherpreisindex__CH0004__q": {
"type": "string"
"type": "string",
"description": "e\nendg\u00fcltiger Wert\n0\nweniger als die H\u00e4lfte von 1 in der letzten besetzten Stelle, jedoch mehr als nichts\n-\nnichts vorhanden\n...\nAngabe f\u00e4llt sp\u00e4ter an\n/\nkeine Angaben, da Zahlenwert nicht sicher genug\n.\nZahlenwert unbekannt oder geheimzuhalten\nx\nTabellenfach gesperrt, weil Aussage nicht sinnvoll\n()\nAussagewert eingeschr\u00e4nkt, da der Zahlenwert statistisch relativ unsicher ist\np\nvorl\u00e4ufige Zahl\nr\nberichtigte Zahl\ns\ngesch\u00e4tzte Zahl\n"
},
"PREIS1__CH0005": {
"type": [
Expand All @@ -89,7 +90,8 @@
"description": "Ver\u00e4nderung zum Vormonat"
},
"PREIS1__CH0005__q": {
"type": "string"
"type": "string",
"description": "e\nendg\u00fcltiger Wert\n0\nweniger als die H\u00e4lfte von 1 in der letzten besetzten Stelle, jedoch mehr als nichts\n-\nnichts vorhanden\n...\nAngabe f\u00e4llt sp\u00e4ter an\n/\nkeine Angaben, da Zahlenwert nicht sicher genug\n.\nZahlenwert unbekannt oder geheimzuhalten\nx\nTabellenfach gesperrt, weil Aussage nicht sinnvoll\n()\nAussagewert eingeschr\u00e4nkt, da der Zahlenwert statistisch relativ unsicher ist\np\nvorl\u00e4ufige Zahl\nr\nberichtigte Zahl\ns\ngesch\u00e4tzte Zahl\n"
}
},
"required": [
Expand All @@ -110,5 +112,6 @@
"PREIS1__Verbraucherpreisindex__q",
"Verbraucherpreisindex__CH0004__q",
"PREIS1__CH0005__q"
]
}
],
"description": "Model representing the Consumer Price Index for Germany"
}
8 changes: 5 additions & 3 deletions tests/test_export_jsonschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typer.testing import CliRunner

from datacontract.cli import app
from datacontract.data_contract import DataContract
from datacontract.export.jsonschema_converter import to_jsonschemas
from datacontract.model.data_contract_specification import DataContractSpecification

Expand All @@ -19,9 +20,10 @@ def test_cli():


def test_to_jsonschemas():
data_contract_file = "fixtures/local-json/datacontract.yaml"
file_content = read_file(data_contract_file=data_contract_file)
data_contract = DataContractSpecification.from_string(file_content)
data_contract = DataContract(
data_contract_file="fixtures/local-json/datacontract.yaml", inline_definitions=True
).get_data_contract_specification()

with open("fixtures/local-json/datacontract.json") as file:
expected_json_schema = file.read()

Expand Down
17 changes: 17 additions & 0 deletions tests/test_web.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,20 @@ def test_lint():
assert response.json()["result"] == "passed"
assert len(response.json()["checks"]) == 8
assert all([check["result"] == "passed" for check in response.json()["checks"]])


def test_export_jsonschema():
    """POST the YAML fixture to /export as jsonschema and compare it with the stored JSON fixture."""
    upload_path = "fixtures/local-json/datacontract.yaml"
    with open(upload_path, "rb") as contract_file:
        upload = {"file": ("datacontract.yaml", contract_file, "application/yaml")}
        response = client.post(url="/export", files=upload, params={"export_format": "jsonschema"})
    assert response.status_code == 200
    # Printed so the actual and expected payloads show up in the output of a failing run.
    print(response.text)

    with open("fixtures/local-json/datacontract.json") as expected_file:
        expected_json_schema = expected_file.read()
    print("")
    print(expected_json_schema)
    assert response.text == expected_json_schema

0 comments on commit cf41b9e

Please sign in to comment.