From cf41b9efa44580dc035970d322e2dc6d78db4bc0 Mon Sep 17 00:00:00 2001 From: Torben Keller <33001558+torbenkeller@users.noreply.github.com> Date: Wed, 29 May 2024 19:12:06 +0200 Subject: [PATCH] add export endpoint to datacontract api (#221) * add export endpoint to datacontract api * ran ruff format --------- Co-authored-by: jochenchrist --- datacontract/cli.py | 24 +----------- datacontract/data_contract.py | 27 +++++++++++++- datacontract/web.py | 41 ++++++++++++++++++++- tests/fixtures/local-json/datacontract.json | 15 +++++--- tests/test_export_jsonschema.py | 8 ++-- tests/test_web.py | 17 +++++++++ 6 files changed, 97 insertions(+), 35 deletions(-) diff --git a/datacontract/cli.py b/datacontract/cli.py index 45524cdc..c1a6cb4e 100644 --- a/datacontract/cli.py +++ b/datacontract/cli.py @@ -13,7 +13,7 @@ from typing_extensions import Annotated from datacontract.catalog.catalog import create_index_html, create_data_contract_html -from datacontract.data_contract import DataContract +from datacontract.data_contract import DataContract, ExportFormat from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException from datacontract.publish.publish import publish_to_datamesh_manager @@ -141,28 +141,6 @@ def test( _handle_result(run) -class ExportFormat(str, Enum): - jsonschema = "jsonschema" - pydantic_model = "pydantic-model" - sodacl = "sodacl" - dbt = "dbt" - dbt_sources = "dbt-sources" - dbt_staging_sql = "dbt-staging-sql" - odcs = "odcs" - rdf = "rdf" - avro = "avro" - protobuf = "protobuf" - great_expectations = "great-expectations" - terraform = "terraform" - avro_idl = "avro-idl" - sql = "sql" - sql_query = "sql-query" - html = "html" - go = "go" - bigquery = "bigquery" - dbml = "dbml" - - @app.command() def export( format: Annotated[ExportFormat, typer.Option(help="The export format.")], diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py index 3e0c33a7..2a019384 100644 --- 
class ExportFormat(str, Enum):
    """Names of the export targets a data contract can be converted to.

    The ``str`` mix-in makes every member compare equal to its plain string
    value, so a member can be used wherever the raw format name is expected.
    Member names use underscores; the string values use hyphens.
    """

    # Schema and model definitions
    jsonschema = "jsonschema"
    pydantic_model = "pydantic-model"

    # Data quality and transformation tooling
    sodacl = "sodacl"
    dbt = "dbt"
    dbt_sources = "dbt-sources"
    dbt_staging_sql = "dbt-staging-sql"

    # Interchange and serialization formats
    odcs = "odcs"
    rdf = "rdf"
    avro = "avro"
    protobuf = "protobuf"
    great_expectations = "great-expectations"
    terraform = "terraform"
    avro_idl = "avro-idl"

    # SQL output
    sql = "sql"
    sql_query = "sql-query"

    # Everything else
    html = "html"
    go = "go"
    bigquery = "bigquery"
    dbml = "dbml"
@app.post("/export", response_class=PlainTextResponse)
def export(
    file: Annotated[bytes, File()],
    export_format: ExportFormat,
    server: Optional[str] = None,
    model: str = "all",
    rdf_base: Optional[str] = None,
    sql_server_type: Optional[str] = "auto",
):
    """Export an uploaded data contract to the requested format.

    The uploaded file is decoded as UTF-8 and the export result is returned
    as plain text.

    - **file**: the data contract document, sent as a multipart file upload.
    - **export_format**: the export format.
    - **server**: the server name to export.
    - **model**: use the key of the model in the data contract yaml file to
      refer to a model, e.g., `orders`, or `all` for all models (default).
    - **rdf_base**: [rdf] the base URI used to generate the RDF graph.
    - **sql_server_type**: [sql] the server type to determine the sql dialect.
      By default, it uses 'auto' to automatically detect the sql dialect via
      the specified servers in the data contract.
    """
    # Parameter help lives in the docstring on purpose: typer.Option objects
    # are CLI metadata that FastAPI does not interpret, whereas the docstring
    # is published as the operation description in the OpenAPI schema.
    data_contract = DataContract(data_contract_str=file.decode("utf-8"), server=server)
    return data_contract.export(
        export_format=export_format,
        model=model,
        rdf_base=rdf_base,
        sql_server_type=sql_server_type,
    )
Zahlenwert nicht sicher genug\n.\nZahlenwert unbekannt oder geheimzuhalten\nx\nTabellenfach gesperrt, weil Aussage nicht sinnvoll\n()\nAussagewert eingeschr\u00e4nkt, da der Zahlenwert statistisch relativ unsicher ist\np\nvorl\u00e4ufige Zahl\nr\nberichtigte Zahl\ns\ngesch\u00e4tzte Zahl\n" }, "Verbraucherpreisindex__CH0004": { "type": [ @@ -79,7 +79,8 @@ "description": "Ver\u00e4nderung zum Vorjahresmonat" }, "Verbraucherpreisindex__CH0004__q": { - "type": "string" + "type": "string", + "description": "e\nendg\u00fcltiger Wert\n0\nweniger als die H\u00e4lfte von 1 in der letzten besetzten Stelle, jedoch mehr als nichts\n-\nnichts vorhanden\n...\nAngabe f\u00e4llt sp\u00e4ter an\n/\nkeine Angaben, da Zahlenwert nicht sicher genug\n.\nZahlenwert unbekannt oder geheimzuhalten\nx\nTabellenfach gesperrt, weil Aussage nicht sinnvoll\n()\nAussagewert eingeschr\u00e4nkt, da der Zahlenwert statistisch relativ unsicher ist\np\nvorl\u00e4ufige Zahl\nr\nberichtigte Zahl\ns\ngesch\u00e4tzte Zahl\n" }, "PREIS1__CH0005": { "type": [ @@ -89,7 +90,8 @@ "description": "Ver\u00e4nderung zum Vormonat" }, "PREIS1__CH0005__q": { - "type": "string" + "type": "string", + "description": "e\nendg\u00fcltiger Wert\n0\nweniger als die H\u00e4lfte von 1 in der letzten besetzten Stelle, jedoch mehr als nichts\n-\nnichts vorhanden\n...\nAngabe f\u00e4llt sp\u00e4ter an\n/\nkeine Angaben, da Zahlenwert nicht sicher genug\n.\nZahlenwert unbekannt oder geheimzuhalten\nx\nTabellenfach gesperrt, weil Aussage nicht sinnvoll\n()\nAussagewert eingeschr\u00e4nkt, da der Zahlenwert statistisch relativ unsicher ist\np\nvorl\u00e4ufige Zahl\nr\nberichtigte Zahl\ns\ngesch\u00e4tzte Zahl\n" } }, "required": [ @@ -110,5 +112,6 @@ "PREIS1__Verbraucherpreisindex__q", "Verbraucherpreisindex__CH0004__q", "PREIS1__CH0005__q" - ] -} + ], + "description": "Model representing the Consumer Price Index for Germany" +} \ No newline at end of file diff --git a/tests/test_export_jsonschema.py 
def test_export_jsonschema():
    """POST a data contract to /export and expect the stored JSON Schema back."""
    with open("fixtures/local-json/datacontract.yaml", "rb") as f:
        response = client.post(
            url="/export",
            files={"file": ("datacontract.yaml", f, "application/yaml")},
            params={"export_format": "jsonschema"},
        )
    assert response.status_code == 200

    # Explicit encoding keeps the read independent of the platform default.
    with open("fixtures/local-json/datacontract.json", encoding="utf-8") as file:
        expected_json_schema = file.read()

    # No debug printing needed: pytest shows both operands when this fails.
    assert response.text == expected_json_schema