diff --git a/CHANGELOG.md b/CHANGELOG.md
index debd964e..84d06556 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 ### Added
+
+- `datacontract export --format go`: Export to go types format
 - datacontract catalog: Search form
 - `datacontract import --format bigquery`: Import from BigQuery format (#110)
 - `datacontract export --format bigquery`: Export to BigQuery format (#111)
diff --git a/README.md b/README.md
index e2a0f10d..c9414b4e 100644
--- a/README.md
+++ b/README.md
@@ -563,7 +563,7 @@ models:
 ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
 │ *  --format        [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|dbt-staging  The export format. [default: None] [required]                   │
 │                    -sql|odcs|rdf|avro|protobuf|great-expectations|terraform|avro                                                                  │
-│                    -idl|sql|sql-query|html|bigquery]                                                                                              │
+│                    -idl|sql|sql-query|html|bigquery|go]                                                                                           │
 │    --output        PATH  Specify the file path where the exported data will be saved.                                                             │
 │                          If no path is provided, the output will be printed to stdout.                                                            │
 │                          [default: None]                                                                                                          │
@@ -609,7 +609,8 @@ Available export options:
 | `sql-query`          | Export to SQL Query                                 | ✅  |
 | `great-expectations` | Export to Great Expectations Suites in JSON Format | ✅  |
 | `bigquery`           | Export to BigQuery Schemas                          | ✅  |
-| `pydantic`           | Export to pydantic models                           | TBD |
+| `go`                 | Export to Go types                                  | ✅  |
+| `pydantic-model`     | Export to pydantic models                           | ✅  |
 | Missing something?   | Please create an issue on GitHub                    | TBD |
 
 #### Great Expectations
diff --git a/datacontract/cli.py b/datacontract/cli.py
index e9971e1d..d2625385 100644
--- a/datacontract/cli.py
+++ b/datacontract/cli.py
@@ -15,8 +15,10 @@
 from datacontract.catalog.catalog import create_index_html, create_data_contract_html
 from datacontract.data_contract import DataContract
 from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException
+
 from datacontract.publish.publish import publish_to_datamesh_manager
+
 console = Console()
@@ -158,6 +160,7 @@ class ExportFormat(str, Enum):
     sql = "sql"
     sql_query = "sql-query"
     html = "html"
+    go = "go"
     bigquery = "bigquery"
diff --git a/datacontract/data_contract.py b/datacontract/data_contract.py
index 0664332c..319cd275 100644
--- a/datacontract/data_contract.py
+++ b/datacontract/data_contract.py
@@ -6,13 +6,11 @@
 import yaml
 from pyspark.sql import SparkSession
 
-from datacontract.breaking.breaking import models_breaking_changes, \
-    quality_breaking_changes
+from datacontract.breaking.breaking import models_breaking_changes, quality_breaking_changes
 from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
     check_that_datacontract_contains_valid_server_configuration,
 )
-from datacontract.engines.fastjsonschema.check_jsonschema import \
-    check_jsonschema
+from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
 from datacontract.engines.soda.check_soda_execute import check_soda_execute
 from datacontract.export.avro_converter import to_avro_schema_json
 from datacontract.export.avro_idl_converter import to_avro_idl
@@ -26,6 +24,7 @@
 from datacontract.export.odcs_converter import to_odcs_yaml
 from datacontract.export.protobuf_converter import to_protobuf
 from datacontract.export.pydantic_converter import to_pydantic_model_str
+from datacontract.export.go_converter import to_go_types
 from datacontract.export.rdf_converter import to_rdf_n3
 from datacontract.export.sodacl_converter import to_sodacl_yaml
 from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
@@ -34,24 +33,18 @@
 from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
 from datacontract.imports.glue_importer import import_glue
 from datacontract.imports.sql_importer import import_sql
-from datacontract.integration.publish_datamesh_manager import \
-    publish_datamesh_manager
+from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
 from datacontract.integration.publish_opentelemetry import publish_opentelemetry
 from datacontract.lint import resolve
 from datacontract.lint.linters.description_linter import DescriptionLinter
 from datacontract.lint.linters.example_model_linter import ExampleModelLinter
 from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
-from datacontract.lint.linters.field_reference_linter import \
-    FieldReferenceLinter
+from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
 from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
-from datacontract.lint.linters.quality_schema_linter import \
-    QualityUsesSchemaLinter
-from datacontract.lint.linters.valid_constraints_linter import \
-    ValidFieldConstraintsLinter
-from datacontract.model.breaking_change import BreakingChanges, BreakingChange, \
-    Severity
-from datacontract.model.data_contract_specification import \
-    DataContractSpecification, Server
+from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
+from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
+from datacontract.model.breaking_change import BreakingChanges, BreakingChange, Severity
+from datacontract.model.data_contract_specification import DataContractSpecification, Server
 from datacontract.model.exceptions import DataContractException
 from datacontract.model.run import Run, Check
@@ -329,6 +322,8 @@ def export(self, export_format, model: str = "all", rdf_base: str = None, sql_se
             return to_pydantic_model_str(data_contract)
         if export_format == "html":
             return to_html(data_contract)
+        if export_format == "go":
+            return to_go_types(data_contract)
         if export_format == "bigquery":
             model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
             found_server = data_contract.servers.get(self._server)
diff --git a/datacontract/export/go_converter.py b/datacontract/export/go_converter.py
new file mode 100644
index 00000000..37b62e73
--- /dev/null
+++ b/datacontract/export/go_converter.py
@@ -0,0 +1,98 @@
+import datacontract.model.data_contract_specification as spec
+from typing import List
+import re
+
+
+def to_go_types(contract: spec.DataContractSpecification) -> str:
+    result = "package main\n\n"
+
+    for key in contract.models.keys():
+        go_types = generate_go_type(contract.models[key], key)
+        for go_type in go_types:
+            # print(go_type + "\n\n")
+            result += f"\n{go_type}\n"
+
+    return result
+
+
+def python_type_to_go_type(py_type) -> str:
+    match py_type:
+        case "text":
+            return "string"
+        case "timestamp":
+            return "time.Time"
+        case "long":
+            return "int64"
+        case "int":
+            return "int"
+        case "float":
+            return "float64"
+        case "boolean":
+            return "bool"
+        case _:
+            return "interface{}"
+
+
+def to_camel_case(snake_str) -> str:
+    return "".join(word.capitalize() for word in re.split(r"_|(?<!^)(?=[A-Z])", snake_str))
+
+
+def get_subtype(field_info, nested_types, type_name, camel_case_name) -> str:
+    go_type = "interface{}"
+    if field_info.fields:
+        nested_type_name = to_camel_case(f"{type_name}_{camel_case_name}")
+        nested_types[nested_type_name] = field_info.fields
+        go_type = nested_type_name
+
+    match field_info.type:
+        case "array":
+            if field_info.items:
+                item_type = get_subtype(field_info.items, nested_types, type_name, camel_case_name + "Item")
+                go_type = f"[]{item_type}"
+            else:
+                go_type = "[]interface{}"
+        case "record":
+            if field_info.fields:
+                nested_type_name = to_camel_case(f"{type_name}_{camel_case_name}")
+                nested_types[nested_type_name] = field_info.fields
+                go_type = nested_type_name
+            else:
+                go_type = "interface{}"
+        case "object":
+            pass
+        case _:
+            go_type = field_info.type
+
+    return go_type
+
+
+def generate_go_type(model, model_name) -> List[str]:
+    go_types = []
+    type_name = to_camel_case(model_name)
+    lines = [f"type {type_name} struct {{"]
+
+    nested_types = {}
+
+    for field_name, field_info in model.fields.items():
+        go_type = python_type_to_go_type(field_info.type)
+        camel_case_name = to_camel_case(field_name)
+        json_tag = field_name if field_info.required else f"{field_name},omitempty"
+        avro_tag = field_name
+
+        if go_type == "interface{}":
+            go_type = get_subtype(field_info, nested_types, type_name, camel_case_name)
+
+        go_type = go_type if field_info.required else f"*{go_type}"
+
+        lines.append(
+            f' {camel_case_name} {go_type} `json:"{json_tag}" avro:"{avro_tag}"` // {field_info.description}'
+        )
+    lines.append("}")
+    go_types.append("\n".join(lines))
+
+    for nested_type_name, nested_fields in nested_types.items():
+        nested_model = spec.Model(fields=nested_fields)
+        nested_go_types = generate_go_type(nested_model, nested_type_name)
+        go_types.extend(nested_go_types)
+
+    return go_types
diff --git a/tests/test_export_go.py b/tests/test_export_go.py
new file mode 100644
index 00000000..5bb84cfc
--- /dev/null
+++ b/tests/test_export_go.py
@@ -0,0 +1,30 @@
+import logging
+
+from typer.testing import CliRunner
+
+from datacontract.cli import app
+from datacontract.data_contract import DataContract
+
+logging.basicConfig(level=logging.DEBUG, force=True)
+
+
+def test_cli():
+    runner = CliRunner()
+    result = runner.invoke(app, ["export", "./fixtures/export/datacontract.yaml", "--format", "go"])
+    assert result.exit_code == 0
+
+
+def test_to_go_types():
+    actual = DataContract(data_contract_file="fixtures/export/datacontract.yaml").export("go")
+    expected = """
+package main
+
+
+type Orders struct {
+ OrderId varchar `json:"order_id" avro:"order_id"` // None
+ OrderTotal bigint `json:"order_total" avro:"order_total"` // The order_total field
+ OrderStatus string `json:"order_status" avro:"order_status"` // None
+}
+
+"""
+    assert actual.strip() == expected.strip()
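
Usage sketch (not part of the diff above): a minimal example of driving the new Go exporter from Python, mirroring what tests/test_export_go.py does; the fixture path is the one those tests reference and is assumed to exist in the checkout.

from datacontract.data_contract import DataContract

# Export a data contract to Go type definitions, as in test_to_go_types above.
# The fixture path is taken from the tests (an assumption outside this diff).
go_source = DataContract(data_contract_file="fixtures/export/datacontract.yaml").export("go")
print(go_source)  # prints "package main" followed by one Go struct per model

The equivalent CLI invocation, as exercised by test_cli, is: datacontract export ./fixtures/export/datacontract.yaml --format go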