Skip to content

Commit

Permalink
Add ability to export to go types (datacontract#195)
Browse files Browse the repository at this point in the history
* Add ability to export to go types

* add test

* rename to types

* updated naming

* update docs
  • Loading branch information
Mark Olliver authored May 15, 2024
1 parent 15e3e40 commit 2e87d90
Show file tree
Hide file tree
Showing 6 changed files with 147 additions and 18 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added

- `datacontract export --format go`: Export to Go types format
- datacontract catalog: Search form
- `datacontract import --format bigquery`: Import from BigQuery format (#110)
- `datacontract export --format bigquery`: Export to BigQuery format (#111)
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ models:
╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
│ * --format [jsonschema|pydantic-model|sodacl|dbt|dbt-sources|dbt-staging The export format. [default: None] [required]
│ -sql|odcs|rdf|avro|protobuf|great-expectations|terraform|avro │
│ -idl|sql|sql-query|html|bigquery]
│ -idl|sql|sql-query|html|bigquery|go]
│ --output PATH Specify the file path where the exported data will be saved. │
│ If no path is provided, the output will be printed to stdout. │
[default: None]
Expand Down Expand Up @@ -609,7 +609,8 @@ Available export options:
| `sql-query` | Export to SQL Query ||
| `great-expectations` | Export to Great Expectations Suites in JSON Format ||
| `bigquery` | Export to BigQuery Schemas ||
| `pydantic` | Export to pydantic models | TBD |
| `go` | Export to Go types ||
| `pydantic-model` | Export to pydantic models ||
| Missing something? | Please create an issue on GitHub | TBD |

#### Great Expectations
Expand Down
3 changes: 3 additions & 0 deletions datacontract/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@
from datacontract.catalog.catalog import create_index_html, create_data_contract_html
from datacontract.data_contract import DataContract
from datacontract.init.download_datacontract_file import download_datacontract_file, FileExistsException

from datacontract.publish.publish import publish_to_datamesh_manager


console = Console()


Expand Down Expand Up @@ -158,6 +160,7 @@ class ExportFormat(str, Enum):
sql = "sql"
sql_query = "sql-query"
html = "html"
go = "go"
bigquery = "bigquery"


Expand Down
27 changes: 11 additions & 16 deletions datacontract/data_contract.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,11 @@
import yaml
from pyspark.sql import SparkSession

from datacontract.breaking.breaking import models_breaking_changes, \
quality_breaking_changes
from datacontract.breaking.breaking import models_breaking_changes, quality_breaking_changes
from datacontract.engines.datacontract.check_that_datacontract_contains_valid_servers_configuration import (
check_that_datacontract_contains_valid_server_configuration,
)
from datacontract.engines.fastjsonschema.check_jsonschema import \
check_jsonschema
from datacontract.engines.fastjsonschema.check_jsonschema import check_jsonschema
from datacontract.engines.soda.check_soda_execute import check_soda_execute
from datacontract.export.avro_converter import to_avro_schema_json
from datacontract.export.avro_idl_converter import to_avro_idl
Expand All @@ -26,6 +24,7 @@
from datacontract.export.odcs_converter import to_odcs_yaml
from datacontract.export.protobuf_converter import to_protobuf
from datacontract.export.pydantic_converter import to_pydantic_model_str
from datacontract.export.go_converter import to_go_types
from datacontract.export.rdf_converter import to_rdf_n3
from datacontract.export.sodacl_converter import to_sodacl_yaml
from datacontract.export.sql_converter import to_sql_ddl, to_sql_query
Expand All @@ -34,24 +33,18 @@
from datacontract.imports.bigquery_importer import import_bigquery_from_api, import_bigquery_from_json
from datacontract.imports.glue_importer import import_glue
from datacontract.imports.sql_importer import import_sql
from datacontract.integration.publish_datamesh_manager import \
publish_datamesh_manager
from datacontract.integration.publish_datamesh_manager import publish_datamesh_manager
from datacontract.integration.publish_opentelemetry import publish_opentelemetry
from datacontract.lint import resolve
from datacontract.lint.linters.description_linter import DescriptionLinter
from datacontract.lint.linters.example_model_linter import ExampleModelLinter
from datacontract.lint.linters.field_pattern_linter import FieldPatternLinter
from datacontract.lint.linters.field_reference_linter import \
FieldReferenceLinter
from datacontract.lint.linters.field_reference_linter import FieldReferenceLinter
from datacontract.lint.linters.notice_period_linter import NoticePeriodLinter
from datacontract.lint.linters.quality_schema_linter import \
QualityUsesSchemaLinter
from datacontract.lint.linters.valid_constraints_linter import \
ValidFieldConstraintsLinter
from datacontract.model.breaking_change import BreakingChanges, BreakingChange, \
Severity
from datacontract.model.data_contract_specification import \
DataContractSpecification, Server
from datacontract.lint.linters.quality_schema_linter import QualityUsesSchemaLinter
from datacontract.lint.linters.valid_constraints_linter import ValidFieldConstraintsLinter
from datacontract.model.breaking_change import BreakingChanges, BreakingChange, Severity
from datacontract.model.data_contract_specification import DataContractSpecification, Server
from datacontract.model.exceptions import DataContractException
from datacontract.model.run import Run, Check

Expand Down Expand Up @@ -329,6 +322,8 @@ def export(self, export_format, model: str = "all", rdf_base: str = None, sql_se
return to_pydantic_model_str(data_contract)
if export_format == "html":
return to_html(data_contract)
if export_format == "go":
return to_go_types(data_contract)
if export_format == "bigquery":
model_name, model_value = self._check_models_for_export(data_contract, model, export_format)
found_server = data_contract.servers.get(self._server)
Expand Down
98 changes: 98 additions & 0 deletions datacontract/export/go_converter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import datacontract.model.data_contract_specification as spec
from typing import List
import re


def to_go_types(contract: spec.DataContractSpecification) -> str:
    """Render every model of a data contract as Go struct definitions.

    Args:
        contract: The data contract whose ``models`` mapping is exported.

    Returns:
        Go source text: a ``package main`` header followed by every struct
        definition produced for each model, each wrapped in newlines.
    """
    header = "package main\n\n"
    rendered = [
        f"\n{go_type}\n"
        for model_name, model in contract.models.items()
        for go_type in generate_go_type(model, model_name)
    ]
    return header + "".join(rendered)


def python_type_to_go_type(py_type) -> str:
    """Translate a data contract field type name into the matching Go type.

    Args:
        py_type: Field type name as written in the data contract.

    Returns:
        The Go type for the recognised names ``text``, ``timestamp``,
        ``long``, ``int``, ``float`` and ``boolean``; ``"interface{}"`` for
        anything else.
    """
    known_types = (
        ("text", "string"),
        ("timestamp", "time.Time"),
        ("long", "int64"),
        ("int", "int"),
        ("float", "float64"),
        ("boolean", "bool"),
    )
    # Plain equality checks in declaration order, one candidate at a time.
    for contract_type, go_type in known_types:
        if py_type == contract_type:
            return go_type
    return "interface{}"


def to_camel_case(snake_str) -> str:
    """Convert a snake_case or mixedCase identifier to UpperCamelCase.

    The input is split at every underscore and in front of every ASCII
    uppercase letter that is not the first character; each piece is
    capitalised and the pieces are joined back together.

    Args:
        snake_str: Identifier to convert, e.g. ``"order_id"``.

    Returns:
        The converted identifier, e.g. ``"OrderId"``.
    """
    pieces = re.split(r"_|(?<!^)(?=[A-Z])", snake_str)
    return "".join(map(str.capitalize, pieces))


def get_subtype(field_info, nested_types, type_name, camel_case_name) -> str:
    """Resolve the Go type of a field that has no direct primitive mapping.

    Args:
        field_info: Field definition; its ``type``, ``fields`` and ``items``
            attributes are read.
        nested_types: Mutable mapping, updated in place, from generated struct
            name to the nested ``fields`` that struct must be built from.
        type_name: Go name of the struct that owns the field.
        camel_case_name: Go name of the field itself; array elements reuse it
            with an ``Item`` suffix.

    Returns:
        The Go type expression to use for the field. Always a string, also
        when the field declares no type at all.
    """
    nested_type_name = None
    if field_info.fields:
        # A field carrying sub-fields gets its own struct, named after the
        # owning struct and the field.
        nested_type_name = to_camel_case(f"{type_name}_{camel_case_name}")
        nested_types[nested_type_name] = field_info.fields

    field_type = field_info.type

    if field_type == "array":
        if not field_info.items:
            return "[]interface{}"
        item_type = get_subtype(field_info.items, nested_types, type_name, camel_case_name + "Item")
        return f"[]{item_type}"

    if nested_type_name is not None:
        # Whatever the declared type name is ("object", "record", "struct",
        # ...), a field with sub-fields must reference the struct that was
        # just registered for it; otherwise that struct would be generated
        # but never used.
        return nested_type_name

    if field_type is None or field_type in ("record", "object"):
        # Nothing to describe the shape (or no type declared at all).
        return "interface{}"

    # Array elements arrive here without having gone through the primitive
    # mapping, so apply it now ("text" -> "string", not "[]text").
    mapped = python_type_to_go_type(field_type)
    # Type names without a Go mapping are still passed through unchanged.
    return field_type if mapped == "interface{}" else mapped


def generate_go_type(model, model_name) -> List[str]:
    """Build the Go struct for a model plus the structs of its nested fields.

    Args:
        model: Model whose ``fields`` mapping (field name -> field definition)
            is rendered.
        model_name: Name of the model; converted to CamelCase for the struct.

    Returns:
        Struct definitions as strings: the model's own struct first, followed
        by the structs generated for the nested types found in its fields.
    """
    struct_name = to_camel_case(model_name)
    pending_nested = {}
    body = [f"type {struct_name} struct {{"]

    for name, info in model.fields.items():
        member_name = to_camel_case(name)
        member_type = python_type_to_go_type(info.type)
        if member_type == "interface{}":
            # No primitive mapping: resolve arrays/records and collect any
            # nested structs that have to be generated afterwards.
            member_type = get_subtype(info, pending_nested, struct_name, member_name)

        if info.required:
            json_tag = name
        else:
            # Optional members are emitted as pointers with an omitempty tag.
            json_tag = f"{name},omitempty"
            member_type = f"*{member_type}"

        body.append(
            f' {member_name} {member_type} `json:"{json_tag}" avro:"{name}"` // {info.description}'
        )
    body.append("}")

    definitions = ["\n".join(body)]
    # Nested structs are rendered recursively by wrapping their fields in a model.
    for nested_name, nested_fields in pending_nested.items():
        definitions.extend(generate_go_type(spec.Model(fields=nested_fields), nested_name))
    return definitions
30 changes: 30 additions & 0 deletions tests/test_export_go.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import logging

from typer.testing import CliRunner

from datacontract.cli import app
from datacontract.data_contract import DataContract

logging.basicConfig(level=logging.DEBUG, force=True)


def test_cli():
    """The ``export --format go`` CLI command exits successfully."""
    cli_args = ["export", "./fixtures/export/datacontract.yaml", "--format", "go"]
    outcome = CliRunner().invoke(app, cli_args)
    assert outcome.exit_code == 0


def test_to_go_types():
    """Exporting the fixture contract to Go yields the expected struct."""
    expected = """
package main
type Orders struct {
OrderId varchar `json:"order_id" avro:"order_id"` // None
OrderTotal bigint `json:"order_total" avro:"order_total"` // The order_total field
OrderStatus string `json:"order_status" avro:"order_status"` // None
}
"""
    contract = DataContract(data_contract_file="fixtures/export/datacontract.yaml")
    actual = contract.export("go")
    # Surrounding whitespace is ignored on both sides of the comparison.
    assert actual.strip() == expected.strip()

0 comments on commit 2e87d90

Please sign in to comment.