diff --git a/CHANGELOG.md b/CHANGELOG.md index aaa0d08c..72bab437 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,10 +12,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added export format **great-expectations**: `datacontract export --format great-expectations` - Added gRPC support to OpenTelemetry integration for publishing test results - Added Databricks SQL dialect for `datacontract export --format sql` +- Added AVRO import support for namespace (#121) ### Fixed - Use `sql_type_converter` to build checks. +- Fixed AVRO import when doc is missing (#121) ## [0.9.7] - 2024-03-15 diff --git a/datacontract/imports/avro_importer.py b/datacontract/imports/avro_importer.py index 2b37a656..3ef4a064 100644 --- a/datacontract/imports/avro_importer.py +++ b/datacontract/imports/avro_importer.py @@ -10,7 +10,8 @@ def import_avro(data_contract_specification: DataContractSpecification, source: data_contract_specification.models = {} try: - avro_schema = avro.schema.parse(open(source, "rb").read()) + with open(source, "r") as file: + avro_schema = avro.schema.parse(file.read()) except Exception as e: raise DataContractException( type="schema", @@ -27,9 +28,14 @@ def import_avro(data_contract_specification: DataContractSpecification, source: data_contract_specification.models[avro_schema.name] = Model( type="table", fields=fields, - description=avro_schema.doc, ) + if avro_schema.get_prop("doc") is not None: + data_contract_specification.models[avro_schema.name].description = avro_schema.get_prop("doc") + + if avro_schema.get_prop("namespace") is not None: + data_contract_specification.models[avro_schema.name].namespace = avro_schema.get_prop("namespace") + return data_contract_specification diff --git a/datacontract/model/data_contract_specification.py b/datacontract/model/data_contract_specification.py index 42686623..8788f64d 100644 --- a/datacontract/model/data_contract_specification.py +++ b/datacontract/model/data_contract_specification.py @@ -88,6 +88,7 @@ class Field(pyd.BaseModel): class Model(pyd.BaseModel): description: str = None type: str = None + namespace: str = None fields: Dict[str, Field] = {} diff --git a/tests/examples/avro/data/orders.avsc b/tests/examples/avro/data/orders.avsc index 7dce413d..116e8fc0 100644 --- a/tests/examples/avro/data/orders.avsc +++ b/tests/examples/avro/data/orders.avsc @@ -41,5 +41,6 @@ ], "name": "orders", "doc": "My Model", - "type": "record" + "type": "record", + "namespace": "com.sample.schema" } \ No newline at end of file diff --git a/tests/test_import_avro.py b/tests/test_import_avro.py index 1f5e6baf..954a3f50 100644 --- a/tests/test_import_avro.py +++ b/tests/test_import_avro.py @@ -39,6 +39,7 @@ def test_import_avro_schema(): orders: type: table description: My Model + namespace: com.sample.schema fields: ordertime: type: long