From 0c03d0d83e82d4bc3ccdbc5b65b8791aefbc85c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Enrique=20Catal=C3=A1?= Date: Mon, 6 May 2024 22:03:41 +0200 Subject: [PATCH] Bugfix of importing avro schema that includes an array inside a nested record or object (#176) * added docker-compose.yml and documentation * add conda environment * Adding support for debugging with vscode * bugfix avro import when array type is inside a nested field of the avro schema * revert conda and vscode debug configurations --- README.md | 2 +- datacontract/imports/avro_importer.py | 2 + .../avro/data/nested_with_arrays.avsc | 121 ++++++++++++++++++ tests/test_import_avro.py | 55 ++++++++ 4 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/avro/data/nested_with_arrays.avsc diff --git a/README.md b/README.md index 4d7170de..1271d61e 100644 --- a/README.md +++ b/README.md @@ -975,4 +975,4 @@ Created by [Stefan Negele](https://www.linkedin.com/in/stefan-negele-573153112/) - + \ No newline at end of file diff --git a/datacontract/imports/avro_importer.py b/datacontract/imports/avro_importer.py index 336eb071..e219d00d 100644 --- a/datacontract/imports/avro_importer.py +++ b/datacontract/imports/avro_importer.py @@ -120,6 +120,8 @@ def map_type_from_avro(avro_type_str: str): return "boolean" elif avro_type_str == "record": return "record" + elif avro_type_str == "array": + return "array" else: raise DataContractException( type="schema", diff --git a/tests/fixtures/avro/data/nested_with_arrays.avsc b/tests/fixtures/avro/data/nested_with_arrays.avsc new file mode 100644 index 00000000..81d059ff --- /dev/null +++ b/tests/fixtures/avro/data/nested_with_arrays.avsc @@ -0,0 +1,121 @@ +{ + "fields": [ + { + "name": "Entries", + "type": { + "items": { + "fields": [ + { + "name": "Identifier", + "type": { + "logicalType": "uuid", + "type": "string" + } + }, + { + "default": null, + "name": "BranchPromo", + "type": [ + "null", + { + "fields": [ + { + "name": "CodePrefix", + "type": "int" + }, + { + "name": "Criteria", + "type": { + "fields": [ + { + "default": null, + "name": "MinimumSpendThreshold", + "type": [ + "null", + "double" + ] + }, + { + "default": null, + "name": "ApplicableBranchIDs", + "type": [ + "null", + { + "items": "string", + "type": "array" + } + ] + }, + { + "default": null, + "name": "ProductGroupDetails", + "type": [ + "null", + { + "fields": [ + { + "name": "IncludesAlcohol", + "type": "boolean" + }, + { + "default": null, + "name": "ItemList", + "type": [ + "null", + { + "items": { + "fields": [ + { + "name": "ProductID", + "type": "string" + }, + { + "default": null, + "name": "IsPromoItem", + "type": [ + "null", + "boolean" + ] + } + ], + "name": "ItemRecord", + "namespace": "domain.DemoNamespace.DemoEvent.DemoRecord.CriteriaRecord.ProductGroupDetailsRecord", + "type": "record" + }, + "type": "array" + } + ] + } + ], + "name": "ProductGroupDetailsRecord", + "namespace": "domain.DemoNamespace.DemoEvent.DemoRecord.CriteriaRecord", + "type": "record" + } + ] + } + ], + "name": "CriteriaRecord", + "namespace": "domain.DemoNamespace.DemoEvent.DemoRecord", + "type": "record" + } + } + ], + "name": "DemoRecord", + "namespace": "domain.DemoNamespace.DemoEvent", + "type": "record" + } + ] + } + ], + "name": "DemoEvent", + "namespace": "domain.DemoNamespace", + "type": "record" + }, + "type": "array" + } + } + ], + "name": "MarketingLoyaltyAggregation", + "namespace": "domain.schemas", + "type": "record" +} diff --git a/tests/test_import_avro.py b/tests/test_import_avro.py index 0f1291f5..1200374d 100644 --- a/tests/test_import_avro.py +++ b/tests/test_import_avro.py @@ -155,3 +155,58 @@ def test_import_avro_nested_records(): print("Result:\n", result.to_yaml()) assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed() + + +def test_import_avro_nested_records_with_arrays(): + result = DataContract().import_from_source("avro", "fixtures/avro/data/nested_with_arrays.avsc") + + expected = """ +dataContractSpecification: 0.9.3 +id: my-data-contract-id +info: + title: My Data Contract + version: 0.0.1 +models: + MarketingLoyaltyAggregation: + namespace: domain.schemas + fields: + Entries: + type: array + required: true + items: + type: object + fields: + Identifier: + type: string + required: true + BranchPromo: + type: record + required: false + fields: + CodePrefix: + type: int + required: true + Criteria: + type: object + required: true + fields: + MinimumSpendThreshold: + type: double + required: false + ApplicableBranchIDs: + type: array + required: false + ProductGroupDetails: + type: record + required: false + fields: + IncludesAlcohol: + type: boolean + required: true + ItemList: + type: array + required: false +""" + print("Result:\n", result.to_yaml()) + assert yaml.safe_load(result.to_yaml()) == yaml.safe_load(expected) + assert DataContract(data_contract_str=expected).lint(enabled_linters="none").has_passed()