Add precision/scale support for decimal fields (Glue import, DuckDB export).

Review fixes folded into this patch:
- convert_to_duckdb: return a bare DECIMAL when the field has no precision
  instead of the invalid "DECIMAL(None,None)"; a missing scale defaults to 0.
- import_glue: remove the leftover debug print and only parse column types
  that are exactly decimal(p) or decimal(p,s), compared case-insensitively
  like map_type_from_sql does.
- tests: format the new Glue column like its sibling entries and end the new
  YAML fixture with a newline.

NOTE(review): this patch arrived with its whitespace collapsed. Indentation
and blank lines of context lines are reconstructed; verify them against the
tree before applying. Blob hashes on the "index" lines predate the fixes.

diff --git a/.gitignore b/.gitignore
index 91b6beef..44528a58 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,8 @@ tmp
 /quality/
 db.duckdb
 .soda/
+.vscode/
+.duckdb/
 
 
 ### JetBrains template
diff --git a/datacontract/export/sql_type_converter.py b/datacontract/export/sql_type_converter.py
index 4e7ca3c0..2525c11f 100644
--- a/datacontract/export/sql_type_converter.py
+++ b/datacontract/export/sql_type_converter.py
@@ -161,8 +161,12 @@ def convert_to_duckdb(field: Field) -> None | str:
     if type.lower() in ["time"]:
         return "TIME"  # TIME WITHOUT TIME ZONE
     if type.lower() in ["number", "decimal", "numeric"]:
-        # precision and scale not supported by data contract
-        return "DECIMAL"
+        # precision and scale are optional in the contract; without a
+        # precision keep the plain DECIMAL that was returned before
+        if field.precision is None:
+            return "DECIMAL"
+        scale = 0 if field.scale is None else field.scale
+        return f"DECIMAL({field.precision},{scale})"
     if type.lower() in ["float"]:
         return "FLOAT"
     if type.lower() in ["double"]:
diff --git a/datacontract/imports/glue_importer.py b/datacontract/imports/glue_importer.py
index c17d6795..94464700 100644
--- a/datacontract/imports/glue_importer.py
+++ b/datacontract/imports/glue_importer.py
@@ -142,6 +142,13 @@ def import_glue(data_contract_specification: DataContractSpecification, source:
 
             fields[column["Name"]] = field
 
+            column_type = column["Type"].strip().lower()
+            if column_type.startswith("decimal(") and column_type.endswith(")"):
+                # "decimal(p)" or "decimal(p,s)"; a missing scale means 0
+                precision, _, scale = column_type[len("decimal("):-1].partition(",")
+                field.precision = int(precision)
+                field.scale = int(scale) if scale.strip() else 0
+
         data_contract_specification.models[table_name] = Model(
             type="table",
             fields=fields,
@@ -180,5 +187,7 @@ def map_type_from_sql(sql_type: str):
         return "timestamp"
     elif sql_type.lower().startswith("date"):
         return "date"
+    elif sql_type.lower().startswith("decimal"):
+        return "decimal"
     else:
         return "variant"
diff --git a/tests/fixtures/glue/datacontract.yaml b/tests/fixtures/glue/datacontract.yaml
index 5696fd49..7778e5f7 100644
--- a/tests/fixtures/glue/datacontract.yaml
+++ b/tests/fixtures/glue/datacontract.yaml
@@ -19,6 +19,10 @@ models:
         type: integer
       field_three:
         type: timestamp
+      field_four:
+        type: decimal
+        precision: 6
+        scale: 2
       part_one:
         description: Partition Key
         required: True
diff --git a/tests/fixtures/parquet/data/decimal.parquet b/tests/fixtures/parquet/data/decimal.parquet
new file mode 100644
index 00000000..50cc2a2b
Binary files /dev/null and b/tests/fixtures/parquet/data/decimal.parquet differ
diff --git a/tests/fixtures/parquet/datacontract_decimal.yaml b/tests/fixtures/parquet/datacontract_decimal.yaml
new file mode 100644
index 00000000..eb0f8ca7
--- /dev/null
+++ b/tests/fixtures/parquet/datacontract_decimal.yaml
@@ -0,0 +1,22 @@
+dataContractSpecification: 0.9.2
+id: orders-unit-test
+info:
+  title: Orders Unit Test
+  version: 1.0.0
+servers:
+  production:
+    type: local
+    path: ./fixtures/parquet/data/decimal.parquet
+    format: parquet
+    dataProductId: orders
+    outputPortId: parquet
+models:
+  orders:
+    fields:
+      id:
+        type: varchar
+      price:
+        type: decimal
+        precision: 6
+        scale: 2
+        description: price
diff --git a/tests/test_import_glue.py b/tests/test_import_glue.py
index 3fbf6fd9..3d82ffad 100644
--- a/tests/test_import_glue.py
+++ b/tests/test_import_glue.py
@@ -55,6 +55,10 @@ def setup_mock_glue(aws_credentials):
                         "Name": "field_three",
                         "Type": "timestamp",
                     },
+                    {
+                        "Name": "field_four",
+                        "Type": "decimal(6,2)",
+                    },
                 ]
             },
             "PartitionKeys": [
diff --git a/tests/test_test_parquet.py b/tests/test_test_parquet.py
index 966a742c..f170fbea 100644
--- a/tests/test_test_parquet.py
+++ b/tests/test_test_parquet.py
@@ -48,3 +48,12 @@ def test_timestamp():
     run = data_contract.test()
     print(run.pretty())
     assert run.result == "passed"
+
+
+def test_decimal():
+    data_contract = DataContract(
+        data_contract_file="fixtures/parquet/datacontract_decimal.yaml",
+    )
+    run = data_contract.test()
+    print(run.pretty())
+    assert run.result == "passed"