Skip to content

Commit

Permalink
support decimal prec and scale when testing parquet files and in glue table import (datacontract#265)
Browse files Browse the repository at this point in the history
  • Loading branch information
samdaviestvg authored Jun 15, 2024
1 parent 69838a8 commit cd93354
Show file tree
Hide file tree
Showing 8 changed files with 49 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ tmp
/quality/
db.duckdb
.soda/
.vscode/
.duckdb/


### JetBrains template
Expand Down
3 changes: 1 addition & 2 deletions datacontract/export/sql_type_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,7 @@ def convert_to_duckdb(field: Field) -> None | str:
if type.lower() in ["time"]:
return "TIME" # TIME WITHOUT TIME ZONE
if type.lower() in ["number", "decimal", "numeric"]:
# precision and scale not supported by data contract
return "DECIMAL"
return f"DECIMAL({field.precision},{field.scale})"
if type.lower() in ["float"]:
return "FLOAT"
if type.lower() in ["double"]:
Expand Down
9 changes: 9 additions & 0 deletions datacontract/imports/glue_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,13 @@ def import_glue(data_contract_specification: DataContractSpecification, source:

fields[column["Name"]] = field

if "decimal" in column["Type"]:
# Extract precision and scale from the string
perc_scale = column["Type"][8:-1].split(',')
print(perc_scale)
field.precision = int(perc_scale[0])
field.scale = int(perc_scale[1])

data_contract_specification.models[table_name] = Model(
type="table",
fields=fields,
Expand Down Expand Up @@ -180,5 +187,7 @@ def map_type_from_sql(sql_type: str):
return "timestamp"
elif sql_type.lower().startswith("date"):
return "date"
elif sql_type.lower().startswith("decimal"):
return "decimal"
else:
return "variant"
4 changes: 4 additions & 0 deletions tests/fixtures/glue/datacontract.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ models:
type: integer
field_three:
type: timestamp
field_four:
type: decimal
precision: 6
scale: 2
part_one:
description: Partition Key
required: True
Expand Down
Binary file added tests/fixtures/parquet/data/decimal.parquet
Binary file not shown.
22 changes: 22 additions & 0 deletions tests/fixtures/parquet/datacontract_decimal.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
dataContractSpecification: 0.9.2
id: orders-unit-test
info:
title: Orders Unit Test
version: 1.0.0
servers:
production:
type: local
path: ./fixtures/parquet/data/decimal.parquet
format: parquet
dataProductId: orders
outputPortId: parquet
models:
orders:
fields:
id:
type: varchar
price:
type: decimal
precision: 6
scale: 2
description: price
2 changes: 2 additions & 0 deletions tests/test_import_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ def setup_mock_glue(aws_credentials):
"Name": "field_three",
"Type": "timestamp",
},
{ "Name": "field_four",
"Type": "decimal(6,2)"}
]
},
"PartitionKeys": [
Expand Down
9 changes: 9 additions & 0 deletions tests/test_test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,12 @@ def test_timestamp():
run = data_contract.test()
print(run.pretty())
assert run.result == "passed"


def test_decimal():
    """Run the decimal parquet data contract and expect the result "passed"."""
    contract_path = "fixtures/parquet/datacontract_decimal.yaml"
    contract = DataContract(data_contract_file=contract_path)

    outcome = contract.test()
    # Print the run's formatted output before asserting on the result.
    print(outcome.pretty())

    assert outcome.result == "passed"

0 comments on commit cd93354

Please sign in to comment.