Skip to content

Commit

Permalink
AVRO export: Logical Types should be nested (datacontract#233)
Browse files Browse the repository at this point in the history
  • Loading branch information
jochenchrist committed May 30, 2024
1 parent 1eb4f72 commit f35f839
Show file tree
Hide file tree
Showing 5 changed files with 37 additions and 29 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Test data contract against dataframes / temporary views (#175)

### Fixed
- AVRO export: Logical Types should be nested (#233)

## [0.10.6] - 2024-05-29

### Fixed
Expand Down
33 changes: 14 additions & 19 deletions datacontract/export/avro_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,20 +34,26 @@ def to_avro_field(field, field_name):
if field.description is not None:
avro_field["doc"] = field.description
avro_field["type"] = to_avro_type(field, field_name)
# add logical type definitions for any of the date type fields
if field.type in ["timestamp", "timestamp_tz", "timestamp_ntz", "date"]:
avro_field["logicalType"] = to_avro_logical_type(field.type)

if field.config:
if "avroLogicalType" in field.config:
avro_field["logicalType"] = field.config["avroLogicalType"]
if "avroDefault" in field.config:
avro_field["default"] = field.config["avroDefault"]

return avro_field


def to_avro_type(field: Field, field_name: str) -> str | dict:
if field.config:
if "avroLogicalType" in field.config and "avroType" in field.config:
return {"type": field.config["avroType"], "logicalType": field.config["avroLogicalType"]}
if "avroLogicalType" in field.config:
if field.config["avroLogicalType"] in ["timestamp-millis", "timestamp-micros", "local-timestamp-millis", "local-timestamp-micros", "time-micros"]:
return {"type": "long", "logicalType": field.config["avroLogicalType"]}
if field.config["avroLogicalType"] in ["time-millis", "date"]:
return {"type": "int", "logicalType": field.config["avroLogicalType"]}
if "avroType" in field.config:
return field.config["avroLogicalType"]

if field.type is None:
return "null"
if field.type in ["string", "varchar", "text"]:
Expand All @@ -64,11 +70,11 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
elif field.type in ["boolean"]:
return "boolean"
elif field.type in ["timestamp", "timestamp_tz"]:
return "long"
return {"type": "long", "logicalType": "timestamp-millis"}
elif field.type in ["timestamp_ntz"]:
return "long"
return {"type": "long", "logicalType": "local-timestamp-millis"}
elif field.type in ["date"]:
return "int"
return {"type": "int", "logicalType": "date"}
elif field.type in ["time"]:
return "long"
elif field.type in ["object", "record", "struct"]:
Expand All @@ -82,14 +88,3 @@ def to_avro_type(field: Field, field_name: str) -> str | dict:
return "null"
else:
return "bytes"


def to_avro_logical_type(type: str) -> str:
    """Map a field type name to the matching Avro logical type name.

    Args:
        type: Field type name. The parameter shadows the builtin ``type``;
            the name is kept so existing keyword callers keep working.

    Returns:
        ``"timestamp-millis"`` for ``"timestamp"`` and ``"timestamp_tz"``,
        ``"local-timestamp-millis"`` for ``"timestamp_ntz"``,
        ``"date"`` for ``"date"``, and an empty string for anything else.
    """
    if type in ["timestamp", "timestamp_tz"]:
        return "timestamp-millis"
    if type in ["timestamp_ntz"]:
        return "local-timestamp-millis"
    if type in ["date"]:
        return "date"
    # No logical type is associated with any other field type.
    return ""
11 changes: 7 additions & 4 deletions tests/fixtures/avro/export/datacontract_logicalType.avsc
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,13 @@
{"name": "device_id", "type": "int"},
{"name": "test_value", "type": "double"},
{"name": "num_items", "type": "int"},
{"name": "processed_timestamp",
"type": "long",
"doc": "The date the event was processed: for more info https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29",
"logicalType": "local-timestamp-micros"},
{"name": "processed_timestamp",
"type": {
"type": "long",
"logicalType": "local-timestamp-micros"
},
"doc": "The date the event was processed: for more info https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29"
},
{"name": "description", "type": "string"},
{"name": "is_processed", "type": "boolean",
"default": false}
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/avro/export/datacontract_logicalType.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ models:
required: true
description: 'The date the event was processed: for more info https://avro.apache.org/docs/current/spec.html#Local+timestamp+%28microsecond+precision%29'
config:
avroType: long
avroLogicalType: local-timestamp-micros
description:
type: string
Expand Down
18 changes: 12 additions & 6 deletions tests/fixtures/avro/export/orders_with_datefields.avsc
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,24 @@
{
"name": "orderdate",
"doc": "My Field",
"type": "int",
"logicalType": "date"
"type": {
"type": "int",
"logicalType": "date"
}
},
{
"name": "order_timestamp",
"type": "long",
"logicalType": "timestamp-millis"
"type": {
"type": "long",
"logicalType": "timestamp-millis"
}
},
{
"name": "delivery_timestamp",
"type": "long",
"logicalType": "local-timestamp-millis"
"type": {
"type": "long",
"logicalType": "local-timestamp-millis"
}
},
{
"name": "orderid",
Expand Down

0 comments on commit f35f839

Please sign in to comment.