forked from datacontract/datacontract-cli
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add sql and sql-query as export formats.
- Loading branch information
1 parent
7a0d631
commit 9276c2c
Showing
13 changed files
with
406 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
from datacontract.export.sql_type_converter import convert_to_sql_type | ||
from datacontract.model.data_contract_specification import \ | ||
DataContractSpecification, Model | ||
|
||
|
||
def to_sql_query(data_contract_spec: DataContractSpecification, model_name: str, model_value: Model, server_type: str = "snowflake") -> str:
    """Render a commented ``select`` statement for a single model.

    Args:
        data_contract_spec: The data contract the model belongs to. Only its
            ``id`` and ``models`` attributes are read here.
        model_name: Name used as the table in the ``from`` clause.
        model_value: The model whose fields become the selected columns.
        server_type: SQL dialect name written into the header comment and
            passed on to the query builder.

    Returns:
        Two ``--`` header lines (contract id and dialect) followed by the
        query text, or an empty string when the contract is ``None`` or
        declares no models.
    """
    has_no_models = (
        data_contract_spec is None
        or data_contract_spec.models is None
        or len(data_contract_spec.models) == 0
    )
    if has_no_models:
        return ""

    header = (
        f"-- Data Contract: {data_contract_spec.id}\n"
        f"-- SQL Dialect: {server_type}\n"
    )
    return header + _to_sql_query(model_name, model_value, server_type)
|
||
|
||
def _to_sql_query(model_name, model_value, server_type) -> str:
    """Build a ``select ... from`` statement listing every field of a model.

    Args:
        model_name: Name placed after ``from``.
        model_value: Object whose ``fields`` mapping is keyed by column name;
            only the keys are used.
        server_type: SQL dialect name. Currently unused; kept for the
            reserved-word escaping mentioned in the TODO below.

    Returns:
        The statement text, one column per line separated by commas and
        terminated by a newline. A model without fields yields
        ``"select\\nfrom <model_name>\\n"``.
    """
    # TODO escape SQL reserved key words, probably dependent on server type
    # Iterating the mapping yields the field names directly; the field
    # definitions themselves are not needed to build the column list.
    column_lines = ",\n".join(f" {field_name}" for field_name in model_value.fields)
    if column_lines:
        # Every column line, including the last one, ends with a newline.
        column_lines += "\n"
    return f"select\n{column_lines}from {model_name}\n"
|
||
|
||
def to_sql_ddl(data_contract_spec: DataContractSpecification, server_type: str = "snowflake") -> str:
    """Render ``CREATE TABLE`` statements for every model of a data contract.

    The dialect is taken from the first declared server whose type is either
    the requested ``server_type``, ``"snowflake"`` or ``"postgres"``. When no
    server matches (or none are declared) ``server_type`` is used unchanged.

    Args:
        data_contract_spec: The data contract to export. Its ``id``,
            ``models`` and ``servers`` attributes are read.
        server_type: Preferred SQL dialect name.

    Returns:
        Two ``--`` header lines (contract id and dialect) followed by one
        ``CREATE TABLE`` statement per model, with surrounding whitespace
        stripped. An empty string when the contract is ``None`` or declares
        no models.
    """
    if data_contract_spec is None:
        return ""
    if data_contract_spec.models is None or len(data_contract_spec.models) == 0:
        return ""

    # A contract without a servers section must not crash the export, so a
    # missing mapping is treated the same as an empty one.
    servers = data_contract_spec.servers or {}
    recognised_types = (server_type, "snowflake", "postgres")
    for server in servers.values():
        if server.type in recognised_types:
            # The first recognised server decides the dialect.
            server_type = server.type
            break

    header = (
        f"-- Data Contract: {data_contract_spec.id}\n"
        f"-- SQL Dialect: {server_type}\n"
    )
    tables = "".join(
        _to_sql_table(model_name, model, server_type)
        for model_name, model in data_contract_spec.models.items()
    )
    return (header + tables).strip()
|
||
|
||
def _to_sql_table(model_name, model, server_type="snowflake"):
    """Render a single ``CREATE TABLE`` statement for one model.

    Args:
        model_name: Table name placed after ``CREATE TABLE``.
        model: Object whose ``fields`` mapping is keyed by column name. Each
            field's ``required`` and ``primary`` flags are read.
        server_type: SQL dialect name handed to ``convert_to_sql_type``.

    Returns:
        The statement text ending in ``");\\n"``, one column definition per
        line, comma-separated.
    """
    column_definitions = []
    for field_name, field in model.fields.items():
        # Named ``sql_type`` so the builtin ``type`` is not shadowed.
        # NOTE(review): if the converter yields None for an unmapped type it
        # is rendered literally as "None" here - confirm desired handling.
        sql_type = convert_to_sql_type(field, server_type)
        definition = f" {field_name} {sql_type}"
        if field.required:
            definition += " not null"
        if field.primary:
            definition += " primary key"
        column_definitions.append(definition)

    columns = ",\n".join(column_definitions)
    if columns:
        # Every column line, including the last one, ends with a newline.
        columns += "\n"
    return f"CREATE TABLE {model_name} (\n{columns});\n"
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
from datacontract.model.data_contract_specification import Field | ||
|
||
|
||
def convert_to_sql_type(field: Field, server_type: str) -> str:
    """Map a data contract field to a column type for the given SQL dialect.

    Args:
        field: The field whose ``type`` is converted.
        server_type: SQL dialect name; ``"snowflake"`` and ``"postgres"``
            have dedicated converters.

    Returns:
        The dialect-specific type from the matching converter. For any other
        dialect the field's own ``type`` value is returned unchanged.
    """
    if server_type == "snowflake":
        return convert_to_snowflake(field)
    if server_type == "postgres":
        return convert_type_to_postgres(field)
    # Unknown dialect: pass the contract's type name through. The earlier
    # ``str(type)`` referred to the builtin ``type`` class and therefore
    # always produced "<class 'type'>".
    return field.type
|
||
# snowflake data types: | ||
# https://docs.snowflake.com/en/sql-reference/data-types.html | ||
def convert_to_snowflake(field) -> None | str:
    """Map a data contract field type to a Snowflake column type.

    Matching is case-insensitive.

    Args:
        field: Object whose ``type`` attribute holds the contract type name.

    Returns:
        The Snowflake type name, or ``None`` when the field has no type or
        the type has no mapping.
    """
    # currently optimized for snowflake
    # LEARNING: data contract has no direct support for CHAR,CHARACTER
    # LEARNING: data contract has no support for "date-time", "datetime", "time"
    # LEARNING: No precision and scale support in data contract
    # LEARNING: no support for any
    # GEOGRAPHY and GEOMETRY are not supported by the mapping
    contract_type = field.type
    if contract_type is None:
        return None

    # Normalise once instead of lower-casing on every comparison.
    normalized = contract_type.lower()
    if normalized in ("string", "varchar", "text"):
        # STRING, TEXT, VARCHAR are all the same in snowflake, so the
        # author's spelling is kept and only upper-cased.
        return contract_type.upper()

    snowflake_types = {
        "timestamp": "TIMESTAMP_TZ",
        "timestamp_tz": "TIMESTAMP_TZ",
        "timestamp_ntz": "TIMESTAMP_NTZ",
        "date": "DATE",
        "time": "TIME",
        # precision and scale not supported by data contract
        "number": "NUMBER",
        "decimal": "NUMBER",
        "numeric": "NUMBER",
        "float": "FLOAT",
        "double": "FLOAT",
        # always NUMBER(38,0)
        "integer": "NUMBER",
        "int": "NUMBER",
        "long": "NUMBER",
        "bigint": "NUMBER",
        "boolean": "BOOLEAN",
        "object": "OBJECT",
        "record": "OBJECT",
        "struct": "OBJECT",
        "bytes": "BINARY",
        "array": "ARRAY",
    }
    return snowflake_types.get(normalized)
|
||
|
||
|
||
# https://www.postgresql.org/docs/current/datatype.html | ||
# Using the name whenever possible | ||
def convert_type_to_postgres(field: Field) -> None | str:
    """Map a data contract field type to a PostgreSQL column type.

    Matching is case-insensitive. String-like fields whose ``format`` is
    ``"uuid"`` become ``uuid``; arrays become the element type followed by
    ``[]``.

    Args:
        field: The field to convert. Its ``type`` is always read; ``format``
            is read for string-like types and ``items`` for arrays.

    Returns:
        The PostgreSQL type name, or ``None`` when the field has no type,
        the type has no mapping, or an array's element type cannot be
        determined.
    """
    contract_type = field.type
    if contract_type is None:
        return None

    # Normalise once instead of lower-casing on every comparison.
    normalized = contract_type.lower()

    if normalized in ("string", "varchar", "text"):
        if field.format == "uuid":
            return "uuid"
        # STRING does not exist, TEXT and VARCHAR are all the same in postgres
        return "text"

    if normalized == "array":
        element = field.items
        if element is None:
            # No element definition: nothing sensible to emit.
            return None
        element_type = convert_type_to_postgres(element)
        # An unmapped element type must not be concatenated with "[]".
        return None if element_type is None else f"{element_type}[]"

    postgres_types = {
        "timestamp": "timestamptz",
        "timestamp_tz": "timestamptz",
        "timestamp_ntz": "timestamp",
        "date": "date",
        "time": "time",
        # precision and scale not supported by data contract
        "number": "numeric",
        "decimal": "decimal",
        "numeric": "numeric",
        "float": "real",
        "double": "double precision",
        "integer": "integer",
        "int": "int",
        "bigint": "bigint",
        "long": "bigint",
        "boolean": "boolean",
        "object": "jsonb",
        "record": "jsonb",
        "struct": "jsonb",
        "bytes": "bytea",
    }
    return postgres_types.get(normalized)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
-- Fixture script: creates public.my_table and loads ten sample rows.
-- Create the table | ||
-- field_one is the primary key, field_two is mandatory, field_three is nullable.
CREATE TABLE public.my_table ( | ||
field_one VARCHAR(10) primary key, | ||
field_two INT not null, | ||
field_three TIMESTAMP | ||
); | ||
|
||
-- Insert the data | ||
-- Each row supplies a value for all three columns.
INSERT INTO public.my_table (field_one, field_two, field_three) VALUES | ||
('CX-263-DU', 50, '2023-06-16 13:12:56'), | ||
('IK-894-MN', 47, '2023-10-08 22:40:57'), | ||
('ER-399-JY', 22, '2023-05-16 01:08:22'), | ||
('MT-939-FH', 63, '2023-03-15 05:15:21'), | ||
('LV-849-MI', 33, '2023-09-08 20:08:43'), | ||
('VS-079-OH', 85, '2023-04-15 00:50:32'), | ||
('DN-297-XY', 79, '2023-11-08 12:55:42'), | ||
('ZE-172-FP', 14, '2023-12-03 18:38:38'), | ||
('ID-840-EG', 89, '2023-10-02 17:17:58'), | ||
('FK-230-KZ', 64, '2023-11-27 15:21:48'); |
Oops, something went wrong.