Skip to content

Commit

Permalink
UPDATE
Browse files Browse the repository at this point in the history
  • Loading branch information
simonharrer committed Apr 29, 2024
1 parent 758d974 commit bb6f099
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
2 changes: 1 addition & 1 deletion datacontract/engines/soda/check_soda_execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def check_soda_execute(

if server.type in ["s3", "azure", "local"]:
if server.format in ["json", "parquet", "csv", "delta"]:
con = get_duckdb_connection(data_contract, server)
con = get_duckdb_connection(data_contract, server, run)
scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type)
scan.set_data_source_name(server.type)
else:
Expand Down
7 changes: 5 additions & 2 deletions datacontract/engines/soda/connections/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@

from deltalake import DeltaTable

from datacontract.model.run import Run

def get_duckdb_connection(data_contract, server):

def get_duckdb_connection(data_contract, server, run: Run):
con = duckdb.connect(database=":memory:")
path: str = ""
if server.type == "local":
Expand All @@ -22,7 +24,7 @@ def get_duckdb_connection(data_contract, server):
model_path = path
if "{model}" in model_path:
model_path = model_path.format(model=model_name)
logging.info(f"Creating table {model_name} for {model_path}")
run.log_info(f"Creating table {model_name} for {model_path}")

if server.format == "json":
format = "auto"
Expand All @@ -39,6 +41,7 @@ def get_duckdb_connection(data_contract, server):
""")
elif server.format == "csv":
columns = to_csv_types(model)
run.log_info("Using columns: " + str(columns))
if columns is None:
con.sql(
f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1);"""
Expand Down

0 comments on commit bb6f099

Please sign in to comment.