diff --git a/datacontract/engines/soda/check_soda_execute.py b/datacontract/engines/soda/check_soda_execute.py index d88a04a2..0e52d9e5 100644 --- a/datacontract/engines/soda/check_soda_execute.py +++ b/datacontract/engines/soda/check_soda_execute.py @@ -26,7 +26,7 @@ def check_soda_execute( if server.type in ["s3", "azure", "local"]: if server.format in ["json", "parquet", "csv", "delta"]: - con = get_duckdb_connection(data_contract, server) + con = get_duckdb_connection(data_contract, server, run) scan.add_duckdb_connection(duckdb_connection=con, data_source_name=server.type) scan.set_data_source_name(server.type) else: diff --git a/datacontract/engines/soda/connections/duckdb.py b/datacontract/engines/soda/connections/duckdb.py index 54fa4136..af2325c7 100644 --- a/datacontract/engines/soda/connections/duckdb.py +++ b/datacontract/engines/soda/connections/duckdb.py @@ -6,8 +6,10 @@ from deltalake import DeltaTable +from datacontract.model.run import Run -def get_duckdb_connection(data_contract, server): + +def get_duckdb_connection(data_contract, server, run: Run): con = duckdb.connect(database=":memory:") path: str = "" if server.type == "local": @@ -22,7 +24,7 @@ def get_duckdb_connection(data_contract, server): model_path = path if "{model}" in model_path: model_path = model_path.format(model=model_name) - logging.info(f"Creating table {model_name} for {model_path}") + run.log_info(f"Creating table {model_name} for {model_path}") if server.format == "json": format = "auto" @@ -39,6 +41,7 @@ def get_duckdb_connection(data_contract, server): """) elif server.format == "csv": columns = to_csv_types(model) + run.log_info("Using columns: " + str(columns)) if columns is None: con.sql( f"""CREATE VIEW "{model_name}" AS SELECT * FROM read_csv('{model_path}', hive_partitioning=1);"""