From f600da6db609afd24191d8eb80aab2d62d81573a Mon Sep 17 00:00:00 2001 From: Mateusz Kulas Date: Mon, 2 Oct 2023 13:15:50 +0200 Subject: [PATCH] [Redshift] Fix for duplicated column keys --- odd_collector/adapters/redshift/adapter.py | 29 +++++++++---------- odd_collector/adapters/redshift/repository.py | 1 + 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/odd_collector/adapters/redshift/adapter.py b/odd_collector/adapters/redshift/adapter.py index 375e7402..4cddbb64 100644 --- a/odd_collector/adapters/redshift/adapter.py +++ b/odd_collector/adapters/redshift/adapter.py @@ -1,3 +1,4 @@ +from collections import defaultdict from odd_collector_sdk.domain.adapter import BaseAdapter from odd_models.models import DataEntity, DataEntityList from oddrn_generator import RedshiftGenerator, Generator @@ -41,10 +42,8 @@ def get_data_entity_list(self) -> DataEntityList: self.generator.set_oddrn_paths(**{"databases": self.database}) - tables_by_schema = {} + tables_by_schema = defaultdict(list) for mtable in mtables.items: - if mtable.schema_name not in tables_by_schema: - tables_by_schema[mtable.schema_name] = [] tables_by_schema[mtable.schema_name].append(mtable) for schema in mschemas.items: @@ -67,27 +66,27 @@ def get_data_entity_list(self) -> DataEntityList: ], ) except Exception as e: - logger.error("Failed to load metadata for tables", exc_info=True) + logger.error(f"Failed to load metadata for tables: {e}", exc_info=True) @staticmethod def append_columns(mtables: MetadataTables, mcolumns: MetadataColumns): - columns_by_table = {} + columns_by_table = defaultdict(list) for column in mcolumns.items: - if column.table_name not in columns_by_table: - columns_by_table[column.table_name] = [] - columns_by_table[column.table_name].append(column) + columns_by_table[(column.schema_name, column.table_name)].append(column) for table in mtables.items: - table.columns = columns_by_table.get(table.table_name, []) + table.columns = columns_by_table.get( + (table.schema_name, table.table_name), [] + ) @staticmethod def append_primary_keys(mtables: MetadataTables, primary_keys: list[tuple]): - grouped_pks = {} + grouped_pks = defaultdict(list) for pk in primary_keys: - table_name, column_name = pk - if table_name not in grouped_pks: - grouped_pks[table_name] = [] - grouped_pks[table_name].append(column_name) + schema_name, table_name, column_name = pk + grouped_pks[(schema_name, table_name)].append(column_name) for table in mtables.items: - table.primary_keys = grouped_pks.get(table.table_name, []) + table.primary_keys = grouped_pks.get( + (table.schema_name, table.table_name), [] + ) diff --git a/odd_collector/adapters/redshift/repository.py b/odd_collector/adapters/redshift/repository.py index 989746c1..3346c46e 100644 --- a/odd_collector/adapters/redshift/repository.py +++ b/odd_collector/adapters/redshift/repository.py @@ -273,6 +273,7 @@ def metadata_columns_external_query(self, schemas: Optional[list[str]]): def primary_keys_query(self): return """ select + kcu.table_schema, kcu.table_name, kcu.column_name from information_schema.table_constraints tco