Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

[Redshift] Fix for duplicated column keys #231

Merged
merged 1 commit into from
Oct 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 14 additions & 15 deletions odd_collector/adapters/redshift/adapter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from collections import defaultdict
from odd_collector_sdk.domain.adapter import BaseAdapter
from odd_models.models import DataEntity, DataEntityList
from oddrn_generator import RedshiftGenerator, Generator
Expand Down Expand Up @@ -41,10 +42,8 @@ def get_data_entity_list(self) -> DataEntityList:

self.generator.set_oddrn_paths(**{"databases": self.database})

tables_by_schema = {}
tables_by_schema = defaultdict(list)
for mtable in mtables.items:
if mtable.schema_name not in tables_by_schema:
tables_by_schema[mtable.schema_name] = []
tables_by_schema[mtable.schema_name].append(mtable)

for schema in mschemas.items:
Expand All @@ -67,27 +66,27 @@ def get_data_entity_list(self) -> DataEntityList:
],
)
except Exception as e:
logger.error("Failed to load metadata for tables", exc_info=True)
logger.error(f"Failed to load metadata for tables: {e}", exc_info=True)

@staticmethod
def append_columns(mtables: MetadataTables, mcolumns: MetadataColumns):
columns_by_table = {}
columns_by_table = defaultdict(list)
for column in mcolumns.items:
if column.table_name not in columns_by_table:
columns_by_table[column.table_name] = []
columns_by_table[column.table_name].append(column)
columns_by_table[(column.schema_name, column.table_name)].append(column)

for table in mtables.items:
table.columns = columns_by_table.get(table.table_name, [])
table.columns = columns_by_table.get(
(table.schema_name, table.table_name), []
)

@staticmethod
def append_primary_keys(mtables: MetadataTables, primary_keys: list[tuple]):
grouped_pks = {}
grouped_pks = defaultdict(list)
for pk in primary_keys:
table_name, column_name = pk
if table_name not in grouped_pks:
grouped_pks[table_name] = []
grouped_pks[table_name].append(column_name)
schema_name, table_name, column_name = pk
grouped_pks[(schema_name, table_name)].append(column_name)

for table in mtables.items:
table.primary_keys = grouped_pks.get(table.table_name, [])
table.primary_keys = grouped_pks.get(
(table.schema_name, table.table_name), []
)
1 change: 1 addition & 0 deletions odd_collector/adapters/redshift/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ def metadata_columns_external_query(self, schemas: Optional[list[str]]):
def primary_keys_query(self):
return """
select
kcu.table_schema,
kcu.table_name,
kcu.column_name
from information_schema.table_constraints tco
Expand Down