Skip to content
This repository has been archived by the owner on Nov 6, 2023. It is now read-only.

Commit

Permalink
[Redshift] Fix for duplicated column keys
Browse files: browse the repository at this point in the history
  • Loading branch information
Mateusz Kulas committed Oct 2, 2023
1 parent 4d5e064 commit f600da6
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 15 deletions.
29 changes: 14 additions & 15 deletions odd_collector/adapters/redshift/adapter.py
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
from collections import defaultdict
from odd_collector_sdk.domain.adapter import BaseAdapter
from odd_models.models import DataEntity, DataEntityList
from oddrn_generator import RedshiftGenerator, Generator
[Expand Down] [Expand Up]
@@ -41,10 +42,8 @@ def get_data_entity_list(self) -> DataEntityList:

self.generator.set_oddrn_paths(**{"databases": self.database})

tables_by_schema = {}
tables_by_schema = defaultdict(list)
for mtable in mtables.items:
if mtable.schema_name not in tables_by_schema:
tables_by_schema[mtable.schema_name] = []
tables_by_schema[mtable.schema_name].append(mtable)

for schema in mschemas.items:
[Expand All]
@@ -67,27 +66,27 @@ def get_data_entity_list(self) -> DataEntityList:
],
)
except Exception as e:
logger.error("Failed to load metadata for tables", exc_info=True)
logger.error(f"Failed to load metadata for tables: {e}", exc_info=True)

@staticmethod
def append_columns(mtables: MetadataTables, mcolumns: MetadataColumns):
columns_by_table = {}
columns_by_table = defaultdict(list)
for column in mcolumns.items:
if column.table_name not in columns_by_table:
columns_by_table[column.table_name] = []
columns_by_table[column.table_name].append(column)
columns_by_table[(column.schema_name, column.table_name)].append(column)

for table in mtables.items:
table.columns = columns_by_table.get(table.table_name, [])
table.columns = columns_by_table.get(
(table.schema_name, table.table_name), []
)

@staticmethod
def append_primary_keys(mtables: MetadataTables, primary_keys: list[tuple]):
grouped_pks = {}
grouped_pks = defaultdict(list)
for pk in primary_keys:
table_name, column_name = pk
if table_name not in grouped_pks:
grouped_pks[table_name] = []
grouped_pks[table_name].append(column_name)
schema_name, table_name, column_name = pk
grouped_pks[(schema_name, table_name)].append(column_name)

for table in mtables.items:
table.primary_keys = grouped_pks.get(table.table_name, [])
table.primary_keys = grouped_pks.get(
(table.schema_name, table.table_name), []
)
1 change: 1 addition & 0 deletions odd_collector/adapters/redshift/repository.py
Original file line number | Diff line number | Diff line change
[Expand Up]
@@ -273,6 +273,7 @@ def metadata_columns_external_query(self, schemas: Optional[list[str]]):
def primary_keys_query(self):
return """
select
kcu.table_schema,
kcu.table_name,
kcu.column_name
from information_schema.table_constraints tco
Expand Down

0 comments on commit f600da6

Please sign in to comment.