Skip to content

Commit

Permalink
fix(mapper): improper array context shift was leading to dropping/in…
Browse files Browse the repository at this point in the history
…valid result
  • Loading branch information
yshalenyk committed Oct 4, 2024
1 parent 34ba459 commit 4e19fef
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions nightingale/mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def map(self, loader: Any, validate_mapping: bool = False) -> list[dict[str, Any

logger.info("MappingTemplate data loaded")
data = loader.load(config.selector)
logger.info("Source data is loaded...")
logger.info("Start fetching rows from datasource")
if validate_mapping:
logger.info("Validating mapping template...")
validator = MappingTemplateValidator(loader, self.mapping)
Expand All @@ -83,10 +83,12 @@ def transform_data(
curr_release = {}
array_counters = {}
mapped = []
count = 0

ocid_mapping = mapping.get_ocid_mapping()
for row in data:
ocid = row.get(ocid_mapping, "")

if not ocid:
logger.warning(f"No OCID found in row: {row}. Skipping.")
continue
Expand All @@ -101,6 +103,8 @@ def transform_data(
curr_release = self.transform_row(
row, mapping, mapping.get_schema(), curr_release, array_counters=array_counters, codelists=codelists
)
count += 1
logger.info(f"Processed {count} rows")

if curr_release:
self.finish_release(curr_ocid, curr_release, mapped)
Expand Down Expand Up @@ -143,16 +147,21 @@ def transform_row(
# XXX: some code duplication is present here; refactoring may be needed
def set_nested_value(nested_dict, keys, value, schema, add_new=False, append_once=False):
value = self.map_codelist_value(keys, schema, codelists, value)
last_key = keys[-1]
keys_path = "/" + "/".join(keys)

for i, key in enumerate(keys[:-1]):
subpath = "/" + "/".join(keys[: i + 1])
if isinstance(nested_dict, list):
nested_dict = self.shift_current_array(nested_dict, subpath, array_counters)
if key not in nested_dict:
nested_dict[key] = [] if schema.get(subpath, {}).get("type") == "array" else {}
nested_dict = nested_dict[key]
last_key = keys[-1]
subpath = "/" + "/".join(keys[:-1])
if schema.get(keys_path, {}).get("type") == "array" and isinstance(nested_dict, list) and nested_dict:
nested_dict = self.shift_current_array(nested_dict, subpath, array_counters)
if isinstance(nested_dict, list):
nested_dict = self.shift_current_array(nested_dict, "/" + "/".join(keys), array_counters)
nested_dict = self.shift_current_array(nested_dict, keys_path, array_counters)
if add_new:
if last_key not in nested_dict:
nested_dict[last_key] = []
Expand All @@ -179,6 +188,7 @@ def set_nested_value(nested_dict, keys, value, schema, add_new=False, append_onc

if not result:
result = {}

for flat_col, value in input_data.items():
if not value:
continue
Expand Down

0 comments on commit 4e19fef

Please sign in to comment.