def merge_dicts(d1, d2):
    """Return a new dict that recursively combines ``d1`` and ``d2``.

    This is a module-level function, so it takes no ``self``: callers
    invoke it as ``merge_dicts(doc, other)`` with exactly two positional
    arguments.

    For each key of ``d2``:

    * key absent from ``d1``: the value from ``d2`` is used;
    * both values are dicts: they are merged recursively;
    * both values are lists: the result is ``d1``'s list followed by
      ``d2``'s list;
    * anything else (including mismatched types): ``d2``'s value wins.

    Args:
        d1: Base mapping. It is shallow-copied, and every merged
            container is rebuilt, so ``d1`` itself is not modified.
        d2: Mapping whose entries are merged on top of ``d1``.

    Returns:
        A new ``dict`` holding the merged content.
    """
    merged = d1.copy()
    for key, value in d2.items():
        if key not in merged:
            merged[key] = value
            continue

        existing = merged[key]
        if isinstance(existing, dict) and isinstance(value, dict):
            # Recurse through the module-level name; there is no instance
            # to dispatch on now that this is a plain function.
            merged[key] = merge_dicts(existing, value)
        elif isinstance(existing, list) and isinstance(value, list):
            # Concatenation builds a new list, leaving both inputs intact.
            merged[key] = existing + value
        else:
            # Scalars or mismatched container types: the later value wins.
            merged[key] = value
    return merged
a/src/hub/dataload/sources/civic/graphql_dump.py +++ b/src/hub/dataload/sources/civic/graphql_dump.py @@ -39,24 +39,11 @@ def dump_variant(self, api_url: str, variant_id: int): api_url=api_url, variant_id=variant_id ) - variant_data = {} - variant_data = self.merge_dicts(variant_data, res_contributor_avatars["data"]) - variant_data = self.merge_dicts(variant_data, res_gene_variant["data"]["variant"]) - variant_data = self.merge_dicts(variant_data, res_detail["data"]["variant"]) - variant_data = self.merge_dicts(variant_data, res_summary["data"]["variant"]) + variant_data = { + "VariantSummary": res_summary, + "VariantDetail": res_detail, + "ContributorAvatars": res_contributor_avatars, + "GeneVariant": res_gene_variant + } return variant_data - - def merge_dicts(self, d1, d2): - merged = d1.copy() - for key, value in d2.items(): - if key in merged: - if isinstance(merged[key], dict) and isinstance(value, dict): - merged[key] = self.merge_dicts(merged[key], value) - elif isinstance(merged[key], list) and isinstance(value, list): - merged[key] = merged[key] + value # Concatenate lists - else: - merged[key] = value # Overwrite value - else: - merged[key] = value - return merged