Skip to content

Commit

Permalink
Issue 260 classification content (#261)
Browse files Browse the repository at this point in the history
* Add a `content` field to the vcv and rcv classification models. Fix a bug where 'classifications' was included in the disassembled rcv_accession as well as in its own table

* Add content column to vcv/rcv classification bq schemas
  • Loading branch information
theferrit32 authored Dec 5, 2024
1 parent 6d32923 commit 6acc240
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,9 @@
{
"name": "clinical_impact_clinical_significance",
"type": "STRING"
},
{
"name": "content",
"type": "STRING"
}
]
Original file line number Diff line number Diff line change
Expand Up @@ -42,5 +42,9 @@
{
"name": "clinical_impact_clinical_significance",
"type": "STRING"
},
{
"name": "content",
"type": "STRING"
}
]
2 changes: 1 addition & 1 deletion clinvar_ingest/cloud/bigquery/processing_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ def write_started( # noqa: PLR0913
f"release_tag={release_tag}, bucket_dir={bucket_dir}"
)
_logger.warning(
f"Expected 0 rows to exist for the started event, but found {row.c}."
f"Expected 0 rows to exist for the started event, but found {row.c}. "
f"file_type={file_type}, release_date={release_date}, "
f"release_tag={release_tag}, bucket_dir={bucket_dir}"
)
Expand Down
16 changes: 12 additions & 4 deletions clinvar_ingest/model/variation_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,9 +787,11 @@ class RcvAccessionClassification(Model):
clinical_impact_assertion_type: str
clinical_impact_clinical_significance: str

content: dict

@staticmethod
def jsonifiable_fields() -> list[str]:
return []
return ["content"]

def __post_init__(self):
self.entity_type = "rcv_accession_classification"
Expand All @@ -802,7 +804,9 @@ def from_xml_single(inp: dict, statement_type: StatementType, rcv_id: str):
or OncogenicityClassification entry. The statement_type is the key
from the original `Classifications` XML/dict, indicating the type.
"""
raw_description = extract(inp, "Description")
# TODO is there a chance they add fields to Description? Maybe don't extract.
# raw_description = extract(inp, "Description")
raw_description = get(inp, "Description") or {}
return RcvAccessionClassification(
rcv_id=rcv_id,
statement_type=statement_type,
Expand All @@ -819,6 +823,7 @@ def from_xml_single(inp: dict, statement_type: StatementType, rcv_id: str):
raw_description,
"@ClinicalImpactClinicalSignificance",
),
content=inp,
)

@staticmethod
Expand Down Expand Up @@ -945,7 +950,7 @@ def disassemble(self):
yield from c.disassemble()
del self_copy.classifications

yield self
yield self_copy


@dataclasses.dataclass
Expand All @@ -965,9 +970,11 @@ class VariationArchiveClassification(Model):
clinical_impact_assertion_type: str
clinical_impact_clinical_significance: str

content: dict

@staticmethod
def jsonifiable_fields() -> list[str]:
return []
return ["content"]

def __post_init__(self):
self.entity_type = "variation_archive_classification"
Expand Down Expand Up @@ -998,6 +1005,7 @@ def from_xml_single(inp: dict, statement_type: StatementType, vcv_id: str):
interp_description,
"@ClinicalImpactClinicalSignificance",
),
content=inp,
)

@staticmethod
Expand Down

0 comments on commit 6acc240

Please sign in to comment.