feat: initial commit

CDCgov · Dec 16, 2024 · 5ebabcd · 5ebabcd
1 parent 64158df
commit 5ebabcd
Show file tree

Hide file tree

Showing 16 changed files with 283 additions and 197 deletions.
diff --git a/src/recordlinker/assets/initial_algorithms.json b/src/recordlinker/assets/initial_algorithms.json
@@ -9,7 +9,7 @@
             {
                 "blocking_keys": [
                     "BIRTHDATE",
-                    "MRN",
+                    "IDENTIFIER",
                     "SEX"
                 ],
                 "evaluators": [
@@ -75,7 +75,7 @@
             {
                 "blocking_keys": [
                     "BIRTHDATE",
-                    "MRN",
+                    "IDENTIFIER",
                     "SEX"
                 ],
                 "evaluators": [

diff --git a/src/recordlinker/hl7/fhir.py b/src/recordlinker/hl7/fhir.py
@@ -33,26 +33,19 @@ def fhir_record_to_pii_record(fhir_record: dict) -> schemas.PIIRecord:
         "birthDate": fhir_record.get("birthDate"),
         "sex": fhir_record.get("gender"),
         "address": fhir_record.get("address", []),
-        "mrn": None,
-        "ssn": None,
         "race": None,
         "gender": None,
         "telecom": fhir_record.get("telecom", []),
-        "drivers_license": None,
+        "identifiers": [],
     }
     for identifier in fhir_record.get("identifier", []):
-        for coding in identifier.get("type", {}).get("coding", []):
-            if coding.get("code") == "MR":
-                val["mrn"] = identifier.get("value")
-            elif coding.get("code") == "SS":
-                val["ssn"] = identifier.get("value")
-            elif coding.get("code") == "DL":
-                license_number = identifier.get("value")
-                authority = identifier.get("assigner", {}).get("identifier", {}).get("value", "")  # Assuming `issuer` contains authority info
-                val["drivers_license"] = {
-                    "value": license_number,
-                    "authority": authority
-                }
+        for code in identifier.get("type", {}).get("coding", []):
+            val["identifiers"].append({
+                "value": identifier.get("value"),
+                "type": code.get("code"),
+                "authority": identifier.get("assigner", {}).get("identifier", {}).get("value", ""),
+            })
+            break  # Shouldn't be more than 1 code
     for address in val["address"]:
         address["county"] = address.get("district", "")
         for extension in address.get("extension", []):

diff --git a/src/recordlinker/linking/link.py b/src/recordlinker/linking/link.py
@@ -51,7 +51,7 @@ def compare(
     details: dict[str, typing.Any] = {"patient.reference_id": str(patient.reference_id)}
     for e in evals:
         # TODO: can we do this check earlier?
-        feature = getattr(schemas.Feature, e.feature, None)
+        feature = schemas.Feature.parse(e.feature)
         if feature is None:
             raise ValueError(f"Invalid comparison field: {e.feature}")
         # Evaluate the comparison function and append the result to the list

diff --git a/src/recordlinker/linking/matchers.py b/src/recordlinker/linking/matchers.py
@@ -179,7 +179,7 @@ def compare_fuzzy_match(
       beyond which to classify the strings as a partial match.
     :return: A float indicating whether the features are a fuzzy match.
     """
-    similarity_measure, threshold = _get_fuzzy_params(str(key), **kwargs)
+    similarity_measure, threshold = _get_fuzzy_params(str(key.attribute), **kwargs)
     comp_func = getattr(rapidfuzz.distance, similarity_measure).normalized_similarity
     for x in record.feature_iter(key):
         for y in patient.record.feature_iter(key):
@@ -203,11 +203,11 @@ def compare_probabilistic_fuzzy_match(
       beyond which to classify the strings as a partial match.
     :return: A float of the score the feature comparison earned.
     """
-    log_odds = kwargs.get("log_odds", {}).get(str(key))
+    log_odds = kwargs.get("log_odds", {}).get(str(key.attribute))
     if log_odds is None:
         raise ValueError(f"Log odds not found for feature {key}")
 
-    similarity_measure, threshold = _get_fuzzy_params(str(key), **kwargs)
+    similarity_measure, threshold = _get_fuzzy_params(str(key.attribute), **kwargs)
     comp_func = getattr(rapidfuzz.distance, similarity_measure).normalized_similarity
     max_score = 0.0
     for x in patient.record.feature_iter(key):

diff --git a/src/recordlinker/models/mpi.py b/src/recordlinker/models/mpi.py
@@ -121,12 +121,13 @@ class BlockingKey(enum.Enum):
     """
 
     BIRTHDATE = ("BIRTHDATE", 1, "Date of birth as YYYY-MM-DD")
-    MRN = ("MRN", 2, "Last 4 characters of Medical record number")
+    # MRN = ("MRN", 2, "Last 4 characters of Medical record number")
     SEX = ("SEX", 3, "Sex at birth; M, F or U")
     ZIP = ("ZIP", 4, "5 digital US Postal Code")
     FIRST_NAME = ("FIRST_NAME", 5, "First 4 characters of the first name")
     LAST_NAME = ("LAST_NAME", 6, "First 4 characters of the last name")
     ADDRESS = ("ADDRESS", 7, "First 4 characters of the address")
+    IDENTIFIER = ("IDENTIFIER", 8, "")  # TODO: fill in the description
 
     def __init__(self, value: str, _id: int, description: str):
         self._value = value

diff --git a/src/recordlinker/schemas/__init__.py b/src/recordlinker/schemas/__init__.py
@@ -11,6 +11,7 @@
 from .mpi import PatientRef
 from .mpi import PersonRef
 from .pii import Feature
+from .pii import FeatureAttribute
 from .pii import PIIRecord
 from .seed import Cluster
 from .seed import ClusterGroup
@@ -22,6 +23,7 @@
     "AlgorithmPass",
     "AlgorithmSummary",
     "Feature",
+    "FeatureAttribute",
     "PIIRecord",
     "Prediction",
     "LinkInput",

diff --git a/src/recordlinker/schemas/algorithm.py b/src/recordlinker/schemas/algorithm.py
@@ -13,7 +13,6 @@
 
 from recordlinker.linking import matchers
 from recordlinker.models.mpi import BlockingKey
-from recordlinker.schemas.pii import Feature
 
 
 class Evaluator(pydantic.BaseModel):
@@ -23,10 +22,9 @@ class Evaluator(pydantic.BaseModel):
 
     model_config = pydantic.ConfigDict(from_attributes=True, use_enum_values=True)
 
-    feature: Feature
+    feature: str
     func: matchers.FeatureFunc
 
-
 class AlgorithmPass(pydantic.BaseModel):
     """
     The schema for an algorithm pass record.