Merge pull request #135 from ranking-agent/use-more-values

Use more values
ranking-agent · Jul 19, 2024 · f7025b7 · f7025b7
2 parents 17a6d85 + 2fd49a7
commit f7025b7
Show file tree

Hide file tree

Showing 3 changed files with 125 additions and 25 deletions.
diff --git a/ranker/server.py b/ranker/server.py
@@ -16,7 +16,7 @@
 from ranker.util.omnicorp_get_node_pmids import get_node_pmids
 
 # set the app version
-APP_VERSION = '3.3.3'
+APP_VERSION = '3.3.4'
 
 APP = FastAPI(title='ARAGORN Ranker', version=APP_VERSION)
 

diff --git a/ranker/shared/ranker_obj.py b/ranker/shared/ranker_obj.py
@@ -112,11 +112,11 @@ def score(self, answer, jaccard_like=False):
         # And organizing nodes and edges into a more manageable form for scoring
         # There is some repeated work across analyses so we calculate all r_graphs
         # at once
-        r_gaphs = self.get_rgraph(answer)
+        r_graphs = self.get_rgraph(answer)
 
         # For each analysis we have a unique r_graph to score
         analysis_details = []
-        for i_analysis, r_graph in enumerate(r_gaphs):
+        for i_analysis, r_graph in enumerate(r_graphs):
             # First we calculate the graph laplacian
             # The probes are needed to make sure we don't remove anything
             # that we actually wanted to use for scoring
@@ -132,7 +132,10 @@ def score(self, answer, jaccard_like=False):
             # Once we have the graph laplacian we can find the effective resistance
             # Between all of the probes
             # The exp(-1 * .) here converts us back to normalized space
-            score = np.exp(-kirchhoff(laplacian, probe_inds))
+            try:
+                score = np.exp(-kirchhoff(laplacian, probe_inds))
+            except:
+                breakpoint()
 
             # Fail safe to get rid of NaNs.
             score = score if np.isfinite(score) and score >= 0 else -1
@@ -484,7 +487,8 @@ def get_edge_values(self, edge_id):
             "publications": [],
             "num_publications": 0,
             "literature_coocurrence": None,
-            "p_value": None
+            "p_value": None,
+            "affinity": None
         }
 
         # Look through attributes and 
@@ -523,10 +527,17 @@ def get_edge_values(self, edge_id):
 
                 usable_edge_attr["publications"] = pubs
                 usable_edge_attr["num_publications"] = len(pubs)
-
+
+            if attr_type_id == "biolink:evidence_count":
+                usable_edge_attr["num_publications"] = attribute.get("value", 0)
+
             # P-Values
+            # first 4 probably never happen
             if "p_value" in orig_attr_name or "p-value" in orig_attr_name or \
-                "p_value" in attr_type_id or "p-value" in attr_type_id:
+                "p_value" in attr_type_id or "p-value" in attr_type_id or \
+                "pValue" in orig_attr_name or \
+                "fisher_exact_p" in orig_attr_name or \
+                "gwas_pvalue" in orig_attr_name:
 
                 p_value = attribute.get("value", None)
 
@@ -569,6 +580,14 @@ def get_edge_values(self, edge_id):
             #     # Every other edge has an assumed publication of 1
             #     usable_edge_attr['num_publications'] += 1
 
+            # affinities
+            if orig_attr_name == "affinity":
+                usable_edge_attr["affinity"] = attribute.get("value", 0)
+
+            # confidence score
+            if orig_attr_name == "biolink:tmkp_confidence_score":
+                usable_edge_attr["confidence_score"] = attribute.get("value", 0)
+
         # At this point we have all of the information extracted from the edge
         # We have looked through all attributes and filled up usable_edge_attr
         # Now we can construct the edge values using these attributes and the base weight
@@ -582,13 +601,13 @@ def get_edge_values(self, edge_id):
             property_w = get_source_sigmoid(
                 usable_edge_attr["p_value"],
                 edge_source,
-                "p-value",
+                "p_value",
                 self.source_transformation,
                 self.unknown_source_transformation
             )
             source_w = get_source_weight(
                 edge_source,
-                "p-value",
+                "p_value",
                 self.source_weights,
                 self.unknown_source_weight
             )
@@ -624,7 +643,6 @@ def get_edge_values(self, edge_id):
             }
 
         if usable_edge_attr['literature_coocurrence'] is not None:
-
             property_w = get_source_sigmoid(
                 usable_edge_attr['literature_coocurrence'],
                 edge_source,
@@ -639,17 +657,39 @@ def get_edge_values(self, edge_id):
                 self.source_weights,
                 self.unknown_source_weight
             )
-
             this_edge_vals[edge_source]["literature_coocurrence"] = {
                 "value": usable_edge_attr["literature_coocurrence"],
                 "property_weight": property_w,
                 "source_weight": source_w,
                 "weight": property_w * source_w
             }
 
+        if usable_edge_attr["affinity"] is not None:
+
+            property_w = get_source_sigmoid(
+                usable_edge_attr['affinity'],
+                edge_source,
+                "affinity",
+                self.source_transformation,
+                self.unknown_source_transformation,
+            )
+
+            source_w = get_source_weight(
+                edge_source,
+                "affinity",
+                self.source_weights,
+                self.unknown_source_weight
+            )
+
+            this_edge_vals[edge_source]["affinity"] = {
+                "value": usable_edge_attr["affinity"],
+                "property_weight": property_w,
+                "source_weight": source_w,
+                "weight": property_w * source_w
+            }
+
         # Cache it
         self.edge_values[edge_id] = this_edge_vals
-
         return this_edge_vals
 
 def kirchhoff(L, probes):

diff --git a/ranker/shared/sources.py b/ranker/shared/sources.py
@@ -11,6 +11,19 @@
         "infores:omnicorp": {
             "literature_co-occurrence": 1,
         },
+        "infores:text-mining-provider-targeted": {
+            "publications": 0.5,
+            "confidence_score": 1
+        },
+        "infores:genetics-data-provider": {
+            "p_value": 1
+        },
+        "infores:icees-kg": {
+            "p_value": 1
+        },
+        "infores:gwas-catalog": {
+            "p_value": 1
+        }
     },
     "source_transformation": {
         "infores:omnicorp": {
@@ -20,11 +33,50 @@
                 "midpoint": 0,
                 "rate": 0.00033
             }
+        },
+        "infores:text-mining-provider-targeted": {
+            "publications": {
+                "lower": -1,
+                "upper": 1,
+                "midpoint": 0,
+                "rate": 4e-4
+            },
+            "confidence_score": {
+                "lower": -1,
+                "upper": 1,
+                "midpoint": 0,
+                "rate": 3
+            },
+        },
+        "infores:genetics-data-provider": {
+            "p_value": {
+                "lower": 0,
+                "upper": 1,
+                "midpoint": 0.05,
+                "rate": -150
+            }
+        },
+        "infores:icees-kg": {
+            "p_value": {
+                "lower": 0,
+                "upper": 1,
+                "midpoint": 0.05,
+                "rate": -150
+            }
+        },
+        "infores:gwas-catalog": {
+            "p_value": {
+                "lower": 0,
+                "upper": 1,
+                "midpoint": 0.05,
+                "rate": -150
+            }
         }
     },
     "unknown_source_weight": {
         "publications": 1,
         "literature_co-occurrence": 1,
+        "affinity": 1,
         "unknown_property" : 0
     },
     "unknown_source_transformation": {
@@ -40,6 +92,12 @@
             "midpoint": 0,
             "rate": 0.001373265360835
         },
+        "affinity": {
+            "lower": -1,
+            "upper": 1,
+            "midpoint": 0,
+            "rate": 0.4
+        },
         "unknown_property": {
             "lower": 0,
             "upper": 0,
@@ -52,7 +110,9 @@
         "infores:omnicorp": 0,
         "infores:drugcentral": 5e-2,
         "infores:hetionet": 3e-2,
-        "infores:text-mining-provider-targeted": 5e-3
+        "infores:text-mining-provider-targeted": 5e-3,
+        "infores:icees-kg": 3e-2,
+        "infores:gwas-catalog": 3e-2
     },
     "omnicorp_relevence": 0.0025
 
@@ -78,7 +138,7 @@
                 "midpoint": 0,
                 "rate": .574213221
             },
-            "p-value": {
+            "p_value": {
                 "lower": 1,
                 "upper": 0,
                 "midpoint": 0.055,
@@ -92,7 +152,7 @@
                 "midpoint": 0,
                 "rate": .574213221
             },
-            "p-value": {
+            "p_value": {
                 "lower": 1,
                 "upper": 0,
                 "midpoint": 0.055,
@@ -106,7 +166,7 @@
                 "midpoint": 0,
                 "rate": .574213221
             },
-            "p-value": {
+            "p_value": {
                 "lower": 1,
                 "upper": 0,
                 "midpoint": 0.055,
@@ -125,7 +185,7 @@
             "midpoint": 0,
             "rate": .574213221
         },
-        "p-value": {
+        "p_value": {
             "lower": 1,
             "upper": 0,
             "midpoint": 0.055,
@@ -165,7 +225,7 @@
                 "midpoint": 0,
                 "rate": .574213221
             },
-            "p-value": {
+            "p_value": {
                 "lower": 1,
                 "upper": 0,
                 "midpoint": 0.055,
@@ -179,7 +239,7 @@
                 "midpoint": 0,
                 "rate": .574213221
             },
-            "p-value": {
+            "p_value": {
                 "lower": 1,
                 "upper": 0,
                 "midpoint": 0.055,
@@ -193,7 +253,7 @@
                 "midpoint": 0,
                 "rate": .574213221
             },
-            "p-value": {
+            "p_value": {
                 "lower": 1,
                 "upper": 0,
                 "midpoint": 0.055,
@@ -212,7 +272,7 @@
             "midpoint": 0,
             "rate": .574213221
         },
-        "p-value": {
+        "p_value": {
             "lower": 1,
             "upper": 0,
             "midpoint": 0.055,
@@ -252,7 +312,7 @@
                 "midpoint": 0,
                 "rate": .574213221
             },
-            "p-value": {
+            "p_value": {
                 "lower": 1,
                 "upper": 0,
                 "midpoint": 0.055,
@@ -266,7 +326,7 @@
                 "midpoint": 0,
                 "rate": .574213221
             },
-            "p-value": {
+            "p_value": {
                 "lower": 1,
                 "upper": 0,
                 "midpoint": 0.055,
@@ -280,7 +340,7 @@
                 "midpoint": 0,
                 "rate": .574213221
             },
-            "p-value": {
+            "p_value": {
                 "lower": 1,
                 "upper": 0,
                 "midpoint": 0.055,
@@ -299,7 +359,7 @@
             "midpoint": 0,
             "rate": .574213221
         },
-        "p-value": {
+        "p_value": {
             "lower": 1,
             "upper": 0,
             "midpoint": 0.055,