From a7d8de72770ce36d40617adb4a4dfea69e53889b Mon Sep 17 00:00:00 2001 From: Charlie Fox Date: Fri, 19 Apr 2024 15:10:09 -0400 Subject: [PATCH 1/5] replace None with empty string --- ranker/shared/ranker_obj.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ranker/shared/ranker_obj.py b/ranker/shared/ranker_obj.py index 85fcc8f..8136613 100644 --- a/ranker/shared/ranker_obj.py +++ b/ranker/shared/ranker_obj.py @@ -489,8 +489,8 @@ def get_edge_values(self, edge_id): # Look through attributes and for attribute in edge.get("attributes", []): - orig_attr_name = attribute.get("original_attribute_name", None) - attr_type_id = attribute.get("attribute_type_id", None) + orig_attr_name = attribute.get("original_attribute_name", "") + attr_type_id = attribute.get("attribute_type_id", "") # We will look at both the original_attribute_name and the # attribute_type_id. The attribute_type_id is the real method From bbd7c6fee88a653e680c9eba6e324062b4fdbefd Mon Sep 17 00:00:00 2001 From: Charlie Fox Date: Mon, 13 May 2024 16:12:17 -0400 Subject: [PATCH 2/5] add affinity, evidence_count, different p values --- ranker/shared/ranker_obj.py | 45 +++++++++++++++++++++--- ranker/shared/sources.py | 68 ++++++++++++++++++++++++++++++------- 2 files changed, 97 insertions(+), 16 deletions(-) diff --git a/ranker/shared/ranker_obj.py b/ranker/shared/ranker_obj.py index ac5b5a5..ea1e431 100644 --- a/ranker/shared/ranker_obj.py +++ b/ranker/shared/ranker_obj.py @@ -499,7 +499,8 @@ def get_edge_values(self, edge_id): # Publications if orig_attr_name == "publications" or \ attr_type_id == "biolink:supporting_document" or \ - attr_type_id == "biolink:publications": + attr_type_id == "biolink:publications" or \ + attr_type_id == "biolink:evidence_count": # Parse pubs to handle all the cases we have observed pubs = attribute.get("value", []) @@ -520,8 +521,12 @@ def get_edge_values(self, edge_id): usable_edge_attr["num_publications"] = len(pubs) # P-Values + # first 4 probably never happen if "p_value" in orig_attr_name or "p-value" in orig_attr_name or \ - "p_value" in attr_type_id or "p-value" in attr_type_id: + "p_value" in attr_type_id or "p-value" in attr_type_id or \ + "pValue" in orig_attr_name or \ + "fisher_exact_p" in orig_attr_name or \ + "gwas_pvalue" in orig_attr_name: p_value = attribute.get("value", None) @@ -557,6 +562,14 @@ def get_edge_values(self, edge_id): # # Every other edge has an assumed publication of 1 # usable_edge_attr['num_publications'] += 1 + # affinities + if orig_attr_name == "affinity": + usable_edge_attr["affinity"] = attribute.get("value", 0) + + # confidence score + if orig_attr_name == "biolink:tmkp_confidence_score": + usable_edge_attr["confidence_score"] = attribute.get("value", 0) + # At this point we have all of the information extracted from the edge # We have have looked through all attributes and filled up usable_edge_attr # Now we can construct the edge values using these attributes and the base weight @@ -570,13 +583,13 @@ def get_edge_values(self, edge_id): property_w = get_source_sigmoid( usable_edge_attr["p_value"], edge_source, - "p-value", + "p_value", self.source_transformation, self.unknown_source_transformation ) source_w = get_source_weight( edge_source, - "p-value", + "p_value", self.source_weights, self.unknown_source_weight ) @@ -635,6 +648,30 @@ def get_edge_values(self, edge_id): "weight": property_w * source_w } + if usable_edge_attr["affinity"] is not None: + + property_w = get_source_sigmoid( + usable_edge_attr['affinity'], + edge_source, + "affinity", + self.source_transformation, + self.unknown_source_transformation, + ) + + source_w = get_source_weight( + edge_source, + "affinity", + self.source_weights, + self.unknown_source_weight + ) + + this_edge_vals[edge_source]["affinity"] = { + "value": usable_edge_attr["affinity"], + "property_weight": property_w, + "source_weight": source_w, + "weight": property_w * source_w + } + # Cache it self.edge_values[edge_id] = this_edge_vals diff --git a/ranker/shared/sources.py b/ranker/shared/sources.py index 6b880dc..e176117 100644 --- a/ranker/shared/sources.py +++ b/ranker/shared/sources.py @@ -20,6 +20,44 @@ "midpoint": 0, "rate": 0.00033 } + }, + "infores:text-mining-provider-targeted": { + "publications": { + "lower": -1, + "upper": 1, + "midpoint": 0, + "rate": 4e-4 + }, + "confidence_score": { + "lower": -1, + "upper": 1, + "midpoint": 0, + "rate": 3 + }, + }, + "infores:genetics-data-provider": { + "p_value": { + "lower": -1, + "upper": 1, + "midpoint": 0, + "rate": 2000 + } + }, + "infores:icees-kg": { + "p_value": { + "lower": -1, + "upper": 1, + "midpoint": 0, + "rate": 5 + } + }, + "infores:gwas-catalog": { + "p_value": { + "lower": -1, + "upper": 1, + "midpoint": 0, + "rate": 1e8 + } } }, "unknown_source_weight": { @@ -40,6 +78,12 @@ "midpoint": 0, "rate": 0.001373265360835 }, + "affinity": { + "lower": -1, + "upper": 1, + "midpoint": 0, + "rate": 0.4 + }, "unknown_property": { "lower": 0, "upper": 0, @@ -78,7 +122,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -92,7 +136,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -106,7 +150,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -125,7 +169,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -165,7 +209,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -179,7 +223,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -193,7 +237,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -212,7 +256,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -252,7 +296,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -266,7 +310,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -280,7 +324,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -299,7 +343,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, From 0fbcb2653ae3f7a3c9ffd8df288eebbd1d1d5bfa Mon Sep 17 00:00:00 2001 From: Charlie Fox Date: Tue, 18 Jun 2024 15:06:53 -0400 Subject: [PATCH 3/5] improve sigmoids --- ranker/shared/ranker_obj.py | 23 +++++++++++++---------- ranker/shared/sources.py | 36 ++++++++++++++++++++++++++---------- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/ranker/shared/ranker_obj.py b/ranker/shared/ranker_obj.py index ea1e431..7f0d068 100644 --- a/ranker/shared/ranker_obj.py +++ b/ranker/shared/ranker_obj.py @@ -112,11 +112,11 @@ def score(self, answer, jaccard_like=False): # And organizing nodes and edges into a more manageable form scoring # There is some repeated work accross analyses so we calculate all r_graphs # at once - r_gaphs = self.get_rgraph(answer) + r_graphs = self.get_rgraph(answer) # For each analysis we have a unique r_graph to score analysis_details = [] - for i_analysis, r_graph in enumerate(r_gaphs): + for i_analysis, r_graph in enumerate(r_graphs): # First we calculate the graph laplacian # The probes are needed to make sure we don't remove anything # that we actually wanted to use for scoring @@ -132,7 +132,10 @@ def score(self, answer, jaccard_like=False): # Once we have the graph laplacian we can find the effective resistance # Between all of the probes # The exp(-1 * .) here converts us back to normalized space - score = np.exp(-kirchhoff(laplacian, probe_inds)) + try: + score = np.exp(-kirchhoff(laplacian, probe_inds)) + except: + breakpoint() # Fail safe to get rid of NaNs. score = score if np.isfinite(score) and score >= 0 else -1 @@ -484,7 +487,8 @@ def get_edge_values(self, edge_id): "publications": [], "num_publications": 0, "literature_coocurrence": None, - "p_value": None + "p_value": None, + "affinity": None } # Look through attributes and @@ -499,8 +503,7 @@ def get_edge_values(self, edge_id): # Publications if orig_attr_name == "publications" or \ attr_type_id == "biolink:supporting_document" or \ - attr_type_id == "biolink:publications" or \ - attr_type_id == "biolink:evidence_count": + attr_type_id == "biolink:publications": # Parse pubs to handle all the cases we have observed pubs = attribute.get("value", []) @@ -519,7 +522,10 @@ def get_edge_values(self, edge_id): usable_edge_attr["publications"] = pubs usable_edge_attr["num_publications"] = len(pubs) - + + if attr_type_id == "biolink:evidence_count": + usable_edge_attr["num_publications"] = attribute.get("value", 0) + # P-Values # first 4 probably never happen if "p_value" in orig_attr_name or "p-value" in orig_attr_name or \ @@ -625,7 +631,6 @@ def get_edge_values(self, edge_id): } if usable_edge_attr['literature_coocurrence'] is not None: - property_w = get_source_sigmoid( usable_edge_attr['literature_coocurrence'], edge_source, @@ -640,7 +645,6 @@ def get_edge_values(self, edge_id): self.source_weights, self.unknown_source_weight ) - this_edge_vals[edge_source]["literature_coocurrence"] = { "value": usable_edge_attr["literature_coocurrence"], "property_weight": property_w, @@ -674,7 +678,6 @@ def get_edge_values(self, edge_id): # Cache it self.edge_values[edge_id] = this_edge_vals - return this_edge_vals def kirchhoff(L, probes): diff --git a/ranker/shared/sources.py b/ranker/shared/sources.py index e176117..1e601fa 100644 --- a/ranker/shared/sources.py +++ b/ranker/shared/sources.py @@ -11,6 +11,19 @@ "infores:omnicorp": { "literature_co-occurrence": 1, }, + "infores:text-mining-provider-targeted": { + "publications": 0.5, + "confidence_score": 1 + }, + "infores:genetics-data-provider": { + "p_value": 1 + }, + "infores:icees-kg": { + "p_value": 1 + }, + "infores:gwas-catalog": { + "p_value": 1 + } }, "source_transformation": { "infores:omnicorp": { @@ -37,32 +50,33 @@ }, "infores:genetics-data-provider": { "p_value": { - "lower": -1, + "lower": 0, "upper": 1, - "midpoint": 0, - "rate": 2000 + "midpoint": 0.05, + "rate": -150 } }, "infores:icees-kg": { "p_value": { - "lower": -1, + "lower": 0, "upper": 1, - "midpoint": 0, - "rate": 5 + "midpoint": 0.05, + "rate": -150 } }, "infores:gwas-catalog": { "p_value": { - "lower": -1, + "lower": 0, "upper": 1, - "midpoint": 0, - "rate": 1e8 + "midpoint": 0.05, + "rate": -150 } } }, "unknown_source_weight": { "publications": 1, "literature_co-occurrence": 1, + "affinity": 1, "unknown_property" : 0 }, "unknown_source_transformation": { @@ -96,7 +110,9 @@ "infores:omnicorp": 0, "infores:drugcentral": 5e-2, "infores:hetionet": 3e-2, - "infores:text-mining-provider-targeted": 5e-3 + "infores:text-mining-provider-targeted": 5e-3, + "infores:icees-kg": 3e-2, + "infores:gwas-catalog": 3e-2 }, "omnicorp_relevence": 0.0025 From fc24f6934a65555404b61ca7cca06e1641828236 Mon Sep 17 00:00:00 2001 From: kmorton Date: Wed, 10 Jul 2024 13:11:17 -0400 Subject: [PATCH 4/5] Better handle none properties and stringified numbers --- ranker/shared/ranker_obj.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/ranker/shared/ranker_obj.py b/ranker/shared/ranker_obj.py index 7f0d068..499f440 100644 --- a/ranker/shared/ranker_obj.py +++ b/ranker/shared/ranker_obj.py @@ -494,7 +494,12 @@ def get_edge_values(self, edge_id): # Look through attributes and for attribute in edge.get("attributes", []): orig_attr_name = attribute.get("original_attribute_name", '') + if not orig_attr_name: + orig_attr_name = '' + attr_type_id = attribute.get("attribute_type_id", '') + if not attr_type_id: + attr_type_id = '' # We will look at both the original_attribute_name and the # attribute_type_id. The attribute_type_id is the real method @@ -537,8 +542,15 @@ def get_edge_values(self, edge_id): p_value = attribute.get("value", None) # Some times the reported p_value is a list like [p_value] - if isinstance(attribute["value"], list): + if isinstance(p_value, list): p_value = (p_value[0] if len(p_value) > 0 else None) + + if isinstance(p_value, str): + # Parse strings safely + try: + p_value = float(p_value) + except: + p_value = None usable_edge_attr["p_value"] = p_value From 2fd49a7f2289245e635049ac340567dd64ef5adb Mon Sep 17 00:00:00 2001 From: uhbrar Date: Fri, 19 Jul 2024 15:58:08 -0400 Subject: [PATCH 5/5] bump version --- ranker/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ranker/server.py b/ranker/server.py index 8094921..b7dc625 100644 --- a/ranker/server.py +++ b/ranker/server.py @@ -16,7 +16,7 @@ from ranker.util.omnicorp_get_node_pmids import get_node_pmids # set the app version -APP_VERSION = '3.3.3' +APP_VERSION = '3.3.4' APP = FastAPI(title='ARAGORN Ranker', version=APP_VERSION)