diff --git a/ranker/server.py b/ranker/server.py index 8094921..b7dc625 100644 --- a/ranker/server.py +++ b/ranker/server.py @@ -16,7 +16,7 @@ from ranker.util.omnicorp_get_node_pmids import get_node_pmids # set the app version -APP_VERSION = '3.3.3' +APP_VERSION = '3.3.4' APP = FastAPI(title='ARAGORN Ranker', version=APP_VERSION) diff --git a/ranker/shared/ranker_obj.py b/ranker/shared/ranker_obj.py index 6a11b50..499f440 100644 --- a/ranker/shared/ranker_obj.py +++ b/ranker/shared/ranker_obj.py @@ -112,11 +112,11 @@ def score(self, answer, jaccard_like=False): # And organizing nodes and edges into a more manageable form scoring # There is some repeated work accross analyses so we calculate all r_graphs # at once - r_gaphs = self.get_rgraph(answer) + r_graphs = self.get_rgraph(answer) # For each analysis we have a unique r_graph to score analysis_details = [] - for i_analysis, r_graph in enumerate(r_gaphs): + for i_analysis, r_graph in enumerate(r_graphs): # First we calculate the graph laplacian # The probes are needed to make sure we don't remove anything # that we actually wanted to use for scoring @@ -132,7 +132,10 @@ def score(self, answer, jaccard_like=False): # Once we have the graph laplacian we can find the effective resistance # Between all of the probes # The exp(-1 * .) here converts us back to normalized space - score = np.exp(-kirchhoff(laplacian, probe_inds)) + try: + score = np.exp(-kirchhoff(laplacian, probe_inds)) + except Exception: + score = -1 # Fail safe to get rid of NaNs. 
score = score if np.isfinite(score) and score >= 0 else -1 @@ -484,7 +487,8 @@ def get_edge_values(self, edge_id): "publications": [], "num_publications": 0, "literature_coocurrence": None, - "p_value": None + "p_value": None, + "affinity": None } # Look through attributes and @@ -523,10 +527,17 @@ def get_edge_values(self, edge_id): usable_edge_attr["publications"] = pubs usable_edge_attr["num_publications"] = len(pubs) - + + if attr_type_id == "biolink:evidence_count": + usable_edge_attr["num_publications"] = attribute.get("value", 0) + # P-Values + # first 4 probably never happen if "p_value" in orig_attr_name or "p-value" in orig_attr_name or \ - "p_value" in attr_type_id or "p-value" in attr_type_id: + "p_value" in attr_type_id or "p-value" in attr_type_id or \ + "pValue" in orig_attr_name or \ + "fisher_exact_p" in orig_attr_name or \ + "gwas_pvalue" in orig_attr_name: p_value = attribute.get("value", None) @@ -569,6 +580,14 @@ def get_edge_values(self, edge_id): # # Every other edge has an assumed publication of 1 # usable_edge_attr['num_publications'] += 1 + # affinities + if orig_attr_name == "affinity": + usable_edge_attr["affinity"] = attribute.get("value", 0) + + # confidence score + if orig_attr_name == "biolink:tmkp_confidence_score": + usable_edge_attr["confidence_score"] = attribute.get("value", 0) + # At this point we have all of the information extracted from the edge # We have have looked through all attributes and filled up usable_edge_attr # Now we can construct the edge values using these attributes and the base weight @@ -582,13 +601,13 @@ def get_edge_values(self, edge_id): property_w = get_source_sigmoid( usable_edge_attr["p_value"], edge_source, - "p-value", + "p_value", self.source_transformation, self.unknown_source_transformation ) source_w = get_source_weight( edge_source, - "p-value", + "p_value", self.source_weights, self.unknown_source_weight ) @@ -624,7 +643,6 @@ def get_edge_values(self, edge_id): } if 
usable_edge_attr['literature_coocurrence'] is not None: - property_w = get_source_sigmoid( usable_edge_attr['literature_coocurrence'], edge_source, @@ -639,7 +657,6 @@ def get_edge_values(self, edge_id): self.source_weights, self.unknown_source_weight ) - this_edge_vals[edge_source]["literature_coocurrence"] = { "value": usable_edge_attr["literature_coocurrence"], "property_weight": property_w, @@ -647,9 +664,32 @@ def get_edge_values(self, edge_id): "weight": property_w * source_w } + if usable_edge_attr["affinity"] is not None: + + property_w = get_source_sigmoid( + usable_edge_attr['affinity'], + edge_source, + "affinity", + self.source_transformation, + self.unknown_source_transformation, + ) + + source_w = get_source_weight( + edge_source, + "affinity", + self.source_weights, + self.unknown_source_weight + ) + + this_edge_vals[edge_source]["affinity"] = { + "value": usable_edge_attr["affinity"], + "property_weight": property_w, + "source_weight": source_w, + "weight": property_w * source_w + } + # Cache it self.edge_values[edge_id] = this_edge_vals - return this_edge_vals def kirchhoff(L, probes): diff --git a/ranker/shared/sources.py b/ranker/shared/sources.py index 6b880dc..1e601fa 100644 --- a/ranker/shared/sources.py +++ b/ranker/shared/sources.py @@ -11,6 +11,19 @@ "infores:omnicorp": { "literature_co-occurrence": 1, }, + "infores:text-mining-provider-targeted": { + "publications": 0.5, + "confidence_score": 1 + }, + "infores:genetics-data-provider": { + "p_value": 1 + }, + "infores:icees-kg": { + "p_value": 1 + }, + "infores:gwas-catalog": { + "p_value": 1 + } }, "source_transformation": { "infores:omnicorp": { @@ -20,11 +33,50 @@ "midpoint": 0, "rate": 0.00033 } + }, + "infores:text-mining-provider-targeted": { + "publications": { + "lower": -1, + "upper": 1, + "midpoint": 0, + "rate": 4e-4 + }, + "confidence_score": { + "lower": -1, + "upper": 1, + "midpoint": 0, + "rate": 3 + }, + }, + "infores:genetics-data-provider": { + "p_value": { + "lower": 0, + 
"upper": 1, + "midpoint": 0.05, + "rate": -150 + } + }, + "infores:icees-kg": { + "p_value": { + "lower": 0, + "upper": 1, + "midpoint": 0.05, + "rate": -150 + } + }, + "infores:gwas-catalog": { + "p_value": { + "lower": 0, + "upper": 1, + "midpoint": 0.05, + "rate": -150 + } } }, "unknown_source_weight": { "publications": 1, "literature_co-occurrence": 1, + "affinity": 1, "unknown_property" : 0 }, "unknown_source_transformation": { @@ -40,6 +92,12 @@ "midpoint": 0, "rate": 0.001373265360835 }, + "affinity": { + "lower": -1, + "upper": 1, + "midpoint": 0, + "rate": 0.4 + }, "unknown_property": { "lower": 0, "upper": 0, @@ -52,7 +110,9 @@ "infores:omnicorp": 0, "infores:drugcentral": 5e-2, "infores:hetionet": 3e-2, - "infores:text-mining-provider-targeted": 5e-3 + "infores:text-mining-provider-targeted": 5e-3, + "infores:icees-kg": 3e-2, + "infores:gwas-catalog": 3e-2 }, "omnicorp_relevence": 0.0025 @@ -78,7 +138,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -92,7 +152,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -106,7 +166,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -125,7 +185,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -165,7 +225,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -179,7 +239,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -193,7 +253,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -212,7 +272,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -252,7 +312,7 @@ "midpoint": 0, "rate": 
.574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -266,7 +326,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -280,7 +340,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055, @@ -299,7 +359,7 @@ "midpoint": 0, "rate": .574213221 }, - "p-value": { + "p_value": { "lower": 1, "upper": 0, "midpoint": 0.055,