Skip to content

Commit

Permalink
Merge pull request #256 from ranking-agent/transitive_rules
Browse files Browse the repository at this point in the history
Transitive rules
  • Loading branch information
cbizon authored Jul 18, 2024
2 parents 58e677c + 5cb757e commit 378aeda
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 271 deletions.
2 changes: 1 addition & 1 deletion openapi-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ servers:
# url: http://127.0.0.1:5000
termsOfService: http://robokop.renci.org:7055/tos?service_long=ARAGORN&provider_long=RENCI
title: ARAGORN
version: 2.8.0
version: 2.8.1
tags:
- name: translator
- name: ARA
Expand Down
260 changes: 0 additions & 260 deletions src/rules/kara_typed_rules/rules_with_types_cleaned_finalized.json
Original file line number Diff line number Diff line change
Expand Up @@ -662,136 +662,6 @@
}
}
},
{
"Rule": "?i biolink:treats_or_applied_or_studied_to_treat ?b ?a biolink:has_part ?f ?i biolink:has_part ?f => ?a biolink:treats ?b",
"Head Coverage": "0.036715444",
"Std Confidence": "0.726559093",
"PCA Confidence": "0.919426049",
"Positive Examples": "1666",
"Body size": "2293",
"PCA Body size": "1812",
"Functional variable": "?a",
"template": {
"query_graph": {
"nodes": {
"$source": {
"ids": [
"$source_id"
],
"categories": [
"biolink:ChemicalEntity"
]
},
"$target": {
"ids": [
"$target_id"
],
"categories": [
"biolink:DiseaseOrPhenotypicFeature"
]
},
"i": {
"categories": [
"biolink:ChemicalEntity"
]
},
"f": {
"categories": [
"biolink:ChemicalEntity"
]
}
},
"edges": {
"edge_0": {
"subject": "i",
"object": "$target",
"predicates": [
"biolink:treats_or_applied_or_studied_to_treat"
]
},
"edge_1": {
"subject": "$source",
"object": "f",
"predicates": [
"biolink:has_part"
]
},
"edge_2": {
"subject": "i",
"object": "f",
"predicates": [
"biolink:has_part"
]
}
}
}
}
},
{
"Rule": "?e biolink:treats_or_applied_or_studied_to_treat ?b ?i biolink:has_part ?a ?i biolink:has_part ?e => ?a biolink:treats ?b",
"Head Coverage": "0.090400212",
"Std Confidence": "0.623688612",
"PCA Confidence": "0.72345679",
"Positive Examples": "4102",
"Body size": "6577",
"PCA Body size": "5670",
"Functional variable": "?a",
"template": {
"query_graph": {
"nodes": {
"$source": {
"ids": [
"$source_id"
],
"categories": [
"biolink:ChemicalEntity"
]
},
"$target": {
"ids": [
"$target_id"
],
"categories": [
"biolink:DiseaseOrPhenotypicFeature"
]
},
"e": {
"categories": [
"biolink:ChemicalEntity"
]
},
"i": {
"categories": [
"biolink:NamedThing"
]
}
},
"edges": {
"edge_0": {
"subject": "e",
"object": "$target",
"predicates": [
"biolink:treats_or_applied_or_studied_to_treat"
]
},
"edge_1": {
"subject": "i",
"object": "$source",
"predicates": [
"biolink:has_part"
]
},
"edge_2": {
"subject": "i",
"object": "e",
"predicates": [
"biolink:has_part"
]
}
}
}
}
},
{
"Rule": "?a biolink:treats_or_applied_or_studied_to_treat ?f ?f biolink:has_phenotype ?b ?b biolink:has_phenotype ?f => ?a biolink:treats ?b",
"Head Coverage": "0.041233251",
Expand Down Expand Up @@ -2113,136 +1983,6 @@
}
}
},
{
"Rule": "?e biolink:contraindicated_for ?b ?i biolink:has_part ?a ?i biolink:has_part ?e => ?a biolink:contraindicated_for ?b",
"Head Coverage": "0.22202098",
"Std Confidence": "0.746321664",
"PCA Confidence": "0.945220884",
"Positive Examples": "5884",
"Body size": "7884",
"PCA Body size": "6225",
"Functional variable": "?a",
"template": {
"query_graph": {
"nodes": {
"$source": {
"ids": [
"$source_id"
],
"categories": [
"biolink:ChemicalEntity"
]
},
"$target": {
"ids": [
"$target_id"
],
"categories": [
"biolink:DiseaseOrPhenotypicFeature"
]
},
"e": {
"categories": [
"biolink:ChemicalEntity"
]
},
"i": {
"categories": [
"biolink:ChemicalEntity"
]
}
},
"edges": {
"edge_0": {
"subject": "e",
"object": "$target",
"predicates": [
"biolink:contraindicated_for"
]
},
"edge_1": {
"subject": "i",
"object": "$source",
"predicates": [
"biolink:has_part"
]
},
"edge_2": {
"subject": "i",
"object": "e",
"predicates": [
"biolink:has_part"
]
}
}
}
}
},
{
"Rule": "?a biolink:contraindicated_for ?f ?i biolink:has_part ?b ?i biolink:has_part ?f => ?a biolink:contraindicated_for ?b",
"Head Coverage": "0.029997736",
"Std Confidence": "0.722727273",
"PCA Confidence": "0.722727273",
"Positive Examples": "795",
"Body size": "1100",
"PCA Body size": "1100",
"Functional variable": "?a",
"template": {
"query_graph": {
"nodes": {
"$source": {
"ids": [
"$source_id"
],
"categories": [
"biolink:ChemicalEntity"
]
},
"$target": {
"ids": [
"$target_id"
],
"categories": [
"biolink:DiseaseOrPhenotypicFeature"
]
},
"f": {
"categories": [
"biolink:DiseaseOrPhenotypicFeature"
]
},
"i": {
"categories": [
"biolink:DiseaseOrPhenotypicFeature"
]
}
},
"edges": {
"edge_0": {
"subject": "$source",
"object": "f",
"predicates": [
"biolink:contraindicated_for"
]
},
"edge_1": {
"subject": "i",
"object": "$target",
"predicates": [
"biolink:has_part"
]
},
"edge_2": {
"subject": "i",
"object": "f",
"predicates": [
"biolink:has_part"
]
}
}
}
}
},
{
"Rule": "?a biolink:contraindicated_for ?f ?i biolink:has_part ?b ?i biolink:related_to ?f => ?a biolink:contraindicated_for ?b",
"Head Coverage": "0.029997736",
Expand Down
43 changes: 34 additions & 9 deletions src/service_aggregator.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,21 +408,46 @@ async def filter_promiscuous_results(response,guid):
MAX_C = 10
if len(response["message"]["results"]) < MAX_C:
return
prom_qnodes = get_promiscuous_qnodes(response)
prom_qnodes = await get_promiscuous_qnodes(response)
#This is a dictionary from bound knodes to the index of their result
prom_counter = defaultdict(list)
#There should only be one such node
for qnode in prom_qnodes:
#How many distinct results have the same bozo in this spot?
# It's possible that there are multiple knodes that could be filtered. But when we filter out the first one
# then the indices of the rest will change. So we need to do this one at a time.
await remove_promiscuous_knode_results(MAX_C, qnode, response)


async def remove_promiscuous_knode_results(MAX_C, qnode, response):
    """Remove results whose knode bound to ``qnode`` appears in too many results.

    The results in ``response["message"]["results"]`` are scanned and, for
    each knode bound to ``qnode``, the indices of the results that use it are
    collected.  While the most common knode occurs in more than ``MAX_C``
    distinct results, every result containing it is deleted in place and the
    counts are rebuilt from the surviving results.

    Args:
        MAX_C: Largest number of results allowed to share one knode at ``qnode``.
        qnode: Key into each result's ``node_bindings``.
        response: Message dict; ``response["message"]["results"]`` is mutated.

    Returns:
        None.  The results list is modified in place.
    """
    results = response["message"]["results"]
    # Only one knode is removed per pass: deleting results shifts the indices
    # of everything after them, so indices gathered for the other knodes would
    # be stale.  Recounting after each removal keeps every index valid.
    while True:
        # knode id -> indices of the distinct results that bind it to qnode
        prom_counter = defaultdict(list)
        for result_i, result in enumerate(results):
            # A result with no binding for this qnode cannot be promiscuous here.
            for binding in result["node_bindings"].get(qnode, ()):
                indices = prom_counter[binding["id"]]
                # A result may bind the same knode more than once; record its
                # index only once so the deletion below never removes a
                # neighbouring result by mistake.
                if not indices or indices[-1] != result_i:
                    indices.append(result_i)
        if not prom_counter:
            return
        # Most common knode; ties go to the first one encountered.
        max_knode = max(prom_counter, key=lambda knode: len(prom_counter[knode]))
        if len(prom_counter[max_knode]) <= MAX_C:
            return
        # Remove from right to left, otherwise the indices change underneath us.
        for index in reversed(prom_counter[max_knode]):
            del results[index]


async def get_promiscuous_qnodes(response):
Expand Down Expand Up @@ -763,7 +788,7 @@ async def aragorn_lookup(input_message, params, guid, infer, answer_qnode, bypas
if "knowledge_graph" not in rmessage["message"] or "results" not in rmessage["message"]:
continue
await filter_repeated_nodes(rmessage, guid)
#await filter_promiscuous_results(rmessage, guid)
await filter_promiscuous_results(rmessage, guid)
result_messages.append(rmessage)
logger.info(f"{guid}: strider complete")
#Clean out the repeat node stuff
Expand Down
2 changes: 1 addition & 1 deletion tests/test_expand.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_expand_query():
}
}
m = expand_query(q,{},"abcd")
assert len(m) > 20 #This depends on how many rules we're allowing
assert len(m) > 15 #This depends on how many rules we're allowing

def test_expand_qualified_query():
q = {
Expand Down

0 comments on commit 378aeda

Please sign in to comment.