Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for icd11foundation in EFO #380

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
10 changes: 9 additions & 1 deletion src/datahandlers/efo.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(self):
end = dt.now()
print('loading complete')
print(f'took {end-start}')

def pull_EFO_labels_and_synonyms(self,lname,sname):
with open(lname, 'w') as labelfile, open(sname,'w') as synfile:
#for labeltype in ['skos:prefLabel','skos:altLabel','rdfs:label']:
Expand Down Expand Up @@ -59,6 +60,7 @@ def pull_EFO_labels_and_synonyms(self,lname,sname):
synfile.write(f'{EFO}:{efo_id}\t{labeltype}\t{label}\n')
if not labeltype == 'skos:altLabel':
labelfile.write(f'{EFO}:{efo_id}\t{label}\n')

def pull_EFO_ids(self,roots,idfname):
with open(idfname, 'w') as idfile:
for root,rtype in roots:
Expand All @@ -75,6 +77,7 @@ def pull_EFO_ids(self,roots,idfname):
if efoid.startswith("EFO_"):
efo_id = efoid.split("_")[-1]
idfile.write(f'{EFO}:{efo_id}\t{rtype}\n')

def get_exacts(self, iri, outfile):
query = f"""
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
Expand All @@ -98,7 +101,12 @@ def get_exacts(self, iri, outfile):
qres = self.m.query(query)
for row in list(qres):
other = str(row["match"])
otherid = Text.opt_to_curie(other[1:-1])
try:
otherid = Text.opt_to_curie(other[1:-1])
except ValueError as verr:
print(f"Could not translate {other[1:-1]} into a CURIE, will be used as-is: {verr}")
otherid = other[1:-1]

if otherid.startswith("ORPHANET"):
print(row["match"])
print(other)
Expand Down
12 changes: 11 additions & 1 deletion src/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,12 @@ def clean_list(self,input_identifiers):
return cleaned

def load_extra_labels(self,prefix):
if self.label_dir is None:
print (f"WARNING: no label_dir specified in load_extra_labels({self}, {prefix}), can't load extra labels for {prefix}. Skipping.")
return
if prefix is None:
print (f"WARNING: no prefix specified in load_extra_labels({self}, {prefix}), can't load extra labels. Skipping.")
return
labelfname = os.path.join(self.label_dir,prefix,'labels')
lbs = {}
if os.path.exists(labelfname):
Expand All @@ -375,7 +381,11 @@ def apply_labels(self, input_identifiers, labels):
if iid in labels:
labeled_list.append( LabeledID(identifier=iid, label = labels[iid]))
else:
prefix = Text.get_prefix(iid)
try:
prefix = Text.get_prefix(iid)
except ValueError as e:
print(f"ERROR: Unable to apply_labels({self}, {input_identifiers}, {labels}): could not obtain prefix for identifier {iid}")
raise e
if prefix not in self.extra_labels:
self.load_extra_labels(prefix)
if iid in self.extra_labels[prefix]:
Expand Down
1 change: 1 addition & 0 deletions src/prefixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@
HGNCFAMILY='HGNC.FAMILY'
PANTHERFAMILY='PANTHER.FAMILY'
COMPLEXPORTAL='ComplexPortal'
ICD11FOUNDATION='icd11.foundation'

PMID = 'PMID'
DOI = 'doi'
Expand Down
83 changes: 61 additions & 22 deletions src/ubergraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ def get_all_labels(self):
)
for x in rr:
y = {}
y['iri'] = Text.opt_to_curie(x['thing'])
try:
y['iri'] = Text.opt_to_curie(x['thing'])
except ValueError as verr:
print(f"WARNING: Unable to translate {x['thing']} to a CURIE; it will be used as-is: {verr}")
y['iri'] = x['thing']
y['label'] = x['label']
results.append(y)

Expand Down Expand Up @@ -113,7 +117,11 @@ def get_all_descriptions(self):
)
for x in rr:
y = {}
y['iri'] = Text.opt_to_curie(x['thing'])
try:
y['iri'] = Text.opt_to_curie(x['thing'])
except ValueError as verr:
print(f"WARNING: Unable to translate {x['thing']} to a CURIE; it will be used as-is: {verr}")
y['iri'] = x['thing']
y['description'] = x['description']
results.append(y)

Expand Down Expand Up @@ -185,7 +193,12 @@ def get_all_synonyms(self):
template_text=text \
)
for x in rr:
y = ( Text.opt_to_curie(x['cls']), x['pred'], x['val'])
try:
cls_curie = Text.opt_to_curie(x['cls'])
except ValueError as verr:
print(f"Unable to convert {x['cls']} to a CURIE; it will be used as-is: {verr}")
cls_curie = x['cls']
y = ( cls_curie, x['pred'], x['val'])
results.append(y)

return results
Expand Down Expand Up @@ -221,7 +234,11 @@ def get_subclasses_of(self,iri):
results = []
for x in rr:
y = {}
y['descendent'] = Text.opt_to_curie(x['descendent'])
try:
y['descendent'] = Text.opt_to_curie(x['descendent'])
except ValueError as verr:
print(f"Descendent {x['descendent']} could not be converted to a CURIE, will be used as-is: {verr}")
y['descendent'] = x['descendent']
y['descendentLabel'] = x['descendentLabel']
results.append(y)
return results
Expand Down Expand Up @@ -258,7 +275,11 @@ def get_subclasses_and_smiles(self,iri):
results = []
for x in rr:
y = {}
y['descendent'] = Text.opt_to_curie(x['descendent'])
try:
y['descendent'] = Text.opt_to_curie(x['descendent'])
except ValueError as verr:
print(f"Descendent {x['descendent']} could not be converted to a CURIE, will be used as-is: {verr}")
y['descendent'] = x['descendent']
if x['descendentSmiles'] is not None:
y['SMILES'] = x['descendentSmiles']
results.append(y)
Expand Down Expand Up @@ -295,12 +316,14 @@ def get_subclasses_and_xrefs(self,iri):
)
results = defaultdict(set)
for row in resultmap:
dcurie = Text.opt_to_curie(row['descendent'])
#Sometimes we're getting back just strings that aren't curies, skip those (but complain)
if ':' not in row['xref']:
print(f'Bad XREF from {row["descendent"]} to {row["xref"]}')
# Sometimes we're getting back just strings that aren't curies, skip those (but complain)
try:
dcurie = Text.opt_to_curie(row['descendent'])
results[ dcurie ].add( (Text.opt_to_curie(row['xref']) ))
except ValueError as verr:
print(f'Bad XREF from {row["descendent"]} to {row["xref"]}: {verr}')
continue
results[ dcurie ].add( (Text.opt_to_curie(row['xref']) ))

return results

def get_subclasses_and_exacts(self,iri):
Expand Down Expand Up @@ -346,15 +369,23 @@ def get_subclasses_and_exacts(self,iri):
}, outputs=[ 'descendent', 'match'] )
results = defaultdict(list)
for row in resultmap:
desc=Text.opt_to_curie(row['descendent'])
try:
desc = Text.opt_to_curie(row['descendent'])
except ValueError as verr:
print(f"Descendant {row['descendent']} could not be converted to a CURIE, will be used as-is: {verr}")
desc = row['descendent']

if row['match'] is None:
results[desc] += []
else:
results[ desc ].append( (Text.opt_to_curie(row['match']) ))
#Sometimes, if there are no exact_matches, we'll get some kind of blank node id
# like 't19830198'. Want to filter those out.
for k,v in results.items():
results[k] = list(filter(lambda x: ':' in x, v))
# Sometimes, if there are no exact_matches, we'll get some kind of blank node id
# like 't19830198'. Want to filter those out.
try:
results[ desc ].append(Text.opt_to_curie(row['match']))
except ValueError as verr:
print(f'Row {row} could not be converted to a CURIE: {verr}')
continue

return results

def get_subclasses_and_close(self,iri):
Expand Down Expand Up @@ -395,15 +426,23 @@ def get_subclasses_and_close(self,iri):
}, outputs=[ 'descendent', 'match' ] )
results = defaultdict(list)
for row in resultmap:
desc = Text.opt_to_curie(row['descendent'])
try:
desc = Text.opt_to_curie(row['descendent'])
except ValueError as verr:
print(f"Descendant {row['descendent']} could not be converted to a CURIE, will be used as-is: {verr}")
desc = row['descendent']

if row['match'] is None:
results[desc] += []
else:
results[ desc].append( (Text.opt_to_curie(row['match']) ))
#Sometimes, if there are no exact_matches, we'll get some kind of blank node id
# like 't19830198'. Want to filter those out.
for k,v in results.items():
results[k] = list(filter(lambda x: ':' in x, v))
try:
results[ desc].append( (Text.opt_to_curie(row['match']) ))
except ValueError as verr:
# Sometimes, when there are no matches, we get back some kind of blank node id
# like 't19830198'. It has no colon, so opt_to_curie() raises a ValueError; report it and skip the row.
print(f"Value {row['match']} in row {row} could not be converted to a CURIE: {verr}")
continue

return results

def write_normalized_information_content(self, filename):
Expand Down
24 changes: 18 additions & 6 deletions src/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import copy
from logging.handlers import RotatingFileHandler
from src.LabeledID import LabeledID
from src.prefixes import OMIM, OMIMPS, UMLS, SNOMEDCT, KEGGPATHWAY, KEGGREACTION, NCIT, ICD10, ICD10CM
from src.prefixes import OMIM, OMIMPS, UMLS, SNOMEDCT, KEGGPATHWAY, KEGGREACTION, NCIT, ICD10, ICD10CM, ICD11FOUNDATION
import src.prefixes as prefixes

#loggers = {}
Expand Down Expand Up @@ -79,10 +79,15 @@ def get_curie (text):
return text.upper().split(':', 1)[0] if ':' in text else None

@staticmethod
def get_prefix (text):
if isinstance(text,LabeledID):
text = text.identifier
return text.split(':', 1)[0] if ':' in text else None
def get_prefix (id):
if isinstance(id,LabeledID):
text = id.identifier
else:
text = id
if ':' in text:
return text.split(':', 1)[0]
raise ValueError(f"Unable to get_prefix({id}) with text '{text}': no colons found in identifier.")


@classmethod
def recurie(cls,text,new_prefix=None):
Expand Down Expand Up @@ -124,7 +129,10 @@ def opt_to_curie (text):
if text is None:
return None
#grumble, I should be better about handling prefixes
if text.startswith('http://purl.obolibrary.org') or text.startswith('http://www.orpha.net') or text.startswith('http://www.ebi.ac.uk/efo'):
if text.startswith('http://purl.obolibrary.org/obo/mondo/sources/icd11foundation/'):
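# This check must come first: these IRIs also start with 'http://purl.obolibrary.org', but they do not follow the '<PREFIX>_<ID>' pattern that the generic branch below splits on. The slice strips the 61-character IRI prefix matched above.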
r = f'{ICD11FOUNDATION}:{text[61:]}'
elif text.startswith('http://purl.obolibrary.org') or text.startswith('http://www.orpha.net') or text.startswith('http://www.ebi.ac.uk/efo'):
p = text.split('/')[-1].split('_')
r = ':'.join( p )
elif text.startswith('https://omim.org/'):
Expand Down Expand Up @@ -153,8 +161,12 @@ def opt_to_curie (text):
r = Text.recurie(text,KEGGREACTION)
else:
r = text

if ':' in r:
return Text.recurie(r)
else:
raise ValueError(f"Unable to opt_to_curie({text}): output calculated as {r}, which has no colon.")

return r

@staticmethod
Expand Down