TranslatorSRI · gaurav · Dec 13, 2024 · Dec 13, 2024 · Dec 14, 2024 · Dec 14, 2024
diff --git a/src/datahandlers/efo.py b/src/datahandlers/efo.py
@@ -31,6 +31,7 @@ def __init__(self):
         end = dt.now()
         print('loading complete')
         print(f'took {end-start}')
+
     def pull_EFO_labels_and_synonyms(self,lname,sname):
         with open(lname, 'w') as labelfile, open(sname,'w') as synfile:
             #for labeltype in ['skos:prefLabel','skos:altLabel','rdfs:label']:
@@ -59,6 +60,7 @@ def pull_EFO_labels_and_synonyms(self,lname,sname):
                     synfile.write(f'{EFO}:{efo_id}\t{labeltype}\t{label}\n')
                     if not labeltype == 'skos:altLabel':
                         labelfile.write(f'{EFO}:{efo_id}\t{label}\n')
+
     def pull_EFO_ids(self,roots,idfname):
         with open(idfname, 'w') as idfile:
             for root,rtype in roots:
@@ -75,6 +77,7 @@ def pull_EFO_ids(self,roots,idfname):
                     if efoid.startswith("EFO_"):
                         efo_id = efoid.split("_")[-1]
                         idfile.write(f'{EFO}:{efo_id}\t{rtype}\n')
+
     def get_exacts(self, iri, outfile):
         query = f"""
          prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
@@ -98,7 +101,12 @@ def get_exacts(self, iri, outfile):
         qres = self.m.query(query)
         for row in list(qres):
             other = str(row["match"])
-            otherid = Text.opt_to_curie(other[1:-1])
+            try:
+                otherid = Text.opt_to_curie(other[1:-1])
+            except ValueError as verr:
+                print(f"Could not translate {other[1:-1]} into a CURIE, will be used as-is: {verr}")
+                otherid = other[1:-1]
+
             if otherid.startswith("ORPHANET"):
                 print(row["match"])
                 print(other)

diff --git a/src/node.py b/src/node.py
@@ -354,6 +354,12 @@ def clean_list(self,input_identifiers):
         return cleaned
 
     def load_extra_labels(self,prefix):
+        if self.label_dir is None:
+            print (f"WARNING: no label_dir specified in load_extra_labels({self}, {prefix}), can't load extra labels for {prefix}. Skipping.")
+            return
+        if prefix is None:
+            print (f"WARNING: no prefix specified in load_extra_labels({self}, {prefix}), can't load extra labels. Skipping.")
+            return
         labelfname = os.path.join(self.label_dir,prefix,'labels')
         lbs = {}
         if os.path.exists(labelfname):
@@ -375,7 +381,11 @@ def apply_labels(self, input_identifiers, labels):
             if iid in labels:
                 labeled_list.append( LabeledID(identifier=iid, label = labels[iid]))
             else:
-                prefix = Text.get_prefix(iid)
+                try:
+                    prefix = Text.get_prefix(iid)
+                except ValueError as e:
+                    print(f"ERROR: Unable to apply_labels({self}, {input_identifiers}, {labels}): could not obtain prefix for identifier {iid}")
+                    raise e
                 if prefix not in self.extra_labels:
                     self.load_extra_labels(prefix)
                 if iid in self.extra_labels[prefix]:

diff --git a/src/prefixes.py b/src/prefixes.py
@@ -73,6 +73,7 @@
 HGNCFAMILY='HGNC.FAMILY'
 PANTHERFAMILY='PANTHER.FAMILY'
 COMPLEXPORTAL='ComplexPortal'
+ICD11FOUNDATION='icd11.foundation'
 
 PMID = 'PMID'
 DOI = 'doi'

diff --git a/src/ubergraph.py b/src/ubergraph.py
@@ -61,7 +61,11 @@ def get_all_labels(self):
                 )
             for x in rr:
                 y = {}
-                y['iri'] = Text.opt_to_curie(x['thing'])
+                try:
+                    y['iri'] = Text.opt_to_curie(x['thing'])
+                except ValueError as verr:
+                    print(f"WARNING: Unable to translate {x['thing']} to a CURIE; it will be used as-is: {verr}")
+                    y['iri'] = x['thing']
                 y['label'] = x['label']
                 results.append(y)
 
@@ -113,7 +117,11 @@ def get_all_descriptions(self):
                 )
             for x in rr:
                 y = {}
-                y['iri'] = Text.opt_to_curie(x['thing'])
+                try:
+                    y['iri'] = Text.opt_to_curie(x['thing'])
+                except ValueError as verr:
+                    print(f"WARNING: Unable to translate {x['thing']} to a CURIE; it will be used as-is: {verr}")
+                    y['iri'] = x['thing']
                 y['description'] = x['description']
                 results.append(y)
 
@@ -185,7 +193,12 @@ def get_all_synonyms(self):
                 template_text=text \
                 )
             for x in rr:
-                y = ( Text.opt_to_curie(x['cls']), x['pred'], x['val'])
+                try:
+                    cls_curie = Text.opt_to_curie(x['cls'])
+                except ValueError as verr:
+                    print(f"Unable to convert {x['cls']} to a CURIE; it will be used as-is: {verr}")
+                    cls_curie = x['cls']
+                y = ( cls_curie, x['pred'], x['val'])
                 results.append(y)
 
         return results
@@ -221,7 +234,11 @@ def get_subclasses_of(self,iri):
         results = []
         for x in rr:
             y = {}
-            y['descendent'] = Text.opt_to_curie(x['descendent'])
+            try:
+                y['descendent'] = Text.opt_to_curie(x['descendent'])
+            except ValueError as verr:
+                print(f"Descendent {x['descendent']} could not be converted to a CURIE, will be used as-is: {verr}")
+                y['descendent'] = x['descendent']
             y['descendentLabel'] = x['descendentLabel']
             results.append(y)
         return results
@@ -258,7 +275,11 @@ def get_subclasses_and_smiles(self,iri):
         results = []
         for x in rr:
             y = {}
-            y['descendent'] = Text.opt_to_curie(x['descendent'])
+            try:
+                y['descendent'] = Text.opt_to_curie(x['descendent'])
+            except ValueError as verr:
+                print(f"Descendent {x['descendent']} could not be converted to a CURIE, will be used as-is: {verr}")
+                y['descendent'] = x['descendent']
             if x['descendentSmiles'] is not None:
                 y['SMILES'] = x['descendentSmiles']
             results.append(y)
@@ -295,12 +316,14 @@ def get_subclasses_and_xrefs(self,iri):
         )
         results = defaultdict(set)
         for row in resultmap:
-            dcurie = Text.opt_to_curie(row['descendent'])
-            #Sometimes we're getting back just strings that aren't curies, skip those (but complain)
-            if ':' not in row['xref']:
-                print(f'Bad XREF from {row["descendent"]} to {row["xref"]}')
+            # Sometimes we're getting back just strings that aren't curies, skip those (but complain)
+            try:
+                dcurie = Text.opt_to_curie(row['descendent'])
+                results[ dcurie ].add( (Text.opt_to_curie(row['xref']) ))
+            except ValueError as verr:
+                print(f'Bad XREF from {row["descendent"]} to {row["xref"]}: {verr}')
                 continue
-            results[ dcurie ].add( (Text.opt_to_curie(row['xref']) ))
+
         return results
 
     def get_subclasses_and_exacts(self,iri):
@@ -346,15 +369,23 @@ def get_subclasses_and_exacts(self,iri):
                 }, outputs=[ 'descendent', 'match'] )
         results = defaultdict(list)
         for row in resultmap:
-            desc=Text.opt_to_curie(row['descendent'])
+            try:
+                desc = Text.opt_to_curie(row['descendent'])
+            except ValueError as verr:
+                print(f"Descendant {row['descendent']} could not be converted to a CURIE, will be used as-is: {verr}")
+                desc = row['descendent']
+
             if row['match'] is None:
                 results[desc] += []
             else:
-                results[ desc ].append( (Text.opt_to_curie(row['match']) ))
-        #Sometimes, if there are no exact_matches, we'll get some kind of blank node id
-        # like 't19830198'. Want to filter those out.
-        for k,v in results.items():
-            results[k] = list(filter(lambda x: ':' in x, v))
+                # Sometimes, if there are no exact_matches, we'll get some kind of blank node id
+                # like 't19830198'. Want to filter those out.
+                try:
+                    results[ desc ].append(Text.opt_to_curie(row['match']))
+                except ValueError as verr:
+                    print(f'Row {row} could not be converted to a CURIE: {verr}')
+                    continue
+
         return results
 
     def get_subclasses_and_close(self,iri):
@@ -395,15 +426,23 @@ def get_subclasses_and_close(self,iri):
                }, outputs=[ 'descendent', 'match' ] )
         results = defaultdict(list)
         for row in resultmap:
-            desc = Text.opt_to_curie(row['descendent'])
+            try:
+                desc = Text.opt_to_curie(row['descendent'])
+            except ValueError as verr:
+                print(f"Descendant {row['descendent']} could not be converted to a CURIE, will be used as-is: {verr}")
+                desc = row['descendent']
+
             if row['match'] is None:
                 results[desc] += []
             else:
-                results[ desc].append( (Text.opt_to_curie(row['match']) ))
-        #Sometimes, if there are no exact_matches, we'll get some kind of blank node id
-        # like 't19830198'. Want to filter those out.
-        for k,v in results.items():
-            results[k] = list(filter(lambda x: ':' in x, v))
+                try:
+                    results[ desc].append( (Text.opt_to_curie(row['match']) ))
+                except ValueError as verr:
+                    # Sometimes, if there are no exact_matches, we'll get some kind of blank node id
+                    # like 't19830198'. Want to filter those out.
+                    print(f"Value {row['match']} in row {row} could not be converted to a CURIE: {verr}")
+                    continue
+
         return results
 
     def write_normalized_information_content(self, filename):

diff --git a/src/util.py b/src/util.py
@@ -6,7 +6,7 @@
 import copy
 from logging.handlers import RotatingFileHandler
 from src.LabeledID import LabeledID
-from src.prefixes import OMIM, OMIMPS, UMLS, SNOMEDCT, KEGGPATHWAY, KEGGREACTION, NCIT, ICD10, ICD10CM
+from src.prefixes import OMIM, OMIMPS, UMLS, SNOMEDCT, KEGGPATHWAY, KEGGREACTION, NCIT, ICD10, ICD10CM, ICD11FOUNDATION
 import src.prefixes as prefixes
 
 #loggers = {}
@@ -79,10 +79,15 @@ def get_curie (text):
         return text.upper().split(':', 1)[0] if ':' in text else None
 
     @staticmethod
-    def get_prefix (text):
-        if isinstance(text,LabeledID):
-            text = text.identifier
-        return text.split(':', 1)[0] if ':' in text else None
+    def get_prefix (id):
+        """Return the part of an identifier before its first colon; raise ValueError if it has no colon."""
+        # A LabeledID is unwrapped to its .identifier; anything else is used directly.
+        text = id.identifier if isinstance(id,LabeledID) else id
+        if ':' not in text:
+            # Without a colon there is no prefix to split off, so report the offending input.
+            raise ValueError(f"Unable to get_prefix({id}) with text '{text}': no colons found in identifier.")
+        return text.split(':', 1)[0]
+
 
     @classmethod
     def recurie(cls,text,new_prefix=None):
@@ -124,7 +129,10 @@ def opt_to_curie (text):
         if text is None:
             return None
         #grumble, I should be better about handling prefixes
-        if text.startswith('http://purl.obolibrary.org') or text.startswith('http://www.orpha.net') or text.startswith('http://www.ebi.ac.uk/efo'):
+        if text.startswith('http://purl.obolibrary.org/obo/mondo/sources/icd11foundation/'):
+            # This has to go on top because it's a 'purl.obolibrary.org' which doesn't follow the same pattern as the others.
+            r = f'{ICD11FOUNDATION}:{text[61:]}'
+        elif text.startswith('http://purl.obolibrary.org') or text.startswith('http://www.orpha.net') or text.startswith('http://www.ebi.ac.uk/efo'):
             p = text.split('/')[-1].split('_')
             r = ':'.join( p )
         elif text.startswith('https://omim.org/'):
@@ -153,8 +161,12 @@ def opt_to_curie (text):
             r = Text.recurie(text,KEGGREACTION)
         else:
             r = text
+
         if ':' in r:
             return Text.recurie(r)
+        else:
+            raise ValueError(f"Unable to opt_to_curie({text}): output calculated as {r}, which has no colon.")
+
         return r
 
     @staticmethod