From 6c4c32f75d6ffff666b046392fab748b565eb816 Mon Sep 17 00:00:00 2001
From: Alliyya Mo <alliyyamo.dev@gmail.com>
Date: Tue, 17 May 2022 12:51:22 -0400
Subject: [PATCH] Updating genre mapping #36

---
 Mods/modsBib.py | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/Mods/modsBib.py b/Mods/modsBib.py
index c5dd149..4711bcd 100644
--- a/Mods/modsBib.py
+++ b/Mods/modsBib.py
@@ -286,17 +286,23 @@ def __init__(self, filename, matched_documents):
     def parse_db_refs(self):
         """
         Maps all genres within a textscope to the given DBREF
-        Used to map to blibiography
+        Used to map to bibliography
         :return: None
         """
         textscopes = self.soup.find_all('TEXTSCOPE')
 
         for ts in textscopes:
             ts_parent = ts.parent
-
-            if 'DBREF' in ts.attrs:
-                db_ref = ts.attrs['DBREF']
-
+            rec_id = None
+            
+            # Prefer the REF attribute; fall back to DBREF when REF is absent
+            if 'REF' in ts.attrs:
+                rec_id = ts.attrs['REF'].split(":")[2]                
+            elif 'DBREF' in ts.attrs:
+                rec_id = ts.attrs['DBREF']
+
+            # Collect GENRENAME values from the TGENRE elements under the parent
+            if rec_id:
                 tgenres = ts_parent.find_all('TGENRE')
                 genres = []
 
@@ -305,7 +311,18 @@ def parse_db_refs(self):
                         name = genre.attrs['GENRENAME']
                         genres.append(name)
 
-                self.matched_documents[db_ref] = genres
+                
+                if rec_id in self.matched_documents:
+                    for x in genres:
+                        if x not in self.matched_documents[rec_id]:
+                            self.matched_documents[rec_id].append(x)
+                else:
+                        self.matched_documents[rec_id] = list(set(genres))
+            
+            
+            else:
+                logger.error("TEXTSCOPE missing REF & DBREF attribute")
+                
 
 
 class BibliographyParse:
@@ -1185,6 +1202,7 @@ def add_types_to_graph(graph,uri,label):
     test_filenames = ["d75215cb-d102-4256-9538-c44bfbf490d9.xml","2e3e602e-b82c-441d-81bc-883f834b20c1.xml","13f8e71a-def5-41e4-90a0-6ae1092ae446.xml","16d427db-a8a2-4f33-ac53-9f811672584b.xml","4109f3c5-0508-447b-9f86-ea8052ff3981.xml",
                       "e1b2f98f-1001-4787-a711-464f1527e5a7.xml", "15655c66-8c0b-4493-8f68-8d6cf4998303.xml","0d0e00bf-3224-4286-8ec4-f389ec6cc7bb.xml"] # VW, the wave
     # test_filenames = ["e57c7868-a3b7-460e-9f20-399fab7f894c.xml"] 
+    test_filenames = ["e35f16d8-d8f6-414d-b465-2a8a916ba53a.xml"] 
     # test_filenames = ["64d3c008-8a9d-415b-b52b-91d232c00952.xml",
     # test_filenames = ["55aff3fb-8ea9-4e95-9e04-0f3e630896e3.xml", "0c133817-f55e-4a8f-a9b4-474566418d9b.xml"]