diff --git a/Utils/citation.py b/Utils/citation.py
index d6b7693..1edf11e 100644
--- a/Utils/citation.py
+++ b/Utils/citation.py
@@ -15,17 +15,22 @@ def __init__(self, bibcit_tag):
         self.label = bibcit_tag.get("PLACEHOLDER")
         self.citing_entity = bibcit_tag.get("DBREF")
         self.uri = bibcit_tag.get("REF")
-        
+        self.entry_id = utilities.get_entry_id(self.tag)
+
+        if self.citing_entity:
+            if " " in self.citing_entity:
+                logger.error(F"In entry: {self.entry_id} - BIBCIT: Space encountered in DBREF attribute: {bibcit_tag}")
+                self.citing_entity = self.citing_entity.replace(" ","")
 
     def to_triple(self, target_uri, source_url=None):
         g = utilities.create_graph()
 
         if not self.citing_entity:
-            logger.warning(F"Missing DBREF attribute: {self.tag}")
+            logger.warning(F"In entry: {self.entry_id} - BIBCIT: Missing DBREF attribute: {self.tag}")
             return g
 
         if not self.label:
-            logger.warning(F"Missing PLACEHOLDER attribute: {self.tag}")
+            logger.warning(F"In entry: {self.entry_id} - BIBCIT: Missing PLACEHOLDER attribute: {self.tag}")
             return g
 
         uri = None
@@ -34,6 +39,8 @@ def to_triple(self, target_uri, source_url=None):
             uri = rdflib.URIRef(self.uri+"_dbref")
             citing_uri = rdflib.URIRef(self.uri)
         else:
+            logger.error(F"In entry: {self.entry_id} - BIBCIT: tag missing REF attribute: {self.tag}")
+
             uri = utilities.create_uri("data", "dbref_"+self.citing_entity)
             citing_uri = utilities.create_uri("data", self.citing_entity)
 
diff --git a/Utils/utilities.py b/Utils/utilities.py
index 961d69e..06a27b7 100644
--- a/Utils/utilities.py
+++ b/Utils/utilities.py
@@ -124,6 +124,11 @@ def to_triple(self, context):
         g.add((context.uri, self.predicate, self.object))
         return g
 
+def get_entry_id(tag):
+    """Return the ID attribute of the ENTRY ancestor of tag, or None if there is none."""
+    entry = tag.find_parent("ENTRY")
+    # Guard the lookup: citation.py only uses this ID in log messages, so a missing ENTRY must not raise.
+    return entry.get("ID") if entry is not None else None
 
 def remove_unwanted_tags(tag):
     unwanted_tag_names = ["BIBCITS", "RESPONSIBILITIES", "KEYWORDCLASSES","RESEARCHNOTE"]