diff --git a/test/data/software/OpenAire-software-reference.xml b/test/data/software/OpenAire-software-reference.xml new file mode 100644 index 0000000..b69b841 --- /dev/null +++ b/test/data/software/OpenAire-software-reference.xml @@ -0,0 +1,65 @@ + + https://zbmath.org/software/2 + + + Li, X. S. + X. S. + Li + + + Demmel, J. W. + J. W. + Demmel + + + + SuperLU-DIST + + zbMATH Open Web Interface contents unavailable due to conflicting licenses. + + 35 + 65 + 68 + 74 + 76 + 05 + 15 + 60 + 78 + 80 + 81 + 82 + 85 + 86 + 90 + 92 + orms + + + http://crd-legacy.lbl.gov/~xiaoye/SuperLU/ + https://zbmath.org/2187846 + + + + metadata only access + + The zbMATH Open OAI-PMH API is subject to the Terms and Conditions for the zbMATH Open API Service of FIZ Karlsruhe – Leibniz-Institut für Informationsinfrastruktur GmbH. +Content generated by zbMATH Open, such as reviews, classifications, software, or author disambiguation data, are distributed under CC-BY-SA 4.0. This defines the license for the whole dataset, which also contains non-copyrighted bibliographic metadata and reference data derived from I4OC (CC0). Note that the API only provides a subset of the data in the zbMATH Open Web interface. In several cases, third-party information, such as abstracts, cannot be made available under a suitable license through the API. In those cases, we replaced the data with the string +"zbMATH Open Web Interface contents unavailable due to conflicting licenses." + + application/xml + eng + zbMATH Open Web Interface contents unavailable due to conflicting licenses. + zbMATH Open Web Interface contents unavailable due to conflicting licenses. 
"""Regression test for the OpenAIRE software-metadata XSLT transformation.

Source file: ``test/test_metadata-software_OpenAire.py``.

The test transforms ``test/data/software/plain.xml`` with
``xslt/software/xslt-software-OpenAire.xslt`` and compares the result against
the stored reference ``test/data/software/OpenAire-software-reference.xml``.
"""
import os
import re
import unittest
from typing import Optional, Tuple

# ``split_doi`` is the only reusable helper in this module.
__all__ = ["split_doi"]

# The fixture and stylesheet paths used below are written relative to the
# repository root, so step up one level when launched from inside ``test/``.
if os.path.basename(os.getcwd()) == 'test':
    os.chdir(os.path.dirname(os.getcwd()))


# Tried in order by split_doi(); the first pattern that matches wins.
# Only the part inside the capture group is returned as the first element.
_DOI_PATTERNS = (
    re.compile(r"(10\.\d{4,})/\S+"),  # registrant prefix followed by "/suffix"
    re.compile(r"(/?\S+)"),  # For DOIs without prefix
)


def split_doi(doi: Optional[str]) -> Tuple[Optional[str], Optional[str]]:
    """Split a DOI string into a leading part and the remainder.

    For a DOI of the form ``10.NNNN/suffix`` the result is
    ``("10.NNNN", "suffix")``. The remainder is everything after the
    captured part with leading ``/`` characters stripped.

    For a string without a ``10.NNNN/`` prefix the whole first
    whitespace-free token is returned as the first element and the
    remainder is whatever follows it (usually ``""``).
    NOTE(review): the intended result for prefix-less DOIs is not evident
    from this file; the original behaviour is kept for that case.

    Args:
        doi: The DOI text to split. ``None`` and ``""`` are accepted.

    Returns:
        A ``(head, remainder)`` tuple, or ``(None, None)`` when *doi* is
        empty, ``None``, or does not start with a non-whitespace token.
    """
    if not doi:
        return None, None

    for pattern in _DOI_PATTERNS:
        match = pattern.match(doi)
        if match:
            # The match is anchored at position 0, so slicing at the end of
            # the captured group yields everything that follows it.
            return match.group(1), doi[match.end(1):].lstrip('/')
    return None, None


class PlainXmlTest(unittest.TestCase):
    """Checks the OpenAIRE XSLT output against the stored reference XML."""

    def test_similarity(self):
        """The transformed plain.xml must match the reference document.

        Node moves reported by xmldiff are ignored; any other reported
        edit action fails the test.
        """
        # Imported here so that importing this module (e.g. to reuse
        # split_doi) does not require the third-party XML packages.
        import lxml.etree as ET
        from xmldiff import main
        from xmldiff.actions import MoveNode

        dom = ET.parse('test/data/software/plain.xml')
        xslt = ET.parse('xslt/software/xslt-software-OpenAire.xslt')

        # ET.XSLT compiles the stylesheet into a callable transformer.
        transform = ET.XSLT(xslt)
        newdom = transform(dom)

        # Serialise both documents the same way so that formatting
        # differences do not show up in the comparison.
        real_string = ET.tostring(newdom, pretty_print=True, encoding='utf8').decode()
        reference = ET.parse('test/data/software/OpenAire-software-reference.xml')
        expected_string = ET.tostring(reference, pretty_print=True, encoding='utf8').decode()

        diff = main.diff_texts(expected_string, real_string, {
            # xmldiff option: trades matching accuracy for speed.
            'ratio_mode': 'fast',
            # xmldiff option: similarity threshold (0..1) for node matching.
            'F': 1,
        })

        # Pure node moves are not treated as a difference.
        essentials = [action for action in diff if not isinstance(action, MoveNode)]

        # Comparing against [] (rather than checking the length) makes a
        # failure list the offending edit actions.
        self.assertEqual(essentials, [], "Found differences between expected and transformed XML")


if __name__ == '__main__':
    unittest.main()