Skip to content

Commit

Permalink
Merge pull request #110 from MaRDI4NFDI/OpenAire_for_Software
Browse files Browse the repository at this point in the history
Open aire for software metadata (unittest - reference)
  • Loading branch information
Mazztok45 authored Nov 8, 2024
2 parents 84063a5 + 909ebb3 commit c33b860
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 0 deletions.
65 changes: 65 additions & 0 deletions test/data/software/OpenAire-software-reference.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<resource xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:datacite="http://datacite.org/schema/kernel-4"
xmlns:oaire="http://www.openarchives.org/OAI/2.0/oai_dc/"
xmlns="http://namespace.openaire.eu/schema/oaire/"
xsi:schemaLocation="http://namespace.openaire.eu/schema/oaire/ https://www.openaire.eu/schema/repo-lit/4.0/openaire.xsd">
<datacite:identifier IdentifierType="URL">https://zbmath.org/software/2</datacite:identifier>
<datacite:creators>
<datacite:creator>
<datacite:creatorName nameType="Personal">Li, X. S.</datacite:creatorName>
<datacite:givenName>X. S.</datacite:givenName>
<datacite:familyName>Li</datacite:familyName>
</datacite:creator>
<datacite:creator>
<datacite:creatorName nameType="Personal">Demmel, J. W.</datacite:creatorName>
<datacite:givenName>J. W.</datacite:givenName>
<datacite:familyName>Demmel</datacite:familyName>
</datacite:creator>
</datacite:creators>
<datacite:titles>
<datacite:title>SuperLU-DIST</datacite:title>
</datacite:titles>
<dc:description xml:lang="en" descriptionType="Abstract">zbMATH Open Web Interface contents unavailable due to conflicting licenses.</dc:description>
<datacite:subjects>
<datacite:subject subjectScheme="msc2020">35</datacite:subject>
<datacite:subject subjectScheme="msc2020">65</datacite:subject>
<datacite:subject subjectScheme="msc2020">68</datacite:subject>
<datacite:subject subjectScheme="msc2020">74</datacite:subject>
<datacite:subject subjectScheme="msc2020">76</datacite:subject>
<datacite:subject subjectScheme="msc2020">05</datacite:subject>
<datacite:subject subjectScheme="msc2020">15</datacite:subject>
<datacite:subject subjectScheme="msc2020">60</datacite:subject>
<datacite:subject subjectScheme="msc2020">78</datacite:subject>
<datacite:subject subjectScheme="msc2020">80</datacite:subject>
<datacite:subject subjectScheme="msc2020">81</datacite:subject>
<datacite:subject subjectScheme="msc2020">82</datacite:subject>
<datacite:subject subjectScheme="msc2020">85</datacite:subject>
<datacite:subject subjectScheme="msc2020">86</datacite:subject>
<datacite:subject subjectScheme="msc2020">90</datacite:subject>
<datacite:subject subjectScheme="msc2020">92</datacite:subject>
<datacite:subject subjectScheme="keyword">orms</datacite:subject>
</datacite:subjects>
<datacite:relatedIdentifiers>
<datacite:relatedIdentifier relatedIdentifierType="URL" relationType="IsSourceOf">http://crd-legacy.lbl.gov/~xiaoye/SuperLU/</datacite:relatedIdentifier>
<datacite:relatedIdentifier relatedIdentifierType="URL" relationType="IsCitedBy">https://zbmath.org/2187846</datacite:relatedIdentifier>
</datacite:relatedIdentifiers>
<resourceType resourceTypeGeneral="Software"/>
<datacite:rightsList>
<datacite:rights rightsURI="http://purl.org/coar/access_right/c_14cb">metadata only access</datacite:rights>
<datacite:rights xml:lang="en"
schemeURI="https://api.zbmath.org/v1/"
rightsIdentifierScheme="zbMATH"
rightsIdentifier="CC-BY-SA 4.0"
rightsURI="https://creativecommons.org/licenses/by-sa/4.0/">
The zbMATH Open OAI-PMH API is subject to the Terms and Conditions for the zbMATH Open API Service of FIZ Karlsruhe – Leibniz-Institut für Informationsinfrastruktur GmbH.
Content generated by zbMATH Open, such as reviews, classifications, software, or author disambiguation data, are distributed under CC-BY-SA 4.0. This defines the license for the whole dataset, which also contains non-copyrighted bibliographic metadata and reference data derived from I4OC (CC0). Note that the API only provides a subset of the data in the zbMATH Open Web interface. In several cases, third-party information, such as abstracts, cannot be made available under a suitable license through the API. In those cases, we replaced the data with the string
"zbMATH Open Web Interface contents unavailable due to conflicting licenses."</datacite:rights>
</datacite:rightsList>
<dc:format>application/xml</dc:format>
<dc:language>eng</dc:language>
<dc:source>zbMATH Open Web Interface contents unavailable due to conflicting licenses.</dc:source>
<oaire:citationTitle>zbMATH Open Web Interface contents unavailable due to conflicting licenses.</oaire:citationTitle>
</resource>
53 changes: 53 additions & 0 deletions test/test_metadata-software_OpenAire.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import os
import re
import unittest

import lxml.etree as ET
from xmldiff import main
from xmldiff.actions import MoveNode

# The fixture and stylesheet paths used by the tests are relative, so when the
# module is launched from inside the ``test`` directory, step up one level.
_parent_dir, _leaf_dir = os.path.split(os.getcwd())
if _leaf_dir == 'test':
    os.chdir(_parent_dir)


def split_doi(doi):
    """Match the start of *doi* against known shapes and return two parts.

    The patterns are tried in order with ``re.match`` (anchored at the start
    of the string). For the first one that matches, the result is
    ``(captured, remainder)`` where ``captured`` is the text of group 1 and
    ``remainder`` is *doi* with the first occurrence of ``captured`` removed
    and any leading ``/`` stripped.

    Both patterns capture a whole run of non-whitespace characters, so for an
    identifier without embedded whitespace ``remainder`` is the empty string.

    Returns ``(None, None)`` when no pattern matches, e.g. for an empty string
    or one that starts with whitespace.
    """
    candidate_patterns = (
        r"(10\.\d{4,}/\S+)",
        r"(/?\S+)",  # For DOIs without prefix
    )

    # Lazily evaluate the patterns so later ones are only tried when needed.
    attempts = (re.match(candidate, doi) for candidate in candidate_patterns)
    found = next((hit for hit in attempts if hit), None)
    if found is None:
        return None, None

    captured = found.group(1)
    remainder = doi.replace(captured, '', 1).lstrip('/')
    return captured, remainder


class PlainXmlTest(unittest.TestCase):
    """Regression test for the OpenAIRE software-metadata stylesheet."""

    def test_similarity(self):
        """Transform the plain software record and compare it to the reference.

        ``test/data/software/plain.xml`` is run through
        ``xslt/software/xslt-software-OpenAire.xslt`` and the serialised
        result is diffed against
        ``test/data/software/OpenAire-software-reference.xml``. Diff actions
        of type ``MoveNode`` are ignored; any other action fails the test.

        All paths are relative to the current working directory.
        """
        source_doc = ET.parse('test/data/software/plain.xml')
        stylesheet = ET.parse('xslt/software/xslt-software-OpenAire.xslt')

        # ET.XSLT compiles the stylesheet into a callable transformer.
        transform = ET.XSLT(stylesheet)
        transformed_doc = transform(source_doc)
        real_string = ET.tostring(transformed_doc, pretty_print=True, encoding='utf8').decode()

        # Serialise the reference the same way so both sides are formatted alike.
        reference = ET.parse('test/data/software/OpenAire-software-reference.xml')
        expected_string = ET.tostring(reference, pretty_print=True, encoding='utf8').decode()

        # NOTE(review): per the xmldiff documentation, 'ratio_mode' selects how
        # node similarity is computed and 'F' is the similarity threshold for
        # matching nodes -- confirm these values are still the intended ones.
        diff = main.diff_texts(expected_string, real_string, {
            'ratio_mode': 'fast',
            'F': 1,
        })

        # Pure reordering (MoveNode) is tolerated; everything else is a real difference.
        essentials = [action for action in diff if not isinstance(action, MoveNode)]

        # Comparing against an empty list makes unittest print the offending
        # actions on failure instead of only a count.
        self.assertEqual([], essentials, "Found differences between expected and transformed XML")


# Allow running this test module directly as a script.
if __name__ == '__main__':
    unittest.main()

0 comments on commit c33b860

Please sign in to comment.