Adapt to new yoda-metadata schema

UtrechtUniversity · Sep 29, 2017 · 1e7fe23 · 1e7fe23
1 parent c7e0bb7
commit 1e7fe23
Showing 1 changed file with 88 additions and 41 deletions.
diff --git a/moai/yoda.py b/moai/yoda.py
@@ -1,60 +1,107 @@
 from lxml import etree
 from datetime import datetime, timedelta
 
-from moai.utils import XPath
+from moai.utils import XPath, get_moai_log
 
 
 class YodaContent(object):
     def __init__(self, provider):
         self.provider = provider
         self.id = None
         self.modified = None
-        self.deleted = None
-        self.sets = None
-        self.metadata = None
+        self.deleted = False
+        self.sets = dict()
+        self.metadata = dict()
 
     def update(self, path):
-        doc = etree.parse(path)
+        log = get_moai_log()
+        try:
+            doc = etree.parse(path)
+        except etree.ParseError:
+            log.warning("Failed to parse %s" % path)
+            return
+
         xpath = XPath(doc, nsmap={})
 
         self.root = doc.getroot()
-
-        id = xpath.string('//Project_ID')
+
+        id = xpath.string('//Persistent_Identifier_Datapackage')
+        if not id:
+            log.warning("Missing Persistent Identifier of Datapackage in %s" % path)
+            return
+
         self.id = 'oai:%s' % id
-        self.modified = datetime.now() - timedelta(days=1)
-        self.deleted = True
+
+        self.metadata['identifier'] = [id]
+
+        last_modified = xpath.string('//Last_Modified_Date')
+        if not last_modified:
+            log.warning("Missing Last Modified Time in %s" % path)
+            self.modified = datetime.now() - timedelta(days=1)
+        else:
+            self.modified = datetime.strptime(last_modified, "%Y-%m-%d")
 
         author_data = []
+        creators = xpath.strings('//Creator/Name')
+        if creators:
+            self.metadata['creator'] = creators
+            for creator in creators:
+                author_data.append({u"name": creator, u"role": [u"auth"]})
+
+        contributors = xpath.strings('//Contributor/Name')
+        if contributors:
+            self.metadata['contributor'] = contributors
+            for contributor in contributors:
+                author_data.append({u"name": contributor, u"role": [u"cont"]})
+
+        self.metadata["author_data"] = author_data
+
+        title = xpath.string('//Title')
+        if title:
+            self.metadata['title'] = [title]
+
+        description = xpath.string('//Description')
+        if description:
+            self.metadata['description'] = [description]
+
+        language = xpath.string('//Language')
+        if language:
+            self.metadata['language'] = [language]
+
+        datesinxml = [xpath.string('//Publication_Date'),
+                      xpath.string('//Embargo_End_Date')]
+
+        dates = [d for d in datesinxml if d]
+        if dates:
+            self.metadata['date'] = dates
+
+        rightsinxml = [xpath.string('//License'),
+                       xpath.string('//License/Properties/URL')]
+
+        rights = [r for r in rightsinxml if r]
+        if rights:
+            self.metadata['rights'] = rights
+
+        subjectinxml = xpath.strings('//Discipline') + xpath.strings('//Tag')
+        subject = [s for s in subjectinxml if s]
+        if subject:
+            self.metadata['subject'] = subject
+
+        locations = xpath.strings('//Location_Covered')
+        perioddates = [xpath.string('//Start_Period'), xpath.string('//End_Period')]
+        period = "/".join([d for d in perioddates if d])
+        if period:
+            coverage = locations + [period]
+        else:
+            coverage = locations
+        if coverage:
+            self.metadata['coverage'] = coverage
+
+        relations = xpath.strings('//Persistent_Identifier')
+        if relations:
+            self.metadata['relation'] = relations
 
-        # Add creator of dataset.
-        author_data.append({'name': [xpath.string('//Creator')],
-                            'role': [u'aut']})
-
-        # Add all contributors to dataset.
-        for num, el in enumerate(xpath('//Contributor'), 1):
-            contributor = [xpath.string('//Contributor[%d]' % num)]
-            author_data.append({'name': contributor,
-                                 'role': [u'aut']})
-
-        # Add metadata of dataset.
-        self.metadata = {'identifier': [id],
-                         'title': [xpath.string('//Project_Title')],
-                         'subject': [xpath.string('//Project_Description')],
-                         'description': [xpath.string('//Project_Description')],
-                         'creator': [d['name'][0] for d in author_data],
-                         'author_data': author_data,
-                         'language': [xpath.string('//Language_dataset')],
-                         'date': [xpath.string('//Embargo')]}
-
-       	# Clean dataset type.
-        type = xpath.string('//Dataset_Type')
-        type = type.replace(" ", "_")
-
-        # Specify dataset.
-        self.sets = {type:
-                     {u'name':xpath.string('//Dataset_Title'),
-                      u'description':xpath.string('//Dataset_Description')}}
-
-        published = xpath.string('//Publish_Metadata')
-        if published == 'Yes':
-            self.deleted = False      
+        self.sets[u'yoda'] = {
+            u'name': u'YoDa',
+            u'description': u'share-collaborate environment for research data'
+        }