From 1e7fe23bac4c257c5465524ec9a74d32e54465ae Mon Sep 17 00:00:00 2001
From: Paul Frederiks
Date: Fri, 29 Sep 2017 14:05:18 +0200
Subject: [PATCH] Adapt to new yoda-metadata schema
---
moai/yoda.py | 129 +++++++++++++++++++++++++++++++++++----------------
1 file changed, 88 insertions(+), 41 deletions(-)
diff --git a/moai/yoda.py b/moai/yoda.py
index f538cca..ab70d0c 100644
--- a/moai/yoda.py
+++ b/moai/yoda.py
@@ -1,7 +1,7 @@
from lxml import etree
from datetime import datetime, timedelta
-from moai.utils import XPath
+from moai.utils import XPath, get_moai_log
class YodaContent(object):
@@ -9,52 +9,99 @@ def __init__(self, provider):
self.provider = provider
self.id = None
self.modified = None
- self.deleted = None
- self.sets = None
- self.metadata = None
+ self.deleted = False
+ self.sets = dict()
+ self.metadata = dict()
def update(self, path):
- doc = etree.parse(path)
+ log = get_moai_log()
+ try:
+ doc = etree.parse(path)
+ except etree.ParseError:
+ log.warning("Failed to parse %s" % path)
+ return
+
xpath = XPath(doc, nsmap={})
self.root = doc.getroot()
-
- id = xpath.string('//Project_ID')
+
+ id = xpath.string('//Persistent_Identifier_Datapackage')
+ if not id:
+ log.warning("Missing Persistent Identifier of Datapackage in %s" % path)
+ return
+
self.id = 'oai:%s' % id
- self.modified = datetime.now() - timedelta(days=1)
- self.deleted = True
+
+ self.metadata['identifier'] = [id]
+
+ last_modified = xpath.string('//Last_Modified_Date')
+ if not last_modified:
+ log.warning("Missing Last Modified Time in %s" % path)
+ self.modified = datetime.now() - timedelta(days=1)
+ else:
+ self.modified = datetime.strptime(last_modified, "%Y-%m-%d")
author_data = []
+ creators = xpath.strings('//Creator/Name')
+ if creators:
+ self.metadata['creator'] = creators
+ for creator in creators:
+ author_data.append({u"name": creator, u"role": [u"auth"]})
+
+ contributors = xpath.strings('//Contributor/Name')
+ if contributors:
+ self.metadata['contributor'] = contributors
+ for contributor in contributors:
+ author_data.append({u"name": contributor, u"role": [u"cont"]})
+
+ self.metadata["author_data"]= author_data
+
+ title = xpath.string('//Title')
+ if title:
+ self.metadata['title'] = [title]
+
+ description = xpath.string('//Description')
+ if description:
+ self.metadata['description'] = [description]
+
+ language = xpath.string('//Language')
+ if language:
+ self.metadata['language'] = [language]
+
+ datesinxml = [xpath.string('//Publication_Date'),
+ xpath.string('//Embargo_End_Date')]
+
+ dates = [d for d in datesinxml if d]
+ if dates:
+ self.metadata['date'] = dates
+
+ rightsinxml = [xpath.string('//License'),
+ xpath.string('//License/Properties/URL')]
+
+ rights = [r for r in rightsinxml if r]
+ if rights:
+ self.metadata['rights'] = rights
+
+ subjectinxml = xpath.strings('//Discipline') + xpath.strings('//Tag')
+ subject = [s for s in subjectinxml if s]
+ if subject:
+ self.metadata['subject'] = subject
+
+ locations = xpath.strings('//Location_Covered')
+ perioddates = [xpath.string('//Start_Period'), xpath.string('//End_Period')]
+ period = "/".join([d for d in perioddates if d])
+ if period:
+ coverage = locations + [period]
+ else:
+ coverage = locations
+ if coverage:
+ self.metadata['coverage'] = coverage
+
+ relations = xpath.strings('//Persistent_Identifier')
+ if relations:
+ self.metadata['relation'] = relations
- # Add creator of dataset.
- author_data.append({'name': [xpath.string('//Creator')],
- 'role': [u'aut']})
-
- # Add all contributors to dataset.
- for num, el in enumerate(xpath('//Contributor'), 1):
- contributor = [xpath.string('//Contributor[%d]' % num)]
- author_data.append({'name': contributor,
- 'role': [u'aut']})
-
- # Add metadata of dataset.
- self.metadata = {'identifier': [id],
- 'title': [xpath.string('//Project_Title')],
- 'subject': [xpath.string('//Project_Description')],
- 'description': [xpath.string('//Project_Description')],
- 'creator': [d['name'][0] for d in author_data],
- 'author_data': author_data,
- 'language': [xpath.string('//Language_dataset')],
- 'date': [xpath.string('//Embargo')]}
-
- # Clean dataset type.
- type = xpath.string('//Dataset_Type')
- type = type.replace(" ", "_")
-
- # Specify dataset.
- self.sets = {type:
- {u'name':xpath.string('//Dataset_Title'),
- u'description':xpath.string('//Dataset_Description')}}
-
- published = xpath.string('//Publish_Metadata')
- if published == 'Yes':
- self.deleted = False
+ self.sets[u'yoda'] = {
+ u'name': u'YoDa',
+ u'description': u'share-collaborate environment for research data'
+ }