Skip to content

Commit

Permalink
Adapt to new yoda-metadata schema
Browse files Browse the repository at this point in the history
  • Loading branch information
Paul Frederiks committed Sep 29, 2017
1 parent c7e0bb7 commit 1e7fe23
Showing 1 changed file with 88 additions and 41 deletions.
129 changes: 88 additions & 41 deletions moai/yoda.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,107 @@
from lxml import etree
from datetime import datetime, timedelta

from moai.utils import XPath
from moai.utils import XPath, get_moai_log


class YodaContent(object):
    """Metadata record built from one Yoda metadata XML file.

    Attributes set by this class:

    * ``provider`` -- the object passed to the constructor, stored as-is.
    * ``id``       -- ``'oai:' + <Persistent_Identifier_Datapackage>``, or
                      ``None`` until :meth:`update` succeeds.
    * ``modified`` -- ``datetime`` taken from ``Last_Modified_Date``; falls
                      back to "now minus one day" when that element is
                      missing or unparsable.
    * ``deleted``  -- always ``False``; this class never marks a record as
                      deleted.
    * ``sets``     -- dict of set specs; :meth:`update` adds the ``yoda`` set.
    * ``metadata`` -- dict mapping field names (``identifier``, ``creator``,
                      ``title``, ...) to lists of values.
    * ``root``     -- root element of the last successfully parsed document
                      (only set by :meth:`update`).
    """

    # strptime layout used for the Last_Modified_Date element.
    # NOTE: month is lowercase ``%m``; uppercase ``%M`` means *minutes*.
    MODIFIED_FORMAT = "%Y-%m-%d"

    def __init__(self, provider):
        """Create an empty record bound to ``provider``."""
        self.provider = provider
        self.id = None
        self.modified = None
        self.deleted = False
        self.sets = dict()
        self.metadata = dict()

    @staticmethod
    def _parse_modified(value):
        """Return ``value`` parsed with MODIFIED_FORMAT, or None if invalid."""
        try:
            return datetime.strptime(value, YodaContent.MODIFIED_FORMAT)
        except (TypeError, ValueError):
            return None

    def update(self, path):
        """Fill this record from the metadata XML file at ``path``.

        Returns ``None`` in every case. When the file cannot be parsed as
        XML, or it has no ``Persistent_Identifier_Datapackage`` element, a
        warning is logged and the method returns early (``id`` is left
        untouched in both cases).
        """
        log = get_moai_log()
        try:
            doc = etree.parse(path)
        except etree.ParseError:
            log.warning("Failed to parse %s" % path)
            return

        xpath = XPath(doc, nsmap={})
        self.root = doc.getroot()

        identifier = xpath.string('//Persistent_Identifier_Datapackage')
        if not identifier:
            log.warning(
                "Missing Persistent Identifier of Datapackage in %s" % path)
            return

        self.id = 'oai:%s' % identifier
        self.metadata['identifier'] = [identifier]

        # Modification time: prefer the value from the XML, otherwise fall
        # back to "yesterday" so the record still has a usable timestamp.
        last_modified = xpath.string('//Last_Modified_Date')
        modified = None
        if not last_modified:
            log.warning("Missing Last Modified Time in %s" % path)
        else:
            modified = self._parse_modified(last_modified)
            if modified is None:
                log.warning("Invalid Last Modified Time %r in %s"
                            % (last_modified, path))
        if modified is None:
            modified = datetime.now() - timedelta(days=1)
        self.modified = modified

        # Creators and contributors are stored both as flat name lists and
        # as a combined author_data list carrying a role per person.
        author_data = []
        creators = xpath.strings('//Creator/Name')
        if creators:
            self.metadata['creator'] = creators
            for creator in creators:
                author_data.append({u"name": creator, u"role": [u"auth"]})

        contributors = xpath.strings('//Contributor/Name')
        if contributors:
            self.metadata['contributor'] = contributors
            for contributor in contributors:
                author_data.append(
                    {u"name": contributor, u"role": [u"cont"]})

        # author_data is always stored, even when it is empty.
        self.metadata["author_data"] = author_data

        # Single-valued elements are copied only when they are present.
        for field, query in (('title', '//Title'),
                             ('description', '//Description'),
                             ('language', '//Language')):
            value = xpath.string(query)
            if value:
                self.metadata[field] = [value]

        dates = [d for d in (xpath.string('//Publication_Date'),
                             xpath.string('//Embargo_End_Date')) if d]
        if dates:
            self.metadata['date'] = dates

        rights = [r for r in (xpath.string('//License'),
                              xpath.string('//License/Properties/URL')) if r]
        if rights:
            self.metadata['rights'] = rights

        subject = [s for s in (xpath.strings('//Discipline') +
                               xpath.strings('//Tag')) if s]
        if subject:
            self.metadata['subject'] = subject

        # Coverage = covered locations plus an optional "start/end" period;
        # a period with only one bound is emitted as that single date.
        coverage = xpath.strings('//Location_Covered')
        period = "/".join([d for d in (xpath.string('//Start_Period'),
                                       xpath.string('//End_Period')) if d])
        if period:
            coverage = coverage + [period]
        if coverage:
            self.metadata['coverage'] = coverage

        relations = xpath.strings('//Persistent_Identifier')
        if relations:
            self.metadata['relation'] = relations

        self.sets[u'yoda'] = {
            u'name': u'YoDa',
            u'description': u'share-collaborate environment for research data'
        }

0 comments on commit 1e7fe23

Please sign in to comment.