forked from infrae/moai
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Paul Frederiks
committed
Sep 29, 2017
1 parent
c7e0bb7
commit 1e7fe23
Showing
1 changed file
with
88 additions
and
41 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,60 +1,107 @@ | ||
from lxml import etree | ||
from datetime import datetime, timedelta | ||
|
||
from moai.utils import XPath | ||
from moai.utils import XPath, get_moai_log | ||
|
||
|
||
class YodaContent(object):
    """Content record filled from a Yoda datapackage metadata XML file.

    ``update(path)`` parses the XML file and fills ``id``, ``modified``,
    ``metadata`` and ``sets`` from the elements found in it.
    """

    # (metadata key, XPath query) pairs whose single string value is stored
    # as a one-element list when the element is present.
    _SINGLE_VALUE_FIELDS = (
        ('title', '//Title'),
        ('description', '//Description'),
        ('language', '//Language'),
    )

    def __init__(self, provider):
        """Create an empty record bound to ``provider``."""
        self.provider = provider
        self.id = None
        self.modified = None
        self.deleted = False
        self.sets = dict()
        self.metadata = dict()

    @staticmethod
    def _parse_date(text):
        """Return ``text`` parsed as ``YYYY-MM-DD``, or None if it is malformed."""
        try:
            # %m is the month; %M would be parsed as minutes.
            return datetime.strptime(text, "%Y-%m-%d")
        except ValueError:
            return None

    def update(self, path):
        """Populate this record from the metadata XML file at ``path``.

        Returns None in all cases. When the file is not well-formed XML, or
        it has no ``Persistent_Identifier_Datapackage`` element, a warning is
        logged and the record is left as it was.
        """
        log = get_moai_log()
        try:
            doc = etree.parse(path)
        except etree.ParseError:
            # NOTE(review): only XML parse errors are caught here; confirm
            # whether I/O errors for unreadable files should be handled too.
            log.warning("Failed to parse %s" % path)
            return

        xpath = XPath(doc, nsmap={})

        self.root = doc.getroot()

        identifier = xpath.string('//Persistent_Identifier_Datapackage')
        if not identifier:
            log.warning(
                "Missing Persistent Identifier of Datapackage in %s" % path)
            return

        self.id = 'oai:%s' % identifier
        self.metadata['identifier'] = [identifier]

        # Modification date: fall back to "yesterday" when the element is
        # absent or does not hold a YYYY-MM-DD date.
        last_modified = xpath.string('//Last_Modified_Date')
        modified = None
        if not last_modified:
            log.warning("Missing Last Modified Time in %s" % path)
        else:
            modified = self._parse_date(last_modified)
            if modified is None:
                log.warning("Invalid Last Modified Time %r in %s"
                            % (last_modified, path))
        if modified is None:
            modified = datetime.now() - timedelta(days=1)
        self.modified = modified

        # Creators and contributors are listed by name and also collected,
        # with a role marker, in 'author_data'.
        author_data = []
        creators = xpath.strings('//Creator/Name')
        if creators:
            self.metadata['creator'] = creators
            for creator in creators:
                author_data.append({u"name": creator, u"role": [u"auth"]})

        contributors = xpath.strings('//Contributor/Name')
        if contributors:
            self.metadata['contributor'] = contributors
            for contributor in contributors:
                author_data.append({u"name": contributor, u"role": [u"cont"]})

        self.metadata["author_data"] = author_data

        for key, query in self._SINGLE_VALUE_FIELDS:
            value = xpath.string(query)
            if value:
                self.metadata[key] = [value]

        dates = [d for d in (xpath.string('//Publication_Date'),
                             xpath.string('//Embargo_End_Date')) if d]
        if dates:
            self.metadata['date'] = dates

        rights = [r for r in (xpath.string('//License'),
                              xpath.string('//License/Properties/URL')) if r]
        if rights:
            self.metadata['rights'] = rights

        subject = [s for s in (xpath.strings('//Discipline')
                               + xpath.strings('//Tag')) if s]
        if subject:
            self.metadata['subject'] = subject

        # Coverage is the covered locations followed by the period, written
        # as "start/end" (or just the one bound that is present).
        coverage = xpath.strings('//Location_Covered')
        period = "/".join(d for d in (xpath.string('//Start_Period'),
                                      xpath.string('//End_Period')) if d)
        if period:
            coverage = coverage + [period]
        if coverage:
            self.metadata['coverage'] = coverage

        relations = xpath.strings('//Persistent_Identifier')
        if relations:
            self.metadata['relation'] = relations

        self.sets[u'yoda'] = {
            u'name': u'YoDa',
            u'description': u'share-collaborate environment for research data'
        }