Skip to content

Commit

Permalink
Merge pull request #156 from semantic-systems/feature/openaire-search
Browse files Browse the repository at this point in the history
Feature/openaire search
  • Loading branch information
huntila authored Aug 31, 2023
2 parents bec5930 + 574ab65 commit f61c080
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 7 deletions.
16 changes: 10 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from objects import Article, Organization, Person, Dataset, Project
from flask import Flask, render_template, request, make_response
import threading
from sources import dblp, zenodo, openalex, resodate, oersi, wikidata, cordis, gesis, orcid, gepris, ieee, codalab, eudat # eulg
from sources import dblp, zenodo, openalex, resodate, oersi, wikidata, cordis, gesis, orcid, gepris, ieee, codalab, \
eudat, openaire # eulg
# import dblp, zenodo, openalex, resodate, wikidata, cordis, gesis, orcid, gepris # , eulg
import details_page

Expand Down Expand Up @@ -54,9 +55,10 @@ def search_results():

# add all the sources here in this list; for simplicity we should use the exact module name
# ensure the main method that executes the search is named "search" in the module
sources = [resodate, oersi, openalex, orcid, dblp, zenodo, gesis, ieee, cordis, gepris, eudat, codalab, wikidata]
sources = [resodate, oersi, openalex, orcid, dblp, zenodo, gesis, ieee, cordis, gepris, eudat, codalab,
wikidata, openaire]
# sources = [dblp, zenodo, openalex, resodate, wikidata, cordis, gesis, orcid, gepris]

for source in sources:
t = threading.Thread(target=source.search, args=(search_term, results,))
t.start()
Expand Down Expand Up @@ -117,14 +119,13 @@ def resource_details():
response.set_cookie('search-session', request.cookies['session'])

return response



@app.route('/researcher-details')
def researcher_details():
response = make_response(render_template('researcher-details.html'))


@app.route('/organization-details')
def organization_details():
response = make_response(render_template('organization-details.html'))
Expand All @@ -138,6 +139,7 @@ def organization_details():

return response


@app.route('/events-details')
def events_details():
response = make_response(render_template('events-details.html'))
Expand All @@ -151,6 +153,7 @@ def events_details():

return response


@app.route('/fundings-details')
def fundings_details():
response = make_response(render_template('fundings-details.html'))
Expand All @@ -163,6 +166,7 @@ def fundings_details():

return response


@app.route('/details', methods=['POST', 'GET'])
def details():
if request.method == 'GET':
Expand Down
5 changes: 4 additions & 1 deletion objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def __post_init__(self):
@dataclass
class Dataset(CreativeWork):
distribution: str = ""
issn: str = ""
issn: str = ""

def __post_init__(self):
# Loop through the fields
Expand All @@ -166,6 +166,9 @@ class Project(Organization):
objective: str = ""
status: str = ""
author: List[Union[Organization, Person]] = field(default_factory=list)
funder: List[Union[
Organization, Person]] = field(
default_factory=list) # Organization | Person # we can use pipe operator for Union in Python >= 3.10


def __post_init__(self):
Expand Down
156 changes: 156 additions & 0 deletions sources/openaire.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
import requests
import utils
from objects import Dataset, Author, Article, CreativeWork, Organization, Project
import logging

logger = logging.getLogger('nfdi_search_engine')


def search(search_string: str, results):
    """Run the OpenAIRE product and project searches and collect their hits.

    Args:
        search_string: keyword(s) to search for
        results: mapping of result category (e.g. 'publications', 'resources',
            'others') to a list of result objects; it is filled in place.
    Returns:
        the same ``results`` object that was passed in
    """
    # Run the two lookups independently so a failure in one does not prevent
    # the other from contributing its results.
    for lookup in (openaire_product_search, openaire_project_search):
        try:
            lookup(search_string, results)
        except Exception:
            logger.exception(f"Openaire lookup {lookup.__name__} failed")

    # ``len(results)`` would only count the category keys. Count the entries
    # tagged with this source instead; snapshots are taken because the mapping
    # and its lists are handed in by the caller and may still be growing.
    found = 0
    for bucket in list(results.values()):
        if not isinstance(bucket, list):
            continue
        found += sum(1 for item in list(bucket) if getattr(item, 'source', None) == 'Openaire')

    logger.info(f"Got {found} records from Openaire")
    return results


def _product_child(node, *keys):
    """Follow ``keys`` through nested dicts and return the dict found there.

    Returns an empty dict as soon as a step is missing or is not a dict, so the
    caller can always call ``.get`` on the result.
    """
    for key in keys:
        if not isinstance(node, dict):
            return {}
        node = node.get(key)
    return node if isinstance(node, dict) else {}


def _product_nodes(value):
    """Normalise a field that may be a dict, a list or ``None`` to a list of dicts."""
    if isinstance(value, dict):
        return [value]
    if isinstance(value, list):
        return [node for node in value if isinstance(node, dict)]
    return []


def _product_first_text(value):
    """Return the ``'$'`` text of a dict field, or of the first entry of a list field.

    Yields ``''`` when the field is missing, empty or malformed.
    """
    nodes = _product_nodes(value)
    return (nodes[0].get('$') or '') if nodes else ''


def openaire_product_search(search_string, results):
    """Query the OpenAIRE research-products endpoint and file the hits into ``results``.

    Publications become ``Article`` objects, datasets become ``Dataset`` objects
    and every other result type becomes a ``CreativeWork``. Publications are
    appended to ``results['publications']``, datasets and software to
    ``results['resources']`` and everything else to ``results['others']``.

    A failed request, a non-200 answer or a body that is not valid JSON yields
    no results instead of raising.

    Args:
        search_string: keyword(s) to search for
        results: mapping with the list-valued keys 'publications', 'resources'
            and 'others'; it is modified in place.
    """
    api_url = 'https://api.openaire.eu/search/researchProducts'
    try:
        # Without a timeout a stalled connection would block this search forever.
        response = requests.get(api_url,
                                params={"keywords": search_string, "format": "json", "size": 20},
                                timeout=30)
    except requests.exceptions.RequestException as error:
        logger.error(f'Openaire product search request failed: {error}')
        return

    logger.debug(f'Openaire product search response status code: {response.status_code}')
    logger.debug(f'Openaire product search response headers: {response.headers}')

    if response.status_code != 200:
        return

    # Decode only after the status check: error pages are not guaranteed to be JSON.
    try:
        data = response.json()
    except ValueError as error:
        logger.error(f'Openaire product search returned invalid JSON: {error}')
        return

    hits = _product_nodes(_product_child(data, 'response', 'results').get('result'))

    for hit in hits:
        pro_result = _product_child(hit, 'metadata', 'oaf:entity', 'oaf:result')
        result_type = _product_child(pro_result, 'resulttype').get('@classid', 'other')

        # check result type to create an Object of the right Class
        if result_type == 'publication':
            product = Article()
        elif result_type == 'dataset':
            product = Dataset()
        else:
            product = CreativeWork()
        product.source = 'Openaire'
        product.genre = result_type

        date = _product_child(pro_result, 'dateofacceptance').get('$')
        if date:
            product.datePublished = date

        # title can be dict or list. If list, there are 'main title' and 'alternate title'
        title = pro_result.get('title')
        if isinstance(title, dict):
            product.name = title.get('$', '')
        elif isinstance(title, list):
            for item in title:
                if isinstance(item, dict) and item.get('@classid') == 'main title':
                    product.name = item.get('$', '')

        # description can be dict or list; only the first entry of a list is used
        product.description = utils.remove_html_tags(_product_first_text(pro_result.get('description')))

        # Language can be set, missing, or "und" = Undetermined
        language = _product_child(pro_result, 'language').get('@classid', '')
        product.inLanguage = [language] if language and language != 'und' else []

        # pid can be dict or list; only the first entry of a list is used
        product.identifier = _product_first_text(pro_result.get('pid'))

        # Creators can be dict, list, None
        for node in _product_nodes(pro_result.get('creator')):
            creator = Author()
            creator.type = 'Person'
            creator.name = node.get('$', '')
            product.author.append(creator)

        # Check genre to add result to right category
        if result_type == 'publication':
            results['publications'].append(product)
        elif result_type in ('dataset', 'software'):
            results['resources'].append(product)
        else:
            results['others'].append(product)


def _project_child(node, *keys):
    """Follow ``keys`` through nested dicts and return the dict found there.

    Returns an empty dict as soon as a step is missing or is not a dict, so the
    caller can always call ``.get`` on the result.
    """
    for key in keys:
        if not isinstance(node, dict):
            return {}
        node = node.get(key)
    return node if isinstance(node, dict) else {}


def _project_nodes(value):
    """Normalise a field that may be a dict, a list or ``None`` to a list of dicts."""
    if isinstance(value, dict):
        return [value]
    if isinstance(value, list):
        return [node for node in value if isinstance(node, dict)]
    return []


def openaire_project_search(search_string, results):
    """Query the OpenAIRE projects endpoint and append the hits to ``results['others']``.

    Each hit becomes a ``Project`` with name, start/end date, call identifier,
    funders (from 'fundingtree') and related organisations (from 'rels').

    A failed request, a non-200 answer or a body that is not valid JSON yields
    no results instead of raising.

    Args:
        search_string: project name (or part of it) to search for
        results: mapping with the list-valued key 'others'; it is modified in place.
    """
    api_url = 'https://api.openaire.eu/search/projects'
    try:
        # Without a timeout a stalled connection would block this search forever.
        response = requests.get(api_url,
                                params={"name": search_string, "format": "json", "size": 20},
                                timeout=30)
    except requests.exceptions.RequestException as error:
        logger.error(f'Openaire project search request failed: {error}')
        return

    logger.debug(f'Openaire project search response status code: {response.status_code}')
    logger.debug(f'Openaire project search response headers: {response.headers}')

    if response.status_code != 200:
        return

    # Decode only after the status check: error pages are not guaranteed to be JSON.
    try:
        data = response.json()
    except ValueError as error:
        logger.error(f'Openaire project search returned invalid JSON: {error}')
        return

    hits = _project_nodes(_project_child(data, 'response', 'results').get('result'))

    for hit in hits:
        pro_result = _project_child(hit, 'metadata', 'oaf:entity', 'oaf:project')
        project = Project()
        project.source = 'Openaire'
        project.name = _project_child(pro_result, 'title').get('$', '')
        project.dateStart = _project_child(pro_result, 'startdate').get('$', '')
        project.dateEnd = _project_child(pro_result, 'enddate').get('$', '')
        project.identifier = _project_child(pro_result, 'callidentifier').get('$', '')

        # fundingtree can be dict, list or None
        # NOTE(review): confirm that 'name' sits directly under each fundingtree
        # entry and not under a nested funder node.
        for funding in _project_nodes(pro_result.get('fundingtree')):
            orga = Organization()
            orga.name = _project_child(funding, 'name').get('$', '')
            project.funder.append(orga)

        # "rels" can be None, dict, list. A missing 'rel' yields no relations,
        # so no blank-named author is attached to the project.
        relations = _project_nodes(_project_child(pro_result, 'rels').get('rel'))

        # This needs a review. Type 'Organization' ?
        for rel in relations:
            author_obj = Author()
            author_obj.type = 'Organization'
            author_obj.name = _project_child(rel, 'legalname').get('$', '')
            project.author.append(author_obj)
        results['others'].append(project)

0 comments on commit f61c080

Please sign in to comment.