Skip to content

Commit

Permalink
Merge pull request #178 from semantic-systems/develop
Browse files Browse the repository at this point in the history
Merge develop to Main
  • Loading branch information
abdullah-rana authored Oct 10, 2023
2 parents ff435b1 + 3299f3b commit 67176d6
Show file tree
Hide file tree
Showing 27 changed files with 1,199 additions and 1,003 deletions.
5 changes: 4 additions & 1 deletion config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
request_header_user_agent: nfdi4dsBot/1.0 (https://www.nfdi4datascience.de/nfdi4dsBot/; [email protected])
request_timeout: 3
search_url_resodate: https://resodate.org/resources/api/search/oer_data/_search?pretty&size=100&q=
search_url_oersi: https://oersi.org/resources/api/search/oer_data/_search?pretty&q=
search_url_openalex_authors: https://api.openalex.org/authors?search=
search_url_orcid: https://pub.orcid.org/v3.0/expanded-search/?start=0&rows=100&q=
search_url_dblp: https://dblp.org/search?q=
search_url_dblp: https://dblp.org/search?q=
search_url_gesis: http://193.175.238.35:8089/dc/_search?size=100&q=
search_url_gepris: https://gepris.dfg.de/gepris/OCTOPUS?context=projekt&hitsPerPage=1&index=0&language=en&task=doSearchSimple&keywords_criterion=
14 changes: 7 additions & 7 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
from objects import Article, Organization, Person, Dataset, Project
from flask import Flask, render_template, request, make_response
import threading
from sources import dblp, zenodo, openalex, resodate, oersi, wikidata, cordis, gesis, orcid, gepris, ieee, codalab, \
eudat, openaire, eulg
# import dblp, zenodo, openalex, resodate, wikidata, cordis, gesis, orcid, gepris # , eulg
from sources import dblp, zenodo, openalex, resodate, oersi, wikidata, cordis, gesis, orcid, gepris, ieee, eudat, openaire, eulg
import details_page

logging.config.fileConfig(os.getenv('LOGGING_FILE_CONFIG', './logging.conf'))
Expand Down Expand Up @@ -49,15 +47,14 @@ def search_results():
'organizations': [],
'events': [],
'fundings': [],
'others': []
'others': [],
'timedout_sources': []
}
threads = []

# add all the sources here in this list; for simplicity we should use the exact module name
# ensure the main method which executes the search is named "search" in the module
sources = [resodate, oersi, openalex, orcid, dblp, zenodo, gesis, ieee, cordis, gepris, eudat, codalab,
wikidata, openaire, eulg]
# sources = [dblp, zenodo, openalex, resodate, wikidata, cordis, gesis, orcid, gepris]
sources = [resodate, oersi, openalex, orcid, dblp, zenodo, gesis, ieee, cordis, gepris, eudat, wikidata, openaire, eulg]

for source in sources:
t = threading.Thread(target=source.search, args=(search_term, results,))
Expand All @@ -76,6 +73,9 @@ def search_results():
logger.info(f'Got {len(results["fundings"])} fundings')
logger.info(f'Got {len(results["others"])} others')

results["timedout_sources"] = list(set(results["timedout_sources"]))
logger.info('Following sources got timed out:' + ','.join(results["timedout_sources"]))

return render_template('results.html', results=results, search_term=search_term)


Expand Down
30 changes: 17 additions & 13 deletions sources/codalab.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,19 @@

@utils.timeit
def search(search_term, results):
api_endpoint = "https://worksheets.codalab.org/rest/bundles"
limit_per_page = 10
params = {
"keywords": search_term,
"include_display_metadata": 1,
"include": "owner",
".limit": limit_per_page
}

try:

url = "https://worksheets.codalab.org/rest/bundles"
limit_per_page = 10
params = {
"keywords": search_term,
"include_display_metadata": 1,
"include": "owner",
".limit": limit_per_page
}
# Send an HTTP GET request to the API
response = requests.get(api_endpoint, params=params)
# response = requests.get(api_endpoint, params=params)
response = requests.get(url, timeout=3)

# Check if the request was successful (status code 200)
if response.status_code == 200:
Expand Down Expand Up @@ -120,6 +121,9 @@ def search(search_term, results):
else:
# Log an error message when the response is not successful
logger.error(f'Codalab response status code: {response.status_code}. Unable to fetch data from the API.')
except requests.exceptions.RequestException as e:
# Handle any errors that occur while making the API request
logger.error(f"Error occurred while making the API request to Codalab: {e}")
except requests.exceptions.Timeout as ex:
logger.error(f'Timed out Exception: {str(ex)}')
results['timedout_sources'].append('CODALAB')

except Exception as ex:
logger.error(f'Exception: {str(ex)}')
194 changes: 102 additions & 92 deletions sources/cordis.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,102 +9,112 @@

@utils.timeit
def search(search_term, results):
max_project_number = 50
api_url = f'https://cordis.europa.eu/search/?q=%27{search_term}%27%20AND%20contenttype=%27project%27&p=1&num={max_project_number}&srt=/project/contentUpdateDate:decreasing&format=json'
response = requests.get(api_url)

# Check if the response was successful
if response.status_code == 200:
logger.debug(f'Cordis response status code: {response.status_code}')
logger.debug(f'Cordis response headers: {response.headers}')
try:

data = response.json()

total_hits = data.get('result', {}).get('header', {}).get('numHits', 0)

logger.info(f'CORDIS - {total_hits} hits/records found')
max_project_number = 50
api_url = f'https://cordis.europa.eu/search/?q=%27{search_term}%27%20AND%20contenttype=%27project%27&p=1&num={max_project_number}&srt=/project/contentUpdateDate:decreasing&format=json'
# response = requests.get(api_url)
response = requests.get(api_url, timeout=3)
# response = timeout(requests.get, args=(api_url,), kwargs={'timeout': 10})

try:
hits = data.get('hits', {}).get('hit', [])
except AttributeError:
hits = [] # Set hits as an empty list if the 'get' operation fails due to AttributeError
# Check if the response was successful
if response.status_code == 200:
logger.debug(f'Cordis response status code: {response.status_code}')
logger.debug(f'Cordis response headers: {response.headers}')

for hit in hits:
data = response.json()

total_hits = data.get('result', {}).get('header', {}).get('numHits', 0)

logger.info(f'CORDIS - {total_hits} hits/records found')

try:
hits = data.get('hits', {}).get('hit', [])
except AttributeError:
hits = [] # Set hits as an empty list if the 'get' operation fails due to AttributeError

for hit in hits:

try:

if isinstance(hit, dict):
project = hit.get('project', {})
type = project.get('contenttype', '')

if type == "project":
fundings = Project()
fundings.source = 'CORDIS'
fundings.identifier = project.get('id', '')
fundings.name = project.get('title', '')
fundings.url = f"https://cordis.europa.eu/project/id/{fundings.identifier}"
fundings.dateStart = project.get('startDate', '')
fundings.dateEnd = project.get('endDate', '')
fundings.dateLastModified = project.get('lastUpdateDate', '')
fundings.description = project.get('teaser', '')
# this key attribute can be used for the details page of the resource tab in the next step
# it has more details about projects
fundings.objective = project.get("objective", '')
fundings.status = project.get("status", '')

keywords = project.get("keywords", None)
if keywords:
for keyword in keywords:
fundings.keywords.append(keyword)

languages = project.get("language", None)
if languages:
if isinstance(languages, list):
# If languages is a list, add each language to fundings.inLanguage
for language in languages:
fundings.inLanguage.append(language)
else:
# If languages is a single string, directly append it to fundings.inLanguage
fundings.inLanguage.append(languages)

languages_available = project.get("availableLanguages", None)
if languages_available:
if isinstance(languages_available, list):
# If languages_available is a list, add each language to fundings.availableLanguages
for language in languages_available:
fundings.availableLanguages.append(language)
else:
# If languages_available is a single string, directly append it to fundings.availableLanguages
fundings.availableLanguages.append(languages_available)

else:
# Handle the case when `hit` is not a dictionary
fundings = Project()
fundings.identifier = ''
fundings.name = ''
fundings.url = ''
fundings.date_start = ''
fundings.date_end = ''
fundings.description = ''

except KeyError as e:
# Handle the exception when the key is not found
print(f"KeyError: {e} - Key not found in API response")
# Set default none
fundings.identifier = None
fundings.name = None
fundings.url = None
fundings.date_start = None
fundings.date_end = None
fundings.description = None


results['fundings'].append(fundings)



if isinstance(hit, dict):
project = hit.get('project', {})
type = project.get('contenttype', '')
# logger.info(f'Got {len(results)} records from Cordis')

if type == "project":
fundings = Project()
fundings.source = 'CORDIS'
fundings.identifier = project.get('id', '')
fundings.name = project.get('title', '')
fundings.url = f"https://cordis.europa.eu/project/id/{fundings.identifier}"
fundings.dateStart = project.get('startDate', '')
fundings.dateEnd = project.get('endDate', '')
fundings.dateLastModified = project.get('lastUpdateDate', '')
fundings.description = project.get('teaser', '')
# this key attribute can be used for the details page of the resource tab in the next step
# it has more details about projects
fundings.objective = project.get("objective", '')
fundings.status = project.get("status", '')

keywords = project.get("keywords", None)
if keywords:
for keyword in keywords:
fundings.keywords.append(keyword)

languages = project.get("language", None)
if languages:
if isinstance(languages, list):
# If languages is a list, add each language to fundings.inLanguage
for language in languages:
fundings.inLanguage.append(language)
else:
# If languages is a single string, directly append it to fundings.inLanguage
fundings.inLanguage.append(languages)

languages_available = project.get("availableLanguages", None)
if languages_available:
if isinstance(languages_available, list):
# If languages_available is a list, add each language to fundings.availableLanguages
for language in languages_available:
fundings.availableLanguages.append(language)
else:
# If languages_available is a single string, directly append it to fundings.availableLanguages
fundings.availableLanguages.append(languages_available)

else:
# Handle the case when `hit` is not a dictionary
fundings = Project()
fundings.identifier = ''
fundings.name = ''
fundings.url = ''
fundings.date_start = ''
fundings.date_end = ''
fundings.description = ''

except KeyError as e:
# Handle the exception when the key is not found
print(f"KeyError: {e} - Key not found in API response")
# Set default none
fundings.identifier = None
fundings.name = None
fundings.url = None
fundings.date_start = None
fundings.date_end = None
fundings.description = None


results['fundings'].append(fundings)



# logger.info(f'Got {len(results)} records from Cordis')

else:
# Log an error message when the response is not successful
logger.error(f'Cordis response status code: {response.status_code}. Unable to fetch data from the API.')
else:
# Log an error message when the response is not successful
logger.error(f'Cordis response status code: {response.status_code}. Unable to fetch data from the API.')
except requests.exceptions.Timeout as ex:
logger.error(f'Timed out Exception: {str(ex)}')
results['timedout_sources'].append('CORDIS')

except Exception as ex:
logger.error(f'Exception: {str(ex)}')
7 changes: 6 additions & 1 deletion sources/dblp.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def search(search_term: str, results):
'Content-Type': 'application/json',
'User-Agent': utils.config["request_header_user_agent"]
}
response = requests.get(url, headers=headers)
response = requests.get(url, headers=headers, timeout=int(utils.config["request_timeout"]))

logger.debug(f'DBLP response status code: {response.status_code}')
logger.debug(f'DBLP response headers: {response.headers}')
Expand Down Expand Up @@ -117,5 +117,10 @@ def search(search_term: str, results):
# return results
# g.parse(data=json.dumps(data), format='json-ld')
# logger.info(f"Graph g has {len(g)} statements after querying DBLP.")

except requests.exceptions.Timeout as ex:
logger.error(f'Timed out Exception: {str(ex)}')
results['timedout_sources'].append('DBLP')

except Exception as ex:
logger.error(f'Exception: {str(ex)}')
Loading

0 comments on commit 67176d6

Please sign in to comment.