Merge pull request #178 from semantic-systems/develop

Merge develop to Main
semantic-systems · Oct 10, 2023 · 67176d6 · 67176d6
2 parents ff435b1 + 3299f3b
commit 67176d6
Show file tree

Hide file tree

Showing 27 changed files with 1,199 additions and 1,003 deletions.
diff --git a/config.yaml b/config.yaml
@@ -1,6 +1,9 @@
 request_header_user_agent: nfdi4dsBot/1.0 (https://www.nfdi4datascience.de/nfdi4dsBot/; [email protected])
+request_timeout: 3
 search_url_resodate: https://resodate.org/resources/api/search/oer_data/_search?pretty&size=100&q=
 search_url_oersi: https://oersi.org/resources/api/search/oer_data/_search?pretty&q=
 search_url_openalex_authors: https://api.openalex.org/authors?search=
 search_url_orcid: https://pub.orcid.org/v3.0/expanded-search/?start=0&rows=100&q=
-search_url_dblp: https://dblp.org/search?q=
+search_url_dblp: https://dblp.org/search?q=
+search_url_gesis: http://193.175.238.35:8089/dc/_search?size=100&q=
+search_url_gepris: https://gepris.dfg.de/gepris/OCTOPUS?context=projekt&hitsPerPage=1&index=0&language=en&task=doSearchSimple&keywords_criterion=
diff --git a/main.py b/main.py
@@ -6,9 +6,7 @@
 from objects import Article, Organization, Person, Dataset, Project
 from flask import Flask, render_template, request, make_response
 import threading
-from sources import dblp, zenodo, openalex, resodate, oersi, wikidata, cordis, gesis, orcid, gepris, ieee, codalab, \
-    eudat, openaire, eulg
-# import dblp, zenodo, openalex, resodate, wikidata, cordis, gesis, orcid, gepris # , eulg
+from sources import dblp, zenodo, openalex, resodate, oersi, wikidata, cordis, gesis, orcid, gepris, ieee, eudat, openaire, eulg
 import details_page
 
 logging.config.fileConfig(os.getenv('LOGGING_FILE_CONFIG', './logging.conf'))
@@ -49,15 +47,14 @@ def search_results():
             'organizations': [],
             'events': [],
             'fundings': [],
-            'others': []
+            'others': [],
+            'timedout_sources': []
         }
         threads = []
 
         # add all the sources here in this list; for simplicity we should use the exact module name
         # ensure the main method which execute the search is named "search" in the module 
-        sources = [resodate, oersi, openalex, orcid, dblp, zenodo, gesis, ieee, cordis, gepris, eudat, codalab,
-                   wikidata, openaire, eulg]
-        # sources = [dblp, zenodo, openalex, resodate, wikidata, cordis, gesis, orcid, gepris]
+        sources = [resodate, oersi, openalex, orcid, dblp, zenodo, gesis, ieee, cordis, gepris, eudat, wikidata, openaire, eulg]
 
         for source in sources:
             t = threading.Thread(target=source.search, args=(search_term, results,))
@@ -76,6 +73,9 @@ def search_results():
         logger.info(f'Got {len(results["fundings"])} fundings')
         logger.info(f'Got {len(results["others"])} others')
 
+        results["timedout_sources"] = list(set(results["timedout_sources"]))
+        logger.info('Following sources got timed out:' + ','.join(results["timedout_sources"]))
+
         return render_template('results.html', results=results, search_term=search_term)
 
 

diff --git a/sources/codalab.py b/sources/codalab.py
@@ -10,18 +10,19 @@
 
 @utils.timeit
 def search(search_term, results):
-    api_endpoint = "https://worksheets.codalab.org/rest/bundles"
-    limit_per_page = 10
-    params = {
-        "keywords": search_term,
-        "include_display_metadata": 1,
-        "include": "owner",
-        ".limit": limit_per_page
-    }
-
     try:
+
+        url = "https://worksheets.codalab.org/rest/bundles"
+        limit_per_page = 10
+        params = {
+            "keywords": search_term,
+            "include_display_metadata": 1,
+            "include": "owner",
+            ".limit": limit_per_page
+        }
         # Send an HTTP GET request to the API
-        response = requests.get(api_endpoint, params=params)
+        # response = requests.get(api_endpoint, params=params)
+        response = requests.get(url, timeout=3)
 
         # Check if the request was successful (status code 200)
         if response.status_code == 200:
@@ -120,6 +121,9 @@ def search(search_term, results):
         else:
         # Log an error message when the response is not successful
             logger.error(f'Codalab response status code: {response.status_code}. Unable to fetch data from the API.')
-    except requests.exceptions.RequestException as e:
-        # Handle any errors that occur while making the API request
-        logger.error(f"Error occurred while making the API request to Codalab: {e}")
+    except requests.exceptions.Timeout as ex:
+        logger.error(f'Timed out Exception: {str(ex)}')
+        results['timedout_sources'].append('CODALAB')
+
+    except Exception as ex:
+        logger.error(f'Exception: {str(ex)}')
diff --git a/sources/cordis.py b/sources/cordis.py
@@ -9,102 +9,112 @@
 
 @utils.timeit
 def search(search_term, results):
-    max_project_number = 50
-    api_url = f'https://cordis.europa.eu/search/?q=%27{search_term}%27%20AND%20contenttype=%27project%27&p=1&num={max_project_number}&srt=/project/contentUpdateDate:decreasing&format=json'
-    response = requests.get(api_url)
-
-    # Check if the response was successful
-    if response.status_code == 200:
-        logger.debug(f'Cordis response status code: {response.status_code}')
-        logger.debug(f'Cordis response headers: {response.headers}')
+    try:
 
-        data = response.json()
-
-        total_hits = data.get('result', {}).get('header', {}).get('numHits', 0)
-
-        logger.info(f'CORDIS - {total_hits} hits/records found')
+        max_project_number = 50
+        api_url = f'https://cordis.europa.eu/search/?q=%27{search_term}%27%20AND%20contenttype=%27project%27&p=1&num={max_project_number}&srt=/project/contentUpdateDate:decreasing&format=json'
+        # response = requests.get(api_url)
+        response = requests.get(api_url, timeout=3)
+        # response = timeout(requests.get, args=(api_url,), kwargs={'timeout': 10})
 
-        try:
-            hits = data.get('hits', {}).get('hit', [])
-        except AttributeError:
-            hits = []  # Set hits as an empty list if the 'get' operation fails due to AttributeError
+        # Check if the response was successful
+        if response.status_code == 200:
+            logger.debug(f'Cordis response status code: {response.status_code}')
+            logger.debug(f'Cordis response headers: {response.headers}')
 
-        for hit in hits:
+            data = response.json()
 
+            total_hits = data.get('result', {}).get('header', {}).get('numHits', 0)
+
+            logger.info(f'CORDIS - {total_hits} hits/records found')
+
             try:
+                hits = data.get('hits', {}).get('hit', [])
+            except AttributeError:
+                hits = []  # Set hits as an empty list if the 'get' operation fails due to AttributeError
+
+            for hit in hits:
+
+                try:
+
+                    if isinstance(hit, dict):
+                        project = hit.get('project', {})
+                        type = project.get('contenttype', '')
+
+                        if type == "project":
+                            fundings = Project()
+                            fundings.source = 'CORDIS'
+                            fundings.identifier = project.get('id', '')
+                            fundings.name = project.get('title', '')
+                            fundings.url = f"https://cordis.europa.eu/project/id/{fundings.identifier}"
+                            fundings.dateStart = project.get('startDate', '')
+                            fundings.dateEnd = project.get('endDate', '')
+                            fundings.dateLastModified = project.get('lastUpdateDate', '')
+                            fundings.description = project.get('teaser', '')
+                            # this key attribute can be used for the details page of the resource tab in next step
+                            # it has more details about projects
+                            fundings.objective = project.get("objective", '')
+                            fundings.status = project.get("status", '')
+
+                            keywords = project.get("keywords", None)
+                            if keywords:
+                                for keyword in keywords:
+                                    fundings.keywords.append(keyword)
+
+                            languages = project.get("language", None)
+                            if languages:
+                                if isinstance(languages, list):
+                                    # If languages is a list, add each language to fundings.inLanguage
+                                    for language in languages:
+                                        fundings.inLanguage.append(language)
+                                else:
+                                    # If languages is a single string, directly append it to fundings.inLanguage
+                                    fundings.inLanguage.append(languages)
+
+                            languages_available = project.get("availableLanguages", None)
+                            if languages_available:
+                                if isinstance(languages_available, list):
+                                    # If languages_available is a list, add each language to fundings.availableLanguages
+                                    for language in languages_available:
+                                        fundings.availableLanguages.append(language)
+                                else:
+                                    # If languages_available is a single string, directly append it to fundings.availableLanguages
+                                    fundings.availableLanguages.append(languages_available)
+
+                    else:
+                        # Handle the case when `hit` is not a dictionary
+                        fundings = Project()
+                        fundings.identifier = ''
+                        fundings.name = ''
+                        fundings.url = ''
+                        fundings.date_start = ''
+                        fundings.date_end = ''
+                        fundings.description = ''
+
+                except KeyError as e:
+                    # Handle the exception when the key is not found
+                    print(f"KeyError: {e} - Key not found in API response")
+                    # Set default none
+                    fundings.identifier = None
+                    fundings.name = None
+                    fundings.url = None
+                    fundings.date_start = None
+                    fundings.date_end = None
+                    fundings.description = None
+
+
+                results['fundings'].append(fundings)
+
+
 
-                if isinstance(hit, dict):
-                    project = hit.get('project', {})
-                    type = project.get('contenttype', '')
+            # logger.info(f'Got {len(results)} records from Cordis') 
 
-                    if type == "project":
-                        fundings = Project()
-                        fundings.source = 'CORDIS'
-                        fundings.identifier = project.get('id', '')
-                        fundings.name = project.get('title', '')
-                        fundings.url = f"https://cordis.europa.eu/project/id/{fundings.identifier}"
-                        fundings.dateStart = project.get('startDate', '')
-                        fundings.dateEnd = project.get('endDate', '')
-                        fundings.dateLastModified = project.get('lastUpdateDate', '')
-                        fundings.description = project.get('teaser', '')
-                        # this key attribute can be used for the details page of the resource tab in next step
-                        # it has more detais about projects
-                        fundings.objective = project.get("objective", '')
-                        fundings.status = project.get("status", '')
-
-                        keywords = project.get("keywords", None)
-                        if keywords:
-                            for keyword in keywords:
-                                fundings.keywords.append(keyword)
-
-                        languages = project.get("language", None)
-                        if languages:
-                            if isinstance(languages, list):
-                                # If languages is a list, add each language to fundings.inLanguage
-                                for language in languages:
-                                    fundings.inLanguage.append(language)
-                            else:
-                                # If languages is a single string, directly append it to fundings.inLanguage
-                                fundings.inLanguage.append(languages)
-
-                        languages_available = project.get("availableLanguages", None)
-                        if languages_available:
-                            if isinstance(languages_available, list):
-                                # If languages_available is a list, add each language to fundings.languages_available
-                                for language in languages_available:
-                                    fundings.availableLanguages.append(language)
-                            else:
-                                # If languages is a single string, directly append it to fundings.inLanguage
-                                fundings.availableLanguages.append(languages_available)
-
-                else:
-                    # Handle the case when `hit` is not a dictionary
-                    fundings = Project()
-                    fundings.identifier = ''
-                    fundings.name = ''
-                    fundings.url = ''
-                    fundings.date_start = ''
-                    fundings.date_end = ''
-                    fundings.description = ''
-
-            except KeyError as e:
-                # Handle the exception when the key is not found
-                print(f"KeyError: {e} - Key not found in API response")
-                # Set default none
-                fundings.identifier = None
-                fundings.name = None
-                fundings.url = None
-                fundings.date_start = None
-                fundings.date_end = None
-                fundings.description = None
-
-
-            results['fundings'].append(fundings)
-
-
-
-        # logger.info(f'Got {len(results)} records from Cordis') 
-
-    else:
-        # Log an error message when the response is not successful
-        logger.error(f'Cordis response status code: {response.status_code}. Unable to fetch data from the API.')
+        else:
+            # Log an error message when the response is not successful
+            logger.error(f'Cordis response status code: {response.status_code}. Unable to fetch data from the API.')
+    except requests.exceptions.Timeout as ex:
+        logger.error(f'Timed out Exception: {str(ex)}')
+        results['timedout_sources'].append('CORDIS')
+
+    except Exception as ex:
+        logger.error(f'Exception: {str(ex)}')
diff --git a/sources/dblp.py b/sources/dblp.py
@@ -41,7 +41,7 @@ def search(search_term: str, results):
                    'Content-Type': 'application/json',
                    'User-Agent': utils.config["request_header_user_agent"]
                    }
-        response = requests.get(url, headers=headers)        
+        response = requests.get(url, headers=headers, timeout=int(utils.config["request_timeout"]))        
 
         logger.debug(f'DBLP response status code: {response.status_code}')
         logger.debug(f'DBLP response headers: {response.headers}')
@@ -117,5 +117,10 @@ def search(search_term: str, results):
         # return results
         # g.parse(data=json.dumps(data), format='json-ld')
         # logger.info(f"Graph g has {len(g)} statements after querying DBLP.")
+
+    except requests.exceptions.Timeout as ex:
+        logger.error(f'Timed out Exception: {str(ex)}')
+        results['timedout_sources'].append('DBLP')
+
     except Exception as ex:
         logger.error(f'Exception: {str(ex)}')