From 3b5a2e8a9b73028631b21cf9d5c70d8ad16b4121 Mon Sep 17 00:00:00 2001 From: keko24 Date: Mon, 24 Jun 2024 11:16:35 +0200 Subject: [PATCH 1/3] Fixed an issue where search_pubs doesn't find a publication when only a single publication exists for the query. Added a unit test for search_pubs that tests for the previous problem. --- scholarly/publication_parser.py | 2 +- test_module.py | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py index 5d7bf37..60d4769 100644 --- a/scholarly/publication_parser.py +++ b/scholarly/publication_parser.py @@ -58,7 +58,7 @@ def _load_url(self, url: str): # this is temporary until setup json file self._soup = self._nav._get_soup(url) self._pos = 0 - self._rows = self._soup.find_all('div', class_='gs_r gs_or gs_scl') + self._soup.find_all('div', class_='gsc_mpat_ttl') + self._rows = self._soup.find_all('div', class_='gs_r gs_or gs_scl') + self._soup.find_all('div', class_='gs_r gs_or gs_scl gs_fmar') + self._soup.find_all('div', class_='gsc_mpat_ttl') def _get_total_results(self): if self._soup.find("div", class_="gs_pda"): diff --git a/test_module.py b/test_module.py index b2a3ac5..19c688c 100644 --- a/test_module.py +++ b/test_module.py @@ -653,7 +653,7 @@ def test_search_pubs_empty_publication(self): """ Test that searching for an empty publication returns zero results """ - pubs = [p for p in scholarly.search_pubs('')] + pubs = [p for p in scholarly.search_pubs('Perception of physical stability and center of mass of 3D objects')] self.assertIs(len(pubs), 0) def test_search_pubs_citedby(self): @@ -718,6 +718,23 @@ def test_search_pubs(self): titles = [p['bib']['title'] for p in pubs] self.assertIn('Visual perception of the physical stability of asymmetric three-dimensional objects', titles) + def test_search_pubs_single_pub(self): + """ + As of Jun 24, 2024 there is only one pub that fits the search term: + [Perception of
physical stability and center of mass of 3D objects]. + + Check that it returns a proper result and the total results for that search term is equal to 1. + """ + pub = scholarly.search_single_pub("Perception of physical stability and center of mass of 3D objects") + pubs = list(scholarly.search_pubs("Perception of physical stability and center of mass of 3D objects")) + # Check that the first entry in pubs is the same as pub. + # Checking for equality holds for non-dict entries only. + for key in {'author_id', 'pub_url', 'num_citations'}: + self.assertEqual(pub[key], pubs[0][key]) + for key in {'title', 'pub_year', 'venue'}: + self.assertEqual(pub['bib'][key], pubs[0]['bib'][key]) + self.assertEqual(len(pubs), 1) + def test_search_pubs_total_results(self): """ As of September 16, 2021 there are 32 pubs that fit the search term: From 0db2befd2a7f1f500a9433010f516d86ebd63e3c Mon Sep 17 00:00:00 2001 From: keko24 Date: Mon, 24 Jun 2024 11:42:31 +0200 Subject: [PATCH 2/3] Fixed total_results returning 0 when only a single publication exists. --- scholarly/publication_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scholarly/publication_parser.py b/scholarly/publication_parser.py index 60d4769..5d7e728 100644 --- a/scholarly/publication_parser.py +++ b/scholarly/publication_parser.py @@ -70,7 +70,7 @@ def _get_total_results(self): match = re.match(pattern=r'(^|\s*About)\s*([0-9,\.\s’]+)', string=x.text) if match: return int(re.sub(pattern=r'[,\.\s’]',repl='', string=match.group(2))) - return 0 + return len(self._rows) # Iterator protocol From 2cd59b3b8a3e5c10f4bb2fee15f5ea9d5363534a Mon Sep 17 00:00:00 2001 From: Andrej <56741017+keko24@users.noreply.github.com> Date: Tue, 25 Jun 2024 11:58:24 +0200 Subject: [PATCH 3/3] Removed the string in search_pubs in test_search_empty_publication.
--- test_module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_module.py b/test_module.py index 19c688c..bcd93e4 100644 --- a/test_module.py +++ b/test_module.py @@ -653,7 +653,7 @@ def test_search_pubs_empty_publication(self): """ Test that searching for an empty publication returns zero results """ - pubs = [p for p in scholarly.search_pubs('Perception of physical stability and center of mass of 3D objects')] + pubs = [p for p in scholarly.search_pubs('')] self.assertIs(len(pubs), 0) def test_search_pubs_citedby(self):