diff --git a/scholar.py b/scholar.py index 13ccd43..ce1b191 100755 --- a/scholar.py +++ b/scholar.py @@ -166,6 +166,7 @@ import re import sys import warnings +import time try: # Try importing for Python 3 @@ -879,7 +880,7 @@ class ScholarSettings(object): def __init__(self): self.citform = 0 # Citation format, default none - self.per_page_results = None + self.per_page_results = 10 self._is_configured = False def set_citation_format(self, citform): @@ -893,8 +894,7 @@ def set_citation_format(self, citform): def set_per_page_results(self, per_page_results): self.per_page_results = ScholarUtils.ensure_int( per_page_results, 'page results must be integer') - self.per_page_results = min( - self.per_page_results, ScholarConf.MAX_PAGE_RESULTS) + self.per_page_results = min(self.per_page_results, ScholarConf.MAX_PAGE_RESULTS) self._is_configured = True def is_configured(self): @@ -1026,6 +1026,38 @@ def send_query(self, query): self.parse(html) + def get_citations(self,query): + """ + Given a query, it retrieves the list of articles that cite the first + article returned by the query. + It's done in two steps: first it retrieves the citations URL of the + first article, then it retrieves the articles that cite it. + """ + self.send_query(query) + + if len(self.articles)==0 or self.articles[0]['url_citations'] is None: + return + citations_url=self.articles[0]['url_citations'] + citations_num=self.articles[0]['num_citations'] + self.clear_articles() + + html = self._get_http_response(url=citations_url, + log_msg='dump of query response HTML', + err_msg='results retrieval failed') + if html is None: + return + self.parse(html) + while len(self.articles)