diff --git a/hepcrawl/spiders/oaipmh_spider.py b/hepcrawl/spiders/oaipmh_spider.py index d785b214..cc3b9452 100644 --- a/hepcrawl/spiders/oaipmh_spider.py +++ b/hepcrawl/spiders/oaipmh_spider.py @@ -9,6 +9,7 @@ """Generic spider for OAI-PMH servers.""" +import abc import logging from errno import EEXIST as FILE_EXISTS, ENOENT as NO_SUCH_FILE_OR_DIR from datetime import datetime @@ -42,6 +43,7 @@ class OAIPMHSpider(StatefulSpider): the initial starting date and will use it as `from_date` argument on the next harvest. """ + __metaclass__ = abc.ABCMeta name = 'OAI-PMH' def __init__( @@ -84,6 +86,7 @@ def start_requests(self): LOGGER.info("Harvesting completed. Next harvesting will resume from {}" .format(self.until_date or now.strftime('%Y-%m-%d'))) + @abc.abstractmethod def parse_record(self, record): """ This method need to be reimplemented in order to provide special parsing. diff --git a/tests/unit/test_oaipmh.py b/tests/unit/test_oaipmh.py index 6db74b4c..2366d72d 100644 --- a/tests/unit/test_oaipmh.py +++ b/tests/unit/test_oaipmh.py @@ -43,7 +43,11 @@ def settings(): @pytest.fixture def spider(settings): - spider = OAIPMHSpider('http://0.0.0.0/oai2', settings=settings) + class TestOAIPMHSpider(OAIPMHSpider): + def parse_record(self, record): + return None + + spider = TestOAIPMHSpider('http://0.0.0.0/oai2', settings=settings) spider.from_date = '2017-12-08' spider.set = 'physics:hep-th' spider.metadata_prefix = 'marcxml'