From a96f3c4140bd3a17846e67d850261a8127fb9ac4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Szymon=20=C5=81opaciuk?= Date: Thu, 14 Dec 2017 11:24:26 +0100 Subject: [PATCH] make parse_record abstract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Szymon Łopaciuk --- hepcrawl/spiders/oaipmh_spider.py | 3 +++ tests/unit/test_oaipmh.py | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hepcrawl/spiders/oaipmh_spider.py b/hepcrawl/spiders/oaipmh_spider.py index d785b214..cc3b9452 100644 --- a/hepcrawl/spiders/oaipmh_spider.py +++ b/hepcrawl/spiders/oaipmh_spider.py @@ -9,6 +9,7 @@ """Generic spider for OAI-PMH servers.""" +import abc import logging from errno import EEXIST as FILE_EXISTS, ENOENT as NO_SUCH_FILE_OR_DIR from datetime import datetime @@ -42,6 +43,7 @@ class OAIPMHSpider(StatefulSpider): the initial starting date and will use it as `from_date` argument on the next harvest. """ + __metaclass__ = abc.ABCMeta name = 'OAI-PMH' def __init__( @@ -84,6 +86,7 @@ def start_requests(self): LOGGER.info("Harvesting completed. Next harvesting will resume from {}" .format(self.until_date or now.strftime('%Y-%m-%d'))) + @abc.abstractmethod def parse_record(self, record): """ This method need to be reimplemented in order to provide special parsing. diff --git a/tests/unit/test_oaipmh.py b/tests/unit/test_oaipmh.py index 6db74b4c..2366d72d 100644 --- a/tests/unit/test_oaipmh.py +++ b/tests/unit/test_oaipmh.py @@ -43,7 +43,11 @@ def settings(): @pytest.fixture def spider(settings): - spider = OAIPMHSpider('http://0.0.0.0/oai2', settings=settings) + class TestOAIPMHSpider(OAIPMHSpider): + def parse_record(self, record): + return None + + spider = TestOAIPMHSpider('http://0.0.0.0/oai2', settings=settings) spider.from_date = '2017-12-08' spider.set = 'physics:hep-th' spider.metadata_prefix = 'marcxml'