Skip to content

Commit

Permalink
make parse_record abstract
Browse files Browse the repository at this point in the history
Signed-off-by: Szymon Łopaciuk <[email protected]>
  • Loading branch information
szymonlopaciuk committed Dec 14, 2017
1 parent 4ef28d2 commit 1108829
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 1 deletion.
3 changes: 3 additions & 0 deletions hepcrawl/spiders/oaipmh_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

"""Generic spider for OAI-PMH servers."""

import abc
import logging
from errno import EEXIST as FILE_EXISTS, ENOENT as NO_SUCH_FILE_OR_DIR
from datetime import datetime
Expand Down Expand Up @@ -42,6 +43,7 @@ class OAIPMHSpider(StatefulSpider):
the initial starting date and will use it as `from_date` argument on the
next harvest.
"""
__metaclass__ = abc.ABCMeta
name = 'OAI-PMH'

def __init__(
Expand Down Expand Up @@ -84,6 +86,7 @@ def start_requests(self):
LOGGER.info("Harvesting completed. Next harvesting will resume from {}"
.format(self.until_date or now.strftime('%Y-%m-%d')))

@abc.abstractmethod
def parse_record(self, record):
"""
This method needs to be reimplemented in order to provide special parsing.
Expand Down
6 changes: 5 additions & 1 deletion tests/unit/test_oaipmh.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,11 @@ def settings():

@pytest.fixture
def spider(settings):
spider = OAIPMHSpider('http://0.0.0.0/oai2', settings=settings)
class TestOAIPMHSpider(OAIPMHSpider):
def parse_record(self, record):
return None

spider = TestOAIPMHSpider('http://0.0.0.0/oai2', settings=settings)
spider.from_date = '2017-12-08'
spider.set = 'physics:hep-th'
spider.metadata_prefix = 'marcxml'
Expand Down

0 comments on commit 1108829

Please sign in to comment.