diff --git a/hepcrawl/spiders/__init__.py b/hepcrawl/spiders/__init__.py index b931594e..2d6d6746 100644 --- a/hepcrawl/spiders/__init__.py +++ b/hepcrawl/spiders/__init__.py @@ -8,11 +8,3 @@ # more details. from __future__ import absolute_import, division, print_function - -from scrapy import Spider - - -class StatefulSpider(Spider): - def __init__(self, *args, **kwargs): - self.state = {} - super(StatefulSpider, self).__init__(*args, **kwargs) diff --git a/hepcrawl/spiders/alpha_spider.py b/hepcrawl/spiders/alpha_spider.py index ce334056..8a9e285e 100644 --- a/hepcrawl/spiders/alpha_spider.py +++ b/hepcrawl/spiders/alpha_spider.py @@ -18,7 +18,7 @@ from scrapy import Request from scrapy.spiders import CrawlSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/aps_spider.py b/hepcrawl/spiders/aps_spider.py index 69e19010..0adf94b1 100644 --- a/hepcrawl/spiders/aps_spider.py +++ b/hepcrawl/spiders/aps_spider.py @@ -18,7 +18,7 @@ from scrapy import Request -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/arxiv_spider.py b/hepcrawl/spiders/arxiv_spider.py index 64d076dc..a72b1e3b 100644 --- a/hepcrawl/spiders/arxiv_spider.py +++ b/hepcrawl/spiders/arxiv_spider.py @@ -16,7 +16,7 @@ from scrapy import Request, Selector from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..mappings import CONFERENCE_WORDS, THESIS_WORDS diff --git a/hepcrawl/spiders/base_spider.py b/hepcrawl/spiders/base_spider.py index 79748fde..0f596c68 100644 --- a/hepcrawl/spiders/base_spider.py +++ b/hepcrawl/spiders/base_spider.py @@ -16,7 +16,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/brown_spider.py b/hepcrawl/spiders/brown_spider.py index fe1c340d..dba3d27d 100644 --- a/hepcrawl/spiders/brown_spider.py +++ b/hepcrawl/spiders/brown_spider.py @@ -19,7 +19,7 @@ from scrapy import Request from scrapy.spiders import CrawlSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/cds_spider.py b/hepcrawl/spiders/cds_spider.py index edcdeb12..60d8d5be 100644 --- a/hepcrawl/spiders/cds_spider.py +++ b/hepcrawl/spiders/cds_spider.py @@ -14,7 +14,7 @@ from inspire_dojson import marcxml2record from os.path import join as path_join -from .oaipmh_spider import OAIPMHSpider +from .common import OAIPMHSpider from ..utils import ParsedItem diff --git a/hepcrawl/spiders/common/__init__.py b/hepcrawl/spiders/common/__init__.py new file mode 100644 index 00000000..5453444a --- /dev/null +++ b/hepcrawl/spiders/common/__init__.py @@ -0,0 +1,13 @@ +# -*- coding: utf-8 -*- +# +# This file is part of hepcrawl. +# Copyright (C) 2015, 2016, 2017, 2018 CERN. +# +# hepcrawl is a free software; you can redistribute it and/or modify it +# under the terms of the Revised BSD License; see LICENSE file for +# more details. + +from __future__ import absolute_import, division, print_function + +from .oaipmh_spider import OAIPMHSpider +from .stateful_spider import StatefulSpider diff --git a/hepcrawl/spiders/oaipmh_spider.py b/hepcrawl/spiders/common/oaipmh_spider.py similarity index 99% rename from hepcrawl/spiders/oaipmh_spider.py rename to hepcrawl/spiders/common/oaipmh_spider.py index cc3b9452..3ea147a4 100644 --- a/hepcrawl/spiders/oaipmh_spider.py +++ b/hepcrawl/spiders/common/oaipmh_spider.py @@ -23,7 +23,7 @@ from scrapy.http import Request, XmlResponse from scrapy.selector import Selector -from . import StatefulSpider +from .stateful_spider import StatefulSpider LOGGER = logging.getLogger(__name__) diff --git a/hepcrawl/spiders/common/stateful_spider.py b/hepcrawl/spiders/common/stateful_spider.py new file mode 100644 index 00000000..3de5c613 --- /dev/null +++ b/hepcrawl/spiders/common/stateful_spider.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# +# This file is part of hepcrawl. +# Copyright (C) 2015, 2016, 2017, 2018 CERN. +# +# hepcrawl is a free software; you can redistribute it and/or modify it +# under the terms of the Revised BSD License; see LICENSE file for +# more details. + +from __future__ import absolute_import, division, print_function + +from scrapy import Spider + + +class StatefulSpider(Spider): + def __init__(self, *args, **kwargs): + self.state = {} + super(StatefulSpider, self).__init__(*args, **kwargs) diff --git a/hepcrawl/spiders/desy_spider.py b/hepcrawl/spiders/desy_spider.py index 05b35282..8da15c41 100644 --- a/hepcrawl/spiders/desy_spider.py +++ b/hepcrawl/spiders/desy_spider.py @@ -18,7 +18,7 @@ from scrapy import Request from six.moves import urllib -from . import StatefulSpider +from .common import StatefulSpider from ..utils import ( ftp_list_files, ftp_connection_info, diff --git a/hepcrawl/spiders/dnb_spider.py b/hepcrawl/spiders/dnb_spider.py index 5f243b94..b4a4540c 100644 --- a/hepcrawl/spiders/dnb_spider.py +++ b/hepcrawl/spiders/dnb_spider.py @@ -14,7 +14,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/edp_spider.py b/hepcrawl/spiders/edp_spider.py index c051c8ee..5c1fc8db 100644 --- a/hepcrawl/spiders/edp_spider.py +++ b/hepcrawl/spiders/edp_spider.py @@ -19,7 +19,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..extractors.jats import Jats from ..items import HEPRecord from ..loaders import HEPLoader diff --git a/hepcrawl/spiders/elsevier_spider.py b/hepcrawl/spiders/elsevier_spider.py index e2d4e919..f4d97b12 100644 --- a/hepcrawl/spiders/elsevier_spider.py +++ b/hepcrawl/spiders/elsevier_spider.py @@ -23,7 +23,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/hindawi_spider.py b/hepcrawl/spiders/hindawi_spider.py index 5f81f5b4..7c14ab41 100644 --- a/hepcrawl/spiders/hindawi_spider.py +++ b/hepcrawl/spiders/hindawi_spider.py @@ -14,7 +14,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/infn_spider.py b/hepcrawl/spiders/infn_spider.py index 2e093ab1..04240307 100644 --- a/hepcrawl/spiders/infn_spider.py +++ b/hepcrawl/spiders/infn_spider.py @@ -19,7 +19,7 @@ from scrapy.http import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/iop_spider.py b/hepcrawl/spiders/iop_spider.py index fbca3ae5..5b1f2826 100644 --- a/hepcrawl/spiders/iop_spider.py +++ b/hepcrawl/spiders/iop_spider.py @@ -18,7 +18,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..extractors.nlm import NLM from ..items import HEPRecord from ..loaders import HEPLoader diff --git a/hepcrawl/spiders/magic_spider.py b/hepcrawl/spiders/magic_spider.py index 8dfd5d51..27f79b80 100644 --- a/hepcrawl/spiders/magic_spider.py +++ b/hepcrawl/spiders/magic_spider.py @@ -16,7 +16,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/mit_spider.py b/hepcrawl/spiders/mit_spider.py index 21804873..e24fcfb0 100644 --- a/hepcrawl/spiders/mit_spider.py +++ b/hepcrawl/spiders/mit_spider.py @@ -21,7 +21,7 @@ from scrapy.http import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/phenix_spider.py b/hepcrawl/spiders/phenix_spider.py index aa54bd98..3e8b990b 100644 --- a/hepcrawl/spiders/phenix_spider.py +++ b/hepcrawl/spiders/phenix_spider.py @@ -16,7 +16,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ParsedItem diff --git a/hepcrawl/spiders/phil_spider.py b/hepcrawl/spiders/phil_spider.py index 06f52da2..b1b76284 100644 --- a/hepcrawl/spiders/phil_spider.py +++ b/hepcrawl/spiders/phil_spider.py @@ -17,7 +17,7 @@ from scrapy import Request from scrapy.spiders import CrawlSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/pos_spider.py b/hepcrawl/spiders/pos_spider.py index 024eff6b..a85e3e5c 100644 --- a/hepcrawl/spiders/pos_spider.py +++ b/hepcrawl/spiders/pos_spider.py @@ -18,7 +18,7 @@ from scrapy import Request, Selector -from . import StatefulSpider +from .common import StatefulSpider from ..dateutils import create_valid_date from ..items import HEPRecord from ..loaders import HEPLoader diff --git a/hepcrawl/spiders/t2k_spider.py b/hepcrawl/spiders/t2k_spider.py index db18eb1e..a165bbed 100644 --- a/hepcrawl/spiders/t2k_spider.py +++ b/hepcrawl/spiders/t2k_spider.py @@ -16,7 +16,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..items import HEPRecord from ..loaders import HEPLoader from ..utils import ( diff --git a/hepcrawl/spiders/wsp_spider.py b/hepcrawl/spiders/wsp_spider.py index 280b6875..5a5776ec 100644 --- a/hepcrawl/spiders/wsp_spider.py +++ b/hepcrawl/spiders/wsp_spider.py @@ -18,7 +18,7 @@ from scrapy import Request from scrapy.spiders import XMLFeedSpider -from . import StatefulSpider +from .common import StatefulSpider from ..parsers import JatsParser from ..utils import ( ftp_list_files, diff --git a/tests/unit/test_oaipmh.py b/tests/unit/test_oaipmh.py index 2366d72d..f1715cce 100644 --- a/tests/unit/test_oaipmh.py +++ b/tests/unit/test_oaipmh.py @@ -11,7 +11,7 @@ from mock import patch import pytest -from hepcrawl.spiders.oaipmh_spider import OAIPMHSpider, NoLastRunToLoad +from hepcrawl.spiders.common.oaipmh_spider import OAIPMHSpider, NoLastRunToLoad from hepcrawl.testlib.fixtures import clean_dir from scrapy.utils.project import get_project_settings